From dbfed13426ab54f8a5809102545d0b4f2492161f Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Sun, 27 Sep 2015 21:51:02 +0200
Subject: [PATCH] naive exact pattern match

---
 .../src/arbology/exact/ExactPatternMatch.cpp  | 136 ++++++++++++++++++
 .../src/arbology/exact/ExactPatternMatch.h    |  55 +++++++
 2 files changed, 191 insertions(+)
 create mode 100644 alib2algo/src/arbology/exact/ExactPatternMatch.cpp
 create mode 100644 alib2algo/src/arbology/exact/ExactPatternMatch.h

diff --git a/alib2algo/src/arbology/exact/ExactPatternMatch.cpp b/alib2algo/src/arbology/exact/ExactPatternMatch.cpp
new file mode 100644
index 0000000000..6e805a0356
--- /dev/null
+++ b/alib2algo/src/arbology/exact/ExactPatternMatch.cpp
@@ -0,0 +1,136 @@
+/*
+ * ExactPatternMatch.cpp
+ *
+ *  Created on: 9. 2. 2014
+ *      Author: Jan Travnicek
+ */
+
+#include "ExactPatternMatch.h"
+#include <exception/AlibException.h>
+#include <tree/ranked/RankedTree.h>
+#include <tree/ranked/RankedPattern.h>
+#include <tree/ranked/PrefixRankedTree.h>
+#include <tree/ranked/PrefixRankedPattern.h>
+#include <tree/ranked/PrefixRankedBarTree.h>
+#include <tree/ranked/PrefixRankedBarPattern.h>
+#include <tree/unranked/UnrankedTree.h>
+#include <tree/unranked/UnrankedPattern.h>
+#include "SubtreeJumpTable.h"
+
+#include <deque>
+#include <foreach>
+
+namespace arbology {
+
+namespace exact {
+
+// Entry point: forwards both arguments' data to the double-dispatch instance, which selects a registered overload.
+std::set<unsigned> ExactPatternMatch::match(const tree::Tree& subject, const tree::Tree& pattern) {
+	return getInstance().dispatch(subject.getData(), pattern.getData());
+}
+
+// Tests whether pattern matches subject at this node; a pattern node equal to subtreeVariable matches any subtree.
+bool ExactPatternMatch::matchHelper(const tree::UnrankedNode& subject, const tree::UnrankedNode& pattern, const alphabet::Symbol& subtreeVariable) {
+	if(pattern.getSymbol() == subtreeVariable) return true;
+	if(subject.getSymbol() != pattern.getSymbol()) return false;
+	if(subject.getChildren().size() != pattern.getChildren().size()) return false;
+	for(const std::tuple<const tree::UnrankedNode*, const tree::UnrankedNode*>& childs : std::make_pair_foreach(subject.getChildren(), pattern.getChildren())) {
+		if(!matchHelper(*std::get<0>(childs), *std::get<1>(childs), subtreeVariable)) return false;
+	}
+	return true;
+}
+
+// Ranked variant: tests whether pattern matches subject at this node; subtreeVariable matches any subtree.
+bool ExactPatternMatch::matchHelper(const tree::RankedNode& subject, const tree::RankedNode& pattern, const alphabet::RankedSymbol& subtreeVariable) {
+	if(pattern.getSymbol() == subtreeVariable) return true;
+	if(subject.getSymbol() != pattern.getSymbol()) return false;
+	// ranked symbols are the same; test for number of children is not needed
+	for(const std::tuple<const tree::RankedNode*, const tree::RankedNode*>& childs : std::make_pair_foreach(subject.getChildren(), pattern.getChildren())) {
+		if(!matchHelper(*std::get<0>(childs), *std::get<1>(childs), subtreeVariable)) return false;
+	}
+	return true;
+}
+
+// Walks subject in pre-order, numbering nodes through index, and stores in occ the number of every node where pattern matches.
+void ExactPatternMatch::matchInternal(unsigned& index, std::set<unsigned>& occ, const tree::UnrankedNode& subject, const tree::UnrankedNode& pattern, const alphabet::Symbol& subtreeVariable) {
+	if(matchHelper(subject, pattern, subtreeVariable)) occ.insert(index);
+	index++;
+	for(const tree::UnrankedNode* child : subject.getChildren()) {
+		matchInternal(index, occ, *child, pattern, subtreeVariable);
+	}
+}
+
+// Ranked variant: walks subject in pre-order, numbering nodes through index, and stores matching node numbers in occ.
+void ExactPatternMatch::matchInternal(unsigned& index, std::set<unsigned>& occ, const tree::RankedNode& subject, const tree::RankedNode& pattern, const alphabet::RankedSymbol& subtreeVariable) {
+	if(matchHelper(subject, pattern, subtreeVariable)) occ.insert(index);
+	index++;
+	for(const tree::RankedNode* child : subject.getChildren()) {
+		matchInternal(index, occ, *child, pattern, subtreeVariable);
+	}
+}
+
+std::set<unsigned> ExactPatternMatch::match(const tree::UnrankedTree& subject, const tree::UnrankedPattern& pattern) {
+	unsigned i = 0;
+	std::set<unsigned> occ;
+	matchInternal(i, occ, subject.getRoot(), pattern.getRoot(), pattern.getSubtreeWildcard());
+	return occ;
+}
+
+auto ExactPatternMatchUnrankedTree = ExactPatternMatch::RegistratorWrapper<std::set<unsigned>, tree::UnrankedTree, tree::UnrankedPattern>(ExactPatternMatch::getInstance(), ExactPatternMatch::match);
+
+std::set<unsigned> ExactPatternMatch::match(const tree::RankedTree& subject, const tree::RankedPattern& pattern) {
+	unsigned i = 0;
+	std::set<unsigned> occ;
+	matchInternal(i, occ, subject.getRoot(), pattern.getRoot(), pattern.getSubtreeWildcard());
+	return occ;
+}
+
+auto ExactPatternMatchRankedTree = ExactPatternMatch::RegistratorWrapper<std::set<unsigned>, tree::RankedTree, tree::RankedPattern>(ExactPatternMatch::getInstance(), ExactPatternMatch::match);
+
+std::set<unsigned> ExactPatternMatch::match(const tree::PrefixRankedTree& subject, const tree::PrefixRankedPattern& pattern) {
+	std::vector < int > subjectSubtreeJumpTable = SubtreeJumpTable::compute ( subject );
+
+	std::set<unsigned> occ;
+	for(unsigned i = 0; i + pattern.getContent().size() <= subject.getContent().size(); i++) {
+		unsigned j = 0;
+		// offset alone is the subject position (it already includes the matched prefix); a wildcard moves it via the jump table
+		unsigned offset = i;
+		for(; j < pattern.getContent().size() && offset < subject.getContent().size(); j++) {
+			if(pattern.getContent()[j] == subject.getContent()[offset]) offset++;
+			else if(pattern.getContent()[j] == pattern.getSubtreeWildcard()) offset = subjectSubtreeJumpTable[offset];
+			else break;
+		}
+
+		if( j == pattern.getContent().size() )
+			occ.insert(i);
+	}
+	return occ;
+}
+
+auto ExactPatternMatchPrefixRankedTree = ExactPatternMatch::RegistratorWrapper<std::set<unsigned>, tree::PrefixRankedTree, tree::PrefixRankedPattern>(ExactPatternMatch::getInstance(), ExactPatternMatch::match);
+
+std::set<unsigned> ExactPatternMatch::match(const tree::PrefixRankedBarTree& subject, const tree::PrefixRankedBarPattern& pattern) {
+	std::vector < int > subjectSubtreeJumpTable = SubtreeJumpTable::compute ( subject );
+
+	std::set<unsigned> occ;
+	for(unsigned i = 0; i + pattern.getContent().size() <= subject.getContent().size(); i++) {
+		unsigned j = 0;
+		// offset alone is the subject position (it already includes the matched prefix); a wildcard moves it via the jump table
+		unsigned offset = i;
+		for(; j < pattern.getContent().size() && offset < subject.getContent().size(); j++) {
+			if(pattern.getContent()[j] == subject.getContent()[offset]) offset++;
+			else if(pattern.getContent()[j] == pattern.getSubtreeWildcard()) offset = subjectSubtreeJumpTable[offset];
+			else break;
+		}
+
+		if( j == pattern.getContent().size() )
+			occ.insert(i);
+	}
+	return occ;
+}
+
+auto ExactPatternMatchPrefixRankedBarTree = ExactPatternMatch::RegistratorWrapper<std::set<unsigned>, tree::PrefixRankedBarTree, tree::PrefixRankedBarPattern>(ExactPatternMatch::getInstance(), ExactPatternMatch::match);
+
+} /* namespace exact */
+
+} /* namespace arbology */
diff --git a/alib2algo/src/arbology/exact/ExactPatternMatch.h b/alib2algo/src/arbology/exact/ExactPatternMatch.h
new file mode 100644
index 0000000000..099055afab
--- /dev/null
+++ b/alib2algo/src/arbology/exact/ExactPatternMatch.h
@@ -0,0 +1,55 @@
+/*
+ * ExactPatternMatch.h
+ *
+ *  Created on: 9. 2. 2014
+ *      Author: Jan Travnicek
+ */
+
+#ifndef _EXACT_PATTERN_MATCH_H_
+#define _EXACT_PATTERN_MATCH_H_
+
+#include <tree/Tree.h>
+#include <tree/TreeFeatures.h>
+#include <tree/ranked/RankedNode.h>
+#include <tree/unranked/UnrankedNode.h>
+#include <set>
+#include <common/multipleDispatch.hpp>
+
+namespace arbology {
+
+namespace exact {
+
+class ExactPatternMatch : public std::DoubleDispatch<std::set<unsigned>, tree::TreeBase, tree::TreeBase> {
+public:
+	/**
+	 * Finds all occurrences of a tree pattern in a subject tree.
+	 * The call is dispatched to the overload registered for the types of both arguments.
+	 * @param subject tree to search in
+	 * @param pattern tree pattern to search for; its subtree wildcard matches any subtree
+	 * @return positions of occurrences (pre-order node number for the tree overloads, index into getContent() for the prefix overloads)
+	 */
+	static std::set<unsigned> match(const tree::Tree& subject, const tree::Tree& pattern);
+
+	static std::set<unsigned> match(const tree::UnrankedTree& subject, const tree::UnrankedPattern& pattern);
+	static std::set<unsigned> match(const tree::RankedTree& subject, const tree::RankedPattern& pattern);
+	static std::set<unsigned> match(const tree::PrefixRankedTree& subject, const tree::PrefixRankedPattern& pattern);
+	static std::set<unsigned> match(const tree::PrefixRankedBarTree& subject, const tree::PrefixRankedBarPattern& pattern);
+private:
+	static bool matchHelper(const tree::UnrankedNode& subject, const tree::UnrankedNode& pattern, const alphabet::Symbol& subtreeVariable);
+	static bool matchHelper(const tree::RankedNode& subject, const tree::RankedNode& pattern, const alphabet::RankedSymbol& subtreeVariable);
+
+	static void matchInternal(unsigned& index, std::set<unsigned>& occ, const tree::UnrankedNode& subject, const tree::UnrankedNode& pattern, const alphabet::Symbol& subtreeVariable);
+	static void matchInternal(unsigned& index, std::set<unsigned>& occ, const tree::RankedNode& subject, const tree::RankedNode& pattern, const alphabet::RankedSymbol& subtreeVariable);
+
+public:
+	static ExactPatternMatch& getInstance() {
+		static ExactPatternMatch res;
+		return res;
+	}
+};
+
+} /* namespace exact */
+
+} /* namespace arbology */
+
+#endif /* _EXACT_PATTERN_MATCH_H_ */
-- 
GitLab