/* * KnuthMorrisPratt.h * * Created on: 5. 11. 2014 * Author: Jan Travnicek */ #ifndef _ARBOLOGY_KNUTH_MORRIS_PRATT_H_ #define _ARBOLOGY_KNUTH_MORRIS_PRATT_H_ #include <set> #include <vector> #include <tree/properties/BorderArrayNaive.h> #include <tree/properties/SubtreeJumpTable.h> #include <tree/ranked/PrefixRankedBarTree.h> #include <tree/ranked/PrefixRankedBarPattern.h> #include <tree/ranked/PrefixRankedTree.h> #include <tree/ranked/PrefixRankedPattern.h> namespace arbology { namespace exact { /** * Implementation of BMH for MI(E+\eps)-EVY course 2014 * To get rid of zeros in BCS table we ignore last haystack character */ class KnuthMorrisPratt { public: /** * Search for pattern in linear string. * @return set set of occurences */ template < class SymbolType, class RankType > static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern ); template < class SymbolType, class RankType > static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ); template < class SymbolType, class RankType > static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern ); template < class SymbolType, class RankType > static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ); }; template < class SymbolType, class RankType > ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern ) { return match ( subject, tree::PrefixRankedBarPattern < SymbolType, RankType > ( pattern ) ); } template < class SymbolType, class RankType > ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ) { ext::set < unsigned > occ; ext::vector < size_t > ba = tree::properties::BorderArrayNaive::ba ( pattern ); ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); // index to the subject unsigned i = 0; // main loop of the algorithm over all possible indexes where the pattern can start while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) { // index to the pattern unsigned j = 0; // offset to the subject unsigned offset = i; while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { // match of symbol offset++; j++; } else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) ) { // match of variable with subtree offset = subjectSubtreeJumpTable[offset]; j += 2; } else { break; } } // match was found if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i ); // shift heuristics i += j - ba[j]; } return occ; } template < class SymbolType, class RankType > ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern ) { return match ( subject, tree::PrefixRankedPattern < SymbolType, RankType > ( pattern ) ); } template < class SymbolType, class RankType > ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ) { ext::set < unsigned > occ; ext::vector < size_t > ba = tree::properties::BorderArrayNaive::ba ( pattern ); ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); // index to the subject unsigned i = 0; // main loop of the algorithm over all possible indexes where the pattern can start while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) { // index to the pattern unsigned j = 0; // offset to the subject unsigned offset = i; while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { // match of symbol offset++; j++; } else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) ) { // match of variable with subtree offset = subjectSubtreeJumpTable[offset]; j++; } else { break; } } // match was found if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i ); // shift heristics i += j - ba[j]; } return occ; } } /* namespace exact */ } /* namespace arbology */ #endif /* _ARBOLOGY_KNUTH_MORRIS_PRATT_H_ */