/* * KnuthMorrisPratt.h * * Created on: 5. 11. 2014 * Author: Jan Travnicek */ #ifndef _ARBOLOGY_KNUTH_MORRIS_PRATT_H_ #define _ARBOLOGY_KNUTH_MORRIS_PRATT_H_ #include <alib/measure> #include <alib/set> #include <alib/vector> #include <tree/properties/BorderArrayNaive.h> #include <tree/properties/SubtreeJumpTable.h> #include <tree/properties/ExactSubtreeRepeatsNaive.h> #include <tree/exact/ForwardOccurrenceTest.h> #include <tree/ranked/PrefixRankedBarTree.h> #include <tree/ranked/PrefixRankedBarPattern.h> #include <tree/ranked/PrefixRankedBarNonlinearPattern.h> #include <tree/ranked/PrefixRankedTree.h> #include <tree/ranked/PrefixRankedPattern.h> #include <tree/ranked/PrefixRankedNonlinearPattern.h> namespace arbology { namespace exact { /** * Implementation of BMH for MI(E+\eps)-EVY course 2014 * To get rid of zeros in BCS table we ignore last haystack character */ class KnuthMorrisPratt { public: /** * Search for pattern in linear string. * @return set set of occurences */ template < class SymbolType > static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType > & subject, const tree::PrefixRankedBarTree < SymbolType > & pattern ); template < class SymbolType > static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType > & subject, const tree::PrefixRankedBarPattern < SymbolType > & pattern ); template < class SymbolType > static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType > & subject, const tree::PrefixRankedBarNonlinearPattern < SymbolType > & pattern ); template < class SymbolType > static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedTree < SymbolType > & pattern ); template < class SymbolType > static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedPattern < SymbolType > & pattern ); template < class SymbolType > static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedNonlinearPattern < SymbolType > & pattern ); }; template < class SymbolType > ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedBarTree < SymbolType > & subject, const tree::PrefixRankedBarTree < SymbolType > & pattern ) { return match ( subject, tree::PrefixRankedBarPattern < SymbolType > ( pattern ) ); } template < class SymbolType > ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedBarTree < SymbolType > & subject, const tree::PrefixRankedBarPattern < SymbolType > & pattern ) { ext::set < unsigned > occ; ext::vector < size_t > construct = tree::properties::BorderArrayNaive::construct ( pattern ); //measurements::start("Algorithm", measurements::Type::MAIN); ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); unsigned Spos = pattern.getContent ( ).size ( ); for ( unsigned i = 0; i < pattern.getContent ( ).size ( ); ++ i ) { if ( pattern.getContent ( ) [ i ] == pattern.getSubtreeWildcard ( ) ) { Spos = i; break; } } // index to the subject unsigned i = 0; // main loop of the algorithm over all possible indexes where the pattern can start while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) { // index to the pattern unsigned j = tree::exact::ForwardOccurrenceTest::occurrence ( subject, subjectSubtreeJumpTable, pattern, i ); // match was found if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i ); if ( j == 0 ) { i += 1; } else { unsigned shift = j - construct [ j ]; // shift heuristics i += shift; j = std::min ( Spos, j ) - shift; } } //measurements::end(); return occ; } template < class SymbolType > ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedBarTree < SymbolType > & subject, const tree::PrefixRankedBarNonlinearPattern < SymbolType > & pattern ) { ext::set < unsigned > occ; ext::vector < size_t > construct = tree::properties::BorderArrayNaive::construct ( pattern ); //measurements::start("Algorithm", measurements::Type::MAIN); ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); tree::PrefixRankedBarTree < unsigned > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); unsigned Spos = pattern.getContent ( ).size ( ); for ( unsigned i = 0; i < pattern.getContent ( ).size ( ); ++ i ) { if ( pattern.getContent ( ) [ i ] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).contains ( pattern.getContent ( ) [ i ] ) ) { Spos = i; break; } } // index to the subject unsigned i = 0; // main loop of the algorithm over all possible indexes where the pattern can start while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) { // index to the pattern unsigned j = tree::exact::ForwardOccurrenceTest::occurrence ( subject, subjectSubtreeJumpTable, repeats, pattern, i ); // match was found if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i ); if ( j == 0 ) { i += 1; } else { unsigned shift = j - construct [ j ]; // shift heuristics i += shift; j = std::min ( Spos, j ) - shift; } } //measurements::end(); return occ; } template < class SymbolType > ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedTree < SymbolType > & pattern ) { return match ( subject, tree::PrefixRankedPattern < SymbolType > ( pattern ) ); } template < class SymbolType > ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedPattern < SymbolType > & pattern ) { ext::set < unsigned > occ; ext::vector < size_t > construct = tree::properties::BorderArrayNaive::construct ( pattern ); //measurements::start("Algorithm", measurements::Type::MAIN); ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); unsigned Spos = pattern.getContent ( ).size ( ); for ( unsigned i = 0; i < pattern.getContent ( ).size ( ); ++ i ) { if ( pattern.getContent ( ) [ i ] == pattern.getSubtreeWildcard ( ) ) { Spos = i; break; } } // index to the subject unsigned i = 0; // main loop of the algorithm over all possible indexes where the pattern can start while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) { // index to the pattern unsigned j = tree::exact::ForwardOccurrenceTest::occurrence ( subject, subjectSubtreeJumpTable, pattern, i ); // match was found if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i ); if ( j == 0 ) { i += 1; } else { unsigned shift = j - construct [ j ]; // shift heuristics i += shift; j = std::min ( Spos, j ) - shift; } } //measurements::end(); return occ; } template < class SymbolType > ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedNonlinearPattern < SymbolType > & pattern ) { ext::set < unsigned > occ; ext::vector < size_t > construct = tree::properties::BorderArrayNaive::construct ( pattern ); //measurements::start("Algorithm", measurements::Type::MAIN); ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); tree::PrefixRankedTree < unsigned > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); unsigned Spos = pattern.getContent ( ).size ( ); for ( unsigned i = 0; i < pattern.getContent ( ).size ( ); ++ i ) { if ( pattern.getContent ( ) [ i ] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).contains ( pattern.getContent ( ) [ i ] ) ) { Spos = i; break; } } // index to the subject unsigned i = 0; // main loop of the algorithm over all possible indexes where the pattern can start while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) { // index to the pattern unsigned j = tree::exact::ForwardOccurrenceTest::occurrence ( subject, subjectSubtreeJumpTable, repeats, pattern, i ); // match was found if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i ); if ( j == 0 ) { i += 1; } else { unsigned shift = j - construct [ j ]; // shift heuristics i += shift; j = std::min ( Spos, j ) - shift; } } //measurements::end(); return occ; } } /* namespace exact */ } /* namespace arbology */ #endif /* _ARBOLOGY_KNUTH_MORRIS_PRATT_H_ */