/* * KnuthMorrisPratt.h * * Created on: 5. 11. 2014 * Author: Jan Travnicek */ #ifndef _ARBOLOGY_KNUTH_MORRIS_PRATT_H_ #define _ARBOLOGY_KNUTH_MORRIS_PRATT_H_ #include <measure> #include <alib/set> #include <alib/vector> #include <tree/properties/BorderArrayNaive.h> #include <tree/properties/SubtreeJumpTable.h> #include <tree/properties/ExactSubtreeRepeatsNaive.h> #include <tree/ranked/PrefixRankedBarTree.h> #include <tree/ranked/PrefixRankedBarPattern.h> #include <tree/ranked/PrefixRankedBarNonlinearPattern.h> #include <tree/ranked/PrefixRankedTree.h> #include <tree/ranked/PrefixRankedPattern.h> #include <tree/ranked/PrefixRankedNonlinearPattern.h> namespace arbology { namespace exact { /** * Implementation of BMH for MI(E+\eps)-EVY course 2014 * To get rid of zeros in BCS table we ignore last haystack character */ class KnuthMorrisPratt { public: /** * Search for pattern in linear string. * @return set set of occurences */ template < class SymbolType, class RankType > static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern ); template < class SymbolType, class RankType > static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ); template < class SymbolType, class RankType > static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarNonlinearPattern < SymbolType, RankType > & pattern ); template < class SymbolType, class RankType > static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern ); template < class SymbolType, class RankType > static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ); template < class SymbolType, class RankType > static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedNonlinearPattern < SymbolType, RankType > & pattern ); }; template < class SymbolType, class RankType > ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern ) { return match ( subject, tree::PrefixRankedBarPattern < SymbolType, RankType > ( pattern ) ); } template < class SymbolType, class RankType > ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ) { ext::set < unsigned > occ; ext::vector < size_t > ba = tree::properties::BorderArrayNaive::ba ( pattern ); //measurements::start("Algorithm", measurements::Type::MAIN); ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); // index to the subject unsigned i = 0; // main loop of the algorithm over all possible indexes where the pattern can start while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) { // index to the pattern unsigned j = 0; // offset to the subject unsigned offset = i; while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { // match of symbol offset++; j++; } else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) /* && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) */ ) { // match of variable with subtree offset = subjectSubtreeJumpTable[offset]; j += 2; } else { break; } } // match was found if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i ); // shift heuristics i += j - ba[j]; } //measurements::end(); return occ; } template < class SymbolType, class RankType > ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarNonlinearPattern < SymbolType, RankType > & pattern ) { ext::set < unsigned > occ; ext::vector < size_t > ba = tree::properties::BorderArrayNaive::ba ( pattern ); ext::map < common::ranked_symbol < SymbolType, RankType >, unsigned > variablesSetting; //measurements::start("Algorithm", measurements::Type::MAIN); ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); tree::PrefixRankedBarTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); // index to the subject unsigned i = 0; // main loop of the algorithm over all possible indexes where the pattern can start while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) { // clear the current state of variable to subtree repeat variablesSetting.clear(); // index to the pattern unsigned j = 0; // offset to the subject unsigned offset = i; while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { // match of symbol offset++; j++; } else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[j] ) ) /* && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) */ ) { // check nonlinear variable if ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) { auto setting = variablesSetting.find ( pattern.getContent ( )[ j ] ); if ( setting != variablesSetting.end ( ) && repeats.getContent ( )[ offset ].getSymbol ( ) != setting->second ) break; variablesSetting.insert ( std::make_pair ( pattern.getContent ( )[ j ], repeats.getContent( )[ offset ].getSymbol ( ) ) ); } // match of variable with subtree offset = subjectSubtreeJumpTable[offset]; j = j + 2; } else { break; } } // match was found if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i ); // shift heuristics i += j - ba[j]; } //measurements::end(); return occ; } template < class SymbolType, class RankType > ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern ) { return match ( subject, tree::PrefixRankedPattern < SymbolType, RankType > ( pattern ) ); } template < class SymbolType, class RankType > ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ) { ext::set < unsigned > occ; ext::vector < size_t > ba = tree::properties::BorderArrayNaive::ba ( pattern ); //measurements::start("Algorithm", measurements::Type::MAIN); ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); // index to the subject unsigned i = 0; // main loop of the algorithm over all possible indexes where the pattern can start while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) { // index to the pattern unsigned j = 0; // offset to the subject unsigned offset = i; while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { // match of symbol offset++; } else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) ) { // match of variable with subtree offset = subjectSubtreeJumpTable[offset]; } else { break; } j = j + 1; } // match was found if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i ); // shift heristics i += j - ba[j]; } //measurements::end(); return occ; } template < class SymbolType, class RankType > ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedNonlinearPattern < SymbolType, RankType > & pattern ) { ext::set < unsigned > occ; ext::vector < size_t > ba = tree::properties::BorderArrayNaive::ba ( pattern ); ext::map < common::ranked_symbol < SymbolType, RankType >, unsigned > variablesSetting; //measurements::start("Algorithm", measurements::Type::MAIN); ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); tree::PrefixRankedTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); // index to the subject unsigned i = 0; // main loop of the algorithm over all possible indexes where the pattern can start while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) { // clear the current state of variable to subtree repeat variablesSetting.clear(); // index to the pattern unsigned j = 0; // offset to the subject unsigned offset = i; while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { // match of symbol offset++; } else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) { // check nonlinear variable if ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) { auto setting = variablesSetting.find ( pattern.getContent ( )[ j ] ); if ( setting != variablesSetting.end ( ) && repeats.getContent ( )[ offset ].getSymbol ( ) != setting->second ) break; variablesSetting.insert ( std::make_pair ( pattern.getContent ( )[ j ], repeats.getContent( )[ offset ].getSymbol ( ) ) ); } // match of variable with subtree offset = subjectSubtreeJumpTable[offset]; } else { break; } j = j + 1; } // match was found if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i ); // shift heristics i += j - ba[j]; } //measurements::end(); return occ; } } /* namespace exact */ } /* namespace arbology */ #endif /* _ARBOLOGY_KNUTH_MORRIS_PRATT_H_ */