/* * ReversedBoyerMooreHorspool.h * * Created on: 5. 11. 2014 * Author: Jan Travnicek */ #ifndef _ARBOLOGY_REVERSED_BOYER_MOORE_HORSPOOL_H_ #define _ARBOLOGY_REVERSED_BOYER_MOORE_HORSPOOL_H_ #include <alib/set> #include <alib/map> #include <common/ranked_symbol.hpp> #include <tree/properties/ReversedBadCharacterShiftTable.h> #include <tree/properties/SubtreeJumpTable.h> #include <tree/properties/ExactSubtreeRepeatsNaive.h> #include <tree/ranked/PrefixRankedTree.h> #include <tree/ranked/PrefixRankedBarTree.h> #include <tree/ranked/PrefixRankedPattern.h> #include <tree/ranked/PrefixRankedBarPattern.h> #include <tree/ranked/PrefixRankedNonlinearPattern.h> #include <tree/ranked/PrefixRankedBarNonlinearPattern.h> namespace arbology { namespace exact { /** * Implementation of BMH for MI(E+\eps)-EVY course 2014 * To get rid of zeros in BCS table we ignore last haystack character */ class ReversedBoyerMooreHorspool { public: /** * Search for pattern in linear string. * @return set set of occurences */ template < class SymbolType, class RankType > static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern ); template < class SymbolType, class RankType > static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ); template < class SymbolType, class RankType > static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarNonlinearPattern < SymbolType, RankType > & pattern ); template < class SymbolType, class RankType > static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern ); template < class SymbolType, class RankType > static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ); template < class SymbolType, class RankType > static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedNonlinearPattern < SymbolType, RankType > & pattern ); }; template < class SymbolType, class RankType > ext::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern ) { return match ( subject, tree::PrefixRankedBarPattern < SymbolType, RankType > ( pattern ) ); } template < class SymbolType, class RankType > ext::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ) { ext::set < unsigned > occ; ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > bcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); // index to the subject int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1; // main loop of the algorithm over all possible indexes where the pattern can start while ( i >= 0 ) { // index to the pattern unsigned j = 0; // offset to the subject unsigned offset = i; while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { // match of symbol offset = offset + 1; j = j + 1; } else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) /* && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) */ ) { // match of variable with subtree offset = subjectSubtreeJumpTable[offset]; j = j + 2; } else { break; } } // match was found if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i ); // shift heuristics i -= bcs[subject.getContent ( )[i]]; } return occ; } template < class SymbolType, class RankType > ext::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarNonlinearPattern < SymbolType, RankType > & pattern ) { ext::set < unsigned > occ; ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > bcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); ext::map < common::ranked_symbol < SymbolType, RankType >, unsigned > variablesSetting; tree::PrefixRankedBarTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); // index to the subject int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1; // main loop of the algorithm over all possible indexes where the pattern can start while ( i >= 0 ) { // clear the current state of variable to subtree repeat variablesSetting.clear(); // index to the pattern unsigned j = 0; // offset to the subject unsigned offset = i; while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { // match of symbol offset = offset + 1; j = j + 1; } else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[j] ) ) /* && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) */ ) { // check nonlinear variable if ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) { auto setting = variablesSetting.find ( pattern.getContent ( )[ j ] ); if ( setting != variablesSetting.end ( ) && repeats.getContent ( )[ offset ].getSymbol ( ) != setting->second ) break; variablesSetting.insert ( std::make_pair ( pattern.getContent ( )[ j ], repeats.getContent( )[ offset ].getSymbol ( ) ) ); } // match of variable with subtree offset = subjectSubtreeJumpTable[offset]; j = j + 2; } else { break; } } // match was found if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i ); // shift heuristics i -= bcs[subject.getContent ( )[i]]; } return occ; } template < class SymbolType, class RankType > ext::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern ) { return match ( subject, tree::PrefixRankedPattern < SymbolType, RankType > ( pattern ) ); } template < class SymbolType, class RankType > ext::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ) { ext::set < unsigned > occ; ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > bcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); // index to the subject int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1; // main loop of the algorithm over all possible indexes where the pattern can start while ( i >= 0 ) { // index to the pattern unsigned j = 0; // offset to the subject unsigned offset = i; while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) // match of symbol offset = offset + 1; else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) // match of variable with subtree offset = subjectSubtreeJumpTable[offset]; else break; j = j + 1; } // match was found if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i ); // shift heristics i -= bcs[subject.getContent ( )[i]]; } return occ; } template < class SymbolType, class RankType > ext::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedNonlinearPattern < SymbolType, RankType > & pattern ) { ext::set < unsigned > occ; ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > bcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); ext::map < common::ranked_symbol < SymbolType, RankType >, unsigned > variablesSetting; tree::PrefixRankedTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); // index to the subject int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1; // main loop of the algorithm over all possible indexes where the pattern can start while ( i >= 0 ) { // clear the current state of variable to subtree repeat variablesSetting.clear(); // index to the pattern unsigned j = 0; // offset to the subject unsigned offset = i; while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) // match of symbol offset = offset + 1; else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) { // check nonlinear variable if ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) { auto setting = variablesSetting.find ( pattern.getContent ( )[ j ] ); if ( setting != variablesSetting.end ( ) && repeats.getContent ( )[ offset ].getSymbol ( ) != setting->second ) break; variablesSetting.insert ( std::make_pair ( pattern.getContent ( )[ j ], repeats.getContent( )[ offset ].getSymbol ( ) ) ); } // match of variable with subtree offset = subjectSubtreeJumpTable[offset]; } else break; j = j + 1; } // match was found if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i ); // shift heristics i -= bcs[subject.getContent ( )[i]]; } return occ; } } /* namespace exact */ } /* namespace arbology */ #endif /* _ARBOLOGY_REVERSED_BOYER_MOORE_HORSPOOL_H_ */