diff --git a/alib2algo/src/arbology/exact/QuickSearch.h b/alib2algo/src/arbology/exact/QuickSearch.h index 3fde07a6308f4a59fb1c9722dec42e63714ae785..4d7470110ef60d50844ae94b09b14850c5d73c36 100644 --- a/alib2algo/src/arbology/exact/QuickSearch.h +++ b/alib2algo/src/arbology/exact/QuickSearch.h @@ -16,6 +16,7 @@ #include <tree/properties/QuickSearchBadCharacterShiftTable.h> #include <tree/properties/SubtreeJumpTable.h> #include <tree/properties/ExactSubtreeRepeatsNaive.h> +#include <tree/exact/BackwardOccurrenceTest.h> #include <tree/ranked/PrefixRankedBarTree.h> #include <tree/ranked/PrefixRankedBarPattern.h> @@ -50,7 +51,7 @@ ext::set < unsigned > QuickSearch::match ( const tree::PrefixRankedBarTree < Sym return match ( subject, tree::PrefixRankedBarPattern < SymbolType, RankType > ( pattern ) ); } -template < class SymbolType, class RankType > // CURRENTLY WORKING HERE +template < class SymbolType, class RankType > ext::set < unsigned > QuickSearch::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ) { ext::set < unsigned > occ; ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > bcs = tree::properties::QuickSearchBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern @@ -61,36 +62,17 @@ ext::set < unsigned > QuickSearch::match ( const tree::PrefixRankedBarTree < Sym // main loop of the algorithm over all possible indexes where the pattern can start while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) { + // pair j and offset + ext::pair < int, int > jOffset = tree::exact::BackwardOccurrenceTest::occurrence ( subject, subjectSubtreeJumpTable, pattern, i ); - // index to the pattern - int j = pattern.getContent ( ).size ( ) - 1; - - // offset to the subject - int offset = i + j; - - while ( ( j >= 0 ) && ( offset >= 0 ) ) { - if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { - // match of symbol - offset = offset - 1; - j = j - 1; - } else if ( ( pattern.getContent ( )[j] == pattern.getVariablesBar ( ) ) /* && ( pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) */ ) { - // match of variable with subtree - offset = subjectSubtreeJumpTable[offset]; - j = j - 2; - } else { - break; - } - } - - // match was found - if ( j == -1 ) occ.insert ( offset + 1); + // match was found + if ( jOffset.first == -1 ) occ.insert ( jOffset.second + 1); if ( i + pattern.getContent ( ).size ( ) == subject.getContent ( ).size ( ) ) { break; } // shift heuristics - //std::cout << "At: " << i << ", shifting by: " << bcs[subject.getContent ( )[i + pattern.getContent ( ).size ( )]] << ", according to symb at: " << i + pattern.getContent ( ).size ( ) << "!" << std::endl; i += bcs[subject.getContent ( )[i + pattern.getContent ( ).size ( )]]; } @@ -101,7 +83,6 @@ template < class SymbolType, class RankType > ext::set < unsigned > QuickSearch::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarNonlinearPattern < SymbolType, RankType > & pattern ) { ext::set < unsigned > occ; ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > bcs = tree::properties::QuickSearchBadCharacterShiftTable::bcs ( pattern ); //NOTE: the subjects alphabet must be a subset or equal to the pattern - ext::map < common::ranked_symbol < SymbolType, RankType >, unsigned > variablesSetting; ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); tree::PrefixRankedBarTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); @@ -111,41 +92,11 @@ ext::set < unsigned > QuickSearch::match ( const tree::PrefixRankedBarTree < Sym // main loop of the algorithm over all possible indexes where the pattern can start while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) { - // clear the current state of variable to subtree repeat - variablesSetting.clear(); - - // index to the pattern - int j = pattern.getContent ( ).size ( ) - 1; - - // offset to the subject - int offset = i + j; - - while ( ( j >= 0 ) && ( offset >= 0 ) ) { - if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { - // match of symbol - offset = offset - 1; - j = j - 1; - } else if ( ( pattern.getContent ( )[j] == pattern.getVariablesBar ( ) ) /* && ( pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) */ ) { - // else match of variable with subtree - offset = subjectSubtreeJumpTable[offset]; - j = j - 2; - - // check nonlinear variable - if ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j + 1 ] ) ) { - auto setting = variablesSetting.find ( pattern.getContent ( )[ j + 1 ] ); - - if ( setting != variablesSetting.end ( ) && repeats.getContent ( )[ offset + 1 ].getSymbol ( ) != setting->second ) - break; - - variablesSetting.insert ( std::make_pair ( pattern.getContent ( )[ j + 1 ], repeats.getContent( )[ offset + 1 ].getSymbol ( ) ) ); - } - } else { - break; - } - } + // pair j and offset + ext::pair < int, int > jOffset = tree::exact::BackwardOccurrenceTest::occurrence ( subject, subjectSubtreeJumpTable, repeats, pattern, i ); - // match was found - if ( j == -1 ) occ.insert ( offset + 1); + // match was found + if ( jOffset.first == -1 ) occ.insert ( jOffset.second + 1); if ( i + pattern.getContent ( ).size ( ) == subject.getContent ( ).size ( ) ) { break; diff --git a/alib2algo/src/tree/properties/QuickSearchBadCharacterShiftTable.h b/alib2algo/src/tree/properties/QuickSearchBadCharacterShiftTable.h index d0cb8af17297f250213f93c932b07d7a892178c1..4895407e54b224c9cfe4e3b4a29e64c020f2b25e 100644 --- a/alib2algo/src/tree/properties/QuickSearchBadCharacterShiftTable.h +++ b/alib2algo/src/tree/properties/QuickSearchBadCharacterShiftTable.h @@ -12,6 +12,8 @@ #include <tree/ranked/PrefixRankedBarPattern.h> #include <tree/ranked/PrefixRankedBarNonlinearPattern.h> +#include "LastVariableOffsetBack.h" + #include <alib/set> #include <alib/map> @@ -52,11 +54,7 @@ ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > QuickSearchB // find the distance between the end of the pattern and the index // of the last symbol representing the variable - unsigned lastSOffset = pattern.getContent ( ).size ( ); - - for ( unsigned i = 0; i < pattern.getContent ( ).size ( ); i++ ) - if ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[i] ) ) - lastSOffset = pattern.getContent ( ).size ( ) - i; + size_t lastSOffset = LastVariableOffsetBack::offset ( pattern ); // limit the shift by occurrence of the last variable @@ -64,13 +62,13 @@ ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > QuickSearchB if ( symbol == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( symbol ) || symbol == pattern.getVariablesBar ( ) ) continue; - size_t tmp = lastSOffset; + size_t tmp = lastSOffset + 1; if ( ! pattern.getBars ( ).count ( symbol ) ) // size of the smallest subtree containing given terminal depend // on the arity of the terminal tmp += ( size_t ) symbol.getRank ( ) * 2; - else if ( tmp >= 2 ) + else // bar symbols match the variable bar which is one symbol after // the last variable, conditioned because of the case S S| where // the -1 would cause shift by 0 -- illegal @@ -81,7 +79,7 @@ ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > QuickSearchB } // limit the shift by position of symbols within the pattern - for ( unsigned i = 0; i < pattern.getContent ( ).size ( ); i++ ) { // last symbol is concerned here + for ( unsigned i = 0; i < pattern.getContent ( ).size ( ); i++ ) { if ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[i] ) || pattern.getContent ( )[i] == pattern.getVariablesBar ( ) ) continue;