diff --git a/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.cpp b/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.cpp index e8e345eb76bbe0954b1f94b77705671bafe290fe..96171faaf20ccc859988f6b830db8dc1071dac73 100644 --- a/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.cpp +++ b/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.cpp @@ -8,7 +8,9 @@ #include "ReversedBadCharacterShiftTable.h" #include <tree/ranked/PrefixRankedBarPattern.h> +#include <tree/ranked/PrefixRankedBarNonlinearPattern.h> #include <tree/ranked/PrefixRankedPattern.h> +#include <tree/ranked/PrefixRankedNonlinearPattern.h> namespace arbology { @@ -19,13 +21,19 @@ std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs } std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs ( const tree::PrefixRankedBarPattern & pattern ) { + return bcs ( tree::PrefixRankedBarNonlinearPattern ( pattern ) ); +} + +auto ReversedBadCharacterShiftTablePrefixRankedBarPattern = ReversedBadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedBarPattern > ( ReversedBadCharacterShiftTable::bcs ); + +std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs ( const tree::PrefixRankedBarNonlinearPattern & pattern ) { const std::set < alphabet::RankedSymbol > & alphabet = pattern.getAlphabet ( ); std::map < alphabet::RankedSymbol, size_t > bcs; // initialisation of bcs table to the size of the pattern for ( const alphabet::RankedSymbol & symbol : alphabet ) { - if ( ( symbol == pattern.getSubtreeWildcard ( ) ) || ( symbol == pattern.getVariablesBar ( ) ) ) continue; + if ( ( symbol == pattern.getSubtreeWildcard ( ) ) || ( pattern.getNonlinearVariables ( ).count ( symbol ) ) || ( symbol == pattern.getVariablesBar ( ) ) ) continue; bcs.insert ( std::make_pair ( symbol, pattern.getContent ( ).size ( ) ) ); } @@ -41,7 +49,7 @@ std::map < alphabet::RankedSymbol, 
size_t > ReversedBadCharacterShiftTable::bcs // limit the shift by occurrence of the last variable for ( const alphabet::RankedSymbol & symbol : alphabet ) { - if ( ( symbol == pattern.getSubtreeWildcard ( ) ) || ( symbol == pattern.getVariablesBar ( ) ) ) continue; + if ( ( symbol == pattern.getSubtreeWildcard ( ) ) || ( pattern.getNonlinearVariables ( ).count ( symbol ) ) || ( symbol == pattern.getVariablesBar ( ) ) ) continue; size_t tmp = firstSBarOffset; @@ -61,7 +69,7 @@ std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs // limit the shift by position of symbols within the pattern for ( unsigned i = pattern.getContent ( ).size ( ) - 1; i >= 1; i-- ) { // first symbol is not concerned - if ( ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) ) || ( pattern.getContent ( )[i] == pattern.getVariablesBar ( ) ) ) continue; + if ( ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) ) || ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[i] ) ) || ( pattern.getContent ( )[i] == pattern.getVariablesBar ( ) ) ) continue; size_t tmp = i; @@ -72,16 +80,22 @@ std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs return bcs; } -auto ReversedBadCharacterShiftTablePrefixRankedBarPattern = ReversedBadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedBarPattern > ( ReversedBadCharacterShiftTable::bcs ); +auto ReversedBadCharacterShiftTablePrefixRankedBarNonlinearPattern = ReversedBadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedBarNonlinearPattern > ( ReversedBadCharacterShiftTable::bcs ); std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs ( const tree::PrefixRankedPattern & pattern ) { + return bcs ( tree::PrefixRankedNonlinearPattern ( pattern ) ); +} + +auto ReversedBadCharacterShiftTablePrefixRankedPattern = 
ReversedBadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedPattern > ( ReversedBadCharacterShiftTable::bcs ); + +std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs ( const tree::PrefixRankedNonlinearPattern & pattern ) { const std::set < alphabet::RankedSymbol > & alphabet = pattern.getAlphabet ( ); std::map < alphabet::RankedSymbol, size_t > bcs; // initialisation of bcs table to the size of the pattern for ( const alphabet::RankedSymbol & symbol : alphabet ) { - if ( symbol == pattern.getSubtreeWildcard ( ) ) continue; + if ( symbol == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( symbol ) ) continue; bcs.insert ( std::make_pair ( symbol, pattern.getContent ( ).size ( ) ) ); } @@ -91,14 +105,14 @@ std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs unsigned firstSOffset = pattern.getContent ( ).size ( ) + 1; for ( int i = ( int ) pattern.getContent ( ).size ( ) - 1; i >= 0; i-- ) - if ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) ) + if ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[i] ) ) firstSOffset = i; if ( firstSOffset == 0 ) firstSOffset = 1; // limit the shift by occurrence of the last variable for ( const alphabet::RankedSymbol & symbol : alphabet ) { - if ( symbol == pattern.getSubtreeWildcard ( ) ) continue; + if ( symbol == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( symbol ) ) continue; if ( bcs[symbol] > firstSOffset ) bcs[symbol] = firstSOffset; @@ -106,7 +120,7 @@ std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs // limit the shift by position of symbols within the pattern for ( unsigned i = pattern.getContent ( ).size ( ) - 1; i >= 1; i-- ) { // first symbol is not concerned - if ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) ) 
continue; + if ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[i] ) ) continue; size_t tmp = i; if ( bcs[pattern.getContent ( )[i]] > tmp ) @@ -116,7 +130,7 @@ std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs return bcs; } -auto ReversedBadCharacterShiftTablePrefixRankedPattern = ReversedBadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedPattern > ( ReversedBadCharacterShiftTable::bcs ); +auto ReversedBadCharacterShiftTablePrefixRankedNonlinearPattern = ReversedBadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedNonlinearPattern > ( ReversedBadCharacterShiftTable::bcs ); } /* namespace exact */ diff --git a/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.h b/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.h index 909c4301ca29d93b2f1cac61e5cc4fb781bb73a2..2699bfe9d8ff4a5ac41a06be18b3628b37d563f2 100644 --- a/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.h +++ b/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.h @@ -33,7 +33,9 @@ public: static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::RankedTreeWrapper & pattern ); static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::PrefixRankedBarPattern & pattern ); + static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::PrefixRankedBarNonlinearPattern & pattern ); static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::PrefixRankedPattern & pattern ); + static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::PrefixRankedNonlinearPattern & pattern ); }; diff --git a/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.cpp b/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.cpp index c6ddbfb88e4f4dbcd1c3c0d9511208c2f4f80bf2..6d79deba9493db624de7bef318607524de52ae06 100644 --- 
a/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.cpp +++ b/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.cpp @@ -8,12 +8,15 @@ #include "ReversedBoyerMooreHorspool.h" #include "ReversedBadCharacterShiftTable.h" #include "SubtreeJumpTable.h" +#include "ExactSubtreeRepeatsNaive.h" #include <tree/Tree.h> #include <tree/ranked/PrefixRankedTree.h> #include <tree/ranked/PrefixRankedBarTree.h> #include <tree/ranked/PrefixRankedPattern.h> #include <tree/ranked/PrefixRankedBarPattern.h> +#include <tree/ranked/PrefixRankedNonlinearPattern.h> +#include <tree/ranked/PrefixRankedBarNonlinearPattern.h> #include <alphabet/RankedSymbol.h> #include <map> @@ -75,6 +78,64 @@ std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRank auto ReversedBoyerMooreHorspoolPrefixRankedBarTreePrefixRankedBarPattern = ReversedBoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree, tree::PrefixRankedBarPattern > ( ReversedBoyerMooreHorspool::match ); +std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarNonlinearPattern & pattern ) { + std::set < unsigned > occ; + std::map < alphabet::RankedSymbol, size_t > bcs = ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subject's alphabet must be a subset of (or equal to) the pattern's alphabet + std::vector < int > subjectSubtreeJumpTable = SubtreeJumpTable::compute ( subject ); + std::map < alphabet::RankedSymbol, alphabet::Symbol > variablesSetting; + + tree::PrefixRankedBarTree repeats = ExactSubtreeRepeatsNaive::repeats ( subject ); + + // index to the subject + int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1; + + // main loop of the algorithm over all possible indexes where the pattern can start + while ( i >= 0 ) { + // clear the current state of variable to subtree repeat + variablesSetting.clear(); + + // index to the pattern + unsigned j = 0; + + // offset to the 
subject + unsigned offset = i; + + while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { + if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { + // match of symbol + offset = offset + 1; + j = j + 1; + } else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[j] ) ) && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) ) { + // check nonlinear variable + if ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) { + auto setting = variablesSetting.find ( pattern.getContent ( )[ j ] ); + + if ( setting != variablesSetting.end ( ) && repeats.getContent ( )[ offset ].getSymbol ( ) != setting->second ) + break; + + variablesSetting.insert ( std::make_pair ( pattern.getContent ( )[ j ], repeats.getContent( )[ offset ].getSymbol ( ) ) ); + } + + // match of variable with subtree + offset = subjectSubtreeJumpTable[offset]; + j = j + 2; + } else { + break; + } + } + + // match was found + if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i ); + + // shift heuristics + i -= bcs[subject.getContent ( )[i]]; + } + + return occ; +} + +auto ReversedBoyerMooreHorspoolPrefixRankedBarTreePrefixRankedBarNonlinearPattern = ReversedBoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree, tree::PrefixRankedBarNonlinearPattern > ( ReversedBoyerMooreHorspool::match ); + std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedTree & pattern ) { return match ( subject, tree::PrefixRankedPattern ( pattern ) ); } @@ -123,6 +184,63 @@ std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRank auto ReversedBoyerMooreHorspoolPrefixRankedBarTreePrefixRankedPattern = ReversedBoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedTree, tree::PrefixRankedPattern > ( 
ReversedBoyerMooreHorspool::match ); +std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedNonlinearPattern & pattern ) { + std::set < unsigned > occ; + std::map < alphabet::RankedSymbol, size_t > bcs = ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subject's alphabet must be a subset of (or equal to) the pattern's alphabet + std::vector < int > subjectSubtreeJumpTable = SubtreeJumpTable::compute ( subject ); + std::map < alphabet::RankedSymbol, alphabet::Symbol > variablesSetting; + + tree::PrefixRankedTree repeats = ExactSubtreeRepeatsNaive::repeats ( subject ); + + // index to the subject + int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1; + + // main loop of the algorithm over all possible indexes where the pattern can start + while ( i >= 0 ) { + // clear the current state of variable to subtree repeat + variablesSetting.clear(); + + // index to the pattern + unsigned j = 0; + + // offset to the subject + unsigned offset = i; + + while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { + if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) + // match of symbol + offset = offset + 1; + else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) { + // check nonlinear variable + if ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) { + auto setting = variablesSetting.find ( pattern.getContent ( )[ j ] ); + + if ( setting != variablesSetting.end ( ) && repeats.getContent ( )[ offset ].getSymbol ( ) != setting->second ) + break; + + variablesSetting.insert ( std::make_pair ( pattern.getContent ( )[ j ], repeats.getContent( )[ offset ].getSymbol ( ) ) ); + } + + // match of variable with subtree + offset = subjectSubtreeJumpTable[offset]; + } else + break; + + j = j + 1; + } + + // match was found 
+ if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i ); + + // shift heuristics + i -= bcs[subject.getContent ( )[i]]; + } + + return occ; +} + +auto ReversedBoyerMooreHorspoolPrefixRankedBarTreePrefixRankedNonlinearPattern = ReversedBoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedTree, tree::PrefixRankedNonlinearPattern > ( ReversedBoyerMooreHorspool::match ); + } /* namespace exact */ } /* namespace arbology */ diff --git a/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.h b/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.h index 9837c35d378700041bb3caad74bd96b3a8f8e1d8..ebde38651c0940e28d51d5c6024a243f2f64b33e 100644 --- a/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.h +++ b/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.h @@ -30,8 +30,10 @@ public: static std::set < unsigned > match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarTree & pattern ); static std::set < unsigned > match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarPattern & pattern ); + static std::set < unsigned > match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarNonlinearPattern & pattern ); static std::set < unsigned > match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedTree & pattern ); static std::set < unsigned > match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedPattern & pattern ); + static std::set < unsigned > match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedNonlinearPattern & pattern ); }; diff --git a/tests.aarbology.sh b/tests.aarbology.sh index 7fa8f9b88a72ce47484f20fc7b31f92a5b23d4cc..fee36759d7d5047a0e08880ee3d3df9f8ad029c7 100755 --- a/tests.aarbology.sh +++ b/tests.aarbology.sh @@ -367,6 +367,8 @@ runTestNonlinearPattern "Exact Pattern Match (NonlinearPattern PrefixRankedBar)" runTestNonlinearPattern "Exact Boyer Moore Horspool (NonlinearPattern PrefixRankedBar)" 
"./aarbology2 -a boyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarNonlinearPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set" runTestPattern "Exact Reversed Boyer Moore Horspool (Pattern PrefixRankedBar)" "./aarbology2 -a reversedBoyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set" runTestPattern "Exact Reversed Boyer Moore Horspool (Pattern PrefixRanked)" "./aarbology2 -a reversedBoyerMooreHorspool -s <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set" +runTestNonlinearPattern "Exact Reversed Boyer Moore Horspool (NonlinearPattern PrefixRankedBar)" "./aarbology2 -a reversedBoyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarNonlinearPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set" +runTestNonlinearPattern "Exact Reversed Boyer Moore Horspool (NonlinearPattern PrefixRanked)" "./aarbology2 -a reversedBoyerMooreHorspool -s <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedNonlinearPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set" runTestPattern "Exact Knuth Morris Pratt (Pattern PrefixRankedBar)" "./aarbology2 -a knuthMorrisPratt -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set" 
runTestPattern "Exact Knuth Morris Pratt (Pattern PrefixRanked)" "./aarbology2 -a knuthMorrisPratt -s <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set" runTestPattern "Exact Dead Zone Using Bad Character Shift And Border Array (Pattern PrefixRanked)" "./aarbology2 -a deadZoneUsingBadCharacterShiftAndBorderArray -s <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"