diff --git a/alib2algo/src/arbology/exact/BadCharacterShiftTable.cpp b/alib2algo/src/arbology/exact/BadCharacterShiftTable.cpp index 6a77943a823bd813731769b9d876ef452e1fd65a..14c200ebbfc6bd86464cce0d98366ee35076bb11 100644 --- a/alib2algo/src/arbology/exact/BadCharacterShiftTable.cpp +++ b/alib2algo/src/arbology/exact/BadCharacterShiftTable.cpp @@ -32,7 +32,7 @@ std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const // find the distance between the end of the pattern and the index // of the last symbol representing the variable - int lastSOffset = pattern.getContent ( ).size ( ); + unsigned lastSOffset = pattern.getContent ( ).size ( ); for ( unsigned i = 0; i < pattern.getContent ( ).size ( ); i++ ) if ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) ) diff --git a/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.cpp b/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.cpp index 4ce91df34a727bb605aad019f70d4f7c7aaba6ab..4fc72d00f26ade7c5535202332ae2ce2a3498306 100644 --- a/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.cpp +++ b/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.cpp @@ -9,6 +9,7 @@ #include <exception/AlibException.h> #include <tree/ranked/PrefixRankedBarPattern.h> +#include <tree/ranked/PrefixRankedPattern.h> namespace arbology { @@ -32,7 +33,7 @@ std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs // find the distance between the beginning of the pattern and the index // of the first symbol representing the variable's bar - int firstSBarOffset = pattern.getContent ( ).size ( ) + 1; + unsigned firstSBarOffset = pattern.getContent ( ).size ( ) + 1; for (int i = (int) pattern.getContent ( ).size ( ) - 1; i >= 0; i--) { if ( pattern.getContent ( )[i].getSymbol() == pattern.getVariablesBarSymbol ( ) ) firstSBarOffset = i; @@ -72,6 +73,50 @@ std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs auto 
ReversedBadCharacterShiftTablePrefixRankedBarPattern = ReversedBadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedBarPattern > ( ReversedBadCharacterShiftTable::getInstance ( ), ReversedBadCharacterShiftTable::bcs );

+// Computes the bad character shift table for a pattern in prefix ranked notation.
+//
+// Every symbol of the pattern's alphabet except the subtree wildcard is mapped
+// to the smallest of
+//  - the length of the pattern,
+//  - the index of the first subtree wildcard in the pattern (an index of 0 counts as 1),
+//  - the smallest index i >= 1 at which the symbol itself occurs in the pattern.
+//
+// pattern - the pattern whose alphabet, content and subtree wildcard are read
+// returns - map from symbol to shift; the subtree wildcard is never inserted as a key
+std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs ( const tree::PrefixRankedPattern & pattern ) {
+	const std::set < alphabet::RankedSymbol > & symbols = pattern.getAlphabet ( );
+	const auto & content = pattern.getContent ( );
+	const alphabet::RankedSymbol & wildcard = pattern.getSubtreeWildcard ( );
+	const size_t patternSize = content.size ( );
+
+	// find the distance between the beginning of the pattern and the index
+	// of the first subtree wildcard; stays at size + 1 when there is none
+	size_t firstSOffset = patternSize + 1;
+	for ( size_t i = 0; i < patternSize; ++i )
+		if ( content[i] == wildcard ) {
+			firstSOffset = i;
+			break;
+		}
+
+	// a shift of zero would never move the pattern
+	if ( firstSOffset == 0 ) firstSOffset = 1;
+
+	// initialise the table to the size of the pattern, limited by the
+	// occurrence of the first subtree wildcard
+	const size_t initialShift = ( patternSize > firstSOffset ) ? firstSOffset : patternSize;
+
+	std::map < alphabet::RankedSymbol, size_t > bcs;
+	for ( const alphabet::RankedSymbol & symbol : symbols ) {
+		if ( symbol == wildcard ) continue;
+
+		bcs.insert ( std::make_pair ( symbol, initialShift ) );
+	}
+
+	// limit the shift by position of symbols within the pattern; the first
+	// symbol (index 0) is not concerned. Counting upwards keeps the index from
+	// wrapping around when the pattern is empty.
+	for ( size_t i = 1; i < patternSize; ++i ) {
+		if ( content[i] == wildcard ) continue;
+
+		size_t & shift = bcs[content[i]];
+
+		if ( shift > i ) shift = i;
+	}
+
+	return bcs;
+}
+
+auto ReversedBadCharacterShiftTablePrefixRankedPattern = ReversedBadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, 
tree::PrefixRankedPattern > ( ReversedBadCharacterShiftTable::getInstance ( ), ReversedBadCharacterShiftTable::bcs ); + } /* namespace exact */ } /* namespace arbology */ diff --git a/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.h b/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.h index 8777d9e4c42cb00a4b1ec3b0bd5bec58bdef69dc..a7552e37a3d44c2ba1d92997afd44ee14983cd6d 100644 --- a/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.h +++ b/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.h @@ -33,6 +33,7 @@ public: static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::RankedTreeWrapper & pattern ); static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::PrefixRankedBarPattern & pattern ); + static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::PrefixRankedPattern & pattern ); static ReversedBadCharacterShiftTable & getInstance ( ) { static ReversedBadCharacterShiftTable res; diff --git a/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.cpp b/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.cpp index 9e352c8ad75ee0a4dd2e346677a92e37ef83946c..e3d2df93222828e9f06004e24edfdbd4758d7905 100644 --- a/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.cpp +++ b/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.cpp @@ -11,7 +11,9 @@ #include <exception/AlibException.h> #include <tree/Tree.h> +#include <tree/ranked/PrefixRankedTree.h> #include <tree/ranked/PrefixRankedBarTree.h> +#include <tree/ranked/PrefixRankedPattern.h> #include <tree/ranked/PrefixRankedBarPattern.h> #include <alphabet/RankedSymbol.h> @@ -74,6 +76,54 @@ std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRank auto ReversedBoyerMooreHorspoolPrefixRankedBarTreePrefixRankedBarPattern = ReversedBoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree, tree::PrefixRankedBarPattern > ( ReversedBoyerMooreHorspool::getInstance ( ), 
ReversedBoyerMooreHorspool::match );

+// Searches the subject for occurrences of a pattern that is given as a plain tree:
+// the pattern tree is converted to a tree::PrefixRankedPattern and the search is
+// delegated to the pattern overload.
+std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedTree & pattern ) {
+	return match ( subject, tree::PrefixRankedPattern ( pattern ) );
+}
+
+auto ReversedBoyerMooreHorspoolPrefixRankedBarTreePrefixRankedTree = ReversedBoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedTree, tree::PrefixRankedTree > ( ReversedBoyerMooreHorspool::getInstance ( ), ReversedBoyerMooreHorspool::match );
+
+// Searches the subject (prefix ranked notation) for occurrences of the pattern,
+// trying candidate start positions from the end of the subject towards its
+// beginning and moving between them by the reversed bad character shift table.
+//
+// subject - the tree that is searched
+// pattern - the pattern; its subtree wildcard matches a whole subtree of the subject
+// returns - indexes into the subject's content at which an occurrence starts;
+//           empty when the pattern has no symbols
+//
+// NOTE: the subject's alphabet is expected to be a subset of or equal to the
+// pattern's alphabet. A subject symbol that is missing from the shift table is
+// shifted by one position instead of being inserted with a shift of zero, which
+// would keep the search on the same position forever.
+std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedPattern & pattern ) {
+	std::set < unsigned > occ;
+
+	const auto & subjectContent = subject.getContent ( );
+	const auto & patternContent = pattern.getContent ( );
+
+	// nothing to look for; also keeps the shift lookup below within bounds
+	if ( patternContent.empty ( ) ) return occ;
+
+	const std::map < alphabet::RankedSymbol, size_t > bcs = ReversedBadCharacterShiftTable::bcs ( pattern );
+	const std::vector < int > subjectSubtreeJumpTable = SubtreeJumpTable::compute ( subject );
+	const alphabet::RankedSymbol & wildcard = pattern.getSubtreeWildcard ( );
+
+	const int subjectSize = static_cast < int > ( subjectContent.size ( ) );
+	const int patternSize = static_cast < int > ( patternContent.size ( ) );
+
+	// index to the subject, computed in signed arithmetic so that a pattern
+	// longer than the subject yields a non-positive start
+	int i = subjectSize - patternSize + 1;
+
+	// a pattern of a single symbol would start one position behind the last
+	// symbol of the subject, which must not be read
+	if ( i >= subjectSize ) i = subjectSize - 1;
+
+	// main loop of the algorithm over all possible indexes where the pattern can start
+	while ( i >= 0 ) {
+		// index to the pattern
+		unsigned j = 0;
+
+		// offset to the subject
+		unsigned offset = i;
+
+		while ( ( j < patternContent.size ( ) ) && ( offset < subjectContent.size ( ) ) ) {
+			if ( subjectContent[offset] == patternContent[j] )
+				// match of symbol
+				offset = offset + 1;
+			else if ( patternContent[j] == wildcard )
+				// match of variable with subtree; the jump table presumably holds
+				// the index just behind the subtree rooted at offset
+				offset = subjectSubtreeJumpTable[offset];
+			else
+				break;
+
+			j = j + 1;
+		}
+
+		// match was found
+		if ( j == patternContent.size ( ) ) occ.insert ( i );
+
+		// shift heuristics; never shift by less than one position
+		const auto shift = bcs.find ( subjectContent[i] );
+
+		if ( ( shift != bcs.end ( ) ) && ( shift->second > 0 ) )
+			i -= static_cast < int > ( shift->second );
+		else
+			i -= 1;
+	}
+
+	return occ;
+}
+
+auto ReversedBoyerMooreHorspoolPrefixRankedBarTreePrefixRankedPattern = ReversedBoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, 
tree::PrefixRankedTree, tree::PrefixRankedPattern > ( ReversedBoyerMooreHorspool::getInstance ( ), ReversedBoyerMooreHorspool::match ); + } /* namespace exact */ } /* namespace arbology */ diff --git a/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.h b/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.h index 637d2fb52d5988403fe5ed49d0687a657117c8e1..0f7c7c621085aa2a3c7b18e92d6b0e3bac7c5611 100644 --- a/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.h +++ b/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.h @@ -30,6 +30,8 @@ public: static std::set < unsigned > match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarTree & pattern ); static std::set < unsigned > match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarPattern & pattern ); + static std::set < unsigned > match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedTree & pattern ); + static std::set < unsigned > match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedPattern & pattern ); static ReversedBoyerMooreHorspool & getInstance ( ) { static ReversedBoyerMooreHorspool res; diff --git a/tests.aarbology.sh b/tests.aarbology.sh index de707b0b62aa88e0abf78f951ac6e5db04083e28..38be0dae539920c9d0eecd195f2ac166a2e56850 100755 --- a/tests.aarbology.sh +++ b/tests.aarbology.sh @@ -268,6 +268,7 @@ runTestSubtree "Exact Subtree Automaton (Tree)" "./arun2 -t occurrences -a <(./a runTestPattern "Exact Boyer Moore Horspool (Pattern PrefixRankedBar)" "./aarbology2 -a boyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set" runTestPattern "Exact Reversed Boyer Moore Horspool (Pattern PrefixRankedBar)" "./aarbology2 -a reversedBoyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( 
./acast2 -t PrefixRankedBarPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set" +runTestPattern "Exact Reversed Boyer Moore Horspool (Pattern PrefixRanked)" "./aarbology2 -a reversedBoyerMooreHorspool -s <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set" runTestPattern "Exact Knuth Morris Pratt (Pattern PrefixRankedBar)" "./aarbology2 -a knuthMorrisPratt -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set" runTestPattern "Exact Knuth Morris Pratt (Pattern PrefixRanked)" "./aarbology2 -a knuthMorrisPratt -s <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set" runTestPattern "Exact Pattern Matching Automaton (Pattern Tree)" "./arun2 -t occurrences -a <(./aarbology2 -a exactPatternMatchingAutomaton -p <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\")) | ./adeterminize2) -i \"\$SUBJECT_FILE\" | ./astat2 -p size --set"