diff --git a/alib2algo/src/arbology/exact/BadCharacterShiftTable.cpp b/alib2algo/src/arbology/exact/BadCharacterShiftTable.cpp index 08ceca90ae7a6543a59934a34331f584bfeb3a2e..a26403119d90d3fc40e199d0217d19213ac78214 100644 --- a/alib2algo/src/arbology/exact/BadCharacterShiftTable.cpp +++ b/alib2algo/src/arbology/exact/BadCharacterShiftTable.cpp @@ -8,6 +8,7 @@ #include "BadCharacterShiftTable.h" #include <tree/ranked/PrefixRankedBarPattern.h> +#include <tree/ranked/PrefixRankedBarNonlinearPattern.h> namespace arbology { @@ -18,13 +19,19 @@ std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const } std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const tree::PrefixRankedBarPattern & pattern ) { + return bcs ( tree::PrefixRankedBarNonlinearPattern ( pattern ) ); +} + +auto BadCharacterShiftTablePrefixRankedBarPattern = BadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedBarPattern > ( BadCharacterShiftTable::bcs ); + +std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const tree::PrefixRankedBarNonlinearPattern & pattern ) { const std::set < alphabet::RankedSymbol > & alphabet = pattern.getAlphabet ( ); std::map < alphabet::RankedSymbol, size_t > bcs; // initialisation of bcs table to the size of the pattern for ( const alphabet::RankedSymbol & symbol : alphabet ) { - if ( ( symbol == pattern.getSubtreeWildcard ( ) ) || ( symbol == pattern.getVariablesBar ( ) ) ) continue; + if ( ( symbol == pattern.getSubtreeWildcard ( ) ) || ( pattern.getNonlinearVariables ( ).count ( symbol ) ) || ( symbol == pattern.getVariablesBar ( ) ) ) continue; bcs.insert ( std::make_pair ( symbol, pattern.getContent ( ).size ( ) ) ); } @@ -34,13 +41,13 @@ std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const unsigned lastSOffset = pattern.getContent ( ).size ( ); for ( unsigned i = 0; i < pattern.getContent ( ).size ( ); i++ ) - if ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) ) + if ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[i] ) ) lastSOffset = pattern.getContent ( ).size ( ) - i - 1; // limit the shift by occurrence of the last variable for ( const alphabet::RankedSymbol & symbol : alphabet ) { - if ( ( symbol == pattern.getSubtreeWildcard ( ) ) || ( symbol == pattern.getVariablesBar ( ) ) ) continue; + if ( ( symbol == pattern.getSubtreeWildcard ( ) ) || ( pattern.getNonlinearVariables ( ).count ( symbol ) ) || ( symbol == pattern.getVariablesBar ( ) ) ) continue; size_t tmp = lastSOffset; @@ -60,7 +67,7 @@ std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const // limit the shift by position of symbols within the pattern for ( unsigned i = 0; i < pattern.getContent ( ).size ( ) - 1; i++ ) { // last symbol is not concerned - if ( ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) ) || ( pattern.getContent ( )[i] == pattern.getVariablesBar ( ) ) ) continue; + if ( ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) ) || ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[i] ) ) || ( pattern.getContent ( )[i] == pattern.getVariablesBar ( ) ) ) continue; size_t tmp = pattern.getContent ( ).size ( ) - i - 1; @@ -71,7 +78,7 @@ std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const return bcs; } -auto BadCharacterShiftTablePrefixRankedBarPattern = BadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedBarPattern > ( BadCharacterShiftTable::bcs ); +auto BadCharacterShiftTablePrefixRankedBarNonlinearPattern = BadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedBarNonlinearPattern > ( BadCharacterShiftTable::bcs ); } /* namespace exact */ diff --git a/alib2algo/src/arbology/exact/BadCharacterShiftTable.h b/alib2algo/src/arbology/exact/BadCharacterShiftTable.h index fd09c8cdfc511332e28c686a80bb71474ff5a065..afcf093448263416028c7c96b3a2f25b3ab99611 100644 --- a/alib2algo/src/arbology/exact/BadCharacterShiftTable.h +++ b/alib2algo/src/arbology/exact/BadCharacterShiftTable.h @@ -33,6 +33,7 @@ public: static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::RankedTreeWrapper & pattern ); static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::PrefixRankedBarPattern & pattern ); + static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::PrefixRankedBarNonlinearPattern & pattern ); }; diff --git a/alib2algo/src/arbology/exact/BoyerMooreHorspool.cpp b/alib2algo/src/arbology/exact/BoyerMooreHorspool.cpp index 1d0905226c05a10110847c2070b5805e128cb403..4c3b3058f42d3940366e6cb5ec3dbd0d9d134640 100644 --- a/alib2algo/src/arbology/exact/BoyerMooreHorspool.cpp +++ b/alib2algo/src/arbology/exact/BoyerMooreHorspool.cpp @@ -8,10 +8,12 @@ #include "BoyerMooreHorspool.h" #include "BadCharacterShiftTable.h" #include "SubtreeJumpTable.h" +#include "ExactSubtreeRepeatsNaive.h" #include <tree/Tree.h> #include <tree/ranked/PrefixRankedBarTree.h> #include <tree/ranked/PrefixRankedBarPattern.h> +#include <tree/ranked/PrefixRankedBarNonlinearPattern.h> #include <alphabet/RankedSymbol.h> #include <map> @@ -73,6 +75,64 @@ std::set < unsigned > BoyerMooreHorspool::match ( const tree::PrefixRankedBarTre auto BoyerMooreHorspoolPrefixRankedBarTreePrefixRankedBarPattern = BoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree, tree::PrefixRankedBarPattern > ( BoyerMooreHorspool::match ); +std::set < unsigned > BoyerMooreHorspool::match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarNonlinearPattern & pattern ) { + std::set < unsigned > occ; + std::map < alphabet::RankedSymbol, size_t > bcs = BadCharacterShiftTable::bcs ( pattern ); //NOTE: the subjects alphabet must be a subset or equal to the pattern + std::vector < int > subjectSubtreeJumpTable = SubtreeJumpTable::compute ( subject ); + std::map < alphabet::RankedSymbol, alphabet::Symbol > variablesSetting; + + tree::PrefixRankedBarTree repeats = ExactSubtreeRepeatsNaive::repeats ( subject ); + + // index to the subject + unsigned i = 0; + + // main loop of the algorithm over all possible indexes where the pattern can start + while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) { + // clear the current state of variable to subtree repeat + variablesSetting.clear(); + + // index to the pattern + unsigned j = pattern.getContent ( ).size ( ) - 1; + + // offset to the subject + unsigned offset = i + j; + + while ( ( j > 0 ) && ( offset > 0 ) ) { + if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { + // match of symbol + offset = offset - 1; + j = j - 1; + } else if ( ( pattern.getContent ( )[j] == pattern.getVariablesBar ( ) ) && ( pattern.getBars ( ).count ( subject.getContent ( )[offset] )) ) { + // else match of variable with subtree + offset = subjectSubtreeJumpTable[offset]; + + // check nonlinear variable + if ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j - 1 ] ) ) { + auto setting = variablesSetting.find ( pattern.getContent ( )[ j - 1 ] ); + + if ( setting != variablesSetting.end ( ) && repeats.getContent ( )[ offset + 1 ].getSymbol ( ) != setting->second ) + break; + + variablesSetting.insert ( std::make_pair ( pattern.getContent ( )[ j - 1 ], repeats.getContent( )[ offset + 1 ].getSymbol ( ) ) ); + } + j = j - 2; + } else { + break; + } + } + + // match was found + if ( j == 0 ) occ.insert ( offset ); + + // shift heuristics + i += bcs[subject.getContent ( )[i + pattern.getContent ( ).size ( ) - 1]]; + } + + return occ; +} + +auto BoyerMooreHorspoolPrefixRankedBarTreePrefixRankedBarNonlinearPattern = BoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree, tree::PrefixRankedBarNonlinearPattern > ( BoyerMooreHorspool::match ); + } /* namespace exact */ } /* namespace arbology */ diff --git a/alib2algo/src/arbology/exact/BoyerMooreHorspool.h b/alib2algo/src/arbology/exact/BoyerMooreHorspool.h index c40adb0b0964df5d0bab15d4b907055be59457b3..f4391d94094afe2b3f0def4902cbff7bd14bd089 100644 --- a/alib2algo/src/arbology/exact/BoyerMooreHorspool.h +++ b/alib2algo/src/arbology/exact/BoyerMooreHorspool.h @@ -30,6 +30,7 @@ public: static std::set < unsigned > match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarTree & pattern ); static std::set < unsigned > match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarPattern & pattern ); + static std::set < unsigned > match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarNonlinearPattern & pattern ); }; diff --git a/tests.aarbology.sh b/tests.aarbology.sh index 8ac0d70e0fa1e01dff56665ddeb7db0553098e82..7fa8f9b88a72ce47484f20fc7b31f92a5b23d4cc 100755 --- a/tests.aarbology.sh +++ b/tests.aarbology.sh @@ -59,6 +59,10 @@ function log { echo "$5" >> $LOGFILE } +function generateNonlinearPattern { + ./arand2 -t RNP --nodes $RAND_SIZE_PATTERN --terminals $(( $RANDOM % $RAND_ALPHABET + 1 )) --height $RAND_HEIGHT_PATTERN 2>/dev/null +} + function generatePattern { ./arand2 -t RP --nodes $RAND_SIZE_PATTERN --terminals $(( $RANDOM % $RAND_ALPHABET + 1 )) --height $RAND_HEIGHT_PATTERN 2>/dev/null } @@ -259,6 +263,51 @@ function runTestPattern { clearResults } +# $1 - aconversions2 sequence +function runTestNonlinearPattern { + echo $1 + echo -ne "\t" + + initResults + + # predefined tests first + for SUBJECT_FILE in `ls $TESTS_DIR/aarbology.test*.subject.xml`; do + PATTERN_FILE=${SUBJECT_FILE%.subject.xml}.nonlinearPattern.xml + if [ -f $PATTERN_FILE ]; then + SUBJECT_FILE_COPY=$(mktemp) + PATTERN_FILE_COPY=$(mktemp) + + cat $SUBJECT_FILE > $SUBJECT_FILE_COPY + cat $PATTERN_FILE > $PATTERN_FILE_COPY + + Occs=`./aarbology2 -a exactPatternMatch -s "$SUBJECT_FILE_COPY" -p "$PATTERN_FILE_COPY" | ./astat2 -p size --set` + + runTest2 "$Occs" "$2" "$SUBJECT_FILE_COPY" "$PATTERN_FILE_COPY" + fi + done + + echo -n " | " + + # random tests + for i in $(seq 1 $TESTCASE_ITERATIONS ); + do + SUBJECT_FILE=$(mktemp) + PATTERN_FILE=$(mktemp) + + cat <(generateSubject) > $SUBJECT_FILE + cat <(generateNonlinearPattern) > $PATTERN_FILE + + Occs=`./aarbology2 -a exactPatternMatch -s "$SUBJECT_FILE" -p "$PATTERN_FILE" | ./astat2 -p size --set` + + runTest2 "$Occs" "$2" "$SUBJECT_FILE" "$PATTERN_FILE" & (( i%JOBS==0 )) && wait + done + + wait + + outputResults + clearResults +} + # $1 - aconversions2 sequence function runTestPatternEnds { echo $1 @@ -314,6 +363,8 @@ runTestSubtree "Exact Boyer Moore Horspool (Subtree PrefixRankedBar)" "./aarbolo runTestSubtree "Exact Subtree Automaton (Tree)" "./arun2 -t occurrences -a <(./aarbology2 -a exactSubtreeMatchingAutomaton -p \"\$PATTERN_FILE\" | ./adeterminize2) -i \"\$SUBJECT_FILE\" | ./astat2 -p size --set" runTestPattern "Exact Boyer Moore Horspool (Pattern PrefixRankedBar)" "./aarbology2 -a boyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set" +runTestNonlinearPattern "Exact Pattern Match (NonlinearPattern PrefixRankedBar)" "./aarbology2 -a exactPatternMatch -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarNonlinearPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set" +runTestNonlinearPattern "Exact Boyer Moore Horspool (NonlinearPattern PrefixRankedBar)" "./aarbology2 -a boyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarNonlinearPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set" runTestPattern "Exact Reversed Boyer Moore Horspool (Pattern PrefixRankedBar)" "./aarbology2 -a reversedBoyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set" runTestPattern "Exact Reversed Boyer Moore Horspool (Pattern PrefixRanked)" "./aarbology2 -a reversedBoyerMooreHorspool -s <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set" runTestPattern "Exact Knuth Morris Pratt (Pattern PrefixRankedBar)" "./aarbology2 -a knuthMorrisPratt -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set"