Skip to content
Snippets Groups Projects
Commit 07a6ac05 authored by Jan Trávníček's avatar Jan Trávníček
Browse files

bcs on nonlinear tree patterns

parent d7dd8f34
No related branches found
No related tags found
No related merge requests found
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include "BadCharacterShiftTable.h" #include "BadCharacterShiftTable.h"
   
#include <tree/ranked/PrefixRankedBarPattern.h> #include <tree/ranked/PrefixRankedBarPattern.h>
#include <tree/ranked/PrefixRankedBarNonlinearPattern.h>
   
namespace arbology { namespace arbology {
   
...@@ -18,13 +19,19 @@ std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const ...@@ -18,13 +19,19 @@ std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const
} }
   
std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const tree::PrefixRankedBarPattern & pattern ) { std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const tree::PrefixRankedBarPattern & pattern ) {
return bcs ( tree::PrefixRankedBarNonlinearPattern ( pattern ) );
}
// Hands the bcs overload taking tree::PrefixRankedBarPattern to RegistratorWrapper
// (presumably so it can be selected by argument type at run time -- confirm against RegistratorWrapper).
auto BadCharacterShiftTablePrefixRankedBarPattern = BadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedBarPattern > ( BadCharacterShiftTable::bcs );
std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const tree::PrefixRankedBarNonlinearPattern & pattern ) {
const std::set < alphabet::RankedSymbol > & alphabet = pattern.getAlphabet ( ); const std::set < alphabet::RankedSymbol > & alphabet = pattern.getAlphabet ( );
   
std::map < alphabet::RankedSymbol, size_t > bcs; std::map < alphabet::RankedSymbol, size_t > bcs;
   
// initialisation of bcs table to the size of the pattern // initialisation of bcs table to the size of the pattern
for ( const alphabet::RankedSymbol & symbol : alphabet ) { for ( const alphabet::RankedSymbol & symbol : alphabet ) {
if ( ( symbol == pattern.getSubtreeWildcard ( ) ) || ( symbol == pattern.getVariablesBar ( ) ) ) continue; if ( ( symbol == pattern.getSubtreeWildcard ( ) ) || ( pattern.getNonlinearVariables ( ).count ( symbol ) ) || ( symbol == pattern.getVariablesBar ( ) ) ) continue;
   
bcs.insert ( std::make_pair ( symbol, pattern.getContent ( ).size ( ) ) ); bcs.insert ( std::make_pair ( symbol, pattern.getContent ( ).size ( ) ) );
} }
...@@ -34,13 +41,13 @@ std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const ...@@ -34,13 +41,13 @@ std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const
unsigned lastSOffset = pattern.getContent ( ).size ( ); unsigned lastSOffset = pattern.getContent ( ).size ( );
   
for ( unsigned i = 0; i < pattern.getContent ( ).size ( ); i++ ) for ( unsigned i = 0; i < pattern.getContent ( ).size ( ); i++ )
if ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) ) if ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[i] ) )
lastSOffset = pattern.getContent ( ).size ( ) - i - 1; lastSOffset = pattern.getContent ( ).size ( ) - i - 1;
   
// limit the shift by occurrence of the last variable // limit the shift by occurrence of the last variable
   
for ( const alphabet::RankedSymbol & symbol : alphabet ) { for ( const alphabet::RankedSymbol & symbol : alphabet ) {
if ( ( symbol == pattern.getSubtreeWildcard ( ) ) || ( symbol == pattern.getVariablesBar ( ) ) ) continue; if ( ( symbol == pattern.getSubtreeWildcard ( ) ) || ( pattern.getNonlinearVariables ( ).count ( symbol ) ) || ( symbol == pattern.getVariablesBar ( ) ) ) continue;
   
size_t tmp = lastSOffset; size_t tmp = lastSOffset;
   
...@@ -60,7 +67,7 @@ std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const ...@@ -60,7 +67,7 @@ std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const
   
// limit the shift by position of symbols within the pattern // limit the shift by position of symbols within the pattern
for ( unsigned i = 0; i < pattern.getContent ( ).size ( ) - 1; i++ ) { // last symbol is not concerned for ( unsigned i = 0; i < pattern.getContent ( ).size ( ) - 1; i++ ) { // last symbol is not concerned
if ( ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) ) || ( pattern.getContent ( )[i] == pattern.getVariablesBar ( ) ) ) continue; if ( ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) ) || ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[i] ) ) || ( pattern.getContent ( )[i] == pattern.getVariablesBar ( ) ) ) continue;
   
size_t tmp = pattern.getContent ( ).size ( ) - i - 1; size_t tmp = pattern.getContent ( ).size ( ) - i - 1;
   
...@@ -71,7 +78,7 @@ std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const ...@@ -71,7 +78,7 @@ std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const
return bcs; return bcs;
} }
   
auto BadCharacterShiftTablePrefixRankedBarPattern = BadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedBarPattern > ( BadCharacterShiftTable::bcs ); auto BadCharacterShiftTablePrefixRankedBarNonlinearPattern = BadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedBarNonlinearPattern > ( BadCharacterShiftTable::bcs );
   
} /* namespace exact */ } /* namespace exact */
   
......
...@@ -33,6 +33,7 @@ public: ...@@ -33,6 +33,7 @@ public:
static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::RankedTreeWrapper & pattern ); static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::RankedTreeWrapper & pattern );
   
static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::PrefixRankedBarPattern & pattern ); static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::PrefixRankedBarPattern & pattern );
/**
 * Computes the bad character shift table of a tree::PrefixRankedBarNonlinearPattern.
 * \param pattern the nonlinear pattern the table is built for
 * \return shift for each symbol of the pattern's alphabet; the subtree wildcard, the nonlinear variables and the variables bar receive no entry
 */
static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::PrefixRankedBarNonlinearPattern & pattern );
   
}; };
   
......
...@@ -8,10 +8,12 @@ ...@@ -8,10 +8,12 @@
#include "BoyerMooreHorspool.h" #include "BoyerMooreHorspool.h"
#include "BadCharacterShiftTable.h" #include "BadCharacterShiftTable.h"
#include "SubtreeJumpTable.h" #include "SubtreeJumpTable.h"
#include "ExactSubtreeRepeatsNaive.h"
   
#include <tree/Tree.h> #include <tree/Tree.h>
#include <tree/ranked/PrefixRankedBarTree.h> #include <tree/ranked/PrefixRankedBarTree.h>
#include <tree/ranked/PrefixRankedBarPattern.h> #include <tree/ranked/PrefixRankedBarPattern.h>
#include <tree/ranked/PrefixRankedBarNonlinearPattern.h>
#include <alphabet/RankedSymbol.h> #include <alphabet/RankedSymbol.h>
   
#include <map> #include <map>
...@@ -73,6 +75,64 @@ std::set < unsigned > BoyerMooreHorspool::match ( const tree::PrefixRankedBarTre ...@@ -73,6 +75,64 @@ std::set < unsigned > BoyerMooreHorspool::match ( const tree::PrefixRankedBarTre
   
auto BoyerMooreHorspoolPrefixRankedBarTreePrefixRankedBarPattern = BoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree, tree::PrefixRankedBarPattern > ( BoyerMooreHorspool::match ); auto BoyerMooreHorspoolPrefixRankedBarTreePrefixRankedBarPattern = BoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree, tree::PrefixRankedBarPattern > ( BoyerMooreHorspool::match );
   
/**
 * Boyer-Moore-Horspool search of a nonlinear tree pattern inside a subject tree.
 *
 * For every candidate alignment the pattern is compared with the subject from its last symbol backwards.
 * A variables bar in the pattern facing a bar in the subject skips a whole subject subtree through the
 * subtree jump table. When the skipped variable is a nonlinear one, the identifier found in the subtree
 * repeats of the subject is remembered and every further occurrence of that variable has to carry the
 * same identifier.
 *
 * \param subject the tree that is searched
 * \param pattern the nonlinear pattern that is looked for
 * \return subject positions reached when the backward comparison arrived at pattern position 0
 */
std::set < unsigned > BoyerMooreHorspool::match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarNonlinearPattern & pattern ) {
	std::set < unsigned > occurrences;

	// NOTE: the subject's alphabet must be a subset of (or equal to) the pattern's alphabet; a symbol missing
	// from the table would be default-inserted by operator[] with shift 0 and the search would stop advancing
	std::map < alphabet::RankedSymbol, size_t > shiftTable = BadCharacterShiftTable::bcs ( pattern );
	std::vector < int > jumpTable = SubtreeJumpTable::compute ( subject );
	std::map < alphabet::RankedSymbol, alphabet::Symbol > boundVariables;
	tree::PrefixRankedBarTree repeats = ExactSubtreeRepeatsNaive::repeats ( subject );

	const auto & subjectContent = subject.getContent ( );
	const auto & patternContent = pattern.getContent ( );
	const size_t patternLength = patternContent.size ( );

	// every iteration tries one alignment; the step is given by the subject symbol under the pattern's last position
	for ( unsigned start = 0; start + patternLength <= subjectContent.size ( ); start += shiftTable[subjectContent[start + patternLength - 1]] ) {
		// bindings of nonlinear variables are valid for a single alignment only
		boundVariables.clear ( );

		// position in the pattern and the corresponding position in the subject
		unsigned patternPos = patternLength - 1;
		unsigned subjectPos = start + patternPos;

		while ( ( patternPos > 0 ) && ( subjectPos > 0 ) ) {
			// plain symbol match, step one symbol back in both sequences
			if ( subjectContent[subjectPos] == patternContent[patternPos] ) {
				subjectPos = subjectPos - 1;
				patternPos = patternPos - 1;
				continue;
			}

			// anything but a variables bar facing a subject bar is a mismatch
			if ( ! ( patternContent[patternPos] == pattern.getVariablesBar ( ) ) || ! pattern.getBars ( ).count ( subjectContent[subjectPos] ) )
				break;

			// variable matched with a subtree, skip the subtree in the subject
			// (presumably the jump lands just before the subtree's first symbol, as repeats is read at subjectPos + 1 -- confirm)
			subjectPos = jumpTable[subjectPos];

			// a nonlinear variable has to be matched with the same subtree repeat each time
			if ( pattern.getNonlinearVariables ( ).count ( patternContent[patternPos - 1] ) ) {
				auto bound = boundVariables.find ( patternContent[patternPos - 1] );

				if ( bound != boundVariables.end ( ) && repeats.getContent ( )[subjectPos + 1].getSymbol ( ) != bound->second )
					break;

				boundVariables.insert ( std::make_pair ( patternContent[patternPos - 1], repeats.getContent ( )[subjectPos + 1].getSymbol ( ) ) );
			}

			// step over the variables bar and the variable itself
			patternPos -= 2;
		}

		// the comparison got down to the first pattern symbol, report the occurrence
		if ( patternPos == 0 ) occurrences.insert ( subjectPos );
	}

	return occurrences;
}
// Hands the match overload for ( PrefixRankedBarTree, PrefixRankedBarNonlinearPattern ) to RegistratorWrapper
// (presumably so it can be selected by argument types at run time -- confirm against RegistratorWrapper).
auto BoyerMooreHorspoolPrefixRankedBarTreePrefixRankedBarNonlinearPattern = BoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree, tree::PrefixRankedBarNonlinearPattern > ( BoyerMooreHorspool::match );
} /* namespace exact */ } /* namespace exact */
   
} /* namespace arbology */ } /* namespace arbology */
...@@ -30,6 +30,7 @@ public: ...@@ -30,6 +30,7 @@ public:
   
static std::set < unsigned > match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarTree & pattern ); static std::set < unsigned > match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarTree & pattern );
static std::set < unsigned > match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarPattern & pattern ); static std::set < unsigned > match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarPattern & pattern );
/**
 * Search for a nonlinear pattern in a subject tree.
 * \param subject the tree that is searched
 * \param pattern the nonlinear pattern that is looked for
 * \return set of subject positions reported as occurrences of the pattern
 */
static std::set < unsigned > match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarNonlinearPattern & pattern );
   
}; };
   
......
...@@ -59,6 +59,10 @@ function log { ...@@ -59,6 +59,10 @@ function log {
echo "$5" >> $LOGFILE echo "$5" >> $LOGFILE
} }
   
# Runs arand2 with type RNP (presumably "random nonlinear pattern" -- confirm against arand2) and leaves
# its standard output untouched for the caller; the error output of arand2 is discarded.
# Size and height come from RAND_SIZE_PATTERN and RAND_HEIGHT_PATTERN, the number of terminals is
# drawn from 1 .. RAND_ALPHABET.
function generateNonlinearPattern {
	./arand2 \
		-t RNP \
		--nodes $RAND_SIZE_PATTERN \
		--terminals $(( $RANDOM % $RAND_ALPHABET + 1 )) \
		--height $RAND_HEIGHT_PATTERN \
		2>/dev/null
}
function generatePattern { function generatePattern {
./arand2 -t RP --nodes $RAND_SIZE_PATTERN --terminals $(( $RANDOM % $RAND_ALPHABET + 1 )) --height $RAND_HEIGHT_PATTERN 2>/dev/null ./arand2 -t RP --nodes $RAND_SIZE_PATTERN --terminals $(( $RANDOM % $RAND_ALPHABET + 1 )) --height $RAND_HEIGHT_PATTERN 2>/dev/null
} }
...@@ -259,6 +263,51 @@ function runTestPattern { ...@@ -259,6 +263,51 @@ function runTestPattern {
clearResults clearResults
} }
   
# Checks a nonlinear pattern matching command against the result of "aarbology2 -a exactPatternMatch".
# $1 - name of the test, printed as a heading
# $2 - command under test, passed on to runTest2 together with the expected result and the input files
#
# Predefined subject/pattern pairs from TESTS_DIR are tried first, TESTCASE_ITERATIONS random pairs follow.
function runTestNonlinearPattern {
	echo "$1"
	echo -ne "\t"

	initResults

	# predefined tests first
	# the glob is expanded by the shell itself, so paths containing whitespace stay in one piece;
	# when nothing matches, the literal pattern is rejected by the -f test below
	for SUBJECT_FILE in "$TESTS_DIR"/aarbology.test*.subject.xml; do
		PATTERN_FILE="${SUBJECT_FILE%.subject.xml}.nonlinearPattern.xml"
		if [ -f "$PATTERN_FILE" ]; then
			# work on temporary copies of the predefined inputs
			SUBJECT_FILE_COPY=$(mktemp)
			PATTERN_FILE_COPY=$(mktemp)

			cat "$SUBJECT_FILE" > "$SUBJECT_FILE_COPY"
			cat "$PATTERN_FILE" > "$PATTERN_FILE_COPY"

			# reference result the command under test is compared with
			Occs=`./aarbology2 -a exactPatternMatch -s "$SUBJECT_FILE_COPY" -p "$PATTERN_FILE_COPY" | ./astat2 -p size --set`

			runTest2 "$Occs" "$2" "$SUBJECT_FILE_COPY" "$PATTERN_FILE_COPY"
		fi
	done

	echo -n " | "

	# random tests
	for i in $(seq 1 $TESTCASE_ITERATIONS );
	do
		SUBJECT_FILE=$(mktemp)
		PATTERN_FILE=$(mktemp)

		cat <(generateSubject) > "$SUBJECT_FILE"
		cat <(generateNonlinearPattern) > "$PATTERN_FILE"

		Occs=`./aarbology2 -a exactPatternMatch -s "$SUBJECT_FILE" -p "$PATTERN_FILE" | ./astat2 -p size --set`

		# run in the background, waiting after every JOBS-th started test
		runTest2 "$Occs" "$2" "$SUBJECT_FILE" "$PATTERN_FILE" & (( i%JOBS==0 )) && wait
	done

	# collect the tests still running in the background
	wait

	outputResults
	clearResults
}
# $1 - aconversions2 sequence # $1 - aconversions2 sequence
function runTestPatternEnds { function runTestPatternEnds {
echo $1 echo $1
...@@ -314,6 +363,8 @@ runTestSubtree "Exact Boyer Moore Horspool (Subtree PrefixRankedBar)" "./aarbolo ...@@ -314,6 +363,8 @@ runTestSubtree "Exact Boyer Moore Horspool (Subtree PrefixRankedBar)" "./aarbolo
runTestSubtree "Exact Subtree Automaton (Tree)" "./arun2 -t occurrences -a <(./aarbology2 -a exactSubtreeMatchingAutomaton -p \"\$PATTERN_FILE\" | ./adeterminize2) -i \"\$SUBJECT_FILE\" | ./astat2 -p size --set" runTestSubtree "Exact Subtree Automaton (Tree)" "./arun2 -t occurrences -a <(./aarbology2 -a exactSubtreeMatchingAutomaton -p \"\$PATTERN_FILE\" | ./adeterminize2) -i \"\$SUBJECT_FILE\" | ./astat2 -p size --set"
   
runTestPattern "Exact Boyer Moore Horspool (Pattern PrefixRankedBar)" "./aarbology2 -a boyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set" runTestPattern "Exact Boyer Moore Horspool (Pattern PrefixRankedBar)" "./aarbology2 -a boyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"
# Nonlinear pattern tests on PrefixRankedBar inputs: the subject is cast to PrefixRankedBarTree and the
# pattern to PrefixRankedBarNonlinearPattern after the subject's alphabet (alphabetManip2 -o get) has been
# added to it (alphabetManip2 -o add); the size of the resulting set is printed by astat2.
runTestNonlinearPattern "Exact Pattern Match (NonlinearPattern PrefixRankedBar)" "./aarbology2 -a exactPatternMatch -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarNonlinearPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"
runTestNonlinearPattern "Exact Boyer Moore Horspool (NonlinearPattern PrefixRankedBar)" "./aarbology2 -a boyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarNonlinearPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"
runTestPattern "Exact Reversed Boyer Moore Horspool (Pattern PrefixRankedBar)" "./aarbology2 -a reversedBoyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set" runTestPattern "Exact Reversed Boyer Moore Horspool (Pattern PrefixRankedBar)" "./aarbology2 -a reversedBoyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"
runTestPattern "Exact Reversed Boyer Moore Horspool (Pattern PrefixRanked)" "./aarbology2 -a reversedBoyerMooreHorspool -s <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set" runTestPattern "Exact Reversed Boyer Moore Horspool (Pattern PrefixRanked)" "./aarbology2 -a reversedBoyerMooreHorspool -s <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"
runTestPattern "Exact Knuth Morris Pratt (Pattern PrefixRankedBar)" "./aarbology2 -a knuthMorrisPratt -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set" runTestPattern "Exact Knuth Morris Pratt (Pattern PrefixRankedBar)" "./aarbology2 -a knuthMorrisPratt -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment