Skip to content
Snippets Groups Projects
Commit 06be2ab9 authored by Jan Trávníček's avatar Jan Trávníček
Browse files

+Reversed BMH on PrefixRanked notation

parent 60f22939
No related branches found
No related tags found
No related merge requests found
......@@ -32,7 +32,7 @@ std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const
 
// find the distance between the end of the pattern and the index
// of the last symbol representing the variable
int lastSOffset = pattern.getContent ( ).size ( );
unsigned lastSOffset = pattern.getContent ( ).size ( );
 
for ( unsigned i = 0; i < pattern.getContent ( ).size ( ); i++ )
if ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) )
......
......@@ -9,6 +9,7 @@
 
#include <exception/AlibException.h>
#include <tree/ranked/PrefixRankedBarPattern.h>
#include <tree/ranked/PrefixRankedPattern.h>
 
namespace arbology {
 
......@@ -32,7 +33,7 @@ std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs
 
// find the distance between the beginning of the pattern and the index
// of the first symbol representing the variable's bar
int firstSBarOffset = pattern.getContent ( ).size ( ) + 1;
unsigned firstSBarOffset = pattern.getContent ( ).size ( ) + 1;
for (int i = (int) pattern.getContent ( ).size ( ) - 1; i >= 0; i--) {
if ( pattern.getContent ( )[i].getSymbol() == pattern.getVariablesBarSymbol ( ) )
firstSBarOffset = i;
......@@ -72,6 +73,50 @@ std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs
 
// Namespace-scope object built from the ReversedBadCharacterShiftTable singleton and the bcs overload
// selected by the template arguments (result type std::map < RankedSymbol, size_t >, argument type
// tree::PrefixRankedBarPattern).
// NOTE(review): presumably the constructor registers that overload for dynamic dispatch; RegistratorWrapper
// is defined outside this view — confirm.
auto ReversedBadCharacterShiftTablePrefixRankedBarPattern = ReversedBadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedBarPattern > ( ReversedBadCharacterShiftTable::getInstance ( ), ReversedBadCharacterShiftTable::bcs );
 
/**
 * Computes the bad character shift table for a pattern in prefix ranked notation,
 * as consumed by the reversed Boyer-Moore-Horspool matcher (which moves the pattern
 * towards the beginning of the subject).
 *
 * Every symbol of the pattern's alphabet except the subtree wildcard gets an entry.
 * The entry starts at the pattern length and is then lowered to
 *  - the index of the first subtree wildcard in the pattern (at least 1), and
 *  - the smallest index >= 1 at which the symbol itself occurs in the pattern.
 *
 * @param pattern the pattern in prefix ranked notation
 * @return map from ranked symbol to the number of positions the pattern may be shifted
 */
std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs ( const tree::PrefixRankedPattern & pattern ) {
	const std::set < alphabet::RankedSymbol > & alphabet = pattern.getAlphabet ( );
	const auto & content = pattern.getContent ( );
	const auto & wildcard = pattern.getSubtreeWildcard ( );
	const size_t patternSize = content.size ( );

	std::map < alphabet::RankedSymbol, size_t > bcs;

	// initialisation of bcs table to the size of the pattern
	for ( const alphabet::RankedSymbol & symbol : alphabet ) {
		if ( symbol == wildcard ) continue;

		bcs.insert ( std::make_pair ( symbol, patternSize ) );
	}

	// find the index of the first subtree wildcard in the pattern;
	// patternSize + 1 means "no wildcard present" and therefore never limits a shift
	size_t firstWildcardOffset = patternSize + 1;

	for ( size_t i = 0; i < patternSize; ++i )
		if ( content[i] == wildcard ) {
			firstWildcardOffset = i;
			break;
		}

	// a shift of zero would never move the pattern
	if ( firstWildcardOffset == 0 ) firstWildcardOffset = 1;

	// limit the shift by the position of the first wildcard
	// (bcs holds exactly the non-wildcard alphabet symbols at this point)
	for ( auto & entry : bcs )
		if ( entry.second > firstWildcardOffset )
			entry.second = firstWildcardOffset;

	// limit the shift by position of symbols within the pattern; the first symbol is not concerned.
	// Counting down with "i-- > 1" visits patternSize - 1, ..., 1 and, unlike starting an unsigned
	// index at size - 1, does not wrap around when the pattern content is empty.
	for ( size_t i = patternSize; i-- > 1; )
		if ( content[i] != wildcard ) {
			size_t & shift = bcs[content[i]];

			if ( shift > i )
				shift = i;
		}

	return bcs;
}
// Namespace-scope object built from the ReversedBadCharacterShiftTable singleton and the bcs overload
// selected by the template arguments (result type std::map < RankedSymbol, size_t >, argument type
// tree::PrefixRankedPattern).
// NOTE(review): presumably the constructor registers that overload for dynamic dispatch; RegistratorWrapper
// is defined outside this view — confirm.
auto ReversedBadCharacterShiftTablePrefixRankedPattern = ReversedBadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedPattern > ( ReversedBadCharacterShiftTable::getInstance ( ), ReversedBadCharacterShiftTable::bcs );
} /* namespace exact */
 
} /* namespace arbology */
......@@ -33,6 +33,7 @@ public:
static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::RankedTreeWrapper & pattern );
 
static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::PrefixRankedBarPattern & pattern );
// Bad character shift table for a pattern in prefix ranked notation (without bar symbols):
// maps each non-wildcard symbol of the pattern's alphabet to a shift length.
static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::PrefixRankedPattern & pattern );
 
static ReversedBadCharacterShiftTable & getInstance ( ) {
static ReversedBadCharacterShiftTable res;
......
......@@ -11,7 +11,9 @@
 
#include <exception/AlibException.h>
#include <tree/Tree.h>
#include <tree/ranked/PrefixRankedTree.h>
#include <tree/ranked/PrefixRankedBarTree.h>
#include <tree/ranked/PrefixRankedPattern.h>
#include <tree/ranked/PrefixRankedBarPattern.h>
#include <alphabet/RankedSymbol.h>
 
......@@ -74,6 +76,54 @@ std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRank
 
// Namespace-scope object built from the ReversedBoyerMooreHorspool singleton and the match overload
// selected by the template arguments (subject tree::PrefixRankedBarTree, pattern tree::PrefixRankedBarPattern).
// NOTE(review): presumably the constructor registers that overload for dynamic dispatch; RegistratorWrapper
// is defined outside this view — confirm.
auto ReversedBoyerMooreHorspoolPrefixRankedBarTreePrefixRankedBarPattern = ReversedBoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree, tree::PrefixRankedBarPattern > ( ReversedBoyerMooreHorspool::getInstance ( ), ReversedBoyerMooreHorspool::match );
 
/**
 * Searches the subject for occurrences of a pattern given as a plain tree.
 *
 * The pattern tree is converted to a tree::PrefixRankedPattern and the search is
 * delegated to the overload taking a pattern.
 *
 * @param subject the tree to search in, in prefix ranked notation
 * @param pattern the tree to search for, in prefix ranked notation
 * @return the set returned by the pattern overload of match
 */
std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedTree & pattern ) {
	const tree::PrefixRankedPattern patternFromTree ( pattern );

	return match ( subject, patternFromTree );
}
// Namespace-scope object built from the ReversedBoyerMooreHorspool singleton and the match overload
// selected by the template arguments (subject tree::PrefixRankedTree, pattern tree::PrefixRankedTree).
// NOTE(review): the identifier says "PrefixRankedBarTree" although the subject type is tree::PrefixRankedTree;
// looks like a copy-paste leftover — consider renaming once it is confirmed nothing refers to this name.
// NOTE(review): presumably the constructor registers that overload for dynamic dispatch; RegistratorWrapper
// is defined outside this view — confirm.
auto ReversedBoyerMooreHorspoolPrefixRankedBarTreePrefixRankedTree = ReversedBoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedTree, tree::PrefixRankedTree > ( ReversedBoyerMooreHorspool::getInstance ( ), ReversedBoyerMooreHorspool::match );
/**
 * Reversed Boyer-Moore-Horspool search of a tree pattern in a subject tree, both in
 * prefix ranked notation.
 *
 * Candidate start positions are tried from the end of the subject towards its beginning.
 * At each position the pattern is compared left to right; a subtree wildcard in the pattern
 * consumes a whole subject subtree via the subtree jump table. After each attempt the start
 * position is moved towards the beginning by the bad character shift of the subject symbol
 * at the current start position.
 *
 * @param subject the tree to search in
 * @param pattern the pattern to search for; may contain the subtree wildcard
 * @return indexes into the subject's content at which an occurrence of the pattern starts
 */
std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedPattern & pattern ) {
	std::set < unsigned > occ;

	const auto & subjectContent = subject.getContent ( );
	const auto & patternContent = pattern.getContent ( );
	const size_t subjectSize = subjectContent.size ( );
	const size_t patternSize = patternContent.size ( );

	// an empty pattern or a pattern longer than the subject cannot occur; returning early also
	// keeps the start index computation below free of unsigned wrap-around
	if ( ( patternSize == 0 ) || ( patternSize > subjectSize ) ) return occ;

	std::map < alphabet::RankedSymbol, size_t > bcs = ReversedBadCharacterShiftTable::bcs ( pattern ); //NOTE: the subjects alphabet must be a subset or equal to the pattern
	std::vector < int > subjectSubtreeJumpTable = SubtreeJumpTable::compute ( subject );

	// index to the subject: the last position where the pattern can still start. Every pattern
	// symbol consumes at least one subject symbol, so no occurrence starts beyond
	// subjectSize - patternSize. Starting one position further (as the code used to) made the
	// shift lookup below read subjectContent[subjectSize] for single-symbol patterns.
	int i = static_cast < int > ( subjectSize - patternSize );

	// main loop of the algorithm over all possible indexes where the pattern can start
	while ( i >= 0 ) {
		// index to the pattern
		unsigned j = 0;

		// offset to the subject
		unsigned offset = i;

		while ( ( j < patternSize ) && ( offset < subjectSize ) ) {
			if ( subjectContent[offset] == patternContent[j] ) {
				// match of symbol
				offset = offset + 1;
			} else if ( patternContent[j] == pattern.getSubtreeWildcard ( ) ) {
				// match of variable with subtree
				offset = subjectSubtreeJumpTable[offset];
			} else {
				break;
			}

			j = j + 1;
		}

		// match was found
		if ( j == patternSize ) occ.insert ( i );

		// shift heuristics: a symbol without a table entry (e.g. one outside the pattern's alphabet)
		// would give a zero shift and loop forever; moving by one position never skips an occurrence
		const auto shiftEntry = bcs.find ( subjectContent[i] );
		const size_t shift = ( ( shiftEntry != bcs.end ( ) ) && ( shiftEntry->second > 0 ) ) ? shiftEntry->second : 1;

		i -= static_cast < int > ( shift );
	}

	return occ;
}
// Namespace-scope object built from the ReversedBoyerMooreHorspool singleton and the match overload
// selected by the template arguments (subject tree::PrefixRankedTree, pattern tree::PrefixRankedPattern).
// NOTE(review): the identifier says "PrefixRankedBarTree" although the subject type is tree::PrefixRankedTree;
// looks like a copy-paste leftover — consider renaming once it is confirmed nothing refers to this name.
// NOTE(review): presumably the constructor registers that overload for dynamic dispatch; RegistratorWrapper
// is defined outside this view — confirm.
auto ReversedBoyerMooreHorspoolPrefixRankedBarTreePrefixRankedPattern = ReversedBoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedTree, tree::PrefixRankedPattern > ( ReversedBoyerMooreHorspool::getInstance ( ), ReversedBoyerMooreHorspool::match );
} /* namespace exact */
 
} /* namespace arbology */
......@@ -30,6 +30,8 @@ public:
 
static std::set < unsigned > match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarTree & pattern );
static std::set < unsigned > match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarPattern & pattern );
// Search with the pattern given as a plain tree in prefix ranked notation; converts the pattern to a
// tree::PrefixRankedPattern and forwards to the overload below.
static std::set < unsigned > match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedTree & pattern );
// Search with a pattern in prefix ranked notation that may contain the subtree wildcard; returns the
// indexes into the subject's content at which an occurrence of the pattern starts.
static std::set < unsigned > match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedPattern & pattern );
 
static ReversedBoyerMooreHorspool & getInstance ( ) {
static ReversedBoyerMooreHorspool res;
......
......@@ -268,6 +268,7 @@ runTestSubtree "Exact Subtree Automaton (Tree)" "./arun2 -t occurrences -a <(./a
 
runTestPattern "Exact Boyer Moore Horspool (Pattern PrefixRankedBar)" "./aarbology2 -a boyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"
runTestPattern "Exact Reversed Boyer Moore Horspool (Pattern PrefixRankedBar)" "./aarbology2 -a reversedBoyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"
runTestPattern "Exact Reversed Boyer Moore Horspool (Pattern PrefixRanked)" "./aarbology2 -a reversedBoyerMooreHorspool -s <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"
runTestPattern "Exact Knuth Morris Pratt (Pattern PrefixRankedBar)" "./aarbology2 -a knuthMorrisPratt -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set"
runTestPattern "Exact Knuth Morris Pratt (Pattern PrefixRanked)" "./aarbology2 -a knuthMorrisPratt -s <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set"
runTestPattern "Exact Pattern Matching Automaton (Pattern Tree)" "./arun2 -t occurrences -a <(./aarbology2 -a exactPatternMatchingAutomaton -p <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\")) | ./adeterminize2) -i \"\$SUBJECT_FILE\" | ./astat2 -p size --set"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment