Skip to content
Snippets Groups Projects
Commit 3bfb1046 authored by Jan Trávníček's avatar Jan Trávníček
Browse files

kmp for tree patterns on prefix ranked notation

parent 96d09b3e
No related branches found
No related tags found
No related merge requests found
......@@ -10,6 +10,7 @@
 
#include <exception/AlibException.h>
#include <tree/ranked/PrefixRankedBarPattern.h>
#include <tree/ranked/PrefixRankedPattern.h>
 
namespace arbology {
 
......@@ -62,6 +63,49 @@ std::vector < size_t > BorderArrayNaive::ba ( const tree::PrefixRankedBarPattern
 
auto BorderArrayPrefixRankedBarPattern = BorderArrayNaive::RegistratorWrapper < std::vector < size_t >, tree::PrefixRankedBarPattern > ( BorderArrayNaive::getInstance ( ), BorderArrayNaive::ba );
 
/**
 * Tests whether the pattern, shifted right by offset symbols, is compatible
 * with itself on the overlapping part, i.e. whether the symbols at positions
 * offset .. stop - 1 can coincide with the pattern prefix that starts at index 0.
 *
 * Equal symbols advance both cursors by one. If either cursor stands on the
 * subtree wildcard, both cursors are moved to the positions stored for them in
 * the jump table (presumably the index just past the subtree rooted at that
 * position, as produced by SubtreeJumpTable::compute - confirm there). Any other
 * pair of differing symbols means the shift is impossible.
 *
 * @param pattern the pattern in prefix ranked notation
 * @param subtreeJumpTable jump table computed for the pattern
 * @param stop length of the pattern prefix under consideration (exclusive end of the shifted cursor)
 * @param offset tested shift, i.e. start index of the shifted cursor
 * @return true if no conflict was found before either cursor ran out of its range
 */
bool BorderArrayNaive::matches ( const tree::PrefixRankedPattern & pattern, const std::vector < int > & subtreeJumpTable, int stop, int offset ) {
	const auto & content = pattern.getContent ( );
	const auto & wildcard = pattern.getSubtreeWildcard ( );

	// The prefix cursor has to start at the first symbol. Starting at index 1
	// makes the test for offset 1 compare the pattern with itself, which always
	// succeeds and yields a border of length i - 1 for every prefix length i >= 2.
	unsigned i = 0;

	while ( offset < stop && i < content.size ( ) )
		if ( content[i] == content[offset] ) {
			// identical symbols, advance both cursors
			i++;
			offset++;
		} else if ( ( content[i] == wildcard ) || ( content[offset] == wildcard ) ) {
			// a wildcard on either side stands for a whole subtree, jump on both sides
			i = subtreeJumpTable[i];
			offset = subtreeJumpTable[offset];
		} else {
			return false;
		}

	return true;
}
/**
 * Computes the border array of a tree pattern in prefix ranked notation by
 * trying all shifts naively.
 *
 * The result has one entry for every prefix length 0 .. |pattern|. Entry 0 holds
 * -1 converted to size_t. Entry i (i >= 1) holds i - s, where s is the smallest
 * shift in 1 .. i - 1 accepted by matches ( pattern, jumpTable, i, s ), or 0 when
 * no such shift exists.
 *
 * @param pattern the pattern in prefix ranked notation
 * @return the border array described above
 */
std::vector < size_t > BorderArrayNaive::ba ( const tree::PrefixRankedPattern & pattern ) {
	const std::vector < int > jumps = SubtreeJumpTable::compute ( pattern );
	const size_t length = pattern.getContent ( ).size ( );

	// one zero-initialised slot per prefix length 0 .. length
	std::vector < size_t > borders ( length + 1 );

	// marker for the empty prefix; wraps around to the largest size_t value
	borders[0] = static_cast < size_t > ( -1 );

	for ( unsigned prefixLength = 1; prefixLength <= length; ++prefixLength ) {
		// look for the smallest accepted shift; ends at prefixLength when none is accepted
		unsigned shift = 1;

		while ( shift < prefixLength && !matches ( pattern, jumps, prefixLength, shift ) )
			++shift;

		borders[prefixLength] = prefixLength - shift;
	}

	return borders;
}
// Builds a RegistratorWrapper from the instance returned by BorderArrayNaive::getInstance ( ) and the
// BorderArrayNaive::ba overload for tree::PrefixRankedPattern (result type std::vector < size_t >).
// NOTE(review): presumably this is what makes the overload reachable through the dispatcher - confirm against RegistratorWrapper.
auto BorderArrayPrefixRankedPattern = BorderArrayNaive::RegistratorWrapper < std::vector < size_t >, tree::PrefixRankedPattern > ( BorderArrayNaive::getInstance ( ), BorderArrayNaive::ba );
} /* namespace exact */
 
} /* namespace arbology */
......@@ -25,6 +25,8 @@ namespace exact {
class BorderArrayNaive : public std::SingleDispatch < std::vector < size_t >, tree::RankedTreeBase > {
static bool matches ( const tree::PrefixRankedBarPattern & pattern, const std::vector < int > & subtreeJumpTable, int stop, int offset );
 
static bool matches ( const tree::PrefixRankedPattern & pattern, const std::vector < int > & subtreeJumpTable, int stop, int offset );
public:
/**
* Search for pattern in linear string.
......@@ -38,6 +40,12 @@ public:
*/
static std::vector < size_t > ba ( const tree::PrefixRankedBarPattern & pattern );
 
/**
* Compute the border array of a tree pattern in prefix ranked notation.
* @return border array with one entry for each prefix length of the pattern
*/
static std::vector < size_t > ba ( const tree::PrefixRankedPattern & pattern );
static BorderArrayNaive & getInstance ( ) {
static BorderArrayNaive res;
 
......
......@@ -13,6 +13,8 @@
#include <tree/Tree.h>
#include <tree/ranked/PrefixRankedBarTree.h>
#include <tree/ranked/PrefixRankedBarPattern.h>
#include <tree/ranked/PrefixRankedTree.h>
#include <tree/ranked/PrefixRankedPattern.h>
#include <alphabet/RankedSymbol.h>
 
#include <map>
......@@ -74,6 +76,55 @@ std::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedBarTree
 
auto KnuthMorrisPrattPrefixRankedBarTreePrefixRankedBarPattern = KnuthMorrisPratt::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree, tree::PrefixRankedBarPattern > ( KnuthMorrisPratt::getInstance ( ), KnuthMorrisPratt::match );
 
/**
 * Searches the subject for a tree given in prefix ranked notation by wrapping
 * that tree into a tree::PrefixRankedPattern and forwarding to the
 * ( PrefixRankedTree, PrefixRankedPattern ) overload.
 *
 * @param subject the tree to search in
 * @param pattern the tree to search for
 * @return whatever the pattern overload returns for the wrapped tree
 */
std::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedTree & pattern ) {
	// the tree-to-pattern conversion is explicit, so it is spelled out here
	const tree::PrefixRankedPattern asPattern ( pattern );

	return match ( subject, asPattern );
}
// Builds a RegistratorWrapper from the instance returned by KnuthMorrisPratt::getInstance ( ) and the
// KnuthMorrisPratt::match overload taking ( tree::PrefixRankedTree, tree::PrefixRankedTree ).
// NOTE(review): presumably this is what makes the overload reachable through the dispatcher - confirm against RegistratorWrapper.
auto KnuthMorrisPrattPrefixRankedTreePrefixRankedTree = KnuthMorrisPratt::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedTree, tree::PrefixRankedTree > ( KnuthMorrisPratt::getInstance ( ), KnuthMorrisPratt::match );
/**
 * Finds the occurrences of a tree pattern in a subject tree, both given in
 * prefix ranked notation.
 *
 * Each candidate start index is verified from the first pattern symbol. After
 * the verification the start index is moved forward by the number of pattern
 * symbols consumed minus the border array entry for that number.
 *
 * @param subject the tree to search in
 * @param pattern the pattern to search for, may contain the subtree wildcard
 * @return start indexes in the subject content at which the whole pattern was consumed
 */
std::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedPattern & pattern ) {
	std::set < unsigned > occurrences;

	const std::vector < size_t > borders = BorderArrayNaive::ba ( pattern );
	const std::vector < int > subtreeJumps = SubtreeJumpTable::compute ( subject );

	const auto & text = subject.getContent ( );
	const auto & symbols = pattern.getContent ( );
	const auto & wildcard = pattern.getSubtreeWildcard ( );

	// start walks over the subject positions where the pattern still fits by length
	for ( unsigned start = 0; start + symbols.size ( ) <= text.size ( ); ) {
		// number of pattern symbols consumed so far
		unsigned consumed = 0;
		// cursor into the subject
		unsigned position = start;

		for ( ; consumed < symbols.size ( ) && position < text.size ( ); ++consumed ) {
			if ( text[position] == symbols[consumed] ) {
				// same symbol on both sides
				++position;
				continue;
			}

			// differing symbols are acceptable only when the pattern holds the wildcard
			if ( !( symbols[consumed] == wildcard ) )
				break;

			// the wildcard takes a whole subject subtree; continue at the index the jump table gives for it
			position = subtreeJumps[position];
		}

		// the whole pattern was consumed, report the occurrence
		if ( consumed >= symbols.size ( ) )
			occurrences.insert ( start );

		// shift heuristic; for consumed == 0 the entry is -1 as size_t, so the unsigned subtraction yields 1
		start += consumed - borders[consumed];
	}

	return occurrences;
}
// Builds a RegistratorWrapper from the instance returned by KnuthMorrisPratt::getInstance ( ) and the
// KnuthMorrisPratt::match overload taking ( tree::PrefixRankedTree, tree::PrefixRankedPattern ).
// NOTE(review): presumably this is what makes the overload reachable through the dispatcher - confirm against RegistratorWrapper.
auto KnuthMorrisPrattPrefixRankedTreePrefixRankedPattern = KnuthMorrisPratt::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedTree, tree::PrefixRankedPattern > ( KnuthMorrisPratt::getInstance ( ), KnuthMorrisPratt::match );
} /* namespace exact */
 
} /* namespace arbology */
......@@ -31,6 +31,9 @@ public:
static std::set < unsigned > match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarTree & pattern );
static std::set < unsigned > match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarPattern & pattern );
 
static std::set < unsigned > match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedTree & pattern );
static std::set < unsigned > match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedPattern & pattern );
static KnuthMorrisPratt & getInstance ( ) {
static KnuthMorrisPratt res;
 
......
......@@ -13,6 +13,7 @@
#include <deque>
 
#include "RankedPattern.h"
#include "PrefixRankedTree.h"
 
#include "../../sax/FromXMLParserHelper.h"
#include "../common/TreeFromXMLParser.h"
......@@ -23,6 +24,8 @@
#include "../../XmlApi.hpp"
#include "../../CastApi.hpp"
 
#include "../../alphabet/SubtreeWildcardSymbol.h"
namespace tree {
 
PrefixRankedPattern::PrefixRankedPattern ( alphabet::RankedSymbol subtreeWildcard, std::set < alphabet::RankedSymbol > alphabet, std::vector < alphabet::RankedSymbol > data ) : RankedPatternAlphabet ( std::move ( subtreeWildcard ) ) {
......@@ -37,6 +40,9 @@ PrefixRankedPattern::PrefixRankedPattern ( alphabet::RankedSymbol subtreeWildcar
m_Data = std::move ( data );
}
 
/**
 * Builds a pattern from a tree in prefix ranked notation by delegating to the
 * ( subtreeWildcard, alphabet, data ) constructor. The wildcard passed on is a
 * RankedSymbol made from alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD with 0
 * as its second (presumably rank) argument; alphabet and data are taken from
 * tree.getAlphabet ( ) and tree.getContent ( ).
 * NOTE(review): whether the delegated constructor expects the wildcard to be part
 * of the supplied alphabet is not visible from here - confirm.
 */
PrefixRankedPattern::PrefixRankedPattern ( const PrefixRankedTree & tree ) : PrefixRankedPattern ( alphabet::RankedSymbol ( alphabet::Symbol ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD ), 0 ), tree.getAlphabet ( ), tree.getContent ( ) ) {
}
PrefixRankedPattern::PrefixRankedPattern ( const RankedPattern & tree ) : RankedPatternAlphabet ( tree.getSubtreeWildcard ( ) ) {
toPrefixRanked ( tree.getRoot ( ) );
addSymbolsToAlphabet ( tree.getAlphabet ( ) );
......
......@@ -19,6 +19,7 @@
namespace tree {
 
class RankedPattern;
class PrefixRankedTree;
 
/**
* Represents regular expression parsed from the XML. Regular expression is stored
......@@ -32,6 +33,7 @@ class PrefixRankedPattern : public RankedTreeBase, public RankedPatternAlphabet
public:
explicit PrefixRankedPattern ( alphabet::RankedSymbol subtreeWildcard, std::set < alphabet::RankedSymbol > alphabet, std::vector < alphabet::RankedSymbol > data );
explicit PrefixRankedPattern ( alphabet::RankedSymbol subtreeWildcard, std::vector < alphabet::RankedSymbol > data );
explicit PrefixRankedPattern ( const PrefixRankedTree & tree );
explicit PrefixRankedPattern ( const RankedPattern & tree );
 
virtual RankedTreeBase * clone ( ) const;
......
......@@ -214,4 +214,5 @@ runTestSubtree "Exact Subtree Automaton" "./arun2 -t occurrences -a <(./aarbolog
 
runTestPattern "Exact Boyer Moore Horspool" "./aarbology2 -a boyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"
runTestPattern "Exact Knuth Morris Pratt" "./aarbology2 -a knuthMorrisPratt -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set"
runTestPattern "Exact Knuth Morris Pratt" "./aarbology2 -a knuthMorrisPratt -s <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set"
runTestPattern "Exact Pattern Automaton" "./arun2 -t occurrences -a <(./aarbology2 -a exactPatternMatchingAutomaton -p <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\")) | ./adeterminize2) -i \"\$SUBJECT_FILE\" | ./astat2 -p size --set"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment