Newer
Older
/*
* KnuthMorrisPratt.cpp
*
* Created on: 5. 11. 2014
* Author: Jan Travnicek
*/
#include "KnuthMorrisPratt.h"
#include "BorderArrayNaive.h"
#include "SubtreeJumpTable.h"
#include <exception/AlibException.h>
#include <tree/Tree.h>
#include <tree/ranked/PrefixRankedBarTree.h>
#include <tree/ranked/PrefixRankedBarPattern.h>
#include <tree/ranked/PrefixRankedTree.h>
#include <tree/ranked/PrefixRankedPattern.h>
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#include <alphabet/RankedSymbol.h>
#include <map>
namespace arbology {
namespace exact {
std::set < unsigned > KnuthMorrisPratt::match ( const tree::Tree & subject, const tree::Tree & pattern ) {
return getInstance ( ).dispatch ( subject.getData ( ), pattern.getData ( ) );
}
std::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarTree & pattern ) {
return match ( subject, tree::PrefixRankedBarPattern ( pattern ) );
}
auto KnuthMorrisPrattPrefixRankedBarTreePrefixRankedBarTree = KnuthMorrisPratt::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree, tree::PrefixRankedBarTree > ( KnuthMorrisPratt::getInstance ( ), KnuthMorrisPratt::match );
std::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarPattern & pattern ) {
std::set < unsigned > occ;
std::vector < size_t > ba = BorderArrayNaive::ba ( pattern );
std::vector < int > subjectSubtreeJumpTable = SubtreeJumpTable::compute ( subject );
// index to the subject
unsigned i = 0;
// main loop of the algorithm over all possible indexes where the pattern can start
while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) {
// index to the pattern
unsigned j = 0;
// offset to the subject
unsigned offset = i;
while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) {
// match of symbol
offset++;
j++;
} else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) && ( subject.getContent ( )[offset].getSymbol ( ) != pattern.getBarSymbol ( ) ) ) {
// match of variable with subtree
offset = subjectSubtreeJumpTable[offset];
j += 2;
} else {
break;
}
}
// match was found
if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );
// shift heristics
i += j - ba[j];
}
return occ;
}
auto KnuthMorrisPrattPrefixRankedBarTreePrefixRankedBarPattern = KnuthMorrisPratt::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree, tree::PrefixRankedBarPattern > ( KnuthMorrisPratt::getInstance ( ), KnuthMorrisPratt::match );
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
std::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedTree & pattern ) {
return match ( subject, tree::PrefixRankedPattern ( pattern ) );
}
auto KnuthMorrisPrattPrefixRankedTreePrefixRankedTree = KnuthMorrisPratt::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedTree, tree::PrefixRankedTree > ( KnuthMorrisPratt::getInstance ( ), KnuthMorrisPratt::match );
std::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedPattern & pattern ) {
std::set < unsigned > occ;
std::vector < size_t > ba = BorderArrayNaive::ba ( pattern );
std::vector < int > subjectSubtreeJumpTable = SubtreeJumpTable::compute ( subject );
// index to the subject
unsigned i = 0;
// main loop of the algorithm over all possible indexes where the pattern can start
while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) {
// index to the pattern
unsigned j = 0;
// offset to the subject
unsigned offset = i;
while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) {
// match of symbol
offset++;
j++;
} else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) ) {
// match of variable with subtree
offset = subjectSubtreeJumpTable[offset];
j++;
} else {
break;
}
}
// match was found
if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );
// shift heristics
i += j - ba[j];
}
return occ;
}
auto KnuthMorrisPrattPrefixRankedTreePrefixRankedPattern = KnuthMorrisPratt::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedTree, tree::PrefixRankedPattern > ( KnuthMorrisPratt::getInstance ( ), KnuthMorrisPratt::match );
} /* namespace exact */
} /* namespace arbology */