Newer
Older
/*
* KnuthMorrisPratt.h
*
* Created on: 5. 11. 2014
* Author: Jan Travnicek
*/
#ifndef _ARBOLOGY_KNUTH_MORRIS_PRATT_H_
#define _ARBOLOGY_KNUTH_MORRIS_PRATT_H_
#include <set>
#include <vector>
#include <tree/properties/BorderArrayNaive.h>
#include <tree/properties/SubtreeJumpTable.h>
#include <tree/ranked/PrefixRankedBarTree.h>
#include <tree/ranked/PrefixRankedBarPattern.h>
#include <tree/ranked/PrefixRankedTree.h>
#include <tree/ranked/PrefixRankedPattern.h>
namespace arbology {
namespace exact {
/**
* Implementation of BMH for MI(E+\eps)-EVY course 2014
* To get rid of zeros in BCS table we ignore last haystack character
*/
public:
/**
* Search for pattern in linear string.
* @return set set of occurences
*/
template < class SymbolType, class RankType >
static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern );
template < class SymbolType, class RankType >
static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern );
template < class SymbolType, class RankType >
static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern );
template < class SymbolType, class RankType >
static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern );
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
template < class SymbolType, class RankType >
ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern ) {
return match ( subject, tree::PrefixRankedBarPattern < SymbolType, RankType > ( pattern ) );
}
template < class SymbolType, class RankType >
ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ) {
ext::set < unsigned > occ;
ext::vector < size_t > ba = tree::properties::BorderArrayNaive::ba ( pattern );
ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
// index to the subject
unsigned i = 0;
// main loop of the algorithm over all possible indexes where the pattern can start
while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) {
// index to the pattern
unsigned j = 0;
// offset to the subject
unsigned offset = i;
while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) {
// match of symbol
offset++;
j++;
} else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) ) {
// match of variable with subtree
offset = subjectSubtreeJumpTable[offset];
j += 2;
} else {
break;
}
}
// match was found
if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );
// shift heuristics
i += j - ba[j];
}
return occ;
}
template < class SymbolType, class RankType >
ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern ) {
return match ( subject, tree::PrefixRankedPattern < SymbolType, RankType > ( pattern ) );
}
template < class SymbolType, class RankType >
ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ) {
ext::set < unsigned > occ;
ext::vector < size_t > ba = tree::properties::BorderArrayNaive::ba ( pattern );
ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
// index to the subject
unsigned i = 0;
// main loop of the algorithm over all possible indexes where the pattern can start
while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) {
// index to the pattern
unsigned j = 0;
// offset to the subject
unsigned offset = i;
while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) {
// match of symbol
offset++;
j++;
} else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) ) {
// match of variable with subtree
offset = subjectSubtreeJumpTable[offset];
j++;
} else {
break;
}
}
// match was found
if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );
// shift heristics
i += j - ba[j];
}
return occ;
}
} /* namespace exact */
} /* namespace arbology */
#endif /* _ARBOLOGY_KNUTH_MORRIS_PRATT_H_ */