Newer
Older
/*
* KnuthMorrisPratt.h
*
* Created on: 5. 11. 2014
* Author: Jan Travnicek
*/
#ifndef _ARBOLOGY_KNUTH_MORRIS_PRATT_H_
#define _ARBOLOGY_KNUTH_MORRIS_PRATT_H_
#include <tree/properties/BorderArrayNaive.h>
#include <tree/properties/SubtreeJumpTable.h>
#include <tree/properties/ExactSubtreeRepeatsNaive.h>
#include <tree/exact/ForwardOccurrenceTest.h>
#include <tree/ranked/PrefixRankedBarTree.h>
#include <tree/ranked/PrefixRankedBarPattern.h>
#include <tree/ranked/PrefixRankedBarNonlinearPattern.h>
#include <tree/ranked/PrefixRankedTree.h>
#include <tree/ranked/PrefixRankedPattern.h>
#include <tree/ranked/PrefixRankedNonlinearPattern.h>
namespace arbology {
namespace exact {
/**
* Implementation of BMH for MI(E+\eps)-EVY course 2014
* To get rid of zeros in BCS table we ignore last haystack character
*/
public:
/**
* Search for pattern in linear string.
* @return set set of occurences
*/
template < class SymbolType, class RankType >
static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern );
template < class SymbolType, class RankType >
static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern );
template < class SymbolType, class RankType >
static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarNonlinearPattern < SymbolType, RankType > & pattern );
template < class SymbolType, class RankType >
static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern );
template < class SymbolType, class RankType >
static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern );
template < class SymbolType, class RankType >
static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedNonlinearPattern < SymbolType, RankType > & pattern );
template < class SymbolType, class RankType >
ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern ) {
return match ( subject, tree::PrefixRankedBarPattern < SymbolType, RankType > ( pattern ) );
}
template < class SymbolType, class RankType >
ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ) {
ext::set < unsigned > occ;
ext::vector < size_t > ba = tree::properties::BorderArrayNaive::ba ( pattern );
//measurements::start("Algorithm", measurements::Type::MAIN);
ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
// index to the subject
unsigned i = 0;
// main loop of the algorithm over all possible indexes where the pattern can start
while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) {
// index to the pattern
unsigned j = tree::exact::ForwardOccurrenceTest::occurrence ( subject, subjectSubtreeJumpTable, pattern, i );
// match was found
if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );
// shift heuristics
i += j - ba[j];
}
//measurements::end();
return occ;
}
template < class SymbolType, class RankType >
ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarNonlinearPattern < SymbolType, RankType > & pattern ) {
ext::set < unsigned > occ;
ext::vector < size_t > ba = tree::properties::BorderArrayNaive::ba ( pattern );
//measurements::start("Algorithm", measurements::Type::MAIN);
ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
tree::PrefixRankedBarTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject );
// index to the subject
unsigned i = 0;
// main loop of the algorithm over all possible indexes where the pattern can start
while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) {
// index to the pattern
unsigned j = tree::exact::ForwardOccurrenceTest::occurrence ( subject, subjectSubtreeJumpTable, repeats, pattern, i );
// match was found
if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );
// shift heuristics
i += j - ba[j];
}
//measurements::end();
return occ;
}
template < class SymbolType, class RankType >
ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern ) {
return match ( subject, tree::PrefixRankedPattern < SymbolType, RankType > ( pattern ) );
}
template < class SymbolType, class RankType >
ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ) {
ext::set < unsigned > occ;
ext::vector < size_t > ba = tree::properties::BorderArrayNaive::ba ( pattern );
//measurements::start("Algorithm", measurements::Type::MAIN);
ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
// index to the subject
unsigned i = 0;
// main loop of the algorithm over all possible indexes where the pattern can start
while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) {
// index to the pattern
unsigned j = tree::exact::ForwardOccurrenceTest::occurrence ( subject, subjectSubtreeJumpTable, pattern, i );
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
// match was found
if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );
// shift heristics
i += j - ba[j];
}
//measurements::end();
return occ;
}
template < class SymbolType, class RankType >
ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedNonlinearPattern < SymbolType, RankType > & pattern ) {
ext::set < unsigned > occ;
ext::vector < size_t > ba = tree::properties::BorderArrayNaive::ba ( pattern );
//measurements::start("Algorithm", measurements::Type::MAIN);
ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
tree::PrefixRankedTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject );
// index to the subject
unsigned i = 0;
// main loop of the algorithm over all possible indexes where the pattern can start
while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) {
// index to the pattern
unsigned j = tree::exact::ForwardOccurrenceTest::occurrence ( subject, subjectSubtreeJumpTable, repeats, pattern, i );
// match was found
if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );
// shift heristics
i += j - ba[j];
}
} /* namespace exact */
} /* namespace arbology */
#endif /* _ARBOLOGY_KNUTH_MORRIS_PRATT_H_ */