Newer
Older
/*
* DeadZoneUsingBadCharacterShiftAndBorderArray.h
*
* Created on: 5. 11. 2014
* Author: Jan Travnicek
*/
#ifndef _DEAD_ZONE_USING_BAD_CHARACTER_SHIFT_AND_BORDER_ARRAY_H_
#define _DEAD_ZONE_USING_BAD_CHARACTER_SHIFT_AND_BORDER_ARRAY_H_
#include <set>
#include <vector>
#include <tree/TreeFeatures.h>
#include <alphabet/RankedSymbol.h>
#include <tree/properties/ReversedBadCharacterShiftTable.h>
#include <tree/properties/BorderArrayNaive.h>
#include <tree/properties/SubtreeJumpTable.h>
#include <tree/Tree.h>
#include <tree/ranked/PrefixRankedBarTree.h>
#include <tree/ranked/PrefixRankedBarPattern.h>
#include <tree/ranked/PrefixRankedTree.h>
#include <tree/ranked/PrefixRankedPattern.h>
#include <alphabet/RankedSymbol.h>
#include <map>
namespace arbology {
namespace exact {
/**
* Implementation of DeadZone matching using bad character shift as shifting method on one direction and border array on the other
*/
class DeadZoneUsingBadCharacterShiftAndBorderArray : public alib::DoubleDispatch < DeadZoneUsingBadCharacterShiftAndBorderArray, std::set < unsigned >, const tree::TreeBase &, const tree::TreeBase & > {
public:
/**
* Search for pattern in linear string.
* @return set set of occurences
*/
static std::set < unsigned > match ( const tree::Tree & subject, const tree::Tree & pattern );
template < class SymbolType, class RankType >
static std::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern );
template < class SymbolType, class RankType >
static std::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern );
template < class SymbolType, class RankType >
static void match_rec ( std::set < unsigned > & occ, const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern, ext::vector < size_t > & fba, std::map < common::ranked_symbol < SymbolType, RankType >, size_t > & bbcs, ext::vector < int > & subjectSubtreeJumpTable, int low, int high );
template < class SymbolType, class RankType >
static std::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern );
template < class SymbolType, class RankType >
static std::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern );
template < class SymbolType, class RankType >
static void match_rec ( std::set < unsigned > & occ, const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern, ext::vector < size_t > & fba, std::map < common::ranked_symbol < SymbolType, RankType >, size_t > & bbcs, ext::vector < int > & subjectSubtreeJumpTable, int low, int high );
template < class SymbolType, class RankType >
std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern ) {
return match ( subject, tree::PrefixRankedBarPattern < SymbolType, RankType > ( pattern ) );
}
template < class SymbolType, class RankType >
std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ) {
std::set < unsigned > occ;
std::map < common::ranked_symbol < SymbolType, RankType >, size_t > bbcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
ext::vector < size_t > fba = tree::properties::BorderArrayNaive::ba ( pattern );
ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, 0, subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1 );
return occ;
}
template < class SymbolType, class RankType >
void DeadZoneUsingBadCharacterShiftAndBorderArray::match_rec ( std::set < unsigned > & occ, const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern, ext::vector < size_t > & fba, std::map < common::ranked_symbol < SymbolType, RankType >, size_t > & bbcs, ext::vector < int > & subjectSubtreeJumpTable, int low, int high ) {
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
if ( low >= high ) return;
int i = ( low + high ) / 2;
// index to the pattern
unsigned j = 0;
// offset to the subject
unsigned offset = i;
while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) {
// match of symbol
offset++;
j++;
} else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) ) {
// match of variable with subtree
offset = subjectSubtreeJumpTable[offset];
j += 2;
} else {
break;
}
}
// match was found
if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );
match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, low, i - bbcs[subject.getContent ( )[i]] + 1 );
match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, i + j - fba[j], high );
}
template < class SymbolType, class RankType >
std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern ) {
return match ( subject, tree::PrefixRankedPattern < SymbolType, RankType > ( pattern ) );
}
template < class SymbolType, class RankType >
std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ) {
std::set < unsigned > occ;
std::map < common::ranked_symbol < SymbolType, RankType >, size_t > bbcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
ext::vector < size_t > fba = tree::properties::BorderArrayNaive::ba ( pattern );
ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, 0, subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1 );
return occ;
}
template < class SymbolType, class RankType >
void DeadZoneUsingBadCharacterShiftAndBorderArray::match_rec ( std::set < unsigned > & occ, const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern, ext::vector < size_t > & fba, std::map < common::ranked_symbol < SymbolType, RankType >, size_t > & bbcs, ext::vector < int > & subjectSubtreeJumpTable, int low, int high ) {
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
if ( low >= high ) return;
int i = ( low + high ) / 2;
// index to the pattern
unsigned j = 0;
// offset to the subject
unsigned offset = i;
while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] )
// match of symbol
offset++;
else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) )
// match of variable with subtree
offset = subjectSubtreeJumpTable[offset];
else
break;
j++;
}
// match was found
if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );
match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, low, i - bbcs[subject.getContent ( )[i]] + 1 );
match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, i + j - fba[j], high );
}
} /* namespace exact */
} /* namespace arbology */
#endif /* _DEAD_ZONE_USING_BAD_CHARACTER_SHIFT_AND_BORDER_ARRAY_H_ */