Newer
Older
/*
* DeadZoneUsingBadCharacterShiftAndBorderArray.h
*
* Created on: 5. 11. 2014
* Author: Jan Travnicek
*/
#ifndef _DEAD_ZONE_USING_BAD_CHARACTER_SHIFT_AND_BORDER_ARRAY_H_
#define _DEAD_ZONE_USING_BAD_CHARACTER_SHIFT_AND_BORDER_ARRAY_H_
#include <alib/set>
#include <alib/vector>
#include <alib/map>
#include <tree/properties/ReversedBadCharacterShiftTable.h>
#include <tree/properties/BorderArrayNaive.h>
#include <tree/properties/SubtreeJumpTable.h>
#include <tree/ranked/PrefixRankedBarTree.h>
#include <tree/ranked/PrefixRankedBarPattern.h>
#include <tree/ranked/PrefixRankedTree.h>
#include <tree/ranked/PrefixRankedPattern.h>
namespace arbology {
namespace exact {
/**
* Implementation of DeadZone matching using bad character shift as shifting method on one direction and border array on the other
*/
class DeadZoneUsingBadCharacterShiftAndBorderArray {
public:
/**
* Search for pattern in linear string.
* @return set set of occurences
*/
template < class SymbolType, class RankType >
static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern );
template < class SymbolType, class RankType >
static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern );
template < class SymbolType, class RankType >
static void match_rec ( ext::set < unsigned > & occ, const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern, ext::vector < size_t > & fba, ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > & bbcs, ext::vector < int > & subjectSubtreeJumpTable, int low, int high );
template < class SymbolType, class RankType >
static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern );
template < class SymbolType, class RankType >
static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern );
template < class SymbolType, class RankType >
static void match_rec ( ext::set < unsigned > & occ, const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern, ext::vector < size_t > & fba, ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > & bbcs, ext::vector < int > & subjectSubtreeJumpTable, int low, int high );
template < class SymbolType, class RankType >
ext::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern ) {
return match ( subject, tree::PrefixRankedBarPattern < SymbolType, RankType > ( pattern ) );
}
template < class SymbolType, class RankType >
ext::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ) {
ext::set < unsigned > occ;
ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > bbcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
ext::vector < size_t > fba = tree::properties::BorderArrayNaive::ba ( pattern );
ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, 0, subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1 );
return occ;
}
template < class SymbolType, class RankType >
void DeadZoneUsingBadCharacterShiftAndBorderArray::match_rec ( ext::set < unsigned > & occ, const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern, ext::vector < size_t > & fba, ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > & bbcs, ext::vector < int > & subjectSubtreeJumpTable, int low, int high ) {
if ( low >= high ) return;
int i = ( low + high ) / 2;
// index to the pattern
unsigned j = 0;
// offset to the subject
unsigned offset = i;
while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) {
// match of symbol
offset++;
j++;
} else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) /* && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) */ ) {
// match of variable with subtree
offset = subjectSubtreeJumpTable[offset];
j += 2;
} else {
break;
}
}
// match was found
if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );
match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, low, i - bbcs[subject.getContent ( )[i]] + 1 );
match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, i + j - fba[j], high );
}
template < class SymbolType, class RankType >
ext::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern ) {
return match ( subject, tree::PrefixRankedPattern < SymbolType, RankType > ( pattern ) );
}
template < class SymbolType, class RankType >
ext::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ) {
ext::set < unsigned > occ;
ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > bbcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
ext::vector < size_t > fba = tree::properties::BorderArrayNaive::ba ( pattern );
ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, 0, subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1 );
return occ;
}
template < class SymbolType, class RankType >
void DeadZoneUsingBadCharacterShiftAndBorderArray::match_rec ( ext::set < unsigned > & occ, const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern, ext::vector < size_t > & fba, ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > & bbcs, ext::vector < int > & subjectSubtreeJumpTable, int low, int high ) {
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
if ( low >= high ) return;
int i = ( low + high ) / 2;
// index to the pattern
unsigned j = 0;
// offset to the subject
unsigned offset = i;
while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] )
// match of symbol
offset++;
else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) )
// match of variable with subtree
offset = subjectSubtreeJumpTable[offset];
else
break;
j++;
}
// match was found
if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );
match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, low, i - bbcs[subject.getContent ( )[i]] + 1 );
match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, i + j - fba[j], high );
}
} /* namespace exact */
} /* namespace arbology */
#endif /* _DEAD_ZONE_USING_BAD_CHARACTER_SHIFT_AND_BORDER_ARRAY_H_ */