/* * DeadZoneUsingBadCharacterShiftAndBorderArray.h * * Created on: 5. 11. 2014 * Author: Jan Travnicek */ #ifndef _DEAD_ZONE_USING_BAD_CHARACTER_SHIFT_AND_BORDER_ARRAY_H_ #define _DEAD_ZONE_USING_BAD_CHARACTER_SHIFT_AND_BORDER_ARRAY_H_ #include <alib/set> #include <alib/vector> #include <alib/map> #include <common/ranked_symbol.hpp> #include <tree/properties/ReversedBadCharacterShiftTable.h> #include <tree/properties/BorderArrayNaive.h> #include <tree/properties/SubtreeJumpTable.h> #include <tree/exact/ForwardOccurrenceTest.h> #include <tree/ranked/PrefixRankedBarTree.h> #include <tree/ranked/PrefixRankedBarPattern.h> #include <tree/ranked/PrefixRankedTree.h> #include <tree/ranked/PrefixRankedPattern.h> namespace arbology { namespace exact { /** * Implementation of DeadZone matching using bad character shift as shifting method on one direction and border array on the other */ class DeadZoneUsingBadCharacterShiftAndBorderArray { public: /** * Search for pattern in linear string. * @return set set of occurences */ template < class SymbolType > static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType > & subject, const tree::PrefixRankedBarTree < SymbolType > & pattern ); template < class SymbolType > static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType > & subject, const tree::PrefixRankedBarPattern < SymbolType > & pattern ); template < class SymbolType > static void match_rec ( ext::set < unsigned > & occ, const tree::PrefixRankedBarTree < SymbolType > & subject, const tree::PrefixRankedBarPattern < SymbolType > & pattern, ext::vector < size_t > & fba, ext::map < common::ranked_symbol < SymbolType >, size_t > & bbcs, ext::vector < int > & subjectSubtreeJumpTable, int low, int high ); template < class SymbolType > static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedTree < SymbolType > & pattern ); template < class SymbolType > static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedPattern < SymbolType > & pattern ); template < class SymbolType > static void match_rec ( ext::set < unsigned > & occ, const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedPattern < SymbolType > & pattern, ext::vector < size_t > & fba, ext::map < common::ranked_symbol < SymbolType >, size_t > & bbcs, ext::vector < int > & subjectSubtreeJumpTable, int low, int high ); }; template < class SymbolType > ext::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedBarTree < SymbolType > & subject, const tree::PrefixRankedBarTree < SymbolType > & pattern ) { return match ( subject, tree::PrefixRankedBarPattern < SymbolType > ( pattern ) ); } template < class SymbolType > ext::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedBarTree < SymbolType > & subject, const tree::PrefixRankedBarPattern < SymbolType > & pattern ) { ext::set < unsigned > occ; ext::map < common::ranked_symbol < SymbolType >, size_t > bbcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern ext::vector < size_t > fba = tree::properties::BorderArrayNaive::construct ( pattern ); ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, 0, subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1 ); return occ; } template < class SymbolType > void DeadZoneUsingBadCharacterShiftAndBorderArray::match_rec ( ext::set < unsigned > & occ, const tree::PrefixRankedBarTree < SymbolType > & subject, const tree::PrefixRankedBarPattern < SymbolType > & pattern, ext::vector < size_t > & fba, ext::map < common::ranked_symbol < SymbolType >, size_t > & bbcs, ext::vector < int > & subjectSubtreeJumpTable, int low, int high ) { if ( low >= high ) return; int i = ( low + high ) / 2; // index to the pattern unsigned j = tree::exact::ForwardOccurrenceTest::occurrence ( subject, subjectSubtreeJumpTable, pattern, i ); // match was found if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i ); match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, low, i - bbcs[subject.getContent ( )[i]] + 1 ); match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, i + j - fba[j], high ); } template < class SymbolType > ext::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedTree < SymbolType > & pattern ) { return match ( subject, tree::PrefixRankedPattern < SymbolType > ( pattern ) ); } template < class SymbolType > ext::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedPattern < SymbolType > & pattern ) { ext::set < unsigned > occ; ext::map < common::ranked_symbol < SymbolType >, size_t > bbcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern ext::vector < size_t > fba = tree::properties::BorderArrayNaive::construct ( pattern ); ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, 0, subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1 ); return occ; } template < class SymbolType > void DeadZoneUsingBadCharacterShiftAndBorderArray::match_rec ( ext::set < unsigned > & occ, const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedPattern < SymbolType > & pattern, ext::vector < size_t > & fba, ext::map < common::ranked_symbol < SymbolType >, size_t > & bbcs, ext::vector < int > & subjectSubtreeJumpTable, int low, int high ) { if ( low >= high ) return; int i = ( low + high ) / 2; // index to the pattern unsigned j = tree::exact::ForwardOccurrenceTest::occurrence ( subject, subjectSubtreeJumpTable, pattern, i ); // match was found if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i ); match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, low, i - bbcs[subject.getContent ( )[i]] + 1 ); match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, i + j - fba[j], high ); } } /* namespace exact */ } /* namespace arbology */ #endif /* _DEAD_ZONE_USING_BAD_CHARACTER_SHIFT_AND_BORDER_ARRAY_H_ */