/* * DeadZoneUsingBadCharacterShiftAndBorderArray.cpp * * Created on: 5. 11. 2014 * Author: Jan Travnicek */ #include "DeadZoneUsingBadCharacterShiftAndBorderArray.h" #include "ReversedBadCharacterShiftTable.h" #include "BorderArrayNaive.h" #include "SubtreeJumpTable.h" #include <tree/Tree.h> #include <tree/ranked/PrefixRankedBarTree.h> #include <tree/ranked/PrefixRankedBarPattern.h> #include <tree/ranked/PrefixRankedTree.h> #include <tree/ranked/PrefixRankedPattern.h> #include <alphabet/RankedSymbol.h> #include <map> namespace arbology { namespace exact { std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::Tree & subject, const tree::Tree & pattern ) { return dispatch ( subject.getData ( ), pattern.getData ( ) ); } std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarTree & pattern ) { return match ( subject, tree::PrefixRankedBarPattern ( pattern ) ); } auto DeadZoneUsingBadCharacterShiftAndBorderArrayPrefixRankedBarTreePrefixRankedBarTree = DeadZoneUsingBadCharacterShiftAndBorderArray::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree, tree::PrefixRankedBarTree > ( DeadZoneUsingBadCharacterShiftAndBorderArray::match ); std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarPattern & pattern ) { std::set < unsigned > occ; std::map < alphabet::RankedSymbol, size_t > bbcs = ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern std::vector < size_t > fba = BorderArrayNaive::ba ( pattern ); std::vector < int > subjectSubtreeJumpTable = SubtreeJumpTable::compute ( subject ); match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, 0, subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1 ); return occ; } void DeadZoneUsingBadCharacterShiftAndBorderArray::match_rec ( std::set < unsigned > & occ, const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarPattern & pattern, std::vector < size_t > & fba, std::map < alphabet::RankedSymbol, size_t > & bbcs, std::vector < int > & subjectSubtreeJumpTable, int low, int high ) { if ( low >= high ) return; int i = ( low + high ) / 2; // index to the pattern unsigned j = 0; // offset to the subject unsigned offset = i; while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { // match of symbol offset++; j++; } else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) ) { // match of variable with subtree offset = subjectSubtreeJumpTable[offset]; j += 2; } else { break; } } // match was found if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i ); match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, low, i - bbcs[subject.getContent ( )[i]] + 1 ); match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, i + j - fba[j], high ); } auto DeadZoneUsingBadCharacterShiftAndBorderArrayPrefixRankedBarTreePrefixRankedBarPattern = DeadZoneUsingBadCharacterShiftAndBorderArray::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree, tree::PrefixRankedBarPattern > ( DeadZoneUsingBadCharacterShiftAndBorderArray::match ); std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedTree & pattern ) { return match ( subject, tree::PrefixRankedPattern ( pattern ) ); } auto DeadZoneUsingBadCharacterShiftAndBorderArrayPrefixRankedTreePrefixRankedTree = DeadZoneUsingBadCharacterShiftAndBorderArray::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedTree, tree::PrefixRankedTree > ( DeadZoneUsingBadCharacterShiftAndBorderArray::match ); std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedPattern & pattern ) { std::set < unsigned > occ; std::map < alphabet::RankedSymbol, size_t > bbcs = ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern std::vector < size_t > fba = BorderArrayNaive::ba ( pattern ); std::vector < int > subjectSubtreeJumpTable = SubtreeJumpTable::compute ( subject ); match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, 0, subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1 ); return occ; } void DeadZoneUsingBadCharacterShiftAndBorderArray::match_rec ( std::set < unsigned > & occ, const tree::PrefixRankedTree & subject, const tree::PrefixRankedPattern & pattern, std::vector < size_t > & fba, std::map < alphabet::RankedSymbol, size_t > & bbcs, std::vector < int > & subjectSubtreeJumpTable, int low, int high ) { if ( low >= high ) return; int i = ( low + high ) / 2; // index to the pattern unsigned j = 0; // offset to the subject unsigned offset = i; while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) // match of symbol offset++; else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) // match of variable with subtree offset = subjectSubtreeJumpTable[offset]; else break; j++; } // match was found if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i ); match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, low, i - bbcs[subject.getContent ( )[i]] + 1 ); match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, i + j - fba[j], high ); } auto DeadZoneUsingBadCharacterShiftAndBorderArrayPrefixRankedTreePrefixRankedPattern = DeadZoneUsingBadCharacterShiftAndBorderArray::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedTree, tree::PrefixRankedPattern > ( DeadZoneUsingBadCharacterShiftAndBorderArray::match ); } /* namespace exact */ } /* namespace arbology */