Skip to content
Snippets Groups Projects
DeadZoneUsingBadCharacterShiftAndBorderArray.h 8.12 KiB
Newer Older
  • Learn to ignore specific revisions
  • /*
     * DeadZoneUsingBadCharacterShiftAndBorderArray.h
     *
     *  Created on: 5. 11. 2014
     *      Author: Jan Travnicek
     */
    
    #ifndef _DEAD_ZONE_USING_BAD_CHARACTER_SHIFT_AND_BORDER_ARRAY_H_
    #define _DEAD_ZONE_USING_BAD_CHARACTER_SHIFT_AND_BORDER_ARRAY_H_
    
    #include <set>
    #include <vector>
    
    #include <core/multipleDispatch.hpp>
    
    #include <tree/TreeFeatures.h>
    #include <alphabet/RankedSymbol.h>
    
    
    #include <tree/properties/ReversedBadCharacterShiftTable.h>
    #include <tree/properties/BorderArrayNaive.h>
    #include <tree/properties/SubtreeJumpTable.h>
    
    #include <tree/Tree.h>
    #include <tree/ranked/PrefixRankedBarTree.h>
    #include <tree/ranked/PrefixRankedBarPattern.h>
    #include <tree/ranked/PrefixRankedTree.h>
    #include <tree/ranked/PrefixRankedPattern.h>
    #include <alphabet/RankedSymbol.h>
    
    #include <map>
    
    
    namespace arbology {
    
    namespace exact {
    
    /**
     * Implementation of DeadZone matching using bad character shift as shifting method on one direction and border array on the other
     */
    
    class DeadZoneUsingBadCharacterShiftAndBorderArray : public alib::DoubleDispatch < DeadZoneUsingBadCharacterShiftAndBorderArray, std::set < unsigned >, const tree::TreeBase &, const tree::TreeBase & > {
    
    public:
    	/**
    	 * Search for pattern in linear string.
    	 * @return set set of occurences
    	 */
    	static std::set < unsigned > match ( const tree::Tree & subject, const tree::Tree & pattern );
    
    
    	template < class SymbolType, class RankType >
    	static std::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern );
    	template < class SymbolType, class RankType >
    	static std::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern );
    	template < class SymbolType, class RankType >
    
    	static void match_rec ( std::set < unsigned > & occ, const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern, ext::vector < size_t > & fba, std::map < common::ranked_symbol < SymbolType, RankType >, size_t > & bbcs, ext::vector < int > & subjectSubtreeJumpTable, int low, int high );
    
    	template < class SymbolType, class RankType >
    	static std::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern );
    	template < class SymbolType, class RankType >
    	static std::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern );
    	template < class SymbolType, class RankType >
    
    	static void match_rec ( std::set < unsigned > & occ, const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern, ext::vector < size_t > & fba, std::map < common::ranked_symbol < SymbolType, RankType >, size_t > & bbcs, ext::vector < int > & subjectSubtreeJumpTable, int low, int high );
    
    template < class SymbolType, class RankType >
    std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern ) {
    	return match ( subject, tree::PrefixRankedBarPattern < SymbolType, RankType > ( pattern ) );
    }
    
    template < class SymbolType, class RankType >
    std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ) {
    	std::set < unsigned > occ;
    
    	std::map < common::ranked_symbol < SymbolType, RankType >, size_t > bbcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
    
    	ext::vector < size_t > fba = tree::properties::BorderArrayNaive::ba ( pattern );
    	ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
    
    
    	match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, 0, subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1 );
    	return occ;
    }
    
    template < class SymbolType, class RankType >
    
    void DeadZoneUsingBadCharacterShiftAndBorderArray::match_rec ( std::set < unsigned > & occ, const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern, ext::vector < size_t > & fba, std::map < common::ranked_symbol < SymbolType, RankType >, size_t > & bbcs, ext::vector < int > & subjectSubtreeJumpTable, int low, int high ) {
    
    	if ( low >= high ) return;
    
    	int i = ( low + high ) / 2;
    
    	 // index to the pattern
    	unsigned j = 0;
    
    	 // offset to the subject
    	unsigned offset = i;
    
    	while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
    		if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) {
    			 // match of symbol
    			offset++;
    			j++;
    		} else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] )  ) ) {
    			 // match of variable with subtree
    			offset = subjectSubtreeJumpTable[offset];
    			j += 2;
    		} else {
    			break;
    		}
    	}
    
    	 // match was found
    	if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );
    
    	match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, low, i - bbcs[subject.getContent ( )[i]] + 1 );
    	match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, i + j - fba[j], high );
    }
    
    template < class SymbolType, class RankType >
    std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern ) {
    	return match ( subject, tree::PrefixRankedPattern < SymbolType, RankType > ( pattern ) );
    }
    
    template < class SymbolType, class RankType >
    std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ) {
    	std::set < unsigned > occ;
    
    	std::map < common::ranked_symbol < SymbolType, RankType >, size_t > bbcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
    
    	ext::vector < size_t > fba = tree::properties::BorderArrayNaive::ba ( pattern );
    	ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
    
    
    	match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, 0, subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1 );
    	return occ;
    }
    
    template < class SymbolType, class RankType >
    
    void DeadZoneUsingBadCharacterShiftAndBorderArray::match_rec ( std::set < unsigned > & occ, const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern, ext::vector < size_t > & fba, std::map < common::ranked_symbol < SymbolType, RankType >, size_t > & bbcs, ext::vector < int > & subjectSubtreeJumpTable, int low, int high ) {
    
    	if ( low >= high ) return;
    
    	int i = ( low + high ) / 2;
    
    	 // index to the pattern
    	unsigned j = 0;
    
    	 // offset to the subject
    	unsigned offset = i;
    
    	while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
    		if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] )
    			 // match of symbol
    			offset++;
    		else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) )
    			 // match of variable with subtree
    			offset = subjectSubtreeJumpTable[offset];
    		else
    			break;
    
    		j++;
    	}
    
    	 // match was found
    	if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );
    
    	match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, low, i - bbcs[subject.getContent ( )[i]] + 1 );
    	match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, i + j - fba[j], high );
    }
    
    
    } /* namespace exact */
    
    } /* namespace arbology */
    
    #endif /* _DEAD_ZONE_USING_BAD_CHARACTER_SHIFT_AND_BORDER_ARRAY_H_ */