Skip to content
Snippets Groups Projects
ReversedBoyerMooreHorspool.h 10.8 KiB
Newer Older
  • Learn to ignore specific revisions
  • /*
     * ReversedBoyerMooreHorspool.h
     *
     *  Created on: 5. 11. 2014
     *      Author: Jan Travnicek
     */
    
    #ifndef _ARBOLOGY_REVERSED_BOYER_MOORE_HORSPOOL_H_
    #define _ARBOLOGY_REVERSED_BOYER_MOORE_HORSPOOL_H_
    
    
    #include <alib/set>
    #include <alib/map>
    
    Jan Trávníček's avatar
    Jan Trávníček committed
    #include <common/ranked_symbol.hpp>
    
    #include <tree/properties/ReversedBadCharacterShiftTable.h>
    #include <tree/properties/SubtreeJumpTable.h>
    #include <tree/properties/ExactSubtreeRepeatsNaive.h>
    
    #include <tree/ranked/PrefixRankedTree.h>
    #include <tree/ranked/PrefixRankedBarTree.h>
    #include <tree/ranked/PrefixRankedPattern.h>
    #include <tree/ranked/PrefixRankedBarPattern.h>
    #include <tree/ranked/PrefixRankedNonlinearPattern.h>
    #include <tree/ranked/PrefixRankedBarNonlinearPattern.h>
    
    
    namespace arbology {
    
    namespace exact {
    
    /**
     * Implementation of BMH for MI(E+\eps)-EVY course 2014
     * To get rid of zeros in BCS table we ignore last haystack character
     */
    
    class ReversedBoyerMooreHorspool {
    
    public:
    	/**
    	 * Search for pattern in linear string.
    	 * @return set set of occurences
    	 */
    
    	template < class SymbolType, class RankType >
    
    	static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern );
    
    	template < class SymbolType, class RankType >
    
    	static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern );
    
    	template < class SymbolType, class RankType >
    
    	static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarNonlinearPattern < SymbolType, RankType > & pattern );
    
    	template < class SymbolType, class RankType >
    
    	static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern );
    
    	template < class SymbolType, class RankType >
    
    	static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern );
    
    	template < class SymbolType, class RankType >
    
    	static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedNonlinearPattern < SymbolType, RankType > & pattern );
    
    template < class SymbolType, class RankType >
    
    ext::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern ) {
    
    	return match ( subject, tree::PrefixRankedBarPattern < SymbolType, RankType > ( pattern ) );
    }
    
    template < class SymbolType, class RankType >
    
    ext::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ) {
    	ext::set < unsigned > occ;
    
    	ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > bcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
    
    	ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
    
    
    	 // index to the subject
    	int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1;
    
    	 // main loop of the algorithm over all possible indexes where the pattern can start
    	while ( i >= 0 ) {
    
    		 // index to the pattern
    		unsigned j = 0;
    
    		 // offset to the subject
    		unsigned offset = i;
    
    		while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
    			if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) {
    				 // match of symbol
    				offset = offset + 1;
    				j = j + 1;
    
    			} else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) /* && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) */ ) {
    
    				 // match of variable with subtree
    				offset = subjectSubtreeJumpTable[offset];
    				j = j + 2;
    			} else {
    				break;
    			}
    		}
    
    		 // match was found
    		if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i );
    
    		 // shift heuristics
    		i -= bcs[subject.getContent ( )[i]];
    	}
    
    	return occ;
    }
    
    template < class SymbolType, class RankType >
    
    ext::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarNonlinearPattern < SymbolType, RankType > & pattern ) {
    	ext::set < unsigned > occ;
    
    	ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > bcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
    
    	ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
    
    	ext::map < common::ranked_symbol < SymbolType, RankType >, unsigned > variablesSetting;
    
    	tree::PrefixRankedBarTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject );
    
    
    	 // index to the subject
    	int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1;
    
    	 // main loop of the algorithm over all possible indexes where the pattern can start
    	while ( i >= 0 ) {
    		 // clear the current state of variable to subtree repeat
    		variablesSetting.clear();
    
    		// index to the pattern
    		unsigned j = 0;
    
    		 // offset to the subject
    		unsigned offset = i;
    
    		while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
    			if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) {
    				 // match of symbol
    				offset = offset + 1;
    				j = j + 1;
    
    			} else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[j] ) ) /* && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) */ ) {
    
    				 // check nonlinear variable
    				if ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) {
    					auto setting = variablesSetting.find ( pattern.getContent ( )[ j ] );
    
    					if ( setting != variablesSetting.end ( ) && repeats.getContent ( )[ offset ].getSymbol ( ) != setting->second )
    						break;
    
    					variablesSetting.insert ( std::make_pair ( pattern.getContent ( )[ j ], repeats.getContent( )[ offset ].getSymbol ( ) ) );
    				}
    
    				 // match of variable with subtree
    				offset = subjectSubtreeJumpTable[offset];
    				j = j + 2;
    			} else {
    				break;
    			}
    		}
    
    		 // match was found
    		if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i );
    
    		 // shift heuristics
    		i -= bcs[subject.getContent ( )[i]];
    	}
    
    	return occ;
    }
    
    template < class SymbolType, class RankType >
    
    ext::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern ) {
    
    	return match ( subject, tree::PrefixRankedPattern < SymbolType, RankType > ( pattern ) );
    }
    
    template < class SymbolType, class RankType >
    
    ext::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ) {
    	ext::set < unsigned > occ;
    
    	ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > bcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
    
    	ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
    
    
    	 // index to the subject
    	int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1;
    
    	 // main loop of the algorithm over all possible indexes where the pattern can start
    	while ( i >= 0 ) {
    
    		 // index to the pattern
    		unsigned j = 0;
    
    		 // offset to the subject
    		unsigned offset = i;
    
    		while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
    			if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] )
    				 // match of symbol
    				offset = offset + 1;
    			else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) )
    				 // match of variable with subtree
    				offset = subjectSubtreeJumpTable[offset];
    			else
    				break;
    
    			j = j + 1;
    		}
    
    		 // match was found
    		if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i );
    
    		 // shift heristics
    		i -= bcs[subject.getContent ( )[i]];
    	}
    
    	return occ;
    }
    
    template < class SymbolType, class RankType >
    
    ext::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedNonlinearPattern < SymbolType, RankType > & pattern ) {
    	ext::set < unsigned > occ;
    
    	ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > bcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
    
    	ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
    
    	ext::map < common::ranked_symbol < SymbolType, RankType >, unsigned > variablesSetting;
    
    	tree::PrefixRankedTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject );
    
    
    	 // index to the subject
    	int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1;
    
    	 // main loop of the algorithm over all possible indexes where the pattern can start
    	while ( i >= 0 ) {
    		 // clear the current state of variable to subtree repeat
    		variablesSetting.clear();
    
    		 // index to the pattern
    		unsigned j = 0;
    
    		 // offset to the subject
    		unsigned offset = i;
    
    		while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
    			if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] )
    				 // match of symbol
    				offset = offset + 1;
    			else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) {
    				 // check nonlinear variable
    				if ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) {
    					auto setting = variablesSetting.find ( pattern.getContent ( )[ j ] );
    
    					if ( setting != variablesSetting.end ( ) && repeats.getContent ( )[ offset ].getSymbol ( ) != setting->second )
    						break;
    
    					variablesSetting.insert ( std::make_pair ( pattern.getContent ( )[ j ], repeats.getContent( )[ offset ].getSymbol ( ) ) );
    				}
    
    				 // match of variable with subtree
    				offset = subjectSubtreeJumpTable[offset];
    			} else
    				break;
    
    			j = j + 1;
    		}
    
    		 // match was found
    		if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i );
    
    		 // shift heristics
    		i -= bcs[subject.getContent ( )[i]];
    	}
    
    	return occ;
    }
    
    
    } /* namespace exact */
    
    } /* namespace arbology */
    
    #endif /* _ARBOLOGY_REVERSED_BOYER_MOORE_HORSPOOL_H_ */