Skip to content
Snippets Groups Projects
GoodSuffixShiftTable.h 2.42 KiB
Newer Older
  • Learn to ignore specific revisions
  • /*
     * GoodSuffixShiftTable.h
     *
     *  Created on: 23. 3. 2017
     *      Author: Jan Travnicek
     */
    
    #ifndef _STRINGOLOGY_GOOD_SUFFIX_SHIFT_TABLE_H_
    #define _STRINGOLOGY_GOOD_SUFFIX_SHIFT_TABLE_H_
    
    #include <string/String.h>
    #include <string/StringFeatures.h>
    #include <core/multipleDispatch.hpp>
    
    #include <vector>
    
    #include <string/LinearString.h>
    
    #include <string/properties/BorderArray.h>
    #include <automaton/determinize/Determinize.h>
    #include <automaton/simplify/EpsilonRemoverIncoming.h>
    #include <stringology/exact/ExactFactorAutomaton.h>
    
    namespace string {
    
    namespace properties {
    
    /**
     * Computation of GSS table for BM from MI(E+\eps)-EVY course 2014
     */
    class GoodSuffixShiftTable : public std::SingleDispatch < GoodSuffixShiftTable, std::vector < size_t >, const string::StringBase & > {
    public:
    	/**
    	 * Search for pattern in linear string.
    	 * @return set set of occurences
    	 */
    	static std::vector < size_t > gss ( const string::String & pattern );
    
    	template < class SymbolType >
    	static std::vector < size_t > gss ( const string::LinearString < SymbolType > & pattern );
    
    };
    
    template < class SymbolType >
    std::vector < size_t > GoodSuffixShiftTable::gss ( const string::LinearString < SymbolType > & pattern ) {
    	std::vector < SymbolType > content = pattern.getContent ( );
    	std::reverse ( content.begin ( ), content.end ( ) );
    	string::LinearString < SymbolType > reversed ( std::move ( content ) );
    
    	std::vector < unsigned > borderArray = string::properties::BorderArray::construct ( reversed );
    	size_t max = reversed.getContent ( ).size ( ) - borderArray.back ( );
    
    	automaton::DFA < DefaultSymbolType, std::set < unsigned > > factorAutomaton = automaton::determinize::Determinize::determinize ( automaton::simplify::EpsilonRemoverIncoming::remove ( stringology::exact::ExactFactorAutomaton::construct ( reversed ) ) );
    
    	std::vector < size_t > result;
    
    	std::set < unsigned > state = factorAutomaton.getInitialState ( );
    	result.push_back ( 1 );
    	for ( const SymbolType & symbol : reversed.getContent ( ) ) {
    		state = factorAutomaton.getTransitions ( ).find ( std::make_pair ( std::move ( state ), symbol ) )->second;
    		if ( state.size ( ) >= 2 ) {
    			unsigned first = * state.begin ( );
    			unsigned second = * std::next ( state.begin ( ) );
    
    			result.push_back ( second - first );
    		} else {
    			result.push_back ( max );
    		}
    	}
    
    	return result;
    }
    
    } /* namespace properties */
    
    } /* namespace string */
    
    #endif /* _STRINGOLOGY_GOOD_SUFFIX_SHIFT_TABLE_H_ */