#pragma once #include <alib/vector> #include <string/LinearString.h> #include <string/properties/BorderArray.h> #include <automaton/determinize/Determinize.h> #include <automaton/simplify/EpsilonRemoverIncoming.h> #include <stringology/indexing/NondeterministicExactFactorAutomaton.h> namespace string { namespace properties { /** * Computation of GSS table for BM from MI(E+\eps)-EVY course 2014 */ class GoodSuffixShiftTable { public: /** * Search for pattern in linear string. * @return set set of occurences */ template < class SymbolType > static ext::vector < size_t > gss ( const string::LinearString < SymbolType > & pattern ); }; template < class SymbolType > ext::vector < size_t > GoodSuffixShiftTable::gss ( const string::LinearString < SymbolType > & pattern ) { ext::vector < SymbolType > content = pattern.getContent ( ); std::reverse ( content.begin ( ), content.end ( ) ); string::LinearString < SymbolType > reversed ( std::move ( content ) ); ext::vector < size_t > borderArray = string::properties::BorderArray::construct ( reversed ); size_t max = reversed.getContent ( ).size ( ) - borderArray.back ( ); automaton::DFA < SymbolType, ext::set < unsigned > > factorAutomaton = automaton::determinize::Determinize::determinize ( automaton::simplify::EpsilonRemoverIncoming::remove ( stringology::indexing::NondeterministicExactFactorAutomaton::construct ( reversed ) ) ); ext::vector < size_t > result; ext::set < unsigned > state = factorAutomaton.getInitialState ( ); result.push_back ( 1 ); for ( const SymbolType & symbol : reversed.getContent ( ) ) { state = factorAutomaton.getTransitions ( ).find ( ext::make_pair ( std::move ( state ), symbol ) )->second; if ( state.size ( ) >= 2 ) { unsigned first = * state.begin ( ); unsigned second = * std::next ( state.begin ( ) ); result.push_back ( second - first ); } else { result.push_back ( max ); } } return result; } } /* namespace properties */ } /* namespace string */