// // Created by shushiri on 17.2.19. // #ifndef APPROXIMATE_COVERS_COMPUTATION_H #define APPROXIMATE_COVERS_COMPUTATION_H #include <string/LinearString.h> #include <automaton/FSM/EpsilonNFA.h> #include <automaton/FSM/NFA.h> #include <automaton/simplify/EpsilonRemoverIncoming.h> #include <stringology/cover/NondeterministicApproximateSuffixAutomatonForHammingDistance.h> namespace stringology{ namespace cover { class ApproximateCoversComputation { public: /** * * @return set of all smallest distance k-approximate covers of the input string with the smallest distances with maximum Hamming distance k. */ template < class SymbolType > static ext::map < ext::vector < SymbolType >, unsigned int > compute(const string::LinearString < SymbolType > & pattern, unsigned int k, bool restricted = 0); private: template < class SymbolType > static unsigned int smallestDistanceCover(const ext::set < ext::pair < unsigned int, unsigned int > > dSubset, const ext::vector < SymbolType > coverCandidate); template < class SymbolType > static ext::map < ext::vector < SymbolType >, unsigned int > processState ( ext::set < ext::pair < unsigned int, unsigned int > > previousState, const ext::vector < SymbolType > previousLfactor, unsigned int previousDepth, unsigned int k, const automaton::NFA<SymbolType, ext::pair<unsigned int, unsigned int> > approximateSuffixNDA, const string::LinearString < SymbolType > & pattern, bool restricted ); }; template < class SymbolType > ext::map < ext::vector < SymbolType >, unsigned int > ApproximateCoversComputation::compute ( const string::LinearString<SymbolType> &pattern, unsigned int k, bool restricted ) { automaton::EpsilonNFA < SymbolType, DefaultEpsilonType, ext::pair < unsigned int, unsigned int > > approximateSuffixNDAwithEpsilonTransitions = stringology::cover::NondeterministicApproximateSuffixAutomatonForHammingDistance::construct ( pattern, k ); automaton::NFA < SymbolType, ext::pair < unsigned int, unsigned int > > approximateSuffixNDA = automaton::simplify::EpsilonRemoverIncoming::remove( approximateSuffixNDAwithEpsilonTransitions ); ext::set < ext::pair < unsigned int, unsigned int > > initialState; initialState.insert( approximateSuffixNDA.getInitialState( ) ); ext::vector < SymbolType > lfactor; unsigned int depth = 0; ext::map < ext::vector < SymbolType >, unsigned int > result = processState( initialState, lfactor, depth, k, approximateSuffixNDA, pattern, restricted); return result; } template < class SymbolType > ext::map < ext::vector < SymbolType >, unsigned int > ApproximateCoversComputation::processState( ext::set < ext::pair < unsigned int, unsigned int > > previousState, const ext::vector < SymbolType > previousLfactor, unsigned int previousDepth, unsigned int k, const automaton::NFA<SymbolType, ext::pair<unsigned int, unsigned int> > approximateSuffixNDA, const string::LinearString < SymbolType > & pattern, bool restricted ) { ext::map < ext::vector < SymbolType >, unsigned int > resultSet; for ( const SymbolType & symbol : pattern.getAlphabet() ) { ext::set < ext::pair < unsigned int, unsigned int > > newState; unsigned int depth = previousDepth + 1; for ( auto const & state : previousState ) { auto iter = approximateSuffixNDA.getTransitions().find(ext::make_pair(std::move(state), symbol)); if ( iter != approximateSuffixNDA.getTransitions().end() ) newState.insert( iter->second.begin(), iter->second.end() ); } bool levelZero = 0; if ( restricted ) { for (const ext::pair<unsigned int, unsigned int> &state : newState) { if (state.second == 0) { levelZero = 1; break; } } } if ( newState.size() > 1 && ( restricted == levelZero ) ) { // lfactor(newState) is a k-approximately repeating factor if ( (*newState.begin()).first == depth ){ // newState is k-approximate prefix ext::vector < SymbolType > lfactor = previousLfactor; lfactor.push_back( symbol ); ext::set< ext::pair<unsigned int, unsigned int> >::iterator it = newState.end(); if ( ( *( --it ) ).first == pattern.getContent().size() && lfactor.size() > k ) { // The state is final ext::pair < unsigned int, unsigned int > previous; int cnt = 0; bool isCover = false; for ( const ext::pair < unsigned int, unsigned int > & state : newState ) { if ( cnt ) { if ( ( state.first - previous.first ) > lfactor.size() ) { isCover = false; break; } isCover = true; } previous = state; cnt++; } if ( isCover ){ unsigned int l = smallestDistanceCover( newState, lfactor); if ( lfactor.size() > l ){ resultSet.insert( ext::make_pair( lfactor, l ) ); } } } ext::map < ext::vector < SymbolType >, unsigned int > resultSet2 = processState( newState, lfactor, depth, k, approximateSuffixNDA, pattern, restricted ); auto iter = resultSet2.begin(); if ( iter != resultSet2.end() ) resultSet.insert( resultSet2.begin(), resultSet2.end() ); } } } return resultSet; } template < class SymbolType > unsigned int ApproximateCoversComputation::smallestDistanceCover( const ext::set < ext::pair < unsigned int, unsigned int > > dSubset, const ext::vector< SymbolType > coverCandidate) { ext::set < ext::pair < unsigned int, unsigned int > > subset ( dSubset ); unsigned int lmin; unsigned int lmax; int l; ext::set< ext::pair < unsigned int, unsigned int > >::iterator it1 = subset.end(); lmin = ( (*subset.begin()).second > (*( --it1)).second ) ? (*subset.begin()).second : (*it1).second; lmax = (*subset.begin()).second; for ( auto const & state : subset ){ if ( state.second > lmax) lmax = state.second; } l = lmax; while (1) { for ( const ext::pair < unsigned int, unsigned int > & state : subset ) { ext::set< ext::pair < unsigned int, unsigned int > >::iterator it = subset.end(); if ( ( int ) state.second == l && state != *subset.begin() && state != (*( --it )) ) { subset.erase( state ); } } l = l - 1; if ( l < ( int ) lmin ) break; int cnt = 0; bool isCover = false; ext::pair < unsigned int, unsigned int > previous; for ( const ext::pair < unsigned int, unsigned int > & state : subset ) { if ( cnt ){ if ( ( state.first - previous.first ) > coverCandidate.size() ){ isCover = false; break; } isCover = true; } previous = state; cnt++; } if ( !isCover ) break; } l = l + 1; return l; } } /* namespace cover */ }/* namespace stringology */ #endif //APPROXIMATE_COVERS_COMPUTATION_H