// // Created by shushiri on 17.2.19. // #ifndef APPROXIMATE_COVERS_COMPUTATION_H #define APPROXIMATE_COVERS_COMPUTATION_H #include <string/LinearString.h> #include <automaton/FSM/EpsilonNFA.h> #include <automaton/FSM/NFA.h> #include <automaton/simplify/EpsilonRemoverIncoming.h> #include <stringology/cover/NondeterministicApproximateSuffixAutomatonForHammingDistance.h> //#include "NondeterministicApproximateSuffixAutomatonForHammingDistance.h" namespace stringology{ namespace cover { class ApproximateCoversComputation { public: /** * * @return set of all smallest distance k-approximate covers of the input string with the smallest distances with maximum Hamming distance k. */ template < class SymbolType > static ext::set < ext::pair < ext::vector < SymbolType >, unsigned int > > compute(const string::LinearString < SymbolType > & pattern, unsigned int k); //private: template < class SymbolType > static///// unsigned int smallestDistanceCover(const ext::set < ext::pair < unsigned int, unsigned int > > dSubset, const ext::vector < SymbolType > coverCandidate); template < class SymbolType > static//// ext::set < ext::pair < ext::vector < SymbolType >, unsigned int > > processState ( ext::set < ext::pair < unsigned int, unsigned int > > previousState, unsigned int previousDepth, const ext::set<SymbolType> inputAphabet, unsigned int k, const automaton::NFA<SymbolType, ext::pair<unsigned int, unsigned int> > approximateSuffixNDA, const ext::vector < SymbolType > previousLfactor, const string::LinearString < SymbolType > & pattern ); }; template < class SymbolType > ext::set < ext::pair < ext::vector < SymbolType >, unsigned int > > ApproximateCoversComputation::compute ( const string::LinearString<SymbolType> &pattern, unsigned int k ) { automaton::EpsilonNFA < SymbolType, DefaultEpsilonType, ext::pair < unsigned int, unsigned int > > approximateSuffixNDAwithEpsilonTransitions = stringology::cover::NondeterministicApproximateSuffixAutomatonForHammingDistance::construct ( pattern, k ); automaton::NFA < SymbolType, ext::pair < unsigned int, unsigned int > > approximateSuffixNDA = automaton::simplify::EpsilonRemoverIncoming::remove( approximateSuffixNDAwithEpsilonTransitions ); ext::set < ext::pair < unsigned int, unsigned int > > initialState; initialState.insert( approximateSuffixNDA.getInitialState( ) ); ext::vector < SymbolType > lfactor; unsigned int depth = 0; ext::set < ext::pair < ext::vector < SymbolType >, unsigned int > > result = processState( initialState, depth, pattern.getAlphabet(), k, approximateSuffixNDA, lfactor, pattern); return result; } template < class SymbolType > ext::set < ext::pair < ext::vector < SymbolType >, unsigned int > > ApproximateCoversComputation::processState( ext::set < ext::pair < unsigned int, unsigned int > > previousState, unsigned int previousDepth, const ext::set<SymbolType> inputAphabet, unsigned int k, const automaton::NFA<SymbolType, ext::pair<unsigned int, unsigned int> > approximateSuffixNDA, const ext::vector < SymbolType > previousLfactor, const string::LinearString < SymbolType > & pattern ) { ext::set < ext::pair < ext::vector < SymbolType >, unsigned int > > resultSet; for ( const SymbolType & symbol : inputAphabet ) { ext::set < ext::pair < unsigned int, unsigned int > > newState; unsigned int depth = previousDepth + 1; for ( auto const & state : previousState ) { auto iter = approximateSuffixNDA.getTransitions().find(ext::make_pair(std::move(state), symbol)); if ( iter != approximateSuffixNDA.getTransitions().end() ) newState.insert( iter->second.begin(), iter->second.end() ); } if ( newState.size() > 1 ) { // lfactor(newState) is a k-approximately repeating factor if ( (*newState.begin()).first == depth ){ // newState is k-approximate prefix ext::vector < SymbolType > lfactor = previousLfactor; lfactor.push_back( symbol ); ext::set< ext::pair<unsigned int, unsigned int> >::iterator it = newState.end(); if ( ( *( --it ) ).first == pattern.getContent().size() ) { // The state is final ext::pair < unsigned int, unsigned int > previous; int cnt = 0; bool isCover = false; for ( const ext::pair < unsigned int, unsigned int > & state : newState ) { if ( cnt ) { if ( ( state.first - previous.first ) > lfactor.size() ) { isCover = false; break; } isCover = true; } previous = state; cnt++; } if ( isCover ){ unsigned int l = smallestDistanceCover( newState, lfactor); if ( lfactor.size() > l ){ resultSet.insert( ext::make_pair( lfactor, l ) ); } } } ext::set < ext::pair < ext::vector < SymbolType >, unsigned int > > resultSet2 = processState( newState, depth, inputAphabet, k, approximateSuffixNDA, lfactor, pattern); auto iter = resultSet2.begin(); if ( iter != resultSet2.end() ) resultSet.insert( resultSet2.begin(), resultSet2.end() ); } } } return resultSet; } template < class SymbolType > unsigned int ApproximateCoversComputation::smallestDistanceCover( const ext::set < ext::pair < unsigned int, unsigned int > > dSubset, const ext::vector< SymbolType > coverCandidate) { ext::set < ext::pair < unsigned int, unsigned int > > subset ( dSubset ); unsigned int lmin; unsigned int lmax; unsigned int l; ext::set< ext::pair < unsigned int, unsigned int > >::iterator it1 = subset.end(); lmin = ( (*subset.begin()).second > (*( --it1)).second ) ? (*subset.begin()).second : (*it1).second; lmax = (*subset.begin()).second; for ( auto const & state : subset ){ if ( state.second > lmax) lmax = state.second; } l = lmax; while (1) { for ( const ext::pair < unsigned int, unsigned int > & state : subset ) { ext::set< ext::pair < unsigned int, unsigned int > >::iterator it = subset.end(); if ( state.second == l && state != *subset.begin() && state != (*( --it )) ) { subset.erase( state ); } } l = l - 1; if ( l < lmin ) break; int cnt = 0; bool isCover = false; ext::pair < unsigned int, unsigned int > previous; for ( const ext::pair < unsigned int, unsigned int > & state : subset ) { if ( cnt ){ if ( ( state.first - previous.first ) > coverCandidate.size() ){ isCover = false; break; } isCover = true; } previous = state; cnt++; } if ( !isCover ) break; } l = l + 1; return l; } } /* namespace cover */ }/* namespace stringology */ #endif //APPROXIMATE_COVERS_COMPUTATION_H