Skip to content
Snippets Groups Projects
ApproximateCoversComputation.h 8.42 KiB
Newer Older
//
// Created by shushiri on 17.2.19.
//

#ifndef APPROXIMATE_COVERS_COMPUTATION_H
#define APPROXIMATE_COVERS_COMPUTATION_H

#include <string/LinearString.h>
#include <automaton/FSM/EpsilonNFA.h>
#include <automaton/FSM/NFA.h>
#include <automaton/simplify/EpsilonRemoverIncoming.h>
Irina Shushkova's avatar
Irina Shushkova committed
#include <stringology/indexing/NondeterministicApproximateSuffixAutomatonForHammingDistance.h>
namespace stringology{
namespace cover {


class ApproximateCoversComputation {
public:
    /**
     *
     * @return set of all smallest distance k-approximate covers of the input string with the smallest
distances with maximum Hamming distance k.
     */

    template < class SymbolType >
    static ext::set  < ext::pair < string::LinearString < SymbolType >, unsigned int > > compute(const string::LinearString < SymbolType > & pattern, unsigned int k, bool restricted = 0 );

    template < class SymbolType >
    static ext::set  < ext::pair < string::LinearString < SymbolType >, unsigned int > > compute(const string::LinearString < SymbolType > & pattern, unsigned int k );
    template < class SymbolType >
    static unsigned int smallestDistanceCover(const ext::set < ext::pair < unsigned int, unsigned int > > & dSubset,
                                       const ext::vector < SymbolType > & coverCandidate);

    template < class SymbolType >
    static ext::set  < ext::pair < string::LinearString < SymbolType >, unsigned int > > processState (
            const ext::set < ext::pair < unsigned int, unsigned int > > & previousState,
            const ext::vector < SymbolType > & previousLfactor,
            unsigned int previousDepth,
            unsigned int k,
            const automaton::NFA<SymbolType, ext::pair<unsigned int, unsigned int> > & approximateSuffixNDA,
            const string::LinearString < SymbolType > & pattern,
            bool restricted );
template < class SymbolType >
ext::set  < ext::pair < string::LinearString < SymbolType >, unsigned int > > ApproximateCoversComputation::compute (
        const string::LinearString < SymbolType > & pattern,
        unsigned int k ) {
    return ApproximateCoversComputation::compute ( pattern, k, 0 );
}

template < class SymbolType >
ext::set  < ext::pair < string::LinearString < SymbolType >, unsigned int > > ApproximateCoversComputation::compute (
        const string::LinearString<SymbolType> &pattern,
        unsigned int k,
        bool restricted ) {

    automaton::EpsilonNFA < SymbolType, ext::pair < unsigned int, unsigned int > > approximateSuffixNDAwithEpsilonTransitions =
Irina Shushkova's avatar
Irina Shushkova committed
            stringology::indexing::NondeterministicApproximateSuffixAutomatonForHammingDistance::construct ( pattern, k );

    automaton::NFA < SymbolType, ext::pair < unsigned int, unsigned int > > approximateSuffixNDA =
            automaton::simplify::EpsilonRemoverIncoming::remove( approximateSuffixNDAwithEpsilonTransitions );

    ext::set < ext::pair < unsigned int, unsigned int > > initialState;
    initialState.insert( approximateSuffixNDA.getInitialState( ) );
    ext::vector < SymbolType > lfactor;
    unsigned int depth = 0;

    ext::set  < ext::pair < string::LinearString < SymbolType >, unsigned int > > result =
            processState( initialState, lfactor, depth, k, approximateSuffixNDA, pattern, restricted);
    return result;
}


template < class SymbolType >
ext::set  < ext::pair < string::LinearString < SymbolType >, unsigned int > >ApproximateCoversComputation::processState(
        const ext::set < ext::pair < unsigned int, unsigned int > > & previousState,
        const ext::vector < SymbolType > & previousLfactor,
        unsigned int previousDepth,
        unsigned int k,
        const automaton::NFA<SymbolType, ext::pair<unsigned int, unsigned int> > & approximateSuffixNDA,
        const string::LinearString < SymbolType > & pattern,
        bool restricted ) {
    ext::set  < ext::pair < string::LinearString < SymbolType >, unsigned int > >  resultSet;
    for ( const SymbolType & symbol : pattern.getAlphabet() ) {
        ext::set < ext::pair < unsigned int, unsigned int > > newState;
        unsigned int depth = previousDepth + 1;

        for ( auto const & state : previousState ) {
            auto transition_range = approximateSuffixNDA.getTransitions().equal_range(ext::make_pair(std::move(state), symbol));
            for ( const auto & it : transition_range ) {
                newState.insert ( it.second );
            }
        bool levelZero = 0;
        if ( restricted ) {
            for (const ext::pair<unsigned int, unsigned int> &state : newState) {
                if (state.second == 0) {
                    levelZero = 1;
                    break;
                }
            }
        }

        if ( newState.size() > 1 && ( restricted == levelZero ) ) {        // lfactor(newState) is a k-approximately repeating factor
            if ( (*newState.begin()).first == depth ){      // newState is k-approximate prefix
                ext::vector < SymbolType > lfactor = previousLfactor;
                lfactor.push_back( symbol );

                ext::set< ext::pair<unsigned int, unsigned int> >::iterator it = newState.end();

                if ( ( *( --it ) ).first == pattern.getContent().size() ) {       // The state is final
                    ext::pair < unsigned int, unsigned int > previous;
                    int cnt = 0;
                    bool isCover = false;
                    for ( const ext::pair < unsigned int, unsigned int > & state : newState ) {
                        if ( cnt ) {
                            if ( ( state.first - previous.first ) > lfactor.size() ) {
                                isCover = false;
                                break;
                            }
                            isCover = true;
                        }
                        previous = state;
                        cnt++;
                    }
                    if ( isCover ){
                        unsigned int l = smallestDistanceCover( newState, lfactor);
                        if  ( lfactor.size() > l ){
                            string::LinearString < SymbolType > str( lfactor );
                            resultSet.insert( ext::make_pair( str, l ) );
                ext::set  < ext::pair < string::LinearString < SymbolType >, unsigned int > > resultSet2 =
                        processState( newState, lfactor, depth, k, approximateSuffixNDA, pattern, restricted );
                auto iter = resultSet2.begin();
                if ( iter != resultSet2.end() )
                    resultSet.insert( resultSet2.begin(), resultSet2.end() );
            }
        }
    }
    return resultSet;
}



template < class SymbolType >
unsigned int ApproximateCoversComputation::smallestDistanceCover(
        const ext::set < ext::pair < unsigned int, unsigned int > > & dSubset,
        const ext::vector< SymbolType > & coverCandidate) {
    ext::set < ext::pair < unsigned int, unsigned int > > subset ( dSubset );
    unsigned int lmin;
    unsigned int lmax;

    ext::set< ext::pair < unsigned int, unsigned int > >::iterator it1 = subset.end();
    lmin = ( (*subset.begin()).second > (*( --it1)).second ) ? (*subset.begin()).second : (*it1).second;
    lmax = (*subset.begin()).second;

    for ( auto const & state : subset ){
        if ( state.second > lmax) lmax = state.second;
    }

    l = lmax;

    while (1) {

        ext::set< ext::pair < unsigned int, unsigned int > >::iterator last = subset.end();
        last--;
        auto it = subset.begin();
        while (it != subset.end()) {
            if ((int) (*it).second == l && (*it) != *subset.begin() && (*it) != (*last)) {
                it = subset.erase(it);
        int cnt = 0;
        bool isCover = false;
        ext::pair < unsigned int, unsigned int > previous;
        for ( const ext::pair < unsigned int, unsigned int > & state : subset ) {
            if ( cnt ){
                if ( ( state.first - previous.first ) > coverCandidate.size() ){
                    isCover = false;
                    break;
                }
                isCover = true;
            }
            previous = state;
            cnt++;
        }
        if ( !isCover ) break;
    }
    l = l + 1;
    return l;
}

} /* namespace cover */
}/* namespace stringology */


#endif //APPROXIMATE_COVERS_COMPUTATION_H