Skip to content
Snippets Groups Projects
ApproximateCoversComputation.h 7.51 KiB
Newer Older
//
// Created by shushiri on 17.2.19.
//

#ifndef APPROXIMATE_COVERS_COMPUTATION_H
#define APPROXIMATE_COVERS_COMPUTATION_H

#include <string/LinearString.h>
#include <automaton/FSM/EpsilonNFA.h>
#include <automaton/FSM/NFA.h>
#include <automaton/simplify/EpsilonRemoverIncoming.h>
#include <stringology/cover/NondeterministicApproximateSuffixAutomatonForHammingDistance.h>
//#include "NondeterministicApproximateSuffixAutomatonForHammingDistance.h"
namespace stringology{
namespace cover {


class ApproximateCoversComputation {
public:
    /**
     *
     * @return set of all smallest distance k-approximate covers of the input string with the smallest
distances with maximum Hamming distance k.
     */

    template < class SymbolType >
    static ext::set < ext::pair < ext::vector < SymbolType >, unsigned int > >  compute(const string::LinearString < SymbolType > & pattern, unsigned int k);

//private:
    template < class SymbolType >
    static/////
    unsigned int smallestDistanceCover(const ext::set < ext::pair < unsigned int, unsigned int > > dSubset,
                                       const ext::vector < SymbolType > coverCandidate);

    template < class SymbolType >
    static////
    ext::set < ext::pair < ext::vector < SymbolType >, unsigned int > > processState (
            ext::set < ext::pair < unsigned int, unsigned int > > previousState,
            unsigned int previousDepth,
            const ext::set<SymbolType> inputAphabet,
            unsigned int k,
            const automaton::NFA<SymbolType, ext::pair<unsigned int, unsigned int> > approximateSuffixNDA,
            const ext::vector < SymbolType > previousLfactor,
            const string::LinearString < SymbolType > & pattern );
};

template < class SymbolType >
ext::set < ext::pair < ext::vector < SymbolType >, unsigned int > >  ApproximateCoversComputation::compute (
        const string::LinearString<SymbolType> &pattern, unsigned int k ) {
    automaton::EpsilonNFA < SymbolType, DefaultEpsilonType, ext::pair < unsigned int, unsigned int > > approximateSuffixNDAwithEpsilonTransitions =
            stringology::cover::NondeterministicApproximateSuffixAutomatonForHammingDistance::construct ( pattern, k );

    automaton::NFA < SymbolType, ext::pair < unsigned int, unsigned int > > approximateSuffixNDA =
            automaton::simplify::EpsilonRemoverIncoming::remove( approximateSuffixNDAwithEpsilonTransitions );

    ext::set < ext::pair < unsigned int, unsigned int > > initialState;
    initialState.insert( approximateSuffixNDA.getInitialState( ) );
    ext::vector < SymbolType > lfactor;
    unsigned int depth = 0;

    ext::set < ext::pair < ext::vector < SymbolType >, unsigned int > > result =
            processState( initialState, depth, pattern.getAlphabet(), k, approximateSuffixNDA, lfactor, pattern);

    return result;
}


template < class SymbolType >
ext::set < ext::pair < ext::vector < SymbolType >, unsigned int > > ApproximateCoversComputation::processState(
        ext::set < ext::pair < unsigned int, unsigned int > > previousState,
        unsigned int previousDepth,
        const ext::set<SymbolType> inputAphabet,
        unsigned int k,
        const automaton::NFA<SymbolType, ext::pair<unsigned int, unsigned int> > approximateSuffixNDA,
        const ext::vector < SymbolType > previousLfactor,
        const string::LinearString < SymbolType > & pattern ) {

    ext::set < ext::pair < ext::vector < SymbolType >, unsigned int > > resultSet;

    for ( const SymbolType & symbol : inputAphabet ) {
        ext::set < ext::pair < unsigned int, unsigned int > > newState;
        unsigned int depth = previousDepth + 1;

        for ( auto const & state : previousState ) {
            auto iter = approximateSuffixNDA.getTransitions().find(ext::make_pair(std::move(state), symbol));

            if ( iter != approximateSuffixNDA.getTransitions().end() )
                newState.insert( iter->second.begin(), iter->second.end() );
        }

        if ( newState.size() > 1 ) {        // lfactor(newState) is a k-approximately repeating factor
            if ( (*newState.begin()).first == depth ){      // newState is k-approximate prefix
                ext::vector < SymbolType > lfactor = previousLfactor;
                lfactor.push_back( symbol );

                ext::set< ext::pair<unsigned int, unsigned int> >::iterator it = newState.end();

                if ( ( *( --it ) ).first == pattern.getContent().size() ) {       // The state is final
                    ext::pair < unsigned int, unsigned int > previous;
                    int cnt = 0;
                    bool isCover = false;
                    for ( const ext::pair < unsigned int, unsigned int > & state : newState ) {
                        if ( cnt ) {
                            if ( ( state.first - previous.first ) > lfactor.size() ) {
                                isCover = false;
                                break;
                            }
                            isCover = true;
                        }
                        previous = state;
                        cnt++;
                    }
                    if ( isCover ){
                        unsigned int l = smallestDistanceCover( newState, lfactor);
                        if  ( lfactor.size() > l ){
                            resultSet.insert( ext::make_pair( lfactor, l ) );
                        }
                    }
                }

                ext::set < ext::pair < ext::vector < SymbolType >, unsigned int > > resultSet2 =
                        processState( newState, depth, inputAphabet, k, approximateSuffixNDA, lfactor, pattern);
                auto iter = resultSet2.begin();
                if ( iter != resultSet2.end() )
                    resultSet.insert( resultSet2.begin(), resultSet2.end() );
            }

        }

    }

    return resultSet;

}



template < class SymbolType >
unsigned int ApproximateCoversComputation::smallestDistanceCover(
        const ext::set < ext::pair < unsigned int, unsigned int > > dSubset, const ext::vector< SymbolType > coverCandidate) {
    ext::set < ext::pair < unsigned int, unsigned int > > subset ( dSubset );
    unsigned int lmin;
    unsigned int lmax;
    unsigned int l;

    ext::set< ext::pair < unsigned int, unsigned int > >::iterator it1 = subset.end();
    lmin = ( (*subset.begin()).second > (*( --it1)).second ) ? (*subset.begin()).second : (*it1).second;
    lmax = (*subset.begin()).second;

    for ( auto const & state : subset ){
        if ( state.second > lmax) lmax = state.second;
    }

    l = lmax;

    while (1) {
        for ( const ext::pair < unsigned int, unsigned int > & state : subset ) {
            ext::set< ext::pair < unsigned int, unsigned int > >::iterator it = subset.end();
            if ( state.second == l && state != *subset.begin() && state != (*( --it )) ) {
                subset.erase( state );
            }
        }
        l = l - 1;

        if ( l < lmin ) break;
        int cnt = 0;
        bool isCover = false;
        ext::pair < unsigned int, unsigned int > previous;
        for ( const ext::pair < unsigned int, unsigned int > & state : subset ) {
            if ( cnt ){
                if ( ( state.first - previous.first ) > coverCandidate.size() ){
                    isCover = false;
                    break;
                }
                isCover = true;
            }
            previous = state;
            cnt++;
        }

        if ( !isCover ) break;
    }
    l = l + 1;
    return l;

}

} /* namespace cover */
}/* namespace stringology */


#endif //APPROXIMATE_COVERS_COMPUTATION_H