Newer
Older
//
// Created by shushiri on 17.2.19.
//
#ifndef APPROXIMATE_COVERS_COMPUTATION_H
#define APPROXIMATE_COVERS_COMPUTATION_H
#include <string/LinearString.h>
#include <automaton/FSM/EpsilonNFA.h>
#include <automaton/FSM/NFA.h>
#include <automaton/simplify/efficient/EpsilonRemoverIncoming.h>
#include <stringology/indexing/NondeterministicApproximateSuffixAutomatonForHammingDistance.h>
namespace stringology{
namespace cover {
class ApproximateCoversComputation {
public:
/**
*
* @return set of all smallest distance k-approximate covers of the input string with the smallest
distances with maximum Hamming distance k.
*/
template < class SymbolType >
static ext::set < ext::pair < string::LinearString < SymbolType >, unsigned int > > compute(const string::LinearString < SymbolType > & pattern, unsigned int k, bool restricted = 0 );
template < class SymbolType >
static ext::set < ext::pair < string::LinearString < SymbolType >, unsigned int > > compute(const string::LinearString < SymbolType > & pattern, unsigned int k );
private:
template < class SymbolType >
static unsigned int smallestDistanceCover(const ext::set < ext::pair < unsigned int, unsigned int > > & dSubset,
const ext::vector < SymbolType > & coverCandidate);
template < class SymbolType >
static ext::set < ext::pair < string::LinearString < SymbolType >, unsigned int > > processState (
const ext::set < ext::pair < unsigned int, unsigned int > > & previousState,
const ext::vector < SymbolType > & previousLfactor,
unsigned int previousDepth,
unsigned int k,
const automaton::NFA<SymbolType, ext::pair<unsigned int, unsigned int> > & approximateSuffixNDA,
const string::LinearString < SymbolType > & pattern,
bool restricted );
template < class SymbolType >
ext::set < ext::pair < string::LinearString < SymbolType >, unsigned int > > ApproximateCoversComputation::compute (
const string::LinearString < SymbolType > & pattern,
unsigned int k ) {
return ApproximateCoversComputation::compute ( pattern, k, 0 );
}
template < class SymbolType >
ext::set < ext::pair < string::LinearString < SymbolType >, unsigned int > > ApproximateCoversComputation::compute (
const string::LinearString<SymbolType> &pattern,
unsigned int k,
bool restricted ) {
automaton::EpsilonNFA < SymbolType, ext::pair < unsigned int, unsigned int > > approximateSuffixNDAwithEpsilonTransitions =
stringology::indexing::NondeterministicApproximateSuffixAutomatonForHammingDistance::construct ( pattern, k );
automaton::NFA < SymbolType, ext::pair < unsigned int, unsigned int > > approximateSuffixNDA =
automaton::simplify::efficient::EpsilonRemoverIncoming::remove( approximateSuffixNDAwithEpsilonTransitions );
ext::set < ext::pair < unsigned int, unsigned int > > initialState;
initialState.insert( approximateSuffixNDA.getInitialState( ) );
ext::vector < SymbolType > lfactor;
unsigned int depth = 0;
ext::set < ext::pair < string::LinearString < SymbolType >, unsigned int > > result =
processState( initialState, lfactor, depth, k, approximateSuffixNDA, pattern, restricted);
return result;
}
template < class SymbolType >
ext::set < ext::pair < string::LinearString < SymbolType >, unsigned int > >ApproximateCoversComputation::processState(
const ext::set < ext::pair < unsigned int, unsigned int > > & previousState,
const ext::vector < SymbolType > & previousLfactor,
unsigned int previousDepth,
unsigned int k,
const automaton::NFA<SymbolType, ext::pair<unsigned int, unsigned int> > & approximateSuffixNDA,
const string::LinearString < SymbolType > & pattern,
bool restricted ) {
ext::set < ext::pair < string::LinearString < SymbolType >, unsigned int > > resultSet;
for ( const SymbolType & symbol : pattern.getAlphabet() ) {
ext::set < ext::pair < unsigned int, unsigned int > > newState;
unsigned int depth = previousDepth + 1;
for ( auto const & state : previousState ) {
auto transition_range = approximateSuffixNDA.getTransitions().equal_range(ext::make_pair(std::move(state), symbol));
for ( const auto & it : transition_range ) {
newState.insert ( it.second );
}
bool levelZero = 0;
if ( restricted ) {
for (const ext::pair<unsigned int, unsigned int> &state : newState) {
if (state.second == 0) {
levelZero = 1;
break;
}
}
}
if ( newState.size() > 1 && ( restricted == levelZero ) ) { // lfactor(newState) is a k-approximately repeating factor
if ( (*newState.begin()).first == depth ){ // newState is k-approximate prefix
ext::vector < SymbolType > lfactor = previousLfactor;
lfactor.push_back( symbol );
ext::set< ext::pair<unsigned int, unsigned int> >::iterator it = newState.end();
if ( ( *( --it ) ).first == pattern.getContent().size() ) { // The state is final
ext::pair < unsigned int, unsigned int > previous;
int cnt = 0;
bool isCover = false;
for ( const ext::pair < unsigned int, unsigned int > & state : newState ) {
if ( cnt ) {
if ( ( state.first - previous.first ) > lfactor.size() ) {
isCover = false;
break;
}
isCover = true;
}
previous = state;
cnt++;
}
if ( isCover ){
unsigned int l = smallestDistanceCover( newState, lfactor);
if ( lfactor.size() > l ){
string::LinearString < SymbolType > str( lfactor );
resultSet.insert( ext::make_pair( str, l ) );
}
}
}
ext::set < ext::pair < string::LinearString < SymbolType >, unsigned int > > resultSet2 =
processState( newState, lfactor, depth, k, approximateSuffixNDA, pattern, restricted );
auto iter = resultSet2.begin();
if ( iter != resultSet2.end() )
resultSet.insert( resultSet2.begin(), resultSet2.end() );
}
}
}
return resultSet;
}
template < class SymbolType >
unsigned int ApproximateCoversComputation::smallestDistanceCover(
const ext::set < ext::pair < unsigned int, unsigned int > > & dSubset,
const ext::vector< SymbolType > & coverCandidate) {
ext::set < ext::pair < unsigned int, unsigned int > > subset ( dSubset );
unsigned int lmin;
unsigned int lmax;
int l;
ext::set< ext::pair < unsigned int, unsigned int > >::iterator it1 = subset.end();
lmin = ( (*subset.begin()).second > (*( --it1)).second ) ? (*subset.begin()).second : (*it1).second;
lmax = (*subset.begin()).second;
for ( auto const & state : subset ){
if ( state.second > lmax) lmax = state.second;
}
l = lmax;
while (1) {
ext::set< ext::pair < unsigned int, unsigned int > >::iterator last = subset.end();
last--;
auto it = subset.begin();
while (it != subset.end()) {
if ((int) (*it).second == l && (*it) != *subset.begin() && (*it) != (*last)) {
if ( l < ( int ) lmin ) break;
int cnt = 0;
bool isCover = false;
ext::pair < unsigned int, unsigned int > previous;
for ( const ext::pair < unsigned int, unsigned int > & state : subset ) {
if ( cnt ){
if ( ( state.first - previous.first ) > coverCandidate.size() ){
isCover = false;
break;
}
isCover = true;
}
previous = state;
cnt++;
}
if ( !isCover ) break;
}
l = l + 1;
return l;
}
} /* namespace cover */
}/* namespace stringology */