Newer
Older
* This file is part of Algorithms library toolkit.
* Copyright (C) 2017 Jan Travnicek (jan.travnicek@fit.cvut.cz)
* Algorithms library toolkit is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* Algorithms library toolkit is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with Algorithms library toolkit. If not, see <http://www.gnu.org/licenses/>.
*
#ifndef TO_REG_EXP_STATE_ELIMINATION_H_
#define TO_REG_EXP_STATE_ELIMINATION_H_
#include <regexp/unbounded/UnboundedRegExp.h>
#include <automaton/FSM/DFA.h>
#include <automaton/FSM/NFA.h>
#include <automaton/FSM/MultiInitialStateNFA.h>
#include <automaton/FSM/EpsilonNFA.h>
#include <automaton/FSM/ExtendedNFA.h>
#include <regexp/simplify/RegExpOptimize.h>
#include <regexp/transform/RegExpAlternate.h>
#include <regexp/transform/RegExpConcatenate.h>
#include <regexp/transform/RegExpIterate.h>
#include <common/createUnique.hpp>
#include <label/FinalStateLabel.h>
* Converts a finite automaton to a regular expression using using the State Elimination algorithm (Melichar: Jazyky a překlady 2.118).
* This algorithm returns the regular expression as regexp::UnboundedRegExp.
* @tparam SymbolType the type of input symbols of the accepted automaton
* @tparam StateType the type of states of the accepted automaton
* @param automaton finite automaton to convert
* @return unbounded regular expression equivalent to the original automaton
template < class T >
static regexp::UnboundedRegExp < typename T::SymbolType > convert ( const T & automaton );
/**
* Helper function to create new initial and final states in the automaton for the algorithm.
* @tparam SymbolType the type of input symbols of the accepted automaton
* @tparam StateType the type of states of the accepted automaton
* @param automaton extended finite automaton
*/
template < class SymbolType, class StateType >
static void extendExtendedNFA(automaton::ExtendedNFA < SymbolType, StateType > & automaton );
/**
* Helper function to create a regexp from all transitions between states @p from and @p to.
* It creates the alternation regexp of all such transitions.
* @tparam SymbolType the type of input symbols of the accepted automaton
* @tparam StateType the type of states of the accepted automaton
* @param automaton automaton to select the transitions
* @param from source state in @param automaton
* @param to destination state in @param automaton
* @return the regular expression node representing the transitions between states @p from and @p to
*/
template < class SymbolType, class StateType >
static regexp::UnboundedRegExpStructure < SymbolType > transitionsToRegExp ( const automaton::ExtendedNFA < SymbolType, StateType > & automaton, const StateType & from, const StateType & to );
/**
* Helper function for the elimination of a single state according to the algorithm.
* @tparam SymbolType the type of input symbols of the accepted automaton
* @tparam StateType the type of states of the accepted automaton
* @param extendedAutomaton automaton for the elimination
* @param state state to eliminate
* @return the @p extendedAutomaton after the elimination of a state @state.
*/
template < class SymbolType, class StateType >
static automaton::ExtendedNFA < SymbolType, StateType > eliminateState ( const automaton::ExtendedNFA < SymbolType, StateType > & extendedAutomaton, const StateType & q );
template < class T >
regexp::UnboundedRegExp < typename T::SymbolType > ToRegExpStateElimination::convert ( const T & automaton ) {
using SymbolType = typename T::SymbolType;
using StateType = typename T::StateType;
if ( automaton.getFinalStates ( ).empty ( ) )
return regexp::UnboundedRegExp < SymbolType > ( regexp::UnboundedRegExpStructure < DefaultSymbolType > ( regexp::UnboundedRegExpEmpty < DefaultSymbolType > ( ) ) );
// steps 1 + 2
automaton::ExtendedNFA < SymbolType, StateType > extendedAutomaton ( automaton );
extendExtendedNFA ( extendedAutomaton );
// step 3 - Exterminate!
// select all states that are neither final nor initial
ext::set < StateType > statesToEliminate = extendedAutomaton.getStates ( );
statesToEliminate.erase ( extendedAutomaton.getInitialState ( ) );
statesToEliminate.erase ( * extendedAutomaton.getFinalStates ( ).begin ( ) );
for ( const StateType & state : statesToEliminate )
extendedAutomaton = eliminateState ( extendedAutomaton, state );
// step 4
regexp::UnboundedRegExpStructure < SymbolType > finalStateLoop = regexp::transform::RegExpIterate::iterate ( transitionsToRegExp ( extendedAutomaton, * extendedAutomaton.getFinalStates ( ).begin ( ), * extendedAutomaton.getFinalStates ( ).begin ( ) ) );
regexp::UnboundedRegExpStructure < SymbolType > initialToFinalState = regexp::transform::RegExpConcatenate::concatenate ( transitionsToRegExp ( extendedAutomaton, extendedAutomaton.getInitialState ( ), *extendedAutomaton.getFinalStates ( ).begin ( ) ),finalStateLoop );
return regexp::UnboundedRegExp < SymbolType > ( regexp::simplify::RegExpOptimize::optimize ( initialToFinalState ) );
}
template < class SymbolType, class StateType >
automaton::ExtendedNFA < SymbolType, StateType > ToRegExpStateElimination::eliminateState ( const automaton::ExtendedNFA < SymbolType, StateType > & extendedAutomaton, const StateType & q ) {
automaton::ExtendedNFA < SymbolType, StateType > newAutomaton ( extendedAutomaton.getInitialState ( ) ); // sure that q is neither initial nor final (follows from step 2 - extending ExtendedNFA)
newAutomaton.setStates ( extendedAutomaton.getStates ( ) );
newAutomaton.removeState ( q ); // preserve all states but q (the one to eliminate)
newAutomaton.setInputAlphabet ( extendedAutomaton.getInputAlphabet ( ) );
newAutomaton.setFinalStates ( extendedAutomaton.getFinalStates ( ) );
for ( const StateType & p: newAutomaton.getStates ( ) ) {
for ( const StateType & r : newAutomaton.getStates ( ) ) {
regexp::UnboundedRegExpStructure < SymbolType > concat = transitionsToRegExp ( extendedAutomaton, p, q );
concat = regexp::transform::RegExpConcatenate::concatenate ( concat, regexp::transform::RegExpIterate::iterate ( transitionsToRegExp ( extendedAutomaton, q, q ) ) );
concat = regexp::transform::RegExpConcatenate::concatenate ( concat, transitionsToRegExp ( extendedAutomaton, q, r ) );
regexp::UnboundedRegExpStructure < SymbolType > alt = regexp::transform::RegExpAlternate::alternate ( concat, transitionsToRegExp ( extendedAutomaton, p, r ) );
newAutomaton.addTransition ( p, regexp::simplify::RegExpOptimize::optimize ( alt ), r );
}
}
return newAutomaton;
}
template < class SymbolType, class StateType >
regexp::UnboundedRegExpStructure < SymbolType > ToRegExpStateElimination::transitionsToRegExp ( const automaton::ExtendedNFA < SymbolType, StateType > & automaton, const StateType & from, const StateType & to ) {
regexp::UnboundedRegExpStructure < SymbolType > ret ( regexp::UnboundedRegExpEmpty < SymbolType > { } );
for ( const auto & transition: automaton.getTransitionsFromState ( from ) )
ret = regexp::transform::RegExpAlternate::alternate ( ret, transition.first.second );
return regexp::simplify::RegExpOptimize::optimize ( ret );
}
template < class SymbolType, class StateType >
void ToRegExpStateElimination::extendExtendedNFA ( automaton::ExtendedNFA < SymbolType, StateType > & automaton ) {
const StateType & initState = automaton.getInitialState ( );
if ( automaton.getFinalStates ( ).count ( initState ) > 0 || !automaton.getTransitionsToState ( initState ).empty ( ) ) {
StateType q0 = common::createUnique ( initState, automaton.getStates ( ) );
automaton.addState ( q0 );
regexp::UnboundedRegExpStructure < DefaultSymbolType > regexp { regexp::UnboundedRegExpEpsilon < DefaultSymbolType > ( ) };
automaton.addTransition ( q0, regexp, initState );
automaton.setInitialState ( q0 );
}
if ( automaton.getFinalStates ( ).size ( ) > 1 ) {
StateType f = common::createUnique ( label::FinalStateLabel::instance < StateType > ( ), automaton.getStates ( ) );
automaton.addState ( f );
for ( const StateType & state : automaton.getFinalStates ( ) ) {
regexp::UnboundedRegExpStructure < SymbolType > regexp { regexp::UnboundedRegExpEpsilon < SymbolType > ( ) };
automaton.addTransition ( state, regexp, f );
}
ext::set < StateType > newFinalStates;
newFinalStates.insert ( f );
automaton.setFinalStates ( newFinalStates );
}
}