/* * ToRegExpStateElimination.h * * This file is part of Algorithms library toolkit. * Copyright (C) 2017 Jan Travnicek (jan.travnicek@fit.cvut.cz) * Algorithms library toolkit is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * Algorithms library toolkit is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * You should have received a copy of the GNU General Public License * along with Algorithms library toolkit. If not, see <http://www.gnu.org/licenses/>. * * Created on: 9. 2. 2014 * Author: Tomas Pecka */ #ifndef TO_REG_EXP_STATE_ELIMINATION_H_ #define TO_REG_EXP_STATE_ELIMINATION_H_ #include <regexp/unbounded/UnboundedRegExp.h> #include <automaton/FSM/DFA.h> #include <automaton/FSM/NFA.h> #include <automaton/FSM/MultiInitialStateNFA.h> #include <automaton/FSM/EpsilonNFA.h> #include <automaton/FSM/ExtendedNFA.h> #include <regexp/simplify/RegExpOptimize.h> #include <regexp/transform/RegExpAlternate.h> #include <regexp/transform/RegExpConcatenate.h> #include <regexp/transform/RegExpIterate.h> #include <common/createUnique.hpp> #include <label/FinalStateLabel.h> namespace automaton { namespace convert { /** * Converts a finite automaton to a regular expression using using the State Elimination algorithm (Melichar: Jazyky a překlady 2.118). * This algorithm returns the regular expression as regexp::UnboundedRegExp. */ class ToRegExpStateElimination { public: /** * Performs conversion. * @tparam T type of the finite automaton * @tparam SymbolType the type of input symbols of the accepted automaton * @tparam StateType the type of states of the accepted automaton * @param automaton finite automaton to convert * @return unbounded regular expression equivalent to the original automaton */ template < class T > static regexp::UnboundedRegExp < typename T::SymbolType > convert ( const T & automaton ); private: /** * Helper function to create new initial and final states in the automaton for the algorithm. * @tparam SymbolType the type of input symbols of the accepted automaton * @tparam StateType the type of states of the accepted automaton * @param automaton extended finite automaton */ template < class SymbolType, class StateType > static void extendExtendedNFA(automaton::ExtendedNFA < SymbolType, StateType > & automaton ); /** * Helper function to create a regexp from all transitions between states @p from and @p to. * It creates the alternation regexp of all such transitions. * @tparam SymbolType the type of input symbols of the accepted automaton * @tparam StateType the type of states of the accepted automaton * @param automaton automaton to select the transitions * @param from source state in @param automaton * @param to destination state in @param automaton * @return the regular expression node representing the transitions between states @p from and @p to */ template < class SymbolType, class StateType > static const regexp::UnboundedRegExpStructure < SymbolType > transitionsToRegExp ( const automaton::ExtendedNFA < SymbolType, StateType > & automaton, const StateType & from, const StateType & to ); /** * Helper function for the elimination of a single state according to the algorithm. * @tparam SymbolType the type of input symbols of the accepted automaton * @tparam StateType the type of states of the accepted automaton * @param extendedAutomaton automaton for the elimination * @param state state to eliminate * @return the @p extendedAutomaton after the elimination of a state @state. */ template < class SymbolType, class StateType > static automaton::ExtendedNFA < SymbolType, StateType > eliminateState ( const automaton::ExtendedNFA < SymbolType, StateType > & extendedAutomaton, const StateType & q ); }; template < class T > regexp::UnboundedRegExp < typename T::SymbolType > ToRegExpStateElimination::convert ( const T & automaton ) { using SymbolType = typename T::SymbolType; using StateType = typename T::StateType; if ( automaton.getFinalStates ( ).empty ( ) ) return regexp::UnboundedRegExp < SymbolType > ( regexp::UnboundedRegExpStructure < DefaultSymbolType > ( regexp::UnboundedRegExpEmpty < DefaultSymbolType > ( ) ) ); // steps 1 + 2 automaton::ExtendedNFA < SymbolType, StateType > extendedAutomaton ( automaton ); extendExtendedNFA ( extendedAutomaton ); // step 3 - Exterminate! // select all states that are neither final nor initial ext::set < StateType > statesToEliminate = extendedAutomaton.getStates ( ); statesToEliminate.erase ( extendedAutomaton.getInitialState ( ) ); statesToEliminate.erase ( * extendedAutomaton.getFinalStates ( ).begin ( ) ); for ( const StateType & state : statesToEliminate ) extendedAutomaton = eliminateState ( extendedAutomaton, state ); // step 4 regexp::UnboundedRegExpStructure < SymbolType > finalStateLoop = regexp::RegExpIterate::iterate ( transitionsToRegExp ( extendedAutomaton, * extendedAutomaton.getFinalStates ( ).begin ( ), * extendedAutomaton.getFinalStates ( ).begin ( ) ) ); regexp::UnboundedRegExpStructure < SymbolType > initialToFinalState = regexp::RegExpConcatenate::concatenate ( transitionsToRegExp ( extendedAutomaton, extendedAutomaton.getInitialState ( ), *extendedAutomaton.getFinalStates ( ).begin ( ) ),finalStateLoop ); return regexp::UnboundedRegExp < SymbolType > ( regexp::simplify::RegExpOptimize::optimize ( initialToFinalState ) ); } template < class SymbolType, class StateType > automaton::ExtendedNFA < SymbolType, StateType > ToRegExpStateElimination::eliminateState ( const automaton::ExtendedNFA < SymbolType, StateType > & extendedAutomaton, const StateType & q ) { automaton::ExtendedNFA < SymbolType, StateType > newAutomaton ( extendedAutomaton.getInitialState ( ) ); // sure that q is neither initial nor final (follows from step 2 - extending ExtendedNFA) newAutomaton.setStates ( extendedAutomaton.getStates ( ) ); newAutomaton.removeState ( q ); // preserve all states but q (the one to eliminate) newAutomaton.setInputAlphabet ( extendedAutomaton.getInputAlphabet ( ) ); newAutomaton.setFinalStates ( extendedAutomaton.getFinalStates ( ) ); for ( const StateType & p: newAutomaton.getStates ( ) ) { for ( const StateType & r : newAutomaton.getStates ( ) ) { regexp::UnboundedRegExpStructure < SymbolType > concat = transitionsToRegExp ( extendedAutomaton, p, q ); concat = regexp::RegExpConcatenate::concatenate ( concat, regexp::RegExpIterate::iterate ( transitionsToRegExp ( extendedAutomaton, q, q ) ) ); concat = regexp::RegExpConcatenate::concatenate ( concat, transitionsToRegExp ( extendedAutomaton, q, r ) ); regexp::UnboundedRegExpStructure < SymbolType > alt = regexp::RegExpAlternate::alternate ( concat, transitionsToRegExp ( extendedAutomaton, p, r ) ); newAutomaton.addTransition ( p, regexp::simplify::RegExpOptimize::optimize ( alt ), r ); } } return newAutomaton; } template < class SymbolType, class StateType > const regexp::UnboundedRegExpStructure < SymbolType > ToRegExpStateElimination::transitionsToRegExp ( const automaton::ExtendedNFA < SymbolType, StateType > & automaton, const StateType & from, const StateType & to ) { regexp::UnboundedRegExpStructure < SymbolType > ret ( regexp::UnboundedRegExpEmpty < SymbolType > { } ); for ( const auto & transition: automaton.getTransitionsFromState ( from ) ) if ( transition.second == to ) ret = regexp::RegExpAlternate::alternate ( ret, transition.first.second ); return regexp::simplify::RegExpOptimize::optimize ( ret ); } template < class SymbolType, class StateType > void ToRegExpStateElimination::extendExtendedNFA ( automaton::ExtendedNFA < SymbolType, StateType > & automaton ) { const StateType & initState = automaton.getInitialState ( ); if ( automaton.getFinalStates ( ).count ( initState ) > 0 || !automaton.getTransitionsToState ( initState ).empty ( ) ) { StateType q0 = common::createUnique ( initState, automaton.getStates ( ) ); automaton.addState ( q0 ); regexp::UnboundedRegExpStructure < DefaultSymbolType > regexp { regexp::UnboundedRegExpEpsilon < DefaultSymbolType > ( ) }; automaton.addTransition ( q0, regexp, initState ); automaton.setInitialState ( q0 ); } if ( automaton.getFinalStates ( ).size ( ) > 1 ) { StateType f = common::createUnique ( label::FinalStateLabel::instance < StateType > ( ), automaton.getStates ( ) ); automaton.addState ( f ); for ( const StateType & state : automaton.getFinalStates ( ) ) { regexp::UnboundedRegExpStructure < SymbolType > regexp { regexp::UnboundedRegExpEpsilon < SymbolType > ( ) }; automaton.addTransition ( state, regexp, f ); } ext::set < StateType > newFinalStates; newFinalStates.insert ( f ); automaton.setFinalStates ( newFinalStates ); } } } /* namespace convert */ } /* namespace automaton */ #endif /* TO_REG_EXP_STATE_ELIMINATION_H_ */