Skip to content
Snippets Groups Projects
ToRegExpStateElimination.h 9.13 KiB
Newer Older
  • Learn to ignore specific revisions
  • Tomáš Pecka's avatar
    Tomáš Pecka committed
    /*
    
     * ToRegExpStateElimination.h
    
    Tomáš Pecka's avatar
    Tomáš Pecka committed
     *
    
     * This file is part of Algorithms library toolkit.
     * Copyright (C) 2017 Jan Travnicek (jan.travnicek@fit.cvut.cz)
    
     * Algorithms library toolkit is free software: you can redistribute it and/or modify
     * it under the terms of the GNU General Public License as published by
     * the Free Software Foundation, either version 3 of the License, or
     * (at your option) any later version.
    
     * Algorithms library toolkit is distributed in the hope that it will be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     * GNU General Public License for more details.
    
     * You should have received a copy of the GNU General Public License
     * along with Algorithms library toolkit.  If not, see <http://www.gnu.org/licenses/>.
     *
    
    Tomáš Pecka's avatar
    Tomáš Pecka committed
     *  Created on: 9. 2. 2014
    
    Jan Trávníček's avatar
    Jan Trávníček committed
     *	  Author: Tomas Pecka
    
    #ifndef TO_REG_EXP_STATE_ELIMINATION_H_
    #define TO_REG_EXP_STATE_ELIMINATION_H_
    
    #include <regexp/unbounded/UnboundedRegExp.h>
    
    Tomáš Pecka's avatar
    Tomáš Pecka committed
    
    #include <automaton/FSM/DFA.h>
    #include <automaton/FSM/NFA.h>
    
    #include <automaton/FSM/MultiInitialStateNFA.h>
    
    Tomáš Pecka's avatar
    Tomáš Pecka committed
    #include <automaton/FSM/EpsilonNFA.h>
    #include <automaton/FSM/ExtendedNFA.h>
    
    
    #include <automaton/Automaton.h>
    
    #include <regexp/simplify/RegExpOptimize.h>
    #include <regexp/transform/RegExpAlternate.h>
    #include <regexp/transform/RegExpConcatenate.h>
    #include <regexp/transform/RegExpIterate.h>
    
    #include <common/createUnique.hpp>
    
    #include <label/FinalStateLabel.h>
    
    
    namespace automaton {
    
    namespace convert {
    
     * Converts a finite automaton to a regular expression using using the State Elimination algorithm (Melichar: Jazyky a překlady 2.118).
     * This algorithm returns the regular expression as regexp::UnboundedRegExp.
    
    Tomáš Pecka's avatar
    Tomáš Pecka committed
     */
    
    class ToRegExpStateElimination {
    
    Tomáš Pecka's avatar
    Tomáš Pecka committed
    public:
    
    Jan Trávníček's avatar
    Jan Trávníček committed
    	/**
    	 * Performs conversion.
    
    	 * @tparam T type of the finite automaton
    
    	 * @tparam SymbolType the type of input symbols of the accepted automaton
    	 * @tparam StateType the type of states of the accepted automaton
    
    	 * @param automaton finite automaton to convert
    	 * @return unbounded regular expression equivalent to the original automaton
    
    Jan Trávníček's avatar
    Jan Trávníček committed
    	 */
    
    	template < class T, class SymbolType = typename automaton::SymbolTypeOfAutomaton < T >, class StateType = typename automaton::StateTypeOfAutomaton < T > >
    	static regexp::UnboundedRegExp < SymbolType > convert ( const T & automaton );
    
    Tomáš Pecka's avatar
    Tomáš Pecka committed
    
    private:
    
    	/**
    	 * Helper function to create new initial and final states in the automaton for the algorithm.
    
    	 * @tparam SymbolType the type of input symbols of the accepted automaton
    	 * @tparam StateType the type of states of the accepted automaton
    
    	 * @param automaton extended finite automaton
    	 */
    
    	template < class SymbolType, class StateType >
    	static void extendExtendedNFA(automaton::ExtendedNFA < SymbolType, StateType > & automaton );
    
    	/**
    	 * Helper function to create a regexp from all transitions between states @p from and @p to.
    	 * It creates the alternation regexp of all such transitions.
    
    	 * @tparam SymbolType the type of input symbols of the accepted automaton
    	 * @tparam StateType the type of states of the accepted automaton
    
    	 * @param automaton automaton to select the transitions
    	 * @param from source state in @param automaton
    	 * @param to   destination state in @param automaton
    	 * @return the regular expression node representing the transitions between states @p from and @p to
    	 */
    
    	template < class SymbolType, class StateType >
    	static const regexp::UnboundedRegExpStructure < SymbolType > transitionsToRegExp ( const automaton::ExtendedNFA < SymbolType, StateType > & automaton, const StateType & from, const StateType & to );
    
    	/**
    	 * Helper function for the elimination of a single state according to the algorithm.
    
    	 * @tparam SymbolType the type of input symbols of the accepted automaton
    	 * @tparam StateType the type of states of the accepted automaton
    
    	 * @param extendedAutomaton automaton for the elimination
    	 * @param state state to eliminate
    	 * @return the @p extendedAutomaton after the elimination of a state @state.
    	 */
    
    	template < class SymbolType, class StateType >
    	static automaton::ExtendedNFA < SymbolType, StateType > eliminateState ( const automaton::ExtendedNFA < SymbolType, StateType > & extendedAutomaton, const StateType & state );
    
    template < class T, class SymbolType, class StateType >
    regexp::UnboundedRegExp < SymbolType > ToRegExpStateElimination::convert ( const T & automaton ) {
    	if ( automaton.getFinalStates ( ).size ( ) == 0 )
    		return regexp::UnboundedRegExp < SymbolType > ( regexp::UnboundedRegExpStructure < DefaultSymbolType > ( regexp::UnboundedRegExpEmpty < DefaultSymbolType > ( ) ) );
    
    	// steps 1 + 2
    	automaton::ExtendedNFA < SymbolType, StateType > extendedAutomaton ( automaton );
    	extendExtendedNFA ( extendedAutomaton );
    
    	// step 3 - Exterminate!
    	// select all states that are neither final nor initial
    	ext::set < StateType > statesToEliminate = extendedAutomaton.getStates ( );
    	statesToEliminate.erase ( extendedAutomaton.getInitialState ( ) );
    	statesToEliminate.erase ( * extendedAutomaton.getFinalStates ( ).begin ( ) );
    
    	for ( const StateType & state : statesToEliminate )
    		extendedAutomaton = eliminateState ( extendedAutomaton, state );
    
    	// step 4
    	regexp::UnboundedRegExpStructure < SymbolType > finalStateLoop = regexp::RegExpIterate::iterate ( transitionsToRegExp ( extendedAutomaton, * extendedAutomaton.getFinalStates ( ).begin ( ), * extendedAutomaton.getFinalStates ( ).begin ( ) ) );
    	regexp::UnboundedRegExpStructure < SymbolType > initialToFinalState = regexp::RegExpConcatenate::concatenate ( transitionsToRegExp ( extendedAutomaton, extendedAutomaton.getInitialState ( ), *extendedAutomaton.getFinalStates ( ).begin ( ) ),finalStateLoop );
    	return regexp::UnboundedRegExp < SymbolType > ( regexp::simplify::RegExpOptimize::optimize ( initialToFinalState ) );
    }
    
    template < class SymbolType, class StateType >
    automaton::ExtendedNFA < SymbolType, StateType > ToRegExpStateElimination::eliminateState ( const automaton::ExtendedNFA < SymbolType, StateType > & extendedAutomaton, const StateType & q ) {
    	automaton::ExtendedNFA < SymbolType, StateType > newAutomaton ( extendedAutomaton.getInitialState ( ) ); // sure that q is neither initial nor final (follows from step 2 - extending ExtendedNFA)
    	newAutomaton.setStates ( extendedAutomaton.getStates ( ) );
    	newAutomaton.removeState ( q ); // preserve all states but q (the one to eliminate)
    	newAutomaton.setInputAlphabet ( extendedAutomaton.getInputAlphabet ( ) );
    	newAutomaton.setFinalStates ( extendedAutomaton.getFinalStates ( ) );
    
    	for ( const StateType & p: newAutomaton.getStates ( ) ) {
    		for ( const StateType & r : newAutomaton.getStates ( ) ) {
    			regexp::UnboundedRegExpStructure < SymbolType > concat = transitionsToRegExp ( extendedAutomaton, p, q );
    			concat = regexp::RegExpConcatenate::concatenate ( concat, regexp::RegExpIterate::iterate ( transitionsToRegExp ( extendedAutomaton, q, q ) ) );
    			concat = regexp::RegExpConcatenate::concatenate ( concat, transitionsToRegExp ( extendedAutomaton, q, r ) );
    
    			regexp::UnboundedRegExpStructure < SymbolType > alt = regexp::RegExpAlternate::alternate ( concat, transitionsToRegExp ( extendedAutomaton, p, r ) );
    
    			newAutomaton.addTransition ( p, regexp::simplify::RegExpOptimize::optimize ( alt ), r );
    		}
    	}
    
    	return newAutomaton;
    }
    
    template < class SymbolType, class StateType >
    const regexp::UnboundedRegExpStructure < SymbolType > ToRegExpStateElimination::transitionsToRegExp ( const automaton::ExtendedNFA < SymbolType, StateType > & automaton, const StateType & from, const StateType & to ) {
    	regexp::UnboundedRegExpStructure < SymbolType > ret ( regexp::UnboundedRegExpEmpty < SymbolType > { } );
    
    	for ( const auto & transition: automaton.getTransitionsFromState ( from ) )
    		if ( transition.second.count ( to ) > 0)
    			ret = regexp::RegExpAlternate::alternate ( ret, transition.first.second );
    
    	return regexp::simplify::RegExpOptimize::optimize ( ret );
    }
    
    template < class SymbolType, class StateType >
    void ToRegExpStateElimination::extendExtendedNFA ( automaton::ExtendedNFA < SymbolType, StateType > & automaton ) {
    	const StateType & initState = automaton.getInitialState ( );
    	if ( automaton.getFinalStates ( ).count ( initState ) > 0 || automaton.getTransitionsToState ( initState ).size ( ) > 0 ) {
    		StateType q0 = common::createUnique ( initState, automaton.getStates ( ) );
    		automaton.addState ( q0 );
    
    		regexp::UnboundedRegExpStructure < DefaultSymbolType > regexp { regexp::UnboundedRegExpEpsilon < DefaultSymbolType > ( ) };
    		automaton.addTransition ( q0, regexp, initState );
    
    		automaton.setInitialState ( q0 );
    	}
    
    	if ( automaton.getFinalStates ( ).size ( ) > 1 ) {
    		StateType f = common::createUnique ( label::FinalStateLabel::instance < StateType > ( ), automaton.getStates ( ) );
    		automaton.addState ( f );
    
    		for ( const StateType & state : automaton.getFinalStates ( ) ) {
    			regexp::UnboundedRegExpStructure < SymbolType > regexp { regexp::UnboundedRegExpEpsilon < SymbolType > ( ) };
    			automaton.addTransition ( state, regexp, f );
    		}
    
    		ext::set < StateType > newFinalStates;
    		newFinalStates.insert ( f );
    		automaton.setFinalStates ( newFinalStates );
    	}
    }
    
    
    } /* namespace convert */
    
    } /* namespace automaton */
    
    #endif /* TO_REG_EXP_STATE_ELIMINATION_H_ */