ToRegExpStateElimination.h

/*
 * ToRegExpStateElimination.h
 *
 * This file is part of Algorithms library toolkit.
 * Copyright (C) 2017 Jan Travnicek (jan.travnicek@fit.cvut.cz)

 * Algorithms library toolkit is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.

 * Algorithms library toolkit is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.

 * You should have received a copy of the GNU General Public License
 * along with Algorithms library toolkit.  If not, see <http://www.gnu.org/licenses/>.
 *
 *  Created on: 9. 2. 2014
 *	  Author: Tomas Pecka
 */

#ifndef TO_REG_EXP_STATE_ELIMINATION_H_
#define TO_REG_EXP_STATE_ELIMINATION_H_

#include <regexp/unbounded/UnboundedRegExp.h>

#include <automaton/FSM/DFA.h>
#include <automaton/FSM/NFA.h>
#include <automaton/FSM/MultiInitialStateNFA.h>
#include <automaton/FSM/EpsilonNFA.h>
#include <automaton/FSM/ExtendedNFA.h>

#include <regexp/simplify/RegExpOptimize.h>
#include <regexp/transform/RegExpAlternate.h>
#include <regexp/transform/RegExpConcatenate.h>
#include <regexp/transform/RegExpIterate.h>

#include <common/createUnique.hpp>

#include <label/FinalStateLabel.h>

namespace automaton {

namespace convert {

/**
 * Converts a finite automaton to a regular expression using using the State Elimination algorithm (Melichar: Jazyky a překlady 2.118).
 * This algorithm returns the regular expression as regexp::UnboundedRegExp.
 */
class ToRegExpStateElimination {
public:
	/**
	 * Performs conversion.
	 * @tparam T type of the finite automaton
	 * @tparam SymbolType the type of input symbols of the accepted automaton
	 * @tparam StateType the type of states of the accepted automaton
	 * @param automaton finite automaton to convert
	 * @return unbounded regular expression equivalent to the original automaton
	 */
	template < class T >
	static regexp::UnboundedRegExp < typename T::SymbolType > convert ( const T & automaton );

private:
	/**
	 * Helper function to create new initial and final states in the automaton for the algorithm.
	 * @tparam SymbolType the type of input symbols of the accepted automaton
	 * @tparam StateType the type of states of the accepted automaton
	 * @param automaton extended finite automaton
	 */
	template < class SymbolType, class StateType >
	static void extendExtendedNFA(automaton::ExtendedNFA < SymbolType, StateType > & automaton );

	/**
	 * Helper function to create a regexp from all transitions between states @p from and @p to.
	 * It creates the alternation regexp of all such transitions.
	 * @tparam SymbolType the type of input symbols of the accepted automaton
	 * @tparam StateType the type of states of the accepted automaton
	 * @param automaton automaton to select the transitions
	 * @param from source state in @param automaton
	 * @param to   destination state in @param automaton
	 * @return the regular expression node representing the transitions between states @p from and @p to
	 */
	template < class SymbolType, class StateType >
	static regexp::UnboundedRegExpStructure < SymbolType > transitionsToRegExp ( const automaton::ExtendedNFA < SymbolType, StateType > & automaton, const StateType & from, const StateType & to );

	/**
	 * Helper function for the elimination of a single state according to the algorithm.
	 * @tparam SymbolType the type of input symbols of the accepted automaton
	 * @tparam StateType the type of states of the accepted automaton
	 * @param extendedAutomaton automaton for the elimination
	 * @param state state to eliminate
	 * @return the @p extendedAutomaton after the elimination of a state @state.
	 */
	template < class SymbolType, class StateType >
	static automaton::ExtendedNFA < SymbolType, StateType > eliminateState ( const automaton::ExtendedNFA < SymbolType, StateType > & extendedAutomaton, const StateType & q );
};

template < class T >
regexp::UnboundedRegExp < typename T::SymbolType > ToRegExpStateElimination::convert ( const T & automaton ) {
	using SymbolType = typename T::SymbolType;
	using StateType = typename T::StateType;

	if ( automaton.getFinalStates ( ).empty ( ) )
		return regexp::UnboundedRegExp < SymbolType > ( regexp::UnboundedRegExpStructure < DefaultSymbolType > ( regexp::UnboundedRegExpEmpty < DefaultSymbolType > ( ) ) );

	// steps 1 + 2
	automaton::ExtendedNFA < SymbolType, StateType > extendedAutomaton ( automaton );
	extendExtendedNFA ( extendedAutomaton );

	// step 3 - Exterminate!
	// select all states that are neither final nor initial
	ext::set < StateType > statesToEliminate = extendedAutomaton.getStates ( );
	statesToEliminate.erase ( extendedAutomaton.getInitialState ( ) );
	statesToEliminate.erase ( * extendedAutomaton.getFinalStates ( ).begin ( ) );

	for ( const StateType & state : statesToEliminate )
		extendedAutomaton = eliminateState ( extendedAutomaton, state );

	// step 4
	regexp::UnboundedRegExpStructure < SymbolType > finalStateLoop = regexp::transform::RegExpIterate::iterate ( transitionsToRegExp ( extendedAutomaton, * extendedAutomaton.getFinalStates ( ).begin ( ), * extendedAutomaton.getFinalStates ( ).begin ( ) ) );
	regexp::UnboundedRegExpStructure < SymbolType > initialToFinalState = regexp::transform::RegExpConcatenate::concatenate ( transitionsToRegExp ( extendedAutomaton, extendedAutomaton.getInitialState ( ), *extendedAutomaton.getFinalStates ( ).begin ( ) ),finalStateLoop );
	return regexp::UnboundedRegExp < SymbolType > ( regexp::simplify::RegExpOptimize::optimize ( initialToFinalState ) );
}

template < class SymbolType, class StateType >
automaton::ExtendedNFA < SymbolType, StateType > ToRegExpStateElimination::eliminateState ( const automaton::ExtendedNFA < SymbolType, StateType > & extendedAutomaton, const StateType & q ) {
	automaton::ExtendedNFA < SymbolType, StateType > newAutomaton ( extendedAutomaton.getInitialState ( ) ); // sure that q is neither initial nor final (follows from step 2 - extending ExtendedNFA)
	newAutomaton.setStates ( extendedAutomaton.getStates ( ) );
	newAutomaton.removeState ( q ); // preserve all states but q (the one to eliminate)
	newAutomaton.setInputAlphabet ( extendedAutomaton.getInputAlphabet ( ) );
	newAutomaton.setFinalStates ( extendedAutomaton.getFinalStates ( ) );

	for ( const StateType & p: newAutomaton.getStates ( ) ) {
		for ( const StateType & r : newAutomaton.getStates ( ) ) {
			regexp::UnboundedRegExpStructure < SymbolType > concat = transitionsToRegExp ( extendedAutomaton, p, q );
			concat = regexp::transform::RegExpConcatenate::concatenate ( concat, regexp::transform::RegExpIterate::iterate ( transitionsToRegExp ( extendedAutomaton, q, q ) ) );
			concat = regexp::transform::RegExpConcatenate::concatenate ( concat, transitionsToRegExp ( extendedAutomaton, q, r ) );

			regexp::UnboundedRegExpStructure < SymbolType > alt = regexp::transform::RegExpAlternate::alternate ( concat, transitionsToRegExp ( extendedAutomaton, p, r ) );

			newAutomaton.addTransition ( p, regexp::simplify::RegExpOptimize::optimize ( alt ), r );
		}
	}

	return newAutomaton;
}

template < class SymbolType, class StateType >
regexp::UnboundedRegExpStructure < SymbolType > ToRegExpStateElimination::transitionsToRegExp ( const automaton::ExtendedNFA < SymbolType, StateType > & automaton, const StateType & from, const StateType & to ) {
	regexp::UnboundedRegExpStructure < SymbolType > ret ( regexp::UnboundedRegExpEmpty < SymbolType > { } );

	for ( const auto & transition: automaton.getTransitionsFromState ( from ) )
		if ( transition.second == to )
			ret = regexp::transform::RegExpAlternate::alternate ( ret, transition.first.second );

	return regexp::simplify::RegExpOptimize::optimize ( ret );
}

template < class SymbolType, class StateType >
void ToRegExpStateElimination::extendExtendedNFA ( automaton::ExtendedNFA < SymbolType, StateType > & automaton ) {
	const StateType & initState = automaton.getInitialState ( );
	if ( automaton.getFinalStates ( ).count ( initState ) > 0 || !automaton.getTransitionsToState ( initState ).empty ( ) ) {
		StateType q0 = common::createUnique ( initState, automaton.getStates ( ) );
		automaton.addState ( q0 );

		regexp::UnboundedRegExpStructure < DefaultSymbolType > regexp { regexp::UnboundedRegExpEpsilon < DefaultSymbolType > ( ) };
		automaton.addTransition ( q0, regexp, initState );

		automaton.setInitialState ( q0 );
	}

	if ( automaton.getFinalStates ( ).size ( ) > 1 ) {
		StateType f = common::createUnique ( label::FinalStateLabel::instance < StateType > ( ), automaton.getStates ( ) );
		automaton.addState ( f );

		for ( const StateType & state : automaton.getFinalStates ( ) ) {
			regexp::UnboundedRegExpStructure < SymbolType > regexp { regexp::UnboundedRegExpEpsilon < SymbolType > ( ) };
			automaton.addTransition ( state, regexp, f );
		}

		ext::set < StateType > newFinalStates;
		newFinalStates.insert ( f );
		automaton.setFinalStates ( newFinalStates );
	}
}

} /* namespace convert */

} /* namespace automaton */

#endif /* TO_REG_EXP_STATE_ELIMINATION_H_ */