Newer
Older
* This file is part of Algorithms library toolkit.
* Copyright (C) 2017 Jan Travnicek (jan.travnicek@fit.cvut.cz)
* Algorithms library toolkit is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* Algorithms library toolkit is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with Algorithms library toolkit. If not, see <http://www.gnu.org/licenses/>.
*
#ifndef TO_REG_EXP_STATE_ELIMINATION_H_
#define TO_REG_EXP_STATE_ELIMINATION_H_
#include <regexp/unbounded/UnboundedRegExp.h>
#include <automaton/FSM/DFA.h>
#include <automaton/FSM/NFA.h>
#include <automaton/FSM/MultiInitialStateNFA.h>
#include <automaton/FSM/EpsilonNFA.h>
#include <automaton/FSM/ExtendedNFA.h>
#include <automaton/Automaton.h>
#include <regexp/simplify/RegExpOptimize.h>
#include <regexp/transform/RegExpAlternate.h>
#include <regexp/transform/RegExpConcatenate.h>
#include <regexp/transform/RegExpIterate.h>
#include <common/createUnique.hpp>
#include <label/FinalStateLabel.h>
* Converts a finite automaton to a regular expression using using the State Elimination algorithm (Melichar: Jazyky a překlady 2.118).
* This algorithm returns the regular expression as regexp::UnboundedRegExp.
* @tparam SymbolType the type of input symbols of the accepted automaton
* @tparam StateType the type of states of the accepted automaton
* @param automaton finite automaton to convert
* @return unbounded regular expression equivalent to the original automaton
template < class T, class SymbolType = typename automaton::SymbolTypeOfAutomaton < T >, class StateType = typename automaton::StateTypeOfAutomaton < T > >
static regexp::UnboundedRegExp < SymbolType > convert ( const T & automaton );
/**
* Helper function to create new initial and final states in the automaton for the algorithm.
* @tparam SymbolType the type of input symbols of the accepted automaton
* @tparam StateType the type of states of the accepted automaton
* @param automaton extended finite automaton
*/
template < class SymbolType, class StateType >
static void extendExtendedNFA(automaton::ExtendedNFA < SymbolType, StateType > & automaton );
/**
* Helper function to create a regexp from all transitions between states @p from and @p to.
* It creates the alternation regexp of all such transitions.
* @tparam SymbolType the type of input symbols of the accepted automaton
* @tparam StateType the type of states of the accepted automaton
* @param automaton automaton to select the transitions
* @param from source state in @param automaton
* @param to destination state in @param automaton
* @return the regular expression node representing the transitions between states @p from and @p to
*/
template < class SymbolType, class StateType >
static const regexp::UnboundedRegExpStructure < SymbolType > transitionsToRegExp ( const automaton::ExtendedNFA < SymbolType, StateType > & automaton, const StateType & from, const StateType & to );
/**
* Helper function for the elimination of a single state according to the algorithm.
* @tparam SymbolType the type of input symbols of the accepted automaton
* @tparam StateType the type of states of the accepted automaton
* @param extendedAutomaton automaton for the elimination
* @param state state to eliminate
* @return the @p extendedAutomaton after the elimination of a state @state.
*/
template < class SymbolType, class StateType >
static automaton::ExtendedNFA < SymbolType, StateType > eliminateState ( const automaton::ExtendedNFA < SymbolType, StateType > & extendedAutomaton, const StateType & state );
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
template < class T, class SymbolType, class StateType >
regexp::UnboundedRegExp < SymbolType > ToRegExpStateElimination::convert ( const T & automaton ) {
if ( automaton.getFinalStates ( ).size ( ) == 0 )
return regexp::UnboundedRegExp < SymbolType > ( regexp::UnboundedRegExpStructure < DefaultSymbolType > ( regexp::UnboundedRegExpEmpty < DefaultSymbolType > ( ) ) );
// steps 1 + 2
automaton::ExtendedNFA < SymbolType, StateType > extendedAutomaton ( automaton );
extendExtendedNFA ( extendedAutomaton );
// step 3 - Exterminate!
// select all states that are neither final nor initial
ext::set < StateType > statesToEliminate = extendedAutomaton.getStates ( );
statesToEliminate.erase ( extendedAutomaton.getInitialState ( ) );
statesToEliminate.erase ( * extendedAutomaton.getFinalStates ( ).begin ( ) );
for ( const StateType & state : statesToEliminate )
extendedAutomaton = eliminateState ( extendedAutomaton, state );
// step 4
regexp::UnboundedRegExpStructure < SymbolType > finalStateLoop = regexp::RegExpIterate::iterate ( transitionsToRegExp ( extendedAutomaton, * extendedAutomaton.getFinalStates ( ).begin ( ), * extendedAutomaton.getFinalStates ( ).begin ( ) ) );
regexp::UnboundedRegExpStructure < SymbolType > initialToFinalState = regexp::RegExpConcatenate::concatenate ( transitionsToRegExp ( extendedAutomaton, extendedAutomaton.getInitialState ( ), *extendedAutomaton.getFinalStates ( ).begin ( ) ),finalStateLoop );
return regexp::UnboundedRegExp < SymbolType > ( regexp::simplify::RegExpOptimize::optimize ( initialToFinalState ) );
}
template < class SymbolType, class StateType >
automaton::ExtendedNFA < SymbolType, StateType > ToRegExpStateElimination::eliminateState ( const automaton::ExtendedNFA < SymbolType, StateType > & extendedAutomaton, const StateType & q ) {
automaton::ExtendedNFA < SymbolType, StateType > newAutomaton ( extendedAutomaton.getInitialState ( ) ); // sure that q is neither initial nor final (follows from step 2 - extending ExtendedNFA)
newAutomaton.setStates ( extendedAutomaton.getStates ( ) );
newAutomaton.removeState ( q ); // preserve all states but q (the one to eliminate)
newAutomaton.setInputAlphabet ( extendedAutomaton.getInputAlphabet ( ) );
newAutomaton.setFinalStates ( extendedAutomaton.getFinalStates ( ) );
for ( const StateType & p: newAutomaton.getStates ( ) ) {
for ( const StateType & r : newAutomaton.getStates ( ) ) {
regexp::UnboundedRegExpStructure < SymbolType > concat = transitionsToRegExp ( extendedAutomaton, p, q );
concat = regexp::RegExpConcatenate::concatenate ( concat, regexp::RegExpIterate::iterate ( transitionsToRegExp ( extendedAutomaton, q, q ) ) );
concat = regexp::RegExpConcatenate::concatenate ( concat, transitionsToRegExp ( extendedAutomaton, q, r ) );
regexp::UnboundedRegExpStructure < SymbolType > alt = regexp::RegExpAlternate::alternate ( concat, transitionsToRegExp ( extendedAutomaton, p, r ) );
newAutomaton.addTransition ( p, regexp::simplify::RegExpOptimize::optimize ( alt ), r );
}
}
return newAutomaton;
}
template < class SymbolType, class StateType >
const regexp::UnboundedRegExpStructure < SymbolType > ToRegExpStateElimination::transitionsToRegExp ( const automaton::ExtendedNFA < SymbolType, StateType > & automaton, const StateType & from, const StateType & to ) {
regexp::UnboundedRegExpStructure < SymbolType > ret ( regexp::UnboundedRegExpEmpty < SymbolType > { } );
for ( const auto & transition: automaton.getTransitionsFromState ( from ) )
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
ret = regexp::RegExpAlternate::alternate ( ret, transition.first.second );
return regexp::simplify::RegExpOptimize::optimize ( ret );
}
template < class SymbolType, class StateType >
void ToRegExpStateElimination::extendExtendedNFA ( automaton::ExtendedNFA < SymbolType, StateType > & automaton ) {
const StateType & initState = automaton.getInitialState ( );
if ( automaton.getFinalStates ( ).count ( initState ) > 0 || automaton.getTransitionsToState ( initState ).size ( ) > 0 ) {
StateType q0 = common::createUnique ( initState, automaton.getStates ( ) );
automaton.addState ( q0 );
regexp::UnboundedRegExpStructure < DefaultSymbolType > regexp { regexp::UnboundedRegExpEpsilon < DefaultSymbolType > ( ) };
automaton.addTransition ( q0, regexp, initState );
automaton.setInitialState ( q0 );
}
if ( automaton.getFinalStates ( ).size ( ) > 1 ) {
StateType f = common::createUnique ( label::FinalStateLabel::instance < StateType > ( ), automaton.getStates ( ) );
automaton.addState ( f );
for ( const StateType & state : automaton.getFinalStates ( ) ) {
regexp::UnboundedRegExpStructure < SymbolType > regexp { regexp::UnboundedRegExpEpsilon < SymbolType > ( ) };
automaton.addTransition ( state, regexp, f );
}
ext::set < StateType > newFinalStates;
newFinalStates.insert ( f );
automaton.setFinalStates ( newFinalStates );
}
}