From 95f28a3fa5d70eed319dd290934fa0de7e071a95 Mon Sep 17 00:00:00 2001 From: Tomas Pecka <peckato1@fit.cvut.cz> Date: Mon, 25 Mar 2019 16:14:51 +0100 Subject: [PATCH] Algo: FA to RE conversion using the algorithm from the proof of the DFA->RE theorem --- .../src/automaton/convert/ToRegExpKleene.cpp | 47 ++++ .../src/automaton/convert/ToRegExpKleene.h | 202 ++++++++++++++++++ .../test-src/tests/conversionsTest.cpp | 5 + examples2/automaton/DFA4.txt | 4 + 4 files changed, 258 insertions(+) create mode 100644 alib2algo/src/automaton/convert/ToRegExpKleene.cpp create mode 100644 alib2algo/src/automaton/convert/ToRegExpKleene.h create mode 100644 examples2/automaton/DFA4.txt diff --git a/alib2algo/src/automaton/convert/ToRegExpKleene.cpp b/alib2algo/src/automaton/convert/ToRegExpKleene.cpp new file mode 100644 index 0000000000..e8ae6354da --- /dev/null +++ b/alib2algo/src/automaton/convert/ToRegExpKleene.cpp @@ -0,0 +1,47 @@ +/* + * ToRegExpKleene.cpp + * + * Created on: 25. 3. 2019 + * Author: Tomas Pecka + */ + +#include "ToRegExpKleene.h" +#include <registration/AlgoRegistration.hpp> + +namespace automaton { + +namespace convert { + +auto ToRegExpKleeneDFA = registration::AbstractRegister < ToRegExpKleene, regexp::UnboundedRegExp < >, const automaton::DFA < > & > ( ToRegExpKleene::convert, "automaton" ).setDocumentation ( +"Performs conversion.\n\ +\n\ +@param automaton finite automaton to convert\n\ +@return unbounded regular expression equivalent to the original automaton" ); + +auto ToRegExpKleeneNFA = registration::AbstractRegister < ToRegExpKleene, regexp::UnboundedRegExp < >, const automaton::NFA < > & > ( ToRegExpKleene::convert, "automaton" ).setDocumentation ( +"Performs conversion.\n\ +\n\ +@param automaton finite automaton to convert\n\ +@return unbounded regular expression equivalent to the original automaton" ); + +auto ToRegExpKleeneENFA = registration::AbstractRegister < ToRegExpKleene, regexp::UnboundedRegExp < >, const automaton::EpsilonNFA < > & > ( 
ToRegExpKleene::convert, "automaton" ).setDocumentation ( +"Performs conversion.\n\ +\n\ +@param automaton finite automaton to convert\n\ +@return unbounded regular expression equivalent to the original automaton" ); + +auto ToRegExpKleeneMISNFA = registration::AbstractRegister < ToRegExpKleene, regexp::UnboundedRegExp < >, const automaton::MultiInitialStateNFA < > & > ( ToRegExpKleene::convert, "automaton" ).setDocumentation ( +"Performs conversion.\n\ +\n\ +@param automaton finite automaton to convert\n\ +@return unbounded regular expression equivalent to the original automaton" ); + +auto ToRegExpKleeneMISENFA = registration::AbstractRegister < ToRegExpKleene, regexp::UnboundedRegExp < >, const automaton::MultiInitialStateEpsilonNFA < > & > ( ToRegExpKleene::convert, "automaton" ).setDocumentation ( +"Performs conversion.\n\ +\n\ +@param automaton finite automaton to convert\n\ +@return unbounded regular expression equivalent to the original automaton" ); + +} /* namespace convert */ + +} /* namespace automaton */ diff --git a/alib2algo/src/automaton/convert/ToRegExpKleene.h b/alib2algo/src/automaton/convert/ToRegExpKleene.h new file mode 100644 index 0000000000..87a9ac15e6 --- /dev/null +++ b/alib2algo/src/automaton/convert/ToRegExpKleene.h @@ -0,0 +1,202 @@ +/* + * ToRegExpKleene.h + * + * This file is part of Algorithms library toolkit. + * Copyright (C) 2017 Jan Travnicek (jan.travnicek@fit.cvut.cz) + + * Algorithms library toolkit is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + + * Algorithms library toolkit is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ + * You should have received a copy of the GNU General Public License + * along with Algorithms library toolkit. If not, see <http://www.gnu.org/licenses/>. + * + * Created on: 25. 3. 2019 + * Author: Tomas Pecka + */ + +#ifndef TO_REG_EXP_KLEENE_H_ +#define TO_REG_EXP_KLEENE_H_ + +#include <regexp/unbounded/UnboundedRegExp.h> + +#include <automaton/FSM/DFA.h> +#include <automaton/FSM/NFA.h> +#include <automaton/FSM/EpsilonNFA.h> +#include <automaton/FSM/MultiInitialStateNFA.h> +#include <automaton/FSM/MultiInitialStateEpsilonNFA.h> +#include <automaton/Automaton.h> + +#include <regexp/simplify/RegExpOptimize.h> +#include <regexp/transform/RegExpAlternate.h> +#include <regexp/transform/RegExpConcatenate.h> +#include <regexp/transform/RegExpIterate.h> + +#include <common/createUnique.hpp> + +#include <label/FinalStateLabel.h> + +namespace automaton { + +namespace convert { + +/** + * Converts a finite automaton to a regular expression using the algorithm from Kleene's theorem (Hopcroft 2nd edition: 3.2.1 - Th 3.4). + * This algorithm returns the regular expression as regexp::UnboundedRegExp. + */ +class ToRegExpKleene { +public: + /** + * Performs conversion. 
+ * @tparam SymbolType the type of input symbols of the accepted automaton + * @tparam StateType the type of states of the accepted automaton + * @param automaton finite automaton to convert + * @return unbounded regular expression equivalent to the original automaton + */ + template < class SymbolType, class StateType > + static regexp::UnboundedRegExp < SymbolType > convert ( const automaton::DFA < SymbolType, StateType > & automaton ); + + /** + * @override + */ + template < class SymbolType, class StateType > + static regexp::UnboundedRegExp < SymbolType > convert ( const automaton::NFA < SymbolType, StateType > & automaton ); + + /** + * @override + */ + template < class SymbolType, class StateType > + static regexp::UnboundedRegExp < SymbolType > convert ( const automaton::MultiInitialStateNFA < SymbolType, StateType > & automaton ); + + /** + * @override + */ + template < class SymbolType, class EpsilonType, class StateType > + static regexp::UnboundedRegExp < SymbolType > convert ( const automaton::MultiInitialStateEpsilonNFA < SymbolType, EpsilonType, StateType > & automaton ); + + /** + * @override + */ + template < class SymbolType, class EpsilonType, class StateType > + static regexp::UnboundedRegExp < SymbolType > convert ( const automaton::EpsilonNFA < SymbolType, EpsilonType, StateType > & automaton ); + +private: + /** + * Helper function to create a regexp from all transitions between states @p from and @p to. + * It creates the alternation regexp of all such transitions. 
+ * @tparam SymbolType the type of input symbols of the accepted automaton + * @tparam StateType the type of states of the accepted automaton + * @param automaton automaton to select the transitions + * @param from source state in @p automaton + * @param to destination state in @p automaton + * @return the regular expression node representing the transitions between states @p from and @p to + */ + template < class SymbolType, class EpsilonType, class StateType > + static const regexp::UnboundedRegExpStructure < SymbolType > transitionsToRegExp ( const automaton::MultiInitialStateEpsilonNFA < SymbolType, EpsilonType, StateType > & automaton, const StateType & from, const StateType & to ); + + /** + * @return the value stored in @p R under @p key (as map.at ( ) would return), or the default value of UnboundedRegExpEmpty if the key does not exist + */ + template < class SymbolType, class StateType > + static const regexp::UnboundedRegExpStructure < SymbolType > RGetDefault ( const std::map < std::pair < StateType, StateType >, regexp::UnboundedRegExpStructure < SymbolType > > & R, const std::pair < StateType, StateType > & key ); +}; + +template < class SymbolType, class StateType > +const regexp::UnboundedRegExpStructure < SymbolType > ToRegExpKleene::RGetDefault ( const std::map < std::pair < StateType, StateType >, regexp::UnboundedRegExpStructure < SymbolType > > & R, const std::pair < StateType, StateType > & key ) { + auto it = R.find ( key ); + return it != R.end ( ) ? 
it -> second : regexp::UnboundedRegExpStructure < SymbolType > ( regexp::UnboundedRegExpEmpty < SymbolType > ( ) ); +} + +template < class SymbolType, class StateType > +regexp::UnboundedRegExp < SymbolType > ToRegExpKleene::convert ( const automaton::DFA < SymbolType, StateType > & automaton ) { + return convert ( automaton::MultiInitialStateEpsilonNFA < SymbolType, DefaultEpsilonType, StateType > ( automaton ) ); +} + +template < class SymbolType, class StateType > +regexp::UnboundedRegExp < SymbolType > ToRegExpKleene::convert ( const automaton::NFA < SymbolType, StateType > & automaton ) { + return convert ( automaton::MultiInitialStateEpsilonNFA < SymbolType, DefaultEpsilonType, StateType > ( automaton ) ); +} + +template < class SymbolType, class EpsilonType, class StateType > +regexp::UnboundedRegExp < SymbolType > ToRegExpKleene::convert ( const automaton::EpsilonNFA < SymbolType, EpsilonType, StateType > & automaton ) { + return convert ( automaton::MultiInitialStateEpsilonNFA < SymbolType, EpsilonType, StateType > ( automaton ) ); +} + +template < class SymbolType, class StateType > +regexp::UnboundedRegExp < SymbolType > ToRegExpKleene::convert ( const automaton::MultiInitialStateNFA < SymbolType, StateType > & automaton ) { + return convert ( automaton::MultiInitialStateEpsilonNFA < SymbolType, DefaultEpsilonType, StateType > ( automaton ) ); +} + +template < class SymbolType, class EpsilonType, class StateType > +regexp::UnboundedRegExp < SymbolType > ToRegExpKleene::convert ( const automaton::MultiInitialStateEpsilonNFA < SymbolType, EpsilonType, StateType > & automaton ) { + std::vector < std::map < std::pair < StateType, StateType >, regexp::UnboundedRegExpStructure < SymbolType > > > R ( 1 ); + size_t k = 0; + + // initialize R [ 0 ] + for ( const StateType & a : automaton.getStates ( ) ) + for ( const StateType & b : automaton.getStates ( ) ) + R [ k ] [ std::make_pair ( a, b ) ] = transitionsToRegExp ( automaton, a, b ); + k += 1; + + // 
initialize R [ 1 ] ... R [ k ] + for ( const StateType & kState : automaton.getStates ( ) ) { + R.push_back ( std::map < std::pair < StateType, StateType >, regexp::UnboundedRegExpStructure < SymbolType > > ( ) ); + + for ( const StateType & a : automaton.getStates ( ) ) { + for ( const StateType & b : automaton.getStates ( ) ) { + // TODO regexp::RegExpConcatenate on unbounded - variadic parameter count + + const regexp::UnboundedRegExpStructure < SymbolType > re = regexp::RegExpConcatenate::concatenate ( + RGetDefault ( R [ k - 1 ], std::make_pair ( a, kState ) ), + regexp::RegExpConcatenate::concatenate ( + regexp::RegExpIterate::iterate ( RGetDefault ( R [ k - 1 ], std::make_pair ( kState, kState ) ) ), + RGetDefault ( R [ k - 1 ], std::make_pair ( kState, b ) ) ) ); + + R [ k ] [ std::make_pair ( a, b ) ] = regexp::simplify::RegExpOptimize::optimize ( regexp::RegExpAlternate::alternate ( RGetDefault ( R [ k - 1 ], std::make_pair ( a, b ) ), re ) ); + } + } + + k += 1; + } + + regexp::UnboundedRegExpStructure < SymbolType > ret ( regexp::UnboundedRegExpEmpty < SymbolType > { } ); + for ( const auto & i : automaton.getInitialStates ( ) ) { + for ( const auto & f : automaton.getFinalStates ( ) ) { + ret = regexp::RegExpAlternate::alternate ( ret, RGetDefault ( R [ k - 1 ], std::make_pair ( i, f ) ) ); + } + } + + return regexp::UnboundedRegExp < SymbolType > ( ret ); +} + + +template < class SymbolType, class EpsilonType, class StateType > +const regexp::UnboundedRegExpStructure < SymbolType > ToRegExpKleene::transitionsToRegExp ( const automaton::MultiInitialStateEpsilonNFA < SymbolType, EpsilonType, StateType > & automaton, const StateType & from, const StateType & to ) { + regexp::UnboundedRegExpStructure < SymbolType > ret ( regexp::UnboundedRegExpEmpty < SymbolType > { } ); + + if ( from == to ) + ret.setStructure ( regexp::UnboundedRegExpEpsilon < SymbolType > ( ) ); + + for ( const auto & transition: automaton.getTransitionsFromState ( from ) ) { + if ( 
transition.second.count ( to ) ) { + if ( transition.first.second.template is < SymbolType > ( ) ) + ret = regexp::RegExpAlternate::alternate ( ret, regexp::UnboundedRegExpStructure < SymbolType > ( regexp::UnboundedRegExpSymbol < SymbolType > ( transition.first.second.template get < SymbolType > ( ) ) ) ); + else + ret = regexp::RegExpAlternate::alternate ( ret, regexp::UnboundedRegExpStructure < SymbolType > ( regexp::UnboundedRegExpEpsilon < SymbolType > ( ) ) ); + } + } + + return regexp::simplify::RegExpOptimize::optimize ( ret ); +} + +} /* namespace convert */ + +} /* namespace automaton */ + +#endif /* TO_REG_EXP_KLEENE_H_ */ diff --git a/alib2integrationtest/test-src/tests/conversionsTest.cpp b/alib2integrationtest/test-src/tests/conversionsTest.cpp index 844ce64e8a..325a8d2d1b 100644 --- a/alib2integrationtest/test-src/tests/conversionsTest.cpp +++ b/alib2integrationtest/test-src/tests/conversionsTest.cpp @@ -30,11 +30,16 @@ TEST_CASE ( "FA-RG-RE conversions test", "[integration]" ) { "automaton::convert::ToRegExpStateElimination - | regexp::convert::ToAutomatonDerivation -", "automaton::convert::ToRegExpStateElimination - | regexp::convert::ToAutomatonGlushkov -", "automaton::convert::ToRegExpStateElimination - | regexp::convert::ToAutomatonThompson -", + "automaton::convert::ToRegExpKleene - | regexp::convert::ToAutomatonDerivation -", + "automaton::convert::ToRegExpKleene - | regexp::convert::ToAutomatonGlushkov -", + "automaton::convert::ToRegExpKleene - | regexp::convert::ToAutomatonThompson -", "automaton::convert::ToRegExpAlgebraic - | regexp::convert::ToGrammarRightRGDerivation - | grammar::convert::ToGrammarLeftRG - | grammar::convert::ToAutomaton -", "automaton::convert::ToRegExpAlgebraic - | regexp::convert::ToGrammarRightRGGlushkov - | grammar::convert::ToGrammarLeftRG - | grammar::convert::ToAutomaton -", "automaton::convert::ToRegExpStateElimination - | regexp::convert::ToGrammarRightRGDerivation - | grammar::convert::ToGrammarLeftRG - | 
grammar::convert::ToAutomaton -", "automaton::convert::ToRegExpStateElimination - | regexp::convert::ToGrammarRightRGGlushkov - | grammar::convert::ToGrammarLeftRG - | grammar::convert::ToAutomaton -", + "automaton::convert::ToRegExpKleene - | regexp::convert::ToGrammarRightRGDerivation - | grammar::convert::ToGrammarLeftRG - | grammar::convert::ToAutomaton -", + "automaton::convert::ToRegExpKleene - | regexp::convert::ToGrammarRightRGGlushkov - | grammar::convert::ToGrammarLeftRG - | grammar::convert::ToAutomaton -", "automaton::convert::ToGrammarLeftRG - | grammar::convert::ToRegExp - | regexp::convert::ToAutomatonDerivation -", "automaton::convert::ToGrammarRightRG - | grammar::convert::ToRegExp - | regexp::convert::ToAutomatonDerivation -", diff --git a/examples2/automaton/DFA4.txt b/examples2/automaton/DFA4.txt new file mode 100644 index 0000000000..7185db4997 --- /dev/null +++ b/examples2/automaton/DFA4.txt @@ -0,0 +1,4 @@ +DFA a b +>1 1 2 +<2 3 2 +<3 1 2 -- GitLab