From 95f28a3fa5d70eed319dd290934fa0de7e071a95 Mon Sep 17 00:00:00 2001
From: Tomas Pecka <peckato1@fit.cvut.cz>
Date: Mon, 25 Mar 2019 16:14:51 +0100
Subject: [PATCH] Algo: FA to RE conversion using the algorithm from the proof
 of the DFA->RE theorem

---
 .../src/automaton/convert/ToRegExpKleene.cpp  |  47 ++++
 .../src/automaton/convert/ToRegExpKleene.h    | 202 ++++++++++++++++++
 .../test-src/tests/conversionsTest.cpp        |   5 +
 examples2/automaton/DFA4.txt                  |   4 +
 4 files changed, 258 insertions(+)
 create mode 100644 alib2algo/src/automaton/convert/ToRegExpKleene.cpp
 create mode 100644 alib2algo/src/automaton/convert/ToRegExpKleene.h
 create mode 100644 examples2/automaton/DFA4.txt

diff --git a/alib2algo/src/automaton/convert/ToRegExpKleene.cpp b/alib2algo/src/automaton/convert/ToRegExpKleene.cpp
new file mode 100644
index 0000000000..e8ae6354da
--- /dev/null
+++ b/alib2algo/src/automaton/convert/ToRegExpKleene.cpp
@@ -0,0 +1,47 @@
+/*
+ * ToRegExpKleene.cpp
+ *
+ *  Created on: 25. 3. 2019
+ *	  Author: Tomas Pecka
+ */
+
+#include "ToRegExpKleene.h"
+#include <registration/AlgoRegistration.hpp>
+
+namespace automaton {
+
+namespace convert {
+
+auto ToRegExpKleeneDFA = registration::AbstractRegister < ToRegExpKleene, regexp::UnboundedRegExp < >, const automaton::DFA < > & > ( ToRegExpKleene::convert, "automaton" ).setDocumentation ( // registration object for the DFA overload of convert; "automaton" presumably names its parameter for the registry - confirm
+"Performs conversion.\n\
+\n\
+@param automaton finite automaton to convert\n\
+@return unbounded regular expression equivalent to the original automaton" );
+
+auto ToRegExpKleeneNFA = registration::AbstractRegister < ToRegExpKleene, regexp::UnboundedRegExp < >, const automaton::NFA < > & > ( ToRegExpKleene::convert, "automaton" ).setDocumentation ( // same registration for the NFA overload
+"Performs conversion.\n\
+\n\
+@param automaton finite automaton to convert\n\
+@return unbounded regular expression equivalent to the original automaton" );
+
+auto ToRegExpKleeneENFA = registration::AbstractRegister < ToRegExpKleene, regexp::UnboundedRegExp < >, const automaton::EpsilonNFA < > & > ( ToRegExpKleene::convert, "automaton" ).setDocumentation ( // same registration for the EpsilonNFA overload
+"Performs conversion.\n\
+\n\
+@param automaton finite automaton to convert\n\
+@return unbounded regular expression equivalent to the original automaton" );
+
+auto ToRegExpKleeneMISNFA = registration::AbstractRegister < ToRegExpKleene, regexp::UnboundedRegExp < >, const automaton::MultiInitialStateNFA < > & > ( ToRegExpKleene::convert, "automaton" ).setDocumentation ( // same registration for the MultiInitialStateNFA overload
+"Performs conversion.\n\
+\n\
+@param automaton finite automaton to convert\n\
+@return unbounded regular expression equivalent to the original automaton" );
+
+auto ToRegExpKleeneMISENFA = registration::AbstractRegister < ToRegExpKleene, regexp::UnboundedRegExp < >, const automaton::MultiInitialStateEpsilonNFA < > & > ( ToRegExpKleene::convert, "automaton" ).setDocumentation ( // same registration for the MultiInitialStateEpsilonNFA overload
+"Performs conversion.\n\
+\n\
+@param automaton finite automaton to convert\n\
+@return unbounded regular expression equivalent to the original automaton" );
+
+} /* namespace convert */
+
+} /* namespace automaton */
diff --git a/alib2algo/src/automaton/convert/ToRegExpKleene.h b/alib2algo/src/automaton/convert/ToRegExpKleene.h
new file mode 100644
index 0000000000..87a9ac15e6
--- /dev/null
+++ b/alib2algo/src/automaton/convert/ToRegExpKleene.h
@@ -0,0 +1,202 @@
+/*
+ * ToRegExpKleene.h
+ *
+ * This file is part of Algorithms library toolkit.
+ * Copyright (C) 2017 Jan Travnicek (jan.travnicek@fit.cvut.cz)
+
+ * Algorithms library toolkit is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+
+ * Algorithms library toolkit is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with Algorithms library toolkit.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  Created on: 25. 3. 2019
+ *	  Author: Tomas Pecka
+ */
+
+#ifndef TO_REG_EXP_KLEENE_H_
+#define TO_REG_EXP_KLEENE_H_
+
+#include <regexp/unbounded/UnboundedRegExp.h>
+
+#include <automaton/FSM/DFA.h>
+#include <automaton/FSM/NFA.h>
+#include <automaton/FSM/EpsilonNFA.h>
+#include <automaton/FSM/MultiInitialStateNFA.h>
+#include <automaton/FSM/MultiInitialStateEpsilonNFA.h>
+#include <automaton/Automaton.h>
+
+#include <regexp/simplify/RegExpOptimize.h>
+#include <regexp/transform/RegExpAlternate.h>
+#include <regexp/transform/RegExpConcatenate.h>
+#include <regexp/transform/RegExpIterate.h>
+
+#include <common/createUnique.hpp>
+
+#include <label/FinalStateLabel.h>
+
+namespace automaton {
+
+namespace convert {
+
+/**
+ * Converts a finite automaton to a regular expression using the algorithm from the proof of Kleene's theorem (Hopcroft 2nd edition: 3.2.1 - Th 3.4).
+ * This algorithm returns the regular expression as regexp::UnboundedRegExp.
+ */
+class ToRegExpKleene {
+public:
+	/**
+	 * Performs conversion of a deterministic finite automaton.
+	 * @tparam SymbolType the type of input symbols of the accepted automaton
+	 * @tparam StateType the type of states of the accepted automaton
+	 * @param automaton finite automaton to convert
+	 * @return unbounded regular expression equivalent to the original automaton
+	 */
+	template < class SymbolType, class StateType >
+	static regexp::UnboundedRegExp < SymbolType > convert ( const automaton::DFA < SymbolType, StateType > & automaton );
+
+	/**
+	 * @overload
+	 */
+	template < class SymbolType, class StateType >
+	static regexp::UnboundedRegExp < SymbolType > convert ( const automaton::NFA < SymbolType, StateType > & automaton );
+
+	/**
+	 * @overload
+	 */
+	template < class SymbolType, class StateType >
+	static regexp::UnboundedRegExp < SymbolType > convert ( const automaton::MultiInitialStateNFA < SymbolType, StateType > & automaton );
+
+	/**
+	 * @overload
+	 */
+	template < class SymbolType, class EpsilonType, class StateType >
+	static regexp::UnboundedRegExp < SymbolType > convert ( const automaton::MultiInitialStateEpsilonNFA < SymbolType, EpsilonType, StateType > & automaton );
+
+	/**
+	 * @overload
+	 */
+	template < class SymbolType, class EpsilonType, class StateType >
+	static regexp::UnboundedRegExp < SymbolType > convert ( const automaton::EpsilonNFA < SymbolType, EpsilonType, StateType > & automaton );
+
+private:
+	/**
+	 * Helper function creating the regexp of all direct transitions from state @p from to state @p to: the alternation of their labels, plus epsilon when @p from equals @p to.
+	 * @tparam SymbolType the type of input symbols of the accepted automaton
+	 * @tparam EpsilonType the type of the epsilon symbol of the accepted automaton
+	 * @tparam StateType the type of states of the accepted automaton
+	 * @param automaton automaton to select the transitions
+	 * @param from source state in @p automaton
+	 * @param to   destination state in @p automaton
+	 * @return the regular expression node representing the transitions between states @p from and @p to
+	 */
+	template < class SymbolType, class EpsilonType, class StateType >
+	static const regexp::UnboundedRegExpStructure < SymbolType > transitionsToRegExp ( const automaton::MultiInitialStateEpsilonNFA < SymbolType, EpsilonType, StateType > & automaton, const StateType & from, const StateType & to );
+
+	/**
+	 * @return the regexp stored in @p R under @p key, or a default UnboundedRegExpEmpty structure if @p key does not exist (a stand-in for map.at ( ) with a default value)
+	 */
+	template < class SymbolType, class StateType >
+	static const regexp::UnboundedRegExpStructure < SymbolType > RGetDefault ( const std::map < std::pair < StateType, StateType >, regexp::UnboundedRegExpStructure < SymbolType > > & R, const std::pair < StateType, StateType > & key );
+};
+
+template < class SymbolType, class StateType > // lookup in R with a default: never inserts into R and returns by value
+const regexp::UnboundedRegExpStructure < SymbolType > ToRegExpKleene::RGetDefault ( const std::map < std::pair < StateType, StateType >, regexp::UnboundedRegExpStructure < SymbolType > > & R, const std::pair < StateType, StateType > & key ) {
+	auto it = R.find ( key ); // single lookup, reused for both the presence test and the value
+	return it != R.end ( ) ? it -> second : regexp::UnboundedRegExpStructure < SymbolType > ( regexp::UnboundedRegExpEmpty < SymbolType > ( ) ); // missing key => freshly built UnboundedRegExpEmpty structure
+}
+
+template < class SymbolType, class StateType > // DFA overload: adapter only, no conversion logic of its own
+regexp::UnboundedRegExp < SymbolType > ToRegExpKleene::convert ( const automaton::DFA < SymbolType, StateType > & automaton ) {
+	return convert ( automaton::MultiInitialStateEpsilonNFA < SymbolType, DefaultEpsilonType, StateType > ( automaton ) ); // build a MultiInitialStateEpsilonNFA (with DefaultEpsilonType) from the DFA and delegate to that overload
+}
+
+template < class SymbolType, class StateType > // NFA overload: adapter only, no conversion logic of its own
+regexp::UnboundedRegExp < SymbolType > ToRegExpKleene::convert ( const automaton::NFA < SymbolType, StateType > & automaton ) {
+	return convert ( automaton::MultiInitialStateEpsilonNFA < SymbolType, DefaultEpsilonType, StateType > ( automaton ) ); // build a MultiInitialStateEpsilonNFA (with DefaultEpsilonType) from the NFA and delegate to that overload
+}
+
+template < class SymbolType, class EpsilonType, class StateType > // EpsilonNFA overload: adapter only, no conversion logic of its own
+regexp::UnboundedRegExp < SymbolType > ToRegExpKleene::convert ( const automaton::EpsilonNFA < SymbolType, EpsilonType, StateType > & automaton ) {
+	return convert ( automaton::MultiInitialStateEpsilonNFA < SymbolType, EpsilonType, StateType > ( automaton ) ); // keeps the caller's EpsilonType; build a MultiInitialStateEpsilonNFA and delegate to that overload
+}
+
+template < class SymbolType, class StateType > // MultiInitialStateNFA overload: adapter only, no conversion logic of its own
+regexp::UnboundedRegExp < SymbolType > ToRegExpKleene::convert ( const automaton::MultiInitialStateNFA < SymbolType, StateType > & automaton ) {
+	return convert ( automaton::MultiInitialStateEpsilonNFA < SymbolType, DefaultEpsilonType, StateType > ( automaton ) ); // build a MultiInitialStateEpsilonNFA (with DefaultEpsilonType) from the input and delegate to that overload
+}
+
+template < class SymbolType, class EpsilonType, class StateType > // core of the algorithm; the other convert overloads delegate here
+regexp::UnboundedRegExp < SymbolType > ToRegExpKleene::convert ( const automaton::MultiInitialStateEpsilonNFA < SymbolType, EpsilonType, StateType > & automaton ) {
+	std::vector < std::map < std::pair < StateType, StateType >, regexp::UnboundedRegExpStructure < SymbolType > > > R ( 1 ); // R [ j ] maps a state pair ( a, b ) to a regexp; starts with one (still empty) table R [ 0 ]
+	size_t k = 0; // index of the table being filled
+
+	// fill R [ 0 ]: for every ordered state pair ( a, b ) the regexp of the direct transitions a -> b (see transitionsToRegExp)
+	for ( const StateType & a : automaton.getStates ( ) )
+		for ( const StateType & b : automaton.getStates ( ) )
+			R [ k ] [ std::make_pair ( a, b ) ] = transitionsToRegExp ( automaton, a, b );
+	k += 1;
+
+	// one further table per state q = kState: R [ k ] ( a, b ) = R [ k - 1 ] ( a, b ) + R [ k - 1 ] ( a, q ) . ( R [ k - 1 ] ( q, q ) )* . R [ k - 1 ] ( q, b )
+	for ( const StateType & kState : automaton.getStates ( ) ) {
+		R.push_back ( std::map < std::pair < StateType, StateType >, regexp::UnboundedRegExpStructure < SymbolType > > ( ) ); // NOTE(review): only R [ k - 1 ] is read below, yet all earlier tables stay alive until return
+
+		for ( const StateType & a : automaton.getStates ( ) ) {
+			for ( const StateType & b : automaton.getStates ( ) ) {
+				// TODO regexp::RegExpConcatenate on unbounded - variadic parameter count (would replace the nested concatenate calls below)
+
+				const regexp::UnboundedRegExpStructure < SymbolType > re = regexp::RegExpConcatenate::concatenate (
+							RGetDefault ( R [ k - 1 ], std::make_pair ( a, kState ) ),
+							regexp::RegExpConcatenate::concatenate (
+								regexp::RegExpIterate::iterate ( RGetDefault ( R [ k - 1 ], std::make_pair ( kState, kState ) ) ),
+								RGetDefault ( R [ k - 1 ], std::make_pair ( kState, b ) ) ) );
+
+				R [ k ] [ std::make_pair ( a, b ) ] = regexp::simplify::RegExpOptimize::optimize ( regexp::RegExpAlternate::alternate ( RGetDefault ( R [ k - 1 ], std::make_pair ( a, b ) ), re ) ); // each entry is optimized before it is stored
+			}
+		}
+
+		k += 1;
+	}
+
+	regexp::UnboundedRegExpStructure < SymbolType > ret ( regexp::UnboundedRegExpEmpty < SymbolType > { } ); // stays the empty regexp when there is no ( initial, final ) pair
+	for ( const auto & i : automaton.getInitialStates ( ) ) {
+		for ( const auto & f : automaton.getFinalStates ( ) ) {
+			ret = regexp::RegExpAlternate::alternate ( ret, RGetDefault ( R [ k - 1 ], std::make_pair ( i, f ) ) ); // R [ k - 1 ] is the last table filled above
+		}
+	}
+
+	return regexp::UnboundedRegExp < SymbolType > ( ret ); // NOTE(review): unlike the table entries, this final alternation is not passed through RegExpOptimize - confirm that is intended
+}
+
+
+template < class SymbolType, class EpsilonType, class StateType > // builds the regexp for the direct transitions from -> to
+const regexp::UnboundedRegExpStructure < SymbolType > ToRegExpKleene::transitionsToRegExp ( const automaton::MultiInitialStateEpsilonNFA < SymbolType, EpsilonType, StateType > & automaton, const StateType & from, const StateType & to ) {
+	regexp::UnboundedRegExpStructure < SymbolType > ret ( regexp::UnboundedRegExpEmpty < SymbolType > { } ); // result if nothing below applies
+
+	if ( from == to ) // same state: the result always contains epsilon
+		ret.setStructure ( regexp::UnboundedRegExpEpsilon < SymbolType > ( ) );
+
+	for ( const auto & transition: automaton.getTransitionsFromState ( from ) ) { // entries are used as ( ( state, label ), set of target states ) - shape inferred from the accesses below
+		if ( transition.second.count ( to ) ) { // only transitions whose target set contains 'to'
+			if ( transition.first.second.template is < SymbolType > ( ) ) // label holds an input symbol: alternate that symbol in
+				ret = regexp::RegExpAlternate::alternate ( ret, regexp::UnboundedRegExpStructure < SymbolType > ( regexp::UnboundedRegExpSymbol < SymbolType > ( transition.first.second.template get < SymbolType > ( ) ) ) );
+			else // any other label is treated as an epsilon transition
+				ret = regexp::RegExpAlternate::alternate ( ret, regexp::UnboundedRegExpStructure < SymbolType > ( regexp::UnboundedRegExpEpsilon < SymbolType > ( ) ) );
+		}
+	}
+
+	return regexp::simplify::RegExpOptimize::optimize ( ret ); // the accumulated alternation is optimized before it is returned
+}
+
+} /* namespace convert */
+
+} /* namespace automaton */
+
+#endif /* TO_REG_EXP_KLEENE_H_ */
diff --git a/alib2integrationtest/test-src/tests/conversionsTest.cpp b/alib2integrationtest/test-src/tests/conversionsTest.cpp
index 844ce64e8a..325a8d2d1b 100644
--- a/alib2integrationtest/test-src/tests/conversionsTest.cpp
+++ b/alib2integrationtest/test-src/tests/conversionsTest.cpp
@@ -30,11 +30,16 @@ TEST_CASE ( "FA-RG-RE conversions test", "[integration]" ) {
 			"automaton::convert::ToRegExpStateElimination - | regexp::convert::ToAutomatonDerivation -",
 			"automaton::convert::ToRegExpStateElimination - | regexp::convert::ToAutomatonGlushkov   -",
 			"automaton::convert::ToRegExpStateElimination - | regexp::convert::ToAutomatonThompson   -",
+			"automaton::convert::ToRegExpKleene           - | regexp::convert::ToAutomatonDerivation -",
+			"automaton::convert::ToRegExpKleene           - | regexp::convert::ToAutomatonGlushkov   -",
+			"automaton::convert::ToRegExpKleene           - | regexp::convert::ToAutomatonThompson   -",
 
 			"automaton::convert::ToRegExpAlgebraic        - | regexp::convert::ToGrammarRightRGDerivation - | grammar::convert::ToGrammarLeftRG - | grammar::convert::ToAutomaton -",
 			"automaton::convert::ToRegExpAlgebraic        - | regexp::convert::ToGrammarRightRGGlushkov   - | grammar::convert::ToGrammarLeftRG - | grammar::convert::ToAutomaton -",
 			"automaton::convert::ToRegExpStateElimination - | regexp::convert::ToGrammarRightRGDerivation - | grammar::convert::ToGrammarLeftRG - | grammar::convert::ToAutomaton -",
 			"automaton::convert::ToRegExpStateElimination - | regexp::convert::ToGrammarRightRGGlushkov   - | grammar::convert::ToGrammarLeftRG - | grammar::convert::ToAutomaton -",
+			"automaton::convert::ToRegExpKleene           - | regexp::convert::ToGrammarRightRGDerivation - | grammar::convert::ToGrammarLeftRG - | grammar::convert::ToAutomaton -",
+			"automaton::convert::ToRegExpKleene           - | regexp::convert::ToGrammarRightRGGlushkov   - | grammar::convert::ToGrammarLeftRG - | grammar::convert::ToAutomaton -",
 
 			"automaton::convert::ToGrammarLeftRG  - | grammar::convert::ToRegExp - | regexp::convert::ToAutomatonDerivation -",
 			"automaton::convert::ToGrammarRightRG - | grammar::convert::ToRegExp - | regexp::convert::ToAutomatonDerivation -",
diff --git a/examples2/automaton/DFA4.txt b/examples2/automaton/DFA4.txt
new file mode 100644
index 0000000000..7185db4997
--- /dev/null
+++ b/examples2/automaton/DFA4.txt
@@ -0,0 +1,4 @@
+DFA a b
+>1 1 2
+<2 3 2
+<3 1 2
-- 
GitLab