From b1ab92584c1a54b3a96190a43fa5726d97f08b04 Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Thu, 20 Sep 2018 14:41:22 +0200
Subject: [PATCH] redesign regexp derivation based conversions

---
 .../regexp/convert/ToAutomatonDerivation.h    | 70 +++++----------
 .../convert/ToGrammarRightRGDerivation.cpp    | 87 +------------------
 .../convert/ToGrammarRightRGDerivation.h      | 64 +++++++++++++-
 3 files changed, 87 insertions(+), 134 deletions(-)

diff --git a/alib2algo/src/regexp/convert/ToAutomatonDerivation.h b/alib2algo/src/regexp/convert/ToAutomatonDerivation.h
index ce28ae7fec..b0b53a9c3d 100644
--- a/alib2algo/src/regexp/convert/ToAutomatonDerivation.h
+++ b/alib2algo/src/regexp/convert/ToAutomatonDerivation.h
@@ -46,66 +46,42 @@ automaton::DFA < SymbolType, unsigned > ToAutomatonDerivation::convert(const T&
 	// 1.
 	T V = regexp::simplify::RegExpOptimize::optimize(regexp);
 
-	ext::set<T> Q = { V };
-	ext::deque<ext::set<T>> Qi;
+	ext::deque < T > Qi;
 
-	Qi.push_back(ext::set<T>());
-	Qi.at(0).insert(V);
+	Qi.push_back ( V );
 
-	int i = 1;
-
-	// 2.
-	while(! Qi.at(i - 1).empty()) {
-		Qi.push_back(ext::set<T>()); // initialize set Q_i
-
-		for(const auto& dregexp : Qi.at(i - 1)) {
-
-			for(const auto& a : regexp.getAlphabet()) {
-				T derived = regexp::RegExpDerivation::derivation(dregexp, a);
-				derived = regexp::simplify::RegExpOptimize::optimize(derived);
-
-				// this will also add \emptyset as a regexp (and as FA state)
-				if(Q.count(derived) == 0) // if this state has already been found, do not add
-					Qi.at(i).insert(derived);
-			}
-		}
-
-		Q.insert(Qi.at(i).begin(), Qi.at(i).end());
-		i += 1;
-	}
-
-	ext::map<T, unsigned> stateMap;
+	ext::map < T, unsigned> stateMap;
 	unsigned stateId = 0;
+	stateMap.insert ( std::make_pair ( V, stateId ++ ) );
 
-	for(const auto& r : Q) {
-		stateMap.insert ( std::make_pair ( r, stateId ++ ) );
-	}
-
-	// ------------------------------------------------------------------------
-	// 3.
-
-	automaton::DFA < SymbolType, unsigned > automaton ( stateMap.find ( V )->second );
-
-	for(const auto& r : stateMap) {
-		automaton.addState(r.second);
-	}
-
+	automaton::DFA < SymbolType, unsigned > automaton ( stateMap.at ( V ) );
 	automaton.setInputAlphabet(regexp.getAlphabet());
 
-	for(const auto& r : Q) {
+	// 2., 3.
+	while(! Qi.empty()) {
+		T r = std::move ( Qi.back ( ) ); // take the most recently queued, not yet processed regexp
+		Qi.pop_back ( );
 
-		for(const auto& a: regexp.getAlphabet()) {
+		for(const auto& a : regexp.getAlphabet()) {
 			T derived = regexp::RegExpDerivation::derivation(r, a);
 			derived = regexp::simplify::RegExpOptimize::optimize(derived);
 
-			automaton.addTransition(stateMap.find(r)->second, a, stateMap.find(derived)->second);
+			// this will also add \emptyset as a regexp (and as FA state)
+			if(stateMap.count(derived) == 0) { // if this state has already been found, do not add
+				Qi.push_back(derived);
+				automaton.addState ( stateId );
+				stateMap.insert ( std::make_pair ( derived, stateId ++ ) );
+
+				if(regexp::properties::RegExpEpsilon::languageContainsEpsilon(derived))
+					automaton.addFinalState(stateMap.at(derived));
+			}
+
+			automaton.addTransition(stateMap.at(r), a, stateMap.at(derived));
 		}
 	}
 
-	for(const auto& r : Q) {
-		if(regexp::properties::RegExpEpsilon::languageContainsEpsilon(r))
-			automaton.addFinalState(stateMap.find(r)->second);
-	}
+	if(regexp::properties::RegExpEpsilon::languageContainsEpsilon ( V ) )
+		automaton.addFinalState(stateMap.at( V ));
 
 	return automaton;
 }
diff --git a/alib2algo/src/regexp/convert/ToGrammarRightRGDerivation.cpp b/alib2algo/src/regexp/convert/ToGrammarRightRGDerivation.cpp
index 3f1d2bb64a..b6f2df7a1a 100644
--- a/alib2algo/src/regexp/convert/ToGrammarRightRGDerivation.cpp
+++ b/alib2algo/src/regexp/convert/ToGrammarRightRGDerivation.cpp
@@ -6,97 +6,14 @@
  */
 
 #include "ToGrammarRightRGDerivation.h"
-
-#include <alib/set>
-#include <alib/deque>
-#include <alib/vector>
-#include <alib/hexavigesimal>
-
-#include <common/createUnique.hpp>
-
-#include <regexp/simplify/RegExpOptimize.h>
-#include <regexp/transform/RegExpDerivation.h>
-#include <regexp/properties/RegExpEpsilon.h>
 #include <registration/AlgoRegistration.hpp>
 
 namespace regexp {
 
 namespace convert {
 
-template<class T>
-grammar::RightRG < > ToGrammarRightRGDerivation::convert(const T& regexp) {
-	// 1.
-	T V = regexp::simplify::RegExpOptimize::optimize(regexp);
-
-	ext::set<T> N = { V };
-	ext::deque<ext::set<T>> Ni;
-
-	Ni.push_back(ext::set<T>());
-	Ni.at(0).insert(V);
-
-	int i = 1;
-
-	// 2.
-	while(! Ni.at(i - 1).empty()) {
-		Ni.push_back(ext::set<T>()); // initialize set Q_i
-
-		for(const auto & dregexp : Ni.at( i - 1 )) {
-			for(const auto & a : regexp.getAlphabet()) {
-				T derived = regexp::RegExpDerivation::derivation(dregexp, a);
-				derived = regexp::simplify::RegExpOptimize::optimize(derived);
-
-				// this will also add \emptyset as a regexp (and as FA state)
-				if(N.count(derived) == 0) // if this state has already been found, do not add
-					Ni.at(i).insert(derived);
-			}
-		}
-
-		N.insert(Ni.at(i).begin(), Ni.at(i).end());
-		i += 1;
-	}
-
-	// ------------------------------------------------------------------------
-	// 3.
-
-	int nonterminalId = 0;
-	ext::map<T, DefaultSymbolType> nonterminalMap;
-
-	DefaultSymbolType ntV(nonterminalId++);
-	nonterminalMap.insert(std::make_pair(V, ntV));
-
-	grammar::RightRG < > grammar(ntV);
-	grammar.setTerminalAlphabet(regexp.getAlphabet());
-
-	for(const auto & r : N) {
-		if(V == r) continue;
-
-		DefaultSymbolType nt = common::createUnique(DefaultSymbolType(ext::toBase26(nonterminalId++)), grammar.getTerminalAlphabet(), grammar.getNonterminalAlphabet());
-		nonterminalMap.insert(std::make_pair(r, nt));
-		grammar.addNonterminalSymbol(nt);
-	}
-
-	for(const auto & r : N) {
-		for(const auto & a : regexp.getAlphabet()) {
-			T derived = regexp::RegExpDerivation::derivation(r, a);
-			derived = regexp::simplify::RegExpOptimize::optimize(derived);
-
-			grammar.addRule(nonterminalMap.find(r)->second, ext::make_pair(a, nonterminalMap.find(derived)->second));
-
-			if(regexp::properties::RegExpEpsilon::languageContainsEpsilon(derived))
-				grammar.addRule(nonterminalMap.find(r)->second, a);
-		}
-	}
-
-	grammar.setInitialSymbol(nonterminalMap.find(V)->second);
-
-	if(regexp::properties::RegExpEpsilon::languageContainsEpsilon(V))
-		grammar.setGeneratesEpsilon(true); // okay, because of this feature we do not have to bother with extending the grammar with new rules and nonterminals. YAY!
-
-	return grammar;
-}
-
-auto ToGrammarRightRGDerivationUnboundedRegExp = registration::AbstractRegister < ToGrammarRightRGDerivation, grammar::RightRG < >, const regexp::UnboundedRegExp < > & > ( ToGrammarRightRGDerivation::convert );
-auto ToGrammarRightRGDerivationFormalRegExp = registration::AbstractRegister < ToGrammarRightRGDerivation, grammar::RightRG < >, const regexp::FormalRegExp < > & > ( ToGrammarRightRGDerivation::convert );
+auto ToGrammarRightRGDerivationUnboundedRegExp = registration::AbstractRegister < ToGrammarRightRGDerivation, grammar::RightRG < DefaultSymbolType, unsigned >, const regexp::UnboundedRegExp < > & > ( ToGrammarRightRGDerivation::convert ); // registration entry for convert taking an UnboundedRegExp < >
+auto ToGrammarRightRGDerivationFormalRegExp = registration::AbstractRegister < ToGrammarRightRGDerivation, grammar::RightRG < DefaultSymbolType, unsigned >, const regexp::FormalRegExp < > & > ( ToGrammarRightRGDerivation::convert ); // registration entry for convert taking a FormalRegExp < >
 
 } /* namespace convert */
 
diff --git a/alib2algo/src/regexp/convert/ToGrammarRightRGDerivation.h b/alib2algo/src/regexp/convert/ToGrammarRightRGDerivation.h
index b708b30217..2791abadf9 100644
--- a/alib2algo/src/regexp/convert/ToGrammarRightRGDerivation.h
+++ b/alib2algo/src/regexp/convert/ToGrammarRightRGDerivation.h
@@ -9,9 +9,21 @@
 #define TO_GRAMMAR_RIGHT_RG_DERIVATION_H_
 
 #include <grammar/Regular/RightRG.h>
+#include <regexp/RegExp.h>
 #include <regexp/formal/FormalRegExp.h>
 #include <regexp/unbounded/UnboundedRegExp.h>
 
+#include <alib/set>
+#include <alib/deque>
+#include <alib/vector>
+#include <alib/hexavigesimal>
+
+#include <common/createUnique.hpp>
+
+#include <regexp/simplify/RegExpOptimize.h>
+#include <regexp/transform/RegExpDerivation.h>
+#include <regexp/properties/RegExpEpsilon.h>
+
 namespace regexp {
 
 namespace convert {
@@ -26,11 +38,59 @@ public:
 	 * Performs conversion.
 	 * @return right regular grammar equivalent to source regexp.
 	 */
-	template <class T>
-	static grammar::RightRG < > convert(const T& regexp);
+	template  < class T, class SymbolType = typename regexp::SymbolTypeOfRegexp < T > >
+	static grammar::RightRG < SymbolType, unsigned > convert ( const T & regexp ); // the definition creates the grammar's nonterminals as unsigned ids
 
 };
 
+template < class T, class SymbolType >
+grammar::RightRG < SymbolType, unsigned > ToGrammarRightRGDerivation::convert ( const T & regexp ) {
+	// 1. Optimize the input regexp; V is the regexp the construction starts from.
+	T V = regexp::simplify::RegExpOptimize::optimize(regexp);
+
+	// 2., 3. Discover derivatives and emit the grammar rules in one worklist pass.
+	unsigned nonterminalId = 0; // next candidate id handed to createUnique
+	ext::map < T, unsigned > nonterminalMap; // optimized regexp -> its nonterminal
+
+	unsigned ntV = common::createUnique ( nonterminalId ++, regexp.getAlphabet ( ) );
+	nonterminalMap.insert ( std::make_pair ( V, ntV ) );
+
+	grammar::RightRG < SymbolType, unsigned > grammar(ntV); // ntV (the nonterminal of V) is the constructor argument
+	grammar.setTerminalAlphabet ( regexp.getAlphabet ( ) );
+
+	ext::deque < T > Ni; // regexps whose derivatives still have to be computed
+
+	Ni.push_back ( V );
+
+	while(! Ni.empty()) {
+		T r = std::move ( Ni.back ( ) ); // take the most recently queued pending regexp
+		Ni.pop_back ( );
+
+		for(const auto & a : regexp.getAlphabet()) {
+			T derived = regexp::RegExpDerivation::derivation(r, a);
+			derived = regexp::simplify::RegExpOptimize::optimize(derived);
+
+			// this will also add \emptyset as a regexp (and as a grammar nonterminal)
+			if ( nonterminalMap.count(derived) == 0) { // only a regexp not seen before is queued and given a new nonterminal
+				Ni.push_back(derived);
+				unsigned nt = common::createUnique ( nonterminalId ++, grammar.getTerminalAlphabet ( ), grammar.getNonterminalAlphabet ( ) );
+				grammar.addNonterminalSymbol ( nt );
+				nonterminalMap.insert ( derived, nt );
+			}
+
+			if(regexp::properties::RegExpEpsilon::languageContainsEpsilon(derived)) // derived accepts epsilon: also add rule r -> a
+				grammar.addRule(nonterminalMap.at(r), a);
+
+			grammar.addRule(nonterminalMap.at(r), ext::make_pair(a, nonterminalMap.at(derived))); // rule r -> a derived
+		}
+	}
+
+	if(regexp::properties::RegExpEpsilon::languageContainsEpsilon(V))
+		grammar.setGeneratesEpsilon(true); // okay, because of this feature we do not have to bother with extending the grammar with new rules and nonterminals. YAY!
+
+	return grammar;
+}
+
 } /* namespace convert */
 
 } /* namespace regexp */
-- 
GitLab