From fe65fdb2c4cf77fe68e915b3fe590555e15519be Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Wed, 12 Sep 2018 21:39:53 +0200
Subject: [PATCH] Improve ToCNF algorithm

---
 alib2algo/src/grammar/simplify/ToCNF.cpp      | 168 +++++++++---------
 alib2algo/src/grammar/simplify/ToCNF.h        |   4 +
 .../grammar/simplify/GrammarToCNFTest.cpp     |   6 +-
 3 files changed, 93 insertions(+), 85 deletions(-)

diff --git a/alib2algo/src/grammar/simplify/ToCNF.cpp b/alib2algo/src/grammar/simplify/ToCNF.cpp
index 4c658b89f0..903a644e95 100644
--- a/alib2algo/src/grammar/simplify/ToCNF.cpp
+++ b/alib2algo/src/grammar/simplify/ToCNF.cpp
@@ -15,120 +15,124 @@
 
 #include <grammar/RawRules.h>
 #include <grammar/AddRawRule.h>
+#include <object/ObjectFactory.h>
 
 namespace grammar {
 
 namespace simplify {
 
-template<class T>
-ext::pair<DefaultSymbolType, DefaultSymbolType> splitToPairs(T& grammar, const ext::vector<DefaultSymbolType>& rhs, unsigned from, unsigned size, ext::map<DefaultSymbolType, DefaultSymbolType>& createdSymbols) {
-	if(size == 2) {
-		return ext::make_pair(rhs[from], rhs[from + 1]);
-	} else if(size == 3) {
-		DefaultSymbolType firstLhs {rhs[from]};
-
-		auto second = splitToPairs(grammar, rhs, from + 1, 2, createdSymbols);
-		DefaultSymbolType secondProposal{DefaultSymbolsPairType(second)};
-		if(!createdSymbols.count(secondProposal)) {
-			createdSymbols.insert(std::make_pair(secondProposal, common::createUnique(secondProposal, grammar.getTerminalAlphabet(), grammar.getNonterminalAlphabet())));
+void splitRule ( const DefaultSymbolType & lhs, ext::vector < DefaultSymbolType > rhs, grammar::CNF < DefaultSymbolType > & result ) { // Recursively breaks the rule lhs -> rhs into two-symbol rules added to result. TODO: after templating the CNF, make rhs a const reference
+	switch ( rhs.size ( ) ) { // every size other than 2 and 3 (including 0 and 1) takes the default branch
+	case 2: { // already a pair: add lhs -> rhs[0] rhs[1] directly
+			result.addRule ( lhs, ext::make_pair ( std::move ( rhs [ 0 ] ), std::move ( rhs [ 1 ] ) ) );
+			break;
 		}
-		grammar.addNonterminalSymbol(createdSymbols.find(secondProposal)->second);
-		grammar::AddRawRule::addRawRule(grammar,createdSymbols.find(secondProposal)->second, {std::move(second.first), std::move(second.second)});
-
-		return ext::make_pair(std::move(firstLhs), createdSymbols.find(secondProposal)->second);
-	} else {
-		auto first = splitToPairs(grammar, rhs, from, size / 2, createdSymbols);
-		DefaultSymbolType firstProposal{DefaultSymbolsPairType(first)};
-		if(!createdSymbols.count(firstProposal)) {
-			createdSymbols.insert(std::make_pair(firstProposal, common::createUnique(firstProposal, grammar.getTerminalAlphabet(), grammar.getNonterminalAlphabet())));
+	case 3: { // keep rhs[0], fold the two trailing symbols into one new symbol
+			ext::vector < DefaultSymbolType > right ( std::make_move_iterator ( std::next ( rhs.begin ( ) ) ), std::make_move_iterator ( rhs.end ( ) ) ); // moves rhs[1..] into right; rhs[0] is not moved from
+			DefaultSymbolType second = object::ObjectFactory::make ( right ); // symbol constructed from the tail sequence itself
+			if ( result.addNonterminalSymbol ( second ) ) // presumably true only when the symbol was newly added - verify; otherwise the recursion is skipped
+				splitRule ( second, std::move ( right ), result );
+
+			result.addRule ( lhs, ext::make_pair ( std::move ( rhs [ 0 ] ), std::move ( second ) ) ); // lhs -> rhs[0] second
+			break;
 		}
-		grammar.addNonterminalSymbol(createdSymbols.find(firstProposal)->second);
-		grammar::AddRawRule::addRawRule ( grammar, createdSymbols.find(firstProposal)->second, {std::move(first.first), std::move(first.second)});
+	default: { // split rhs into two halves, each represented by its own symbol
+			ext::vector < DefaultSymbolType > left;
+			for ( unsigned i = 0; i < rhs.size ( ) / 2; ++ i ) // the first rhs.size()/2 symbols
+				left.push_back ( std::move ( rhs [ i ] ) );
 
-		auto second = splitToPairs(grammar, rhs, from + size / 2, size - size / 2, createdSymbols);
-		DefaultSymbolType secondProposal{DefaultSymbolsPairType(second)};
-		if(!createdSymbols.count(secondProposal)) {
-			createdSymbols.insert(std::make_pair(secondProposal, common::createUnique(secondProposal, grammar.getTerminalAlphabet(), grammar.getNonterminalAlphabet())));
-		}
-		grammar.addNonterminalSymbol(createdSymbols.find(secondProposal)->second);
-		grammar::AddRawRule::addRawRule(grammar,createdSymbols.find(secondProposal)->second, {std::move(second.first), std::move(second.second)});
+			DefaultSymbolType first = object::ObjectFactory::make ( left );
+			if ( result.addNonterminalSymbol ( first ) ) // same guard as in case 3: recurse only when the call returns true
+				splitRule ( first, std::move ( left ), result );
+
+			ext::vector < DefaultSymbolType > right;
+			for ( unsigned i = rhs.size ( ) / 2; i < rhs.size ( ); ++ i ) // the remaining symbols
+				right.push_back ( std::move ( rhs [ i ] ) );
 
-		return ext::make_pair(createdSymbols.find(firstProposal)->second, createdSymbols.find(secondProposal)->second);
+			DefaultSymbolType second = object::ObjectFactory::make ( right );
+			if ( result.addNonterminalSymbol ( second ) )
+				splitRule ( second, std::move ( right ), result );
+
+			result.addRule ( lhs, ext::make_pair ( std::move ( first ), std::move ( second ) ) ); // lhs -> first second
+		}
 	}
 }
 
 template<class T>
 grammar::CNF < > convertInternal( const T & origGrammar ) {
-	T grammarTmp(origGrammar.getInitialSymbol());
-
-	grammarTmp.setNonterminalAlphabet(origGrammar.getNonterminalAlphabet() );
-	grammarTmp.setTerminalAlphabet( origGrammar.getTerminalAlphabet() );
-
-	ext::map<DefaultSymbolType, DefaultSymbolType> createdSymbols;
-	auto origRules = grammar::RawRules::getRawRules(origGrammar);
-	for( const auto & origRule : origRules ) {
-		for( const auto& origRhs : origRule.second ) {
-			if(origRhs.size() == 1 || origRhs.size() == 2)
-				grammar::AddRawRule::addRawRule(grammarTmp,origRule.first, origRhs);
-			else if(origRhs.size() > 2) {
-				auto second = splitToPairs(grammarTmp, origRhs, 0, origRhs.size(), createdSymbols);
-				grammar::AddRawRule::addRawRule(grammarTmp,origRule.first, {std::move(second.first), std::move(second.second)});
-			}
-		}
-	}
-
-	grammarTmp.setGeneratesEpsilon(origGrammar.getGeneratesEpsilon());
+	grammar::CNF < > result ( origGrammar.getInitialSymbol ( ) ); // the CNF grammar is filled in directly, starting from the source initial symbol
 
-	grammar::CNF < > grammar(grammarTmp.getInitialSymbol());
+	result.setNonterminalAlphabet ( origGrammar.getNonterminalAlphabet ( ) );
+	result.setTerminalAlphabet ( origGrammar.getTerminalAlphabet ( ) );
 
-	grammar.setNonterminalAlphabet( grammarTmp.getNonterminalAlphabet() );
-	grammar.setTerminalAlphabet( grammarTmp.getTerminalAlphabet() );
-
-	ext::map<DefaultSymbolType, DefaultSymbolType> terminalToShadowNonterminal;
-	for( const auto & symbol : grammarTmp.getTerminalAlphabet() ) {
-		DefaultSymbolType shadowSymbol = common::createUnique(symbol, grammar.getTerminalAlphabet(), grammar.getNonterminalAlphabet());
-		terminalToShadowNonterminal.insert( std::make_pair( symbol, shadowSymbol ));
-		grammar.addNonterminalSymbol( shadowSymbol );
-		grammar.addRule(std::move(shadowSymbol), symbol);
+	ext::map < DefaultSymbolType, DefaultSymbolType > terminalToShadowNonterminal;
+	for ( const DefaultSymbolType & symbol : origGrammar.getTerminalAlphabet ( ) ) { // give every terminal a shadow nonterminal with the rule shadow -> terminal
+		DefaultSymbolType shadowSymbol = common::createUnique ( symbol, result.getTerminalAlphabet ( ), result.getNonterminalAlphabet ( ) );
+		terminalToShadowNonterminal.insert ( std::make_pair ( symbol, shadowSymbol ) );
+		result.addNonterminalSymbol ( shadowSymbol );
+		result.addRule ( std::move ( shadowSymbol ), symbol );
 	}
 
-	auto tmpRules = grammar::RawRules::getRawRules(grammarTmp);
-	for( const auto & tmpRule : tmpRules ) {
-		for( const auto& tmpRhs : tmpRule.second ) {
-			if(tmpRhs.size() == 2) {
-				if(grammarTmp.getNonterminalAlphabet().count(tmpRhs[0])) {
-					if(grammarTmp.getNonterminalAlphabet().count(tmpRhs[1])) {
-						grammar::AddRawRule::addRawRule(grammar,tmpRule.first, tmpRhs);
-					} else {
-						grammar::AddRawRule::addRawRule(grammar,tmpRule.first, {tmpRhs[0], terminalToShadowNonterminal.find(tmpRhs[1])->second});
-					}
-				} else {
-					if(grammarTmp.getNonterminalAlphabet().count(tmpRhs[1])) {
-						grammar::AddRawRule::addRawRule(grammar,tmpRule.first, {terminalToShadowNonterminal.find(tmpRhs[0])->second, tmpRhs[1]});
-					} else {
-						grammar::AddRawRule::addRawRule(grammar,tmpRule.first, {terminalToShadowNonterminal.find(tmpRhs[0])->second, terminalToShadowNonterminal.find(tmpRhs[1])->second });
-					}
+	for ( const auto & rules : origGrammar.getRules ( ) ) {
+		for ( const ext::vector < DefaultSymbolType > & rhs : rules.second ) {
+			if ( rhs.size ( ) == 1 ) {
+				result.addRule ( rules.first, rhs [ 0 ] ); // a single-symbol right-hand side is added unchanged
+			} else {
+				ext::vector < DefaultSymbolType > rawRule; // copy of rhs with terminals swapped for their shadow nonterminals
+				for ( const DefaultSymbolType & symbol : rhs ) {
+					if ( origGrammar.getTerminalAlphabet ( ).count ( symbol ) )
+						rawRule.push_back ( terminalToShadowNonterminal.at ( symbol ) );
+					else
+						rawRule.push_back ( symbol );
 				}
-			} else // tmpRhs.size() == 1
-				grammar::AddRawRule::addRawRule(grammar,tmpRule.first, tmpRhs);
+
+				splitRule ( rules.first, std::move ( rawRule ), result ); // splitRule adds the two-symbol rules for this right-hand side
+			}
 		}
 	}
 
-	grammar.setGeneratesEpsilon(grammarTmp.getGeneratesEpsilon());
+	result.setGeneratesEpsilon ( origGrammar.getGeneratesEpsilon ( ) ); // carry over the generates-epsilon flag of the source grammar
 
-	return grammar;
+	return result;
 }
 
 grammar::CNF < > ToCNF::convert(const grammar::CFG < > & origGrammar) {
-	return convertInternal(grammar::simplify::SimpleRulesRemover::remove(grammar::simplify::EpsilonRemover::remove(origGrammar)));
+	return convert(grammar::simplify::EpsilonRemover::remove(origGrammar)); // removes epsilon rules, then delegates to another convert overload - presumably the EpsilonFreeCFG one, which removes simple rules; verify the return type of EpsilonRemover::remove
 }
 
 grammar::CNF < > ToCNF::convert(const grammar::EpsilonFreeCFG < > & origGrammar) {
 	return convertInternal(grammar::simplify::SimpleRulesRemover::remove(origGrammar));
 }
 
-grammar::CNF < > ToCNF::convert(const grammar::GNF < > & origGrammar) {
-	return convertInternal(origGrammar);
+grammar::CNF < > ToCNF::convert ( const grammar::GNF < > & origGrammar ) { // GNF input: each right-hand side is a pair (leading symbol, vector of following symbols)
+	grammar::CNF < > result ( origGrammar.getInitialSymbol ( ) );
+
+	result.setNonterminalAlphabet ( origGrammar.getNonterminalAlphabet ( ) );
+	result.setTerminalAlphabet ( origGrammar.getTerminalAlphabet ( ) );
+
+	ext::map < DefaultSymbolType, DefaultSymbolType > terminalToShadowNonterminal;
+	for ( const DefaultSymbolType & symbol : origGrammar.getTerminalAlphabet ( ) ) { // give every terminal a shadow nonterminal with the rule shadow -> terminal
+		DefaultSymbolType shadowSymbol = common::createUnique ( symbol, result.getTerminalAlphabet ( ), result.getNonterminalAlphabet ( ) );
+		terminalToShadowNonterminal.insert ( std::make_pair ( symbol, shadowSymbol ) );
+		result.addNonterminalSymbol ( shadowSymbol );
+		result.addRule ( std::move ( shadowSymbol ), symbol );
+	}
+
+	for ( const auto & rules : origGrammar.getRules ( ) ) {
+		for ( const ext::pair < DefaultSymbolType, ext::vector < DefaultSymbolType > > & rhs : rules.second ) {
+			if ( rhs.second.size ( ) == 0 ) // nothing follows the leading symbol: add lhs -> leading symbol as is
+				result.addRule ( rules.first, rhs.first );
+			else {
+				ext::vector < DefaultSymbolType > rawRule { terminalToShadowNonterminal.at ( rhs.first ) }; // the leading symbol is looked up in the terminal shadow map and replaced by its shadow nonterminal
+				rawRule.insert ( rawRule.end ( ), rhs.second.begin ( ), rhs.second.end ( ) ); // followed by the remaining symbols, copied unchanged
+				splitRule ( rules.first, std::move ( rawRule ), result ); // rawRule has at least two symbols here
+			}
+		}
+	}
+
+	result.setGeneratesEpsilon ( origGrammar.getGeneratesEpsilon ( ) ); // carry over the generates-epsilon flag of the source grammar
+
+	return result;
+}
 
 grammar::CNF < > ToCNF::convert(const grammar::LG < > & origGrammar) {
diff --git a/alib2algo/src/grammar/simplify/ToCNF.h b/alib2algo/src/grammar/simplify/ToCNF.h
index 88165da160..8b39636ea3 100644
--- a/alib2algo/src/grammar/simplify/ToCNF.h
+++ b/alib2algo/src/grammar/simplify/ToCNF.h
@@ -79,6 +79,8 @@ grammar::CNF < SymbolType > ToCNF::convert ( const grammar::LeftRG < SymbolType
 		}
 	}
 
+	result.setGeneratesEpsilon ( origGrammar.getGeneratesEpsilon ( ) );
+
 	return result;
 }
 
@@ -108,6 +110,8 @@ grammar::CNF < SymbolType > ToCNF::convert ( const grammar::RightRG < SymbolType
 		}
 	}
 
+	result.setGeneratesEpsilon ( origGrammar.getGeneratesEpsilon ( ) );
+
 	return result;
 }
 
diff --git a/alib2algo/test-src/grammar/simplify/GrammarToCNFTest.cpp b/alib2algo/test-src/grammar/simplify/GrammarToCNFTest.cpp
index 4f8c030352..fb2951651c 100644
--- a/alib2algo/test-src/grammar/simplify/GrammarToCNFTest.cpp
+++ b/alib2algo/test-src/grammar/simplify/GrammarToCNFTest.cpp
@@ -74,9 +74,9 @@ void GrammarToCNFTest::testToCNFRules2() {
 	DefaultSymbolType aP = DefaultSymbolType("a'");
 	DefaultSymbolType bP = DefaultSymbolType("b'");
 	DefaultSymbolType cP = DefaultSymbolType("c'");
-	DefaultSymbolType Xb = DefaultSymbolType(DefaultSymbolType(DefaultSymbolsPairType(ext::make_pair(X, b))));
-	DefaultSymbolType aX = DefaultSymbolType(DefaultSymbolType(DefaultSymbolsPairType(ext::make_pair(a, X))));
-	DefaultSymbolType bX = DefaultSymbolType(DefaultSymbolType(DefaultSymbolsPairType(ext::make_pair(b, X))));
+	DefaultSymbolType Xb = object::ObjectFactory::make ( ext::vector < DefaultSymbolType > { X, bP } );
+	DefaultSymbolType aX = object::ObjectFactory::make ( ext::vector < DefaultSymbolType > { aP, X } );
+	DefaultSymbolType bX = object::ObjectFactory::make ( ext::vector < DefaultSymbolType > { bP, X } );
 
 	grammar::CNF < > grammar3(S);
 	grammar3.setNonterminalAlphabet({S, X, Y, aP, bP, cP, Xb, aX, bX});
-- 
GitLab