From 82e04846db893385f8efad37b0a0e71732cfeab6 Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Thu, 17 Jul 2014 22:21:14 +0200
Subject: [PATCH] GNF internal form

---
 alib2data/src/grammar/ContextFree/GNF.cpp     | 82 +++++++------------
 alib2data/src/grammar/ContextFree/GNF.h       | 11 ++-
 .../src/grammar/GrammarFromXMLParser.cpp      | 16 +++-
 alib2data/src/grammar/GrammarFromXMLParser.h  |  1 +
 .../src/grammar/GrammarToXMLComposer.cpp      | 15 +++-
 alib2data/src/grammar/GrammarToXMLComposer.h  |  1 +
 alib2data/test-src/grammar/GrammarTest.cpp    | 30 +++++++
 7 files changed, 96 insertions(+), 60 deletions(-)

diff --git a/alib2data/src/grammar/ContextFree/GNF.cpp b/alib2data/src/grammar/ContextFree/GNF.cpp
index 45e460387f..4bf533992a 100644
--- a/alib2data/src/grammar/ContextFree/GNF.cpp
+++ b/alib2data/src/grammar/ContextFree/GNF.cpp
@@ -32,12 +32,9 @@ GrammarBase* GNF::plunder() && {
 }
 
 bool GNF::removeTerminalSymbol(const alphabet::Symbol& symbol) {
-	for(const std::pair<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>>& rule : rules) {
-		if(rule.first == symbol)
-			throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule.");
-
-		for(const std::vector<alphabet::Symbol>& rhs : rule.second)
-			if(std::find(rhs.begin(), rhs.end(), symbol) != rhs.end())
+	for(const std::pair<alphabet::Symbol, std::set<std::pair<alphabet::Symbol, std::vector<alphabet::Symbol> >> >& rule : rules) {
+		for(const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol> >& rhs : rule.second)
+			if(rhs.first == symbol)
 				throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule.");
 	}
 
@@ -45,12 +42,12 @@ bool GNF::removeTerminalSymbol(const alphabet::Symbol& symbol) {
 }
 
 bool GNF::removeNonterminalSymbol(const alphabet::Symbol& symbol) {
-	for(const std::pair<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>>& rule : rules) {
+	for(const std::pair<alphabet::Symbol, std::set<std::pair<alphabet::Symbol, std::vector<alphabet::Symbol> >> >& rule : rules) {
 		if(rule.first == symbol)
 			throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule.");
 
-		for(const std::vector<alphabet::Symbol>& rhs : rule.second)
-			if(std::find(rhs.begin(), rhs.end(), symbol) != rhs.end())
+		for(const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol> >& rhs : rule.second)
+			if(std::find(rhs.second.begin(), rhs.second.end(), symbol) != rhs.second.end())
 				throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule.");
 	}
 
@@ -61,59 +58,36 @@ bool GNF::removeNonterminalSymbol(const alphabet::Symbol& symbol) {
 	return nonterminalAlphabet.erase(symbol);
 }
 
-bool GNF::addRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) {
-	int rSize = rightHandSide.size();
-
-	if(rSize == 0 && leftHandSide == initialSymbol) {
-		for(const auto& rule : rules) {
-			for(const auto& ruleRHS : rule.second) {
-				if(any_of(ruleRHS.begin(), ruleRHS.end(), [&](const alphabet::Symbol& symbol) { return initialSymbol == symbol; })) {
-					throw GrammarException("Initial symbol " + (std::string) initialSymbol + "used on right hand side of already existing rule");
-				}
-			}
-		}
-
-		generatesEpsilon = true;
-		return rules[leftHandSide].insert(rightHandSide).second;
-	} else if(rSize == 1) {
-		if(!nonterminalAlphabet.count(leftHandSide))
-			throw GrammarException("Rule must rewrite nonterminal symbol");
-
-		if(!terminalAlphabet.count(rightHandSide[0]))
-			throw GrammarException("Rule must rewrite to terminal symbol");
-
-		return rules[leftHandSide].insert(rightHandSide).second;
-	} else {
-		if(!nonterminalAlphabet.count(leftHandSide))
-			throw GrammarException("Rule must rewrite nonterminal symbol");
-
-		if(!terminalAlphabet.count(rightHandSide[0]))
-			throw GrammarException("First symbol of the rule must be a terminal symbol");
-
-		for(std::vector<alphabet::Symbol>::const_iterator iter = rightHandSide.begin() + 1; iter != rightHandSide.end(); iter++) {
-			if(nonterminalAlphabet.find(*iter) == nonterminalAlphabet.end())
-				throw GrammarException("Symbol \"" + (std::string) iter->getSymbol() + "\" is not a nonterminal symbol");
-			if(generatesEpsilon && *iter == initialSymbol)
-				throw GrammarException("Initial symbol is already allowed to be rewritten to epsilon");
-		}
-
-		return rules[leftHandSide].insert(rightHandSide).second;
-	}
+bool GNF::addRule(const alphabet::Symbol& leftHandSide, const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol> >& rightHandSide) { // Adds the rule leftHandSide -> rightHandSide.first followed by rightHandSide.second; throws GrammarException on an invalid symbol; returns true only when the rule was newly inserted
+	if(!nonterminalAlphabet.count(leftHandSide)) // the left-hand side must be a member of the nonterminal alphabet
+		throw GrammarException("Rule must rewrite nonterminal symbol");
+
+	if(!terminalAlphabet.count(rightHandSide.first)) // the leading right-hand-side symbol must be a member of the terminal alphabet
+		throw GrammarException("Rule must rewrite to terminal symbol");
+
+	for(const alphabet::Symbol& rhsNTs : rightHandSide.second) // every following symbol must be a member of the nonterminal alphabet; an empty vector passes trivially
+		if(nonterminalAlphabet.find(rhsNTs) == nonterminalAlphabet.end())
+			throw GrammarException("Symbol \"" + (std::string) rhsNTs.getSymbol() + "\" is not a nonterminal symbol");
+
+	return rules[leftHandSide].insert(rightHandSide).second; // operator[] creates the rule set for leftHandSide on first use; .second is false when the same rule is already stored
 }
 
-const std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> GNF::getRules() const {
+const std::map<alphabet::Symbol, std::set<std::pair<alphabet::Symbol, std::vector<alphabet::Symbol> >> > GNF::getRules() const { // Returns the rule map by value, i.e. a copy of the member 'rules' keyed by left-hand-side symbol
 	return rules;
 }
 
-bool GNF::removeRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) {
-	int rSize = rightHandSide.size();
-
-	if(leftHandSide == initialSymbol && rSize == 0) {
-		generatesEpsilon = false;
-	}
+bool GNF::removeRule(const alphabet::Symbol& leftHandSide, const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol> >& rightHandSide) { // Erases the rule from the set stored under leftHandSide; the erased-element count is converted to bool. NOTE(review): operator[] below default-inserts an empty rule set when leftHandSide has no entry yet
 	return rules[leftHandSide].erase(rightHandSide);
 }
 
+void GNF::setGeneratesEpsilon(bool genEps) { // Stores the flag as given; no check against the existing rules is performed in this function
+	generatesEpsilon = genEps;
+}
+
+bool GNF::getGeneratesEpsilon() const { // Returns the flag last stored in the generatesEpsilon member
+	return generatesEpsilon;
+}
+
 bool GNF::operator==(const GrammarBase& other) const {
 	return other == *this;
 }
diff --git a/alib2data/src/grammar/ContextFree/GNF.h b/alib2data/src/grammar/ContextFree/GNF.h
index c711939be7..b3341ec7be 100644
--- a/alib2data/src/grammar/ContextFree/GNF.h
+++ b/alib2data/src/grammar/ContextFree/GNF.h
@@ -19,7 +19,7 @@ namespace grammar {
  * Context free grammar in greibach normal form. Type 2 in Chomsky hierarchy. Produces context free languages.
  */
 class GNF : public std::element<GNF, GrammarBase>, public TerminalNonterminalAlphabetInitialSymbol {
-	std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> rules;
+	std::map<alphabet::Symbol, std::set<std::pair<alphabet::Symbol, std::vector<alphabet::Symbol> >> > rules;
 	bool generatesEpsilon;
 public:
 	GNF(const alphabet::Symbol& initialSymbol);
@@ -30,16 +30,19 @@ public:
 
 	virtual GrammarBase* plunder() &&;
 
-	bool addRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide);
+	bool addRule(const alphabet::Symbol& leftHandSide, const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol> >& rightHandSide);
 
-	const std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> getRules() const;
+	const std::map<alphabet::Symbol, std::set<std::pair<alphabet::Symbol, std::vector<alphabet::Symbol> >> > getRules() const;
 
-	bool removeRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide);
+	bool removeRule(const alphabet::Symbol& leftHandSide, const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol> >& rightHandSide);
 
 	bool removeTerminalSymbol(const alphabet::Symbol& symbol);
 
 	bool removeNonterminalSymbol(const alphabet::Symbol& symbol);
 
+	void setGeneratesEpsilon(bool genEps);
+	bool getGeneratesEpsilon() const;
+
 	virtual bool operator==(const GrammarBase& other) const;
 
 	virtual bool operator==(const GNF& other) const;
diff --git a/alib2data/src/grammar/GrammarFromXMLParser.cpp b/alib2data/src/grammar/GrammarFromXMLParser.cpp
index 75994a8499..9e60ddbda2 100644
--- a/alib2data/src/grammar/GrammarFromXMLParser.cpp
+++ b/alib2data/src/grammar/GrammarFromXMLParser.cpp
@@ -177,6 +177,9 @@ GNF GrammarFromXMLParser::parseGNF(std::list<sax::Token>& input) const {
 
 	parseRules(input, grammar);
 
+	bool generatesEpsilon = parseGeneratesEpsilon(input);
+	grammar.setGeneratesEpsilon(generatesEpsilon);
+
 	popToken(input, sax::Token::TokenType::END_ELEMENT, "GNF");
 	return grammar;
 }
@@ -426,7 +429,7 @@ void GrammarFromXMLParser::parseRule(std::list<sax::Token>& input, CSG& grammar)
 
 void GrammarFromXMLParser::parseRule(std::list<sax::Token>& input, GNF& grammar) const {
 	alphabet::Symbol lhs = parseRuleSingleSymbolLHS(input);
-	std::vector<alphabet::Symbol> rhs = parseRuleRHS(input);
+	std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>> rhs = parseRuleGNFRHS(input); // right-hand side is read as (first symbol, following symbols), the shape GNF::addRule takes
 	grammar.addRule(lhs, rhs);
 }
 
@@ -499,6 +502,17 @@ alphabet::Symbol GrammarFromXMLParser::parseRuleSingleSymbolLHS(std::list<sax::T
 	return lhs;
 }
 
+std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>> GrammarFromXMLParser::parseRuleGNFRHS(std::list<sax::Token>& input) const { // Reads one "rhs" element from input and returns it as (first symbol, remaining symbols in order)
+	popToken(input, sax::Token::TokenType::START_ELEMENT, "rhs"); // NOTE(review): popToken is defined outside this hunk; presumably consumes the opening "rhs" token or fails - confirm
+	alphabet::Symbol first = alib::FromXMLParsers::symbolParser.parse(input); // the first symbol is parsed unconditionally, so it is mandatory here
+	std::vector<alphabet::Symbol> second;
+	while (isTokenType(input, sax::Token::TokenType::START_ELEMENT)) { // NOTE(review): presumably peeks at the next token without consuming it - confirm; zero further symbols is allowed
+		second.push_back(alib::FromXMLParsers::symbolParser.parse(input));
+	}
+	popToken(input, sax::Token::TokenType::END_ELEMENT, "rhs");
+	return std::make_pair(first, second); // both lvalues are copied into the returned pair
+}
+
 std::vector<alphabet::Symbol> GrammarFromXMLParser::parseRuleRHS(std::list<sax::Token>& input) const {
 	std::vector<alphabet::Symbol> rhs;
 	popToken(input, sax::Token::TokenType::START_ELEMENT, "rhs");
diff --git a/alib2data/src/grammar/GrammarFromXMLParser.h b/alib2data/src/grammar/GrammarFromXMLParser.h
index fe4c8efbaa..ca218f90dc 100644
--- a/alib2data/src/grammar/GrammarFromXMLParser.h
+++ b/alib2data/src/grammar/GrammarFromXMLParser.h
@@ -45,6 +45,7 @@ protected:
 
 	std::vector<alphabet::Symbol> parseRuleLHS(std::list<sax::Token>& input) const;
 	alphabet::Symbol parseRuleSingleSymbolLHS(std::list<sax::Token>& input) const;
+	std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>> parseRuleGNFRHS(std::list<sax::Token>& input) const;
 	std::vector<alphabet::Symbol> parseRuleRHS(std::list<sax::Token>& input) const;
 	std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>> parseRuleOneOrTwoSymbolsRHS(std::list<sax::Token>& input) const;
 
diff --git a/alib2data/src/grammar/GrammarToXMLComposer.cpp b/alib2data/src/grammar/GrammarToXMLComposer.cpp
index 3ea70b05e0..2939f67ead 100644
--- a/alib2data/src/grammar/GrammarToXMLComposer.cpp
+++ b/alib2data/src/grammar/GrammarToXMLComposer.cpp
@@ -154,6 +154,7 @@ std::list<sax::Token> GrammarToXMLComposer::compose(const GNF& grammar) const {
 	composeTerminalAlphabet(out, grammar.getTerminalAlphabet());
 	composeInitialSymbol(out, grammar.getInitialSymbol());
 	composeRules(out, grammar);
+	composeGeneratesEpsilon(out, grammar.getGeneratesEpsilon());
 
 	out.push_back(sax::Token("GNF", sax::Token::TokenType::END_ELEMENT));
 	return out;
@@ -339,7 +340,7 @@ void GrammarToXMLComposer::composeRules(std::list<sax::Token>& out, const GNF& g
 			out.push_back(sax::Token("rule", sax::Token::TokenType::START_ELEMENT));
 
 			composeRuleSingleSymbolLHS(out, rule.first);
-			composeRuleRHS(out, rhs);
+			composeRuleGNFRHS(out, rhs);
 
 			out.push_back(sax::Token("rule", sax::Token::TokenType::END_ELEMENT));
 		}
@@ -518,6 +519,18 @@ void GrammarToXMLComposer::composeRuleOneOrTwoSymbolsRHS(std::list<sax::Token>&
 	out.push_back(sax::Token("rhs", sax::Token::TokenType::END_ELEMENT));
 }
 
+void GrammarToXMLComposer::composeRuleGNFRHS(std::list<sax::Token>& out, const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>& symbols) const { // Appends to out an "rhs" element containing symbols.first followed by each symbol of symbols.second in order
+	out.push_back(sax::Token("rhs", sax::Token::TokenType::START_ELEMENT));
+
+	out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(symbols.first)); // the composed tokens of the first symbol are spliced onto the end of out
+
+	for (const auto& symbol : symbols.second) { // same order as stored in the vector, which is the order parseRuleGNFRHS reads them back
+		out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(symbol));
+	}
+
+	out.push_back(sax::Token("rhs", sax::Token::TokenType::END_ELEMENT));
+}
+
 void GrammarToXMLComposer::composeRuleLGRHS(std::list<sax::Token>& out, const std::variant<std::vector<alphabet::Symbol>, std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>>& symbols) const {
 	out.push_back(sax::Token("rhs", sax::Token::TokenType::START_ELEMENT));
 	if(symbols.is<std::vector<alphabet::Symbol>>()) {
diff --git a/alib2data/src/grammar/GrammarToXMLComposer.h b/alib2data/src/grammar/GrammarToXMLComposer.h
index 0e8923f6b1..427eb2b92d 100644
--- a/alib2data/src/grammar/GrammarToXMLComposer.h
+++ b/alib2data/src/grammar/GrammarToXMLComposer.h
@@ -73,6 +73,7 @@ class GrammarToXMLComposer : public GrammarBase::const_visitor_type {
 	void composeRuleSingleSymbolLHS(std::list<sax::Token>& out, const alphabet::Symbol& symbol) const;
 	void composeRuleRHS(std::list<sax::Token>& out, const std::vector<alphabet::Symbol>& symbols) const;
 	void composeRuleOneOrTwoSymbolsRHS(std::list<sax::Token>& out, const std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>& symbols) const;
+	void composeRuleGNFRHS(std::list<sax::Token>& out, const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>& symbols) const;
 	void composeRuleLeftLGRHS(std::list<sax::Token>& out, const std::variant<std::vector<alphabet::Symbol>, std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>>& symbols) const;
 	void composeRuleRightLGRHS(std::list<sax::Token>& out, const std::variant<std::vector<alphabet::Symbol>, std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol>>& symbols) const;
 	void composeRuleLGRHS(std::list<sax::Token>& out, const std::variant<std::vector<alphabet::Symbol>, std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>>& symbols) const;
diff --git a/alib2data/test-src/grammar/GrammarTest.cpp b/alib2data/test-src/grammar/GrammarTest.cpp
index ab2c4c8a09..d365f883c5 100644
--- a/alib2data/test-src/grammar/GrammarTest.cpp
+++ b/alib2data/test-src/grammar/GrammarTest.cpp
@@ -260,6 +260,36 @@ void GrammarTest::testContextFreeParser() {
 	grammar::GrammarFromXMLParser parser;
 	grammar::CNF grammar2 = parser.parseCNF(tokens2);
 
+	CPPUNIT_ASSERT( grammar == grammar2 );
+    }
+  }
+  { // GNF case: build a grammar, compose it, parse it back, compare
+    grammar::GNF grammar(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1)))));
+
+    grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1)))));
+    grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2)))));
+    grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3)))));
+    grammar.addTerminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))));
+    grammar.addTerminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("b")))));
+    grammar.setGeneratesEpsilon(true); // set explicitly so the flag is written by the composer and read back by the parser
+
+    grammar.addRule(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1)))), std::make_pair(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))})); // 1 -> a 2
+    grammar.addRule(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2)))), std::make_pair(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("b")))), std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3))))})); // 2 -> b 3
+    grammar.addRule(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3)))), std::make_pair(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), std::vector<alphabet::Symbol> {})); // 3 -> a (no trailing nonterminals)
+    grammar.addRule(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1)))), std::make_pair(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), std::vector<alphabet::Symbol> {})); // 1 -> a (second rule for the same left-hand side)
+
+    CPPUNIT_ASSERT( grammar == grammar ); // equality must hold for the grammar compared with itself
+    {
+	grammar::GrammarToXMLComposer composer;
+	std::list<sax::Token> tokens = composer.compose(grammar);
+	std::string tmp;
+	sax::SaxComposeInterface::printMemory(tmp, tokens); // NOTE(review): presumably serializes the tokens into tmp - helper is not visible here
+
+	std::list<sax::Token> tokens2;
+	sax::SaxParseInterface::parseMemory(tmp, tokens2); // NOTE(review): presumably tokenizes tmp back into tokens2 - helper is not visible here
+	grammar::GrammarFromXMLParser parser;
+	grammar::GNF grammar2 = parser.parseGNF(tokens2);
+
 	CPPUNIT_ASSERT( grammar == grammar2 );
     }
   }
-- 
GitLab