From 04660dc69cb15c834ce591e4a8ff936885c783dc Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Wed, 16 Jul 2014 11:46:00 +0200 Subject: [PATCH] fix grammar parser first and parse all --- .../src/grammar/GrammarFromXMLParser.cpp | 79 +++++++++++++++++- alib2data/src/grammar/GrammarFromXMLParser.h | 4 +- .../src/grammar/GrammarToXMLComposer.cpp | 30 ++++++- alib2data/src/grammar/GrammarToXMLComposer.h | 4 +- alib2data/src/grammar/Regular/LeftRG.cpp | 81 +++++++++---------- alib2data/src/grammar/Regular/LeftRG.h | 35 ++++++-- alib2data/src/grammar/Regular/RightRG.cpp | 79 +++++++++--------- alib2data/src/grammar/Regular/RightRG.h | 34 ++++++-- alib2data/test-src/grammar/GrammarTest.cpp | 28 +++++++ alib2data/test-src/grammar/GrammarTest.h | 4 + 10 files changed, 269 insertions(+), 109 deletions(-) diff --git a/alib2data/src/grammar/GrammarFromXMLParser.cpp b/alib2data/src/grammar/GrammarFromXMLParser.cpp index 5ac2fc0ad5..cd0e84d963 100644 --- a/alib2data/src/grammar/GrammarFromXMLParser.cpp +++ b/alib2data/src/grammar/GrammarFromXMLParser.cpp @@ -25,12 +25,48 @@ Grammar GrammarFromXMLParser::parse(std::list<sax::Token>& input, const std::set } else if(isToken(input, sax::Token::TokenType::START_ELEMENT, "UnrestrictedGrammar")) { if(!features.count(FEATURES::UNRESTRICTED_GRAMMAR)) throw exception::AlibException(); return Grammar(parseUnrestrictedGrammar(input)); + } else if(isToken(input, sax::Token::TokenType::START_ELEMENT, "ContextPreservingUnrestrictedGrammar")) { + if(!features.count(FEATURES::CONTEXT_PRESERVING_UNRESTRICTED_GRAMMAR)) throw exception::AlibException(); + return Grammar(parseContextPreservingUnrestrictedGrammar(input)); + } else if(isToken(input, sax::Token::TokenType::START_ELEMENT, "NonContractingGrammar")) { + if(!features.count(FEATURES::NON_CONTRACTING_GRAMMAR)) throw exception::AlibException(); + return Grammar(parseNonContractingGrammar(input)); + } else if(isToken(input, sax::Token::TokenType::START_ELEMENT, 
"CSG")) { + if(!features.count(FEATURES::CSG)) throw exception::AlibException(); + return Grammar(parseCSG(input)); + } else if(isToken(input, sax::Token::TokenType::START_ELEMENT, "GNF")) { + if(!features.count(FEATURES::GNF)) throw exception::AlibException(); + return Grammar(parseGNF(input)); + } else if(isToken(input, sax::Token::TokenType::START_ELEMENT, "CNF")) { + if(!features.count(FEATURES::CNF)) throw exception::AlibException(); + return Grammar(parseCNF(input)); + } else if(isToken(input, sax::Token::TokenType::START_ELEMENT, "EpsilonFreeCFG")) { + if(!features.count(FEATURES::EPSILON_FREE_CFG)) throw exception::AlibException(); + return Grammar(parseEpsilonFreeCFG(input)); + } else if(isToken(input, sax::Token::TokenType::START_ELEMENT, "CFG")) { + if(!features.count(FEATURES::CFG)) throw exception::AlibException(); + return Grammar(parseCFG(input)); + } else if(isToken(input, sax::Token::TokenType::START_ELEMENT, "LG")) { + if(!features.count(FEATURES::LG)) throw exception::AlibException(); + return Grammar(parseLG(input)); + } else if(isToken(input, sax::Token::TokenType::START_ELEMENT, "RightRG")) { + if(!features.count(FEATURES::RIGHT_RG)) throw exception::AlibException(); + return Grammar(parseRightRG(input)); + } else if(isToken(input, sax::Token::TokenType::START_ELEMENT, "RightLG")) { + if(!features.count(FEATURES::RIGHT_LG)) throw exception::AlibException(); + return Grammar(parseRightLG(input)); + } else if(isToken(input, sax::Token::TokenType::START_ELEMENT, "LeftRG")) { + if(!features.count(FEATURES::LEFT_RG)) throw exception::AlibException(); + return Grammar(parseLeftRG(input)); + } else if(isToken(input, sax::Token::TokenType::START_ELEMENT, "LeftLG")) { + if(!features.count(FEATURES::LEFT_LG)) throw exception::AlibException(); + return Grammar(parseLeftLG(input)); } else throw sax::ParserException(sax::Token("Grammar / UnrestrictedGrammar", sax::Token::TokenType::START_ELEMENT), input.front()); } bool 
GrammarFromXMLParser::first(std::list<sax::Token>& input) const { - if(isToken(input, sax::Token::TokenType::START_ELEMENT, "grammar") || isToken(input, sax::Token::TokenType::START_ELEMENT, "UnrestrictedGrammar")) { + if(isToken(input, sax::Token::TokenType::START_ELEMENT, "grammar") || isToken(input, sax::Token::TokenType::START_ELEMENT, "UnrestrictedGrammar") || isToken(input, sax::Token::TokenType::START_ELEMENT, "ContextPreservingUnrestrictedGrammar") || isToken(input, sax::Token::TokenType::START_ELEMENT, "NonContractingGrammar") || isToken(input, sax::Token::TokenType::START_ELEMENT, "CSG") || isToken(input, sax::Token::TokenType::START_ELEMENT, "GNF") || isToken(input, sax::Token::TokenType::START_ELEMENT, "CNF") || isToken(input, sax::Token::TokenType::START_ELEMENT, "EpsilonFreeCFG") || isToken(input, sax::Token::TokenType::START_ELEMENT, "CFG") || isToken(input, sax::Token::TokenType::START_ELEMENT, "LG") || isToken(input, sax::Token::TokenType::START_ELEMENT, "RightRG") || isToken(input, sax::Token::TokenType::START_ELEMENT, "RightLG") || isToken(input, sax::Token::TokenType::START_ELEMENT, "LeftRG") || isToken(input, sax::Token::TokenType::START_ELEMENT, "LeftLG")) { return true; } else { return false; @@ -94,7 +130,7 @@ ContextPreservingUnrestrictedGrammar GrammarFromXMLParser::parseContextPreservin return grammar; } -NonContractingGrammar GrammarFromXMLParser::parseNoncontractingGrammar(std::list<sax::Token>& input) const { +NonContractingGrammar GrammarFromXMLParser::parseNonContractingGrammar(std::list<sax::Token>& input) const { popToken(input, sax::Token::TokenType::START_ELEMENT, "NonContractingGrammar"); std::set<alphabet::Symbol> nonterminalAlphabet = parseNonterminalAlphabet(input); @@ -226,6 +262,9 @@ RightRG GrammarFromXMLParser::parseRightRG(std::list<sax::Token>& input) const { parseRules(input, grammar); + bool generatesEpsilon = parseGeneratesEpsilon(input); + grammar.setGeneratesEpsilon(generatesEpsilon); + popToken(input, 
sax::Token::TokenType::END_ELEMENT, "RightRG"); return grammar; } @@ -260,6 +299,9 @@ LeftRG GrammarFromXMLParser::parseLeftRG(std::list<sax::Token>& input) const { parseRules(input, grammar); + bool generatesEpsilon = parseGeneratesEpsilon(input); + grammar.setGeneratesEpsilon(generatesEpsilon); + popToken(input, sax::Token::TokenType::END_ELEMENT, "LeftRG"); return grammar; } @@ -309,6 +351,22 @@ alphabet::Symbol GrammarFromXMLParser::parseInitialSymbol(std::list<sax::Token>& return blank; } +bool GrammarFromXMLParser::parseGeneratesEpsilon(std::list<sax::Token>& input) const { + bool generatesEpsilon; + popToken(input, sax::Token::TokenType::START_ELEMENT, "generatesEpsilon"); + if(isToken(input, sax::Token::TokenType::START_ELEMENT, "true")) { + input.pop_front(); + popToken(input, sax::Token::TokenType::END_ELEMENT, "true"); + generatesEpsilon = true; + } else { + popToken(input, sax::Token::TokenType::START_ELEMENT, "false"); + popToken(input, sax::Token::TokenType::END_ELEMENT, "false"); + generatesEpsilon = false; + } + popToken(input, sax::Token::TokenType::END_ELEMENT, "generatesEpsilon"); + return generatesEpsilon; +} + template<class T> void GrammarFromXMLParser::parseRules(std::list<sax::Token> &input, T& grammar) const { popToken(input, sax::Token::TokenType::START_ELEMENT, "rules"); @@ -392,7 +450,7 @@ void GrammarFromXMLParser::parseRule(std::list<sax::Token>& input, LG& grammar) void GrammarFromXMLParser::parseRule(std::list<sax::Token>& input, RightRG& grammar) const { alphabet::Symbol lhs = parseRuleSingleSymbolLHS(input); - std::vector<alphabet::Symbol> rhs = parseRuleRHS(input); + std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>> rhs = parseRuleOneOrTwoSymbolsRHS(input); grammar.addRule(lhs, rhs); } @@ -404,7 +462,7 @@ void GrammarFromXMLParser::parseRule(std::list<sax::Token>& input, RightLG& gram void GrammarFromXMLParser::parseRule(std::list<sax::Token>& input, LeftRG& grammar) const { alphabet::Symbol lhs = 
parseRuleSingleSymbolLHS(input); - std::vector<alphabet::Symbol> rhs = parseRuleRHS(input); + std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>> rhs = parseRuleOneOrTwoSymbolsRHS(input); grammar.addRule(lhs, rhs); } @@ -442,5 +500,18 @@ std::vector<alphabet::Symbol> GrammarFromXMLParser::parseRuleRHS(std::list<sax:: return rhs; } +std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>> GrammarFromXMLParser::parseRuleOneOrTwoSymbolsRHS(std::list<sax::Token>& input) const { + popToken(input, sax::Token::TokenType::START_ELEMENT, "rhs"); + alphabet::Symbol first = alib::FromXMLParsers::symbolParser.parse(input); + if(isTokenType(input, sax::Token::TokenType::START_ELEMENT)) { + alphabet::Symbol second = alib::FromXMLParsers::symbolParser.parse(input); + popToken(input, sax::Token::TokenType::END_ELEMENT, "rhs"); + return std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>(std::make_pair(first, second)); + } else { + popToken(input, sax::Token::TokenType::END_ELEMENT, "rhs"); + return std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>(first); + } +} + } /* namespace grammar */ diff --git a/alib2data/src/grammar/GrammarFromXMLParser.h b/alib2data/src/grammar/GrammarFromXMLParser.h index e4ba86d709..fe4c8efbaa 100644 --- a/alib2data/src/grammar/GrammarFromXMLParser.h +++ b/alib2data/src/grammar/GrammarFromXMLParser.h @@ -41,10 +41,12 @@ protected: std::set<alphabet::Symbol> parseNonterminalAlphabet(std::list<sax::Token> &input) const; std::set<alphabet::Symbol> parseTerminalAlphabet(std::list<sax::Token> &input) const; alphabet::Symbol parseInitialSymbol(std::list<sax::Token> &input) const; + bool parseGeneratesEpsilon(std::list<sax::Token> &input) const; std::vector<alphabet::Symbol> parseRuleLHS(std::list<sax::Token>& input) const; alphabet::Symbol parseRuleSingleSymbolLHS(std::list<sax::Token>& input) const; std::vector<alphabet::Symbol> 
parseRuleRHS(std::list<sax::Token>& input) const; + std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>> parseRuleOneOrTwoSymbolsRHS(std::list<sax::Token>& input) const; template<class T> void parseRules(std::list<sax::Token> &input, T& grammar) const; @@ -72,7 +74,7 @@ public: UnknownGrammar parseUnknownGrammar(std::list<sax::Token>& input) const; UnrestrictedGrammar parseUnrestrictedGrammar(std::list<sax::Token>& input) const; ContextPreservingUnrestrictedGrammar parseContextPreservingUnrestrictedGrammar(std::list<sax::Token>& input) const; - NonContractingGrammar parseNoncontractingGrammar(std::list<sax::Token>& input) const; + NonContractingGrammar parseNonContractingGrammar(std::list<sax::Token>& input) const; CSG parseCSG(std::list<sax::Token>& input) const; GNF parseGNF(std::list<sax::Token>& input) const; CNF parseCNF(std::list<sax::Token>& input) const; diff --git a/alib2data/src/grammar/GrammarToXMLComposer.cpp b/alib2data/src/grammar/GrammarToXMLComposer.cpp index 81ad4b5c3f..f76dcce81d 100644 --- a/alib2data/src/grammar/GrammarToXMLComposer.cpp +++ b/alib2data/src/grammar/GrammarToXMLComposer.cpp @@ -59,6 +59,7 @@ std::list<sax::Token> GrammarToXMLComposer::compose(const LeftRG& grammar) const composeTerminalAlphabet(out, grammar.getTerminalAlphabet()); composeInitialSymbol(out, grammar.getInitialSymbol()); composeRules(out, grammar); + composeGeneratesEpsilon(out, grammar.getGeneratesEpsilon()); out.push_back(sax::Token("LeftRG", sax::Token::TokenType::END_ELEMENT)); return out; @@ -85,6 +86,7 @@ std::list<sax::Token> GrammarToXMLComposer::compose(const RightRG& grammar) cons composeTerminalAlphabet(out, grammar.getTerminalAlphabet()); composeInitialSymbol(out, grammar.getInitialSymbol()); composeRules(out, grammar); + composeGeneratesEpsilon(out, grammar.getGeneratesEpsilon()); out.push_back(sax::Token("RightRG", sax::Token::TokenType::END_ELEMENT)); return out; @@ -229,6 +231,18 @@ void 
GrammarToXMLComposer::composeInitialSymbol(std::list<sax::Token>& out, cons out.push_back(sax::Token("initialSymbol", sax::Token::TokenType::END_ELEMENT)); } +void GrammarToXMLComposer::composeGeneratesEpsilon(std::list<sax::Token>& out, bool generatesEpsilon) const { + out.push_back(sax::Token("generatesEpsilon", sax::Token::TokenType::START_ELEMENT)); + if(generatesEpsilon) { + out.push_back(sax::Token("true", sax::Token::TokenType::START_ELEMENT)); + out.push_back(sax::Token("true", sax::Token::TokenType::END_ELEMENT)); + } else { + out.push_back(sax::Token("false", sax::Token::TokenType::START_ELEMENT)); + out.push_back(sax::Token("false", sax::Token::TokenType::END_ELEMENT)); + } + out.push_back(sax::Token("generatesEpsilon", sax::Token::TokenType::END_ELEMENT)); +} + void GrammarToXMLComposer::composeRules(std::list<sax::Token>& out, const UnknownGrammar& grammar) const { out.push_back(sax::Token("rules", sax::Token::TokenType::START_ELEMENT)); @@ -425,7 +439,7 @@ void GrammarToXMLComposer::composeRules(std::list<sax::Token>& out, const RightR out.push_back(sax::Token("rule", sax::Token::TokenType::START_ELEMENT)); composeRuleSingleSymbolLHS(out, rule.first); - composeRuleRHS(out, rhs); + composeRuleOneOrTwoSymbolsRHS(out, rhs); out.push_back(sax::Token("rule", sax::Token::TokenType::END_ELEMENT)); } @@ -459,7 +473,7 @@ void GrammarToXMLComposer::composeRules(std::list<sax::Token>& out, const LeftRG out.push_back(sax::Token("rule", sax::Token::TokenType::START_ELEMENT)); composeRuleSingleSymbolLHS(out, rule.first); - composeRuleRHS(out, rhs); + composeRuleOneOrTwoSymbolsRHS(out, rhs); out.push_back(sax::Token("rule", sax::Token::TokenType::END_ELEMENT)); } @@ -490,6 +504,18 @@ void GrammarToXMLComposer::composeRuleRHS(std::list<sax::Token>& out, const std: out.push_back(sax::Token("rhs", sax::Token::TokenType::END_ELEMENT)); } +void GrammarToXMLComposer::composeRuleOneOrTwoSymbolsRHS(std::list<sax::Token>& out, const std::variant<alphabet::Symbol, 
std::pair<alphabet::Symbol, alphabet::Symbol>>& symbols) const { + out.push_back(sax::Token("rhs", sax::Token::TokenType::START_ELEMENT)); + if(symbols.is<alphabet::Symbol>()) { + out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(symbols.get<alphabet::Symbol>())); + } else { + const std::pair<alphabet::Symbol, alphabet::Symbol>& rhs = symbols.get<std::pair<alphabet::Symbol, alphabet::Symbol>>(); + out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(rhs.first)); + out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(rhs.second)); + } + out.push_back(sax::Token("rhs", sax::Token::TokenType::END_ELEMENT)); +} + std::list<sax::Token> GrammarToXMLComposer::compose(const Grammar& grammar) const { std::list<sax::Token> out; grammar.getGrammar().Accept((void*) &out, *this); diff --git a/alib2data/src/grammar/GrammarToXMLComposer.h b/alib2data/src/grammar/GrammarToXMLComposer.h index 489595c1e9..1d501d8b15 100644 --- a/alib2data/src/grammar/GrammarToXMLComposer.h +++ b/alib2data/src/grammar/GrammarToXMLComposer.h @@ -52,6 +52,7 @@ class GrammarToXMLComposer : public GrammarBase::const_visitor_type { void composeNonterminalAlphabet(std::list<sax::Token>& out, const std::set<alphabet::Symbol>& symbols) const; void composeTerminalAlphabet(std::list<sax::Token>& out, const std::set<alphabet::Symbol>& symbols) const; void composeInitialSymbol(std::list<sax::Token>& out, const alphabet::Symbol& symbol) const; + void composeGeneratesEpsilon(std::list<sax::Token>& out, bool generatesEpsilon) const; void composeRules(std::list<sax::Token>& out, const UnknownGrammar& grammar) const; void composeRules(std::list<sax::Token>& out, const UnrestrictedGrammar& grammar) const; @@ -69,8 +70,9 @@ class GrammarToXMLComposer : public GrammarBase::const_visitor_type { void composeRules(std::list<sax::Token>& out, const RightRG& grammar) const; void composeRuleLHS(std::list<sax::Token>& out, const std::vector<alphabet::Symbol>& symbols) const; - void 
composeRuleSingleSymbolLHS(std::list<sax::Token>& out, const alphabet::Symbol& symbols) const; + void composeRuleSingleSymbolLHS(std::list<sax::Token>& out, const alphabet::Symbol& symbol) const; void composeRuleRHS(std::list<sax::Token>& out, const std::vector<alphabet::Symbol>& symbols) const; + void composeRuleOneOrTwoSymbolsRHS(std::list<sax::Token>& out, const std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>& symbols) const; public: /** * Prints XML representation of UnknownGrammar to the output stream. diff --git a/alib2data/src/grammar/Regular/LeftRG.cpp b/alib2data/src/grammar/Regular/LeftRG.cpp index 210f4d4673..1e497fd010 100644 --- a/alib2data/src/grammar/Regular/LeftRG.cpp +++ b/alib2data/src/grammar/Regular/LeftRG.cpp @@ -32,86 +32,79 @@ GrammarBase* LeftRG::plunder() && { } bool LeftRG::removeTerminalSymbol(const alphabet::Symbol& symbol) { - for(const std::pair<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>>& rule : rules) { - if(rule.first == symbol) - throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); - - for(const std::vector<alphabet::Symbol>& rhs : rule.second) - if(std::find(rhs.begin(), rhs.end(), symbol) != rhs.end()) + for(const std::pair<alphabet::Symbol, std::set<std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>>>& rule : rules) { + for(const std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>& rhs : rule.second) { + if((rhs.is<alphabet::Symbol>() && rhs.get<alphabet::Symbol>() == symbol) || (rhs.is<std::pair<alphabet::Symbol, alphabet::Symbol>>() && rhs.get<std::pair<alphabet::Symbol, alphabet::Symbol>>().second == symbol)) /* is<>() guards get<>(); addRule stores LeftRG pairs as (nonterminal, terminal), so the terminal is .second */ throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + } } return terminalAlphabet.erase(symbol); } bool LeftRG::removeNonterminalSymbol(const alphabet::Symbol& symbol) { - for(const std::pair<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>>& rule : rules) { + for(const 
std::pair<alphabet::Symbol, std::set<std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>>>& rule : rules) { if(rule.first == symbol) throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); - for(const std::vector<alphabet::Symbol>& rhs : rule.second) - if(std::find(rhs.begin(), rhs.end(), symbol) != rhs.end()) + for(const std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>& rhs : rule.second) + if(rhs.is<std::pair<alphabet::Symbol, alphabet::Symbol>>() && rhs.get<std::pair<alphabet::Symbol, alphabet::Symbol>>().first == symbol) /* only pair alternatives carry a nonterminal; addRule stores it in .first */ throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); } if(initialSymbol == symbol) throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is initial symbol."); - return nonterminalAlphabet.erase(symbol); } -bool LeftRG::addRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { - int rSize = rightHandSide.size(); +bool LeftRG::addRule(const alphabet::Symbol& leftHandSide, const std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>& rightHandSide) { + if(!nonterminalAlphabet.count(leftHandSide)) + throw GrammarException("Rule must rewrite nonterminal symbol"); - if(leftHandSide == initialSymbol && rSize == 0) { - for(const auto& rule : rules) { - for(const auto& ruleRHS : rule.second) { - if(any_of(ruleRHS.begin(), ruleRHS.end(), [&](const alphabet::Symbol& symbol) { return initialSymbol == symbol; })) { - throw GrammarException("Initial symbol " + (std::string) initialSymbol + "used on right hand side of already existing rule"); - } - } - } + if(rightHandSide.is<alphabet::Symbol>()) { + const alphabet::Symbol& rhs = rightHandSide.get<alphabet::Symbol>(); - generatesEpsilon = true; - return rules[leftHandSide].insert(rightHandSide).second; - } else if(rSize == 1) { - if(!nonterminalAlphabet.count(leftHandSide)) - throw GrammarException("Rule must rewrite nonterminal symbol"); - - 
if(!terminalAlphabet.count(rightHandSide[0])) + if(!terminalAlphabet.count(rhs)) throw GrammarException("Rule must rewrite to terminal symbol"); return rules[leftHandSide].insert(rightHandSide).second; - } else if(rSize == 2) { - if(!nonterminalAlphabet.count(leftHandSide)) - throw GrammarException("Rule must rewrite nonterminal symbol"); - - if(!nonterminalAlphabet.count(rightHandSide[0])) - throw GrammarException("Rule must rewrite to nonterminal symbol followed by terminal symbol"); + } else { + const std::pair<alphabet::Symbol, alphabet::Symbol>& rhs = rightHandSide.get<std::pair<alphabet::Symbol, alphabet::Symbol>>(); - if(!terminalAlphabet.count(rightHandSide[1])) - throw GrammarException("Rule must rewrite to nonterminal symbol followed by terminal symbol"); + if(!nonterminalAlphabet.count(rhs.first) || !terminalAlphabet.count(rhs.second)) + throw GrammarException("Rule must rewrite to nonterminal symbol followed by terminal symbol"); return rules[leftHandSide].insert(rightHandSide).second; - } else { - throw GrammarException("Invalid size of right hand side of the rule"); } } -const std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> LeftRG::getRules() const { - return rules; +bool LeftRG::addRule(const alphabet::Symbol& leftHandSide, const alphabet::Symbol& rightHandSide) { + std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>> rhs(rightHandSide); + return addRule(leftHandSide, rhs); } -bool LeftRG::removeRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { - int rSize = rightHandSide.size(); +bool LeftRG::addRule(const alphabet::Symbol& leftHandSide, const std::pair<alphabet::Symbol, alphabet::Symbol>& rightHandSide) { + std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>> rhs(rightHandSide); + return addRule(leftHandSide, rhs); +} - if(leftHandSide == initialSymbol && rSize == 0) { - generatesEpsilon = false; - } +const std::map<alphabet::Symbol, 
std::set<std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>>> LeftRG::getRules() const { + return rules; +} + +bool LeftRG::removeRule(const alphabet::Symbol& leftHandSide, const std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>& rightHandSide) { return rules[leftHandSide].erase(rightHandSide); } +void LeftRG::setGeneratesEpsilon(bool genEps) { + generatesEpsilon = genEps; +} + +bool LeftRG::getGeneratesEpsilon() const { + return generatesEpsilon; +} + bool LeftRG::operator==(const GrammarBase& other) const { return other == *this; } diff --git a/alib2data/src/grammar/Regular/LeftRG.h b/alib2data/src/grammar/Regular/LeftRG.h index 2160488a03..442061f6e3 100644 --- a/alib2data/src/grammar/Regular/LeftRG.h +++ b/alib2data/src/grammar/Regular/LeftRG.h @@ -10,16 +10,32 @@ #include "../GrammarBase.h" #include <map> -#include <vector> +#include "../../std/variant.hpp" #include "../common/TerminalNonterminalAlphabetInitialSymbol.h" namespace grammar { /** - * Left regular grammar in chomsky normal form. Type 3 in Chomsky hierarchy. Produces regular languages. + * Left regular grammar in chomsky normal form. Type 3 in Chomsky hierarchy. Produces regular languages. + * Definition is similar to all common definitions of regular grammars. Additionally contains a boolean signaling whether the grammar generates the empty string or not. 
+ * G = (N, T, P, S, E) + * N = nonempty finite set of nonterminal symbols + * T = finite set of terminal symbols - having this empty won't let grammar do much though + * P = set of production rules of the form A -> Ba or A -> a (where A, B \in N and a \in T) + * S = initial nonterminal symbol + * E = boolean signaling whether the grammar generates the empty string or not + * + * This definition has simpler handling of empty string generation and it is compatible with common definitions where the transformation from this definition to the common definition and backwards is trivial */ class LeftRG : public std::element<LeftRG, GrammarBase>, public TerminalNonterminalAlphabetInitialSymbol { - std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> rules; + /** + * Production rules as mapping from nonterminal symbol on the left hand side to set of either terminal symbols or pairs of nonterminal symbol and terminal symbol + */ + std::map<alphabet::Symbol, std::set<std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>>> rules; + + /** + * Boolean signaling whether the grammar generates the empty string or not. 
+ */ bool generatesEpsilon; public: LeftRG(const alphabet::Symbol& initialSymbol); @@ -30,16 +46,23 @@ public: virtual GrammarBase* plunder() &&; - bool addRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide); + bool addRule(const alphabet::Symbol& leftHandSide, const std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>& rightHandSide); + bool addRule(const alphabet::Symbol& leftHandSide, const alphabet::Symbol& rightHandSide); + bool addRule(const alphabet::Symbol& leftHandSide, const std::pair<alphabet::Symbol, alphabet::Symbol>& rightHandSide); - const std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> getRules() const; + const std::map<alphabet::Symbol, std::set<std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>>> getRules() const; - bool removeRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide); + bool removeRule(const alphabet::Symbol& leftHandSide, const std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>& rightHandSide); + bool removeRule(const alphabet::Symbol& leftHandSide, const alphabet::Symbol& rightHandSide); + bool removeRule(const alphabet::Symbol& leftHandSide, const std::pair<alphabet::Symbol, alphabet::Symbol>& rightHandSide); bool removeTerminalSymbol(const alphabet::Symbol& symbol); bool removeNonterminalSymbol(const alphabet::Symbol& symbol); + void setGeneratesEpsilon(bool genEps); + bool getGeneratesEpsilon() const; + virtual bool operator==(const GrammarBase& other) const; virtual bool operator==(const LeftRG& other) const; diff --git a/alib2data/src/grammar/Regular/RightRG.cpp b/alib2data/src/grammar/Regular/RightRG.cpp index 825039e19f..c185eb08b9 100644 --- a/alib2data/src/grammar/Regular/RightRG.cpp +++ b/alib2data/src/grammar/Regular/RightRG.cpp @@ -32,86 +32,79 @@ GrammarBase* RightRG::plunder() && { } bool RightRG::removeTerminalSymbol(const 
alphabet::Symbol& symbol) { - for(const std::pair<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>>& rule : rules) { - if(rule.first == symbol) - throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); - - for(const std::vector<alphabet::Symbol>& rhs : rule.second) - if(std::find(rhs.begin(), rhs.end(), symbol) != rhs.end()) + for(const std::pair<alphabet::Symbol, std::set<std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>>>& rule : rules) { + for(const std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>& rhs : rule.second) { + if((rhs.is<alphabet::Symbol>() && rhs.get<alphabet::Symbol>() == symbol) || (rhs.is<std::pair<alphabet::Symbol, alphabet::Symbol>>() && rhs.get<std::pair<alphabet::Symbol, alphabet::Symbol>>().first == symbol)) /* is<>() guards get<>() so single-symbol alternatives are never read as pairs */ throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + } } return terminalAlphabet.erase(symbol); } bool RightRG::removeNonterminalSymbol(const alphabet::Symbol& symbol) { - for(const std::pair<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>>& rule : rules) { + for(const std::pair<alphabet::Symbol, std::set<std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>>>& rule : rules) { if(rule.first == symbol) throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); - for(const std::vector<alphabet::Symbol>& rhs : rule.second) - if(std::find(rhs.begin(), rhs.end(), symbol) != rhs.end()) + for(const std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>& rhs : rule.second) + if(rhs.is<std::pair<alphabet::Symbol, alphabet::Symbol>>() && rhs.get<std::pair<alphabet::Symbol, alphabet::Symbol>>().second == symbol) /* only pair alternatives carry a nonterminal */ throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); } if(initialSymbol == symbol) throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is initial symbol."); - return nonterminalAlphabet.erase(symbol); } -bool RightRG::addRule(const alphabet::Symbol& 
leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { - int rSize = rightHandSide.size(); +bool RightRG::addRule(const alphabet::Symbol& leftHandSide, const std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>& rightHandSide) { + if(!nonterminalAlphabet.count(leftHandSide)) + throw GrammarException("Rule must rewrite nonterminal symbol"); - if(leftHandSide == initialSymbol && rSize == 0) { - for(const auto& rule : rules) { - for(const auto& ruleRHS : rule.second) { - if(any_of(ruleRHS.begin(), ruleRHS.end(), [&](const alphabet::Symbol& symbol) { return initialSymbol == symbol; })) { - throw GrammarException("Initial symbol " + (std::string) initialSymbol + "used on right hand side of already existing rule"); - } - } - } + if(rightHandSide.is<alphabet::Symbol>()) { + const alphabet::Symbol& rhs = rightHandSide.get<alphabet::Symbol>(); - generatesEpsilon = true; - return rules[leftHandSide].insert(rightHandSide).second; - } else if(rSize == 1) { - if(!nonterminalAlphabet.count(leftHandSide)) - throw GrammarException("Rule must rewrite nonterminal symbol"); - - if(!terminalAlphabet.count(rightHandSide[0])) + if(!terminalAlphabet.count(rhs)) throw GrammarException("Rule must rewrite to terminal symbol"); return rules[leftHandSide].insert(rightHandSide).second; - } else if(rSize == 2) { - if(!nonterminalAlphabet.count(leftHandSide)) - throw GrammarException("Rule must rewrite nonterminal symbol"); - - if(!terminalAlphabet.count(rightHandSide[0])) - throw GrammarException("Rule must rewrite to terminal symbol followed by nonterminal symbol"); + } else { + const std::pair<alphabet::Symbol, alphabet::Symbol>& rhs = rightHandSide.get<std::pair<alphabet::Symbol, alphabet::Symbol>>(); - if(!nonterminalAlphabet.count(rightHandSide[1])) + if(!terminalAlphabet.count(rhs.first) || !nonterminalAlphabet.count(rhs.second)) throw GrammarException("Rule must rewrite to terminal symbol followed by nonterminal symbol"); return 
rules[leftHandSide].insert(rightHandSide).second; - } else { - throw GrammarException("Invalid size of right hand side of the rule"); } } -const std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> RightRG::getRules() const { - return rules; +bool RightRG::addRule(const alphabet::Symbol& leftHandSide, const alphabet::Symbol& rightHandSide) { + std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>> rhs(rightHandSide); + return addRule(leftHandSide, rhs); } -bool RightRG::removeRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { - int rSize = rightHandSide.size(); +bool RightRG::addRule(const alphabet::Symbol& leftHandSide, const std::pair<alphabet::Symbol, alphabet::Symbol>& rightHandSide) { + std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>> rhs(rightHandSide); + return addRule(leftHandSide, rhs); +} - if(leftHandSide == initialSymbol && rSize == 0) { - generatesEpsilon = false; - } +const std::map<alphabet::Symbol, std::set<std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>>> RightRG::getRules() const { + return rules; +} + +bool RightRG::removeRule(const alphabet::Symbol& leftHandSide, const std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>& rightHandSide) { return rules[leftHandSide].erase(rightHandSide); } +void RightRG::setGeneratesEpsilon(bool genEps) { + generatesEpsilon = genEps; +} + +bool RightRG::getGeneratesEpsilon() const { + return generatesEpsilon; +} + bool RightRG::operator==(const GrammarBase& other) const { return other == *this; } diff --git a/alib2data/src/grammar/Regular/RightRG.h b/alib2data/src/grammar/Regular/RightRG.h index 3df8d61d95..e70dce3805 100644 --- a/alib2data/src/grammar/Regular/RightRG.h +++ b/alib2data/src/grammar/Regular/RightRG.h @@ -10,21 +10,32 @@ #include "../GrammarBase.h" #include <map> -#include <vector> +#include "../../std/variant.hpp" #include 
"../common/TerminalNonterminalAlphabetInitialSymbol.h" namespace grammar { /** * Right regular grammar in chomsky normal form. Type 2 in Chomsky hierarchy. Produces regular languages. - * G = (N, T, P, S) + * Definition is similar to all common definitions of regular grammars. Additionally contains a boolean signaling whether or not the grammar generates the empty string. + * G = (N, T, P, S, E) * N = nonempty finite set of nonterminal symbols - * T = finite set of terminal symbols - having this empty wont allow much though - * P = set of production rules of the form A -> aB or A -> a (where A, B \in N and a \in T) or S -> \varepsilon (when S is not on the right hand side of any rule) + * T = finite set of terminal symbols - having this empty won't let the grammar do much though + * P = set of production rules of the form A -> aB or A -> a (where A, B \in N and a \in T) * S = initial nonterminal symbol + * E = boolean signaling whether or not the grammar generates the empty string + * + * This definition handles empty string generation more simply and is compatible with the common definitions: the transformation from this definition to a common definition and back is trivial */ class RightRG : public std::element<RightRG, GrammarBase>, public TerminalNonterminalAlphabetInitialSymbol { - std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> rules; + /** + * Production rules as a mapping from the nonterminal symbol on the left hand side to the set of its right hand sides, each either a single terminal symbol or a pair of a terminal symbol followed by a nonterminal symbol + */ + std::map<alphabet::Symbol, std::set<std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>>> rules; + + /** + * Boolean signaling whether or not the grammar generates the empty string.
+ */ bool generatesEpsilon; public: RightRG(const alphabet::Symbol& initialSymbol); @@ -35,16 +46,23 @@ public: virtual GrammarBase* plunder() &&; - bool addRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide); + bool addRule(const alphabet::Symbol& leftHandSide, const std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>& rightHandSide); + bool addRule(const alphabet::Symbol& leftHandSide, const alphabet::Symbol& rightHandSide); + bool addRule(const alphabet::Symbol& leftHandSide, const std::pair<alphabet::Symbol, alphabet::Symbol>& rightHandSide); - const std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> getRules() const; + const std::map<alphabet::Symbol, std::set<std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>>> getRules() const; - bool removeRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide); + bool removeRule(const alphabet::Symbol& leftHandSide, const std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>& rightHandSide); + bool removeRule(const alphabet::Symbol& leftHandSide, const alphabet::Symbol& rightHandSide); + bool removeRule(const alphabet::Symbol& leftHandSide, const std::pair<alphabet::Symbol, alphabet::Symbol>& rightHandSide); bool removeTerminalSymbol(const alphabet::Symbol& symbol); bool removeNonterminalSymbol(const alphabet::Symbol& symbol); + void setGeneratesEpsilon(bool genEps); + bool getGeneratesEpsilon() const; + virtual bool operator==(const GrammarBase& other) const; virtual bool operator==(const RightRG& other) const; diff --git a/alib2data/test-src/grammar/GrammarTest.cpp b/alib2data/test-src/grammar/GrammarTest.cpp index 5865912580..b150fe8d52 100644 --- a/alib2data/test-src/grammar/GrammarTest.cpp +++ b/alib2data/test-src/grammar/GrammarTest.cpp @@ -82,3 +82,31 @@ void GrammarTest::testUnrestrictedParser() { CPPUNIT_ASSERT( grammar == grammar2 ); } } + 
+void GrammarTest::testRegularParser() { + grammar::RightRG grammar(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1))))); + + grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1))))); + grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))); + grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3))))); + grammar.addTerminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a"))))); + grammar.addTerminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("b"))))); + + grammar.addRule(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1)))), std::make_pair(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2)))))); + grammar.addRule(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2)))), std::make_pair(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("b")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3)))))); + + CPPUNIT_ASSERT( grammar == grammar ); + { + grammar::GrammarToXMLComposer composer; + std::list<sax::Token> tokens = composer.compose(grammar); + std::string tmp; + sax::SaxComposeInterface::printMemory(tmp, tokens); + + std::list<sax::Token> tokens2; + sax::SaxParseInterface::parseMemory(tmp, tokens2); + grammar::GrammarFromXMLParser parser; + grammar::RightRG grammar2 = parser.parseRightRG(tokens2); + + CPPUNIT_ASSERT( grammar == grammar2 ); + } +} diff --git a/alib2data/test-src/grammar/GrammarTest.h b/alib2data/test-src/grammar/GrammarTest.h index b48c9b970f..58afdc92a2 100644 --- a/alib2data/test-src/grammar/GrammarTest.h +++ b/alib2data/test-src/grammar/GrammarTest.h @@ -8,6 +8,8 @@ class GrammarTest : 
public CppUnit::TestFixture CPPUNIT_TEST_SUITE( GrammarTest ); CPPUNIT_TEST( testXMLParser ); CPPUNIT_TEST( testUnrestrictedParser ); + + CPPUNIT_TEST( testRegularParser ); CPPUNIT_TEST_SUITE_END(); public: @@ -16,6 +18,8 @@ public: void testXMLParser(); void testUnrestrictedParser(); + + void testRegularParser(); }; #endif // GRAMMAR_TEST_H_ -- GitLab