From 82e04846db893385f8efad37b0a0e71732cfeab6 Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Thu, 17 Jul 2014 22:21:14 +0200 Subject: [PATCH] GNF internal form --- alib2data/src/grammar/ContextFree/GNF.cpp | 82 +++++++------------ alib2data/src/grammar/ContextFree/GNF.h | 11 ++- .../src/grammar/GrammarFromXMLParser.cpp | 16 +++- alib2data/src/grammar/GrammarFromXMLParser.h | 1 + .../src/grammar/GrammarToXMLComposer.cpp | 15 +++- alib2data/src/grammar/GrammarToXMLComposer.h | 1 + alib2data/test-src/grammar/GrammarTest.cpp | 30 +++++++ 7 files changed, 96 insertions(+), 60 deletions(-) diff --git a/alib2data/src/grammar/ContextFree/GNF.cpp b/alib2data/src/grammar/ContextFree/GNF.cpp index 45e460387f..4bf533992a 100644 --- a/alib2data/src/grammar/ContextFree/GNF.cpp +++ b/alib2data/src/grammar/ContextFree/GNF.cpp @@ -32,12 +32,9 @@ GrammarBase* GNF::plunder() && { } bool GNF::removeTerminalSymbol(const alphabet::Symbol& symbol) { - for(const std::pair<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>>& rule : rules) { - if(rule.first == symbol) - throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); - - for(const std::vector<alphabet::Symbol>& rhs : rule.second) - if(std::find(rhs.begin(), rhs.end(), symbol) != rhs.end()) + for(const std::pair<alphabet::Symbol, std::set<std::pair<alphabet::Symbol, std::vector<alphabet::Symbol> >> >& rule : rules) { + for(const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol> >& rhs : rule.second) + if(rhs.first == symbol) throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); } @@ -45,12 +42,12 @@ bool GNF::removeTerminalSymbol(const alphabet::Symbol& symbol) { } bool GNF::removeNonterminalSymbol(const alphabet::Symbol& symbol) { - for(const std::pair<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>>& rule : rules) { + for(const std::pair<alphabet::Symbol, std::set<std::pair<alphabet::Symbol, std::vector<alphabet::Symbol> >> >& rule : rules) { if(rule.first == symbol) throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); - for(const std::vector<alphabet::Symbol>& rhs : rule.second) - if(std::find(rhs.begin(), rhs.end(), symbol) != rhs.end()) + for(const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol> >& rhs : rule.second) + if(std::find(rhs.second.begin(), rhs.second.end(), symbol) != rhs.second.end()) throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); } @@ -61,59 +58,36 @@ bool GNF::removeNonterminalSymbol(const alphabet::Symbol& symbol) { return nonterminalAlphabet.erase(symbol); } -bool GNF::addRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { - int rSize = rightHandSide.size(); - - if(rSize == 0 && leftHandSide == initialSymbol) { - for(const auto& rule : rules) { - for(const auto& ruleRHS : rule.second) { - if(any_of(ruleRHS.begin(), ruleRHS.end(), [&](const alphabet::Symbol& symbol) { return initialSymbol == symbol; })) { - throw GrammarException("Initial symbol " + (std::string) initialSymbol + "used on right hand side of already existing rule"); - } - } - } - - generatesEpsilon = true; - return rules[leftHandSide].insert(rightHandSide).second; - } else if(rSize == 1) { - if(!nonterminalAlphabet.count(leftHandSide)) - throw GrammarException("Rule must rewrite nonterminal symbol"); - - if(!terminalAlphabet.count(rightHandSide[0])) - throw GrammarException("Rule must rewrite to terminal symbol"); - - return rules[leftHandSide].insert(rightHandSide).second; - } else { - if(!nonterminalAlphabet.count(leftHandSide)) - throw GrammarException("Rule must rewrite nonterminal symbol"); - - if(!terminalAlphabet.count(rightHandSide[0])) - throw GrammarException("First symbol of the rule must be a terminal symbol"); - - for(std::vector<alphabet::Symbol>::const_iterator iter = rightHandSide.begin() + 1; iter != rightHandSide.end(); iter++) { - if(nonterminalAlphabet.find(*iter) == nonterminalAlphabet.end()) - throw GrammarException("Symbol \"" + (std::string) iter->getSymbol() + "\" is not a nonterminal symbol"); - if(generatesEpsilon && *iter == initialSymbol) - throw GrammarException("Initial symbol is already allowed to be rewritten to epsilon"); - } - - return rules[leftHandSide].insert(rightHandSide).second; - } +bool GNF::addRule(const alphabet::Symbol& leftHandSide, const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol> >& rightHandSide) { + if(!nonterminalAlphabet.count(leftHandSide)) + throw GrammarException("Rule must rewrite nonterminal symbol"); + + if(!terminalAlphabet.count(rightHandSide.first)) + throw GrammarException("Rule must rewrite to terminal symbol"); + + for(const alphabet::Symbol& rhsNTs : rightHandSide.second) + if(nonterminalAlphabet.find(rhsNTs) == nonterminalAlphabet.end()) + throw GrammarException("Symbol \"" + (std::string) rhsNTs.getSymbol() + "\" is not a nonterminal symbol"); + + return rules[leftHandSide].insert(rightHandSide).second; } -const std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> GNF::getRules() const { +const std::map<alphabet::Symbol, std::set<std::pair<alphabet::Symbol, std::vector<alphabet::Symbol> >> > GNF::getRules() const { return rules; } -bool GNF::removeRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { - int rSize = rightHandSide.size(); - - if(leftHandSide == initialSymbol && rSize == 0) { - generatesEpsilon = false; - } +bool GNF::removeRule(const alphabet::Symbol& leftHandSide, const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol> >& rightHandSide) { return rules[leftHandSide].erase(rightHandSide); } +void GNF::setGeneratesEpsilon(bool genEps) { + generatesEpsilon = genEps; +} + +bool GNF::getGeneratesEpsilon() const { + return generatesEpsilon; +} + bool GNF::operator==(const GrammarBase& other) const { return other == *this; } diff --git a/alib2data/src/grammar/ContextFree/GNF.h b/alib2data/src/grammar/ContextFree/GNF.h index c711939be7..b3341ec7be 100644 --- a/alib2data/src/grammar/ContextFree/GNF.h +++ b/alib2data/src/grammar/ContextFree/GNF.h @@ -19,7 +19,7 @@ namespace grammar { * Context free grammar in greibach normal form. Type 2 in Chomsky hierarchy. Produces context free languages. */ class GNF : public std::element<GNF, GrammarBase>, public TerminalNonterminalAlphabetInitialSymbol { - std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> rules; + std::map<alphabet::Symbol, std::set<std::pair<alphabet::Symbol, std::vector<alphabet::Symbol> >> > rules; bool generatesEpsilon; public: GNF(const alphabet::Symbol& initialSymbol); @@ -30,16 +30,19 @@ public: virtual GrammarBase* plunder() &&; - bool addRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide); + bool addRule(const alphabet::Symbol& leftHandSide, const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol> >& rightHandSide); - const std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> getRules() const; + const std::map<alphabet::Symbol, std::set<std::pair<alphabet::Symbol, std::vector<alphabet::Symbol> >> > getRules() const; - bool removeRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide); + bool removeRule(const alphabet::Symbol& leftHandSide, const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol> >& rightHandSide); bool removeTerminalSymbol(const alphabet::Symbol& symbol); bool removeNonterminalSymbol(const alphabet::Symbol& symbol); + void setGeneratesEpsilon(bool genEps); + bool getGeneratesEpsilon() const; + virtual bool operator==(const GrammarBase& other) const; virtual bool operator==(const GNF& other) const; diff --git a/alib2data/src/grammar/GrammarFromXMLParser.cpp b/alib2data/src/grammar/GrammarFromXMLParser.cpp index 75994a8499..9e60ddbda2 100644 --- a/alib2data/src/grammar/GrammarFromXMLParser.cpp +++ b/alib2data/src/grammar/GrammarFromXMLParser.cpp @@ -177,6 +177,9 @@ GNF GrammarFromXMLParser::parseGNF(std::list<sax::Token>& input) const { parseRules(input, grammar); + bool generatesEpsilon = parseGeneratesEpsilon(input); + grammar.setGeneratesEpsilon(generatesEpsilon); + popToken(input, sax::Token::TokenType::END_ELEMENT, "GNF"); return grammar; } @@ -426,7 +429,7 @@ void GrammarFromXMLParser::parseRule(std::list<sax::Token>& input, CSG& grammar) void GrammarFromXMLParser::parseRule(std::list<sax::Token>& input, GNF& grammar) const { alphabet::Symbol lhs = parseRuleSingleSymbolLHS(input); - std::vector<alphabet::Symbol> rhs = parseRuleRHS(input); + std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>> rhs = parseRuleGNFRHS(input); grammar.addRule(lhs, rhs); } @@ -499,6 +502,17 @@ alphabet::Symbol GrammarFromXMLParser::parseRuleSingleSymbolLHS(std::list<sax::T return lhs; } +std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>> GrammarFromXMLParser::parseRuleGNFRHS(std::list<sax::Token>& input) const { + popToken(input, sax::Token::TokenType::START_ELEMENT, "rhs"); + alphabet::Symbol first = alib::FromXMLParsers::symbolParser.parse(input); + std::vector<alphabet::Symbol> second; + while (isTokenType(input, sax::Token::TokenType::START_ELEMENT)) { + second.push_back(alib::FromXMLParsers::symbolParser.parse(input)); + } + popToken(input, sax::Token::TokenType::END_ELEMENT, "rhs"); + return std::make_pair(first, second); +} + std::vector<alphabet::Symbol> GrammarFromXMLParser::parseRuleRHS(std::list<sax::Token>& input) const { std::vector<alphabet::Symbol> rhs; popToken(input, sax::Token::TokenType::START_ELEMENT, "rhs"); diff --git a/alib2data/src/grammar/GrammarFromXMLParser.h b/alib2data/src/grammar/GrammarFromXMLParser.h index fe4c8efbaa..ca218f90dc 100644 --- a/alib2data/src/grammar/GrammarFromXMLParser.h +++ b/alib2data/src/grammar/GrammarFromXMLParser.h @@ -45,6 +45,7 @@ protected: std::vector<alphabet::Symbol> parseRuleLHS(std::list<sax::Token>& input) const; alphabet::Symbol parseRuleSingleSymbolLHS(std::list<sax::Token>& input) const; + std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>> parseRuleGNFRHS(std::list<sax::Token>& input) const; std::vector<alphabet::Symbol> parseRuleRHS(std::list<sax::Token>& input) const; std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>> parseRuleOneOrTwoSymbolsRHS(std::list<sax::Token>& input) const; diff --git a/alib2data/src/grammar/GrammarToXMLComposer.cpp b/alib2data/src/grammar/GrammarToXMLComposer.cpp index 3ea70b05e0..2939f67ead 100644 --- a/alib2data/src/grammar/GrammarToXMLComposer.cpp +++ b/alib2data/src/grammar/GrammarToXMLComposer.cpp @@ -154,6 +154,7 @@ std::list<sax::Token> GrammarToXMLComposer::compose(const GNF& grammar) const { composeTerminalAlphabet(out, grammar.getTerminalAlphabet()); composeInitialSymbol(out, grammar.getInitialSymbol()); composeRules(out, grammar); + composeGeneratesEpsilon(out, grammar.getGeneratesEpsilon()); out.push_back(sax::Token("GNF", sax::Token::TokenType::END_ELEMENT)); return out; @@ -339,7 +340,7 @@ void GrammarToXMLComposer::composeRules(std::list<sax::Token>& out, const GNF& g out.push_back(sax::Token("rule", sax::Token::TokenType::START_ELEMENT)); composeRuleSingleSymbolLHS(out, rule.first); - composeRuleRHS(out, rhs); + composeRuleGNFRHS(out, rhs); out.push_back(sax::Token("rule", sax::Token::TokenType::END_ELEMENT)); } @@ -518,6 +519,18 @@ void GrammarToXMLComposer::composeRuleOneOrTwoSymbolsRHS(std::list<sax::Token>& out.push_back(sax::Token("rhs", sax::Token::TokenType::END_ELEMENT)); } +void GrammarToXMLComposer::composeRuleGNFRHS(std::list<sax::Token>& out, const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>& symbols) const { + out.push_back(sax::Token("rhs", sax::Token::TokenType::START_ELEMENT)); + + out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(symbols.first)); + + for (const auto& symbol : symbols.second) { + out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(symbol)); + } + + out.push_back(sax::Token("rhs", sax::Token::TokenType::END_ELEMENT)); +} + void GrammarToXMLComposer::composeRuleLGRHS(std::list<sax::Token>& out, const std::variant<std::vector<alphabet::Symbol>, std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>>& symbols) const { out.push_back(sax::Token("rhs", sax::Token::TokenType::START_ELEMENT)); if(symbols.is<std::vector<alphabet::Symbol>>()) { diff --git a/alib2data/src/grammar/GrammarToXMLComposer.h b/alib2data/src/grammar/GrammarToXMLComposer.h index 0e8923f6b1..427eb2b92d 100644 --- a/alib2data/src/grammar/GrammarToXMLComposer.h +++ b/alib2data/src/grammar/GrammarToXMLComposer.h @@ -73,6 +73,7 @@ class GrammarToXMLComposer : public GrammarBase::const_visitor_type { void composeRuleSingleSymbolLHS(std::list<sax::Token>& out, const alphabet::Symbol& symbol) const; void composeRuleRHS(std::list<sax::Token>& out, const std::vector<alphabet::Symbol>& symbols) const; void composeRuleOneOrTwoSymbolsRHS(std::list<sax::Token>& out, const std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>& symbols) const; + void composeRuleGNFRHS(std::list<sax::Token>& out, const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>& symbols) const; void composeRuleLeftLGRHS(std::list<sax::Token>& out, const std::variant<std::vector<alphabet::Symbol>, std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>>& symbols) const; void composeRuleRightLGRHS(std::list<sax::Token>& out, const std::variant<std::vector<alphabet::Symbol>, std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol>>& symbols) const; void composeRuleLGRHS(std::list<sax::Token>& out, const std::variant<std::vector<alphabet::Symbol>, std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>>& symbols) const; diff --git a/alib2data/test-src/grammar/GrammarTest.cpp b/alib2data/test-src/grammar/GrammarTest.cpp index ab2c4c8a09..d365f883c5 100644 --- a/alib2data/test-src/grammar/GrammarTest.cpp +++ b/alib2data/test-src/grammar/GrammarTest.cpp @@ -260,6 +260,36 @@ void GrammarTest::testContextFreeParser() { grammar::GrammarFromXMLParser parser; grammar::CNF grammar2 = parser.parseCNF(tokens2); + CPPUNIT_ASSERT( grammar == grammar2 ); + } + } + { + grammar::GNF grammar(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1))))); + + grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1))))); + grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))); + grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3))))); + grammar.addTerminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a"))))); + grammar.addTerminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("b"))))); + grammar.setGeneratesEpsilon(true); + + grammar.addRule(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1)))), std::make_pair(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))})); + grammar.addRule(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2)))), std::make_pair(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("b")))), std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3))))})); + grammar.addRule(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3)))), std::make_pair(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), std::vector<alphabet::Symbol> {})); + grammar.addRule(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1)))), std::make_pair(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), std::vector<alphabet::Symbol> {})); + + CPPUNIT_ASSERT( grammar == grammar ); + { + grammar::GrammarToXMLComposer composer; + std::list<sax::Token> tokens = composer.compose(grammar); + std::string tmp; + sax::SaxComposeInterface::printMemory(tmp, tokens); + + std::list<sax::Token> tokens2; + sax::SaxParseInterface::parseMemory(tmp, tokens2); + grammar::GrammarFromXMLParser parser; + grammar::GNF grammar2 = parser.parseGNF(tokens2); + CPPUNIT_ASSERT( grammar == grammar2 ); } } -- GitLab