From 14658dd0a49ce012834fa1896d54ecba8e79235b Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Thu, 17 Jul 2014 10:15:02 +0200 Subject: [PATCH] internal form of LinearGrammar --- alib2data/src/grammar/ContextFree/LG.cpp | 104 ++++++++++++++---- alib2data/src/grammar/ContextFree/LG.h | 14 ++- .../src/grammar/GrammarFromXMLParser.cpp | 2 +- .../src/grammar/GrammarToXMLComposer.cpp | 27 ++++- alib2data/src/grammar/GrammarToXMLComposer.h | 1 + alib2data/test-src/grammar/GrammarTest.cpp | 32 ++++++ alib2data/test-src/grammar/GrammarTest.h | 2 + 7 files changed, 155 insertions(+), 27 deletions(-) diff --git a/alib2data/src/grammar/ContextFree/LG.cpp b/alib2data/src/grammar/ContextFree/LG.cpp index 82a57a17b0..61a5b80b67 100644 --- a/alib2data/src/grammar/ContextFree/LG.cpp +++ b/alib2data/src/grammar/ContextFree/LG.cpp @@ -32,26 +32,41 @@ GrammarBase* LG::plunder() && { } bool LG::removeTerminalSymbol(const alphabet::Symbol& symbol) { - for(const std::pair<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>>& rule : rules) { - if(rule.first == symbol) - throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); - - for(const std::vector<alphabet::Symbol>& rhs : rule.second) - if(std::find(rhs.begin(), rhs.end(), symbol) != rhs.end()) - throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + for(const std::pair<alphabet::Symbol, std::set<std::variant<std::vector<alphabet::Symbol>, std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>> >> >& rule : rules) { + for(const std::variant<std::vector<alphabet::Symbol>, std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>> >& rhsTmp : rule.second) + if(rhsTmp.is<std::vector<alphabet::Symbol>>()) { + const std::vector<alphabet::Symbol>& rhs = rhsTmp.get<std::vector<alphabet::Symbol>>(); + + if(std::find(rhs.begin(), rhs.end(), symbol) != 
rhs.end()) + throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + } else { + const std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>& rhs = rhsTmp.get<std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>>(); + + const std::vector<alphabet::Symbol>& lPart = std::get<0>(rhs); + if(std::find(lPart.begin(), lPart.end(), symbol) != lPart.end()) + throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + + const std::vector<alphabet::Symbol>& rPart = std::get<2>(rhs); + if(std::find(rPart.begin(), rPart.end(), symbol) != rPart.end()) + throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + } } return terminalAlphabet.erase(symbol); } bool LG::removeNonterminalSymbol(const alphabet::Symbol& symbol) { - for(const std::pair<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>>& rule : rules) { + for(const std::pair<alphabet::Symbol, std::set<std::variant<std::vector<alphabet::Symbol>, std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>> >> >& rule : rules) { if(rule.first == symbol) throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); - for(const std::vector<alphabet::Symbol>& rhs : rule.second) - if(std::find(rhs.begin(), rhs.end(), symbol) != rhs.end()) - throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + for(const std::variant<std::vector<alphabet::Symbol>, std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>> >& rhsTmp : rule.second) + if(rhsTmp.is<std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>>()) { + const std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>& rhs = rhsTmp.get<std::tuple<std::vector<alphabet::Symbol>, 
alphabet::Symbol, std::vector<alphabet::Symbol>>>(); + + if(std::get<1>(rhs) == symbol) + throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + } } if(initialSymbol == symbol) @@ -61,33 +76,78 @@ bool LG::removeNonterminalSymbol(const alphabet::Symbol& symbol) { return nonterminalAlphabet.erase(symbol); } -bool LG::addRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { +bool LG::addRule(const alphabet::Symbol& leftHandSide, const std::variant<std::vector<alphabet::Symbol>, std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>> >& rightHandSide) { if(!nonterminalAlphabet.count(leftHandSide)) throw GrammarException("Rule must rewrite nonterminal symbol"); - std::vector<alphabet::Symbol>::const_iterator iter = rightHandSide.begin(); - for(; iter != rightHandSide.end() && terminalAlphabet.find(*iter) != terminalAlphabet.end(); iter++); + if(rightHandSide.is<std::vector<alphabet::Symbol>>()) { + const std::vector<alphabet::Symbol>& rhs = rightHandSide.get<std::vector<alphabet::Symbol>>(); + for(const auto & symbol : rhs ) { + if(terminalAlphabet.find(symbol) == terminalAlphabet.end()) + throw GrammarException("Symbol " + (std::string) symbol.getSymbol() + " is not a terminal symbol"); + } + + return rules[leftHandSide].insert(rightHandSide).second; + } else { + const std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>& rhs = rightHandSide.get<std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>>(); - if(iter == rightHandSide.end()) return rules[leftHandSide].insert(rightHandSide).second; + for(const auto & symbol : std::get<0>(rhs) ) + if(terminalAlphabet.find(symbol) == terminalAlphabet.end()) + throw GrammarException("Symbol " + (std::string) symbol.getSymbol() + " is not a terminal symbol"); + + if(nonterminalAlphabet.find(std::get<1>(rhs)) == 
nonterminalAlphabet.end()) + throw GrammarException("Symbol " + (std::string) std::get<1>(rhs).getSymbol() + " is not a nonterminal symbol"); + + for(const auto & symbol : std::get<2>(rhs) ) { + if(terminalAlphabet.find(symbol) == terminalAlphabet.end()) + throw GrammarException("Symbol " + (std::string) symbol.getSymbol() + " is not a terminal symbol"); + } + + return rules[leftHandSide].insert(rightHandSide).second; + } +} - if(nonterminalAlphabet.find(*iter) == nonterminalAlphabet.end()) throw GrammarException("Symbol " + (std::string) iter->getSymbol() + " is not a terminal nor nonterminal symbol"); - iter++; +bool LG::addRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { + std::variant<std::vector<alphabet::Symbol>, std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>> > rhs(rightHandSide); + return addRule(leftHandSide, rhs); +} - for(; iter != rightHandSide.end() && terminalAlphabet.find(*iter) != terminalAlphabet.end(); iter++); +bool LG::addRule(const alphabet::Symbol& leftHandSide, const std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>& rightHandSide) { + std::variant<std::vector<alphabet::Symbol>, std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>> > rhs(rightHandSide); + return addRule(leftHandSide, rhs); +} - if(iter == rightHandSide.end()) return rules[leftHandSide].insert(rightHandSide).second; +bool LG::addRawRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { + std::vector<alphabet::Symbol>::const_iterator nonterminalPosition = rightHandSide.begin(); + for(; nonterminalPosition != rightHandSide.end(); nonterminalPosition++) { + if(nonterminalAlphabet.count(*nonterminalPosition)) break; + } - throw GrammarException("Symbol " + (std::string) iter->getSymbol() + " is not a terminal symbol"); + if(nonterminalPosition == rightHandSide.end()) { + 
return addRule(leftHandSide, rightHandSide); + } else { + return addRule(leftHandSide, std::make_tuple(std::vector<alphabet::Symbol>(rightHandSide.begin(), nonterminalPosition), *nonterminalPosition, std::vector<alphabet::Symbol>(nonterminalPosition + 1, rightHandSide.end()))); + } } -const std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> LG::getRules() const { +const std::map<alphabet::Symbol, std::set<std::variant<std::vector<alphabet::Symbol>, std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>> >> > LG::getRules() const { return rules; } -bool LG::removeRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { +bool LG::removeRule(const alphabet::Symbol& leftHandSide, const std::variant<std::vector<alphabet::Symbol>, std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>> >& rightHandSide) { return rules[leftHandSide].erase(rightHandSide); } +bool LG::removeRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { + std::variant<std::vector<alphabet::Symbol>, std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>> > rhs(rightHandSide); + return removeRule(leftHandSide, rhs); +} + +bool LG::removeRule(const alphabet::Symbol& leftHandSide, const std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>& rightHandSide) { + std::variant<std::vector<alphabet::Symbol>, std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>> > rhs(rightHandSide); + return removeRule(leftHandSide, rhs); +} + bool LG::operator==(const GrammarBase& other) const { return other == *this; } diff --git a/alib2data/src/grammar/ContextFree/LG.h b/alib2data/src/grammar/ContextFree/LG.h index 6c1594f8bd..61fae87f03 100644 --- a/alib2data/src/grammar/ContextFree/LG.h +++ b/alib2data/src/grammar/ContextFree/LG.h @@ -10,16 +10,18 
@@ #include "../GrammarBase.h" #include <map> +#include <tuple> #include <vector> +#include "../../std/variant.hpp" #include "../common/TerminalNonterminalAlphabetInitialSymbol.h" namespace grammar { /** - * Linear grammar. Type 2 in Chomsky hierarchy. Produces context free languages. + * Linear grammar. Type 2 in Chomsky hierarchy. Produces linear languages, a proper subset of context free languages. */ class LG : public std::element<LG, GrammarBase>, public TerminalNonterminalAlphabetInitialSymbol { - std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> rules; + std::map<alphabet::Symbol, std::set<std::variant<std::vector<alphabet::Symbol>, std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>> >> > rules; public: LG(const alphabet::Symbol& initialSymbol); @@ -29,11 +31,17 @@ public: virtual GrammarBase* plunder() &&; + bool addRule(const alphabet::Symbol& leftHandSide, const std::variant<std::vector<alphabet::Symbol>, std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>> >& rightHandSide); bool addRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide); + bool addRule(const alphabet::Symbol& leftHandSide, const std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>& rightHandSide); - const std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> getRules() const; + bool addRawRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide); + const std::map<alphabet::Symbol, std::set<std::variant<std::vector<alphabet::Symbol>, std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>> >> > getRules() const; + + bool removeRule(const alphabet::Symbol& leftHandSide, const std::variant<std::vector<alphabet::Symbol>, std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>> >& rightHandSide); bool removeRule(const 
alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide); + bool removeRule(const alphabet::Symbol& leftHandSide, const std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>& rightHandSide); bool removeTerminalSymbol(const alphabet::Symbol& symbol); diff --git a/alib2data/src/grammar/GrammarFromXMLParser.cpp b/alib2data/src/grammar/GrammarFromXMLParser.cpp index 8f57b7b443..ede304e500 100644 --- a/alib2data/src/grammar/GrammarFromXMLParser.cpp +++ b/alib2data/src/grammar/GrammarFromXMLParser.cpp @@ -445,7 +445,7 @@ void GrammarFromXMLParser::parseRule(std::list<sax::Token>& input, CFG& grammar) void GrammarFromXMLParser::parseRule(std::list<sax::Token>& input, LG& grammar) const { alphabet::Symbol lhs = parseRuleSingleSymbolLHS(input); std::vector<alphabet::Symbol> rhs = parseRuleRHS(input); - grammar.addRule(lhs, rhs); + grammar.addRawRule(lhs, rhs); } void GrammarFromXMLParser::parseRule(std::list<sax::Token>& input, RightRG& grammar) const { diff --git a/alib2data/src/grammar/GrammarToXMLComposer.cpp b/alib2data/src/grammar/GrammarToXMLComposer.cpp index d3f2e027a5..e87ff418fb 100644 --- a/alib2data/src/grammar/GrammarToXMLComposer.cpp +++ b/alib2data/src/grammar/GrammarToXMLComposer.cpp @@ -405,7 +405,7 @@ void GrammarToXMLComposer::composeRules(std::list<sax::Token>& out, const LG& gr out.push_back(sax::Token("rule", sax::Token::TokenType::START_ELEMENT)); composeRuleSingleSymbolLHS(out, rule.first); - composeRuleRHS(out, rhs); + composeRuleLGRHS(out, rhs); out.push_back(sax::Token("rule", sax::Token::TokenType::END_ELEMENT)); } @@ -516,6 +516,31 @@ void GrammarToXMLComposer::composeRuleOneOrTwoSymbolsRHS(std::list<sax::Token>& out.push_back(sax::Token("rhs", sax::Token::TokenType::END_ELEMENT)); } +void GrammarToXMLComposer::composeRuleLGRHS(std::list<sax::Token>& out, const std::variant<std::vector<alphabet::Symbol>, std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, 
std::vector<alphabet::Symbol>>>& symbols) const { + out.push_back(sax::Token("rhs", sax::Token::TokenType::START_ELEMENT)); + if(symbols.is<std::vector<alphabet::Symbol>>()) { + const std::vector<alphabet::Symbol>& rhs = symbols.get<std::vector<alphabet::Symbol>>(); + if(rhs.size() == 0) { + out.push_back(sax::Token("epsilon", sax::Token::TokenType::START_ELEMENT)); + out.push_back(sax::Token("epsilon", sax::Token::TokenType::END_ELEMENT)); + } else { + for (const auto& symbol : symbols.get<std::vector<alphabet::Symbol>>()) { + out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(symbol)); + } + } + } else { + const std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>& rhs = symbols.get<std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>>(); + for (const auto& symbol : std::get<0>(rhs)) { + out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(symbol)); + } + out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(std::get<1>(rhs))); + for (const auto& symbol : std::get<2>(rhs)) { + out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(symbol)); + } + } + out.push_back(sax::Token("rhs", sax::Token::TokenType::END_ELEMENT)); +} + void GrammarToXMLComposer::composeRuleLeftLGRHS(std::list<sax::Token>& out, const std::variant<std::vector<alphabet::Symbol>, std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>>& symbols) const { out.push_back(sax::Token("rhs", sax::Token::TokenType::START_ELEMENT)); if(symbols.is<std::vector<alphabet::Symbol>>()) { diff --git a/alib2data/src/grammar/GrammarToXMLComposer.h b/alib2data/src/grammar/GrammarToXMLComposer.h index 32268126b1..0e8923f6b1 100644 --- a/alib2data/src/grammar/GrammarToXMLComposer.h +++ b/alib2data/src/grammar/GrammarToXMLComposer.h @@ -75,6 +75,7 @@ class GrammarToXMLComposer : public GrammarBase::const_visitor_type { void composeRuleOneOrTwoSymbolsRHS(std::list<sax::Token>& out, 
const std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>& symbols) const; void composeRuleLeftLGRHS(std::list<sax::Token>& out, const std::variant<std::vector<alphabet::Symbol>, std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>>& symbols) const; void composeRuleRightLGRHS(std::list<sax::Token>& out, const std::variant<std::vector<alphabet::Symbol>, std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol>>& symbols) const; + void composeRuleLGRHS(std::list<sax::Token>& out, const std::variant<std::vector<alphabet::Symbol>, std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>>& symbols) const; public: /** * Prints XML representation of UnknownGrammar to the output stream. diff --git a/alib2data/test-src/grammar/GrammarTest.cpp b/alib2data/test-src/grammar/GrammarTest.cpp index f85be63828..d3dbb5fc62 100644 --- a/alib2data/test-src/grammar/GrammarTest.cpp +++ b/alib2data/test-src/grammar/GrammarTest.cpp @@ -142,3 +142,35 @@ void GrammarTest::testRegularParser() { } } } + +void GrammarTest::testContextFreeParser() { + { + grammar::LG grammar(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1))))); + + grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1))))); + grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))); + grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3))))); + grammar.addTerminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a"))))); + grammar.addTerminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("b"))))); + + grammar.addRule(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1)))), std::make_tuple(std::vector<alphabet::Symbol> 
{alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a"))))}, alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2)))), std::vector<alphabet::Symbol>{})); + grammar.addRule(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2)))), std::make_tuple(std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("b"))))}, alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3)))), std::vector<alphabet::Symbol>{})); + grammar.addRule(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1)))), std::make_tuple(std::vector<alphabet::Symbol> {}, alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3)))), std::vector<alphabet::Symbol>{})); + grammar.addRule(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1)))), std::make_tuple(std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a"))))}, alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2)))), std::vector<alphabet::Symbol>{})); + + CPPUNIT_ASSERT( grammar == grammar ); + { + grammar::GrammarToXMLComposer composer; + std::list<sax::Token> tokens = composer.compose(grammar); + std::string tmp; + sax::SaxComposeInterface::printMemory(tmp, tokens); + + std::list<sax::Token> tokens2; + sax::SaxParseInterface::parseMemory(tmp, tokens2); + grammar::GrammarFromXMLParser parser; + grammar::LG grammar2 = parser.parseLG(tokens2); + + CPPUNIT_ASSERT( grammar == grammar2 ); + } + } +} diff --git a/alib2data/test-src/grammar/GrammarTest.h b/alib2data/test-src/grammar/GrammarTest.h index 58afdc92a2..e2d2b9b334 100644 --- a/alib2data/test-src/grammar/GrammarTest.h +++ b/alib2data/test-src/grammar/GrammarTest.h 
@@ -9,6 +9,7 @@ class GrammarTest : public CppUnit::TestFixture CPPUNIT_TEST( testXMLParser ); CPPUNIT_TEST( testUnrestrictedParser ); + CPPUNIT_TEST( testContextFreeParser ); CPPUNIT_TEST( testRegularParser ); CPPUNIT_TEST_SUITE_END(); @@ -19,6 +20,7 @@ public: void testXMLParser(); void testUnrestrictedParser(); + void testContextFreeParser(); void testRegularParser(); }; -- GitLab