From 8e96c8b6d8adab3846ccaf2575864a3f508b4680 Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Fri, 18 Jul 2014 10:34:18 +0200 Subject: [PATCH] ContextPreservingUnrestrictedGrammar internal form --- .../src/grammar/GrammarFromXMLParser.cpp | 6 +- .../src/grammar/GrammarToXMLComposer.cpp | 4 +- .../ContextPreservingUnrestrictedGrammar.cpp | 64 ++++++++++--------- .../ContextPreservingUnrestrictedGrammar.h | 11 ++-- alib2data/test-src/grammar/GrammarTest.cpp | 29 +++++++++ 5 files changed, 74 insertions(+), 40 deletions(-) diff --git a/alib2data/src/grammar/GrammarFromXMLParser.cpp b/alib2data/src/grammar/GrammarFromXMLParser.cpp index 3bff447a56..547cd3d624 100644 --- a/alib2data/src/grammar/GrammarFromXMLParser.cpp +++ b/alib2data/src/grammar/GrammarFromXMLParser.cpp @@ -416,9 +416,11 @@ void GrammarFromXMLParser::parseRule(std::list<sax::Token>& input, UnrestrictedG } void GrammarFromXMLParser::parseRule(std::list<sax::Token>& input, ContextPreservingUnrestrictedGrammar& grammar) const { - std::vector<alphabet::Symbol> lhs = parseRuleLHS(input); + std::vector<alphabet::Symbol> lContext = parseRuleLContext(input); + alphabet::Symbol lhs = parseRuleSingleSymbolLHS(input); + std::vector<alphabet::Symbol> rContext = parseRuleRContext(input); std::vector<alphabet::Symbol> rhs = parseRuleRHS(input); - grammar.addRule(lhs, rhs); + grammar.addRule(lContext, lhs, rContext, rhs); } void GrammarFromXMLParser::parseRule(std::list<sax::Token>& input, NonContractingGrammar& grammar) const { diff --git a/alib2data/src/grammar/GrammarToXMLComposer.cpp b/alib2data/src/grammar/GrammarToXMLComposer.cpp index 41a592d3d1..90a648879d 100644 --- a/alib2data/src/grammar/GrammarToXMLComposer.cpp +++ b/alib2data/src/grammar/GrammarToXMLComposer.cpp @@ -290,7 +290,9 @@ void GrammarToXMLComposer::composeRules(std::list<sax::Token>& out, const Contex for(const auto& rhs : rule.second) { out.push_back(sax::Token("rule", sax::Token::TokenType::START_ELEMENT)); - composeRuleLHS(out, rule.first); + composeRuleLContext(out, std::get<0>(rule.first)); + composeRuleSingleSymbolLHS(out, std::get<1>(rule.first)); + composeRuleRContext(out, std::get<2>(rule.first)); composeRuleRHS(out, rhs); out.push_back(sax::Token("rule", sax::Token::TokenType::END_ELEMENT)); diff --git a/alib2data/src/grammar/Unrestricted/ContextPreservingUnrestrictedGrammar.cpp b/alib2data/src/grammar/Unrestricted/ContextPreservingUnrestrictedGrammar.cpp index 3cf0060277..6938804fc1 100644 --- a/alib2data/src/grammar/Unrestricted/ContextPreservingUnrestrictedGrammar.cpp +++ b/alib2data/src/grammar/Unrestricted/ContextPreservingUnrestrictedGrammar.cpp @@ -32,9 +32,14 @@ GrammarBase* ContextPreservingUnrestrictedGrammar::plunder() && { } bool ContextPreservingUnrestrictedGrammar::removeTerminalSymbol(const alphabet::Symbol& symbol) { - for(const std::pair<std::vector<alphabet::Symbol>, std::set<std::vector<alphabet::Symbol>>>& rule : rules) { - if(std::find(rule.first.begin(), rule.first.end(), symbol) != rule.first.end()) - throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + for(const std::pair<std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>, std::set<std::vector<alphabet::Symbol>>>& rule : rules) { + for(const alphabet::Symbol& lCont : std::get<0>(rule.first)) + if(lCont == symbol) + throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + + for(const alphabet::Symbol& rCont : std::get<2>(rule.first)) + if(rCont == symbol) + throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); for(const std::vector<alphabet::Symbol>& rhs : rule.second) if(std::find(rhs.begin(), rhs.end(), symbol) != rhs.end()) @@ -45,10 +50,18 @@ bool ContextPreservingUnrestrictedGrammar::removeTerminalSymbol(const alphabet:: } bool ContextPreservingUnrestrictedGrammar::removeNonterminalSymbol(const alphabet::Symbol& symbol) { - for(const std::pair<std::vector<alphabet::Symbol>, std::set<std::vector<alphabet::Symbol>>>& rule : rules) { - if(std::find(rule.first.begin(), rule.first.end(), symbol) != rule.first.end()) + for(const std::pair<std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>, std::set<std::vector<alphabet::Symbol>>>& rule : rules) { + for(const alphabet::Symbol& lCont : std::get<0>(rule.first)) + if(lCont == symbol) + throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + + if(std::get<1>(rule.first) == symbol) throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + for(const alphabet::Symbol& rCont : std::get<2>(rule.first)) + if(rCont == symbol) + throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + for(const std::vector<alphabet::Symbol>& rhs : rule.second) if(std::find(rhs.begin(), rhs.end(), symbol) != rhs.end()) throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); @@ -61,45 +74,34 @@ bool ContextPreservingUnrestrictedGrammar::removeNonterminalSymbol(const alphabe return nonterminalAlphabet.erase(symbol); } -bool ContextPreservingUnrestrictedGrammar::addRule(const std::vector<alphabet::Symbol>& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { - int lSize = leftHandSide.size(); - int rSize = rightHandSide.size(); - - if(lSize > rSize + 1) - throw GrammarException("Invalid size of right hand side of a rule"); - - int lContext; - int rContext; - for(lContext = 0; lContext < lSize - 1 && leftHandSide[lContext] == rightHandSide[lContext]; lContext++); - for(rContext = 0; rContext < lSize - 1 && leftHandSide[lSize - rContext] == rightHandSide[rSize - rContext]; rContext++); - - if(lContext + rContext + 1 < lSize) { - throw GrammarException("Rule must rewrite only one symbol"); - } else - if(lContext + rContext + 1 == lSize) { - if(!nonterminalAlphabet.count(leftHandSide[lContext - 1]) && !nonterminalAlphabet.count(leftHandSide[lSize - rContext])) throw GrammarException("Rule must rewrite nonterminal symbol"); - } else - if(/* lContext + rContext + 1 > lSize */ std::all_of(leftHandSide.end() - rContext, leftHandSide.begin() + lContext + 1, [&](const alphabet::Symbol symbol) {return !nonterminalAlphabet.count(symbol);})) - throw GrammarException("Rule must rewrite nonterminal symbol"); +bool ContextPreservingUnrestrictedGrammar::addRule(const std::vector<alphabet::Symbol>& lContext, const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rContext, const std::vector<alphabet::Symbol>& rightHandSide) { + for(const alphabet::Symbol& symbol : lContext) { + if(terminalAlphabet.find(symbol) == terminalAlphabet.end() && nonterminalAlphabet.find(symbol) == nonterminalAlphabet.end()) + throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is not neither terminal nor nonterminal symbol"); + } + if(!nonterminalAlphabet.count(leftHandSide)) + throw GrammarException("Rule must rewrite nonterminal symbol"); - for(const alphabet::Symbol& symbol : leftHandSide) + for(const alphabet::Symbol& symbol : rContext) { if(terminalAlphabet.find(symbol) == terminalAlphabet.end() && nonterminalAlphabet.find(symbol) == nonterminalAlphabet.end()) throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is not neither terminal nor nonterminal symbol"); + } - for(const alphabet::Symbol& symbol : rightHandSide) + for(const alphabet::Symbol& symbol : rightHandSide) { if(terminalAlphabet.find(symbol) == terminalAlphabet.end() && nonterminalAlphabet.find(symbol) == nonterminalAlphabet.end()) throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is not neither terminal nor nonterminal symbol"); + } - return rules[leftHandSide].insert(rightHandSide).second; + return rules[make_tuple(lContext, leftHandSide, rContext)].insert(rightHandSide).second; } -const std::map<std::vector<alphabet::Symbol>, std::set<std::vector<alphabet::Symbol>>> ContextPreservingUnrestrictedGrammar::getRules() const { +const std::map<std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>, std::set<std::vector<alphabet::Symbol>>> ContextPreservingUnrestrictedGrammar::getRules() const { return rules; } -bool ContextPreservingUnrestrictedGrammar::removeRule(const std::vector<alphabet::Symbol>& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { - return rules[leftHandSide].erase(rightHandSide); +bool ContextPreservingUnrestrictedGrammar::removeRule(const std::vector<alphabet::Symbol>& lContext, const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rContext, const std::vector<alphabet::Symbol>& rightHandSide) { + return rules[make_tuple(lContext, leftHandSide, rContext)].erase(rightHandSide); } bool ContextPreservingUnrestrictedGrammar::operator==(const GrammarBase& other) const { diff --git a/alib2data/src/grammar/Unrestricted/ContextPreservingUnrestrictedGrammar.h b/alib2data/src/grammar/Unrestricted/ContextPreservingUnrestrictedGrammar.h index 079e023c92..81784fb841 100644 --- a/alib2data/src/grammar/Unrestricted/ContextPreservingUnrestrictedGrammar.h +++ b/alib2data/src/grammar/Unrestricted/ContextPreservingUnrestrictedGrammar.h @@ -16,11 +16,10 @@ namespace grammar { /** - * Context preserving unrestricted grammar. Type 0 in Chomsky hierarchy. Produces recursively enumerable language. + * Epsilon free context free grammar. Type 2 in Chomsky hierarchy. Produces context free languages. */ class ContextPreservingUnrestrictedGrammar : public std::element<ContextPreservingUnrestrictedGrammar, GrammarBase>, public TerminalNonterminalAlphabetInitialSymbol { - std::map<std::vector<alphabet::Symbol>, std::set<std::vector<alphabet::Symbol>>> rules; - + std::map<std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>, std::set<std::vector<alphabet::Symbol>>> rules; public: ContextPreservingUnrestrictedGrammar(const alphabet::Symbol& initialSymbol); @@ -30,11 +29,11 @@ public: virtual GrammarBase* plunder() &&; - bool addRule(const std::vector<alphabet::Symbol>& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide); + bool addRule(const std::vector<alphabet::Symbol>& lContext, const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rContext, const std::vector<alphabet::Symbol>& rightHandSide); - const std::map<std::vector<alphabet::Symbol>, std::set<std::vector<alphabet::Symbol>>> getRules() const; + const std::map<std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>, std::set<std::vector<alphabet::Symbol>>> getRules() const; - bool removeRule(const std::vector<alphabet::Symbol>& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide); + bool removeRule(const std::vector<alphabet::Symbol>& lContext, const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rContext, const std::vector<alphabet::Symbol>& rightHandSide); bool removeTerminalSymbol(const alphabet::Symbol& symbol); diff --git a/alib2data/test-src/grammar/GrammarTest.cpp b/alib2data/test-src/grammar/GrammarTest.cpp index 91397cd168..20cb3ed63b 100644 --- a/alib2data/test-src/grammar/GrammarTest.cpp +++ b/alib2data/test-src/grammar/GrammarTest.cpp @@ -86,6 +86,35 @@ void GrammarTest::testUnrestrictedParser() { CPPUNIT_ASSERT( grammar == grammar2 ); } + { + grammar::ContextPreservingUnrestrictedGrammar grammar(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1))))); + + grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1))))); + grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))); + grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3))))); + grammar.addTerminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a"))))); + grammar.addTerminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("b"))))); + + grammar.addRule(std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))}, alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1)))), std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))}, std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))}); + grammar.addRule(std::vector<alphabet::Symbol> {}, alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2)))), std::vector<alphabet::Symbol> {}, std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("b")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3))))}); + grammar.addRule(std::vector<alphabet::Symbol> {}, alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1)))), std::vector<alphabet::Symbol> {}, std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3))))}); + grammar.addRule(std::vector<alphabet::Symbol> {}, alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1)))), std::vector<alphabet::Symbol> {}, std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))}); + grammar.addRule(std::vector<alphabet::Symbol> {}, alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2)))), std::vector<alphabet::Symbol> {}, std::vector<alphabet::Symbol> {}); + CPPUNIT_ASSERT( grammar == grammar ); + { + grammar::GrammarToXMLComposer composer; + std::list<sax::Token> tokens = composer.compose(grammar); + std::string tmp; + sax::SaxComposeInterface::printMemory(tmp, tokens); + + std::list<sax::Token> tokens2; + sax::SaxParseInterface::parseMemory(tmp, tokens2); + grammar::GrammarFromXMLParser parser; + grammar::ContextPreservingUnrestrictedGrammar grammar2 = parser.parseContextPreservingUnrestrictedGrammar(tokens2); + + CPPUNIT_ASSERT( grammar == grammar2 ); + } + } } void GrammarTest::testRegularParser() { -- GitLab