From 6e5002422fcca8e37c432bf15e09da6eb5a3e0db Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Wed, 16 Jul 2014 21:49:14 +0200 Subject: [PATCH] New internal form for Right/Left LG --- .../src/grammar/GrammarFromXMLParser.cpp | 16 ++-- .../src/grammar/GrammarToXMLComposer.cpp | 48 +++++++++- alib2data/src/grammar/GrammarToXMLComposer.h | 2 + alib2data/src/grammar/Regular/LeftLG.cpp | 89 +++++++++++++++---- alib2data/src/grammar/Regular/LeftLG.h | 11 ++- alib2data/src/grammar/Regular/RightLG.cpp | 89 +++++++++++++++---- alib2data/src/grammar/Regular/RightLG.h | 11 ++- alib2data/test-src/grammar/GrammarTest.cpp | 54 ++++++++--- 8 files changed, 262 insertions(+), 58 deletions(-) diff --git a/alib2data/src/grammar/GrammarFromXMLParser.cpp b/alib2data/src/grammar/GrammarFromXMLParser.cpp index cd0e84d963..8f57b7b443 100644 --- a/alib2data/src/grammar/GrammarFromXMLParser.cpp +++ b/alib2data/src/grammar/GrammarFromXMLParser.cpp @@ -270,7 +270,7 @@ RightRG GrammarFromXMLParser::parseRightRG(std::list<sax::Token>& input) const { } RightLG GrammarFromXMLParser::parseRightLG(std::list<sax::Token>& input) const { - popToken(input, sax::Token::TokenType::START_ELEMENT, "LeftLG"); + popToken(input, sax::Token::TokenType::START_ELEMENT, "RightLG"); std::set<alphabet::Symbol> nonterminalAlphabet = parseNonterminalAlphabet(input); std::set<alphabet::Symbol> terminalAlphabet = parseTerminalAlphabet(input); @@ -457,7 +457,7 @@ void GrammarFromXMLParser::parseRule(std::list<sax::Token>& input, RightRG& gram void GrammarFromXMLParser::parseRule(std::list<sax::Token>& input, RightLG& grammar) const { alphabet::Symbol lhs = parseRuleSingleSymbolLHS(input); std::vector<alphabet::Symbol> rhs = parseRuleRHS(input); - grammar.addRule(lhs, rhs); + grammar.addRawRule(lhs, rhs); } void GrammarFromXMLParser::parseRule(std::list<sax::Token>& input, LeftRG& grammar) const { @@ -469,14 +469,17 @@ void GrammarFromXMLParser::parseRule(std::list<sax::Token>& input, LeftRG& gramm void GrammarFromXMLParser::parseRule(std::list<sax::Token>& input, LeftLG& grammar) const { alphabet::Symbol lhs = parseRuleSingleSymbolLHS(input); std::vector<alphabet::Symbol> rhs = parseRuleRHS(input); - grammar.addRule(lhs, rhs); + grammar.addRawRule(lhs, rhs); } std::vector<alphabet::Symbol> GrammarFromXMLParser::parseRuleLHS(std::list<sax::Token>& input) const { std::vector<alphabet::Symbol> lhs; popToken(input, sax::Token::TokenType::START_ELEMENT, "lhs"); - while (isTokenType(input, sax::Token::TokenType::START_ELEMENT)) { + if(isToken(input, sax::Token::TokenType::START_ELEMENT, "epsilon")) { + input.pop_front(); + popToken(input, sax::Token::TokenType::END_ELEMENT, "epsilon"); + } else while (isTokenType(input, sax::Token::TokenType::START_ELEMENT)) { lhs.push_back(alib::FromXMLParsers::symbolParser.parse(input)); } popToken(input, sax::Token::TokenType::END_ELEMENT, "lhs"); @@ -493,7 +496,10 @@ alphabet::Symbol GrammarFromXMLParser::parseRuleSingleSymbolLHS(std::list<sax::T std::vector<alphabet::Symbol> GrammarFromXMLParser::parseRuleRHS(std::list<sax::Token>& input) const { std::vector<alphabet::Symbol> rhs; popToken(input, sax::Token::TokenType::START_ELEMENT, "rhs"); - while (isTokenType(input, sax::Token::TokenType::START_ELEMENT)) { + if(isToken(input, sax::Token::TokenType::START_ELEMENT, "epsilon")) { + input.pop_front(); + popToken(input, sax::Token::TokenType::END_ELEMENT, "epsilon"); + } else while (isTokenType(input, sax::Token::TokenType::START_ELEMENT)) { rhs.push_back(alib::FromXMLParsers::symbolParser.parse(input)); } popToken(input, sax::Token::TokenType::END_ELEMENT, "rhs"); diff --git a/alib2data/src/grammar/GrammarToXMLComposer.cpp b/alib2data/src/grammar/GrammarToXMLComposer.cpp index f76dcce81d..d3f2e027a5 100644 --- a/alib2data/src/grammar/GrammarToXMLComposer.cpp +++ b/alib2data/src/grammar/GrammarToXMLComposer.cpp @@ -422,7 +422,7 @@ void GrammarToXMLComposer::composeRules(std::list<sax::Token>& out, const RightL out.push_back(sax::Token("rule", sax::Token::TokenType::START_ELEMENT)); composeRuleSingleSymbolLHS(out, rule.first); - composeRuleRHS(out, rhs); + composeRuleRightLGRHS(out, rhs); out.push_back(sax::Token("rule", sax::Token::TokenType::END_ELEMENT)); } @@ -456,7 +456,7 @@ void GrammarToXMLComposer::composeRules(std::list<sax::Token>& out, const LeftLG out.push_back(sax::Token("rule", sax::Token::TokenType::START_ELEMENT)); composeRuleSingleSymbolLHS(out, rule.first); - composeRuleRHS(out, rhs); + composeRuleLeftLGRHS(out, rhs); out.push_back(sax::Token("rule", sax::Token::TokenType::END_ELEMENT)); } @@ -516,6 +516,50 @@ void GrammarToXMLComposer::composeRuleOneOrTwoSymbolsRHS(std::list<sax::Token>& out.push_back(sax::Token("rhs", sax::Token::TokenType::END_ELEMENT)); } +void GrammarToXMLComposer::composeRuleLeftLGRHS(std::list<sax::Token>& out, const std::variant<std::vector<alphabet::Symbol>, std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>>& symbols) const { + out.push_back(sax::Token("rhs", sax::Token::TokenType::START_ELEMENT)); + if(symbols.is<std::vector<alphabet::Symbol>>()) { + const std::vector<alphabet::Symbol>& rhs = symbols.get<std::vector<alphabet::Symbol>>(); + if(rhs.size() == 0) { + out.push_back(sax::Token("epsilon", sax::Token::TokenType::START_ELEMENT)); + out.push_back(sax::Token("epsilon", sax::Token::TokenType::END_ELEMENT)); + } else { + for (const auto& symbol : symbols.get<std::vector<alphabet::Symbol>>()) { + out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(symbol)); + } + } + } else { + const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>& rhs = symbols.get<std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>>(); + out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(rhs.first)); + for (const auto& symbol : rhs.second) { + out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(symbol)); + } + } + out.push_back(sax::Token("rhs", sax::Token::TokenType::END_ELEMENT)); +} + +void GrammarToXMLComposer::composeRuleRightLGRHS(std::list<sax::Token>& out, const std::variant<std::vector<alphabet::Symbol>, std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol>>& symbols) const { + out.push_back(sax::Token("rhs", sax::Token::TokenType::START_ELEMENT)); + if(symbols.is<std::vector<alphabet::Symbol>>()) { + const std::vector<alphabet::Symbol>& rhs = symbols.get<std::vector<alphabet::Symbol>>(); + if(rhs.size() == 0) { + out.push_back(sax::Token("epsilon", sax::Token::TokenType::START_ELEMENT)); + out.push_back(sax::Token("epsilon", sax::Token::TokenType::END_ELEMENT)); + } else { + for (const auto& symbol : symbols.get<std::vector<alphabet::Symbol>>()) { + out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(symbol)); + } + } + } else { + const std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol>& rhs = symbols.get<std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol>>(); + for (const auto& symbol : rhs.first) { + out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(symbol)); + } + out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(rhs.second)); + } + out.push_back(sax::Token("rhs", sax::Token::TokenType::END_ELEMENT)); +} + std::list<sax::Token> GrammarToXMLComposer::compose(const Grammar& grammar) const { std::list<sax::Token> out; grammar.getGrammar().Accept((void*) &out, *this); diff --git a/alib2data/src/grammar/GrammarToXMLComposer.h b/alib2data/src/grammar/GrammarToXMLComposer.h index 1d501d8b15..32268126b1 100644 --- a/alib2data/src/grammar/GrammarToXMLComposer.h +++ b/alib2data/src/grammar/GrammarToXMLComposer.h @@ -73,6 +73,8 @@ class GrammarToXMLComposer : public GrammarBase::const_visitor_type { void composeRuleSingleSymbolLHS(std::list<sax::Token>& out, const alphabet::Symbol& symbol) const; void composeRuleRHS(std::list<sax::Token>& out, const std::vector<alphabet::Symbol>& symbols) const; void composeRuleOneOrTwoSymbolsRHS(std::list<sax::Token>& out, const std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>& symbols) const; + void composeRuleLeftLGRHS(std::list<sax::Token>& out, const std::variant<std::vector<alphabet::Symbol>, std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>>& symbols) const; + void composeRuleRightLGRHS(std::list<sax::Token>& out, const std::variant<std::vector<alphabet::Symbol>, std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol>>& symbols) const; public: /** * Prints XML representation of UnknownGrammar to the output stream. diff --git a/alib2data/src/grammar/Regular/LeftLG.cpp b/alib2data/src/grammar/Regular/LeftLG.cpp index 7c35514090..29db7f37bc 100644 --- a/alib2data/src/grammar/Regular/LeftLG.cpp +++ b/alib2data/src/grammar/Regular/LeftLG.cpp @@ -32,26 +32,39 @@ GrammarBase* LeftLG::plunder() && { } bool LeftLG::removeTerminalSymbol(const alphabet::Symbol& symbol) { - for(const std::pair<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>>& rule : rules) { + for(const std::pair<alphabet::Symbol, std::set<std::variant<std::vector<alphabet::Symbol>, std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>> >> >& rule : rules) { if(rule.first == symbol) throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); - for(const std::vector<alphabet::Symbol>& rhs : rule.second) - if(std::find(rhs.begin(), rhs.end(), symbol) != rhs.end()) - throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + for(const std::variant<std::vector<alphabet::Symbol>, std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>> >& rhsTmp : rule.second) + if(rhsTmp.is<std::vector<alphabet::Symbol>>()) { + const std::vector<alphabet::Symbol>& rhs = rhsTmp.get<std::vector<alphabet::Symbol>>(); + + if(std::find(rhs.begin(), rhs.end(), symbol) != rhs.end()) + throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + } else { + const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>& rhs = rhsTmp.get<std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>>(); + + if(std::find(rhs.second.begin(), rhs.second.end(), symbol) != rhs.second.end()) + throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + } } return terminalAlphabet.erase(symbol); } bool LeftLG::removeNonterminalSymbol(const alphabet::Symbol& symbol) { - for(const std::pair<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>>& rule : rules) { + for(const std::pair<alphabet::Symbol, std::set<std::variant<std::vector<alphabet::Symbol>, std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>> >> >& rule : rules) { if(rule.first == symbol) throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); - for(const std::vector<alphabet::Symbol>& rhs : rule.second) - if(std::find(rhs.begin(), rhs.end(), symbol) != rhs.end()) - throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + for(const std::variant<std::vector<alphabet::Symbol>, std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>> >& rhsTmp : rule.second) + if(rhsTmp.is<std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>>()) { + const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>& rhs = rhsTmp.get<std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>>(); + + if(rhs.first == symbol) + throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + } } if(initialSymbol == symbol) @@ -61,31 +74,71 @@ bool LeftLG::removeNonterminalSymbol(const alphabet::Symbol& symbol) { return nonterminalAlphabet.erase(symbol); } -bool LeftLG::addRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { +bool LeftLG::addRule(const alphabet::Symbol& leftHandSide, const std::variant<std::vector<alphabet::Symbol>, std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>> >& rightHandSide) { if(!nonterminalAlphabet.count(leftHandSide)) throw GrammarException("Rule must rewrite nonterminal symbol"); - std::vector<alphabet::Symbol>::const_iterator iter = rightHandSide.begin(); - if(iter == rightHandSide.end()) return rules[leftHandSide].insert(rightHandSide).second; + if(rightHandSide.is<std::vector<alphabet::Symbol>>()) { + const std::vector<alphabet::Symbol>& rhs = rightHandSide.get<std::vector<alphabet::Symbol>>(); + for(const auto & symbol : rhs ) { + if(terminalAlphabet.find(symbol) == terminalAlphabet.end()) + throw GrammarException("Symbol " + (std::string) symbol.getSymbol() + " is not a terminal symbol"); + } + + return rules[leftHandSide].insert(rightHandSide).second; + } else { + const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>& rhs = rightHandSide.get<std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>>(); - if(nonterminalAlphabet.find(*iter) != nonterminalAlphabet.end() && terminalAlphabet.find(*iter) != terminalAlphabet.end()) throw GrammarException("Symbol " + (std::string) iter->getSymbol() + " is not a terminal nor nonterminal symbol"); - iter++; + if(nonterminalAlphabet.find(rhs.first) == nonterminalAlphabet.end()) + throw GrammarException("Symbol " + (std::string) rhs.first.getSymbol() + " is not a nonterminal symbol"); - for(; iter != rightHandSide.end() && terminalAlphabet.find(*iter) != terminalAlphabet.end(); iter++); + for(const auto & symbol : rhs.second ) { + if(terminalAlphabet.find(symbol) == terminalAlphabet.end()) + throw GrammarException("Symbol " + (std::string) symbol.getSymbol() + " is not a terminal symbol"); + } - if(iter == rightHandSide.end()) return rules[leftHandSide].insert(rightHandSide).second; + return rules[leftHandSide].insert(rightHandSide).second; + } +} + +bool LeftLG::addRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { + std::variant<std::vector<alphabet::Symbol>, std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>> > rhs(rightHandSide); + return addRule(leftHandSide, rhs); +} - throw GrammarException("Symbol " + (std::string) iter->getSymbol() + " is not a terminal symbol"); +bool LeftLG::addRule(const alphabet::Symbol& leftHandSide, const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>& rightHandSide) { + std::variant<std::vector<alphabet::Symbol>, std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>> > rhs(rightHandSide); + return addRule(leftHandSide, rhs); } -const std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> LeftLG::getRules() const { +bool LeftLG::addRawRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { + if(rightHandSide.size() == 0) { + return addRule(leftHandSide, rightHandSide); + } else if(nonterminalAlphabet.count(rightHandSide[0])) { + return addRule(leftHandSide, std::make_pair(rightHandSide[0], std::vector<alphabet::Symbol>(rightHandSide.begin() + 1, rightHandSide.end()))); + } else { + return addRule(leftHandSide, rightHandSide); + } +} + +const std::map<alphabet::Symbol, std::set<std::variant<std::vector<alphabet::Symbol>, std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>> >> > LeftLG::getRules() const { return rules; } -bool LeftLG::removeRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { +bool LeftLG::removeRule(const alphabet::Symbol& leftHandSide, const std::variant<std::vector<alphabet::Symbol>, std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>> >& rightHandSide) { return rules[leftHandSide].erase(rightHandSide); } +bool LeftLG::removeRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { + std::variant<std::vector<alphabet::Symbol>, std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>> > rhs(rightHandSide); + return removeRule(leftHandSide, rhs); +} + +bool LeftLG::removeRule(const alphabet::Symbol& leftHandSide, const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>& rightHandSide) { + std::variant<std::vector<alphabet::Symbol>, std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>> > rhs(rightHandSide); + return removeRule(leftHandSide, rhs); +} + bool LeftLG::operator==(const GrammarBase& other) const { return other == *this; } diff --git a/alib2data/src/grammar/Regular/LeftLG.h b/alib2data/src/grammar/Regular/LeftLG.h index 8127000f7f..b251bb09c2 100644 --- a/alib2data/src/grammar/Regular/LeftLG.h +++ b/alib2data/src/grammar/Regular/LeftLG.h @@ -11,6 +11,7 @@ #include "../GrammarBase.h" #include <map> #include <vector> +#include "../../std/variant.hpp" #include "../common/TerminalNonterminalAlphabetInitialSymbol.h" namespace grammar { @@ -19,7 +20,7 @@ namespace grammar { * Left linear grammar in chomsky normal form. Type 3 in Chomsky hierarchy. Produces regular languages. */ class LeftLG : public std::element<LeftLG, GrammarBase>, public TerminalNonterminalAlphabetInitialSymbol { - std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> rules; + std::map<alphabet::Symbol, std::set<std::variant<std::vector<alphabet::Symbol>, std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>> >> > rules; public: LeftLG(const alphabet::Symbol& initialSymbol); @@ -29,11 +30,17 @@ public: virtual GrammarBase* plunder() &&; + bool addRule(const alphabet::Symbol& leftHandSide, const std::variant<std::vector<alphabet::Symbol>, std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>> >& rightHandSide); bool addRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide); + bool addRule(const alphabet::Symbol& leftHandSide, const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>& rightHandSide); - const std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> getRules() const; + bool addRawRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide); + const std::map<alphabet::Symbol, std::set<std::variant<std::vector<alphabet::Symbol>, std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>> >> > getRules() const; + + bool removeRule(const alphabet::Symbol& leftHandSide, const std::variant<std::vector<alphabet::Symbol>, std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>> >& rightHandSide); bool removeRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide); + bool removeRule(const alphabet::Symbol& leftHandSide, const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>& rightHandSide); bool removeTerminalSymbol(const alphabet::Symbol& symbol); diff --git a/alib2data/src/grammar/Regular/RightLG.cpp b/alib2data/src/grammar/Regular/RightLG.cpp index 39a4da54ff..a124a0151a 100644 --- a/alib2data/src/grammar/Regular/RightLG.cpp +++ b/alib2data/src/grammar/Regular/RightLG.cpp @@ -32,26 +32,39 @@ GrammarBase* RightLG::plunder() && { } bool RightLG::removeTerminalSymbol(const alphabet::Symbol& symbol) { - for(const std::pair<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>>& rule : rules) { + for(const std::pair<alphabet::Symbol, std::set<std::variant<std::vector<alphabet::Symbol>, std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol> >> >& rule : rules) { if(rule.first == symbol) throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); - for(const std::vector<alphabet::Symbol>& rhs : rule.second) - if(std::find(rhs.begin(), rhs.end(), symbol) != rhs.end()) - throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + for(const std::variant<std::vector<alphabet::Symbol>, std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol> >& rhsTmp : rule.second) + if(rhsTmp.is<std::vector<alphabet::Symbol>>()) { + const std::vector<alphabet::Symbol>& rhs = rhsTmp.get<std::vector<alphabet::Symbol>>(); + + if(std::find(rhs.begin(), rhs.end(), symbol) != rhs.end()) + throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + } else { + const std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol>& rhs = rhsTmp.get<std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol>>(); + + if(std::find(rhs.first.begin(), rhs.first.end(), symbol) != rhs.first.end()) + throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + } } return terminalAlphabet.erase(symbol); } bool RightLG::removeNonterminalSymbol(const alphabet::Symbol& symbol) { - for(const std::pair<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>>& rule : rules) { + for(const std::pair<alphabet::Symbol, std::set<std::variant<std::vector<alphabet::Symbol>, std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol> >> >& rule : rules) { if(rule.first == symbol) throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); - for(const std::vector<alphabet::Symbol>& rhs : rule.second) - if(std::find(rhs.begin(), rhs.end(), symbol) != rhs.end()) - throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + for(const std::variant<std::vector<alphabet::Symbol>, std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol> >& rhsTmp : rule.second) + if(rhsTmp.is<std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol>>()) { + const std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol>& rhs = rhsTmp.get<std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol>>(); + + if(rhs.second == symbol) + throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + } } if(initialSymbol == symbol) @@ -61,31 +74,71 @@ bool RightLG::removeNonterminalSymbol(const alphabet::Symbol& symbol) { return nonterminalAlphabet.erase(symbol); } -bool RightLG::addRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { +bool RightLG::addRule(const alphabet::Symbol& leftHandSide, const std::variant<std::vector<alphabet::Symbol>, std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol> >& rightHandSide) { if(!nonterminalAlphabet.count(leftHandSide)) throw GrammarException("Rule must rewrite nonterminal symbol"); - std::vector<alphabet::Symbol>::const_iterator iter = rightHandSide.begin(); - for(; iter != rightHandSide.end() && terminalAlphabet.find(*iter) != terminalAlphabet.end(); iter++); + if(rightHandSide.is<std::vector<alphabet::Symbol>>()) { + const std::vector<alphabet::Symbol>& rhs = rightHandSide.get<std::vector<alphabet::Symbol>>(); + for(const auto & symbol : rhs ) { + if(terminalAlphabet.find(symbol) == terminalAlphabet.end()) + throw GrammarException("Symbol " + (std::string) symbol.getSymbol() + " is not a terminal symbol"); + } + + return rules[leftHandSide].insert(rightHandSide).second; + } else { + const std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol>& rhs = rightHandSide.get<std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol>>(); - if(iter == rightHandSide.end()) return rules[leftHandSide].insert(rightHandSide).second; + if(nonterminalAlphabet.find(rhs.second) == nonterminalAlphabet.end()) + throw GrammarException("Symbol " + (std::string) rhs.second.getSymbol() + " is not a nonterminal symbol"); - if(nonterminalAlphabet.find(*iter) == nonterminalAlphabet.end()) throw GrammarException("Symbol " + (std::string) iter->getSymbol() + " is not a terminal nor nonterminal symbol"); - iter++; + for(const auto & symbol : rhs.first ) { + if(terminalAlphabet.find(symbol) == terminalAlphabet.end()) + throw GrammarException("Symbol " + (std::string) symbol.getSymbol() + " is not a terminal symbol"); + } - if(iter == rightHandSide.end()) return rules[leftHandSide].insert(rightHandSide).second; + return rules[leftHandSide].insert(rightHandSide).second; + } +} + +bool RightLG::addRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { + std::variant<std::vector<alphabet::Symbol>, std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol> > rhs(rightHandSide); + return addRule(leftHandSide, rhs); +} - throw GrammarException("Symbol " + (std::string) iter->getSymbol() + " is not a terminal symbol"); +bool RightLG::addRule(const alphabet::Symbol& leftHandSide, const std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol>& rightHandSide) { + std::variant<std::vector<alphabet::Symbol>, std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol> > rhs(rightHandSide); + return addRule(leftHandSide, rhs); } -const std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> RightLG::getRules() const { +bool RightLG::addRawRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { + if(rightHandSide.size() == 0) { + return addRule(leftHandSide, rightHandSide); + } else if(nonterminalAlphabet.count(rightHandSide[rightHandSide.size() - 1])) { + return addRule(leftHandSide, std::make_pair(std::vector<alphabet::Symbol>(rightHandSide.begin(), rightHandSide.end() - 1), rightHandSide[rightHandSide.size() - 1])); + } else { + return addRule(leftHandSide, rightHandSide); + } +} + +const std::map<alphabet::Symbol, std::set<std::variant<std::vector<alphabet::Symbol>, std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol> >> > RightLG::getRules() const { return rules; } -bool RightLG::removeRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { +bool RightLG::removeRule(const alphabet::Symbol& leftHandSide, const std::variant<std::vector<alphabet::Symbol>, std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol> >& rightHandSide) { return rules[leftHandSide].erase(rightHandSide); } +bool RightLG::removeRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { + std::variant<std::vector<alphabet::Symbol>, std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol> > rhs(rightHandSide); + return removeRule(leftHandSide, rhs); +} + +bool RightLG::removeRule(const alphabet::Symbol& leftHandSide, const std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol>& rightHandSide) { + std::variant<std::vector<alphabet::Symbol>, std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol> > rhs(rightHandSide); + return removeRule(leftHandSide, rhs); +} + bool RightLG::operator==(const GrammarBase& other) const { return other == *this; } diff --git a/alib2data/src/grammar/Regular/RightLG.h b/alib2data/src/grammar/Regular/RightLG.h index 9cba0827f4..3446bf5774 100644 --- a/alib2data/src/grammar/Regular/RightLG.h +++ b/alib2data/src/grammar/Regular/RightLG.h @@ -11,6 +11,7 @@ #include "../GrammarBase.h" #include <map> #include <vector> +#include "../../std/variant.hpp" #include "../common/TerminalNonterminalAlphabetInitialSymbol.h" namespace grammar { @@ -19,7 +20,7 @@ namespace grammar { * Right linear grammar in chomsky normal form. Type 3 in Chomsky hierarchy. Produces regular languages. */ class RightLG : public std::element<RightLG, GrammarBase>, public TerminalNonterminalAlphabetInitialSymbol { - std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> rules; + std::map<alphabet::Symbol, std::set<std::variant<std::vector<alphabet::Symbol>, std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol> >> > rules; public: RightLG(const alphabet::Symbol& initialSymbol); @@ -29,11 +30,17 @@ public: virtual GrammarBase* plunder() &&; + bool addRule(const alphabet::Symbol& leftHandSide, const std::variant<std::vector<alphabet::Symbol>, std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol> >& rightHandSide); bool addRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide); + bool addRule(const alphabet::Symbol& leftHandSide, const std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol>& rightHandSide); - const std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> getRules() const; + bool addRawRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide); + const std::map<alphabet::Symbol, std::set<std::variant<std::vector<alphabet::Symbol>, std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol> >> > getRules() const; + + bool removeRule(const alphabet::Symbol& leftHandSide, const std::variant<std::vector<alphabet::Symbol>, std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol> >& rightHandSide); bool removeRule(const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide); + bool removeRule(const alphabet::Symbol& leftHandSide, const std::pair<std::vector<alphabet::Symbol>, alphabet::Symbol>& rightHandSide); bool removeTerminalSymbol(const alphabet::Symbol& symbol); diff --git a/alib2data/test-src/grammar/GrammarTest.cpp b/alib2data/test-src/grammar/GrammarTest.cpp index b150fe8d52..f85be63828 100644 --- a/alib2data/test-src/grammar/GrammarTest.cpp +++ b/alib2data/test-src/grammar/GrammarTest.cpp @@ -84,19 +84,20 @@ void GrammarTest::testUnrestrictedParser() { } void GrammarTest::testRegularParser() { - grammar::RightRG grammar(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1))))); + { + grammar::RightRG grammar(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1))))); - grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1))))); - grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))); - grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3))))); - grammar.addTerminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a"))))); - grammar.addTerminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("b"))))); - - grammar.addRule(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1)))), std::make_pair(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2)))))); - grammar.addRule(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2)))), std::make_pair(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("b")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3)))))); + grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1))))); + grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))); + grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3))))); + grammar.addTerminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a"))))); + grammar.addTerminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("b"))))); - CPPUNIT_ASSERT( grammar == grammar ); - { + grammar.addRule(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1)))), std::make_pair(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2)))))); + grammar.addRule(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2)))), std::make_pair(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("b")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3)))))); + + CPPUNIT_ASSERT( grammar == grammar ); + { grammar::GrammarToXMLComposer composer; std::list<sax::Token> tokens = composer.compose(grammar); std::string tmp; @@ -108,5 +109,36 @@ void GrammarTest::testRegularParser() { grammar::RightRG grammar2 = parser.parseRightRG(tokens2); CPPUNIT_ASSERT( grammar == grammar2 ); + } + } + + { + grammar::RightLG grammar(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1))))); + + grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1))))); + grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))); + grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3))))); + grammar.addTerminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a"))))); + grammar.addTerminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("b"))))); + + grammar.addRule(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1)))), std::make_pair(std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a"))))}, alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2)))))); + grammar.addRule(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2)))), std::make_pair(std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("b"))))}, alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3)))))); + grammar.addRule(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1)))), std::make_pair(std::vector<alphabet::Symbol> {}, alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3)))))); + grammar.addRule(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1)))), std::make_pair(std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a"))))}, alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2)))))); + + CPPUNIT_ASSERT( grammar == grammar ); + { + grammar::GrammarToXMLComposer composer; + std::list<sax::Token> tokens = composer.compose(grammar); + std::string tmp; + sax::SaxComposeInterface::printMemory(tmp, tokens); + + std::list<sax::Token> tokens2; + sax::SaxParseInterface::parseMemory(tmp, tokens2); + grammar::GrammarFromXMLParser parser; + grammar::RightLG grammar2 = parser.parseRightLG(tokens2); + + CPPUNIT_ASSERT( grammar == grammar2 ); + } } } -- GitLab