From 00ea07ce025c3a38ea3ea5e79c79a10f3d20c20a Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Fri, 18 Jul 2014 09:36:37 +0200 Subject: [PATCH] NonContracting and CSG internal form --- .../src/grammar/ContextSensitive/CSG.cpp | 87 +++++++++---------- alib2data/src/grammar/ContextSensitive/CSG.h | 13 +-- .../NonContractingGrammar.cpp | 62 ++++++------- .../ContextSensitive/NonContractingGrammar.h | 3 + .../src/grammar/GrammarFromXMLParser.cpp | 38 +++++++- alib2data/src/grammar/GrammarFromXMLParser.h | 2 + .../src/grammar/GrammarToXMLComposer.cpp | 50 ++++++++--- alib2data/src/grammar/GrammarToXMLComposer.h | 2 + alib2data/test-src/grammar/GrammarTest.cpp | 66 ++++++++++++++ alib2data/test-src/grammar/GrammarTest.h | 4 +- 10 files changed, 224 insertions(+), 103 deletions(-) diff --git a/alib2data/src/grammar/ContextSensitive/CSG.cpp b/alib2data/src/grammar/ContextSensitive/CSG.cpp index a27da90794..7e0a12b5e0 100644 --- a/alib2data/src/grammar/ContextSensitive/CSG.cpp +++ b/alib2data/src/grammar/ContextSensitive/CSG.cpp @@ -32,9 +32,14 @@ GrammarBase* CSG::plunder() && { } bool CSG::removeTerminalSymbol(const alphabet::Symbol& symbol) { - for(const std::pair<std::vector<alphabet::Symbol>, std::set<std::vector<alphabet::Symbol>>>& rule : rules) { - if(std::find(rule.first.begin(), rule.first.end(), symbol) != rule.first.end()) - throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + for(const std::pair<std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>, std::set<std::vector<alphabet::Symbol>>>& rule : rules) { + for(const alphabet::Symbol& lCont : std::get<0>(rule.first)) + if(lCont == symbol) + throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + + for(const alphabet::Symbol& rCont : std::get<2>(rule.first)) + if(rCont == symbol) + throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is 
used in rule."); for(const std::vector<alphabet::Symbol>& rhs : rule.second) if(std::find(rhs.begin(), rhs.end(), symbol) != rhs.end()) @@ -45,10 +50,18 @@ bool CSG::removeTerminalSymbol(const alphabet::Symbol& symbol) { } bool CSG::removeNonterminalSymbol(const alphabet::Symbol& symbol) { - for(const std::pair<std::vector<alphabet::Symbol>, std::set<std::vector<alphabet::Symbol>>>& rule : rules) { - if(std::find(rule.first.begin(), rule.first.end(), symbol) != rule.first.end()) + for(const std::pair<std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>, std::set<std::vector<alphabet::Symbol>>>& rule : rules) { + for(const alphabet::Symbol& lCont : std::get<0>(rule.first)) + if(lCont == symbol) + throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + + if(std::get<1>(rule.first) == symbol) throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + for(const alphabet::Symbol& rCont : std::get<2>(rule.first)) + if(rCont == symbol) + throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); + for(const std::vector<alphabet::Symbol>& rhs : rule.second) if(std::find(rhs.begin(), rhs.end(), symbol) != rhs.end()) throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is used in rule."); @@ -61,67 +74,48 @@ bool CSG::removeNonterminalSymbol(const alphabet::Symbol& symbol) { return nonterminalAlphabet.erase(symbol); } -bool CSG::addRule(const std::vector<alphabet::Symbol>& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { - int lSize = leftHandSide.size(); +bool CSG::addRule(const std::vector<alphabet::Symbol>& lContext, const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rContext, const std::vector<alphabet::Symbol>& rightHandSide) { int rSize = rightHandSide.size(); - if(lSize == 1 && leftHandSide[0] == initialSymbol && rSize == 0) { - for(const auto& rule 
: rules) { - for(const auto& ruleRHS : rule.second) { - if(any_of(ruleRHS.begin(), ruleRHS.end(), [&](const alphabet::Symbol& symbol) { return initialSymbol == symbol; })) { - throw GrammarException("Initial symbol " + (std::string) initialSymbol + "used on right hand side of already existing rule"); - } - } + if(rSize == 0) { + throw GrammarException("Epsilon rule is not allowed"); + } else { + for(const alphabet::Symbol& symbol : lContext) { + if(terminalAlphabet.find(symbol) == terminalAlphabet.end() && nonterminalAlphabet.find(symbol) == nonterminalAlphabet.end()) + throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is not neither terminal nor nonterminal symbol"); } - generatesEpsilon = true; - return rules[leftHandSide].insert(rightHandSide).second; - } else { - if(lSize > rSize) - throw GrammarException("Invalid size of right hand side of a rule"); - - int lContext; - int rContext; - for(lContext = 0; lContext < lSize - 1 && leftHandSide[lContext] == rightHandSide[lContext]; lContext++); - for(rContext = 0; rContext < lSize - 1 && leftHandSide[lSize - rContext] == rightHandSide[rSize - rContext]; rContext++); - - if(lContext + rContext + 1 < lSize) { - throw GrammarException("Rule must rewrite only one symbol"); - } else - if(lContext + rContext + 1 == lSize) { - if(!nonterminalAlphabet.count(leftHandSide[lContext - 1]) && !nonterminalAlphabet.count(leftHandSide[lSize - rContext])) throw GrammarException("Rule must rewrite nonterminal symbol"); - } else - if(/* lContext + rContext + 1 > lSize */ std::all_of(leftHandSide.end() - rContext, leftHandSide.begin() + lContext + 1, [&](const alphabet::Symbol symbol) {return !nonterminalAlphabet.count(symbol);})) + if(!nonterminalAlphabet.count(leftHandSide)) throw GrammarException("Rule must rewrite nonterminal symbol"); - - for(const alphabet::Symbol& symbol : leftHandSide) + for(const alphabet::Symbol& symbol : rContext) { if(terminalAlphabet.find(symbol) == terminalAlphabet.end() && 
nonterminalAlphabet.find(symbol) == nonterminalAlphabet.end()) throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is not neither terminal nor nonterminal symbol"); + } for(const alphabet::Symbol& symbol : rightHandSide) { if(terminalAlphabet.find(symbol) == terminalAlphabet.end() && nonterminalAlphabet.find(symbol) == nonterminalAlphabet.end()) throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is not neither terminal nor nonterminal symbol"); - if(generatesEpsilon && symbol == initialSymbol) - throw GrammarException("Initial symbol is already allowed to be rewritten to epsilon"); } - return rules[leftHandSide].insert(rightHandSide).second; + return rules[make_tuple(lContext, leftHandSide, rContext)].insert(rightHandSide).second; } } -const std::map<std::vector<alphabet::Symbol>, std::set<std::vector<alphabet::Symbol>>> CSG::getRules() const { +const std::map<std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>, std::set<std::vector<alphabet::Symbol>>> CSG::getRules() const { return rules; } -bool CSG::removeRule(const std::vector<alphabet::Symbol>& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { - int lSize = leftHandSide.size(); - int rSize = rightHandSide.size(); +bool CSG::removeRule(const std::vector<alphabet::Symbol>& lContext, const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rContext, const std::vector<alphabet::Symbol>& rightHandSide) { + return rules[make_tuple(lContext, leftHandSide, rContext)].erase(rightHandSide); +} - if(lSize == 1 && leftHandSide[0] == initialSymbol && rSize == 0) { - generatesEpsilon = false; - } - return rules[leftHandSide].erase(rightHandSide); +void CSG::setGeneratesEpsilon(bool genEps) { + generatesEpsilon = genEps; +} + +bool CSG::getGeneratesEpsilon() const { + return generatesEpsilon; } bool CSG::operator==(const GrammarBase& other) const { @@ -129,7 +123,7 @@ bool CSG::operator==(const 
GrammarBase& other) const { } bool CSG::operator==(const CSG& other) const { - return this->nonterminalAlphabet == other.nonterminalAlphabet && this->terminalAlphabet == other.terminalAlphabet && this->initialSymbol == other.initialSymbol && this->rules == other.rules; + return this->nonterminalAlphabet == other.nonterminalAlphabet && this->terminalAlphabet == other.terminalAlphabet && this->initialSymbol == other.initialSymbol && this->rules == other.rules && this->generatesEpsilon == other.generatesEpsilon; } void CSG::operator>>(std::ostream& out) const { @@ -138,6 +132,7 @@ void CSG::operator>>(std::ostream& out) const { << "terminalAlphabet = " << terminalAlphabet << "initialSymbol = " << initialSymbol << "rules = " << rules + << "generatesEpsilon = " << generatesEpsilon << ")"; } diff --git a/alib2data/src/grammar/ContextSensitive/CSG.h b/alib2data/src/grammar/ContextSensitive/CSG.h index eb7dc59443..d1384c28ff 100644 --- a/alib2data/src/grammar/ContextSensitive/CSG.h +++ b/alib2data/src/grammar/ContextSensitive/CSG.h @@ -16,10 +16,10 @@ namespace grammar { /** - * Context sensitive grammar. Type 1 in Chomsky hierarchy. Produces context sensitive language. + * Context sensitive grammar. Type 1 in Chomsky hierarchy. Produces context sensitive languages. 
*/ class CSG : public std::element<CSG, GrammarBase>, public TerminalNonterminalAlphabetInitialSymbol { - std::map<std::vector<alphabet::Symbol>, std::set<std::vector<alphabet::Symbol>>> rules; + std::map<std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>, std::set<std::vector<alphabet::Symbol>>> rules; bool generatesEpsilon; public: CSG(const alphabet::Symbol& initialSymbol); @@ -30,16 +30,19 @@ public: virtual GrammarBase* plunder() &&; - bool addRule(const std::vector<alphabet::Symbol>& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide); + bool addRule(const std::vector<alphabet::Symbol>& lContext, const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rContext, const std::vector<alphabet::Symbol>& rightHandSide); - const std::map<std::vector<alphabet::Symbol>, std::set<std::vector<alphabet::Symbol>>> getRules() const; + const std::map<std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>, std::set<std::vector<alphabet::Symbol>>> getRules() const; - bool removeRule(const std::vector<alphabet::Symbol>& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide); + bool removeRule(const std::vector<alphabet::Symbol>& lContext, const alphabet::Symbol& leftHandSide, const std::vector<alphabet::Symbol>& rContext, const std::vector<alphabet::Symbol>& rightHandSide); bool removeTerminalSymbol(const alphabet::Symbol& symbol); bool removeNonterminalSymbol(const alphabet::Symbol& symbol); + void setGeneratesEpsilon(bool genEps); + bool getGeneratesEpsilon() const; + virtual bool operator==(const GrammarBase& other) const; virtual bool operator==(const CSG& other) const; diff --git a/alib2data/src/grammar/ContextSensitive/NonContractingGrammar.cpp b/alib2data/src/grammar/ContextSensitive/NonContractingGrammar.cpp index a1d73a8a5c..710964b9d7 100644 --- a/alib2data/src/grammar/ContextSensitive/NonContractingGrammar.cpp +++ 
b/alib2data/src/grammar/ContextSensitive/NonContractingGrammar.cpp @@ -65,37 +65,22 @@ bool NonContractingGrammar::addRule(const std::vector<alphabet::Symbol>& leftHan int lSize = leftHandSide.size(); int rSize = rightHandSide.size(); - if(lSize == 1 && leftHandSide[0] == initialSymbol && rSize == 0) { - for(const auto& rule : rules) { - for(const auto& ruleRHS : rule.second) { - if(any_of(ruleRHS.begin(), ruleRHS.end(), [&](const alphabet::Symbol& symbol) { return initialSymbol == symbol; })) { - throw GrammarException("Initial symbol " + (std::string) initialSymbol + "used on right hand side of already existing rule"); - } - } - } - - generatesEpsilon = true; - return rules[leftHandSide].insert(rightHandSide).second; - } else { - if(lSize > rSize) - throw GrammarException("Invalid size of right hand side of a rule"); - - if(std::all_of(leftHandSide.begin(), leftHandSide.end(), [&](const alphabet::Symbol symbol) {return !nonterminalAlphabet.count(symbol);})) - throw GrammarException("Rule must rewrite nonterminal symbol"); - - for(const alphabet::Symbol& symbol : leftHandSide) - if(terminalAlphabet.find(symbol) == terminalAlphabet.end() && nonterminalAlphabet.find(symbol) == nonterminalAlphabet.end()) - throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is not neither terminal nor nonterminal symbol"); - - for(const alphabet::Symbol& symbol : rightHandSide) { - if(terminalAlphabet.find(symbol) == terminalAlphabet.end() && nonterminalAlphabet.find(symbol) == nonterminalAlphabet.end()) - throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is not neither terminal nor nonterminal symbol"); - if(generatesEpsilon && symbol == initialSymbol) - throw GrammarException("Initial symbol is already allowed to be rewritten to epsilon"); - } - - return rules[leftHandSide].insert(rightHandSide).second; + if(lSize > rSize) + throw GrammarException("Invalid size of right hand side of a rule"); + + if(std::all_of(leftHandSide.begin(), 
leftHandSide.end(), [&](const alphabet::Symbol symbol) {return !nonterminalAlphabet.count(symbol);})) + throw GrammarException("Rule must rewrite nonterminal symbol"); + + for(const alphabet::Symbol& symbol : leftHandSide) + if(terminalAlphabet.find(symbol) == terminalAlphabet.end() && nonterminalAlphabet.find(symbol) == nonterminalAlphabet.end()) + throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is not neither terminal nor nonterminal symbol"); + + for(const alphabet::Symbol& symbol : rightHandSide) { + if(terminalAlphabet.find(symbol) == terminalAlphabet.end() && nonterminalAlphabet.find(symbol) == nonterminalAlphabet.end()) + throw GrammarException("Symbol \"" + (std::string) symbol.getSymbol() + "\" is not neither terminal nor nonterminal symbol"); } + + return rules[leftHandSide].insert(rightHandSide).second; } const std::map<std::vector<alphabet::Symbol>, std::set<std::vector<alphabet::Symbol>>> NonContractingGrammar::getRules() const { @@ -103,21 +88,23 @@ const std::map<std::vector<alphabet::Symbol>, std::set<std::vector<alphabet::Sym } bool NonContractingGrammar::removeRule(const std::vector<alphabet::Symbol>& leftHandSide, const std::vector<alphabet::Symbol>& rightHandSide) { - int lSize = leftHandSide.size(); - int rSize = rightHandSide.size(); - - if(lSize == 1 && leftHandSide[0] == initialSymbol && rSize == 0) { - generatesEpsilon = false; - } return rules[leftHandSide].erase(rightHandSide); } +void NonContractingGrammar::setGeneratesEpsilon(bool genEps) { + generatesEpsilon = genEps; +} + +bool NonContractingGrammar::getGeneratesEpsilon() const { + return generatesEpsilon; +} + bool NonContractingGrammar::operator==(const GrammarBase& other) const { return other == *this; } bool NonContractingGrammar::operator==(const NonContractingGrammar& other) const { - return this->nonterminalAlphabet == other.nonterminalAlphabet && this->terminalAlphabet == other.terminalAlphabet && this->initialSymbol == other.initialSymbol && 
this->rules == other.rules; + return this->nonterminalAlphabet == other.nonterminalAlphabet && this->terminalAlphabet == other.terminalAlphabet && this->initialSymbol == other.initialSymbol && this->rules == other.rules && this->generatesEpsilon == other.generatesEpsilon; } void NonContractingGrammar::operator>>(std::ostream& out) const { @@ -126,6 +113,7 @@ void NonContractingGrammar::operator>>(std::ostream& out) const { << "terminalAlphabet = " << terminalAlphabet << "initialSymbol = " << initialSymbol << "rules = " << rules + << "generatesEpsilon = " << generatesEpsilon << ")"; } diff --git a/alib2data/src/grammar/ContextSensitive/NonContractingGrammar.h b/alib2data/src/grammar/ContextSensitive/NonContractingGrammar.h index 9a6866faf9..9565901647 100644 --- a/alib2data/src/grammar/ContextSensitive/NonContractingGrammar.h +++ b/alib2data/src/grammar/ContextSensitive/NonContractingGrammar.h @@ -40,6 +40,9 @@ public: bool removeNonterminalSymbol(const alphabet::Symbol& symbol); + void setGeneratesEpsilon(bool genEps); + bool getGeneratesEpsilon() const; + virtual bool operator==(const GrammarBase& other) const; virtual bool operator==(const NonContractingGrammar& other) const; diff --git a/alib2data/src/grammar/GrammarFromXMLParser.cpp b/alib2data/src/grammar/GrammarFromXMLParser.cpp index 9e60ddbda2..3bff447a56 100644 --- a/alib2data/src/grammar/GrammarFromXMLParser.cpp +++ b/alib2data/src/grammar/GrammarFromXMLParser.cpp @@ -143,6 +143,9 @@ NonContractingGrammar GrammarFromXMLParser::parseNonContractingGrammar(std::list parseRules(input, grammar); + bool generatesEpsilon = parseGeneratesEpsilon(input); + grammar.setGeneratesEpsilon(generatesEpsilon); + popToken(input, sax::Token::TokenType::END_ELEMENT, "NonContractingGrammar"); return grammar; } @@ -160,6 +163,9 @@ CSG GrammarFromXMLParser::parseCSG(std::list<sax::Token>& input) const { parseRules(input, grammar); + bool generatesEpsilon = parseGeneratesEpsilon(input); + 
grammar.setGeneratesEpsilon(generatesEpsilon); + popToken(input, sax::Token::TokenType::END_ELEMENT, "CSG"); return grammar; } @@ -422,9 +428,11 @@ void GrammarFromXMLParser::parseRule(std::list<sax::Token>& input, NonContractin } void GrammarFromXMLParser::parseRule(std::list<sax::Token>& input, CSG& grammar) const { - std::vector<alphabet::Symbol> lhs = parseRuleLHS(input); + std::vector<alphabet::Symbol> lContext = parseRuleLContext(input); + alphabet::Symbol lhs = parseRuleSingleSymbolLHS(input); + std::vector<alphabet::Symbol> rContext = parseRuleRContext(input); std::vector<alphabet::Symbol> rhs = parseRuleRHS(input); - grammar.addRule(lhs, rhs); + grammar.addRule(lContext, lhs, rContext, rhs); } void GrammarFromXMLParser::parseRule(std::list<sax::Token>& input, GNF& grammar) const { @@ -482,6 +490,19 @@ void GrammarFromXMLParser::parseRule(std::list<sax::Token>& input, LeftLG& gramm } +std::vector<alphabet::Symbol> GrammarFromXMLParser::parseRuleLContext(std::list<sax::Token>& input) const { + std::vector<alphabet::Symbol> lContext; + popToken(input, sax::Token::TokenType::START_ELEMENT, "lContext"); + if(isToken(input, sax::Token::TokenType::START_ELEMENT, "epsilon")) { + input.pop_front(); + popToken(input, sax::Token::TokenType::END_ELEMENT, "epsilon"); + } else while (isTokenType(input, sax::Token::TokenType::START_ELEMENT)) { + lContext.push_back(alib::FromXMLParsers::symbolParser.parse(input)); + } + popToken(input, sax::Token::TokenType::END_ELEMENT, "lContext"); + return lContext; +} + std::vector<alphabet::Symbol> GrammarFromXMLParser::parseRuleLHS(std::list<sax::Token>& input) const { std::vector<alphabet::Symbol> lhs; popToken(input, sax::Token::TokenType::START_ELEMENT, "lhs"); @@ -502,6 +523,19 @@ alphabet::Symbol GrammarFromXMLParser::parseRuleSingleSymbolLHS(std::list<sax::T return lhs; } +std::vector<alphabet::Symbol> GrammarFromXMLParser::parseRuleRContext(std::list<sax::Token>& input) const { + std::vector<alphabet::Symbol> rContext; + 
popToken(input, sax::Token::TokenType::START_ELEMENT, "rContext"); + if(isToken(input, sax::Token::TokenType::START_ELEMENT, "epsilon")) { + input.pop_front(); + popToken(input, sax::Token::TokenType::END_ELEMENT, "epsilon"); + } else while (isTokenType(input, sax::Token::TokenType::START_ELEMENT)) { + rContext.push_back(alib::FromXMLParsers::symbolParser.parse(input)); + } + popToken(input, sax::Token::TokenType::END_ELEMENT, "rContext"); + return rContext; +} + std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>> GrammarFromXMLParser::parseRuleGNFRHS(std::list<sax::Token>& input) const { popToken(input, sax::Token::TokenType::START_ELEMENT, "rhs"); alphabet::Symbol first = alib::FromXMLParsers::symbolParser.parse(input); diff --git a/alib2data/src/grammar/GrammarFromXMLParser.h b/alib2data/src/grammar/GrammarFromXMLParser.h index ca218f90dc..27587f27dc 100644 --- a/alib2data/src/grammar/GrammarFromXMLParser.h +++ b/alib2data/src/grammar/GrammarFromXMLParser.h @@ -43,7 +43,9 @@ protected: alphabet::Symbol parseInitialSymbol(std::list<sax::Token> &input) const; bool parseGeneratesEpsilon(std::list<sax::Token> &input) const; + std::vector<alphabet::Symbol> parseRuleLContext(std::list<sax::Token>& input) const; std::vector<alphabet::Symbol> parseRuleLHS(std::list<sax::Token>& input) const; + std::vector<alphabet::Symbol> parseRuleRContext(std::list<sax::Token>& input) const; alphabet::Symbol parseRuleSingleSymbolLHS(std::list<sax::Token>& input) const; std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>> parseRuleGNFRHS(std::list<sax::Token>& input) const; std::vector<alphabet::Symbol> parseRuleRHS(std::list<sax::Token>& input) const; diff --git a/alib2data/src/grammar/GrammarToXMLComposer.cpp b/alib2data/src/grammar/GrammarToXMLComposer.cpp index 2939f67ead..41a592d3d1 100644 --- a/alib2data/src/grammar/GrammarToXMLComposer.cpp +++ b/alib2data/src/grammar/GrammarToXMLComposer.cpp @@ -168,6 +168,7 @@ std::list<sax::Token> 
GrammarToXMLComposer::compose(const CSG& grammar) const { composeTerminalAlphabet(out, grammar.getTerminalAlphabet()); composeInitialSymbol(out, grammar.getInitialSymbol()); composeRules(out, grammar); + composeGeneratesEpsilon(out, grammar.getGeneratesEpsilon()); out.push_back(sax::Token("CSG", sax::Token::TokenType::END_ELEMENT)); return out; @@ -181,6 +182,7 @@ std::list<sax::Token> GrammarToXMLComposer::compose(const NonContractingGrammar& composeTerminalAlphabet(out, grammar.getTerminalAlphabet()); composeInitialSymbol(out, grammar.getInitialSymbol()); composeRules(out, grammar); + composeGeneratesEpsilon(out, grammar.getGeneratesEpsilon()); out.push_back(sax::Token("NonContractingGrammar", sax::Token::TokenType::END_ELEMENT)); return out; @@ -322,7 +324,9 @@ void GrammarToXMLComposer::composeRules(std::list<sax::Token>& out, const CSG& g for(const auto& rhs : rule.second) { out.push_back(sax::Token("rule", sax::Token::TokenType::START_ELEMENT)); - composeRuleLHS(out, rule.first); + composeRuleLContext(out, std::get<0>(rule.first)); + composeRuleSingleSymbolLHS(out, std::get<1>(rule.first)); + composeRuleRContext(out, std::get<2>(rule.first)); composeRuleRHS(out, rhs); out.push_back(sax::Token("rule", sax::Token::TokenType::END_ELEMENT)); @@ -485,14 +489,39 @@ void GrammarToXMLComposer::composeRules(std::list<sax::Token>& out, const LeftRG out.push_back(sax::Token("rules", sax::Token::TokenType::END_ELEMENT)); } +void GrammarToXMLComposer::composeRuleLContext(std::list<sax::Token>& out, const std::vector<alphabet::Symbol>& symbols) const { + out.push_back(sax::Token("lContext", sax::Token::TokenType::START_ELEMENT)); + if(symbols.size() == 0) { + out.push_back(sax::Token("epsilon", sax::Token::TokenType::START_ELEMENT)); + out.push_back(sax::Token("epsilon", sax::Token::TokenType::END_ELEMENT)); + } else for (const auto& symbol : symbols) { + out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(symbol)); + } + out.push_back(sax::Token("lContext", 
sax::Token::TokenType::END_ELEMENT)); +} + void GrammarToXMLComposer::composeRuleLHS(std::list<sax::Token>& out, const std::vector<alphabet::Symbol>& symbols) const { out.push_back(sax::Token("lhs", sax::Token::TokenType::START_ELEMENT)); - for (const auto& symbol : symbols) { + if(symbols.size() == 0) { + out.push_back(sax::Token("epsilon", sax::Token::TokenType::START_ELEMENT)); + out.push_back(sax::Token("epsilon", sax::Token::TokenType::END_ELEMENT)); + } else for (const auto& symbol : symbols) { out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(symbol)); } out.push_back(sax::Token("lhs", sax::Token::TokenType::END_ELEMENT)); } +void GrammarToXMLComposer::composeRuleRContext(std::list<sax::Token>& out, const std::vector<alphabet::Symbol>& symbols) const { + out.push_back(sax::Token("rContext", sax::Token::TokenType::START_ELEMENT)); + if(symbols.size() == 0) { + out.push_back(sax::Token("epsilon", sax::Token::TokenType::START_ELEMENT)); + out.push_back(sax::Token("epsilon", sax::Token::TokenType::END_ELEMENT)); + } else for (const auto& symbol : symbols) { + out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(symbol)); + } + out.push_back(sax::Token("rContext", sax::Token::TokenType::END_ELEMENT)); +} + void GrammarToXMLComposer::composeRuleSingleSymbolLHS(std::list<sax::Token>& out, const alphabet::Symbol& symbol) const { out.push_back(sax::Token("lhs", sax::Token::TokenType::START_ELEMENT)); out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(symbol)); @@ -501,7 +530,10 @@ void GrammarToXMLComposer::composeRuleSingleSymbolLHS(std::list<sax::Token>& out void GrammarToXMLComposer::composeRuleRHS(std::list<sax::Token>& out, const std::vector<alphabet::Symbol>& symbols) const { out.push_back(sax::Token("rhs", sax::Token::TokenType::START_ELEMENT)); - for (const auto& symbol : symbols) { + if(symbols.size() == 0) { + out.push_back(sax::Token("epsilon", sax::Token::TokenType::START_ELEMENT)); + 
out.push_back(sax::Token("epsilon", sax::Token::TokenType::END_ELEMENT)); + } else for (const auto& symbol : symbols) { out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(symbol)); } out.push_back(sax::Token("rhs", sax::Token::TokenType::END_ELEMENT)); @@ -538,10 +570,8 @@ void GrammarToXMLComposer::composeRuleLGRHS(std::list<sax::Token>& out, const st if(rhs.size() == 0) { out.push_back(sax::Token("epsilon", sax::Token::TokenType::START_ELEMENT)); out.push_back(sax::Token("epsilon", sax::Token::TokenType::END_ELEMENT)); - } else { - for (const auto& symbol : symbols.get<std::vector<alphabet::Symbol>>()) { - out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(symbol)); - } + } else for (const auto& symbol : symbols.get<std::vector<alphabet::Symbol>>()) { + out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(symbol)); } } else { const std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>& rhs = symbols.get<std::tuple<std::vector<alphabet::Symbol>, alphabet::Symbol, std::vector<alphabet::Symbol>>>(); @@ -563,10 +593,8 @@ void GrammarToXMLComposer::composeRuleLeftLGRHS(std::list<sax::Token>& out, cons if(rhs.size() == 0) { out.push_back(sax::Token("epsilon", sax::Token::TokenType::START_ELEMENT)); out.push_back(sax::Token("epsilon", sax::Token::TokenType::END_ELEMENT)); - } else { - for (const auto& symbol : symbols.get<std::vector<alphabet::Symbol>>()) { - out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(symbol)); - } + } else for (const auto& symbol : symbols.get<std::vector<alphabet::Symbol>>()) { + out.splice(out.end(), alib::ToXMLComposers::symbolComposer.compose(symbol)); } } else { const std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>& rhs = symbols.get<std::pair<alphabet::Symbol, std::vector<alphabet::Symbol>>>(); diff --git a/alib2data/src/grammar/GrammarToXMLComposer.h b/alib2data/src/grammar/GrammarToXMLComposer.h index 427eb2b92d..5ae2d7cb50 
100644 --- a/alib2data/src/grammar/GrammarToXMLComposer.h +++ b/alib2data/src/grammar/GrammarToXMLComposer.h @@ -69,7 +69,9 @@ class GrammarToXMLComposer : public GrammarBase::const_visitor_type { void composeRules(std::list<sax::Token>& out, const RightLG& grammar) const; void composeRules(std::list<sax::Token>& out, const RightRG& grammar) const; + void composeRuleLContext(std::list<sax::Token>& out, const std::vector<alphabet::Symbol>& symbols) const; void composeRuleLHS(std::list<sax::Token>& out, const std::vector<alphabet::Symbol>& symbols) const; + void composeRuleRContext(std::list<sax::Token>& out, const std::vector<alphabet::Symbol>& symbols) const; void composeRuleSingleSymbolLHS(std::list<sax::Token>& out, const alphabet::Symbol& symbol) const; void composeRuleRHS(std::list<sax::Token>& out, const std::vector<alphabet::Symbol>& symbols) const; void composeRuleOneOrTwoSymbolsRHS(std::list<sax::Token>& out, const std::variant<alphabet::Symbol, std::pair<alphabet::Symbol, alphabet::Symbol>>& symbols) const; diff --git a/alib2data/test-src/grammar/GrammarTest.cpp b/alib2data/test-src/grammar/GrammarTest.cpp index d365f883c5..91397cd168 100644 --- a/alib2data/test-src/grammar/GrammarTest.cpp +++ b/alib2data/test-src/grammar/GrammarTest.cpp @@ -66,6 +66,11 @@ void GrammarTest::testUnrestrictedParser() { grammar.addRule({alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1))))}, {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))}); grammar.addRule({alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))}, {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("b")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3))))}); + grammar.addRule(std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2)))), 
alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))}, std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))}); + grammar.addRule(std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))}, std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("b")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3))))}); + grammar.addRule(std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1))))}, std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3))))}); + grammar.addRule(std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1))))}, std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))}); + grammar.addRule(std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))}, std::vector<alphabet::Symbol> {}); CPPUNIT_ASSERT( grammar == grammar ); { @@ -294,3 +299,64 @@ void GrammarTest::testContextFreeParser() { } } } + +/* Round-trip test for the two context-sensitive grammar forms. Each scope below builds a grammar (grammar::CSG first, then grammar::NonContractingGrammar) constructed from symbol 1, registers nonterminals 1-3 and terminals "a" and "b", adds four rules, composes the grammar into sax tokens with GrammarToXMLComposer, prints the tokens into a string, parses that string back into tokens, rebuilds a grammar with GrammarFromXMLParser and asserts that the rebuilt grammar equals the original. */ void GrammarTest::testContextSensitiveParser() { + { /* CSG case: addRule is called with four arguments here, presumably (left context, nonterminal, right context, right-hand side) - TODO confirm against CSG.h. The result is read back with parseCSG. */ + grammar::CSG grammar(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1))))); + + grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1))))); + 
grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))); + grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3))))); + grammar.addTerminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a"))))); + grammar.addTerminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("b"))))); + + grammar.addRule(std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))}, alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1)))), std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))}, std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))}); + grammar.addRule(std::vector<alphabet::Symbol> {}, alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2)))), std::vector<alphabet::Symbol> {}, std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("b")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3))))}); + grammar.addRule(std::vector<alphabet::Symbol> {}, alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1)))), std::vector<alphabet::Symbol> {}, std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3))))}); + grammar.addRule(std::vector<alphabet::Symbol> {}, alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1)))), std::vector<alphabet::Symbol> {}, std::vector<alphabet::Symbol> 
{alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))}); + + CPPUNIT_ASSERT( grammar == grammar ); + { + grammar::GrammarToXMLComposer composer; + std::list<sax::Token> tokens = composer.compose(grammar); + std::string tmp; + sax::SaxComposeInterface::printMemory(tmp, tokens); + + std::list<sax::Token> tokens2; + sax::SaxParseInterface::parseMemory(tmp, tokens2); + grammar::GrammarFromXMLParser parser; + grammar::CSG grammar2 = parser.parseCSG(tokens2); + + CPPUNIT_ASSERT( grammar == grammar2 ); + } + } + { /* NonContractingGrammar case: addRule is called with two symbol vectors here, presumably (left-hand side, right-hand side) - TODO confirm against NonContractingGrammar.h. The result is read back with parseNonContractingGrammar. */ + grammar::NonContractingGrammar grammar(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1))))); + + grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1))))); + grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))); + grammar.addNonterminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3))))); + grammar.addTerminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a"))))); + grammar.addTerminalSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("b"))))); + + grammar.addRule(std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2)))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))}, std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))}); + grammar.addRule(std::vector<alphabet::Symbol> 
{alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))}, std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("b")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3))))}); + grammar.addRule(std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1))))}, std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(3))))}); + grammar.addRule(std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(1))))}, std::vector<alphabet::Symbol> {alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel("a")))), alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::IntegerLabel(2))))}); + + CPPUNIT_ASSERT( grammar == grammar ); + { + grammar::GrammarToXMLComposer composer; + std::list<sax::Token> tokens = composer.compose(grammar); + std::string tmp; + sax::SaxComposeInterface::printMemory(tmp, tokens); + + std::list<sax::Token> tokens2; + sax::SaxParseInterface::parseMemory(tmp, tokens2); + grammar::GrammarFromXMLParser parser; + grammar::NonContractingGrammar grammar2 = parser.parseNonContractingGrammar(tokens2); + + CPPUNIT_ASSERT( grammar == grammar2 ); + } + } +} diff --git a/alib2data/test-src/grammar/GrammarTest.h b/alib2data/test-src/grammar/GrammarTest.h index e2d2b9b334..6dcea84220 100644 --- a/alib2data/test-src/grammar/GrammarTest.h +++ b/alib2data/test-src/grammar/GrammarTest.h @@ -8,7 +8,7 @@ class GrammarTest : public CppUnit::TestFixture CPPUNIT_TEST_SUITE( GrammarTest ); CPPUNIT_TEST( testXMLParser ); CPPUNIT_TEST( testUnrestrictedParser ); - + CPPUNIT_TEST( testContextSensitiveParser ); CPPUNIT_TEST( testContextFreeParser ); 
CPPUNIT_TEST( testRegularParser ); CPPUNIT_TEST_SUITE_END(); @@ -19,7 +19,7 @@ public: void testXMLParser(); void testUnrestrictedParser(); - + void testContextSensitiveParser(); void testContextFreeParser(); void testRegularParser(); }; -- GitLab