From ea99c5b3477d703d72f27ed5f86b0378c395f734 Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Fri, 9 Jan 2015 19:22:18 +0100 Subject: [PATCH] add CNF from string parsing --- .../src/grammar/GrammarFromStringLexer.cpp | 11 ++ .../src/grammar/GrammarFromStringLexer.h | 4 +- .../src/grammar/GrammarFromStringParser.cpp | 146 ++++++++++++++++-- .../src/grammar/GrammarFromStringParser.h | 6 + .../src/grammar/GrammarToStringComposer.cpp | 19 ++- ...rminalNonterminalAlphabetInitialSymbol.cpp | 7 +- alib2data/test-src/grammar/GrammarTest.cpp | 23 +++ alib2data/test-src/grammar/GrammarTest.h | 2 + 8 files changed, 194 insertions(+), 24 deletions(-) diff --git a/alib2data/src/grammar/GrammarFromStringLexer.cpp b/alib2data/src/grammar/GrammarFromStringLexer.cpp index bf516057a4..7dcc4a9bfb 100644 --- a/alib2data/src/grammar/GrammarFromStringLexer.cpp +++ b/alib2data/src/grammar/GrammarFromStringLexer.cpp @@ -6,6 +6,7 @@ */ #include "GrammarFromStringLexer.h" +#include "../std/istream.h" namespace grammar { @@ -62,6 +63,16 @@ L0: token.value += character; token.raw += character; goto L1; + } else if(in.unget(), in >> "RIGHT_RG") { + token.type = TokenType::RIGHT_RG; + token.value = "RIGHT_RG"; + token.raw = "RIGHT_RG"; + return token; + } else if(in.clear(), in >> "CNF") { + token.type = TokenType::CNF; + token.value = "CNF"; + token.raw = "CNF"; + return token; } else { in.putback(character); putback(in, std::move(token)); diff --git a/alib2data/src/grammar/GrammarFromStringLexer.h b/alib2data/src/grammar/GrammarFromStringLexer.h index 98f0f41b14..85020bc2b3 100644 --- a/alib2data/src/grammar/GrammarFromStringLexer.h +++ b/alib2data/src/grammar/GrammarFromStringLexer.h @@ -24,8 +24,10 @@ public: SEPARATOR, EPSILON, MAPS_TO, + RIGHT_RG, + CNF, TEOF, - ERROR + ERROR, }; struct Token { diff --git a/alib2data/src/grammar/GrammarFromStringParser.cpp b/alib2data/src/grammar/GrammarFromStringParser.cpp index 94128b7969..178a91e3c7 100644 --- a/alib2data/src/grammar/GrammarFromStringParser.cpp +++ b/alib2data/src/grammar/GrammarFromStringParser.cpp @@ -9,16 +9,30 @@ #include "../exception/AlibException.h" +#include "Regular/RightRG.h" +#include "ContextFree/CNF.h" + #include "../StringApi.hpp" namespace grammar { Grammar GrammarFromStringParser::parseGrammar(std::istream& input) const { - return parseGrammar(input, std::set<FEATURES>({})); + return parseGrammar(input, std::set<FEATURES>({FEATURES::CNF, FEATURES::RIGHT_RG})); } Grammar GrammarFromStringParser::parseGrammar(std::istream& input, const std::set<FEATURES>& features) const { - throw exception::AlibException(); + GrammarFromStringLexer::Token token = m_GrammarLexer.next(input); + if(token.type == GrammarFromStringLexer::TokenType::CNF) { + if(!features.count(FEATURES::CNF)) throw exception::AlibException("Disabled formalism CNF"); + m_GrammarLexer.putback(input, token); + return Grammar(parseCNF(input)); + } else if(token.type == GrammarFromStringLexer::TokenType::RIGHT_RG) { + if(!features.count(FEATURES::RIGHT_RG)) throw exception::AlibException("Disabled Formalism RightRG"); + m_GrammarLexer.putback(input, token); + return Grammar(parseRightRG(input)); + } else { + throw exception::AlibException("Formalism not recognised (token = \"" + token.value + "\")"); + } } std::set<alphabet::Symbol> GrammarFromStringParser::parseSet(std::istream& input) const { @@ -30,17 +44,19 @@ std::set<alphabet::Symbol> GrammarFromStringParser::parseSet(std::istream& input } token = m_GrammarLexer.next(input); - if(token.type != GrammarFromStringLexer::TokenType::SET_END) while(true) { + if(token.type != GrammarFromStringLexer::TokenType::SET_END) { m_GrammarLexer.putback(input, token); - alphabet::Symbol symbol = alib::stringApi<alphabet::Symbol>::parse(input); - res.insert(symbol); + while(true) { + alphabet::Symbol symbol = alib::stringApi<alphabet::Symbol>::parse(input); + res.insert(symbol); - token = m_GrammarLexer.next(input); - if(token.type != GrammarFromStringLexer::TokenType::SET_END) { - break; - } - if(token.type != GrammarFromStringLexer::TokenType::COMMA) { - throw exception::AlibException("Expected SET_END or COMMA token"); + token = m_GrammarLexer.next(input); + if(token.type == GrammarFromStringLexer::TokenType::SET_END) { + break; + } + if(token.type != GrammarFromStringLexer::TokenType::COMMA) { + throw exception::AlibException("Expected SET_END or COMMA token"); + } } } @@ -51,4 +67,112 @@ std::set<alphabet::Symbol> GrammarFromStringParser::parseSet(std::istream& input return res; } +std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> GrammarFromStringParser::parseCFLikeRules(std::istream& input) const { + std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> result; + + GrammarFromStringLexer::Token token = m_GrammarLexer.next(input); + if(token.type != GrammarFromStringLexer::TokenType::SET_BEGIN) { + throw exception::AlibException(); + } + + token = m_GrammarLexer.next(input); + if(token.type != GrammarFromStringLexer::TokenType::SET_END) { + m_GrammarLexer.putback(input, token); + while(true) { + alphabet::Symbol lhs = alib::stringApi<alphabet::Symbol>::parse(input); + + token = m_GrammarLexer.next(input); + if(token.type != GrammarFromStringLexer::TokenType::MAPS_TO) { + throw exception::AlibException(); + } + + while(true) { + std::vector<alphabet::Symbol> rhs; + + token = m_GrammarLexer.next(input); + if(token.type != GrammarFromStringLexer::TokenType::COMMA && token.type != GrammarFromStringLexer::TokenType::SET_END && token.type != GrammarFromStringLexer::TokenType::SEPARATOR) while(true) { + m_GrammarLexer.putback(input, token); + + rhs.push_back(alib::stringApi<alphabet::Symbol>::parse(input)); + token = m_GrammarLexer.next(input); + if(token.type == GrammarFromStringLexer::TokenType::SEPARATOR || token.type == GrammarFromStringLexer::TokenType::COMMA || token.type == GrammarFromStringLexer::TokenType::SET_END) { + break; + } + } + result[lhs].insert(rhs); + if(token.type == GrammarFromStringLexer::TokenType::COMMA || token.type == GrammarFromStringLexer::TokenType::SET_END) { + break; + } + if(token.type != GrammarFromStringLexer::TokenType::SEPARATOR) { + throw exception::AlibException("Expected SEPARATOR, SETEND or COMMA token"); + } + } + + if(token.type == GrammarFromStringLexer::TokenType::SET_END) { + break; + } + if(token.type != GrammarFromStringLexer::TokenType::COMMA) { + throw exception::AlibException("Expected SET_END or COMMA token"); + } + } + } + + if(token.type != GrammarFromStringLexer::TokenType::SET_END) { + throw exception::AlibException("Expected SET_END token"); + } + return result; +} + +RightRG GrammarFromStringParser::parseRightRG(std::istream& input) const { + throw exception::AlibException("Unimplemented"); +} + +CNF GrammarFromStringParser::parseCNF(std::istream& input) const { + GrammarFromStringLexer::Token token = m_GrammarLexer.next(input); + if(token.type != GrammarFromStringLexer::TokenType::CNF) { + throw exception::AlibException("Unrecognised CNF token."); + } + + token = m_GrammarLexer.next(input); + if(token.type != GrammarFromStringLexer::TokenType::TUPLE_BEGIN) { + throw exception::AlibException("Unrecognised Tuple begin token."); + } + + std::set<alphabet::Symbol> nonterminals = parseSet(input); + + token = m_GrammarLexer.next(input); + if(token.type != GrammarFromStringLexer::TokenType::COMMA) { + throw exception::AlibException("Unrecognised Comma token."); + } + + std::set<alphabet::Symbol> terminals = parseSet(input); + + token = m_GrammarLexer.next(input); + if(token.type != GrammarFromStringLexer::TokenType::COMMA) { + throw exception::AlibException("Unrecognised Comma token."); + } + + std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> rules = parseCFLikeRules(input); + + token = m_GrammarLexer.next(input); + if(token.type != GrammarFromStringLexer::TokenType::COMMA) { + throw exception::AlibException("Unrecognised Comma token."); + } + + alphabet::Symbol initialSymbol = alib::stringApi<alphabet::Symbol>::parse(input); + + token = m_GrammarLexer.next(input); + if(token.type != GrammarFromStringLexer::TokenType::TUPLE_END) { + throw exception::AlibException("Unrecognised Tuple end token."); + } + + CNF cnf(nonterminals, terminals, initialSymbol); + for(const auto& rule : rules) { + for(const auto& ruleRHS : rule.second) { + cnf.addRawRule(rule.first, ruleRHS); + } + } + return cnf; +} + } /* namespace grammar */ diff --git a/alib2data/src/grammar/GrammarFromStringParser.h b/alib2data/src/grammar/GrammarFromStringParser.h index 95bc75547b..9f2fead435 100644 --- a/alib2data/src/grammar/GrammarFromStringParser.h +++ b/alib2data/src/grammar/GrammarFromStringParser.h @@ -12,6 +12,8 @@ #include "Grammar.h" #include "GrammarFeatures.h" #include "../alphabet/Symbol.h" +#include <set> +#include <vector> namespace alib { @@ -26,10 +28,14 @@ class GrammarFromStringParser { GrammarFromStringLexer m_GrammarLexer; std::set<alphabet::Symbol> parseSet(std::istream& input) const; + std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> parseCFLikeRules(std::istream& input) const; Grammar parseGrammar(std::istream& input) const; Grammar parseGrammar(std::istream& input, const std::set<FEATURES>& features) const; + RightRG parseRightRG(std::istream& input) const; + CNF parseCNF(std::istream& input) const; + template<typename T> friend class alib::stringApi; }; diff --git a/alib2data/src/grammar/GrammarToStringComposer.cpp b/alib2data/src/grammar/GrammarToStringComposer.cpp index d71393cb8e..4daef6225f 100644 --- a/alib2data/src/grammar/GrammarToStringComposer.cpp +++ b/alib2data/src/grammar/GrammarToStringComposer.cpp @@ -41,7 +41,7 @@ void GrammarToStringComposer::compose(std::ostream& output, const EpsilonFreeCFG void GrammarToStringComposer::compose(std::ostream& output, const CNF& grammar) const { bool first; - output << "(CNF," << std::endl; + output << "CNF (" << std::endl; output << "{"; first = false; @@ -64,29 +64,28 @@ void GrammarToStringComposer::compose(std::ostream& output, const CNF& grammar) } output << "}," << std::endl; output << "{ "; - first = false; + first = true; for(const auto& rule : grammar.getRawRules() ) { if(first) - output << ", " << std::endl << " "; + first = false; else - first = true; + output << "," << std::endl << " "; alib::stringApi<alphabet::Symbol>::compose(output, rule.first); - output << " -> "; - bool innerFirst = false; + output << " ->"; + bool innerFirst = true; for(const auto& rhs : rule.second) { if(innerFirst) - output << " | "; + innerFirst = false; else - innerFirst = true; + output << " |"; for(const auto& symbol : rhs) { - alib::stringApi<alphabet::Symbol>::compose(output, symbol); output << " "; + alib::stringApi<alphabet::Symbol>::compose(output, symbol); } } } output << "}," << std::endl; alib::stringApi<alphabet::Symbol>::compose(output, grammar.getInitialSymbol()); - output << std::endl; output << ")" << std::endl; } diff --git a/alib2data/src/grammar/common/TerminalNonterminalAlphabetInitialSymbol.cpp b/alib2data/src/grammar/common/TerminalNonterminalAlphabetInitialSymbol.cpp index 03295dc7d9..b2ec4a5a65 100644 --- a/alib2data/src/grammar/common/TerminalNonterminalAlphabetInitialSymbol.cpp +++ b/alib2data/src/grammar/common/TerminalNonterminalAlphabetInitialSymbol.cpp @@ -13,6 +13,9 @@ #include "../../alphabet/Symbol.h" #include <climits> #include <algorithm> +#include <iostream> + +#include "../../XmlApi.hpp" namespace grammar { @@ -38,7 +41,7 @@ void TerminalNonterminalAlphabetInitialSymbol::setTerminalAlphabet(const std::se std::set<alphabet::Symbol> added; std::set_difference(alphabet.begin(), alphabet.end(), terminalAlphabet.begin(), terminalAlphabet.end(), std::inserter(added, added.end())); - + for(const alphabet::Symbol& removedSymbol : removed) { removeTerminalSymbol(removedSymbol); } @@ -66,7 +69,7 @@ void TerminalNonterminalAlphabetInitialSymbol::setNonterminalAlphabet(const std: std::set<alphabet::Symbol> added; std::set_difference(alphabet.begin(), alphabet.end(), nonterminalAlphabet.begin(), nonterminalAlphabet.end(), std::inserter(added, added.end())); - + for(const alphabet::Symbol& removedSymbol : removed) { removeNonterminalSymbol(removedSymbol); } diff --git a/alib2data/test-src/grammar/GrammarTest.cpp b/alib2data/test-src/grammar/GrammarTest.cpp index 594da58a51..a318a0de96 100644 --- a/alib2data/test-src/grammar/GrammarTest.cpp +++ b/alib2data/test-src/grammar/GrammarTest.cpp @@ -7,6 +7,7 @@ #include "grammar/Unrestricted/UnrestrictedGrammar.h" #include "factory/XmlDataFactory.hpp" +#include "factory/StringDataFactory.hpp" #include "alphabet/LabeledSymbol.h" @@ -20,6 +21,28 @@ void GrammarTest::setUp() { void GrammarTest::tearDown() { } +void GrammarTest::stringParserTest() { + { + std::string input = "CNF (\n" + "{A, B, S},\n" + "{a, b},\n" + "{ A -> A A | a,\n" + " B -> B B | b,\n" + " S -> | B S | S A},\n" + "S)\n"; + grammar::Grammar grammar = alib::StringDataFactory::fromString<grammar::Grammar>(input); + + std::string output = alib::StringDataFactory::toString(grammar); + + std::cout << "\"" << input << "\"" << std::endl << std::endl << "\"" << output << "\"" << std::endl; + CPPUNIT_ASSERT( input == output ); + + grammar::Grammar grammar2 = alib::StringDataFactory::fromString<grammar::Grammar>(output); + + CPPUNIT_ASSERT( grammar == grammar2 ); + } +} + void GrammarTest::testUnrestrictedParser() { grammar::UnrestrictedGrammar grammar(alphabet::symbolFrom(1)); diff --git a/alib2data/test-src/grammar/GrammarTest.h b/alib2data/test-src/grammar/GrammarTest.h index 4a386d2ceb..71658af4b2 100644 --- a/alib2data/test-src/grammar/GrammarTest.h +++ b/alib2data/test-src/grammar/GrammarTest.h @@ -6,6 +6,7 @@ class GrammarTest : public CppUnit::TestFixture { CPPUNIT_TEST_SUITE( GrammarTest ); + CPPUNIT_TEST( stringParserTest ); CPPUNIT_TEST( testUnrestrictedParser ); CPPUNIT_TEST( testContextSensitiveParser ); CPPUNIT_TEST( testContextFreeParser ); @@ -16,6 +17,7 @@ public: void setUp(); void tearDown(); + void stringParserTest(); void testUnrestrictedParser(); void testContextSensitiveParser(); void testContextFreeParser(); -- GitLab