Skip to content
Snippets Groups Projects
Commit ea99c5b3 authored by Jan Trávníček
Browse files

add CNF from string parsing

parent 82302862
No related branches found
No related tags found
No related merge requests found
......@@ -6,6 +6,7 @@
*/
 
#include "GrammarFromStringLexer.h"
#include "../std/istream.h"
 
namespace grammar {
 
......@@ -62,6 +63,16 @@ L0:
token.value += character;
token.raw += character;
goto L1;
} else if(in.unget(), in >> "RIGHT_RG") {
token.type = TokenType::RIGHT_RG;
token.value = "RIGHT_RG";
token.raw = "RIGHT_RG";
return token;
} else if(in.clear(), in >> "CNF") {
token.type = TokenType::CNF;
token.value = "CNF";
token.raw = "CNF";
return token;
} else {
in.putback(character);
putback(in, std::move(token));
......
......@@ -24,8 +24,10 @@ public:
SEPARATOR,
EPSILON,
MAPS_TO,
RIGHT_RG,
CNF,
TEOF,
ERROR
ERROR,
};
 
struct Token {
......
......@@ -9,16 +9,30 @@
 
#include "../exception/AlibException.h"
 
#include "Regular/RightRG.h"
#include "ContextFree/CNF.h"
#include "../StringApi.hpp"
 
namespace grammar {
 
/**
 * Parses a grammar from its string form with the default feature set.
 *
 * Delegates to the two-argument overload with the CNF and RIGHT_RG features
 * enabled. The earlier empty feature set is dropped: it came first and made
 * this return unreachable.
 *
 * @param input stream the grammar text is read from
 * @return the parsed grammar
 */
Grammar GrammarFromStringParser::parseGrammar(std::istream& input) const {
	return parseGrammar(input, std::set<FEATURES>({FEATURES::CNF, FEATURES::RIGHT_RG}));
}
 
/**
 * Parses a grammar from its string form, dispatching on the leading formalism keyword.
 *
 * The first token selects the formalism. It is put back onto the stream before
 * delegating, because the formalism-specific parser reads the keyword again itself.
 * The stale unconditional throw that used to open this function is removed; it
 * made the dispatch below unreachable.
 *
 * @param input    stream the grammar text is read from
 * @param features formalisms the caller allows to be parsed
 * @return the parsed grammar wrapped in a Grammar
 * @throws exception::AlibException when the formalism is recognised but not in
 *         `features`, or when the first token names no known formalism
 */
Grammar GrammarFromStringParser::parseGrammar(std::istream& input, const std::set<FEATURES>& features) const {
	GrammarFromStringLexer::Token token = m_GrammarLexer.next(input);
	if(token.type == GrammarFromStringLexer::TokenType::CNF) {
		if(!features.count(FEATURES::CNF)) throw exception::AlibException("Disabled formalism CNF");
		m_GrammarLexer.putback(input, token);
		return Grammar(parseCNF(input));
	} else if(token.type == GrammarFromStringLexer::TokenType::RIGHT_RG) {
		if(!features.count(FEATURES::RIGHT_RG)) throw exception::AlibException("Disabled Formalism RightRG");
		m_GrammarLexer.putback(input, token);
		return Grammar(parseRightRG(input));
	} else {
		throw exception::AlibException("Formalism not recognised (token = \"" + token.value + "\")");
	}
}
 
std::set<alphabet::Symbol> GrammarFromStringParser::parseSet(std::istream& input) const {
......@@ -30,17 +44,19 @@ std::set<alphabet::Symbol> GrammarFromStringParser::parseSet(std::istream& input
}
 
token = m_GrammarLexer.next(input);
if(token.type != GrammarFromStringLexer::TokenType::SET_END) while(true) {
if(token.type != GrammarFromStringLexer::TokenType::SET_END) {
m_GrammarLexer.putback(input, token);
alphabet::Symbol symbol = alib::stringApi<alphabet::Symbol>::parse(input);
res.insert(symbol);
while(true) {
alphabet::Symbol symbol = alib::stringApi<alphabet::Symbol>::parse(input);
res.insert(symbol);
 
token = m_GrammarLexer.next(input);
if(token.type != GrammarFromStringLexer::TokenType::SET_END) {
break;
}
if(token.type != GrammarFromStringLexer::TokenType::COMMA) {
throw exception::AlibException("Expected SET_END or COMMA token");
token = m_GrammarLexer.next(input);
if(token.type == GrammarFromStringLexer::TokenType::SET_END) {
break;
}
if(token.type != GrammarFromStringLexer::TokenType::COMMA) {
throw exception::AlibException("Expected SET_END or COMMA token");
}
}
}
 
......@@ -51,4 +67,112 @@ std::set<alphabet::Symbol> GrammarFromStringParser::parseSet(std::istream& input
return res;
}
 
/**
 * Parses a set of context-free-like rules.
 *
 * Expected token layout: SET_BEGIN, then zero or more rules separated by COMMA,
 * then SET_END. Each rule is a left-hand-side symbol, a MAPS_TO token and one or
 * more alternatives separated by SEPARATOR. An alternative is a possibly empty
 * sequence of symbols; an empty alternative is stored as an empty vector.
 *
 * @param input stream the rules are read from
 * @return map from each left-hand-side symbol to the set of its right-hand sides
 * @throws exception::AlibException when SET_BEGIN or MAPS_TO is missing
 */
std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> GrammarFromStringParser::parseCFLikeRules(std::istream& input) const {
	std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> result;

	GrammarFromStringLexer::Token token = m_GrammarLexer.next(input);
	if(token.type != GrammarFromStringLexer::TokenType::SET_BEGIN) {
		throw exception::AlibException("Expected SET_BEGIN token");
	}

	token = m_GrammarLexer.next(input);
	if(token.type != GrammarFromStringLexer::TokenType::SET_END) {
		// Not an empty set: the token belongs to the first rule's left-hand side.
		m_GrammarLexer.putback(input, token);
		while(true) {
			alphabet::Symbol lhs = alib::stringApi<alphabet::Symbol>::parse(input);
			token = m_GrammarLexer.next(input);
			if(token.type != GrammarFromStringLexer::TokenType::MAPS_TO) {
				throw exception::AlibException("Expected MAPS_TO token");
			}

			// One iteration per alternative of the current rule.
			while(true) {
				std::vector<alphabet::Symbol> rhs;
				token = m_GrammarLexer.next(input);
				// A terminator right away means an empty alternative; otherwise collect
				// symbols until SEPARATOR, COMMA or SET_END is read.
				if(token.type != GrammarFromStringLexer::TokenType::COMMA && token.type != GrammarFromStringLexer::TokenType::SET_END && token.type != GrammarFromStringLexer::TokenType::SEPARATOR) while(true) {
					// The lookahead token is part of a symbol; hand it back to the symbol parser.
					m_GrammarLexer.putback(input, token);
					rhs.push_back(alib::stringApi<alphabet::Symbol>::parse(input));
					token = m_GrammarLexer.next(input);
					if(token.type == GrammarFromStringLexer::TokenType::SEPARATOR || token.type == GrammarFromStringLexer::TokenType::COMMA || token.type == GrammarFromStringLexer::TokenType::SET_END) {
						break;
					}
				}
				result[lhs].insert(rhs);

				// COMMA or SET_END closes the rule; SEPARATOR starts another alternative.
				if(token.type == GrammarFromStringLexer::TokenType::COMMA || token.type == GrammarFromStringLexer::TokenType::SET_END) {
					break;
				}
				// Defensive: the token is always one of the three terminators here.
				if(token.type != GrammarFromStringLexer::TokenType::SEPARATOR) {
					throw exception::AlibException("Expected SEPARATOR, SETEND or COMMA token");
				}
			}

			if(token.type == GrammarFromStringLexer::TokenType::SET_END) {
				break;
			}
			// Defensive: the alternatives loop only exits on COMMA or SET_END.
			if(token.type != GrammarFromStringLexer::TokenType::COMMA) {
				throw exception::AlibException("Expected SET_END or COMMA token");
			}
		}
	}

	// Defensive: both paths that reach this point have just read SET_END.
	if(token.type != GrammarFromStringLexer::TokenType::SET_END) {
		throw exception::AlibException("Expected SET_END token");
	}
	return result;
}
/**
 * Placeholder for parsing a right regular grammar; nothing is read from the stream.
 *
 * @throws exception::AlibException always, with the message "Unimplemented"
 */
RightRG GrammarFromStringParser::parseRightRG(std::istream& /* input */) const {
	throw exception::AlibException("Unimplemented");
}
/**
 * Parses a grammar in Chomsky normal form.
 *
 * Token layout consumed, in order: CNF keyword, TUPLE_BEGIN, nonterminal set,
 * COMMA, terminal set, COMMA, rule set, COMMA, initial symbol, TUPLE_END.
 *
 * @param input stream the grammar text is read from
 * @return CNF built from the parsed alphabets and initial symbol, with every
 *         parsed rule added through addRawRule
 * @throws exception::AlibException when a structural token is not the expected one
 */
CNF GrammarFromStringParser::parseCNF(std::istream& input) const {
	typedef GrammarFromStringLexer::TokenType TokenType;

	// Reads the next token and rejects the input unless it has the wanted type.
	auto expect = [this, &input](TokenType wanted, const char* complaint) {
		if(m_GrammarLexer.next(input).type != wanted) {
			throw exception::AlibException(complaint);
		}
	};

	expect(TokenType::CNF, "Unrecognised CNF token.");
	expect(TokenType::TUPLE_BEGIN, "Unrecognised Tuple begin token.");

	std::set<alphabet::Symbol> nonterminals = parseSet(input);
	expect(TokenType::COMMA, "Unrecognised Comma token.");

	std::set<alphabet::Symbol> terminals = parseSet(input);
	expect(TokenType::COMMA, "Unrecognised Comma token.");

	std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> rules = parseCFLikeRules(input);
	expect(TokenType::COMMA, "Unrecognised Comma token.");

	alphabet::Symbol initialSymbol = alib::stringApi<alphabet::Symbol>::parse(input);
	expect(TokenType::TUPLE_END, "Unrecognised Tuple end token.");

	CNF result(nonterminals, terminals, initialSymbol);
	for(auto entry = rules.begin(); entry != rules.end(); ++entry) {
		for(auto alternative = entry->second.begin(); alternative != entry->second.end(); ++alternative) {
			result.addRawRule(entry->first, *alternative);
		}
	}
	return result;
}
} /* namespace grammar */
......@@ -12,6 +12,8 @@
#include "Grammar.h"
#include "GrammarFeatures.h"
#include "../alphabet/Symbol.h"
#include <set>
#include <vector>
 
namespace alib {
 
......@@ -26,10 +28,14 @@ class GrammarFromStringParser {
GrammarFromStringLexer m_GrammarLexer;
 
std::set<alphabet::Symbol> parseSet(std::istream& input) const;
std::map<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>> parseCFLikeRules(std::istream& input) const;
 
Grammar parseGrammar(std::istream& input) const;
Grammar parseGrammar(std::istream& input, const std::set<FEATURES>& features) const;
 
RightRG parseRightRG(std::istream& input) const;
CNF parseCNF(std::istream& input) const;
template<typename T> friend class alib::stringApi;
};
 
......
......@@ -41,7 +41,7 @@ void GrammarToStringComposer::compose(std::ostream& output, const EpsilonFreeCFG
void GrammarToStringComposer::compose(std::ostream& output, const CNF& grammar) const {
bool first;
 
output << "(CNF," << std::endl;
output << "CNF (" << std::endl;
 
output << "{";
first = false;
......@@ -64,29 +64,28 @@ void GrammarToStringComposer::compose(std::ostream& output, const CNF& grammar)
}
output << "}," << std::endl;
output << "{ ";
first = false;
first = true;
for(const auto& rule : grammar.getRawRules() ) {
if(first)
output << ", " << std::endl << " ";
first = false;
else
first = true;
output << "," << std::endl << " ";
alib::stringApi<alphabet::Symbol>::compose(output, rule.first);
output << " -> ";
bool innerFirst = false;
output << " ->";
bool innerFirst = true;
for(const auto& rhs : rule.second) {
if(innerFirst)
output << " | ";
innerFirst = false;
else
innerFirst = true;
output << " |";
for(const auto& symbol : rhs) {
alib::stringApi<alphabet::Symbol>::compose(output, symbol);
output << " ";
alib::stringApi<alphabet::Symbol>::compose(output, symbol);
}
}
}
output << "}," << std::endl;
alib::stringApi<alphabet::Symbol>::compose(output, grammar.getInitialSymbol());
output << std::endl;
output << ")" << std::endl;
}
 
......
......@@ -13,6 +13,9 @@
#include "../../alphabet/Symbol.h"
#include <climits>
#include <algorithm>
#include <iostream>
#include "../../XmlApi.hpp"
 
namespace grammar {
 
......@@ -38,7 +41,7 @@ void TerminalNonterminalAlphabetInitialSymbol::setTerminalAlphabet(const std::se
 
std::set<alphabet::Symbol> added;
std::set_difference(alphabet.begin(), alphabet.end(), terminalAlphabet.begin(), terminalAlphabet.end(), std::inserter(added, added.end()));
for(const alphabet::Symbol& removedSymbol : removed) {
removeTerminalSymbol(removedSymbol);
}
......@@ -66,7 +69,7 @@ void TerminalNonterminalAlphabetInitialSymbol::setNonterminalAlphabet(const std:
 
std::set<alphabet::Symbol> added;
std::set_difference(alphabet.begin(), alphabet.end(), nonterminalAlphabet.begin(), nonterminalAlphabet.end(), std::inserter(added, added.end()));
for(const alphabet::Symbol& removedSymbol : removed) {
removeNonterminalSymbol(removedSymbol);
}
......
......@@ -7,6 +7,7 @@
#include "grammar/Unrestricted/UnrestrictedGrammar.h"
 
#include "factory/XmlDataFactory.hpp"
#include "factory/StringDataFactory.hpp"
 
#include "alphabet/LabeledSymbol.h"
 
......@@ -20,6 +21,28 @@ void GrammarTest::setUp() {
// Fixture teardown hook; intentionally empty, no per-test cleanup is performed.
void GrammarTest::tearDown() {
}
 
// Round-trip check for the textual CNF format: parsing the text and composing it
// again must reproduce the text exactly, and re-parsing the composed text must
// give an equal grammar.
void GrammarTest::stringParserTest() {
	std::string input =
		"CNF (\n"
		"{A, B, S},\n"
		"{a, b},\n"
		"{ A -> A A | a,\n"
		" B -> B B | b,\n"
		" S -> | B S | S A},\n"
		"S)\n";

	grammar::Grammar grammar = alib::StringDataFactory::fromString<grammar::Grammar>(input);
	std::string output = alib::StringDataFactory::toString(grammar);

	// Print both texts in quotes so whitespace differences show up in the test log.
	std::cout << "\"" << input << "\"" << std::endl;
	std::cout << std::endl;
	std::cout << "\"" << output << "\"" << std::endl;

	CPPUNIT_ASSERT( input == output );

	grammar::Grammar grammar2 = alib::StringDataFactory::fromString<grammar::Grammar>(output);
	CPPUNIT_ASSERT( grammar == grammar2 );
}
void GrammarTest::testUnrestrictedParser() {
grammar::UnrestrictedGrammar grammar(alphabet::symbolFrom(1));
 
......
......@@ -6,6 +6,7 @@
class GrammarTest : public CppUnit::TestFixture
{
CPPUNIT_TEST_SUITE( GrammarTest );
CPPUNIT_TEST( stringParserTest );
CPPUNIT_TEST( testUnrestrictedParser );
CPPUNIT_TEST( testContextSensitiveParser );
CPPUNIT_TEST( testContextFreeParser );
......@@ -16,6 +17,7 @@ public:
void setUp();
void tearDown();
 
void stringParserTest();
void testUnrestrictedParser();
void testContextSensitiveParser();
void testContextFreeParser();
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment