From b43525d4ad9855e8d4c7561648b73ea71ecb94b4 Mon Sep 17 00:00:00 2001
From: Martin Zak <zakmart1@fit.cvut.cz>
Date: Sat, 23 Nov 2013 19:48:12 +0100
Subject: [PATCH] RegExp implementation

---
 acat/src/acat.cpp                 |   6 ++
 alib/src/regexp/Alternation.cpp   |  24 +++++++
 alib/src/regexp/Alternation.h     |  29 ++++++++
 alib/src/regexp/Iteration.cpp     |  22 +++++++
 alib/src/regexp/Iteration.h       |  27 ++++++++
 alib/src/regexp/RegExp.cpp        |  34 ++++++++++
 alib/src/regexp/RegExp.h          |  34 ++++++++++
 alib/src/regexp/RegExpElement.cpp |  15 +++++
 alib/src/regexp/RegExpElement.h   |  22 +++++++
 alib/src/regexp/RegExpParser.cpp  | 106 ++++++++++++++++++++++++++++++
 alib/src/regexp/RegExpParser.h    |  34 ++++++++++
 alib/src/regexp/RegExpPrinter.cpp |  71 ++++++++++++++++++++
 alib/src/regexp/RegExpPrinter.h   |  34 ++++++++++
 alib/src/regexp/RegExpSymbol.cpp  |  20 ++++++
 alib/src/regexp/RegExpSymbol.h    |  28 ++++++++
 examples/regexp/regexp.xml        |  17 +++++
 examples/regexp/regexp2.xml       |  27 ++++++++
 examples/regexp/regexp3.xml       |  27 ++++++++
 18 files changed, 577 insertions(+)
 create mode 100644 alib/src/regexp/Alternation.cpp
 create mode 100644 alib/src/regexp/Alternation.h
 create mode 100644 alib/src/regexp/Iteration.cpp
 create mode 100644 alib/src/regexp/Iteration.h
 create mode 100644 alib/src/regexp/RegExp.cpp
 create mode 100644 alib/src/regexp/RegExp.h
 create mode 100644 alib/src/regexp/RegExpElement.cpp
 create mode 100644 alib/src/regexp/RegExpElement.h
 create mode 100644 alib/src/regexp/RegExpParser.cpp
 create mode 100644 alib/src/regexp/RegExpParser.h
 create mode 100644 alib/src/regexp/RegExpPrinter.cpp
 create mode 100644 alib/src/regexp/RegExpPrinter.h
 create mode 100644 alib/src/regexp/RegExpSymbol.cpp
 create mode 100644 alib/src/regexp/RegExpSymbol.h
 create mode 100644 examples/regexp/regexp.xml
 create mode 100644 examples/regexp/regexp2.xml
 create mode 100644 examples/regexp/regexp3.xml

diff --git a/acat/src/acat.cpp b/acat/src/acat.cpp
index 5f7ab254b8..cafee32369 100644
--- a/acat/src/acat.cpp
+++ b/acat/src/acat.cpp
@@ -11,6 +11,8 @@
 #include "GrammarFactory.h"
 #include "AlibException.h"
 
+#include "regexp/RegExpParser.h"
+
 #include "automaton/AutomatonParser.h"
 #include "grammar/GrammarParser.h"
 #include "grammar/RightRegularGrammar.h"
@@ -23,6 +25,7 @@
 
 using namespace std;
 using namespace automaton;
+using namespace regexp;
 using namespace alib;
 using namespace sax;
 using namespace grammar;
@@ -49,6 +52,9 @@ int main(int argc, char** argv) {
 		} else if (tokens.front().getData() == "grammar") {
 			UnknownGrammar grammar = GrammarParser::parse(tokens);
 			grammar.toXML(cout);
+		} else if (tokens.front().getData() == "regexp") {
+			RegExp regexp = RegExpParser::parse(tokens);
+			regexp.toXML(cout);
 		} else {
 			throw AlibException(
 					"Expected root tag automaton or grammar. Read: "
diff --git a/alib/src/regexp/Alternation.cpp b/alib/src/regexp/Alternation.cpp
new file mode 100644
index 0000000000..23e98e68b9
--- /dev/null
+++ b/alib/src/regexp/Alternation.cpp
@@ -0,0 +1,24 @@
+/*
+ * Alternation.cpp
+ *
+ *  Created on: Nov 23, 2013
+ *      Author: martin
+ */
+
+#include "Alternation.h"
+
+namespace regexp {
+
+// Owns both branches: frees their elements, as Iteration and RegExp do for theirs.
+Alternation::~Alternation() {
+	for (auto element : first) delete element;
+	for (auto element : second) delete element;
+}
+
+// Mutable access to each branch; RegExpParser fills them in place.
+list<RegExpElement*>& Alternation::getFirst() { return first; }
+
+list<RegExpElement*>& Alternation::getSecond() { return second; }
+
+} /* namespace regexp */
+
diff --git a/alib/src/regexp/Alternation.h b/alib/src/regexp/Alternation.h
new file mode 100644
index 0000000000..6e1cdf5ed4
--- /dev/null
+++ b/alib/src/regexp/Alternation.h
@@ -0,0 +1,29 @@
+/*
+ * Alternation.h
+ *
+ *  Created on: Nov 23, 2013
+ *      Author: martin
+ */
+
+#ifndef ALTERNATION_H_
+#define ALTERNATION_H_
+
+#include <list>
+#include "RegExpElement.h"
+
+namespace regexp {
+
+using namespace std;
+
+class Alternation: public RegExpElement { // regexp node holding two branches
+private:
+	list<RegExpElement*> first;  // elements parsed between <first> and </first>
+	list<RegExpElement*> second; // elements parsed between <second> and </second>
+public:
+	~Alternation();
+	list<RegExpElement*>& getFirst();  // mutable reference to `first`
+	list<RegExpElement*>& getSecond(); // mutable reference to `second`
+};
+
+} /* namespace regexp */
+#endif /* ALTERNATION_H_ */
diff --git a/alib/src/regexp/Iteration.cpp b/alib/src/regexp/Iteration.cpp
new file mode 100644
index 0000000000..6e95c17edf
--- /dev/null
+++ b/alib/src/regexp/Iteration.cpp
@@ -0,0 +1,22 @@
+/*
+ * Iteration.cpp
+ *
+ *  Created on: Nov 23, 2013
+ *      Author: martin
+ */
+
+#include "Iteration.h"
+
+namespace regexp {
+// Deletes every child element stored in this iteration; the node is the
+// one that releases the pointers kept in `elements`.
+Iteration::~Iteration() {
+	for (list<RegExpElement*>::iterator it = elements.begin(); it != elements.end(); ++it)
+		delete *it;
+}
+
+// Mutable access to the child list; RegExpParser appends to it directly.
+list<RegExpElement*>& Iteration::getElements() { return elements; }
+
+} /* namespace regexp */
+
diff --git a/alib/src/regexp/Iteration.h b/alib/src/regexp/Iteration.h
new file mode 100644
index 0000000000..0bec4d49f5
--- /dev/null
+++ b/alib/src/regexp/Iteration.h
@@ -0,0 +1,27 @@
+/*
+ * Iteration.h
+ *
+ *  Created on: Nov 23, 2013
+ *      Author: martin
+ */
+
+#ifndef ITERATION_H_
+#define ITERATION_H_
+
+#include <list>
+#include "RegExpElement.h"
+
+namespace regexp {
+
+using namespace std;
+
+class Iteration : public RegExpElement { // regexp node wrapping a list of child elements
+private:
+	list<RegExpElement*> elements; // children; deleted by the destructor
+public:
+	~Iteration();
+	list<RegExpElement*>& getElements(); // mutable reference to `elements`
+};
+
+} /* namespace regexp */
+#endif /* ITERATION_H_ */
diff --git a/alib/src/regexp/RegExp.cpp b/alib/src/regexp/RegExp.cpp
new file mode 100644
index 0000000000..0151ffd4f3
--- /dev/null
+++ b/alib/src/regexp/RegExp.cpp
@@ -0,0 +1,34 @@
+/*
+ * RegExp.cpp
+ *
+ *  Created on: Nov 23, 2013
+ *      Author: martin
+ */
+
+#include "RegExp.h"
+#include "RegExpPrinter.h"
+
+namespace regexp {
+
+// Creates a regular expression with no elements.
+RegExp::RegExp() {}
+
+// Frees every element whose pointer is stored in the list.
+RegExp::~RegExp() {
+	for (list<RegExpElement*>::iterator it = regexp.begin(); it != regexp.end(); ++it)
+		delete *it;
+}
+
+// Copies the pointer list; the pointees are later deleted by the destructor.
+RegExp::RegExp(const list<RegExpElement*>& regexp)
+		: regexp(regexp) {}
+
+// Gives mutable access to the stored element list.
+list<RegExpElement*>& RegExp::getRegExp() { return regexp; }
+
+// Writes this expression to `out` by delegating to RegExpPrinter::toXML.
+void RegExp::toXML(ostream& out) {
+	RegExpPrinter::toXML(*this, out);
+}
+
+} /* namespace regexp */
diff --git a/alib/src/regexp/RegExp.h b/alib/src/regexp/RegExp.h
new file mode 100644
index 0000000000..35a2f77235
--- /dev/null
+++ b/alib/src/regexp/RegExp.h
@@ -0,0 +1,34 @@
+/*
+ * RegExp.h
+ *
+ *  Created on: Nov 23, 2013
+ *      Author: martin
+ */
+
+#ifndef REGEXP_H_
+#define REGEXP_H_
+
+#include <vector>
+#include <list>
+#include <string>
+#include "RegExpElement.h"
+
+namespace regexp {
+
+using namespace std;
+
+// Owns its elements: the destructor deletes every pointer stored in `regexp`.
+class RegExp {
+	list<RegExpElement*> regexp;
+public:
+	RegExp();
+	~RegExp();
+	RegExp(const list<RegExpElement*>& regexp) ;
+	// Declaring the move suppresses implicit copies, which would double-delete.
+	RegExp(RegExp&& other) { regexp.swap(other.regexp); }
+	list<RegExpElement*>& getRegExp();
+	void toXML(ostream& out);
+};
+
+} /* namespace regexp */
+#endif /* REGEXP_H_ */
diff --git a/alib/src/regexp/RegExpElement.cpp b/alib/src/regexp/RegExpElement.cpp
new file mode 100644
index 0000000000..eacf72d353
--- /dev/null
+++ b/alib/src/regexp/RegExpElement.cpp
@@ -0,0 +1,15 @@
+/*
+ * RegExpElement.cpp
+ *
+ *  Created on: Nov 23, 2013
+ *      Author: martin
+ */
+
+#include "RegExpElement.h"
+
+namespace regexp {
+
+// Out-of-line definition of the virtual destructor; the base holds no state.
+RegExpElement::~RegExpElement() {}
+
+} /* namespace regexp */
diff --git a/alib/src/regexp/RegExpElement.h b/alib/src/regexp/RegExpElement.h
new file mode 100644
index 0000000000..b2ed1df406
--- /dev/null
+++ b/alib/src/regexp/RegExpElement.h
@@ -0,0 +1,22 @@
+/*
+ * RegExpElement.h
+ *
+ *  Created on: Nov 23, 2013
+ *      Author: martin
+ */
+
+#ifndef REGEXPELEMENT_H_
+#define REGEXPELEMENT_H_
+
+namespace regexp {
+
+using namespace std;
+
+class RegExpElement { // common base of Alternation, Iteration and RegExpSymbol
+public:
+	virtual ~RegExpElement(); // virtual: elements are deleted through RegExpElement*
+
+};
+
+} /* namespace regexp */
+#endif /* REGEXPELEMENT_H_ */
diff --git a/alib/src/regexp/RegExpParser.cpp b/alib/src/regexp/RegExpParser.cpp
new file mode 100644
index 0000000000..01e380cff2
--- /dev/null
+++ b/alib/src/regexp/RegExpParser.cpp
@@ -0,0 +1,106 @@
+/*
+ * RegExpParser.cpp
+ *
+ *  Created on: Nov 23, 2013
+ *      Author: Martin Zak
+ */
+
+#include "RegExpParser.h"
+#include "../sax/ParserException.h"
+
+namespace regexp {
+
+// Parses <regexp> ... </regexp> from the front of `input` into a RegExp.
+// Throws ParserException on an unexpected token; the top-level elements
+// collected up to that point are deleted before the exception propagates.
+RegExp RegExpParser::parse(list<Token>& input) {
+	list<RegExpElement*> elements;
+	try {
+		popToken(input, Token::START_ELEMENT, "regexp");
+		parseContent(input, elements);
+		popToken(input, Token::END_ELEMENT, "regexp");
+		return RegExp(elements);
+	} catch (ParserException&) {
+		for (auto element : elements) {
+			delete element;
+		}
+		throw; // rethrow the original object instead of a (possibly sliced) copy
+	}
+}
+
+Iteration* RegExpParser::parseIteration(list<Token>& input) { // caller owns the result
+	popToken(input, Token::START_ELEMENT, "iteration");
+	Iteration* iteration = new Iteration();
+	try {
+		parseContent(input, iteration->getElements());
+		popToken(input, Token::END_ELEMENT, "iteration");
+	} catch (...) { delete iteration; throw; } // do not leak the node on a parse error
+	return iteration;
+}
+
+// Parses <alternation><first>..</first><second>..</second></alternation>.
+Alternation* RegExpParser::parseAlternation(list<Token>& input) {
+	popToken(input, Token::START_ELEMENT, "alternation");
+	Alternation* alternation = new Alternation;
+	try {
+		popToken(input, Token::START_ELEMENT, "first");
+		parseContent(input, alternation->getFirst());
+		popToken(input, Token::END_ELEMENT, "first");
+		popToken(input, Token::START_ELEMENT, "second");
+		parseContent(input, alternation->getSecond());
+		popToken(input, Token::END_ELEMENT, "second");
+		popToken(input, Token::END_ELEMENT, "alternation");
+	} catch (...) { delete alternation; throw; } // do not leak the node on a parse error
+	return alternation;
+}
+
+// Appends consecutive <symbol>, <iteration> and <alternation> elements to `elements`.
+void RegExpParser::parseContent(list<Token>& input, list<RegExpElement*>& elements) {
+	for (;;) {
+		RegExpElement* parsed;
+		if (isToken(input, Token::START_ELEMENT, "symbol"))
+			parsed = parseSymbol(input);
+		else if (isToken(input, Token::START_ELEMENT, "iteration"))
+			parsed = parseIteration(input);
+		else if (isToken(input, Token::START_ELEMENT, "alternation"))
+			parsed = parseAlternation(input);
+		else return; // the next token starts none of the three elements
+		elements.push_back(parsed);
+	}
+}
+
+// Parses one symbol element named `tagName`. Accepted bodies are character
+// data, nothing at all, or an <eps> element; the last two give the empty
+// symbol. Throws ParserException on anything else. The caller owns the result.
+RegExpSymbol* RegExpParser::parseSymbol(list<Token>& input, string tagName) {
+	popToken(input, Token::START_ELEMENT, tagName);
+	string data; // stays empty for the two epsilon forms
+	if (input.front().getType() == Token::CHARACTER) {
+		data = input.front().getData();
+		input.pop_front();
+	} else if (isToken(input, Token::START_ELEMENT, "eps")) {
+		input.pop_front();
+		popToken(input, Token::END_ELEMENT, "eps");
+	} else if (!isToken(input, Token::END_ELEMENT, tagName)) {
+		throw ParserException(Token("", Token::CHARACTER), input.front());
+	}
+	// Allocate only after the closing tag has been consumed, so a missing
+	// closing tag can no longer leak the freshly created symbol.
+	popToken(input, Token::END_ELEMENT, tagName);
+	return new RegExpSymbol(data);
+}
+
+// True when a next token exists and has the given type and data; never consumes.
+bool RegExpParser::isToken(list<Token>& input, Token::TokenType type, string data) {
+	return !input.empty() && input.front().getType() == type && input.front().getData() == data;
+}
+
+// Consumes the matching next token, else throws ParserException(expected, read).
+void RegExpParser::popToken(list<Token>& input, Token::TokenType type, string data) {
+	if (!isToken(input, type, data)) // at end of input a blank token stands in for "read"
+		throw ParserException(Token(data, type), input.empty() ? Token("", Token::CHARACTER) : input.front());
+	input.pop_front();
+}
+
+} /* namespace regexp */
+
diff --git a/alib/src/regexp/RegExpParser.h b/alib/src/regexp/RegExpParser.h
new file mode 100644
index 0000000000..3181ca47e9
--- /dev/null
+++ b/alib/src/regexp/RegExpParser.h
@@ -0,0 +1,34 @@
+/*
+ * RegExpParser.h
+ *
+ *  Created on: Nov 23, 2013
+ *      Author: martin
+ */
+
+#ifndef REGEXPPARSER_H_
+#define REGEXPPARSER_H_
+
+#include "RegExp.h"
+#include "../sax/Token.h"
+#include "RegExpSymbol.h"
+#include "Iteration.h"
+#include "Alternation.h"
+
+namespace regexp {
+
+using namespace sax;
+
+class RegExpParser { // builds a RegExp from a list of sax Tokens
+public:
+	static RegExp parse(list<Token>& input); // entry point: expects a <regexp> root element
+protected:
+	static void parseContent(list<Token>& input, list<RegExpElement*>& elements); // appends parsed elements
+	static RegExpSymbol* parseSymbol(list<Token> &input, string tagName="symbol"); // tagName: enclosing tag
+	static Iteration* parseIteration(list<Token> &input); // <iteration> ... </iteration>
+	static Alternation* parseAlternation(list<Token> &input); // <alternation> with <first>/<second>
+	static bool isToken(list<Token> &input, Token::TokenType type, string data); // peeks, never consumes
+	static void popToken(list<Token> &input, Token::TokenType type, string data); // consumes or throws ParserException
+};
+
+} /* namespace regexp */
+#endif /* REGEXPPARSER_H_ */
diff --git a/alib/src/regexp/RegExpPrinter.cpp b/alib/src/regexp/RegExpPrinter.cpp
new file mode 100644
index 0000000000..962df86e67
--- /dev/null
+++ b/alib/src/regexp/RegExpPrinter.cpp
@@ -0,0 +1,71 @@
+/*
+ * RegExpPrinter.cpp
+ *
+ *  Created on: Nov 23, 2013
+ *      Author: martin
+ */
+
+#include "RegExpPrinter.h"
+
+namespace regexp {
+
+const string RegExpPrinter::INDENTATION = "\t"; // one nesting level in the output
+
+void RegExpPrinter::toXML(RegExp& regexp, ostream& out) { // writes <regexp> ... </regexp>
+	out << "<regexp>\n";
+	printContent(regexp.getRegExp(), out, INDENTATION); // content sits one level deep
+	out << "</regexp>\n";
+}
+
+// Prints every element of `content`, in list order, to `out`.
+// `prefix` is handed unchanged to the printer chosen for each element.
+// The printer is picked by the element's dynamic type; an element that is
+// none of Alternation, Iteration or RegExpSymbol produces no output.
+void RegExpPrinter::printContent(list<RegExpElement*>& content, ostream& out, string prefix) {
+	list<RegExpElement*>::iterator it = content.begin();
+
+	while (it != content.end()) {
+		RegExpElement* current = *it;
+		// Advance right away; the branches below never touch the iterator.
+		++it;
+
+		// Alternation is tested first, then Iteration, then RegExpSymbol.
+		if (Alternation* asAlternation = dynamic_cast<Alternation*>(current)) {
+			printAlternation(asAlternation, out, prefix);
+		} else if (Iteration* asIteration = dynamic_cast<Iteration*>(current)) {
+			printIteration(asIteration, out, prefix);
+		} else if (RegExpSymbol* asSymbol = dynamic_cast<RegExpSymbol*>(current)) {
+			printSymbol(asSymbol, out, prefix);
+		}
+	}
+}
+
+// Emits <alternation> at `prefix`, its <first>/<second> tags one level
+// deeper, and the content of each branch two levels deeper.
+void RegExpPrinter::printAlternation(Alternation* alternation, ostream& out, string prefix) {
+	const string branchIndent = prefix + INDENTATION;
+	const string contentIndent = branchIndent + INDENTATION;
+	out << prefix << "<alternation>\n";
+	out << branchIndent << "<first>\n";
+	printContent(alternation->getFirst(), out, contentIndent);
+	out << branchIndent << "</first>\n";
+	out << branchIndent << "<second>\n";
+	printContent(alternation->getSecond(), out, contentIndent);
+	out << branchIndent << "</second>\n";
+	out << prefix << "</alternation>\n";
+}
+
+// Emits <iteration>, its children one level deeper, then </iteration>.
+void RegExpPrinter::printIteration(Iteration* iteration, ostream& out, string prefix) {
+	out << prefix << "<iteration>\n";
+	printContent(iteration->getElements(), out, prefix + INDENTATION);
+	out << prefix << "</iteration>\n";
+}
+
+// Emits the symbol on a single line; its text is written as-is, unescaped.
+void RegExpPrinter::printSymbol(RegExpSymbol* symbol, ostream& out, string prefix) {
+	out << prefix << "<symbol>" << symbol->getSymbol() << "</symbol>\n";
+}
+
+} /* namespace regexp */
+
diff --git a/alib/src/regexp/RegExpPrinter.h b/alib/src/regexp/RegExpPrinter.h
new file mode 100644
index 0000000000..e1e3998175
--- /dev/null
+++ b/alib/src/regexp/RegExpPrinter.h
@@ -0,0 +1,34 @@
+/*
+ * RegExpPrinter.h
+ *
+ *  Created on: Nov 23, 2013
+ *      Author: martin
+ */
+
+#ifndef REGEXPPRINTER_H_
+#define REGEXPPRINTER_H_
+
+#include <ostream>
+#include "RegExp.h"
+#include "Alternation.h"
+#include "Iteration.h"
+#include "RegExpSymbol.h"
+
+namespace regexp {
+
+using namespace std;
+
+class RegExpPrinter { // writes a RegExp to a stream as XML
+protected:
+	static const string INDENTATION; // one nesting level; defined in RegExpPrinter.cpp
+	static void printContent(list<RegExpElement*>& content, ostream& out, string prefix); // picks a printer per element
+	static void printAlternation(Alternation* alternation, ostream& out, string prefix); // <alternation> block
+	static void printIteration(Iteration* iteration, ostream& out, string prefix); // <iteration> block
+	static void printSymbol(RegExpSymbol* symbol, ostream& out, string prefix); // one <symbol> line
+
+public:
+	static void toXML(RegExp& regexp, ostream& out); // entry point: <regexp> ... </regexp>
+};
+
+} /* namespace regexp */
+#endif /* REGEXPPRINTER_H_ */
diff --git a/alib/src/regexp/RegExpSymbol.cpp b/alib/src/regexp/RegExpSymbol.cpp
new file mode 100644
index 0000000000..85c6f159dd
--- /dev/null
+++ b/alib/src/regexp/RegExpSymbol.cpp
@@ -0,0 +1,20 @@
+/*
+ * RegExpSymbol.cpp
+ *
+ *  Created on: Nov 23, 2013
+ *      Author: martin
+ */
+
+#include "RegExpSymbol.h"
+
+namespace regexp {
+
+// Default construction initialises the Symbol base with the empty string.
+RegExpSymbol::RegExpSymbol() : Symbol("") {
+}
+
+// Forwards `symbol` to the Symbol base.
+RegExpSymbol::RegExpSymbol(const string& symbol) : Symbol(symbol) {
+}
+
+} /* namespace regexp */
diff --git a/alib/src/regexp/RegExpSymbol.h b/alib/src/regexp/RegExpSymbol.h
new file mode 100644
index 0000000000..c2bac3c251
--- /dev/null
+++ b/alib/src/regexp/RegExpSymbol.h
@@ -0,0 +1,28 @@
+/*
+ * RegExpSymbol.h
+ *
+ *  Created on: Nov 23, 2013
+ *      Author: martin
+ */
+
+#ifndef REGEXPSYMBOL_H_
+#define REGEXPSYMBOL_H_
+
+#include <string>
+#include "RegExpElement.h"
+#include "../alphabet/Symbol.h"
+
+namespace regexp {
+
+using namespace std;
+using namespace alphabet;
+
+class RegExpSymbol: public RegExpElement, public Symbol { // a Symbol usable as a regexp element
+public:
+	RegExpSymbol(); // initialises the Symbol base with ""
+	RegExpSymbol(const string& symbol); // initialises the Symbol base with `symbol`
+
+};
+
+} /* namespace regexp */
+#endif /* REGEXPSYMBOL_H_ */
diff --git a/examples/regexp/regexp.xml b/examples/regexp/regexp.xml
new file mode 100644
index 0000000000..f1d847322a
--- /dev/null
+++ b/examples/regexp/regexp.xml
@@ -0,0 +1,17 @@
+<regexp>
+	<symbol>0</symbol>
+	<symbol>1</symbol>
+	<iteration>
+		<symbol>0</symbol>
+		<symbol>0</symbol>
+	</iteration>
+
+	<alternation>
+		<first>
+			<symbol>11</symbol>
+		</first>
+		<second>
+			<symbol>10</symbol>
+		</second>
+	</alternation>
+</regexp>
diff --git a/examples/regexp/regexp2.xml b/examples/regexp/regexp2.xml
new file mode 100644
index 0000000000..4da5d19420
--- /dev/null
+++ b/examples/regexp/regexp2.xml
@@ -0,0 +1,27 @@
+<regexp>
+	<symbol>0</symbol>
+	<symbol>1</symbol>
+	<iteration>
+		<alternation>
+			<first>
+				<symbol>0</symbol>
+			</first>
+			<second>
+				<symbol>1</symbol>
+			</second>
+		</alternation>
+	</iteration>
+
+	<alternation>
+		<first>
+			<iteration>
+				<symbol>11</symbol>
+			</iteration>
+		</first>
+		<second>
+			<iteration>
+				<symbol>10</symbol>
+			</iteration>
+		</second>
+	</alternation>
+</regexp>
diff --git a/examples/regexp/regexp3.xml b/examples/regexp/regexp3.xml
new file mode 100644
index 0000000000..7359cb4989
--- /dev/null
+++ b/examples/regexp/regexp3.xml
@@ -0,0 +1,27 @@
+<regexp>
+	<iteration>
+		<iteration>
+			<iteration>
+				<iteration>
+					<iteration>
+						<iteration>
+							<iteration>
+								<iteration>
+									<iteration>
+										<iteration>
+											<symbol>We</symbol>
+											<symbol>have</symbol>
+											<symbol>to</symbol>
+											<symbol>go</symbol>
+											<symbol>deeper</symbol>
+										</iteration>
+									</iteration>
+								</iteration>
+							</iteration>
+						</iteration>
+					</iteration>
+				</iteration>
+			</iteration>
+		</iteration>
+	</iteration>
+</regexp>
-- 
GitLab