From b43525d4ad9855e8d4c7561648b73ea71ecb94b4 Mon Sep 17 00:00:00 2001 From: Martin Zak <zakmart1@fit.cvut.cz> Date: Sat, 23 Nov 2013 19:48:12 +0100 Subject: [PATCH] RegExp implementation --- acat/src/acat.cpp | 6 ++ alib/src/regexp/Alternation.cpp | 24 +++++++ alib/src/regexp/Alternation.h | 29 ++++++++ alib/src/regexp/Iteration.cpp | 22 +++++++ alib/src/regexp/Iteration.h | 27 ++++++++ alib/src/regexp/RegExp.cpp | 34 ++++++++++ alib/src/regexp/RegExp.h | 34 ++++++++++ alib/src/regexp/RegExpElement.cpp | 15 +++++ alib/src/regexp/RegExpElement.h | 22 +++++++ alib/src/regexp/RegExpParser.cpp | 106 ++++++++++++++++++++++++++++++ alib/src/regexp/RegExpParser.h | 34 ++++++++++ alib/src/regexp/RegExpPrinter.cpp | 71 ++++++++++++++++++++ alib/src/regexp/RegExpPrinter.h | 34 ++++++++++ alib/src/regexp/RegExpSymbol.cpp | 20 ++++++ alib/src/regexp/RegExpSymbol.h | 28 ++++++++ examples/regexp/regexp.xml | 17 +++++ examples/regexp/regexp2.xml | 27 ++++++++ examples/regexp/regexp3.xml | 27 ++++++++ 18 files changed, 577 insertions(+) create mode 100644 alib/src/regexp/Alternation.cpp create mode 100644 alib/src/regexp/Alternation.h create mode 100644 alib/src/regexp/Iteration.cpp create mode 100644 alib/src/regexp/Iteration.h create mode 100644 alib/src/regexp/RegExp.cpp create mode 100644 alib/src/regexp/RegExp.h create mode 100644 alib/src/regexp/RegExpElement.cpp create mode 100644 alib/src/regexp/RegExpElement.h create mode 100644 alib/src/regexp/RegExpParser.cpp create mode 100644 alib/src/regexp/RegExpParser.h create mode 100644 alib/src/regexp/RegExpPrinter.cpp create mode 100644 alib/src/regexp/RegExpPrinter.h create mode 100644 alib/src/regexp/RegExpSymbol.cpp create mode 100644 alib/src/regexp/RegExpSymbol.h create mode 100644 examples/regexp/regexp.xml create mode 100644 examples/regexp/regexp2.xml create mode 100644 examples/regexp/regexp3.xml diff --git a/acat/src/acat.cpp b/acat/src/acat.cpp index 5f7ab254b8..cafee32369 100644 --- a/acat/src/acat.cpp +++ 
b/acat/src/acat.cpp
@@ -11,6 +11,8 @@
 #include "GrammarFactory.h"
 #include "AlibException.h"
 
+#include "regexp/RegExpParser.h"
+
 #include "automaton/AutomatonParser.h"
 #include "grammar/GrammarParser.h"
 #include "grammar/RightRegularGrammar.h"
@@ -23,6 +25,7 @@
 
 using namespace std;
 using namespace automaton;
+using namespace regexp;
 using namespace alib;
 using namespace sax;
 using namespace grammar;
@@ -49,6 +52,9 @@ int main(int argc, char** argv) {
 		} else if (tokens.front().getData() == "grammar") {
 			UnknownGrammar grammar = GrammarParser::parse(tokens);
 			grammar.toXML(cout);
+		} else if (tokens.front().getData() == "regexp") {
+			RegExp regexp = RegExpParser::parse(tokens);
+			regexp.toXML(cout);
 		} else {
 			throw AlibException(
 					"Expected root tag automaton or grammar. Read: "
diff --git a/alib/src/regexp/Alternation.cpp b/alib/src/regexp/Alternation.cpp
new file mode 100644
index 0000000000..23e98e68b9
--- /dev/null
+++ b/alib/src/regexp/Alternation.cpp
@@ -0,0 +1,37 @@
+/*
+ * Alternation.cpp
+ *
+ * Created on: Nov 23, 2013
+ * Author: martin
+ */
+
+#include "Alternation.h"
+
+namespace regexp {
+
+/*
+ * Frees every element held by both branches. The lists store raw
+ * pointers allocated with new (see RegExpParser), and nothing else
+ * deletes them, so leaving this body empty leaked the whole subtree.
+ */
+Alternation::~Alternation() {
+	for (auto element : first) {
+		delete element;
+	}
+	for (auto element : second) {
+		delete element;
+	}
+}
+
+/* Returns the elements of the first branch by mutable reference. */
+list<RegExpElement*>& Alternation::getFirst() {
+	return first;
+}
+
+/* Returns the elements of the second branch by mutable reference. */
+list<RegExpElement*>& Alternation::getSecond() {
+	return second;
+}
+
+} /* namespace regexp */
+
diff --git a/alib/src/regexp/Alternation.h b/alib/src/regexp/Alternation.h
new file mode 100644
index 0000000000..6e1cdf5ed4
--- /dev/null
+++ b/alib/src/regexp/Alternation.h
@@ -0,0 +1,37 @@
+/*
+ * Alternation.h
+ *
+ * Created on: Nov 23, 2013
+ * Author: martin
+ */
+
+#ifndef ALTERNATION_H_
+#define ALTERNATION_H_
+
+#include <list>
+#include "RegExpElement.h"
+
+namespace regexp {
+
+using namespace std;
+
+/*
+ * Regular expression node with two branches, each stored as a list of
+ * elements. The node owns the elements in both lists and deletes them
+ * in its destructor.
+ */
+class Alternation: public RegExpElement {
+private:
+	list<RegExpElement*> first;
+	list<RegExpElement*> second;
+public:
+	/* Deletes every element of both branches. */
+	~Alternation();
+	/* Mutable access to the first branch; the node deletes whatever is stored here. */
+	list<RegExpElement*>& getFirst();
+	/* Mutable access to the second branch; the node deletes whatever is stored here. */
+	list<RegExpElement*>& getSecond();
+};
+
+} /* namespace regexp */
+#endif /*
ALTERNATION_H_ */
diff --git a/alib/src/regexp/Iteration.cpp b/alib/src/regexp/Iteration.cpp
new file mode 100644
index 0000000000..6e95c17edf
--- /dev/null
+++ b/alib/src/regexp/Iteration.cpp
@@ -0,0 +1,25 @@
+/*
+ * Iteration.cpp
+ *
+ * Created on: Nov 23, 2013
+ * Author: martin
+ */
+
+#include "Iteration.h"
+
+namespace regexp {
+
+/* Deletes each element stored in the iteration's list. */
+Iteration::~Iteration() {
+	for (list<RegExpElement*>::iterator it = elements.begin(); it != elements.end(); ++it) {
+		delete *it;
+	}
+}
+
+/* Hands out the iterated elements by mutable reference. */
+list<RegExpElement*>& Iteration::getElements() {
+	return elements;
+}
+
+} /* namespace regexp */
+
diff --git a/alib/src/regexp/Iteration.h b/alib/src/regexp/Iteration.h
new file mode 100644
index 0000000000..0bec4d49f5
--- /dev/null
+++ b/alib/src/regexp/Iteration.h
@@ -0,0 +1,33 @@
+/*
+ * Iteration.h
+ *
+ * Created on: Nov 23, 2013
+ * Author: martin
+ */
+
+#ifndef ITERATION_H_
+#define ITERATION_H_
+
+#include <list>
+#include "RegExpElement.h"
+
+namespace regexp {
+
+using namespace std;
+
+/*
+ * Regular expression node wrapping a list of elements. The destructor
+ * deletes every element in that list.
+ */
+class Iteration : public RegExpElement {
+public:
+	/* Deletes the stored elements. */
+	~Iteration();
+	/* Mutable access to the stored elements. */
+	list<RegExpElement*>& getElements();
+private:
+	list<RegExpElement*> elements;
+};
+
+} /* namespace regexp */
+#endif /* ITERATION_H_ */
diff --git a/alib/src/regexp/RegExp.cpp b/alib/src/regexp/RegExp.cpp
new file mode 100644
index 0000000000..0151ffd4f3
--- /dev/null
+++ b/alib/src/regexp/RegExp.cpp
@@ -0,0 +1,39 @@
+/*
+ * RegExp.cpp
+ *
+ * Created on: Nov 23, 2013
+ * Author: martin
+ */
+
+#include "RegExp.h"
+#include "RegExpPrinter.h"
+
+namespace regexp {
+
+/* Creates a regular expression with no elements. */
+RegExp::RegExp() {
+}
+
+/* Stores a copy of the given pointer list; the pointers are deleted by the destructor. */
+RegExp::RegExp(const list<RegExpElement*>& regexp) :
+		regexp(regexp) {
+}
+
+/* Deletes each element stored in the expression. */
+RegExp::~RegExp() {
+	for (list<RegExpElement*>::iterator it = regexp.begin(); it != regexp.end(); ++it) {
+		delete *it;
+	}
+}
+
+/* Hands out the top-level elements by mutable reference. */
+list<RegExpElement*>& RegExp::getRegExp() {
+	return regexp;
+}
+
+/* Writes the expression to out as XML by delegating to RegExpPrinter::toXML. */
+void RegExp::toXML(ostream& out) {
+	RegExpPrinter::toXML(*this, out);
+}
+
+} /* namespace regexp */
diff --git a/alib/src/regexp/RegExp.h b/alib/src/regexp/RegExp.h
new file mode 100644
index 0000000000..35a2f77235
--- /dev/null
+++
b/alib/src/regexp/RegExp.h
@@ -0,0 +1,66 @@
+/*
+ * RegExp.h
+ *
+ * Created on: Nov 23, 2013
+ * Author: martin
+ */
+
+#ifndef REGEXP_H_
+#define REGEXP_H_
+
+#include <list>
+#include <ostream>
+#include <string>
+#include <utility>
+#include <vector>
+#include "RegExpElement.h"
+
+namespace regexp {
+
+using namespace std;
+
+/*
+ * Regular expression stored as a list of top-level elements. The object
+ * deletes the stored elements in its destructor, so it is movable but
+ * not copyable: a member-wise copy would delete every element twice.
+ */
+class RegExp {
+private:
+	list<RegExpElement*> regexp;
+
+public:
+	RegExp();
+	~RegExp();
+	/* Takes over the pointers in the given list; they are deleted by the destructor. */
+	RegExp(const list<RegExpElement*>& regexp) ;
+
+	RegExp(const RegExp& other) = delete;
+	RegExp& operator=(const RegExp& other) = delete;
+
+	/* Transfers the elements out of other, which is left empty. */
+	RegExp(RegExp&& other) noexcept :
+			regexp(std::move(other.regexp)) {
+		other.regexp.clear();
+	}
+
+	/* Deletes the current elements, then transfers those of other. */
+	RegExp& operator=(RegExp&& other) noexcept {
+		if (this != &other) {
+			for (auto element : regexp) {
+				delete element;
+			}
+			regexp = std::move(other.regexp);
+			other.regexp.clear();
+		}
+		return *this;
+	}
+
+	/* Mutable access to the top-level elements. */
+	list<RegExpElement*>& getRegExp();
+
+	/* Writes the expression to out as XML. */
+	void toXML(ostream& out);
+};
+
+} /* namespace regexp */
+#endif /* REGEXP_H_ */
diff --git a/alib/src/regexp/RegExpElement.cpp b/alib/src/regexp/RegExpElement.cpp
new file mode 100644
index 0000000000..eacf72d353
--- /dev/null
+++ b/alib/src/regexp/RegExpElement.cpp
@@ -0,0 +1,16 @@
+/*
+ * RegExpElement.cpp
+ *
+ * Created on: Nov 23, 2013
+ * Author: martin
+ */
+
+#include "RegExpElement.h"
+
+namespace regexp {
+
+/* Out-of-line definition of the virtual destructor; there is nothing to release. */
+RegExpElement::~RegExpElement() {
+}
+
+} /* namespace regexp */
diff --git a/alib/src/regexp/RegExpElement.h b/alib/src/regexp/RegExpElement.h
new file mode 100644
index 0000000000..b2ed1df406
--- /dev/null
+++ b/alib/src/regexp/RegExpElement.h
@@ -0,0 +1,26 @@
+/*
+ * RegExpElement.h
+ *
+ * Created on: Nov 23, 2013
+ * Author: martin
+ */
+
+#ifndef REGEXPELEMENT_H_
+#define REGEXPELEMENT_H_
+
+namespace regexp {
+
+using namespace std;
+
+/*
+ * Common base class of all regular expression nodes. The destructor is
+ * virtual because nodes are deleted through RegExpElement pointers.
+ */
+class RegExpElement {
+public:
+	virtual ~RegExpElement();
+
+};
+
+} /* namespace regexp */
+#endif /* REGEXPELEMENT_H_ */
diff --git a/alib/src/regexp/RegExpParser.cpp b/alib/src/regexp/RegExpParser.cpp
new file mode 100644
index 0000000000..01e380cff2
--- /dev/null
+++ b/alib/src/regexp/RegExpParser.cpp
@@ -0,0 +1,181 @@
+/*
+ * RegExpParser.cpp
+ *
+ * Created on: Nov 23, 2013
+ * Author: Martin Zak
+ */
+
+#include "RegExpParser.h"
+#include "../sax/ParserException.h"
+
+namespace regexp {
+
+namespace {
+
+/* Deletes every element in the list and leaves the list empty. */
+void destroyElements(list<RegExpElement*>& elements) {
+	for (auto element : elements) {
+		delete element;
+	}
+	elements.clear();
+}
+
+/*
+ * Returns a copy of the token at the head of input for error reports.
+ * An exhausted input yields an empty CHARACTER token, because calling
+ * front() on an empty list is undefined behaviour.
+ */
+Token currentToken(list<Token>& input) {
+	if (input.empty()) {
+		return Token("", Token::CHARACTER);
+	}
+	return input.front();
+}
+
+} /* anonymous namespace */
+
+/*
+ * Parses a complete <regexp> element from the head of input.
+ *
+ * @param input token list; consumed tokens are removed from its front
+ * @return the parsed regular expression
+ * @throws ParserException when the tokens do not form a valid regexp
+ */
+RegExp RegExpParser::parse(list<Token>& input) {
+	list<RegExpElement*> elements;
+
+	try {
+		popToken(input, Token::START_ELEMENT, "regexp");
+		parseContent(input, elements);
+		popToken(input, Token::END_ELEMENT, "regexp");
+		return RegExp(elements);
+	} catch (...) {
+		/* Nothing owns the partial result yet, so free it here. A bare
+		 * throw rethrows the original object; throw e would copy it and
+		 * slice off any derived exception type. */
+		destroyElements(elements);
+		throw;
+	}
+}
+
+/*
+ * Parses an <iteration> element together with its content.
+ *
+ * @return newly allocated node that the caller must delete
+ * @throws ParserException on unexpected tokens; the partial node is freed
+ */
+Iteration* RegExpParser::parseIteration(list<Token>& input) {
+	popToken(input, Token::START_ELEMENT, "iteration");
+
+	Iteration* iteration = new Iteration();
+	try {
+		parseContent(input, iteration->getElements());
+		popToken(input, Token::END_ELEMENT, "iteration");
+	} catch (...) {
+		/* ~Iteration deletes the children parsed so far. */
+		delete iteration;
+		throw;
+	}
+	return iteration;
+}
+
+/*
+ * Parses an <alternation> element with its <first> and <second> branches.
+ *
+ * @return newly allocated node that the caller must delete
+ * @throws ParserException on unexpected tokens; the partial node is freed
+ */
+Alternation* RegExpParser::parseAlternation(list<Token>& input) {
+	popToken(input, Token::START_ELEMENT, "alternation");
+
+	Alternation* alternation = new Alternation;
+	try {
+		popToken(input, Token::START_ELEMENT, "first");
+		parseContent(input, alternation->getFirst());
+		popToken(input, Token::END_ELEMENT, "first");
+
+		popToken(input, Token::START_ELEMENT, "second");
+		parseContent(input, alternation->getSecond());
+		popToken(input, Token::END_ELEMENT, "second");
+
+		popToken(input, Token::END_ELEMENT, "alternation");
+	} catch (...) {
+		/* Free and clear both branches explicitly so this cleanup does
+		 * not depend on what ~Alternation releases. */
+		destroyElements(alternation->getFirst());
+		destroyElements(alternation->getSecond());
+		delete alternation;
+		throw;
+	}
+	return alternation;
+}
+
+/*
+ * Parses consecutive <symbol>, <iteration> and <alternation> elements and
+ * appends the resulting nodes to elements. Returns, without consuming it,
+ * at the first token that starts none of them.
+ */
+void RegExpParser::parseContent(list<Token>& input, list<RegExpElement*>& elements) {
+	while (true) {
+		if (isToken(input, Token::START_ELEMENT, "symbol")) {
+			elements.push_back(parseSymbol(input));
+		} else if (isToken(input, Token::START_ELEMENT, "iteration")) {
+			elements.push_back(parseIteration(input));
+		} else if (isToken(input, Token::START_ELEMENT, "alternation")) {
+			elements.push_back(parseAlternation(input));
+		} else {
+			return;
+		}
+	}
+}
+
+/*
+ * Parses one symbol element named tagName. Accepted forms are
+ * <tag>text</tag>, <tag></tag> and <tag><eps></eps></tag>; the last two
+ * yield a symbol with empty text.
+ *
+ * @return newly allocated node that the caller must delete
+ * @throws ParserException on any other token sequence
+ */
+RegExpSymbol* RegExpParser::parseSymbol(list<Token>& input, string tagName) {
+	popToken(input, Token::START_ELEMENT, tagName);
+
+	/* The node is allocated only after the closing tag was consumed, so a
+	 * throwing popToken cannot leak it. */
+	string text;
+	if (!input.empty() && input.front().getType() == Token::CHARACTER) {
+		text = input.front().getData();
+		input.pop_front();
+		popToken(input, Token::END_ELEMENT, tagName);
+	} else if (isToken(input, Token::END_ELEMENT, tagName)) {
+		input.pop_front();
+	} else if (isToken(input, Token::START_ELEMENT, "eps")) {
+		input.pop_front();
+		popToken(input, Token::END_ELEMENT, "eps");
+		popToken(input, Token::END_ELEMENT, tagName);
+	} else {
+		throw ParserException(Token("", Token::CHARACTER), currentToken(input));
+	}
+	return new RegExpSymbol(text);
+}
+
+/* Tells whether input is non-empty and its head has the given type and data. */
+bool RegExpParser::isToken(list<Token>& input, Token::TokenType type, string data) {
+	return !input.empty() && input.front().getType() == type && input.front().getData() == data;
+}
+
+/*
+ * Consumes the head of input when it has the given type and data.
+ *
+ * @throws ParserException naming the expected token otherwise
+ */
+void RegExpParser::popToken(list<Token>& input, Token::TokenType type, string data) {
+	if (isToken(input, type, data)) {
+		input.pop_front();
+	} else {
+		throw ParserException(Token(data, type), currentToken(input));
+	}
+}
+
+} /* namespace regexp */
+
diff --git a/alib/src/regexp/RegExpParser.h b/alib/src/regexp/RegExpParser.h
new file mode 100644
index 0000000000..3181ca47e9
--- /dev/null
+++ b/alib/src/regexp/RegExpParser.h
@@ -0,0 +1,46 @@
+/*
+ * RegExpParser.h
+ *
+ * Created on: Nov 23, 2013
+ * Author: martin
+ */
+
+#ifndef REGEXPPARSER_H_
+#define REGEXPPARSER_H_
+
+#include "RegExp.h"
+#include "../sax/Token.h"
+#include "RegExpSymbol.h"
+#include "Iteration.h"
+#include "Alternation.h"
+
+namespace regexp {
+
+using namespace sax;
+
+/*
+ * Builds a RegExp from a list of sax tokens. All members are static; the
+ * parse* helpers consume the tokens they recognise from the front of the
+ * list and return newly allocated nodes.
+ */
+class RegExpParser {
+public:
+	/* Parses a whole <regexp> element; throws ParserException on malformed input. */
+	static RegExp parse(list<Token>& input);
+protected:
+	/* Appends every symbol, iteration and alternation found at the head of input. */
+	static void parseContent(list<Token>& input, list<RegExpElement*>& elements);
+	/* Parses one symbol element whose tag is tagName. */
+	static RegExpSymbol* parseSymbol(list<Token> &input, string tagName="symbol");
+	/* Parses one <iteration> element. */
+	static Iteration* parseIteration(list<Token> &input);
+	/* Parses one <alternation> element. */
+	static Alternation* parseAlternation(list<Token> &input);
+	/* Tests whether the head of input has the given type and data. */
+	static bool isToken(list<Token> &input, Token::TokenType type, string data);
+	/* Removes the head of input if it matches, otherwise throws ParserException. */
+	static void popToken(list<Token> &input, Token::TokenType type, string data);
+};
+
+} /* namespace regexp */
+#endif /* REGEXPPARSER_H_ */
diff --git a/alib/src/regexp/RegExpPrinter.cpp b/alib/src/regexp/RegExpPrinter.cpp
new file mode 100644
index 0000000000..962df86e67
--- /dev/null
+++ b/alib/src/regexp/RegExpPrinter.cpp
@@ -0,0 +1,69 @@
+/*
+ * RegExpPrinter.cpp
+ *
+ * Created on: Nov 23, 2013
+ * Author: martin
+ */
+
+#include "RegExpPrinter.h"
+
+namespace regexp {
+
+/* One level of indentation in the generated XML. */
+const string RegExpPrinter::INDENTATION = "\t";
+
+/* Writes regexp to out as a <regexp> element with its content indented one level. */
+void RegExpPrinter::toXML(RegExp& regexp, ostream& out) {
+	out << "<regexp>\n";
+	printContent(regexp.getRegExp(), out, INDENTATION);
+	out << "</regexp>\n";
+}
+
+/*
+ * Prints each element of content with the printer matching its dynamic
+ * type. Elements that are none of the three known types are skipped.
+ */
+void RegExpPrinter::printContent(list<RegExpElement*>& content, ostream& out, string prefix) {
+	for (auto element : content) {
+		if (Alternation* alternation = dynamic_cast<Alternation*>(element)) {
+			printAlternation(alternation, out, prefix);
+		} else if (Iteration* iteration = dynamic_cast<Iteration*>(element)) {
+			printIteration(iteration, out, prefix);
+		} else if (RegExpSymbol* symbol = dynamic_cast<RegExpSymbol*>(element)) {
+			printSymbol(symbol, out, prefix);
+		}
+	}
+}
+
+/* Prints an <alternation> element with its <first> and <second> branches. */
+void RegExpPrinter::printAlternation(Alternation* alternation, ostream& out, string prefix) {
+	string doubleIndentation = prefix + INDENTATION + INDENTATION;
+	out << prefix << "<alternation>\n";
+
+	out << prefix << INDENTATION << "<first>\n";
+	printContent(alternation->getFirst(), out, doubleIndentation);
+	out << prefix << INDENTATION << "</first>\n";
+
+	out << prefix << INDENTATION << "<second>\n";
+	printContent(alternation->getSecond(), out, doubleIndentation);
+	out << prefix << INDENTATION << "</second>\n";
+
+	out << prefix << "</alternation>\n";
+}
+
+/* Prints an <iteration> element with its content indented one level deeper. */
+void RegExpPrinter::printIteration(Iteration* iteration, ostream& out, string prefix) {
+	out << prefix << "<iteration>\n";
+	printContent(iteration->getElements(), out, prefix + INDENTATION);
+	out << prefix << "</iteration>\n";
+}
+
+/* Prints a <symbol> element on a single line containing the symbol text. */
+void RegExpPrinter::printSymbol(RegExpSymbol* symbol, ostream& out, string prefix) {
+	out << prefix << "<symbol>";
+	out << symbol->getSymbol();
+	out << "</symbol>\n";
+}
+
+} /* namespace regexp */
+
diff --git a/alib/src/regexp/RegExpPrinter.h b/alib/src/regexp/RegExpPrinter.h
new
file mode 100644
index 0000000000..e1e3998175
--- /dev/null
+++ b/alib/src/regexp/RegExpPrinter.h
@@ -0,0 +1,41 @@
+/*
+ * RegExpPrinter.h
+ *
+ * Created on: Nov 23, 2013
+ * Author: martin
+ */
+
+#ifndef REGEXPPRINTER_H_
+#define REGEXPPRINTER_H_
+
+#include <ostream>
+#include "RegExp.h"
+#include "Alternation.h"
+#include "Iteration.h"
+#include "RegExpSymbol.h"
+
+namespace regexp {
+
+using namespace std;
+
+/* Static helpers that write a RegExp and its elements as XML. */
+class RegExpPrinter {
+public:
+	/* Writes regexp to out as XML. */
+	static void toXML(RegExp& regexp, ostream& out);
+
+protected:
+	/* Indentation string; defined in RegExpPrinter.cpp. */
+	static const string INDENTATION;
+	/* Prints every element of content, each line starting with prefix. */
+	static void printContent(list<RegExpElement*>& content, ostream& out, string prefix);
+	/* Prints one alternation element. */
+	static void printAlternation(Alternation* alternation, ostream& out, string prefix);
+	/* Prints one iteration element. */
+	static void printIteration(Iteration* iteration, ostream& out, string prefix);
+	/* Prints one symbol element. */
+	static void printSymbol(RegExpSymbol* symbol, ostream& out, string prefix);
+};
+
+} /* namespace regexp */
+#endif /* REGEXPPRINTER_H_ */
diff --git a/alib/src/regexp/RegExpSymbol.cpp b/alib/src/regexp/RegExpSymbol.cpp
new file mode 100644
index 0000000000..85c6f159dd
--- /dev/null
+++ b/alib/src/regexp/RegExpSymbol.cpp
@@ -0,0 +1,22 @@
+/*
+ * RegExpSymbol.cpp
+ *
+ * Created on: Nov 23, 2013
+ * Author: martin
+ */
+
+#include "RegExpSymbol.h"
+
+namespace regexp {
+
+/* Initialises the Symbol base with the empty string. */
+RegExpSymbol::RegExpSymbol() :
+		Symbol("") {
+}
+
+/* Initialises the Symbol base with the given string. */
+RegExpSymbol::RegExpSymbol(const string& symbol) :
+		Symbol(symbol) {
+}
+
+} /* namespace regexp */
diff --git a/alib/src/regexp/RegExpSymbol.h b/alib/src/regexp/RegExpSymbol.h
new file mode 100644
index 0000000000..c2bac3c251
--- /dev/null
+++ b/alib/src/regexp/RegExpSymbol.h
@@ -0,0 +1,31 @@
+/*
+ * RegExpSymbol.h
+ *
+ * Created on: Nov 23, 2013
+ * Author: martin
+ */
+
+#ifndef REGEXPSYMBOL_H_
+#define REGEXPSYMBOL_H_
+
+#include <string>
+#include "RegExpElement.h"
+#include "../alphabet/Symbol.h"
+
+namespace regexp {
+
+using namespace std;
+using namespace alphabet;
+
+/* Regular expression node that is also an alphabet Symbol. */
+class RegExpSymbol: public RegExpElement, public Symbol {
+public:
+	/* Initialises the Symbol base with the empty string. */
+	RegExpSymbol();
+	/* Initialises the Symbol base with the given string. */
+	RegExpSymbol(const string& symbol);
+
+};
+
+} /* namespace regexp */
+#endif /* REGEXPSYMBOL_H_ */
diff --git a/examples/regexp/regexp.xml b/examples/regexp/regexp.xml
new file mode 100644
index 0000000000..f1d847322a
--- /dev/null
+++ b/examples/regexp/regexp.xml
@@ -0,0 +1,17 @@
+<regexp>
+	<symbol>0</symbol>
+	<symbol>1</symbol>
+	<iteration>
+		<symbol>0</symbol>
+		<symbol>0</symbol>
+	</iteration>
+
+	<alternation>
+		<first>
+			<symbol>11</symbol>
+		</first>
+		<second>
+			<symbol>10</symbol>
+		</second>
+	</alternation>
+</regexp>
diff --git a/examples/regexp/regexp2.xml b/examples/regexp/regexp2.xml
new file mode 100644
index 0000000000..4da5d19420
--- /dev/null
+++ b/examples/regexp/regexp2.xml
@@ -0,0 +1,27 @@
+<regexp>
+	<symbol>0</symbol>
+	<symbol>1</symbol>
+	<iteration>
+		<alternation>
+			<first>
+				<symbol>0</symbol>
+			</first>
+			<second>
+				<symbol>1</symbol>
+			</second>
+		</alternation>
+	</iteration>
+
+	<alternation>
+		<first>
+			<iteration>
+				<symbol>11</symbol>
+			</iteration>
+		</first>
+		<second>
+			<iteration>
+				<symbol>10</symbol>
+			</iteration>
+		</second>
+	</alternation>
+</regexp>
diff --git a/examples/regexp/regexp3.xml b/examples/regexp/regexp3.xml
new file mode 100644
index 0000000000..7359cb4989
--- /dev/null
+++ b/examples/regexp/regexp3.xml
@@ -0,0 +1,27 @@
+<regexp>
+	<iteration>
+		<iteration>
+			<iteration>
+				<iteration>
+					<iteration>
+						<iteration>
+							<iteration>
+								<iteration>
+									<iteration>
+										<iteration>
+											<symbol>We</symbol>
+											<symbol>have</symbol>
+											<symbol>to</symbol>
+											<symbol>go</symbol>
+											<symbol>deeper</symbol>
+										</iteration>
+									</iteration>
+								</iteration>
+							</iteration>
+						</iteration>
+					</iteration>
+				</iteration>
+			</iteration>
+		</iteration>
+	</iteration>
+</regexp>
-- 
GitLab