diff --git a/alib2/src/alphabet/Symbol.h b/alib2/src/alphabet/Symbol.h index 57da01c69efc994d1e210c7c0d42cf7d4762a64d..41b44a0dcbaf7f42849aa1161e26905a1807ee06 100644 --- a/alib2/src/alphabet/Symbol.h +++ b/alib2/src/alphabet/Symbol.h @@ -17,6 +17,7 @@ namespace alphabet { * Represents symbol in an alphabet. */ class Symbol { +protected: std::string symbol; public: /** diff --git a/alib2/src/regexp/Alternation.cpp b/alib2/src/regexp/Alternation.cpp index ee296541c1acffb91d6be46ce1d73cb7e437c9c9..3140e27bc76216dfce12083029610ffa4387a89e 100644 --- a/alib2/src/regexp/Alternation.cpp +++ b/alib2/src/regexp/Alternation.cpp @@ -12,13 +12,13 @@ namespace regexp { Alternation::Alternation(RegExpElement&& left, RegExpElement&& right) { - this->elements.push_back(std::move(left).plunder()); - this->elements.push_back(std::move(right).plunder()); + appendElement(std::move(left)); + appendElement(std::move(right)); } Alternation::Alternation(const RegExpElement& left, const RegExpElement& right) { - this->elements.push_back(left.clone()); - this->elements.push_back(right.clone()); + appendElement(left); + appendElement(right); } Alternation::Alternation(const Alternation& other) { @@ -59,11 +59,17 @@ const std::vector<const RegExpElement*> & Alternation::getElements() const { } void Alternation::appendElement(const RegExpElement& element) { - this->elements.push_back(element.clone()); + RegExpElement* elem = element.clone(); + if(this->parentRegExp && !elem->attachRegExp(this->parentRegExp)) + throw alib::AlibException("Input symbols not in the alphabet."); + this->elements.push_back(elem); } void Alternation::appendElement(RegExpElement&& element) { - this->elements.push_back(std::move(element).plunder()); + RegExpElement* elem = std::move(element).plunder(); + if(this->parentRegExp && !elem->attachRegExp(this->parentRegExp)) + throw alib::AlibException("Input symbols not in the alphabet."); + this->elements.push_back(elem); } RegExpElement* Alternation::clone() const { @@ 
-107,9 +113,22 @@ bool Alternation::operator<(const Alternation& other) const { return **thisIter < **otherIter; } -void Alternation::getAlphabet( std::set<alphabet::Symbol> & alphabet ) const { +bool Alternation::testSymbol( const alphabet::Symbol & symbol ) const { for(const auto& child : this->elements) - child->getAlphabet(alphabet); + if(child->testSymbol(symbol)) return true; + return false; +} + +bool Alternation::attachRegExp(const RegExp * regexp ) { + this->parentRegExp = regexp; + for(const auto& child : this->elements) + if(!child->attachRegExp(regexp)) return false; + return true; +} + +void Alternation::computeMinimalAlphabet( std::set<alphabet::Symbol>& alphabet ) const { + for(const auto& child : this->elements) + child->computeMinimalAlphabet(alphabet); } bool Alternation::operator==(const Alternation& other) const { diff --git a/alib2/src/regexp/Alternation.h b/alib2/src/regexp/Alternation.h index 6783724e6076c2791a0274af978e8054a8ecd5fb..dfece1ff417c117f51b39ec28019c4cd33c7b2c7 100644 --- a/alib2/src/regexp/Alternation.h +++ b/alib2/src/regexp/Alternation.h @@ -30,6 +30,22 @@ protected: virtual RegExpElement* plunder() &&; std::vector<RegExpElement*> elements; + + /** + * @copydoc RegExpElement::testSymbol() const + */ + virtual bool testSymbol( const alphabet::Symbol & symbol ) const; + + /** + * @copydoc RegExpElement::attachRegExp() + */ + virtual bool attachRegExp ( const RegExp * regexp ); + + /** + * @copydoc RegExpElement::computeMinimalAlphabet() + */ + virtual void computeMinimalAlphabet( std::set<alphabet::Symbol>& alphabet ) const; + public: Alternation(RegExpElement&& left, RegExpElement&& right); Alternation(const RegExpElement& left, const RegExpElement& right); @@ -68,11 +84,6 @@ public: */ virtual void operator>>(std::ostream& out) const; - /** - * @copydoc RegExpElement::getAlphabet() const - */ - virtual void getAlphabet( std::set<alphabet::Symbol> & alphabet ) const; - /** * @copydoc RegExpElement::containsEmptyString() const 
*/ diff --git a/alib2/src/regexp/Concatenation.cpp b/alib2/src/regexp/Concatenation.cpp index 7b14bcaaf460dc88725a74d57369240de783966f..1e109ceb9d6cf3ba21717657c7f72b4bec0d92bf 100644 --- a/alib2/src/regexp/Concatenation.cpp +++ b/alib2/src/regexp/Concatenation.cpp @@ -6,17 +6,18 @@ */ #include "Concatenation.h" +#include "../AlibException.h" namespace regexp { Concatenation::Concatenation(RegExpElement&& left, RegExpElement&& right) { - this->elements.push_back(std::move(left).plunder()); - this->elements.push_back(std::move(right).plunder()); + appendElement(std::move(left)); + appendElement(std::move(right)); } Concatenation::Concatenation(const RegExpElement& left, const RegExpElement& right) { - this->elements.push_back(left.clone()); - this->elements.push_back(right.clone()); + appendElement(left); + appendElement(right); } Concatenation::Concatenation(const Concatenation& other) { @@ -57,11 +58,17 @@ const std::vector<const RegExpElement*> & Concatenation::getElements() const { } void Concatenation::appendElement(const RegExpElement& element) { - this->elements.push_back(element.clone()); + RegExpElement* elem = element.clone(); + if(this->parentRegExp && !elem->attachRegExp(this->parentRegExp)) + throw alib::AlibException("Input symbols not in the alphabet."); + this->elements.push_back(elem); } void Concatenation::appendElement(RegExpElement&& element) { - this->elements.push_back(std::move(element).plunder()); + RegExpElement* elem = std::move(element).plunder(); + if(this->parentRegExp && !elem->attachRegExp(this->parentRegExp)) + throw alib::AlibException("Input symbols not in the alphabet."); + this->elements.push_back(elem); } RegExpElement* Concatenation::clone() const { @@ -121,9 +128,22 @@ void Concatenation::operator>>(std::ostream& out) const { out << ")"; } -void Concatenation::getAlphabet( std::set<alphabet::Symbol> & alphabet ) const { +bool Concatenation::testSymbol( const alphabet::Symbol & symbol ) const { for(const auto& child : 
this->elements) - child->getAlphabet(alphabet); + if(child->testSymbol(symbol)) return true; + return false; +} + +bool Concatenation::attachRegExp(const RegExp * regexp ) { + this->parentRegExp = regexp; + for(const auto& child : this->elements) + if(!child->attachRegExp(regexp)) return false; + return true; +} + +void Concatenation::computeMinimalAlphabet( std::set<alphabet::Symbol>& alphabet ) const { + for(const auto& child : this->elements) + child->computeMinimalAlphabet(alphabet); } bool Concatenation::containsEmptyString() const { diff --git a/alib2/src/regexp/Concatenation.h b/alib2/src/regexp/Concatenation.h index a9688a11ff2cbe4335fb430a7c3a59288461ff2c..cd19fda03bfaae3a772a371a55b13c763a2c789e 100644 --- a/alib2/src/regexp/Concatenation.h +++ b/alib2/src/regexp/Concatenation.h @@ -27,6 +27,22 @@ protected: virtual RegExpElement* plunder() &&; std::vector<RegExpElement*> elements; + + /** + * @copydoc RegExpElement::testSymbol() const + */ + virtual bool testSymbol( const alphabet::Symbol & symbol ) const; + + /** + * @copydoc RegExpElement::attachRegExp() + */ + virtual bool attachRegExp ( const RegExp * regexp ); + + /** + * @copydoc RegExpElement::computeMinimalAlphabet() + */ + virtual void computeMinimalAlphabet( std::set<alphabet::Symbol>& alphabet ) const; + public: Concatenation(RegExpElement&& left, RegExpElement&& right); Concatenation(const RegExpElement& left, const RegExpElement& right); @@ -60,11 +76,6 @@ public: * @copydoc RegExpElement::operator>>() const */ virtual void operator>>(std::ostream& out) const; - - /** - * @copydoc RegExpElement::getAlphabet() const - */ - virtual void getAlphabet( std::set<alphabet::Symbol> & alphabet ) const; /** * @copydoc RegExpElement::containsEmptyString() const diff --git a/alib2/src/regexp/Iteration.cpp b/alib2/src/regexp/Iteration.cpp index 5bffb8e69692fb80f21de771b322a601053b24ad..7119754ecd49d4779f2581f5bb385b4b7e9903e0 100644 --- a/alib2/src/regexp/Iteration.cpp +++ 
b/alib2/src/regexp/Iteration.cpp @@ -10,12 +10,12 @@ namespace regexp { -Iteration::Iteration(RegExpElement&& element) { - this->element = std::move(element).plunder(); +Iteration::Iteration(RegExpElement&& element) : element( NULL ){ + this->setElement(std::move(element)); } -Iteration::Iteration(const RegExpElement& element) { - this->element = element.clone(); +Iteration::Iteration(const RegExpElement& element) : element( NULL ) { + this->setElement(element); } @@ -52,13 +52,19 @@ const RegExpElement & Iteration::getElement() const { } void Iteration::setElement(const RegExpElement& element) { + RegExpElement* elem = element.clone(); /* the NEW subtree is what must be validated and attached, not the element being replaced */ + if(this->parentRegExp && !elem->attachRegExp(this->parentRegExp)) { delete elem; /* rejected copy is still owned here; free it before throwing */ + throw alib::AlibException("Input symbols not in the alphabet."); } delete this->element; - this->element = element.clone(); + this->element = elem; } void Iteration::setElement(RegExpElement&& element) { + RegExpElement* elem = std::move(element).plunder(); /* the NEW subtree is what must be validated and attached, not the element being replaced */ + if(this->parentRegExp && !elem->attachRegExp(this->parentRegExp)) { delete elem; /* plundered node is owned here; free it before throwing */ + throw alib::AlibException("Input symbols not in the alphabet."); } delete this->element; - this->element = std::move(element).plunder(); + this->element = elem; } RegExpElement* Iteration::clone() const { @@ -101,8 +107,17 @@ void Iteration::operator>>(std::ostream& out) const { out << "(RegExpIteration " << *element << ")"; } -void Iteration::getAlphabet( std::set<alphabet::Symbol> & alphabet ) const { - element->getAlphabet( alphabet ); +bool Iteration::testSymbol( const alphabet::Symbol & symbol ) const { + return element->testSymbol( symbol ); +} + +bool Iteration::attachRegExp(const RegExp * regexp ) { + this->parentRegExp = regexp; + return this->element->attachRegExp(regexp); +} + +void Iteration::computeMinimalAlphabet( std::set<alphabet::Symbol>& alphabet ) const { + element->computeMinimalAlphabet(alphabet); } bool Iteration::containsEmptyString() const { diff --git a/alib2/src/regexp/Iteration.h 
b/alib2/src/regexp/Iteration.h index a03409319107034c38ccddfe0f67aa713a76ab70..985e9d6505ad388c5152156693aeca765892471d 100644 --- a/alib2/src/regexp/Iteration.h +++ b/alib2/src/regexp/Iteration.h @@ -27,6 +27,22 @@ protected: virtual RegExpElement* clone() const; virtual RegExpElement* plunder() &&; + + /** + * @copydoc RegExpElement::testSymbol() const + */ + virtual bool testSymbol( const alphabet::Symbol & symbol ) const; + + /** + * @copydoc RegExpElement::attachRegExp() + */ + virtual bool attachRegExp ( const RegExp * regexp ); + + /** + * @copydoc RegExpElement::computeMinimalAlphabet() + */ + virtual void computeMinimalAlphabet( std::set<alphabet::Symbol>& alphabet ) const; + public: Iteration(RegExpElement&&); Iteration(const RegExpElement&); @@ -63,11 +79,6 @@ public: */ virtual void operator>>(std::ostream& out) const; - /** - * @copydoc RegExpElement::getAlphabet() const - */ - virtual void getAlphabet( std::set<alphabet::Symbol> & alphabet ) const; - /** * @copydoc RegExpElement::containsEmptyString() const */ diff --git a/alib2/src/regexp/RegExp.cpp b/alib2/src/regexp/RegExp.cpp index 7b8c093237ee2db53b240e873ded849b3b106485..c4094e544ac9c6e8dd26078e139b8443ad53e459 100644 --- a/alib2/src/regexp/RegExp.cpp +++ b/alib2/src/regexp/RegExp.cpp @@ -7,6 +7,8 @@ #include "RegExp.h" #include "../AlibException.h" +#include "RegExpEmpty.h" +#include "RegExpSymbol.h" #include <iostream> @@ -16,19 +18,33 @@ RegExp::RegExp() { this->regExp = new RegExpEmpty(); } +RegExp::RegExp(const std::set<alphabet::Symbol>& alphabet, const RegExpElement& regExp) : alphabet(alphabet) { + this->regExp = NULL; + setRegExp(regExp); +} + +RegExp::RegExp(std::set<alphabet::Symbol>&& alphabet, RegExpElement&& regExp) : alphabet(std::move(alphabet)) { + this->regExp = NULL; + setRegExp(std::move(regExp)); +} + RegExp::RegExp(const RegExpElement& regExp) { - this->regExp = regExp.clone(); + regExp.computeMinimalAlphabet(alphabet); + this->regExp = NULL; + setRegExp(regExp); } 
RegExp::RegExp(RegExpElement&& regExp) { - this->regExp = std::move(regExp).plunder(); + regExp.computeMinimalAlphabet(alphabet); + this->regExp = NULL; + setRegExp(std::move(regExp)); } -RegExp::RegExp(const RegExp& other) : regExp(other.regExp->clone()) { +RegExp::RegExp(const RegExp& other) : regExp(other.regExp->clone()), alphabet(other.alphabet) { this->regExp->attachRegExp(this); /* re-parent the cloned tree to this object; the alphabet was copied, so the check cannot fail */ } -RegExp::RegExp(RegExp&& other) noexcept : regExp(other.regExp) { +RegExp::RegExp(RegExp&& other) noexcept : regExp(other.regExp), alphabet(std::move(other.alphabet) ) { if(this->regExp) this->regExp->attachRegExp(this); /* stolen nodes still point at 'other'; re-parent them so they never dangle */ other.regExp = NULL; } @@ -59,19 +75,34 @@ const RegExpElement& RegExp::getRegExp() const { void RegExp::setRegExp(const RegExpElement& regExp) { delete this->regExp; this->regExp = regExp.clone(); + if(!this->regExp->attachRegExp(this)) + throw alib::AlibException("Input symbols not in the alphabet."); } void RegExp::setRegExp(RegExpElement&& regExp) { delete this->regExp; this->regExp = std::move(regExp).plunder(); + if(!this->regExp->attachRegExp(this)) + throw alib::AlibException("Input symbols not in the alphabet."); } -std::set<alphabet::Symbol> RegExp::getAlphabet() const { - std::set<alphabet::Symbol> alphabet; +const std::set<alphabet::Symbol>& RegExp::getAlphabet() const { + return alphabet; +} - regExp->getAlphabet( alphabet ); +void RegExp::addSymbolToAlphabet(const alphabet::Symbol & symbol) { + std::pair<std::set<alphabet::Symbol>::iterator, bool> ret = alphabet.insert(symbol); + if (!ret.second) + throw alib::AlibException("Symbol \"" + symbol.getSymbol() + "\" is already in the alphabet."); +} - return alphabet; +void RegExp::removeSymbolFromAlphabet(const alphabet::Symbol & symbol) { + if(this->regExp->testSymbol(symbol)) + throw alib::AlibException("Input symbol \"" + symbol.getSymbol() + "\" is used."); + + std::set<alphabet::Symbol>::size_type removed = alphabet.erase(symbol); /* erase() returns size_type; avoid narrowing to int */ + if (!removed) + throw alib::AlibException("Input symbol \"" + symbol.getSymbol() + "\" doesn't exist."); } bool RegExp::isEmpty() const { diff --git a/alib2/src/regexp/RegExp.h 
b/alib2/src/regexp/RegExp.h index 445956c6d8a5c99f202371e510d19d696a69d498..24c6bd915a76673ccf31aa12fe40a6f58878614a 100644 --- a/alib2/src/regexp/RegExp.h +++ b/alib2/src/regexp/RegExp.h @@ -11,11 +11,13 @@ #include <vector> #include <list> #include <string> +#include <set> #include "RegExpElement.h" -#include "RegExpEmpty.h" #include "../std/visitor.hpp" namespace regexp { + +class RegExpElement; /** * Represents regular expression parsed from the XML. Regular expression is stored @@ -24,9 +26,13 @@ namespace regexp { class RegExp : public std::element<RegExp, std::visitor<RegExp> > { protected: RegExpElement* regExp; + + std::set<alphabet::Symbol> alphabet; public: RegExp(); + RegExp(const std::set<alphabet::Symbol>& alphabet, const RegExpElement& regExp); + RegExp(std::set<alphabet::Symbol>&& alphabet, RegExpElement&& regExp); RegExp(const RegExpElement& regExp); RegExp(RegExpElement&& regExp); @@ -63,7 +69,19 @@ public: * Gets alphabet symbols used in RegExp. * @return set of alphabet symbols used in regexp. 
*/ - std::set<alphabet::Symbol> getAlphabet() const; + const std::set<alphabet::Symbol>& getAlphabet() const; + + /** + * Adds symbol to the alphabet available in the regular expression + * @param symbol new symbol added to the alphabet + */ + void addSymbolToAlphabet(const alphabet::Symbol & symbol); + + /** + * Removes symbol from the alphabet of symbol available in the regular expression + * @param symbol removed symbol from the alphabet + */ + void removeSymbolFromAlphabet(const alphabet::Symbol & symbol); /** * @return true if regexp represents empty language diff --git a/alib2/src/regexp/RegExpElement.cpp b/alib2/src/regexp/RegExpElement.cpp index 91065141e6ce99123547ba18a82e5539021775b4..bebb38e03e215d79b8a6d36203116f13ac8678bd 100644 --- a/alib2/src/regexp/RegExpElement.cpp +++ b/alib2/src/regexp/RegExpElement.cpp @@ -9,6 +9,10 @@ namespace regexp { +RegExpElement::RegExpElement() : parentRegExp(NULL) { + +} + RegExpElement::~RegExpElement() noexcept { } diff --git a/alib2/src/regexp/RegExpElement.h b/alib2/src/regexp/RegExpElement.h index 7fc1beb87a6e7c03b447035895f05ee63ecd9ef1..604e546e2542b2f89533eab6d777383e56cf4ba1 100644 --- a/alib2/src/regexp/RegExpElement.h +++ b/alib2/src/regexp/RegExpElement.h @@ -10,10 +10,13 @@ #include "../std/visitor.hpp" #include "../alphabet/Symbol.h" +#include "RegExp.h" #include <set> namespace regexp { +class RegExp; + class Alternation; class Concatenation; class Iteration; @@ -26,13 +29,37 @@ class RegExpEpsilon; */ class RegExpElement : virtual public std::elementBase<std::visitor<Alternation, Concatenation, Iteration, RegExpSymbol, RegExpEmpty, RegExpEpsilon> > { protected: - class PointerLess { - public: - bool operator()(const RegExpElement* a, const RegExpElement* b) { - return *a < *b; - } - }; + /* + * Parent regexp contanining this instance of RegExpElement + */ + const RegExp * parentRegExp; + + /** + * Traverses the regexp tree looking if particular Symbol is used in the regexp. 
+ * + * @param symbol to test if used in regexp element + * @return true if symbol is used by the element and its successor + */ + virtual bool testSymbol( const alphabet::Symbol & symbol ) const = 0; + + /** + * Attaches the regexp to this instance and all its children + * @param regexp parent regexp to attach as parent + * @return true if symbols used in regexp element are in the regexp's alphabet + */ + virtual bool attachRegExp ( const RegExp * regexp ) = 0; + + /** + * Traverses the regexp tree computing minimal alphabet needed by regexp + * + * @param alphabet All alphabet symbols encountered are added into this set + * (no return value; the result is accumulated in @p alphabet) + */ + virtual void computeMinimalAlphabet( std::set<alphabet::Symbol>& alphabet ) const = 0; + public: + RegExpElement(); + /** * Creates copy of the element. * @return copy of the element @@ -89,12 +116,14 @@ public: */ virtual bool isEmpty() const = 0; - /** - * Traverses the regexp tree to get alphabet symbols used. 
- * - * @param alphabet All alphabet symbols encountered are added into this set - */ - virtual void getAlphabet( std::set<alphabet::Symbol> & alphabet ) const = 0; + friend class RegExp; + + friend class Alternation; + friend class Concatenation; + friend class Iteration; + friend class RegExpSymbol; + friend class RegExpEmpty; + friend class RegExpEpsilon; }; } /* namespace regexp */ diff --git a/alib2/src/regexp/RegExpEmpty.cpp b/alib2/src/regexp/RegExpEmpty.cpp index d5d016317684d6342ba09b58c06697073381cdf0..916a7d1e720cc31dd820d7779fe28e4894e029f8 100644 --- a/alib2/src/regexp/RegExpEmpty.cpp +++ b/alib2/src/regexp/RegExpEmpty.cpp @@ -58,7 +58,16 @@ void RegExpEmpty::operator>>(std::ostream& out) const { out << "(RegExpEmpty)"; } -void RegExpEmpty::getAlphabet( std::set<alphabet::Symbol> & alphabet ) const { +bool RegExpEmpty::testSymbol( const alphabet::Symbol & ) const { + return false; +} + +bool RegExpEmpty::attachRegExp(const RegExp * regexp ) { + this->parentRegExp = regexp; + return true; +} + +void RegExpEmpty::computeMinimalAlphabet( std::set<alphabet::Symbol>& alphabet ) const { } diff --git a/alib2/src/regexp/RegExpEmpty.h b/alib2/src/regexp/RegExpEmpty.h index f556c0ed44d10eeac6dc89cd3363d5806687a7a6..e0aa81e559cf21bf536ac7ab0e8a41ca7ffd02c8 100644 --- a/alib2/src/regexp/RegExpEmpty.h +++ b/alib2/src/regexp/RegExpEmpty.h @@ -24,6 +24,21 @@ protected: virtual RegExpElement* plunder() &&; + /** + * @copydoc RegExpElement::testSymbol() const + */ + virtual bool testSymbol( const alphabet::Symbol & symbol ) const; + + /** + * @copydoc RegExpElement::attachRegExp() + */ + virtual bool attachRegExp ( const RegExp * regexp ); + + /** + * @copydoc RegExpElement::computeMinimalAlphabet() + */ + virtual void computeMinimalAlphabet( std::set<alphabet::Symbol>& alphabet ) const; + public: virtual bool operator<(const RegExpElement&) const; virtual bool operator==(const RegExpElement&) const; @@ -40,11 +55,6 @@ public: * @copydoc RegExpElement::operator>>() 
const */ virtual void operator>>(std::ostream& out) const; - - /** - * @copydoc RegExpElement::getAlphabet() const - */ - virtual void getAlphabet( std::set<alphabet::Symbol> & alphabet ) const; /** * @copydoc RegExpElement::containsEmptyString() const diff --git a/alib2/src/regexp/RegExpEpsilon.cpp b/alib2/src/regexp/RegExpEpsilon.cpp index caeddb78618ce78b6a39cd09645a9ea10a8292aa..7666137a6cb0581fc017e2db4731a1f1e7b2c895 100644 --- a/alib2/src/regexp/RegExpEpsilon.cpp +++ b/alib2/src/regexp/RegExpEpsilon.cpp @@ -8,7 +8,7 @@ #include "RegExpEpsilon.h" namespace regexp { - + RegExpElement* RegExpEpsilon::clone() const { return new RegExpEpsilon(*this); } @@ -54,7 +54,16 @@ void RegExpEpsilon::operator>>(std::ostream& out) const { out << "(RegExpEpsilon)"; } -void RegExpEpsilon::getAlphabet( std::set<alphabet::Symbol> & alphabet ) const { +bool RegExpEpsilon::testSymbol( const alphabet::Symbol & ) const { + return false; +} + +bool RegExpEpsilon::attachRegExp(const RegExp * regexp ) { + this->parentRegExp = regexp; + return true; +} + +void RegExpEpsilon::computeMinimalAlphabet( std::set<alphabet::Symbol>& alphabet ) const { } diff --git a/alib2/src/regexp/RegExpEpsilon.h b/alib2/src/regexp/RegExpEpsilon.h index f4ed90947e68dbc366ddb3ebff52070138ab114e..fed42e6d959b7033701f2c78acafc950b1566577 100644 --- a/alib2/src/regexp/RegExpEpsilon.h +++ b/alib2/src/regexp/RegExpEpsilon.h @@ -24,7 +24,23 @@ protected: virtual RegExpElement* plunder() &&; + /** + * @copydoc RegExpElement::testSymbol() const + */ + virtual bool testSymbol( const alphabet::Symbol & symbol ) const; + + /** + * @copydoc RegExpElement::attachRegExp() + */ + virtual bool attachRegExp ( const RegExp * regexp ); + + /** + * @copydoc RegExpElement::computeMinimalAlphabet() + */ + virtual void computeMinimalAlphabet( std::set<alphabet::Symbol>& alphabet ) const; + public: + virtual bool operator<(const RegExpElement&) const; virtual bool operator==(const RegExpElement&) const; virtual bool operator>(const 
RegExpElement&) const; @@ -39,12 +55,7 @@ public: * @copydoc RegExpElement::operator>>() const */ virtual void operator>>(std::ostream& out) const; - - /** - * @copydoc RegExpElement::getAlphabet() const - */ - virtual void getAlphabet( std::set<alphabet::Symbol> & alphabet ) const; - + /** * @copydoc RegExpElement::containsEmptyString() const */ diff --git a/alib2/src/regexp/RegExpFromXMLParser.cpp b/alib2/src/regexp/RegExpFromXMLParser.cpp index b40cd1f46870b46f99382842af18e78475c247be..bd22da901c80d37b85a4c7c14537723c8f7a6262 100644 --- a/alib2/src/regexp/RegExpFromXMLParser.cpp +++ b/alib2/src/regexp/RegExpFromXMLParser.cpp @@ -12,18 +12,33 @@ namespace regexp { RegExp RegExpFromXMLParser::parse(std::list<sax::Token>& input) { popToken(input, sax::Token::START_ELEMENT, "regexp"); + + RegExp regexp; + parseAlphabet(input, regexp); + RegExpElement* element = parseElement(input); - if(element == NULL) throw sax::ParserException(sax::Token("", sax::Token::CHARACTER), input.front()); - RegExp regexp(std::move(*element)); + if(!element) throw sax::ParserException(sax::Token("", sax::Token::CHARACTER), input.front()); + regexp.setRegExp(std::move(*element)); delete element; + popToken(input, sax::Token::END_ELEMENT, "regexp"); return regexp; } +void RegExpFromXMLParser::parseAlphabet(std::list<sax::Token> &input, RegExp& regexp) { + popToken(input, sax::Token::START_ELEMENT, "alphabet"); + while (isToken(input, sax::Token::START_ELEMENT, "symbol")) { + alphabet::Symbol* symbol = parseSymbol<alphabet::Symbol>(input); + regexp.addSymbolToAlphabet(*symbol); + delete symbol; + } + popToken(input, sax::Token::END_ELEMENT, "alphabet"); +} + RegExpElement* RegExpFromXMLParser::parseElement(std::list<sax::Token>& input) { if (isToken(input, sax::Token::START_ELEMENT, "symbol")) { - return parseSymbol(input); + return parseSymbol<RegExpSymbol>(input); } else if (isToken(input, sax::Token::START_ELEMENT, "empty")) { return parseEmpty(input); } else if (isToken(input, 
sax::Token::START_ELEMENT, "epsilon")) { @@ -48,6 +63,9 @@ Alternation* RegExpFromXMLParser::parseAlternation(std::list<sax::Token>& input) if(!element2) throw alib::AlibException("Not enought elements in alternation"); Alternation* alternation = new Alternation(std::move(*element1), std::move(*element2)); + delete element1; + delete element2; + while (true) { RegExpElement* element = parseElement(input); if(!element) break; @@ -68,6 +86,9 @@ Concatenation* RegExpFromXMLParser::parseConcatenation(std::list<sax::Token>& in if(!element2) throw alib::AlibException("Not enought elements in concatenation"); Concatenation* concatenation = new Concatenation(std::move(*element1), std::move(*element2)); + delete element1; + delete element2; + while (true) { RegExpElement* element = parseElement(input); if(!element) break; @@ -84,7 +105,7 @@ Iteration* RegExpFromXMLParser::parseIteration(std::list<sax::Token>& input) { popToken(input, sax::Token::START_ELEMENT, "iteration"); RegExpElement* element = parseElement(input); - if(element == NULL) throw sax::ParserException(sax::Token("", sax::Token::CHARACTER), input.front()); + if(!element) throw sax::ParserException(sax::Token("", sax::Token::CHARACTER), input.front()); Iteration* iteration = new Iteration(std::move(*element)); delete element; @@ -110,11 +131,12 @@ RegExpEmpty* RegExpFromXMLParser::parseEmpty(std::list<sax::Token>& input) { return empty; } -RegExpSymbol* RegExpFromXMLParser::parseSymbol(std::list<sax::Token>& input) { +template <class T> +T* RegExpFromXMLParser::parseSymbol(std::list<sax::Token>& input) { popToken(input, sax::Token::START_ELEMENT, "symbol"); if (input.front().getType() == sax::Token::CHARACTER) { - RegExpSymbol* symbol = new RegExpSymbol(input.front().getData()); + T* symbol = new T(input.front().getData()); input.pop_front(); popToken(input, sax::Token::END_ELEMENT, "symbol"); return symbol; diff --git a/alib2/src/regexp/RegExpFromXMLParser.h b/alib2/src/regexp/RegExpFromXMLParser.h index 
d77840db45ba3ef1996a413c09749afed7e96d38..2a45f1ace8c55da13c8f5272a8294796b41eebe2 100644 --- a/alib2/src/regexp/RegExpFromXMLParser.h +++ b/alib2/src/regexp/RegExpFromXMLParser.h @@ -11,6 +11,7 @@ #include "RegExp.h" #include "RegExpElements.h" #include "../sax/Token.h" +#include "../alphabet/Symbol.h" namespace regexp { @@ -18,11 +19,13 @@ namespace regexp { * Parser used to get RegExp from XML parsed into list of tokens. */ class RegExpFromXMLParser { + void parseAlphabet(std::list<sax::Token>& input, RegExp& regexp); + RegExpElement* parseElement(std::list<sax::Token>& input); RegExpEpsilon* parseEpsilon(std::list<sax::Token>& input); RegExpEmpty* parseEmpty(std::list<sax::Token>& input); - RegExpSymbol* parseSymbol(std::list<sax::Token> &input); + template <class T> T* parseSymbol(std::list<sax::Token> &input); Iteration* parseIteration(std::list<sax::Token> &input); Alternation* parseAlternation(std::list<sax::Token> &input); Concatenation* parseConcatenation(std::list<sax::Token> &input); diff --git a/alib2/src/regexp/RegExpSymbol.cpp b/alib2/src/regexp/RegExpSymbol.cpp index 85426d0e687932363d8de409789fd67e5493b9fc..53f759e7b2da0c238139c907ea2fdbb1d40de61f 100644 --- a/alib2/src/regexp/RegExpSymbol.cpp +++ b/alib2/src/regexp/RegExpSymbol.cpp @@ -10,11 +10,11 @@ namespace regexp { RegExpSymbol::RegExpSymbol(const std::string& symbol) : - symbol(symbol) { + alphabet::Symbol(symbol) { } RegExpSymbol::RegExpSymbol(std::string&& symbol) : - symbol(std::move(symbol)) { + alphabet::Symbol(std::move(symbol)) { } RegExpElement* RegExpSymbol::clone() const { @@ -25,6 +25,10 @@ RegExpElement* RegExpSymbol::plunder() && { return new RegExpSymbol(std::move(*this)); } +bool RegExpSymbol::operator==(const alphabet::Symbol& other) const { + return this->symbol == other.getSymbol(); /* compare the symbol strings directly: "*this == other" re-selected this very overload (the base operator== is hidden here) and recursed without end */ +} + bool RegExpSymbol::operator<(const RegExpElement& other) const { return other > *this; } @@ -70,8 +74,18 @@ bool RegExpSymbol::isEmpty() const { return false; } -void RegExpSymbol::getAlphabet( 
std::set<alphabet::Symbol> & alphabet ) const { - alphabet.insert( alphabet::Symbol( this->getSymbol( ) ) ); +bool RegExpSymbol::testSymbol( const alphabet::Symbol & symbol ) const { + if( symbol == *this ) return true; + return false; +} + +bool RegExpSymbol::attachRegExp(const RegExp * regexp ) { + this->parentRegExp = regexp; + return this->parentRegExp->getAlphabet().find(*this) != this->parentRegExp->getAlphabet().end(); +} + +void RegExpSymbol::computeMinimalAlphabet( std::set<alphabet::Symbol>& alphabet ) const { + alphabet.insert(*this); } const std::string& RegExpSymbol::getSymbol() const { diff --git a/alib2/src/regexp/RegExpSymbol.h b/alib2/src/regexp/RegExpSymbol.h index 5f21201f9b56c0d77f417d948d2cc1bc989aa2af..ed232bf89ae400a01b02a5e500147f0c3db06df7 100644 --- a/alib2/src/regexp/RegExpSymbol.h +++ b/alib2/src/regexp/RegExpSymbol.h @@ -17,7 +17,7 @@ namespace regexp { /** * Represents symbol in the regular expression. Contains name of the symbol. */ -class RegExpSymbol : public RegExpElement, public std::element<RegExpSymbol, RegExpElement::visitor_type> { +class RegExpSymbol : protected alphabet::Symbol, public RegExpElement, public std::element<RegExpSymbol, RegExpElement::visitor_type> { protected: /** * @copydoc RegExpElement::clone() const @@ -26,10 +26,25 @@ protected: virtual RegExpElement* plunder() &&; - std::string symbol; + /** + * @copydoc RegExpElement::testSymbol() const + */ + virtual bool testSymbol( const alphabet::Symbol & symbol ) const; + + /** + * @copydoc RegExpElement::attachRegExp() + */ + virtual bool attachRegExp ( const RegExp * regexp ); + + /** + * @copydoc RegExpElement::computeMinimalAlphabet() + */ + virtual void computeMinimalAlphabet( std::set<alphabet::Symbol>& alphabet ) const; public: RegExpSymbol(const std::string& symbol); RegExpSymbol(std::string&& symbol); + + virtual bool operator==(const alphabet::Symbol&) const; virtual bool operator<(const RegExpElement&) const; virtual bool operator==(const RegExpElement&) 
const; @@ -45,11 +60,6 @@ public: * @copydoc RegExpElement::operator>>() const */ virtual void operator>>(std::ostream& out) const; - - /** - * @copydoc RegExpElement::getAlphabet() const - */ - virtual void getAlphabet( std::set<alphabet::Symbol> & alphabet ) const; /** * @copydoc RegExpElement::containsEmptyString() const diff --git a/alib2/src/regexp/RegExpToXMLComposer.cpp b/alib2/src/regexp/RegExpToXMLComposer.cpp index e1383fc6fef706ba20aec117f7f72979cb440a98..682b123d48239212231dbaf0c2e6921c3c649301 100644 --- a/alib2/src/regexp/RegExpToXMLComposer.cpp +++ b/alib2/src/regexp/RegExpToXMLComposer.cpp @@ -13,16 +13,21 @@ void RegExpToXMLComposer::Visit(void* userData, const RegExp& regexp) { std::list<sax::Token> &out = *((std::list<sax::Token>*) userData); out.push_back(sax::Token("regexp", sax::Token::START_ELEMENT)); - regexp.Accept(userData, *this); + { + out.push_back(sax::Token("alphabet", sax::Token::START_ELEMENT)); + for (const auto& symbol: regexp.getAlphabet()) { + out.push_back(sax::Token("symbol", sax::Token::START_ELEMENT)); + out.push_back(sax::Token(symbol.getSymbol(), sax::Token::CHARACTER)); + out.push_back(sax::Token("symbol", sax::Token::END_ELEMENT)); + } + out.push_back(sax::Token("alphabet", sax::Token::END_ELEMENT)); + } + regexp.getRegExp().Accept(userData, *this); out.push_back(sax::Token("regexp", sax::Token::END_ELEMENT)); } void RegExpToXMLComposer::Visit(void* userData, const RegExp::element_type& regexp) { - std::list<sax::Token> &out = *((std::list<sax::Token>*) userData); - - out.push_back(sax::Token("regexp", sax::Token::START_ELEMENT)); regexp.Accept(userData, *this); - out.push_back(sax::Token("regexp", sax::Token::END_ELEMENT)); } void RegExpToXMLComposer::Visit(void* userData, const RegExpElement::element_type& element) { @@ -85,9 +90,7 @@ void RegExpToXMLComposer::Visit(void* userData, const RegExpEmpty& empty) { std::list<sax::Token> RegExpToXMLComposer::compose(const RegExp& regexp) { std::list<sax::Token> out; - 
out.push_back(sax::Token("regexp", sax::Token::START_ELEMENT)); - regexp.getRegExp().Accept((void*) &out, *this); - out.push_back(sax::Token("regexp", sax::Token::END_ELEMENT)); + regexp.Accept((void*) &out, *this); return out; } diff --git a/alib2/test-src/regexp/RegExpTest.cpp b/alib2/test-src/regexp/RegExpTest.cpp index 87efaccc7aac3d8ae01df6e0ad97c6c1db5c17a2..62f44971d6304d779069627d88dbc34674a0bb0c 100644 --- a/alib2/test-src/regexp/RegExpTest.cpp +++ b/alib2/test-src/regexp/RegExpTest.cpp @@ -35,8 +35,11 @@ void RegExpTest::testEqual() { void RegExpTest::testXMLParser() { - regexp::RegExp regexp( - regexp::Alternation( + regexp::RegExp regexp; + regexp.addSymbolToAlphabet(alphabet::Symbol("1")); + regexp.addSymbolToAlphabet(alphabet::Symbol("2")); + regexp.addSymbolToAlphabet(alphabet::Symbol("3")); + regexp.setRegExp(regexp::Alternation( regexp::Concatenation( regexp::RegExpSymbol("1"), regexp::RegExpSymbol("2") @@ -46,7 +49,9 @@ void RegExpTest::testXMLParser() { ) ) ); - + + regexp.removeSymbolFromAlphabet(alphabet::Symbol("3")); + regexp::RegExpToXMLComposer composer; std::list<sax::Token> tokens = composer.compose(regexp); std::string tmp;