diff --git a/alib2algo/src/regexp/convert/ToAutomatonGlushkov.cpp b/alib2algo/src/regexp/convert/ToAutomatonGlushkov.cpp index e8815fff6ad3df3ef4fa0008953081d1fc999442..784901e81964ac319e49f4dcdb2d685b668cca4a 100644 --- a/alib2algo/src/regexp/convert/ToAutomatonGlushkov.cpp +++ b/alib2algo/src/regexp/convert/ToAutomatonGlushkov.cpp @@ -15,6 +15,7 @@ #include "object/Object.h" #include "../glushkov/GlushkovTraversal.h" +#include "../glushkov/GlushkovIndexate.h" #include "../properties/RegExpEpsilon.h" #include <exception/CommonException.h> @@ -34,7 +35,7 @@ automaton::NFA < > ToAutomatonGlushkov::convert ( const regexp::UnboundedRegExp // step 1 automaton.setInputAlphabet ( regexp.getAlphabet ( ) ); - regexp::UnboundedRegExp < > indexedRegExp = regexp::GlushkovTraversal::index ( regexp ); + regexp::UnboundedRegExp < > indexedRegExp = regexp::GlushkovIndexate::index ( regexp ); // steps 2, 3, 4 const std::set < regexp::UnboundedRegExpSymbol < alphabet::Symbol > > first = regexp::GlushkovTraversal::first ( indexedRegExp ); diff --git a/alib2algo/src/regexp/convert/ToGrammarRightRGGlushkov.cpp b/alib2algo/src/regexp/convert/ToGrammarRightRGGlushkov.cpp index c0d5c91f8af3befee54d2308bfeb8253f583bf09..9d95ce23b81c26dc5652dec6a4676be4e50e56c9 100644 --- a/alib2algo/src/regexp/convert/ToGrammarRightRGGlushkov.cpp +++ b/alib2algo/src/regexp/convert/ToGrammarRightRGGlushkov.cpp @@ -17,6 +17,7 @@ #include <object/Object.h> #include "../glushkov/GlushkovTraversal.h" +#include "../glushkov/GlushkovIndexate.h" #include "../properties/RegExpEpsilon.h" #include <exception/CommonException.h> @@ -36,7 +37,7 @@ grammar::RightRG < > ToGrammarRightRGGlushkov::convert ( const regexp::Unbounded // step 1 grammar.setTerminalAlphabet ( regexp.getAlphabet ( ) ); - regexp::UnboundedRegExp < > indexedRegExp = regexp::GlushkovTraversal::index ( regexp ); + regexp::UnboundedRegExp < > indexedRegExp = regexp::GlushkovIndexate::index ( regexp ); // steps 2, 3, 4 const std::set < 
regexp::UnboundedRegExpSymbol < alphabet::Symbol > > first = regexp::GlushkovTraversal::first ( indexedRegExp ); diff --git a/alib2algo/src/regexp/glushkov/GlushkovIndexate.cpp b/alib2algo/src/regexp/glushkov/GlushkovIndexate.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2b2242bc2172a6055ba51c8e2c1a0c4fcfcf5500 --- /dev/null +++ b/alib2algo/src/regexp/glushkov/GlushkovIndexate.cpp @@ -0,0 +1,66 @@ +/* + * GlushkovIndexate.cpp + * + * Created on: 13. 3. 2014 + * Author: Tomas Pecka + */ + +#include "GlushkovIndexate.h" + +#include <regexp/unbounded/UnboundedRegExpAlternation.h> +#include <regexp/unbounded/UnboundedRegExpConcatenation.h> +#include <regexp/unbounded/UnboundedRegExpElement.h> +#include <regexp/unbounded/UnboundedRegExpEmpty.h> +#include <regexp/unbounded/UnboundedRegExpEpsilon.h> +#include <regexp/unbounded/UnboundedRegExpIteration.h> +#include <regexp/unbounded/UnboundedRegExpSymbol.h> + +#include "../properties/RegExpEpsilon.h" +#include <alphabet/SymbolPairSymbol.h> + +#include <exception/CommonException.h> +#include <iterator> + +namespace regexp { + +UnboundedRegExp < > GlushkovIndexate::index ( const regexp::UnboundedRegExp < > & re ) { /* Copies the structure of re through the Unbounded visitor; every symbol leaf comes back paired with an index. */ + int i = 1; /* next index to hand out; only the symbol overload of visit advances it */ + + return UnboundedRegExp < > ( regexp::UnboundedRegExpStructure < alphabet::Symbol > ( re.getRegExp ( ).getStructure ( ).accept < std::rvalue_ref < regexp::UnboundedRegExpElement < alphabet::Symbol > >, GlushkovIndexate::Unbounded > ( i ) ) ); +} + +std::rvalue_ref < regexp::UnboundedRegExpElement < alphabet::Symbol > > GlushkovIndexate::Unbounded::visit(const regexp::UnboundedRegExpAlternation < alphabet::Symbol > & alternation, int & i) { /* Alternation: builds a fresh node and appends the indexed copy of each child, in getElements ( ) iteration order. */ + UnboundedRegExpAlternation < alphabet::Symbol > * alt = new UnboundedRegExpAlternation < alphabet::Symbol > ( ); /* NOTE(review): raw owning pointer until it is wrapped in the return statement; presumably std::rvalue_ref takes ownership - confirm, and check whether a throwing accept or appendElement could leak it */ + + for ( const auto & element : alternation.getElements ( ) ) + alt->appendElement ( element->accept < std::rvalue_ref < regexp::UnboundedRegExpElement < alphabet::Symbol > >, GlushkovIndexate::Unbounded > ( i ) ); + + 
return std::rvalue_ref < UnboundedRegExpElement < alphabet::Symbol > > ( alt ); +} + +std::rvalue_ref < regexp::UnboundedRegExpElement < alphabet::Symbol > > GlushkovIndexate::Unbounded::visit(const regexp::UnboundedRegExpConcatenation < alphabet::Symbol > & concatenation, int & i) { /* Concatenation: same scheme as the alternation overload - fresh node, indexed children appended in getElements ( ) iteration order. */ + UnboundedRegExpConcatenation < alphabet::Symbol > * con = new UnboundedRegExpConcatenation < alphabet::Symbol > ( ); /* NOTE(review): same raw-pointer ownership question as in the alternation overload - confirm */ + + for ( const auto & element : concatenation.getElements ( ) ) + con->appendElement ( element->accept < std::rvalue_ref < regexp::UnboundedRegExpElement < alphabet::Symbol > >, GlushkovIndexate::Unbounded > ( i ) ); + + return std::rvalue_ref < UnboundedRegExpElement < alphabet::Symbol > > ( con ); +} + +std::rvalue_ref < regexp::UnboundedRegExpElement < alphabet::Symbol > > GlushkovIndexate::Unbounded::visit(const regexp::UnboundedRegExpIteration < alphabet::Symbol > & iteration, int & i) { /* Iteration: wraps the indexed copy of the single child in a new iteration node. */ + return std::rvalue_ref < UnboundedRegExpElement < alphabet::Symbol > > ( new UnboundedRegExpIteration < alphabet::Symbol > ( iteration.getElement ( ).accept < std::rvalue_ref < regexp::UnboundedRegExpElement < alphabet::Symbol > >, GlushkovIndexate::Unbounded > ( i ) ) ); +} + +std::rvalue_ref < regexp::UnboundedRegExpElement < alphabet::Symbol > > GlushkovIndexate::Unbounded::visit(const regexp::UnboundedRegExpSymbol < alphabet::Symbol > & symbol, int & i) { /* Symbol leaf: the new symbol is a SymbolPairSymbol of ( original symbol, symbolFrom ( i ) ); i is post-incremented, so this is the only overload that consumes an index. */ + return std::rvalue_ref < UnboundedRegExpElement < alphabet::Symbol > > ( new UnboundedRegExpSymbol < alphabet::Symbol > ( alphabet::Symbol ( alphabet::SymbolPairSymbol ( std::make_pair ( symbol.getSymbol ( ), alphabet::symbolFrom ( i++ ) ) ) ) ) ); +} + +std::rvalue_ref < regexp::UnboundedRegExpElement < alphabet::Symbol > > GlushkovIndexate::Unbounded::visit(const regexp::UnboundedRegExpEpsilon < alphabet::Symbol > &, int &) { /* Epsilon: returns a fresh epsilon node; both parameters are unused, so the index counter is untouched. */ + return std::rvalue_ref < UnboundedRegExpElement < alphabet::Symbol > > ( new UnboundedRegExpEpsilon < alphabet::Symbol > ( ) ); +} + +std::rvalue_ref < regexp::UnboundedRegExpElement < 
alphabet::Symbol > > GlushkovIndexate::Unbounded::visit(const regexp::UnboundedRegExpEmpty < alphabet::Symbol > &, int &) { /* Empty: returns a fresh empty node; both parameters are unused, so the index counter is untouched. */ + return std::rvalue_ref < UnboundedRegExpElement < alphabet::Symbol > > ( new UnboundedRegExpEmpty < alphabet::Symbol > ( ) ); +} + +} /* namespace regexp */ diff --git a/alib2algo/src/regexp/glushkov/GlushkovIndexate.h b/alib2algo/src/regexp/glushkov/GlushkovIndexate.h new file mode 100644 index 0000000000000000000000000000000000000000..10240ace4125b8c655d137c9a21d78076197f830 --- /dev/null +++ b/alib2algo/src/regexp/glushkov/GlushkovIndexate.h @@ -0,0 +1,46 @@ +/* + * GlushkovIndexate.h + * + * Created on: 13. 3. 2014 + * Author: Tomas Pecka + */ + +#ifndef GLUSHKOV_INDEXATE_H_ +#define GLUSHKOV_INDEXATE_H_ + +#include <algorithm> +#include <list> +#include <set> + +#include <regexp/unbounded/UnboundedRegExp.h> +#include <regexp/RegExpFeatures.h> + +namespace regexp { + +/** + * Indexes the symbol occurrences of an UnboundedRegExp for the Glushkov algorithm. + * Each symbol leaf is replaced by a ( symbol, index ) pair; indices start at 1 and grow in visiting order. + */ +class GlushkovIndexate { +public: + /** + * @param re RegExp to index; taken by const reference and not modified + * @return new UnboundedRegExp of the same structure whose symbols are SymbolPairSymbol ( original symbol, index ) + */ + static regexp::UnboundedRegExp < > index ( const regexp::UnboundedRegExp < > & re ); + +private: + class Unbounded { /* visitor callbacks used by index ( ): one static overload per UnboundedRegExp node type; i is the next index to assign and is shared across the whole traversal */ + public: + static std::rvalue_ref < regexp::UnboundedRegExpElement < alphabet::Symbol > > visit(const regexp::UnboundedRegExpAlternation < alphabet::Symbol > & alternation, int & i); + static std::rvalue_ref < regexp::UnboundedRegExpElement < alphabet::Symbol > > visit(const regexp::UnboundedRegExpConcatenation < alphabet::Symbol > & concatenation, int & i); + static std::rvalue_ref < regexp::UnboundedRegExpElement < alphabet::Symbol > > visit(const regexp::UnboundedRegExpIteration < alphabet::Symbol > & iteration, int & i); + static std::rvalue_ref < regexp::UnboundedRegExpElement < alphabet::Symbol > > visit(const regexp::UnboundedRegExpSymbol < alphabet::Symbol > & symbol, int & i); + static std::rvalue_ref < regexp::UnboundedRegExpElement < 
alphabet::Symbol > > visit(const regexp::UnboundedRegExpEpsilon < alphabet::Symbol > & epsilon, int & i); + static std::rvalue_ref < regexp::UnboundedRegExpElement < alphabet::Symbol > > visit(const regexp::UnboundedRegExpEmpty < alphabet::Symbol > & empty, int & i); + }; +}; + +} /* namespace regexp */ + +#endif /* GLUSHKOV_INDEXATE_H_ */ diff --git a/alib2algo/src/regexp/glushkov/GlushkovTraversal.cpp b/alib2algo/src/regexp/glushkov/GlushkovTraversal.cpp index 34d71fc86d7be391511e38c208545ecefb1c112a..f2326ae106e83fd84807796e2dba82ddc620e0f2 100644 --- a/alib2algo/src/regexp/glushkov/GlushkovTraversal.cpp +++ b/alib2algo/src/regexp/glushkov/GlushkovTraversal.cpp @@ -325,47 +325,4 @@ bool GlushkovTraversal::pos ( const regexp::UnboundedRegExpEpsilon < alphabet::S return false; } -// ---------------------------------------------------------------------------- - -UnboundedRegExp < > GlushkovTraversal::index ( const regexp::UnboundedRegExp < > & re ) { - int i = 1; - - return UnboundedRegExp < > ( regexp::UnboundedRegExpStructure < alphabet::Symbol > ( index ( re.getRegExp ( ).getStructure ( ), i ) ) ); -} - -std::rvalue_ref < UnboundedRegExpElement < alphabet::Symbol > > GlushkovTraversal::index ( const regexp::UnboundedRegExpElement < alphabet::Symbol > & node, int & i ) { - const regexp::UnboundedRegExpAlternation < alphabet::Symbol > * alternation = dynamic_cast < const regexp::UnboundedRegExpAlternation < alphabet::Symbol > * > ( & node ); - const regexp::UnboundedRegExpConcatenation < alphabet::Symbol > * concatenation = dynamic_cast < const regexp::UnboundedRegExpConcatenation < alphabet::Symbol > * > ( & node ); - const regexp::UnboundedRegExpIteration < alphabet::Symbol > * iteration = dynamic_cast < const regexp::UnboundedRegExpIteration < alphabet::Symbol > * > ( & node ); - const regexp::UnboundedRegExpSymbol < alphabet::Symbol > * symbol = dynamic_cast < const regexp::UnboundedRegExpSymbol < alphabet::Symbol > * > ( & node ); - const 
regexp::UnboundedRegExpEmpty < alphabet::Symbol > * empty = dynamic_cast < const regexp::UnboundedRegExpEmpty < alphabet::Symbol > * > ( & node ); - const regexp::UnboundedRegExpEpsilon < alphabet::Symbol > * eps = dynamic_cast < const regexp::UnboundedRegExpEpsilon < alphabet::Symbol > * > ( & node ); - - if ( symbol ) { - return std::rvalue_ref < UnboundedRegExpElement < alphabet::Symbol > > ( new UnboundedRegExpSymbol < alphabet::Symbol > ( alphabet::Symbol ( alphabet::SymbolPairSymbol ( std::make_pair ( symbol->getSymbol ( ), alphabet::symbolFrom ( i++ ) ) ) ) ) ); - } else if ( alternation ) { - UnboundedRegExpAlternation < alphabet::Symbol > * alt = new UnboundedRegExpAlternation < alphabet::Symbol > ( ); - - for ( const auto & element : alternation->getElements ( ) ) - alt->appendElement ( index ( * element, i ) ); - - return std::rvalue_ref < UnboundedRegExpElement < alphabet::Symbol > > ( alt ); - } else if ( concatenation ) { - UnboundedRegExpConcatenation < alphabet::Symbol > * con = new UnboundedRegExpConcatenation < alphabet::Symbol > ( ); - - for ( const auto & element : concatenation->getElements ( ) ) - con->appendElement ( index ( * element, i ) ); - - return std::rvalue_ref < UnboundedRegExpElement < alphabet::Symbol > > ( con ); - } else if ( iteration ) { - return std::rvalue_ref < UnboundedRegExpElement < alphabet::Symbol > > ( new UnboundedRegExpIteration < alphabet::Symbol > ( index ( iteration->getElement ( ), i ) ) ); - } else if ( empty ) { - return std::rvalue_ref < UnboundedRegExpElement < alphabet::Symbol > > ( new UnboundedRegExpEmpty < alphabet::Symbol > ( ) ); - } else if ( eps ) { - return std::rvalue_ref < UnboundedRegExpElement < alphabet::Symbol > > ( new UnboundedRegExpEpsilon < alphabet::Symbol > ( ) ); - } else { - throw exception::CommonException ( "GlushkovTraversal::index() - unknown RegExpElement node" ); - } -} - } /* namespace conversions */ diff --git a/alib2algo/src/regexp/glushkov/GlushkovTraversal.h 
b/alib2algo/src/regexp/glushkov/GlushkovTraversal.h index e649746e67d630f96dc6ca83ad8d1a3dec4a7937..fac2d1004d81bff79a4e25c72680e08002ea6501 100644 --- a/alib2algo/src/regexp/glushkov/GlushkovTraversal.h +++ b/alib2algo/src/regexp/glushkov/GlushkovTraversal.h @@ -51,20 +51,12 @@ public: */ static std::set < UnboundedRegExpSymbol < alphabet::Symbol > > follow ( const regexp::UnboundedRegExp < > & re, const UnboundedRegExpSymbol < alphabet::Symbol > & symbol ); - /** - * @param re RegExp to index - * @return UnboundedRegExp with indexed elements - */ - static regexp::UnboundedRegExp < > index ( const regexp::UnboundedRegExp < > & re ); - private: /** * @return bool true if symbol pointer is in this subtree */ static bool pos ( const UnboundedRegExpSymbol < alphabet::Symbol > & symbol, const regexp::UnboundedRegExp < > & node ); - static std::rvalue_ref < UnboundedRegExpElement < alphabet::Symbol > > index ( const regexp::UnboundedRegExpElement < alphabet::Symbol > & node, int & i ); - static std::set < regexp::UnboundedRegExpSymbol < alphabet::Symbol > > first ( const regexp::UnboundedRegExpElement < alphabet::Symbol > & node ); static std::set < regexp::UnboundedRegExpSymbol < alphabet::Symbol > > first ( const regexp::UnboundedRegExpAlternation < alphabet::Symbol > & node ); static std::set < regexp::UnboundedRegExpSymbol < alphabet::Symbol > > first ( const regexp::UnboundedRegExpConcatenation < alphabet::Symbol > & node ); diff --git a/alib2algo/test-src/regexp/RegExpTest.cpp b/alib2algo/test-src/regexp/RegExpTest.cpp index 01410cebcc99fcc4de208ebed5870488e4cb7f28..318e5aa4dcbf0adf0263e074008b619937ff3dfb 100644 --- a/alib2algo/test-src/regexp/RegExpTest.cpp +++ b/alib2algo/test-src/regexp/RegExpTest.cpp @@ -9,6 +9,7 @@ #include <regexp/RegExp.h> #include "regexp/glushkov/GlushkovTraversal.h" +#include "regexp/glushkov/GlushkovIndexate.h" #include <factory/StringDataFactory.hpp> @@ -28,7 +29,7 @@ void RegExpTest::testFirst ( ) { { std::string input = "#E* #0*"; 
regexp::UnboundedRegExp < > regexp ( static_cast < const regexp::UnboundedRegExp < > & > ( alib::StringDataFactory::fromString < regexp::RegExp > ( input ).getData ( ) ) ); - regexp::UnboundedRegExp < > indexedRegExp = regexp::GlushkovTraversal::index ( regexp ); + regexp::UnboundedRegExp < > indexedRegExp = regexp::GlushkovIndexate::index ( regexp ); std::set < regexp::UnboundedRegExpSymbol < alphabet::Symbol > > first = regexp::GlushkovTraversal::first ( indexedRegExp ); @@ -37,7 +38,7 @@ void RegExpTest::testFirst ( ) { { std::string input = "#E* a"; regexp::UnboundedRegExp < > regexp ( static_cast < const regexp::UnboundedRegExp < > & > ( alib::StringDataFactory::fromString < regexp::RegExp > ( input ).getData ( ) ) ); - regexp::UnboundedRegExp < > indexedRegExp = regexp::GlushkovTraversal::index ( regexp ); + regexp::UnboundedRegExp < > indexedRegExp = regexp::GlushkovIndexate::index ( regexp ); std::set < regexp::UnboundedRegExpSymbol < alphabet::Symbol > > first = regexp::GlushkovTraversal::first ( indexedRegExp ); @@ -49,7 +50,7 @@ void RegExpTest::testLast ( ) { { std::string input = "a+a"; regexp::UnboundedRegExp < > regexp ( static_cast < const regexp::UnboundedRegExp < > & > ( alib::StringDataFactory::fromString < regexp::RegExp > ( input ).getData ( ) ) ); - regexp::UnboundedRegExp < > indexedRegExp = regexp::GlushkovTraversal::index ( regexp ); + regexp::UnboundedRegExp < > indexedRegExp = regexp::GlushkovIndexate::index ( regexp ); std::set < regexp::UnboundedRegExpSymbol < alphabet::Symbol > > last = regexp::GlushkovTraversal::last ( indexedRegExp ); @@ -58,7 +59,7 @@ void RegExpTest::testLast ( ) { { std::string input = "(a+a)b"; regexp::UnboundedRegExp < > regexp ( static_cast < const regexp::UnboundedRegExp < > & > ( alib::StringDataFactory::fromString < regexp::RegExp > ( input ).getData ( ) ) ); - regexp::UnboundedRegExp < > indexedRegExp = regexp::GlushkovTraversal::index ( regexp ); + regexp::UnboundedRegExp < > indexedRegExp = 
regexp::GlushkovIndexate::index ( regexp ); std::set < regexp::UnboundedRegExpSymbol < alphabet::Symbol > > last = regexp::GlushkovTraversal::last ( indexedRegExp ); @@ -71,7 +72,7 @@ void RegExpTest::testFollow ( ) { { std::string input = "(a+a)b"; regexp::UnboundedRegExp < > regexp ( static_cast < const regexp::UnboundedRegExp < > & > ( alib::StringDataFactory::fromString < regexp::RegExp > ( input ).getData ( ) ) ); - regexp::UnboundedRegExp < > indexedRegExp = regexp::GlushkovTraversal::index ( regexp ); + regexp::UnboundedRegExp < > indexedRegExp = regexp::GlushkovIndexate::index ( regexp ); auto symbolsIter = indexedRegExp.getAlphabet ( ).begin ( ); @@ -92,7 +93,7 @@ void RegExpTest::testFollow ( ) { { std::string input = "a+a* (b+a)* c"; regexp::UnboundedRegExp < > regexp ( static_cast < const regexp::UnboundedRegExp < > & > ( alib::StringDataFactory::fromString < regexp::RegExp > ( input ).getData ( ) ) ); - regexp::UnboundedRegExp < > indexedRegExp = regexp::GlushkovTraversal::index ( regexp ); + regexp::UnboundedRegExp < > indexedRegExp = regexp::GlushkovIndexate::index ( regexp ); auto symbolsIter = indexedRegExp.getAlphabet ( ).begin ( );