From dfd2b4561a946033fb25276b4ecad33f955ec6c3 Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Tue, 24 May 2016 12:30:34 +0200 Subject: [PATCH] make algo use new regexp internal form --- alib2algo/src/regexp/GlushkovPair.cpp | 36 -- alib2algo/src/regexp/GlushkovPair.h | 32 -- alib2algo/src/regexp/GlushkovSymbol.cpp | 39 -- alib2algo/src/regexp/GlushkovSymbol.h | 36 -- alib2algo/src/regexp/GlushkovTraversal.cpp | 388 ------------------ alib2algo/src/regexp/GlushkovTraversal.h | 111 ----- .../regexp/convert/ToAutomatonGlushkov.cpp | 42 +- .../convert/ToGrammarRightRGGlushkov.cpp | 56 ++- .../src/regexp/glushkov/GlushkovTraversal.cpp | 368 +++++++++++++++++ .../src/regexp/glushkov/GlushkovTraversal.h | 104 +++++ alib2algo/test-src/regexp/RegExpTest.cpp | 39 +- 11 files changed, 533 insertions(+), 718 deletions(-) delete mode 100644 alib2algo/src/regexp/GlushkovPair.cpp delete mode 100644 alib2algo/src/regexp/GlushkovPair.h delete mode 100644 alib2algo/src/regexp/GlushkovSymbol.cpp delete mode 100644 alib2algo/src/regexp/GlushkovSymbol.h delete mode 100644 alib2algo/src/regexp/GlushkovTraversal.cpp delete mode 100644 alib2algo/src/regexp/GlushkovTraversal.h create mode 100644 alib2algo/src/regexp/glushkov/GlushkovTraversal.cpp create mode 100644 alib2algo/src/regexp/glushkov/GlushkovTraversal.h diff --git a/alib2algo/src/regexp/GlushkovPair.cpp b/alib2algo/src/regexp/GlushkovPair.cpp deleted file mode 100644 index 8d06d8e5d4..0000000000 --- a/alib2algo/src/regexp/GlushkovPair.cpp +++ /dev/null @@ -1,36 +0,0 @@ -/* - * GlushkovPair.cpp - * - * Created on: 14. 4. 2014 - * Author: Tomas Pecka - */ - -#include "GlushkovPair.h" - -namespace regexp -{ - -GlushkovPair::GlushkovPair( GlushkovSymbol const& first, GlushkovSymbol const& second ) : - m_first( first ), m_second( second ) -{ - -} - -bool GlushkovPair::operator<( GlushkovPair const& x ) const -{ - if( m_first.getId( ) == x.m_first.getId( ) ) - return m_second.getId( ) < x.m_second.getId( ); - else - return m_first.getId( ) < x.m_first.getId( ); -} - -GlushkovSymbol const& GlushkovPair::getFirst( void ) const -{ - return m_first; -} -GlushkovSymbol const& GlushkovPair::getSecond( void ) const -{ - return m_second; -} - -} /* namespace conversions */ diff --git a/alib2algo/src/regexp/GlushkovPair.h b/alib2algo/src/regexp/GlushkovPair.h deleted file mode 100644 index 5ad87945d5..0000000000 --- a/alib2algo/src/regexp/GlushkovPair.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * GlushkovPair.h - * - * Created on: 14. 4. 2014 - * Author: Tomas Pecka - */ - -#ifndef GLUSHKOVPAIR_H_ -#define GLUSHKOVPAIR_H_ - -#include "GlushkovSymbol.h" - -namespace regexp { - -/** - * Actually, this is just std::pair. 2-tuple. - */ -class GlushkovPair -{ -private: - GlushkovSymbol const m_first; - GlushkovSymbol const m_second; - -public: - GlushkovPair( GlushkovSymbol const& first, GlushkovSymbol const& second ); - bool operator<( GlushkovPair const& x ) const; - GlushkovSymbol const& getFirst( void ) const; - GlushkovSymbol const& getSecond( void ) const; -}; -} /* namespace conversions */ - -#endif /* GLUSHKOVPAIR_H_ */ diff --git a/alib2algo/src/regexp/GlushkovSymbol.cpp b/alib2algo/src/regexp/GlushkovSymbol.cpp deleted file mode 100644 index 0be8280ad5..0000000000 --- a/alib2algo/src/regexp/GlushkovSymbol.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * GlushkovSymbol.cpp - * - * Created on: 14. 4. 2014 - * Author: Tomas Pecka - */ - -#include "GlushkovSymbol.h" - -namespace regexp -{ - -GlushkovSymbol::GlushkovSymbol( regexp::UnboundedRegExpSymbol const * const & node, int i ) : - m_regexpSymbol( node ), m_i( i ) -{ - -} - -bool GlushkovSymbol::operator<( GlushkovSymbol const& x ) const -{ - return m_i < x.m_i; -} - -int GlushkovSymbol::getId( void ) const -{ - return m_i; -} - -alphabet::Symbol GlushkovSymbol::getInputSymbol( void ) const -{ - return alphabet::Symbol( m_regexpSymbol->getSymbol( ) ); -} - -regexp::UnboundedRegExpSymbol const * GlushkovSymbol::getSymbolPtr( void ) const -{ - return m_regexpSymbol; -} - -} /* namespace conversions */ diff --git a/alib2algo/src/regexp/GlushkovSymbol.h b/alib2algo/src/regexp/GlushkovSymbol.h deleted file mode 100644 index 10d3f7863d..0000000000 --- a/alib2algo/src/regexp/GlushkovSymbol.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * GlushkovSymbol.h - * - * Created on: 14. 4. 2014 - * Author: Tomas Pecka - */ - -#ifndef GLUSHKOVSYMBOL_H_ -#define GLUSHKOVSYMBOL_H_ - -#include <alphabet/Symbol.h> -#include <regexp/unbounded/UnboundedRegExpSymbol.h> - - -namespace regexp { - -/** - * Represents numbered symbol in glushkov algorithm. - */ -class GlushkovSymbol -{ -private: - regexp::UnboundedRegExpSymbol const * const m_regexpSymbol; - int m_i; - -public: - GlushkovSymbol( regexp::UnboundedRegExpSymbol const * const & node, int i ); - bool operator<( GlushkovSymbol const& x ) const; - int getId( void ) const; - alphabet::Symbol getInputSymbol( void ) const; - regexp::UnboundedRegExpSymbol const * getSymbolPtr( void ) const; -}; - -} /* namespace conversions */ - -#endif /* GLUSHKOVSYMBOL_H_ */ diff --git a/alib2algo/src/regexp/GlushkovTraversal.cpp b/alib2algo/src/regexp/GlushkovTraversal.cpp deleted file mode 100644 index c07064f6d7..0000000000 --- a/alib2algo/src/regexp/GlushkovTraversal.cpp +++ /dev/null @@ -1,388 +0,0 @@ -/* - * GlushkovTraversal.cpp - * - * Created on: 13. 3. 2014 - * Author: Tomas Pecka - */ - -#include "GlushkovTraversal.h" - -#include "properties/RegExpEpsilon.h" - -#include <exception/CommonException.h> - -namespace regexp { - -const GlushkovSymbol & GlushkovTraversal::findSymbol ( regexp::UnboundedRegExpSymbol const * const symbol, const std::set < GlushkovSymbol > & symbolSet ) { - auto it = find_if ( symbolSet.begin ( ), symbolSet.end ( ), [symbol] ( GlushkovSymbol const & gs ) -> bool { - return gs.getSymbolPtr ( ) == symbol; - } ); - - if ( it == symbolSet.end ( ) ) - throw ( "GlushkovTraversal - Can not find GlushkovSymbol for regexp node." ); - - return * it; -} - -bool GlushkovTraversal::pos ( GlushkovSymbol const & symbol, regexp::UnboundedRegExp const * const & node ) { - return pos ( & node->getRegExp ( ), symbol.getSymbolPtr ( ) ); -} - -std::set < GlushkovSymbol > GlushkovTraversal::first ( regexp::UnboundedRegExp const & re ) { - std::set < GlushkovSymbol > firstSet, symbolSet = getSymbols ( re ); - - for ( auto const & s : first ( & re.getRegExp ( ) ) ) - firstSet.insert ( findSymbol ( s, symbolSet ) ); - - return firstSet; -} - -std::set < GlushkovSymbol > GlushkovTraversal::last ( regexp::UnboundedRegExp const & re ) { - std::set < GlushkovSymbol > lastSet, symbolSet = getSymbols ( re ); - - for ( auto const & s : last ( & re.getRegExp ( ) ) ) - lastSet.insert ( findSymbol ( s, symbolSet ) ); - - return lastSet; -} - -std::set < GlushkovSymbol > GlushkovTraversal::follow ( regexp::UnboundedRegExp const & re, GlushkovSymbol const & symbol ) { - std::set < GlushkovSymbol > followSet, symbolSet = getSymbols ( re ); - - for ( auto const & s : follow ( & re.getRegExp ( ), symbol.getSymbolPtr ( ) ) ) - followSet.insert ( findSymbol ( s, symbolSet ) ); - - return followSet; -} - -// ----------------------------------------------------------------------------- - -std::set < regexp::UnboundedRegExpSymbol const * > GlushkovTraversal::first ( regexp::UnboundedRegExpElement const * const & node ) { - regexp::UnboundedRegExpAlternation const * const alternation = dynamic_cast < regexp::UnboundedRegExpAlternation const * const > ( node ); - regexp::UnboundedRegExpConcatenation const * const concatenation = dynamic_cast < regexp::UnboundedRegExpConcatenation const * const > ( node ); - regexp::UnboundedRegExpIteration const * const iteration = dynamic_cast < regexp::UnboundedRegExpIteration const * const > ( node ); - regexp::UnboundedRegExpSymbol const * const symbol = dynamic_cast < regexp::UnboundedRegExpSymbol const * const > ( node ); - regexp::UnboundedRegExpEmpty const * const empty = dynamic_cast < regexp::UnboundedRegExpEmpty const * const > ( node ); - regexp::UnboundedRegExpEpsilon const * const eps = dynamic_cast < regexp::UnboundedRegExpEpsilon const * const > ( node ); - - if ( alternation ) - return first ( alternation ); - else if ( concatenation ) - return first ( concatenation ); - else if ( iteration ) - return first ( iteration ); - else if ( eps ) - return first ( eps ); - else if ( empty ) - return first ( empty ); - else if ( symbol ) - return first ( symbol ); - - throw exception::CommonException ( "GlushkovTraversal::first - invalid RegExpElement node" ); -} - -std::set < regexp::UnboundedRegExpSymbol const * > GlushkovTraversal::first ( regexp::UnboundedRegExpAlternation const * const & node ) { - std::set < regexp::UnboundedRegExpSymbol const * > ret, tmp; - - for ( auto const & element : node->getElements ( ) ) { - tmp = first ( element ); - ret.insert ( tmp.begin ( ), tmp.end ( ) ); - } - - return ret; -} - -std::set < regexp::UnboundedRegExpSymbol const * > GlushkovTraversal::first ( regexp::UnboundedRegExpConcatenation const * const & node ) { - std::set < regexp::UnboundedRegExpSymbol const * > ret, tmp; - - for ( auto const & element : node->getElements ( ) ) { - tmp = first ( element ); - ret.insert ( tmp.begin ( ), tmp.end ( ) ); - - if ( !regexp::properties::RegExpEpsilon::languageContainsEpsilon ( * element ) ) // If regexp of this subtree can match epsilon, then we need to add next subtree - break; - } - - return ret; -} - -std::set < regexp::UnboundedRegExpSymbol const * > GlushkovTraversal::first ( regexp::UnboundedRegExpIteration const * const & node ) { - return first ( & node->getElement ( ) ); -} - -std::set < regexp::UnboundedRegExpSymbol const * > GlushkovTraversal::first ( regexp::UnboundedRegExpSymbol const * const & node ) { - return std::set < regexp::UnboundedRegExpSymbol const * > { node }; -} - -std::set < regexp::UnboundedRegExpSymbol const * > GlushkovTraversal::first ( regexp::UnboundedRegExpEpsilon const * const & /* node */ ) { - return std::set < regexp::UnboundedRegExpSymbol const * > ( ); -} - -std::set < regexp::UnboundedRegExpSymbol const * > GlushkovTraversal::first ( regexp::UnboundedRegExpEmpty const * const & /* node */ ) { - return std::set < regexp::UnboundedRegExpSymbol const * > ( ); -} - -// ---------------------------------------------------------------------------- - -std::set < regexp::UnboundedRegExpSymbol const * > GlushkovTraversal::last ( regexp::UnboundedRegExpElement const * const & node ) { - regexp::UnboundedRegExpAlternation const * const alternation = dynamic_cast < regexp::UnboundedRegExpAlternation const * const > ( node ); - regexp::UnboundedRegExpConcatenation const * const concatenation = dynamic_cast < regexp::UnboundedRegExpConcatenation const * const > ( node ); - regexp::UnboundedRegExpIteration const * const iteration = dynamic_cast < regexp::UnboundedRegExpIteration const * const > ( node ); - regexp::UnboundedRegExpSymbol const * const symbol = dynamic_cast < regexp::UnboundedRegExpSymbol const * const > ( node ); - regexp::UnboundedRegExpEmpty const * const empty = dynamic_cast < regexp::UnboundedRegExpEmpty const * const > ( node ); - regexp::UnboundedRegExpEpsilon const * const eps = dynamic_cast < regexp::UnboundedRegExpEpsilon const * const > ( node ); - - if ( symbol ) - return last ( symbol ); - else if ( alternation ) - return last ( alternation ); - else if ( concatenation ) - return last ( concatenation ); - else if ( iteration ) - return last ( iteration ); - else if ( eps ) - return last ( eps ); - else if ( empty ) - return last ( empty ); - - throw exception::CommonException ( "GlushkovTraversal::last - invalid RegExpElement node" ); -} - -std::set < regexp::UnboundedRegExpSymbol const * > GlushkovTraversal::last ( regexp::UnboundedRegExpAlternation const * const & node ) { - std::set < regexp::UnboundedRegExpSymbol const * > ret; - - for ( auto const & element : node->getElements ( ) ) { - std::set < regexp::UnboundedRegExpSymbol const * > tmp = last ( element ); - ret.insert ( tmp.begin ( ), tmp.end ( ) ); - } - - return ret; -} - -std::set < regexp::UnboundedRegExpSymbol const * > GlushkovTraversal::last ( regexp::UnboundedRegExpConcatenation const * const & node ) { - std::set < regexp::UnboundedRegExpSymbol const * > ret, tmp; - - for ( auto it = node->getElements ( ).rbegin ( ); it != node->getElements ( ).rend ( ); it++ ) { - tmp = last ( * it ); - ret.insert ( tmp.begin ( ), tmp.end ( ) ); - - if ( !regexp::properties::RegExpEpsilon::languageContainsEpsilon ( * * it ) ) - break; - } - - return ret; -} - -std::set < regexp::UnboundedRegExpSymbol const * > GlushkovTraversal::last ( regexp::UnboundedRegExpIteration const * const & node ) { - return last ( & node->getElement ( ) ); -} - -std::set < regexp::UnboundedRegExpSymbol const * > GlushkovTraversal::last ( regexp::UnboundedRegExpSymbol const * const & node ) { - return std::set < regexp::UnboundedRegExpSymbol const * > { node }; -} - -std::set < regexp::UnboundedRegExpSymbol const * > GlushkovTraversal::last ( regexp::UnboundedRegExpEpsilon const * const & /* node */ ) { - return std::set < regexp::UnboundedRegExpSymbol const * > ( ); -} - -std::set < regexp::UnboundedRegExpSymbol const * > GlushkovTraversal::last ( regexp::UnboundedRegExpEmpty const * const & /* node */ ) { - return std::set < regexp::UnboundedRegExpSymbol const * > ( ); -} - -// ---------------------------------------------------------------------------- - -std::set < regexp::UnboundedRegExpSymbol const * > GlushkovTraversal::follow ( regexp::UnboundedRegExpElement const * const & node, regexp::UnboundedRegExpSymbol const * const & symbolptr ) { - regexp::UnboundedRegExpAlternation const * const alternation = dynamic_cast < regexp::UnboundedRegExpAlternation const * const > ( node ); - regexp::UnboundedRegExpConcatenation const * const concatenation = dynamic_cast < regexp::UnboundedRegExpConcatenation const * const > ( node ); - regexp::UnboundedRegExpIteration const * const iteration = dynamic_cast < regexp::UnboundedRegExpIteration const * const > ( node ); - regexp::UnboundedRegExpSymbol const * const symbol = dynamic_cast < regexp::UnboundedRegExpSymbol const * const > ( node ); - regexp::UnboundedRegExpEmpty const * const empty = dynamic_cast < regexp::UnboundedRegExpEmpty const * const > ( node ); - regexp::UnboundedRegExpEpsilon const * const eps = dynamic_cast < regexp::UnboundedRegExpEpsilon const * const > ( node ); - - if ( alternation ) - return follow ( alternation, symbolptr ); - - else if ( concatenation ) - return follow ( concatenation, symbolptr ); - - else if ( iteration ) - return follow ( iteration, symbolptr ); - - else if ( symbol ) - return follow ( symbol, symbolptr ); - - else if ( empty ) - return follow ( empty, symbolptr ); - - else if ( eps ) - return follow ( eps, symbolptr ); - - throw exception::CommonException ( "GlushkovTraversal::follow() - unknown RegExpElement node" ); -} - -std::set < regexp::UnboundedRegExpSymbol const * > GlushkovTraversal::follow ( regexp::UnboundedRegExpAlternation const * const & node, regexp::UnboundedRegExpSymbol const * const & symbolptr ) { - for ( auto const & element : node->getElements ( ) ) - if ( pos ( element, symbolptr ) ) - return follow ( element, symbolptr ); - - throw exception::CommonException ( "GlushkovTraversal::follow(Alt)" ); -} - -std::set < regexp::UnboundedRegExpSymbol const * > GlushkovTraversal::follow ( regexp::UnboundedRegExpConcatenation const * const & node, regexp::UnboundedRegExpSymbol const * const & symbolptr ) { - std::set < regexp::UnboundedRegExpSymbol const * > ret, tmp, lastSet; - - for ( auto e = node->getElements ( ).begin ( ); e != node->getElements ( ).end ( ); e++ ) { - if ( !pos ( * e, symbolptr ) ) - continue; - - tmp = follow ( * e, symbolptr ); - ret.insert ( tmp.begin ( ), tmp.end ( ) ); - - lastSet = last ( * e ); - - if ( lastSet.find ( symbolptr ) != lastSet.end ( ) ) - for ( auto f = next ( e ); f != node->getElements ( ).end ( ); f++ ) { - tmp = first ( * f ); - ret.insert ( tmp.begin ( ), tmp.end ( ) ); - - if ( !regexp::properties::RegExpEpsilon::languageContainsEpsilon ( * * f ) ) - break; - } - - } - - return ret; -} - -std::set < regexp::UnboundedRegExpSymbol const * > GlushkovTraversal::follow ( regexp::UnboundedRegExpIteration const * const & node, regexp::UnboundedRegExpSymbol const * const & symbolptr ) { - std::set < regexp::UnboundedRegExpSymbol const * > ret = follow ( & node->getElement ( ), symbolptr ); - std::set < regexp::UnboundedRegExpSymbol const * > lastSet = last ( & node->getElement ( ) ); - - if ( lastSet.find ( symbolptr ) != lastSet.end ( ) ) { - std::set < regexp::UnboundedRegExpSymbol const * > firstSet = first ( & node->getElement ( ) ); - ret.insert ( firstSet.begin ( ), firstSet.end ( ) ); - } - - return ret; -} - -std::set < regexp::UnboundedRegExpSymbol const * > GlushkovTraversal::follow ( regexp::UnboundedRegExpSymbol const * const & /* node */, regexp::UnboundedRegExpSymbol const * const & /* symbolptr */ ) { - return std::set < regexp::UnboundedRegExpSymbol const * > ( ); -} - -std::set < regexp::UnboundedRegExpSymbol const * > GlushkovTraversal::follow ( regexp::UnboundedRegExpEmpty const * const & /* node */, regexp::UnboundedRegExpSymbol const * const & /* symbolptr */ ) { - return std::set < regexp::UnboundedRegExpSymbol const * > ( ); -} - -std::set < regexp::UnboundedRegExpSymbol const * > GlushkovTraversal::follow ( regexp::UnboundedRegExpEpsilon const * const & /* node */, regexp::UnboundedRegExpSymbol const * const & /* symbolptr */ ) { - return std::set < regexp::UnboundedRegExpSymbol const * > ( ); -} - -// ---------------------------------------------------------------------------- - -bool GlushkovTraversal::pos ( regexp::UnboundedRegExpElement const * const & node, regexp::UnboundedRegExpSymbol const * const & symbolptr ) { - regexp::UnboundedRegExpAlternation const * const alternation = dynamic_cast < regexp::UnboundedRegExpAlternation const * const > ( node ); - regexp::UnboundedRegExpConcatenation const * const concatenation = dynamic_cast < regexp::UnboundedRegExpConcatenation const * const > ( node ); - regexp::UnboundedRegExpIteration const * const iteration = dynamic_cast < regexp::UnboundedRegExpIteration const * const > ( node ); - regexp::UnboundedRegExpSymbol const * const symbol = dynamic_cast < regexp::UnboundedRegExpSymbol const * const > ( node ); - regexp::UnboundedRegExpEmpty const * const empty = dynamic_cast < regexp::UnboundedRegExpEmpty const * const > ( node ); - regexp::UnboundedRegExpEpsilon const * const eps = dynamic_cast < regexp::UnboundedRegExpEpsilon const * const > ( node ); - - if ( alternation ) - return pos ( alternation, symbolptr ); - - else if ( concatenation ) - return pos ( concatenation, symbolptr ); - - else if ( iteration ) - return pos ( iteration, symbolptr ); - - else if ( symbol ) - return pos ( symbol, symbolptr ); - - else if ( empty ) - return pos ( empty, symbolptr ); - - else if ( eps ) - return pos ( eps, symbolptr ); - - throw exception::CommonException ( "GlushkovTraversal::pos() - unknown RegExpElement node" ); -} - -bool GlushkovTraversal::pos ( regexp::UnboundedRegExpAlternation const * const & node, regexp::UnboundedRegExpSymbol const * const & symbolptr ) { - for ( auto const & element : node->getElements ( ) ) - if ( pos ( element, symbolptr ) ) - return true; - - return false; -} - -bool GlushkovTraversal::pos ( regexp::UnboundedRegExpConcatenation const * const & node, regexp::UnboundedRegExpSymbol const * const & symbolptr ) { - for ( auto const & element : node->getElements ( ) ) - if ( pos ( element, symbolptr ) ) - return true; - - return false; -} - -bool GlushkovTraversal::pos ( regexp::UnboundedRegExpIteration const * const & node, regexp::UnboundedRegExpSymbol const * const & symbolptr ) { - return pos ( & node->getElement ( ), symbolptr ); -} - -bool GlushkovTraversal::pos ( regexp::UnboundedRegExpSymbol const * const & node, regexp::UnboundedRegExpSymbol const * const & symbolptr ) { - return symbolptr == node; -} - -bool GlushkovTraversal::pos ( regexp::UnboundedRegExpEmpty const * const & /* node */, regexp::UnboundedRegExpSymbol const * const & /* symbolptr */ ) { - return false; -} - -bool GlushkovTraversal::pos ( regexp::UnboundedRegExpEpsilon const * const & /* node */, regexp::UnboundedRegExpSymbol const * const & /* symbolptr */ ) { - return false; -} - -// ---------------------------------------------------------------------------- - -std::set < GlushkovSymbol > GlushkovTraversal::getSymbols ( regexp::UnboundedRegExp const & re ) { - std::set < GlushkovSymbol > alphabet; - int i = 1; - - getSymbols ( & re.getRegExp ( ), alphabet, i ); - - return alphabet; -} - -void GlushkovTraversal::getSymbols ( regexp::UnboundedRegExpElement const * const & node, std::set < GlushkovSymbol > & alphabet, int & i ) { - regexp::UnboundedRegExpAlternation const * const alternation = dynamic_cast < regexp::UnboundedRegExpAlternation const * const > ( node ); - regexp::UnboundedRegExpConcatenation const * const concatenation = dynamic_cast < regexp::UnboundedRegExpConcatenation const * const > ( node ); - regexp::UnboundedRegExpIteration const * const iteration = dynamic_cast < regexp::UnboundedRegExpIteration const * const > ( node ); - regexp::UnboundedRegExpSymbol const * const symbol = dynamic_cast < regexp::UnboundedRegExpSymbol const * const > ( node ); - regexp::UnboundedRegExpEmpty const * const empty = dynamic_cast < regexp::UnboundedRegExpEmpty const * const > ( node ); - regexp::UnboundedRegExpEpsilon const * const eps = dynamic_cast < regexp::UnboundedRegExpEpsilon const * const > ( node ); - - if ( symbol ) { - alphabet.insert ( GlushkovSymbol ( symbol, i++ ) ); - return; - } else if ( alternation ) { - for ( const auto & element : alternation->getElements ( ) ) - getSymbols ( element, alphabet, i ); - - return; - } else if ( concatenation ) { - for ( const auto & element : concatenation->getElements ( ) ) - getSymbols ( element, alphabet, i ); - - return; - } else if ( iteration ) { - getSymbols ( & iteration->getElement ( ), alphabet, i ); - return; - } else if ( empty ) { - return; - } else if ( eps ) { - return; - } - - throw exception::CommonException ( "GlushkovTraversal::getSymbols() - unknown RegExpElement node" ); -} - -} /* namespace conversions */ diff --git a/alib2algo/src/regexp/GlushkovTraversal.h b/alib2algo/src/regexp/GlushkovTraversal.h deleted file mode 100644 index 7850f3927c..0000000000 --- a/alib2algo/src/regexp/GlushkovTraversal.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * GlushkovTraversal.h - * - * Created on: 13. 3. 2014 - * Author: Tomas Pecka - */ - -#ifndef GLUSHKOVTRAVERSAL_H_ -#define GLUSHKOVTRAVERSAL_H_ - -#include <set> -#include <list> -#include <algorithm> - -#include <regexp/unbounded/UnboundedRegExp.h> -#include <regexp/unbounded/UnboundedRegExpElement.h> -#include <regexp/unbounded/UnboundedRegExpAlternation.h> -#include <regexp/unbounded/UnboundedRegExpConcatenation.h> -#include <regexp/unbounded/UnboundedRegExpIteration.h> -#include <regexp/unbounded/UnboundedRegExpSymbol.h> -#include <regexp/unbounded/UnboundedRegExpEmpty.h> -#include <regexp/unbounded/UnboundedRegExpEpsilon.h> - -#include "GlushkovSymbol.h" -#include "GlushkovPair.h" - -namespace regexp { - -/** - * RegExp tree traversal utils for Glushkov algorithm. - * - * Thanks to http://www.sciencedirect.com/science/article/pii/S030439759700296X for better follow() solution. - */ -class GlushkovTraversal { -public: - /** - * @param re RegExp to probe - * @return all RegExpSymbols whichcan start the word. - */ - static std::set < GlushkovSymbol > first ( regexp::UnboundedRegExp const & re ); - - /** - * @param re RegExp to probe - * @return all RegExpSymbols that can terminate the word. - */ - static std::set < GlushkovSymbol > last ( regexp::UnboundedRegExp const & re ); - - /** - * @param re RegExp to probe - * @param symbol GlushkovSymbol for which we need the follow() - * @return all symbols that can follow specific symbol in word - */ - static std::set < GlushkovSymbol > follow ( regexp::UnboundedRegExp const & re, GlushkovSymbol const & symbol ); - - /** - * @param re RegExp to probe - * @return symbols of regexp tree in order of they occurence in regexp. - */ - static std::set < GlushkovSymbol > getSymbols ( regexp::UnboundedRegExp const & re ); - -private: - /** - * @param symbol ptr to symbol - * @param symbolSet set of gl.symbols - * @return GlushkovSymbol equivalent for RegExpSymbol pointer - */ - static GlushkovSymbol const & findSymbol ( regexp::UnboundedRegExpSymbol const * const symbol, const std::set < GlushkovSymbol > & symbolSet ); - - /** - * @return bool true if symbol pointer is in this subtree - */ - static bool pos ( GlushkovSymbol const & symbol, regexp::UnboundedRegExp const * const & node ); - - static void getSymbols ( regexp::UnboundedRegExpElement const * const & node, std::set < GlushkovSymbol > & alphabet, int & i ); - - static std::set < regexp::UnboundedRegExpSymbol const * > first ( regexp::UnboundedRegExpElement const * const & node ); - static std::set < regexp::UnboundedRegExpSymbol const * > first ( regexp::UnboundedRegExpAlternation const * const & node ); - static std::set < regexp::UnboundedRegExpSymbol const * > first ( regexp::UnboundedRegExpConcatenation const * const & node ); - static std::set < regexp::UnboundedRegExpSymbol const * > first ( regexp::UnboundedRegExpIteration const * const & node ); - static std::set < regexp::UnboundedRegExpSymbol const * > first ( regexp::UnboundedRegExpSymbol const * const & node ); - static std::set < regexp::UnboundedRegExpSymbol const * > first ( regexp::UnboundedRegExpEmpty const * const & node ); - static std::set < regexp::UnboundedRegExpSymbol const * > first ( regexp::UnboundedRegExpEpsilon const * const & node ); - - static std::set < regexp::UnboundedRegExpSymbol const * > last ( regexp::UnboundedRegExpElement const * const & node ); - static std::set < regexp::UnboundedRegExpSymbol const * > last ( regexp::UnboundedRegExpAlternation const * const & node ); - static std::set < regexp::UnboundedRegExpSymbol const * > last ( regexp::UnboundedRegExpConcatenation const * const & node ); - static std::set < regexp::UnboundedRegExpSymbol const * > last ( regexp::UnboundedRegExpIteration const * const & node ); - static std::set < regexp::UnboundedRegExpSymbol const * > last ( regexp::UnboundedRegExpSymbol const * const & node ); - static std::set < regexp::UnboundedRegExpSymbol const * > last ( regexp::UnboundedRegExpEmpty const * const & node ); - static std::set < regexp::UnboundedRegExpSymbol const * > last ( regexp::UnboundedRegExpEpsilon const * const & node ); - - static bool pos ( regexp::UnboundedRegExpElement const * const & node, regexp::UnboundedRegExpSymbol const * const & symbSearch ); - static bool pos ( regexp::UnboundedRegExpAlternation const * const & node, regexp::UnboundedRegExpSymbol const * const & symbSearch ); - static bool pos ( regexp::UnboundedRegExpConcatenation const * const & node, regexp::UnboundedRegExpSymbol const * const & symbSearch ); - static bool pos ( regexp::UnboundedRegExpIteration const * const & node, regexp::UnboundedRegExpSymbol const * const & symbSearch ); - static bool pos ( regexp::UnboundedRegExpSymbol const * const & node, regexp::UnboundedRegExpSymbol const * const & symbSearch ); - static bool pos ( regexp::UnboundedRegExpEmpty const * const & node, regexp::UnboundedRegExpSymbol const * const & symbSearch ); - static bool pos ( regexp::UnboundedRegExpEpsilon const * const & node, regexp::UnboundedRegExpSymbol const * const & symbSearch ); - - static std::set < regexp::UnboundedRegExpSymbol const * > follow ( regexp::UnboundedRegExpElement const * const & node, regexp::UnboundedRegExpSymbol const * const & symbFollow ); - static std::set < regexp::UnboundedRegExpSymbol const * > follow ( regexp::UnboundedRegExpAlternation const * const & node, regexp::UnboundedRegExpSymbol const * const & symbFollow ); - static std::set < regexp::UnboundedRegExpSymbol const * > follow ( regexp::UnboundedRegExpConcatenation const * const & node, regexp::UnboundedRegExpSymbol const * const & symbFollow ); - static std::set < regexp::UnboundedRegExpSymbol const * > follow ( regexp::UnboundedRegExpIteration const * const & node, regexp::UnboundedRegExpSymbol const * const & symbFollow ); - static std::set < regexp::UnboundedRegExpSymbol const * > follow ( regexp::UnboundedRegExpSymbol const * const & node, regexp::UnboundedRegExpSymbol const * const & symbFollow ); - static std::set < regexp::UnboundedRegExpSymbol const * > follow ( regexp::UnboundedRegExpEmpty const * const & node, regexp::UnboundedRegExpSymbol const * const & symbFollow ); - static std::set < regexp::UnboundedRegExpSymbol const * > follow ( regexp::UnboundedRegExpEpsilon const * const & node, regexp::UnboundedRegExpSymbol const * const & symbFollow ); -}; - -} /* namespace conversions */ - -#endif /* GLUSHKOVTRAVERSAL_H_ */ diff --git a/alib2algo/src/regexp/convert/ToAutomatonGlushkov.cpp b/alib2algo/src/regexp/convert/ToAutomatonGlushkov.cpp index 9cf17b3d95..3f29833ae0 100644 --- a/alib2algo/src/regexp/convert/ToAutomatonGlushkov.cpp +++ b/alib2algo/src/regexp/convert/ToAutomatonGlushkov.cpp @@ -14,9 +14,7 @@ #include "object/Object.h" -#include "../GlushkovTraversal.h" -#include "../GlushkovPair.h" -#include "../GlushkovSymbol.h" +#include "../glushkov/GlushkovTraversal.h" #include "../properties/RegExpEpsilon.h" #include <exception/CommonException.h> @@ -36,44 +34,32 @@ automaton::NFA ToAutomatonGlushkov::convert(const regexp::UnboundedRegExp& regex // step 1 automaton.setInputAlphabet(regexp.getAlphabet()); + regexp::UnboundedRegExp indexedRegExp = regexp::GlushkovTraversal::indexate(regexp); // steps 2, 3, 4 - std::set<regexp::GlushkovPair> pairs; - const std::set<regexp::GlushkovSymbol> first = regexp::GlushkovTraversal::first(regexp); - const std::set<regexp::GlushkovSymbol> last = regexp::GlushkovTraversal::last(regexp); - for( auto const& x : regexp::GlushkovTraversal::getSymbols( regexp ) ) - for( auto const& f : regexp::GlushkovTraversal::follow( regexp, x ) ) { - pairs.insert( regexp::GlushkovPair( x, f ) ); - } + const std::set<regexp::UnboundedRegExpSymbol> first = regexp::GlushkovTraversal::first(indexedRegExp); + const std::set<regexp::UnboundedRegExpSymbol> last = regexp::GlushkovTraversal::last(indexedRegExp); // \e in q0 check is in step 7 // step 5 - std::map<regexp::GlushkovSymbol, automaton::State> stateMap; - - for( auto const& symbol : regexp::GlushkovTraversal::getSymbols( regexp ) ) { - automaton::State q( label::Label( label::LabelPairLabel( std::make_pair( label::Label( label::ObjectLabel( alib::Object( symbol.getInputSymbol( ).getData() ) ) ), label::labelFrom( symbol.getId( ) ) ) ) ) ); - - stateMap.insert( std::make_pair( symbol, q ) ); - automaton.addState( q ); - } + for( auto const& symbol : indexedRegExp.getAlphabet()) + automaton.addState( automaton::State ( label::Label( label::ObjectLabel( alib::Object( symbol.getData() ) ) ) ) ); // step 6 for( auto const& symbol : first ) { - const automaton::State & q = stateMap.find( symbol )->second; - - automaton.addTransition( q0, symbol.getInputSymbol( ), q ); + automaton.addTransition( q0, regexp::GlushkovTraversal::getSymbolFromGlushkovPair(symbol.getSymbol()), automaton::State ( label::Label( label::ObjectLabel( alib::Object( symbol.getSymbol( ).getData() ) ) ) ) ); } - for( auto const& pair : pairs ) { - const automaton::State & p = stateMap.find( pair.getFirst( ) )->second; - const automaton::State & q = stateMap.find( pair.getSecond( ) )->second; + for(const auto& x : indexedRegExp.getAlphabet()) + for(const auto& f : regexp::GlushkovTraversal::follow(indexedRegExp, UnboundedRegExpSymbol ( x ))) { + const alphabet::Symbol& p = x; + const alphabet::Symbol& q = f.getSymbol(); - automaton.addTransition( p, pair.getSecond( ).getInputSymbol( ), q ); - } + automaton.addTransition( automaton::State ( label::Label( label::ObjectLabel( alib::Object( p.getData( ) ) ) ) ), regexp::GlushkovTraversal::getSymbolFromGlushkovPair(q), automaton::State ( label::Label( label::ObjectLabel( alib::Object( q.getData( ) ) ) ) ) ); + } // step 7 for( auto const& symbol : last ) { - const automaton::State & q = stateMap.find( symbol )->second; - automaton.addFinalState( q ); + automaton.addFinalState( automaton::State ( label::Label( label::ObjectLabel( alib::Object( symbol.getSymbol( ).getData() ) ) ) ) ); } if(regexp::properties::RegExpEpsilon::languageContainsEpsilon(regexp)) diff --git a/alib2algo/src/regexp/convert/ToGrammarRightRGGlushkov.cpp b/alib2algo/src/regexp/convert/ToGrammarRightRGGlushkov.cpp index d0db89f812..704eb641a5 100644 --- a/alib2algo/src/regexp/convert/ToGrammarRightRGGlushkov.cpp +++ b/alib2algo/src/regexp/convert/ToGrammarRightRGGlushkov.cpp @@ -16,9 +16,7 @@ #include <object/Object.h> -#include "../GlushkovTraversal.h" -#include "../GlushkovPair.h" -#include "../GlushkovSymbol.h" +#include "../glushkov/GlushkovTraversal.h" #include "../properties/RegExpEpsilon.h" #include <exception/CommonException.h> @@ -38,48 +36,44 @@ grammar::RightRG ToGrammarRightRGGlushkov::convert(const regexp::UnboundedRegExp // step 1 grammar.setTerminalAlphabet(regexp.getAlphabet()); + regexp::UnboundedRegExp indexedRegExp = regexp::GlushkovTraversal::indexate(regexp); // steps 2, 3, 4 - std::set<regexp::GlushkovPair> pairs; - const std::set<regexp::GlushkovSymbol> first = regexp::GlushkovTraversal::first(regexp); - const std::set<regexp::GlushkovSymbol> last = regexp::GlushkovTraversal::last(regexp); - for(const auto& x : regexp::GlushkovTraversal::getSymbols(regexp)) - for(const auto& f : regexp::GlushkovTraversal::follow(regexp, x)) - pairs.insert(regexp::GlushkovPair(x, f)); + const std::set<regexp::UnboundedRegExpSymbol> first = regexp::GlushkovTraversal::first(indexedRegExp); + const std::set<regexp::UnboundedRegExpSymbol> last = regexp::GlushkovTraversal::last(indexedRegExp); // \e in q0 check is in step 7 // step 5 - std::map<regexp::GlushkovSymbol, alphabet::Symbol> symbolMap; - - for(const auto& symbol : regexp::GlushkovTraversal::getSymbols(regexp)) { - alphabet::Symbol nt(alphabet::LabeledSymbol(label::Label(label::LabelPairLabel(std::make_pair(label::Label( label::ObjectLabel( alib::Object( symbol.getInputSymbol( ).getData() ) ) ), label::labelFrom(symbol.getId())))))); - symbolMap.insert(std::make_pair(symbol, nt)); - grammar.addNonterminalSymbol(nt); - } + for(const auto & symbol : indexedRegExp.getAlphabet()) + grammar.addNonterminalSymbol (symbol); // step 6 for(const auto& symbol : first) - grammar.addRule(S, std::make_pair(symbol.getInputSymbol(), symbolMap.find(symbol)->second)); + grammar.addRule(S, std::make_pair(regexp::GlushkovTraversal::getSymbolFromGlushkovPair(symbol.getSymbol()), symbol.getSymbol())); - for(const auto& pair : pairs) { - const alphabet::Symbol& a = symbolMap.find(pair.getFirst())->second; - const alphabet::Symbol& b = symbolMap.find(pair.getSecond())->second; - grammar.addRule(a, std::make_pair(pair.getSecond().getInputSymbol(), b)); - } + for(const auto& x : indexedRegExp.getAlphabet()) + for(const auto& f : regexp::GlushkovTraversal::follow(indexedRegExp, UnboundedRegExpSymbol ( x ))) { + const alphabet::Symbol& a = x; + const alphabet::Symbol& b = f.getSymbol(); + + grammar.addRule(a, std::make_pair(regexp::GlushkovTraversal::getSymbolFromGlushkovPair(b), b)); + } // step 7 - for(const auto& symbol : last) { - /* - * for all rules where ns.m_nonTerminal is on rightSide: - * add Rule: leftSide -> symbol.getSymbol( ) - * unless it already exists - */ + /* + * for all rules where ns.m_nonTerminal is on rightSide: + * add Rule: leftSide -> symbol.getSymbol( ) + * unless it already exists + */ + for(const auto& rule : grammar.getRawRules()) { + for(const auto& rhs : rule.second) { + for(const auto& symbol : last) { - const alphabet::Symbol& a = symbolMap.find(symbol)->second; + const alphabet::Symbol& a = symbol.getSymbol(); - for(const auto& rule : grammar.getRawRules()) - for(const auto& rhs : rule.second) if(std::find(rhs.begin(), rhs.end(), a) != rhs.end()) grammar.addRule(rule.first, rhs.at(0)); + } + } } if(regexp::properties::RegExpEpsilon::languageContainsEpsilon(regexp)) diff --git a/alib2algo/src/regexp/glushkov/GlushkovTraversal.cpp b/alib2algo/src/regexp/glushkov/GlushkovTraversal.cpp new file mode 100644 index 0000000000..a757df8d3d --- /dev/null +++ b/alib2algo/src/regexp/glushkov/GlushkovTraversal.cpp @@ -0,0 +1,368 @@ +/* + * GlushkovTraversal.cpp + * + * Created on: 13. 3. 2014 + * Author: Tomas Pecka + */ + +#include "GlushkovTraversal.h" + +#include "../properties/RegExpEpsilon.h" +#include <alphabet/SymbolPairSymbol.h> + +#include <exception/CommonException.h> + +namespace regexp { + + +alphabet::Symbol GlushkovTraversal::getSymbolFromGlushkovPair(const alphabet::Symbol & symbol) { + const alphabet::SymbolPairSymbol & symbolPair = (const alphabet::SymbolPairSymbol&) symbol.getData(); + return symbolPair.getData().first; +} + +bool GlushkovTraversal::pos ( const UnboundedRegExpSymbol & symbol, const regexp::UnboundedRegExp & node ) { + return pos ( node.getRegExp ( ), symbol ); +} + +std::set < UnboundedRegExpSymbol > GlushkovTraversal::first ( const regexp::UnboundedRegExp & re ) { + return first ( re.getRegExp ( ) ); +} + +std::set < UnboundedRegExpSymbol > GlushkovTraversal::last ( const regexp::UnboundedRegExp & re ) { + return last ( re.getRegExp ( ) ); +} + +std::set < UnboundedRegExpSymbol > GlushkovTraversal::follow ( const regexp::UnboundedRegExp & re, const UnboundedRegExpSymbol & symbol ) { + return follow ( re.getRegExp ( ), symbol ); +} + +// ----------------------------------------------------------------------------- + +std::set < regexp::UnboundedRegExpSymbol > GlushkovTraversal::first ( const regexp::UnboundedRegExpElement & node ) { + const regexp::UnboundedRegExpAlternation * alternation = dynamic_cast < const regexp::UnboundedRegExpAlternation * > ( & node ); + const regexp::UnboundedRegExpConcatenation * concatenation = dynamic_cast < const regexp::UnboundedRegExpConcatenation * > ( & node ); + const regexp::UnboundedRegExpIteration * iteration = dynamic_cast < const regexp::UnboundedRegExpIteration * > ( & node ); + const regexp::UnboundedRegExpSymbol * symbol = dynamic_cast < const regexp::UnboundedRegExpSymbol * > ( & node ); + const regexp::UnboundedRegExpEmpty * empty = dynamic_cast < const regexp::UnboundedRegExpEmpty * > ( & node ); + const regexp::UnboundedRegExpEpsilon * eps = dynamic_cast < const regexp::UnboundedRegExpEpsilon * > ( & node ); + + if ( alternation ) + return first ( * alternation ); + else if ( concatenation ) + return first ( * concatenation ); + else if ( iteration ) + return first ( * iteration ); + else if ( eps ) + return first ( * eps ); + else if ( empty ) + return first ( * empty ); + else if ( symbol ) + return first ( * symbol ); + + throw exception::CommonException ( "GlushkovTraversal::first - invalid RegExpElement node" ); +} + +std::set < regexp::UnboundedRegExpSymbol > GlushkovTraversal::first ( const regexp::UnboundedRegExpAlternation & node ) { + std::set < regexp::UnboundedRegExpSymbol > ret, tmp; + + for ( auto const & element : node.getElements ( ) ) { + tmp = first ( * element ); + ret.insert ( tmp.begin ( ), tmp.end ( ) ); + } + + return ret; +} + +std::set < regexp::UnboundedRegExpSymbol > GlushkovTraversal::first ( const regexp::UnboundedRegExpConcatenation & node ) { + std::set < regexp::UnboundedRegExpSymbol > ret, tmp; + + for ( auto const & element : node.getElements ( ) ) { + tmp = first ( * element ); + ret.insert ( tmp.begin ( ), tmp.end ( ) ); + + if ( !regexp::properties::RegExpEpsilon::languageContainsEpsilon ( * element ) ) // If regexp of this subtree can match epsilon, then we need to add next subtree + break; + } + + return ret; +} + +std::set < regexp::UnboundedRegExpSymbol > GlushkovTraversal::first ( const regexp::UnboundedRegExpIteration & node ) { + return first ( node.getElement ( ) ); +} + +std::set < regexp::UnboundedRegExpSymbol > GlushkovTraversal::first ( const regexp::UnboundedRegExpSymbol & node ) { + return std::set < regexp::UnboundedRegExpSymbol > { node }; +} + +std::set < regexp::UnboundedRegExpSymbol > GlushkovTraversal::first ( const regexp::UnboundedRegExpEpsilon & /* node */ ) { + return std::set < regexp::UnboundedRegExpSymbol > ( ); +} + +std::set < regexp::UnboundedRegExpSymbol > GlushkovTraversal::first ( const regexp::UnboundedRegExpEmpty & /* node */ ) { + return std::set < regexp::UnboundedRegExpSymbol > ( ); +} + +// ---------------------------------------------------------------------------- + +std::set < regexp::UnboundedRegExpSymbol > GlushkovTraversal::last ( const regexp::UnboundedRegExpElement & node ) { + const regexp::UnboundedRegExpAlternation * alternation = dynamic_cast < const regexp::UnboundedRegExpAlternation * > ( & node ); + const regexp::UnboundedRegExpConcatenation * concatenation = dynamic_cast < const regexp::UnboundedRegExpConcatenation * > ( & node ); + const regexp::UnboundedRegExpIteration * iteration = dynamic_cast < const regexp::UnboundedRegExpIteration * > ( & node ); + const regexp::UnboundedRegExpSymbol * symbol = dynamic_cast < const regexp::UnboundedRegExpSymbol * > ( & node ); + const regexp::UnboundedRegExpEmpty * empty = dynamic_cast < const regexp::UnboundedRegExpEmpty * > ( & node ); + const regexp::UnboundedRegExpEpsilon * eps = dynamic_cast < const regexp::UnboundedRegExpEpsilon * > ( & node ); + + if ( symbol ) + return last ( * symbol ); + else if ( alternation ) + return last ( * alternation ); + else if ( concatenation ) + return last ( * concatenation ); + else if ( iteration ) + return last ( * iteration ); + else if ( eps ) + return last ( * eps ); + else if ( empty ) + return last ( * empty ); + + throw exception::CommonException ( "GlushkovTraversal::last - invalid RegExpElement node" ); +} + +std::set < regexp::UnboundedRegExpSymbol > GlushkovTraversal::last ( const regexp::UnboundedRegExpAlternation & node ) { + std::set < regexp::UnboundedRegExpSymbol > ret; + + for ( auto const & element : node.getElements ( ) ) { + std::set < regexp::UnboundedRegExpSymbol > tmp = last ( * element ); + ret.insert ( tmp.begin ( ), tmp.end ( ) ); + } + + return ret; +} + +std::set < regexp::UnboundedRegExpSymbol > GlushkovTraversal::last ( const regexp::UnboundedRegExpConcatenation & node ) { + std::set < regexp::UnboundedRegExpSymbol > ret; + + for ( auto it = node.getElements ( ).rbegin ( ); it != node.getElements ( ).rend ( ); it++ ) { + std::set < regexp::UnboundedRegExpSymbol > tmp = last ( * * it ); + ret.insert ( tmp.begin ( ), tmp.end ( ) ); + + if ( !regexp::properties::RegExpEpsilon::languageContainsEpsilon ( * * it ) ) + break; + } + + return ret; +} + +std::set < regexp::UnboundedRegExpSymbol > GlushkovTraversal::last ( const regexp::UnboundedRegExpIteration & node ) { + return last ( node.getElement ( ) ); +} + +std::set < regexp::UnboundedRegExpSymbol > GlushkovTraversal::last ( const regexp::UnboundedRegExpSymbol & node ) { + return std::set < regexp::UnboundedRegExpSymbol > { node }; +} + +std::set < regexp::UnboundedRegExpSymbol > GlushkovTraversal::last ( const regexp::UnboundedRegExpEpsilon & /* node */ ) { + return std::set < regexp::UnboundedRegExpSymbol > ( ); +} + +std::set < regexp::UnboundedRegExpSymbol > GlushkovTraversal::last ( const regexp::UnboundedRegExpEmpty & /* node */ ) { + return std::set < regexp::UnboundedRegExpSymbol > ( ); +} + +// ---------------------------------------------------------------------------- + +std::set < regexp::UnboundedRegExpSymbol > GlushkovTraversal::follow ( const regexp::UnboundedRegExpElement & node, const regexp::UnboundedRegExpSymbol & symbolptr ) { + const regexp::UnboundedRegExpAlternation * alternation = dynamic_cast < const regexp::UnboundedRegExpAlternation * > ( & node ); + const regexp::UnboundedRegExpConcatenation * concatenation = dynamic_cast < const regexp::UnboundedRegExpConcatenation * > ( & node ); + const regexp::UnboundedRegExpIteration * iteration = dynamic_cast < const regexp::UnboundedRegExpIteration * > ( & node ); + const regexp::UnboundedRegExpSymbol * symbol = dynamic_cast < const regexp::UnboundedRegExpSymbol * > ( & node ); + const regexp::UnboundedRegExpEmpty * empty = dynamic_cast < const regexp::UnboundedRegExpEmpty * > ( & node ); + const regexp::UnboundedRegExpEpsilon * eps = dynamic_cast < const regexp::UnboundedRegExpEpsilon * > ( & node ); + + if ( alternation ) + return follow ( * alternation, symbolptr ); + + else if ( concatenation ) + return follow ( * concatenation, symbolptr ); + + else if ( iteration ) + return follow ( * iteration, symbolptr ); + + else if ( symbol ) + return follow ( * symbol, symbolptr ); + + else if ( empty ) + return follow ( * empty, symbolptr ); + + else if ( eps ) + return follow ( * eps, symbolptr ); + + throw exception::CommonException ( "GlushkovTraversal::follow() - unknown RegExpElement node" ); +} + +std::set < regexp::UnboundedRegExpSymbol > GlushkovTraversal::follow ( const regexp::UnboundedRegExpAlternation & node, const regexp::UnboundedRegExpSymbol & symbolptr ) { + for ( auto const & element : node.getElements ( ) ) + if ( pos ( * element, symbolptr ) ) + return follow ( * element, symbolptr ); + + throw exception::CommonException ( "GlushkovTraversal::follow(Alt)" ); +} + +std::set < regexp::UnboundedRegExpSymbol > GlushkovTraversal::follow ( const regexp::UnboundedRegExpConcatenation & node, const regexp::UnboundedRegExpSymbol & symbolptr ) { + std::set < regexp::UnboundedRegExpSymbol > ret, tmp, lastSet; + + for ( auto e = node.getElements ( ).begin ( ); e != node.getElements ( ).end ( ); e++ ) { + if ( !pos ( * * e, symbolptr ) ) + continue; + + tmp = follow ( * * e, symbolptr ); + ret.insert ( tmp.begin ( ), tmp.end ( ) ); + + lastSet = last ( * * e ); + + if ( lastSet.find ( symbolptr ) != lastSet.end ( ) ) + for ( auto f = next ( e ); f != node.getElements ( ).end ( ); f++ ) { + tmp = first ( * * f ); + ret.insert ( tmp.begin ( ), tmp.end ( ) ); + + if ( !regexp::properties::RegExpEpsilon::languageContainsEpsilon ( * * f ) ) + break; + } + + } + + return ret; +} + +std::set < regexp::UnboundedRegExpSymbol > GlushkovTraversal::follow ( const regexp::UnboundedRegExpIteration & node, const regexp::UnboundedRegExpSymbol & symbolptr ) { + std::set < regexp::UnboundedRegExpSymbol > ret = follow ( node.getElement ( ), symbolptr ); + std::set < regexp::UnboundedRegExpSymbol > lastSet = last ( node.getElement ( ) ); + + if ( lastSet.find ( symbolptr ) != lastSet.end ( ) ) { + std::set < regexp::UnboundedRegExpSymbol > firstSet = first ( node.getElement ( ) ); + ret.insert ( firstSet.begin ( ), firstSet.end ( ) ); + } + + return ret; +} + +std::set < regexp::UnboundedRegExpSymbol > GlushkovTraversal::follow ( const regexp::UnboundedRegExpSymbol & /* node */, const regexp::UnboundedRegExpSymbol & /* symbolptr */ ) { + return std::set < regexp::UnboundedRegExpSymbol > ( ); +} + +std::set < regexp::UnboundedRegExpSymbol > GlushkovTraversal::follow ( const regexp::UnboundedRegExpEmpty & /* node */, const regexp::UnboundedRegExpSymbol & /* symbolptr */ ) { + return std::set < regexp::UnboundedRegExpSymbol > ( ); +} + +std::set < regexp::UnboundedRegExpSymbol > GlushkovTraversal::follow ( const regexp::UnboundedRegExpEpsilon & /* node */, const regexp::UnboundedRegExpSymbol & /* symbolptr */ ) { + return std::set < regexp::UnboundedRegExpSymbol > ( ); +} + +// ---------------------------------------------------------------------------- + +bool GlushkovTraversal::pos ( const regexp::UnboundedRegExpElement & node, const regexp::UnboundedRegExpSymbol & symbolptr ) { + const regexp::UnboundedRegExpAlternation * alternation = dynamic_cast < const regexp::UnboundedRegExpAlternation * > ( & node ); + const regexp::UnboundedRegExpConcatenation * concatenation = dynamic_cast < const regexp::UnboundedRegExpConcatenation * > ( & node ); + const regexp::UnboundedRegExpIteration * iteration = dynamic_cast < const regexp::UnboundedRegExpIteration * > ( & node ); + const regexp::UnboundedRegExpSymbol * symbol = dynamic_cast < const regexp::UnboundedRegExpSymbol * > ( & node ); + const regexp::UnboundedRegExpEmpty * empty = dynamic_cast < const regexp::UnboundedRegExpEmpty * > ( & node ); + const regexp::UnboundedRegExpEpsilon * eps = dynamic_cast < const regexp::UnboundedRegExpEpsilon * > ( & node ); + + if ( alternation ) + return pos ( * alternation, symbolptr ); + + else if ( concatenation ) + return pos ( * concatenation, symbolptr ); + + else if ( iteration ) + return pos ( * iteration, symbolptr ); + + else if ( symbol ) + return pos ( * symbol, symbolptr ); + + else if ( empty ) + return pos ( * empty, symbolptr ); + + else if ( eps ) + return pos ( * eps, symbolptr ); + + throw exception::CommonException ( "GlushkovTraversal::pos() - unknown RegExpElement node" ); +} + +bool GlushkovTraversal::pos ( const regexp::UnboundedRegExpAlternation & node, const regexp::UnboundedRegExpSymbol & symbol ) { + for ( const auto & element : node.getElements ( ) ) + if ( pos ( * element, symbol ) ) + return true; + + return false; +} + +bool GlushkovTraversal::pos ( const regexp::UnboundedRegExpConcatenation & node, const regexp::UnboundedRegExpSymbol & symbol ) { + for ( const auto & element : node.getElements ( ) ) + if ( pos ( * element, symbol ) ) + return true; + + return false; +} + +bool GlushkovTraversal::pos ( const regexp::UnboundedRegExpIteration & node, const regexp::UnboundedRegExpSymbol & symbol ) { + return pos ( node.getElement ( ), symbol ); +} + +bool GlushkovTraversal::pos ( const regexp::UnboundedRegExpSymbol & node, const regexp::UnboundedRegExpSymbol & symbol ) { + return symbol == node; +} + +bool GlushkovTraversal::pos ( const regexp::UnboundedRegExpEmpty & /* node */, const regexp::UnboundedRegExpSymbol & /* symbol */ ) { + return false; +} + +bool GlushkovTraversal::pos ( const regexp::UnboundedRegExpEpsilon & /* node */, const regexp::UnboundedRegExpSymbol & /* symbol */ ) { + return false; +} + +// ---------------------------------------------------------------------------- + +UnboundedRegExp GlushkovTraversal::indexate ( const regexp::UnboundedRegExp & re ) { + int i = 1; + + return UnboundedRegExp ( indexate ( re.getRegExp ( ), i ) ); +} + +std::rvalue_ref < UnboundedRegExpElement > GlushkovTraversal::indexate ( const regexp::UnboundedRegExpElement & node, int & i ) { + const regexp::UnboundedRegExpAlternation * alternation = dynamic_cast < const regexp::UnboundedRegExpAlternation * > ( & node ); + const regexp::UnboundedRegExpConcatenation * concatenation = dynamic_cast < const regexp::UnboundedRegExpConcatenation * > ( & node ); + const regexp::UnboundedRegExpIteration * iteration = dynamic_cast < const regexp::UnboundedRegExpIteration * > ( & node ); + const regexp::UnboundedRegExpSymbol * symbol = dynamic_cast < const regexp::UnboundedRegExpSymbol * > ( & node ); + const regexp::UnboundedRegExpEmpty * empty = dynamic_cast < const regexp::UnboundedRegExpEmpty * > ( & node ); + const regexp::UnboundedRegExpEpsilon * eps = dynamic_cast < const regexp::UnboundedRegExpEpsilon * > ( & node ); + + if ( symbol ) { + return std::rvalue_ref < UnboundedRegExpElement > ( new UnboundedRegExpSymbol ( alphabet::Symbol ( alphabet::SymbolPairSymbol ( std::make_pair ( symbol->getSymbol ( ), alphabet::symbolFrom ( i++ ) ) ) ) ) ); + } else if ( alternation ) { + UnboundedRegExpAlternation * alt = new UnboundedRegExpAlternation(); + + for ( const auto & element : alternation->getElements ( ) ) + alt->appendElement ( indexate ( * element, i ) ); + + return std::rvalue_ref < UnboundedRegExpElement > ( alt ); + } else if ( concatenation ) { + UnboundedRegExpConcatenation * con = new UnboundedRegExpConcatenation(); + + for ( const auto & element : concatenation->getElements ( ) ) + con->appendElement ( indexate ( * element, i ) ); + + return std::rvalue_ref < UnboundedRegExpElement > ( con ); + } else if ( iteration ) { + return std::rvalue_ref < UnboundedRegExpElement > ( new UnboundedRegExpIteration ( indexate ( iteration->getElement ( ), i ) ) ); + } else if ( empty ) { + return std::rvalue_ref < UnboundedRegExpElement > ( new UnboundedRegExpEmpty ( ) ); + } else if ( eps ) { + return std::rvalue_ref < UnboundedRegExpElement > ( new UnboundedRegExpEpsilon ( ) ); + } else { + throw exception::CommonException ( "GlushkovTraversal::getSymbols() - unknown RegExpElement node" ); + } +} + +} /* namespace conversions */ diff --git a/alib2algo/src/regexp/glushkov/GlushkovTraversal.h b/alib2algo/src/regexp/glushkov/GlushkovTraversal.h new file mode 100644 index 0000000000..b169058d1e --- /dev/null +++ b/alib2algo/src/regexp/glushkov/GlushkovTraversal.h @@ -0,0 +1,104 @@ +/* + * GlushkovTraversal.h + * + * Created on: 13. 3. 2014 + * Author: Tomas Pecka + */ + +#ifndef GLUSHKOVTRAVERSAL_H_ +#define GLUSHKOVTRAVERSAL_H_ + +#include <set> +#include <list> +#include <algorithm> + +#include <regexp/unbounded/UnboundedRegExp.h> +#include <regexp/unbounded/UnboundedRegExpElement.h> +#include <regexp/unbounded/UnboundedRegExpAlternation.h> +#include <regexp/unbounded/UnboundedRegExpConcatenation.h> +#include <regexp/unbounded/UnboundedRegExpIteration.h> +#include <regexp/unbounded/UnboundedRegExpSymbol.h> +#include <regexp/unbounded/UnboundedRegExpEmpty.h> +#include <regexp/unbounded/UnboundedRegExpEpsilon.h> + +namespace regexp { + +/** + * RegExp tree traversal utils for Glushkov algorithm. + * + * Thanks to http://www.sciencedirect.com/science/article/pii/S030439759700296X for better follow() solution. + */ +class GlushkovTraversal { +public: + static alphabet::Symbol getSymbolFromGlushkovPair(const alphabet::Symbol & symbol); + + /** + * @param re RegExp to probe + * @return all RegExpSymbols whichcan start the word. + */ + static std::set < UnboundedRegExpSymbol > first ( const regexp::UnboundedRegExp & re ); + + /** + * @param re RegExp to probe + * @return all RegExpSymbols that can terminate the word. + */ + static std::set < UnboundedRegExpSymbol > last ( const regexp::UnboundedRegExp & re ); + + /** + * @param re RegExp to probe + * @param symbol GlushkovSymbol for which we need the follow() + * @return all symbols that can follow specific symbol in word + */ + static std::set < UnboundedRegExpSymbol > follow ( const regexp::UnboundedRegExp & re, const UnboundedRegExpSymbol & symbol ); + + /** + * @param re RegExp to probe + * @return symbols of regexp tree in order of they occurence in regexp. + */ + static regexp::UnboundedRegExp indexate ( const regexp::UnboundedRegExp & re ); + +private: + + /** + * @return bool true if symbol pointer is in this subtree + */ + static bool pos ( const UnboundedRegExpSymbol & symbol, const regexp::UnboundedRegExp & node ); + + static std::rvalue_ref < UnboundedRegExpElement > indexate ( const regexp::UnboundedRegExpElement & node, int & i ); + + static std::set < regexp::UnboundedRegExpSymbol > first ( const regexp::UnboundedRegExpElement & node ); + static std::set < regexp::UnboundedRegExpSymbol > first ( const regexp::UnboundedRegExpAlternation & node ); + static std::set < regexp::UnboundedRegExpSymbol > first ( const regexp::UnboundedRegExpConcatenation & node ); + static std::set < regexp::UnboundedRegExpSymbol > first ( const regexp::UnboundedRegExpIteration & node ); + static std::set < regexp::UnboundedRegExpSymbol > first ( const regexp::UnboundedRegExpSymbol & node ); + static std::set < regexp::UnboundedRegExpSymbol > first ( const regexp::UnboundedRegExpEmpty & node ); + static std::set < regexp::UnboundedRegExpSymbol > first ( const regexp::UnboundedRegExpEpsilon & node ); + + static std::set < regexp::UnboundedRegExpSymbol > last ( const regexp::UnboundedRegExpElement & node ); + static std::set < regexp::UnboundedRegExpSymbol > last ( const regexp::UnboundedRegExpAlternation & node ); + static std::set < regexp::UnboundedRegExpSymbol > last ( const regexp::UnboundedRegExpConcatenation & node ); + static std::set < regexp::UnboundedRegExpSymbol > last ( const regexp::UnboundedRegExpIteration & node ); + static std::set < regexp::UnboundedRegExpSymbol > last ( const regexp::UnboundedRegExpSymbol & node ); + static std::set < regexp::UnboundedRegExpSymbol > last ( const regexp::UnboundedRegExpEmpty & node ); + static std::set < regexp::UnboundedRegExpSymbol > last ( const regexp::UnboundedRegExpEpsilon & node ); + + static bool pos ( const regexp::UnboundedRegExpElement & node, const regexp::UnboundedRegExpSymbol & symbSearch ); + static bool pos ( const regexp::UnboundedRegExpAlternation & node, const regexp::UnboundedRegExpSymbol & symbSearch ); + static bool pos ( const regexp::UnboundedRegExpConcatenation & node, const regexp::UnboundedRegExpSymbol & symbSearch ); + static bool pos ( const regexp::UnboundedRegExpIteration & node, const regexp::UnboundedRegExpSymbol & symbSearch ); + static bool pos ( const regexp::UnboundedRegExpSymbol & node, const regexp::UnboundedRegExpSymbol & symbSearch ); + static bool pos ( const regexp::UnboundedRegExpEmpty & node, const regexp::UnboundedRegExpSymbol & symbSearch ); + static bool pos ( const regexp::UnboundedRegExpEpsilon & node, const regexp::UnboundedRegExpSymbol & symbSearch ); + + static std::set < regexp::UnboundedRegExpSymbol > follow ( const regexp::UnboundedRegExpElement & node, const regexp::UnboundedRegExpSymbol & symbFollow ); + static std::set < regexp::UnboundedRegExpSymbol > follow ( const regexp::UnboundedRegExpAlternation & node, const regexp::UnboundedRegExpSymbol & symbFollow ); + static std::set < regexp::UnboundedRegExpSymbol > follow ( const regexp::UnboundedRegExpConcatenation & node, const regexp::UnboundedRegExpSymbol & symbFollow ); + static std::set < regexp::UnboundedRegExpSymbol > follow ( const regexp::UnboundedRegExpIteration & node, const regexp::UnboundedRegExpSymbol & symbFollow ); + static std::set < regexp::UnboundedRegExpSymbol > follow ( const regexp::UnboundedRegExpSymbol & node, const regexp::UnboundedRegExpSymbol & symbFollow ); + static std::set < regexp::UnboundedRegExpSymbol > follow ( const regexp::UnboundedRegExpEmpty & node, const regexp::UnboundedRegExpSymbol & symbFollow ); + static std::set < regexp::UnboundedRegExpSymbol > follow ( const regexp::UnboundedRegExpEpsilon & node, const regexp::UnboundedRegExpSymbol & symbFollow ); +}; + +} /* namespace conversions */ + +#endif /* GLUSHKOVTRAVERSAL_H_ */ diff --git a/alib2algo/test-src/regexp/RegExpTest.cpp b/alib2algo/test-src/regexp/RegExpTest.cpp index 9f49e0ee4a..962ca5d8f7 100644 --- a/alib2algo/test-src/regexp/RegExpTest.cpp +++ b/alib2algo/test-src/regexp/RegExpTest.cpp @@ -7,7 +7,7 @@ #include "regexp/unbounded/UnboundedRegExp.h" #include "regexp/RegExpFromStringParser.h" -#include "regexp/GlushkovTraversal.h" +#include "regexp/glushkov/GlushkovTraversal.h" #include <factory/StringDataFactory.hpp> @@ -27,16 +27,18 @@ void RegExpTest::testFirst() { { std::string input = "#E* #0*"; regexp::UnboundedRegExp regexp( static_cast<const regexp::UnboundedRegExp &>( alib::StringDataFactory::fromString<regexp::RegExp>(input).getData() ) ); + regexp::UnboundedRegExp indexedRegExp = regexp::GlushkovTraversal::indexate(regexp); - std::set<regexp::GlushkovSymbol> first = regexp::GlushkovTraversal::first(regexp); + std::set<regexp::UnboundedRegExpSymbol> first = regexp::GlushkovTraversal::first(indexedRegExp); CPPUNIT_ASSERT(first.size() == 0); } { std::string input = "#E* a"; regexp::UnboundedRegExp regexp( static_cast<const regexp::UnboundedRegExp &>( alib::StringDataFactory::fromString<regexp::RegExp>(input).getData() ) ); + regexp::UnboundedRegExp indexedRegExp = regexp::GlushkovTraversal::indexate(regexp); - std::set<regexp::GlushkovSymbol> first = regexp::GlushkovTraversal::first(regexp); + std::set<regexp::UnboundedRegExpSymbol> first = regexp::GlushkovTraversal::first(indexedRegExp); CPPUNIT_ASSERT(first.size() == 1); } @@ -47,17 +49,20 @@ void RegExpTest::testLast() { { std::string input = "a+a"; regexp::UnboundedRegExp regexp( static_cast<const regexp::UnboundedRegExp &>( alib::StringDataFactory::fromString<regexp::RegExp>(input).getData() ) ); + regexp::UnboundedRegExp indexedRegExp = regexp::GlushkovTraversal::indexate(regexp); - std::set<regexp::GlushkovSymbol> last = regexp::GlushkovTraversal::last(regexp); + std::set<regexp::UnboundedRegExpSymbol> last = regexp::GlushkovTraversal::last(indexedRegExp); CPPUNIT_ASSERT(last.size() == 2); } { std::string input = "(a+a)b"; regexp::UnboundedRegExp regexp( static_cast<const regexp::UnboundedRegExp &>( alib::StringDataFactory::fromString<regexp::RegExp>(input).getData() ) ); + regexp::UnboundedRegExp indexedRegExp = regexp::GlushkovTraversal::indexate(regexp); - std::set<regexp::GlushkovSymbol> last = regexp::GlushkovTraversal::last(regexp); + std::set<regexp::UnboundedRegExpSymbol> last = regexp::GlushkovTraversal::last(indexedRegExp); + std::cout << last << std::endl; CPPUNIT_ASSERT(last.size() == 1); } } @@ -66,52 +71,52 @@ void RegExpTest::testFollow() { { std::string input = "(a+a)b"; regexp::UnboundedRegExp regexp( static_cast<const regexp::UnboundedRegExp &>( alib::StringDataFactory::fromString<regexp::RegExp>(input).getData() ) ); + regexp::UnboundedRegExp indexedRegExp = regexp::GlushkovTraversal::indexate(regexp); - std::set<regexp::GlushkovSymbol> symbols = regexp::GlushkovTraversal::getSymbols(regexp); - auto symbolsIter = symbols.begin(); + auto symbolsIter = indexedRegExp.getAlphabet().begin(); - std::set<regexp::GlushkovSymbol> follow1 = regexp::GlushkovTraversal::follow(regexp, *symbolsIter); + std::set<regexp::UnboundedRegExpSymbol> follow1 = regexp::GlushkovTraversal::follow(indexedRegExp, regexp::UnboundedRegExpSymbol ( *symbolsIter) ); CPPUNIT_ASSERT(follow1.size() == 1); symbolsIter++; - std::set<regexp::GlushkovSymbol> follow2 = regexp::GlushkovTraversal::follow(regexp, *symbolsIter); + std::set<regexp::UnboundedRegExpSymbol> follow2 = regexp::GlushkovTraversal::follow(indexedRegExp, regexp::UnboundedRegExpSymbol ( *symbolsIter) ); CPPUNIT_ASSERT(follow2.size() == 1); symbolsIter++; - std::set<regexp::GlushkovSymbol> follow3 = regexp::GlushkovTraversal::follow(regexp, *symbolsIter); + std::set<regexp::UnboundedRegExpSymbol> follow3 = regexp::GlushkovTraversal::follow(indexedRegExp, regexp::UnboundedRegExpSymbol ( *symbolsIter) ); CPPUNIT_ASSERT(follow3.size() == 0); } { std::string input = "a+a* (b+a)* c"; regexp::UnboundedRegExp regexp( static_cast<const regexp::UnboundedRegExp &>( alib::StringDataFactory::fromString<regexp::RegExp>(input).getData() ) ); + regexp::UnboundedRegExp indexedRegExp = regexp::GlushkovTraversal::indexate(regexp); - std::set<regexp::GlushkovSymbol> symbols = regexp::GlushkovTraversal::getSymbols(regexp); - auto symbolsIter = symbols.begin(); + auto symbolsIter = indexedRegExp.getAlphabet().begin(); - std::set<regexp::GlushkovSymbol> follow1 = regexp::GlushkovTraversal::follow(regexp, *symbolsIter); + std::set<regexp::UnboundedRegExpSymbol> follow1 = regexp::GlushkovTraversal::follow(indexedRegExp, regexp::UnboundedRegExpSymbol ( *symbolsIter) ); CPPUNIT_ASSERT(follow1.size() == 0); symbolsIter++; - std::set<regexp::GlushkovSymbol> follow2 = regexp::GlushkovTraversal::follow(regexp, *symbolsIter); + std::set<regexp::UnboundedRegExpSymbol> follow2 = regexp::GlushkovTraversal::follow(indexedRegExp, regexp::UnboundedRegExpSymbol ( *symbolsIter) ); CPPUNIT_ASSERT(follow2.size() == 4); symbolsIter++; - std::set<regexp::GlushkovSymbol> follow3 = regexp::GlushkovTraversal::follow(regexp, *symbolsIter); + std::set<regexp::UnboundedRegExpSymbol> follow3 = regexp::GlushkovTraversal::follow(indexedRegExp, regexp::UnboundedRegExpSymbol ( *symbolsIter) ); CPPUNIT_ASSERT(follow3.size() == 3); symbolsIter++; - std::set<regexp::GlushkovSymbol> follow4 = regexp::GlushkovTraversal::follow(regexp, *symbolsIter); + std::set<regexp::UnboundedRegExpSymbol> follow4 = regexp::GlushkovTraversal::follow(indexedRegExp, regexp::UnboundedRegExpSymbol ( *symbolsIter) ); CPPUNIT_ASSERT(follow4.size() == 3); symbolsIter++; - std::set<regexp::GlushkovSymbol> follow5 = regexp::GlushkovTraversal::follow(regexp, *symbolsIter); + std::set<regexp::UnboundedRegExpSymbol> follow5 = regexp::GlushkovTraversal::follow(indexedRegExp, regexp::UnboundedRegExpSymbol ( *symbolsIter) ); CPPUNIT_ASSERT(follow5.size() == 0); } -- GitLab