/* * GlushkovLast.h * * Created on: 13. 3. 2014 * Author: Tomas Pecka */ #ifndef GLUSHKOV_LAST_H_ #define GLUSHKOV_LAST_H_ #include <set> #include <regexp/unbounded/UnboundedRegExp.h> #include <regexp/RegExpFeatures.h> #include <regexp/unbounded/UnboundedRegExpAlternation.h> #include <regexp/unbounded/UnboundedRegExpConcatenation.h> #include <regexp/unbounded/UnboundedRegExpElement.h> #include <regexp/unbounded/UnboundedRegExpEmpty.h> #include <regexp/unbounded/UnboundedRegExpEpsilon.h> #include <regexp/unbounded/UnboundedRegExpIteration.h> #include <regexp/unbounded/UnboundedRegExpSymbol.h> #include <regexp/properties/RegExpEpsilon.h> namespace regexp { /** * RegExp tree traversal utils for Glushkov algorithm. * * Thanks to http://www.sciencedirect.com/science/article/pii/S030439759700296X for better follow() solution. */ class GlushkovLast { public: /** * @param re RegExp to probe * @return all RegExpSymbols that can terminate the word. */ template < class SymbolType > static std::set < UnboundedRegExpSymbol < SymbolType > > last ( const regexp::UnboundedRegExp < SymbolType > & re ); template < class SymbolType > class Unbounded { public: static std::set < regexp::UnboundedRegExpSymbol < SymbolType > > visit ( const regexp::UnboundedRegExpAlternation < SymbolType > & node ); static std::set < regexp::UnboundedRegExpSymbol < SymbolType > > visit ( const regexp::UnboundedRegExpConcatenation < SymbolType > & node ); static std::set < regexp::UnboundedRegExpSymbol < SymbolType > > visit ( const regexp::UnboundedRegExpIteration < SymbolType > & node ); static std::set < regexp::UnboundedRegExpSymbol < SymbolType > > visit ( const regexp::UnboundedRegExpSymbol < SymbolType > & node ); static std::set < regexp::UnboundedRegExpSymbol < SymbolType > > visit ( const regexp::UnboundedRegExpEmpty < SymbolType > & node ); static std::set < regexp::UnboundedRegExpSymbol < SymbolType > > visit ( const regexp::UnboundedRegExpEpsilon < SymbolType > & node ); }; }; template < class SymbolType > std::set < UnboundedRegExpSymbol < SymbolType > > GlushkovLast::last ( const regexp::UnboundedRegExp < SymbolType > & re ) { return re.getRegExp ( ).getStructure ( ).template accept < std::set < regexp::UnboundedRegExpSymbol < SymbolType > >, GlushkovLast::Unbounded < SymbolType > > ( ); } template < class SymbolType > std::set < regexp::UnboundedRegExpSymbol < SymbolType > > GlushkovLast::Unbounded < SymbolType >::visit ( const regexp::UnboundedRegExpAlternation < SymbolType > & node ) { std::set < regexp::UnboundedRegExpSymbol < SymbolType > > ret; for ( const auto & element : node.getElements ( ) ) { std::set < regexp::UnboundedRegExpSymbol < SymbolType > > tmp = element->template accept < std::set < regexp::UnboundedRegExpSymbol < SymbolType > >, GlushkovLast::Unbounded < SymbolType > > ( ); ret.insert ( tmp.begin ( ), tmp.end ( ) ); } return ret; } template < class SymbolType > std::set < regexp::UnboundedRegExpSymbol < SymbolType > > GlushkovLast::Unbounded < SymbolType >::visit ( const regexp::UnboundedRegExpConcatenation < SymbolType > & node ) { std::set < regexp::UnboundedRegExpSymbol < SymbolType > > ret; for ( const auto & element : ext::make_reverse ( node.getElements ( ) ) ) { std::set < regexp::UnboundedRegExpSymbol < SymbolType > > tmp = element->template accept < std::set < regexp::UnboundedRegExpSymbol < SymbolType > >, GlushkovLast::Unbounded < SymbolType > > ( ); ret.insert ( tmp.begin ( ), tmp.end ( ) ); if ( ! regexp::properties::RegExpEpsilon::languageContainsEpsilon ( * element ) ) break; } return ret; } template < class SymbolType > std::set < regexp::UnboundedRegExpSymbol < SymbolType > > GlushkovLast::Unbounded < SymbolType >::visit ( const regexp::UnboundedRegExpIteration < SymbolType > & node ) { return node.getElement ( ).template accept < std::set < regexp::UnboundedRegExpSymbol < SymbolType > >, GlushkovLast::Unbounded < SymbolType > > ( ); } template < class SymbolType > std::set < regexp::UnboundedRegExpSymbol < SymbolType > > GlushkovLast::Unbounded < SymbolType >::visit ( const regexp::UnboundedRegExpSymbol < SymbolType > & node ) { return std::set < regexp::UnboundedRegExpSymbol < SymbolType > > { node }; } template < class SymbolType > std::set < regexp::UnboundedRegExpSymbol < SymbolType > > GlushkovLast::Unbounded < SymbolType >::visit ( const regexp::UnboundedRegExpEpsilon < SymbolType > & /* node */ ) { return std::set < regexp::UnboundedRegExpSymbol < SymbolType > > ( ); } template < class SymbolType > std::set < regexp::UnboundedRegExpSymbol < SymbolType > > GlushkovLast::Unbounded < SymbolType >::visit ( const regexp::UnboundedRegExpEmpty < SymbolType > & /* node */ ) { return std::set < regexp::UnboundedRegExpSymbol < SymbolType > > ( ); } } /* namespace regexp */ #endif /* GLUSHKOV_LAST_H_ */