Skip to content
Snippets Groups Projects
GlushkovLast.h 4.77 KiB
Newer Older
  • Learn to ignore specific revisions
  • /*
     * GlushkovLast.h
     *
     *  Created on: 13. 3. 2014
     *	  Author: Tomas Pecka
     */
    
    #ifndef GLUSHKOV_LAST_H_
    #define GLUSHKOV_LAST_H_
    
    #include <set>
    
    #include <regexp/unbounded/UnboundedRegExp.h>
    #include <regexp/RegExpFeatures.h>
    
    
    #include <regexp/unbounded/UnboundedRegExpAlternation.h>
    #include <regexp/unbounded/UnboundedRegExpConcatenation.h>
    #include <regexp/unbounded/UnboundedRegExpElement.h>
    #include <regexp/unbounded/UnboundedRegExpEmpty.h>
    #include <regexp/unbounded/UnboundedRegExpEpsilon.h>
    #include <regexp/unbounded/UnboundedRegExpIteration.h>
    #include <regexp/unbounded/UnboundedRegExpSymbol.h>
    
    #include <regexp/properties/RegExpEpsilon.h>
    
    
    namespace regexp {
    
    /**
     * RegExp tree traversal utils for Glushkov algorithm.
     *
     * Thanks to http://www.sciencedirect.com/science/article/pii/S030439759700296X for better follow() solution.
     */
    class GlushkovLast {
    public:
    	/**
    	 * @param re RegExp to probe
    	 * @return all RegExpSymbols that can terminate the word.
    	 */
    
    	template < class SymbolType >
    	static std::set < UnboundedRegExpSymbol < SymbolType > > last ( const regexp::UnboundedRegExp < SymbolType > & re );
    
    	template < class SymbolType >
    
    	class Unbounded {
    	public:
    
    		static std::set < regexp::UnboundedRegExpSymbol < SymbolType > > visit ( const regexp::UnboundedRegExpAlternation < SymbolType > & node );
    		static std::set < regexp::UnboundedRegExpSymbol < SymbolType > > visit ( const regexp::UnboundedRegExpConcatenation < SymbolType > & node );
    		static std::set < regexp::UnboundedRegExpSymbol < SymbolType > > visit ( const regexp::UnboundedRegExpIteration < SymbolType > & node );
    		static std::set < regexp::UnboundedRegExpSymbol < SymbolType > > visit ( const regexp::UnboundedRegExpSymbol < SymbolType > & node );
    		static std::set < regexp::UnboundedRegExpSymbol < SymbolType > > visit ( const regexp::UnboundedRegExpEmpty < SymbolType > & node );
    		static std::set < regexp::UnboundedRegExpSymbol < SymbolType > > visit ( const regexp::UnboundedRegExpEpsilon < SymbolType > & node );
    
    template < class SymbolType >
    std::set < UnboundedRegExpSymbol < SymbolType > > GlushkovLast::last ( const regexp::UnboundedRegExp < SymbolType > & re ) {
    	return re.getRegExp ( ).getStructure ( ).template accept < std::set < regexp::UnboundedRegExpSymbol < SymbolType > >, GlushkovLast::Unbounded < SymbolType > > ( );
    }
    
    template < class SymbolType >
    std::set < regexp::UnboundedRegExpSymbol < SymbolType > > GlushkovLast::Unbounded < SymbolType >::visit ( const regexp::UnboundedRegExpAlternation < SymbolType > & node ) {
    	std::set < regexp::UnboundedRegExpSymbol < SymbolType > > ret;
    
    	for ( const auto & element : node.getElements ( ) ) {
    		std::set < regexp::UnboundedRegExpSymbol < SymbolType > > tmp = element->template accept < std::set < regexp::UnboundedRegExpSymbol < SymbolType > >, GlushkovLast::Unbounded < SymbolType > > ( );
    		ret.insert ( tmp.begin ( ), tmp.end ( ) );
    	}
    
    	return ret;
    }
    
    template < class SymbolType >
    std::set < regexp::UnboundedRegExpSymbol < SymbolType > > GlushkovLast::Unbounded < SymbolType >::visit ( const regexp::UnboundedRegExpConcatenation < SymbolType > & node ) {
    	std::set < regexp::UnboundedRegExpSymbol < SymbolType > > ret;
    
    
    	for ( const auto & element : ext::make_reverse ( node.getElements ( ) ) ) {
    
    		std::set < regexp::UnboundedRegExpSymbol < SymbolType > > tmp = element->template accept < std::set < regexp::UnboundedRegExpSymbol < SymbolType > >, GlushkovLast::Unbounded < SymbolType > > ( );
    		ret.insert ( tmp.begin ( ), tmp.end ( ) );
    
    		if ( ! regexp::properties::RegExpEpsilon::languageContainsEpsilon ( * element ) )
    			break;
    	}
    
    	return ret;
    }
    
    template < class SymbolType >
    std::set < regexp::UnboundedRegExpSymbol < SymbolType > > GlushkovLast::Unbounded < SymbolType >::visit ( const regexp::UnboundedRegExpIteration < SymbolType > & node ) {
    	return node.getElement ( ).template accept < std::set < regexp::UnboundedRegExpSymbol < SymbolType > >, GlushkovLast::Unbounded < SymbolType > > ( );
    }
    
    template < class SymbolType >
    std::set < regexp::UnboundedRegExpSymbol < SymbolType > > GlushkovLast::Unbounded < SymbolType >::visit ( const regexp::UnboundedRegExpSymbol < SymbolType > & node ) {
    	return std::set < regexp::UnboundedRegExpSymbol < SymbolType > > { node };
    }
    
    template < class SymbolType >
    std::set < regexp::UnboundedRegExpSymbol < SymbolType > > GlushkovLast::Unbounded < SymbolType >::visit ( const regexp::UnboundedRegExpEpsilon < SymbolType > & /* node */ ) {
    	return std::set < regexp::UnboundedRegExpSymbol < SymbolType > > ( );
    }
    
    template < class SymbolType >
    std::set < regexp::UnboundedRegExpSymbol < SymbolType > > GlushkovLast::Unbounded < SymbolType >::visit ( const regexp::UnboundedRegExpEmpty < SymbolType > & /* node */ ) {
    	return std::set < regexp::UnboundedRegExpSymbol < SymbolType > > ( );
    }
    
    
    } /* namespace regexp */
    
    #endif /* GLUSHKOV_LAST_H_ */