From dc33ec0ccebeedea3efa4db4bf4c957ee45abf49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <peckato1@fit.cvut.cz> Date: Thu, 13 Mar 2014 15:34:35 +0100 Subject: [PATCH] Glushkov RE->FA cleanup --- aconversions/src/re2fa/Glushkov.cpp | 270 +--------------------------- aconversions/src/re2fa/Glushkov.h | 28 +-- 2 files changed, 5 insertions(+), 293 deletions(-) diff --git a/aconversions/src/re2fa/Glushkov.cpp b/aconversions/src/re2fa/Glushkov.cpp index b61ecf12ac..ccf00c4b0d 100644 --- a/aconversions/src/re2fa/Glushkov.cpp +++ b/aconversions/src/re2fa/Glushkov.cpp @@ -79,271 +79,24 @@ void Glushkov::initNumberSymbols( void ) void Glushkov::constructBeginSymbolSet( void ) { - for( const auto & s : getLeftmostSymbolsInTree( m_re.getRegExp( ) ) ) + for( const auto & s : GlushkovTraversal::getLeftmostSymbolsInTree( m_re ) ) m_beginSymbolSet.insert( m_numberedSymbols.find( s )->second ); } void Glushkov::constructEndSymbolSet( void ) { - for( const auto & s : getRightmostSymbolsInTree( m_re.getRegExp( ) ) ) + for( const auto & s : GlushkovTraversal::getRightmostSymbolsInTree( m_re ) ) m_endSymbolSet.insert( m_numberedSymbols.find( s )->second ); } void Glushkov::constructNeighbourSymbolSet( void ) { - for( const auto & n : getNeighbours( m_re.getRegExp( ) ) ) + for( const auto & n : GlushkovTraversal::getNeighbours( m_re ) ) m_neighbourSymbolSet.insert( n ); } // ---------------------------------------------------------------------------- -set<const RegExpSymbol*> Glushkov::getLeftmostSymbolsInTree( const RegExpElement * node ) const -{ - const Alternation* alternation = dynamic_cast<const Alternation*>( node ); - const Concatenation* concatenation = dynamic_cast<const Concatenation*>( node ); - const Iteration* iteration = dynamic_cast<const Iteration*>( node ); - const RegExpSymbol* symbol = dynamic_cast<const RegExpSymbol*>( node ); - const RegExpEmpty* empty = dynamic_cast<const RegExpEmpty*>( node ); - const RegExpEpsilon* eps = dynamic_cast<const RegExpEpsilon*>( node ); - - if( symbol ) - return getLeftmostSymbolsInTree( symbol ); - else if( alternation ) - return getLeftmostSymbolsInTree( alternation ); - else if( concatenation ) - return getLeftmostSymbolsInTree( concatenation ); - else if( iteration ) - return getLeftmostSymbolsInTree( iteration ); - else if( eps ) - return getLeftmostSymbolsInTree( eps ); - else if( empty ) - return getLeftmostSymbolsInTree( empty ); - - throw AlibException( "Glushkov::getLeftmostSymbolsInTree - invalid RegExpElement node" ); -} - -set<const RegExpSymbol*> Glushkov::getLeftmostSymbolsInTree( const Alternation * node ) const -{ - set<const RegExpSymbol*> ret; - - for( const auto & e : node->getElements( ) ) - { - const set<const RegExpSymbol*> tmp = getLeftmostSymbolsInTree( e ); - ret.insert( tmp.begin( ), tmp.end( ) ); - } - - return ret; -} - -set<const RegExpSymbol*> Glushkov::getLeftmostSymbolsInTree( const Concatenation * node ) const -{ - set<const RegExpSymbol*> ret; - - for( const auto & e : node->getElements( ) ) - { - set<const RegExpSymbol*> tmp = getLeftmostSymbolsInTree( e ); - ret.insert( tmp.begin( ), tmp.end( ) ); - - if( ! e->containsEmptyString( ) ) // If this subtree can be epsilon, then we need to add next subtree also - break; - } - - return ret; -} - -set<const RegExpSymbol*> Glushkov::getLeftmostSymbolsInTree( const Iteration * node ) const -{ - return getLeftmostSymbolsInTree( node->getElement( ) ); -} - -set<const RegExpSymbol*> Glushkov::getLeftmostSymbolsInTree( const RegExpSymbol * node ) const -{ - return set<const RegExpSymbol*> { node }; -} - -set<const RegExpSymbol*> Glushkov::getLeftmostSymbolsInTree( const RegExpEpsilon * node ) const -{ - return set<const RegExpSymbol*>( ); -} - -set<const RegExpSymbol*> Glushkov::getLeftmostSymbolsInTree( const RegExpEmpty * node ) const -{ - return set<const RegExpSymbol*>( ); -} -// ---------------------------------------------------------------------------- - -set<const RegExpSymbol*> Glushkov::getRightmostSymbolsInTree( const RegExpElement * node ) const -{ - const Alternation* alternation = dynamic_cast<const Alternation*>( node ); - const Concatenation* concatenation = dynamic_cast<const Concatenation*>( node ); - const Iteration* iteration = dynamic_cast<const Iteration*>( node ); - const RegExpSymbol* symbol = dynamic_cast<const RegExpSymbol*>( node ); - const RegExpEmpty* empty = dynamic_cast<const RegExpEmpty*>( node ); - const RegExpEpsilon* eps = dynamic_cast<const RegExpEpsilon*>( node ); - - if( symbol ) - return getRightmostSymbolsInTree( symbol ); - else if( alternation ) - return getRightmostSymbolsInTree( alternation ); - else if( concatenation ) - return getRightmostSymbolsInTree( concatenation ); - else if( iteration ) - return getRightmostSymbolsInTree( iteration ); - else if( eps ) - return getRightmostSymbolsInTree( eps ); - else if( empty ) - return getRightmostSymbolsInTree( empty ); - - throw AlibException( "Glushkov::getRightmostSymbolsInTree - invalid RegExpElement node" ); -} - -set<const RegExpSymbol*> Glushkov::getRightmostSymbolsInTree( const Alternation * node ) const -{ - set<const RegExpSymbol*> ret; - - for( const auto & e : node->getElements( ) ) - { - set<const RegExpSymbol*> tmp = getRightmostSymbolsInTree( e ); - ret.insert( tmp.begin( ), tmp.end( ) ); - } - - return ret; -} - -set<const RegExpSymbol*> Glushkov::getRightmostSymbolsInTree( const Concatenation * node ) const -{ - set<const RegExpSymbol*> ret; - - for( auto it = node->getElements( ).rbegin( ); it != node->getElements( ).rend( ) ; it ++ ) - { - set<const RegExpSymbol*> tmp = getRightmostSymbolsInTree( *it ); - ret.insert( tmp.begin( ), tmp.end( ) ); - - if( ! ( * it )->containsEmptyString( ) ) - break; - } - - return ret; -} - -set<const RegExpSymbol*> Glushkov::getRightmostSymbolsInTree( const Iteration * node ) const -{ - return getRightmostSymbolsInTree( node->getElement( ) ); -} - -set<const RegExpSymbol*> Glushkov::getRightmostSymbolsInTree( const RegExpSymbol * node ) const -{ - return set<const RegExpSymbol*> { node }; -} - -set<const RegExpSymbol*> Glushkov::getRightmostSymbolsInTree( const RegExpEpsilon * node ) const -{ - return set<const RegExpSymbol*>( ); -} - -set<const RegExpSymbol*> Glushkov::getRightmostSymbolsInTree( const RegExpEmpty * node ) const -{ - return set<const RegExpSymbol*>( ); -} - -// ---------------------------------------------------------------------------- - -set<Glushkov::Neighbours> Glushkov::getNeighbours( const RegExpElement * node ) const -{ - const Alternation* alternation = dynamic_cast<const Alternation*>( node ); - const Concatenation* concatenation = dynamic_cast<const Concatenation*>( node ); - const Iteration* iteration = dynamic_cast<const Iteration*>( node ); - const RegExpSymbol* symbol = dynamic_cast<const RegExpSymbol*>( node ); - const RegExpEmpty* empty = dynamic_cast<const RegExpEmpty*>( node ); - const RegExpEpsilon* eps = dynamic_cast<const RegExpEpsilon*>( node ); - - if( symbol ) - return getNeighbours( symbol ); - else if( alternation ) - return getNeighbours( alternation ); - else if( concatenation ) - return getNeighbours( concatenation ); - else if( iteration ) - return getNeighbours( iteration ); - else if( eps ) - return getNeighbours( eps ); - else if( empty ) - return getNeighbours( empty ); - - throw AlibException( "Glushkov::getNeighbours - unknown RegExpElement* " ); -} - -set<Glushkov::Neighbours> Glushkov::getNeighbours( const Alternation * node ) const -{ - set<Neighbours> n; - for( const auto & e : node->getElements( ) ) - { - set<Neighbours> tmp = getNeighbours( e ); - n.insert( tmp.begin( ), tmp.end( ) ); - } - - return n; -} - -set<Glushkov::Neighbours> Glushkov::getNeighbours( const Concatenation * node ) const -{ - set<Neighbours> n; - for( const auto & e : node->getElements( ) ) - { - set<Neighbours> tmp = getNeighbours( e ); - n.insert( tmp.begin( ), tmp.end( ) ); - } - - for( auto e = node->getElements( ).begin( ); e != node->getElements( ).end( ); e ++ ) - { - auto f = e; - if( f == node->getElements( ).end( ) ) - continue; - - for( f++ ; f != node->getElements( ).end( ); f ++ ) - { - for( const auto & x : getRightmostSymbolsInTree( * e ) ) - for( const auto & y : getLeftmostSymbolsInTree( * f ) ) - n.insert( Neighbours( x, y ) ); - - if( ! ( * f )->containsEmptyString( ) ) - break; - } - } - - return n; -} - -set<Glushkov::Neighbours> Glushkov::getNeighbours( const Iteration * node ) const -{ - set<Neighbours> n; - set<Neighbours> tmp = getNeighbours( node->getElement( ) ); - n.insert( tmp.begin( ), tmp.end( ) ); - - for( const auto & x : getRightmostSymbolsInTree( node->getElement( ) ) ) - for( const auto & y : getLeftmostSymbolsInTree( node->getElement( ) ) ) - n.insert( Neighbours( x, y ) ); - - return n; -} - -set<Glushkov::Neighbours> Glushkov::getNeighbours( const RegExpSymbol * node ) const -{ - return set<Neighbours>( ); -} - -set<Glushkov::Neighbours> Glushkov::getNeighbours( const RegExpEpsilon * node ) const -{ - return set<Neighbours>( ); -} - -set<Glushkov::Neighbours> Glushkov::getNeighbours( const RegExpEmpty * node ) const -{ - return set<Neighbours>( ); -} - -// ---------------------------------------------------------------------------- - bool Glushkov::NumberedSymbol::operator<( const NumberedSymbol & x ) const { return m_i < x.m_i; @@ -357,21 +110,4 @@ Glushkov::NumberedSymbol::NumberedSymbol( const RegExpSymbol * symbol, int i ) : } -// ---------------------------------------------------------------------------- - -Glushkov::Neighbours::Neighbours( const RegExpSymbol * first, const RegExpSymbol * second ) : - m_first( first ), - m_second( second ) -{ - -} - -bool Glushkov::Neighbours::operator<( const Neighbours & x ) const -{ - if( m_first != x.m_first ) - return m_first < x.m_first; - else - return m_second < x.m_second; -} - } /* namespace conversions */ diff --git a/aconversions/src/re2fa/Glushkov.h b/aconversions/src/re2fa/Glushkov.h index 452028c1f4..5a83b87c1a 100644 --- a/aconversions/src/re2fa/Glushkov.h +++ b/aconversions/src/re2fa/Glushkov.h @@ -15,6 +15,7 @@ #include <regexp/RegExp.h> #include "AbstractREtoFAConverter.h" +#include "../shared/GlushkovTraversal.h" #include "RegExpAlphabet.h" @@ -58,34 +59,9 @@ private: void constructEndSymbolSet( void ); void constructNeighbourSymbolSet( void ); - // TODO: consider moving these to independent class or regexputils - std::set<const regexp::RegExpSymbol*> getLeftmostSymbolsInTree( const regexp::RegExpElement * node ) const; - std::set<const regexp::RegExpSymbol*> getLeftmostSymbolsInTree( const regexp::Alternation * node ) const; - std::set<const regexp::RegExpSymbol*> getLeftmostSymbolsInTree( const regexp::Concatenation * node ) const; - std::set<const regexp::RegExpSymbol*> getLeftmostSymbolsInTree( const regexp::Iteration * node ) const; - std::set<const regexp::RegExpSymbol*> getLeftmostSymbolsInTree( const regexp::RegExpSymbol * node ) const; - std::set<const regexp::RegExpSymbol*> getLeftmostSymbolsInTree( const regexp::RegExpEmpty * node ) const; - std::set<const regexp::RegExpSymbol*> getLeftmostSymbolsInTree( const regexp::RegExpEpsilon * node ) const; - - std::set<const regexp::RegExpSymbol*> getRightmostSymbolsInTree( const regexp::RegExpElement * node ) const; - std::set<const regexp::RegExpSymbol*> getRightmostSymbolsInTree( const regexp::Alternation * node ) const; - std::set<const regexp::RegExpSymbol*> getRightmostSymbolsInTree( const regexp::Concatenation * node ) const; - std::set<const regexp::RegExpSymbol*> getRightmostSymbolsInTree( const regexp::Iteration * node ) const; - std::set<const regexp::RegExpSymbol*> getRightmostSymbolsInTree( const regexp::RegExpSymbol * node ) const; - std::set<const regexp::RegExpSymbol*> getRightmostSymbolsInTree( const regexp::RegExpEmpty * node ) const; - std::set<const regexp::RegExpSymbol*> getRightmostSymbolsInTree( const regexp::RegExpEpsilon * node ) const; - - std::set<Neighbours> getNeighbours( const regexp::RegExpElement * node ) const; - std::set<Neighbours> getNeighbours( const regexp::Alternation * node ) const; - std::set<Neighbours> getNeighbours( const regexp::Concatenation * node ) const; - std::set<Neighbours> getNeighbours( const regexp::Iteration * node ) const; - std::set<Neighbours> getNeighbours( const regexp::RegExpSymbol * node ) const; - std::set<Neighbours> getNeighbours( const regexp::RegExpEmpty * node ) const; - std::set<Neighbours> getNeighbours( const regexp::RegExpEpsilon * node ) const; - std::map<const regexp::RegExpElement*, NumberedSymbol> m_numberedSymbols; std::set<NumberedSymbol> m_beginSymbolSet, m_endSymbolSet; - std::set<Neighbours> m_neighbourSymbolSet; + std::set<GlushkovTraversal::Neighbours> m_neighbourSymbolSet; }; -- GitLab