From dc33ec0ccebeedea3efa4db4bf4c957ee45abf49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <peckato1@fit.cvut.cz>
Date: Thu, 13 Mar 2014 15:34:35 +0100
Subject: [PATCH] Glushkov RE->FA cleanup

---
 aconversions/src/re2fa/Glushkov.cpp | 270 +---------------------------
 aconversions/src/re2fa/Glushkov.h   |  28 +--
 2 files changed, 5 insertions(+), 293 deletions(-)

diff --git a/aconversions/src/re2fa/Glushkov.cpp b/aconversions/src/re2fa/Glushkov.cpp
index b61ecf12ac..ccf00c4b0d 100644
--- a/aconversions/src/re2fa/Glushkov.cpp
+++ b/aconversions/src/re2fa/Glushkov.cpp
@@ -79,271 +79,24 @@ void Glushkov::initNumberSymbols( void )
 
 void Glushkov::constructBeginSymbolSet( void )
 {
-    for( const auto & s : getLeftmostSymbolsInTree( m_re.getRegExp( ) ) )
+    for( const auto & s : GlushkovTraversal::getLeftmostSymbolsInTree( m_re ) )
         m_beginSymbolSet.insert( m_numberedSymbols.find( s )->second );
 }
 
 void Glushkov::constructEndSymbolSet( void )
 {
-    for( const auto & s : getRightmostSymbolsInTree( m_re.getRegExp( ) ) )
+    for( const auto & s : GlushkovTraversal::getRightmostSymbolsInTree( m_re ) )
         m_endSymbolSet.insert( m_numberedSymbols.find( s )->second );
 }
 
 void Glushkov::constructNeighbourSymbolSet( void )
 {
-    for( const auto & n : getNeighbours( m_re.getRegExp( ) ) )
+    for( const auto & n : GlushkovTraversal::getNeighbours( m_re ) )
         m_neighbourSymbolSet.insert( n );
 }
 
 // ----------------------------------------------------------------------------
 
-set<const RegExpSymbol*> Glushkov::getLeftmostSymbolsInTree( const RegExpElement * node ) const
-{
-    const Alternation* alternation = dynamic_cast<const Alternation*>( node );
-    const Concatenation* concatenation = dynamic_cast<const Concatenation*>( node );
-    const Iteration* iteration = dynamic_cast<const Iteration*>( node );
-    const RegExpSymbol* symbol = dynamic_cast<const RegExpSymbol*>( node );
-    const RegExpEmpty* empty = dynamic_cast<const RegExpEmpty*>( node );
-    const RegExpEpsilon* eps = dynamic_cast<const RegExpEpsilon*>( node );
-
-    if( symbol )
-        return getLeftmostSymbolsInTree( symbol );
-    else if( alternation )
-        return getLeftmostSymbolsInTree( alternation );
-    else if( concatenation )
-        return getLeftmostSymbolsInTree( concatenation );
-    else if( iteration )
-        return getLeftmostSymbolsInTree( iteration );
-    else if( eps )
-        return getLeftmostSymbolsInTree( eps );
-    else if( empty )
-        return getLeftmostSymbolsInTree( empty );
-
-     throw AlibException( "Glushkov::getLeftmostSymbolsInTree - invalid RegExpElement node" );
-}
-
-set<const RegExpSymbol*> Glushkov::getLeftmostSymbolsInTree( const Alternation * node ) const
-{
-    set<const RegExpSymbol*> ret;
-
-    for( const auto & e : node->getElements( ) )
-    {
-        const set<const RegExpSymbol*> tmp = getLeftmostSymbolsInTree( e );
-        ret.insert( tmp.begin( ), tmp.end( ) );
-    }
-
-    return ret;
-}
-
-set<const RegExpSymbol*> Glushkov::getLeftmostSymbolsInTree( const Concatenation * node ) const
-{
-    set<const RegExpSymbol*> ret;
-
-    for( const auto & e : node->getElements( ) )
-    {
-        set<const RegExpSymbol*> tmp = getLeftmostSymbolsInTree( e );
-        ret.insert( tmp.begin( ), tmp.end( ) );
-
-        if( ! e->containsEmptyString( ) ) // If this subtree can be epsilon, then we need to add next subtree also
-            break;
-    }
-
-    return ret;
-}
-
-set<const RegExpSymbol*> Glushkov::getLeftmostSymbolsInTree( const Iteration * node ) const
-{
-    return getLeftmostSymbolsInTree( node->getElement( ) );
-}
-
-set<const RegExpSymbol*> Glushkov::getLeftmostSymbolsInTree( const RegExpSymbol * node ) const
-{
-    return set<const RegExpSymbol*> { node };
-}
-
-set<const RegExpSymbol*> Glushkov::getLeftmostSymbolsInTree( const RegExpEpsilon * node ) const
-{
-    return set<const RegExpSymbol*>( );
-}
-
-set<const RegExpSymbol*> Glushkov::getLeftmostSymbolsInTree( const RegExpEmpty * node ) const
-{
-    return set<const RegExpSymbol*>( );
-}
-// ----------------------------------------------------------------------------
-
-set<const RegExpSymbol*> Glushkov::getRightmostSymbolsInTree( const RegExpElement * node ) const
-{
-    const Alternation* alternation = dynamic_cast<const Alternation*>( node );
-    const Concatenation* concatenation = dynamic_cast<const Concatenation*>( node );
-    const Iteration* iteration = dynamic_cast<const Iteration*>( node );
-    const RegExpSymbol* symbol = dynamic_cast<const RegExpSymbol*>( node );
-    const RegExpEmpty* empty = dynamic_cast<const RegExpEmpty*>( node );
-    const RegExpEpsilon* eps = dynamic_cast<const RegExpEpsilon*>( node );
-
-    if( symbol )
-        return getRightmostSymbolsInTree( symbol );
-    else if( alternation )
-        return getRightmostSymbolsInTree( alternation );
-    else if( concatenation )
-        return getRightmostSymbolsInTree( concatenation );
-    else if( iteration )
-        return getRightmostSymbolsInTree( iteration );
-    else if( eps )
-        return getRightmostSymbolsInTree( eps );
-    else if( empty )
-        return getRightmostSymbolsInTree( empty );
-
-     throw AlibException( "Glushkov::getRightmostSymbolsInTree - invalid RegExpElement node" );
-}
-
-set<const RegExpSymbol*> Glushkov::getRightmostSymbolsInTree( const Alternation * node ) const
-{
-    set<const RegExpSymbol*> ret;
-
-    for( const auto & e : node->getElements( ) )
-    {
-        set<const RegExpSymbol*> tmp = getRightmostSymbolsInTree( e );
-        ret.insert( tmp.begin( ), tmp.end( ) );
-    }
-
-    return ret;
-}
-
-set<const RegExpSymbol*> Glushkov::getRightmostSymbolsInTree( const Concatenation * node ) const
-{
-    set<const RegExpSymbol*> ret;
-
-    for( auto it = node->getElements( ).rbegin( ); it != node->getElements( ).rend( ) ; it ++ )
-    {
-        set<const RegExpSymbol*> tmp = getRightmostSymbolsInTree( *it );
-        ret.insert( tmp.begin( ), tmp.end( ) );
-
-        if( ! ( * it )->containsEmptyString( ) )
-            break;
-    }
-
-    return ret;
-}
-
-set<const RegExpSymbol*> Glushkov::getRightmostSymbolsInTree( const Iteration * node ) const
-{
-    return getRightmostSymbolsInTree( node->getElement( ) );
-}
-
-set<const RegExpSymbol*> Glushkov::getRightmostSymbolsInTree( const RegExpSymbol * node ) const
-{
-    return set<const RegExpSymbol*> { node };
-}
-
-set<const RegExpSymbol*> Glushkov::getRightmostSymbolsInTree( const RegExpEpsilon * node ) const
-{
-    return set<const RegExpSymbol*>( );
-}
-
-set<const RegExpSymbol*> Glushkov::getRightmostSymbolsInTree( const RegExpEmpty * node ) const
-{
-    return set<const RegExpSymbol*>( );
-}
-
-// ----------------------------------------------------------------------------
-
-set<Glushkov::Neighbours> Glushkov::getNeighbours( const RegExpElement * node )  const
-{
-    const Alternation* alternation = dynamic_cast<const Alternation*>( node );
-    const Concatenation* concatenation = dynamic_cast<const Concatenation*>( node );
-    const Iteration* iteration = dynamic_cast<const Iteration*>( node );
-    const RegExpSymbol* symbol = dynamic_cast<const RegExpSymbol*>( node );
-    const RegExpEmpty* empty = dynamic_cast<const RegExpEmpty*>( node );
-    const RegExpEpsilon* eps = dynamic_cast<const RegExpEpsilon*>( node );
-
-    if( symbol )
-        return getNeighbours( symbol );
-    else if( alternation )
-        return getNeighbours( alternation );
-    else if( concatenation )
-        return getNeighbours( concatenation );
-    else if( iteration )
-        return getNeighbours( iteration );
-    else if( eps )
-        return getNeighbours( eps );
-    else if( empty )
-        return getNeighbours( empty );
-
-     throw AlibException( "Glushkov::getNeighbours - unknown RegExpElement* " );
-}
-
-set<Glushkov::Neighbours> Glushkov::getNeighbours( const Alternation * node ) const
-{
-    set<Neighbours> n;
-    for( const auto & e : node->getElements( ) )
-    {
-        set<Neighbours> tmp = getNeighbours( e );
-        n.insert( tmp.begin( ), tmp.end( ) );
-    }
-
-    return n;
-}
-
-set<Glushkov::Neighbours> Glushkov::getNeighbours( const Concatenation * node ) const
-{
-    set<Neighbours> n;
-    for( const auto & e : node->getElements( ) )
-    {
-        set<Neighbours> tmp = getNeighbours( e );
-        n.insert( tmp.begin( ), tmp.end( ) );
-    }
-
-    for( auto e = node->getElements( ).begin( ); e != node->getElements( ).end( ); e ++ )
-    {
-        auto f = e;
-        if( f == node->getElements( ).end( ) )
-            continue;
-
-        for( f++ ; f != node->getElements( ).end( ); f ++ )
-        {
-            for( const auto & x : getRightmostSymbolsInTree( * e ) )
-                for( const auto & y : getLeftmostSymbolsInTree( * f ) )
-                    n.insert( Neighbours( x, y ) );
-
-            if( ! ( * f )->containsEmptyString( ) )
-                break;
-        }
-    }
-
-    return n;
-}
-
-set<Glushkov::Neighbours> Glushkov::getNeighbours( const Iteration * node ) const
-{
-    set<Neighbours> n;
-    set<Neighbours> tmp = getNeighbours( node->getElement( ) );
-    n.insert( tmp.begin( ), tmp.end( ) );
-
-    for( const auto & x : getRightmostSymbolsInTree( node->getElement( ) ) )
-        for( const auto & y : getLeftmostSymbolsInTree( node->getElement( ) ) )
-            n.insert( Neighbours( x, y ) );
-
-    return n;
-}
-
-set<Glushkov::Neighbours> Glushkov::getNeighbours( const RegExpSymbol * node ) const
-{
-    return set<Neighbours>( );
-}
-
-set<Glushkov::Neighbours> Glushkov::getNeighbours( const RegExpEpsilon * node ) const
-{
-    return set<Neighbours>( );
-}
-
-set<Glushkov::Neighbours> Glushkov::getNeighbours( const RegExpEmpty * node ) const
-{
-    return set<Neighbours>( );
-}
-
-// ----------------------------------------------------------------------------
-
 bool Glushkov::NumberedSymbol::operator<( const NumberedSymbol & x ) const
 {
     return m_i < x.m_i;
@@ -357,21 +110,4 @@ Glushkov::NumberedSymbol::NumberedSymbol( const RegExpSymbol * symbol, int i ) :
 
 }
 
-// ----------------------------------------------------------------------------
-
-Glushkov::Neighbours::Neighbours( const RegExpSymbol * first, const RegExpSymbol * second ) :
-        m_first( first ),
-        m_second( second )
-{
-
-}
-
-bool Glushkov::Neighbours::operator<( const Neighbours & x ) const
-{
-    if( m_first != x.m_first )
-        return m_first < x.m_first;
-    else
-        return m_second < x.m_second;
-}
-
 } /* namespace conversions */
diff --git a/aconversions/src/re2fa/Glushkov.h b/aconversions/src/re2fa/Glushkov.h
index 452028c1f4..5a83b87c1a 100644
--- a/aconversions/src/re2fa/Glushkov.h
+++ b/aconversions/src/re2fa/Glushkov.h
@@ -15,6 +15,7 @@
 #include <regexp/RegExp.h>
 
 #include "AbstractREtoFAConverter.h"
+#include "../shared/GlushkovTraversal.h"
 
 #include "RegExpAlphabet.h"
 
@@ -58,34 +59,9 @@ private:
     void constructEndSymbolSet( void );
     void constructNeighbourSymbolSet( void );
 
-    // TODO: consider moving these to independent class or regexputils
-    std::set<const regexp::RegExpSymbol*> getLeftmostSymbolsInTree( const regexp::RegExpElement * node ) const;
-    std::set<const regexp::RegExpSymbol*> getLeftmostSymbolsInTree( const regexp::Alternation * node ) const;
-    std::set<const regexp::RegExpSymbol*> getLeftmostSymbolsInTree( const regexp::Concatenation * node ) const;
-    std::set<const regexp::RegExpSymbol*> getLeftmostSymbolsInTree( const regexp::Iteration * node ) const;
-    std::set<const regexp::RegExpSymbol*> getLeftmostSymbolsInTree( const regexp::RegExpSymbol * node ) const;
-    std::set<const regexp::RegExpSymbol*> getLeftmostSymbolsInTree( const regexp::RegExpEmpty * node ) const;
-    std::set<const regexp::RegExpSymbol*> getLeftmostSymbolsInTree( const regexp::RegExpEpsilon * node ) const;
-
-    std::set<const regexp::RegExpSymbol*> getRightmostSymbolsInTree( const regexp::RegExpElement * node ) const;
-    std::set<const regexp::RegExpSymbol*> getRightmostSymbolsInTree( const regexp::Alternation * node ) const;
-    std::set<const regexp::RegExpSymbol*> getRightmostSymbolsInTree( const regexp::Concatenation * node ) const;
-    std::set<const regexp::RegExpSymbol*> getRightmostSymbolsInTree( const regexp::Iteration * node ) const;
-    std::set<const regexp::RegExpSymbol*> getRightmostSymbolsInTree( const regexp::RegExpSymbol * node ) const;
-    std::set<const regexp::RegExpSymbol*> getRightmostSymbolsInTree( const regexp::RegExpEmpty * node ) const;
-    std::set<const regexp::RegExpSymbol*> getRightmostSymbolsInTree( const regexp::RegExpEpsilon * node ) const;
-
-    std::set<Neighbours> getNeighbours( const regexp::RegExpElement * node ) const;
-    std::set<Neighbours> getNeighbours( const regexp::Alternation * node ) const;
-    std::set<Neighbours> getNeighbours( const regexp::Concatenation * node ) const;
-    std::set<Neighbours> getNeighbours( const regexp::Iteration * node ) const;
-    std::set<Neighbours> getNeighbours( const regexp::RegExpSymbol * node ) const;
-    std::set<Neighbours> getNeighbours( const regexp::RegExpEmpty * node ) const;
-    std::set<Neighbours> getNeighbours( const regexp::RegExpEpsilon * node ) const;
-
     std::map<const regexp::RegExpElement*, NumberedSymbol> m_numberedSymbols;
     std::set<NumberedSymbol> m_beginSymbolSet, m_endSymbolSet;
-    std::set<Neighbours> m_neighbourSymbolSet;
+    std::set<GlushkovTraversal::Neighbours> m_neighbourSymbolSet;
 
 };
 
-- 
GitLab