diff --git a/libaregexptree/src/RegExpOptimize.cpp b/libaregexptree/src/RegExpOptimize.cpp index 0454a913cf5658406a1e698fe4865fa515a4a77a..175dc1e62726fde01edcc582127b9a83e156bfbe 100644 --- a/libaregexptree/src/RegExpOptimize.cpp +++ b/libaregexptree/src/RegExpOptimize.cpp @@ -347,13 +347,47 @@ bool RegExpOptimize::A7( Concatenation * const & node ) } /** - * optimization A9: x.(y+z) = x.y + x.z + * optimization A8: x.(y+z) = x.y + x.z (everything left of an alternation is distributed into that alternation) * @param node Concatenation node * @return bool true if optimization applied else false */ bool RegExpOptimize::A8( Concatenation * const & node ) { bool optimized = false; + + for( auto it = std::next( node->getElements( ).begin( ) ); it != node->getElements( ).end( ); ) + { + Alternation * alt = dynamic_cast<Alternation*>( * it ); + if( ! alt ) + { + it ++; + continue; + } + + // gather the pointers of everything to the left of the alternation, a clone of that prefix is prepended to every alternative. + Concatenation * leftPart = new Concatenation( ); + leftPart->getElements( ).insert( leftPart->getElements( ).end( ), node->getElements( ).begin( ), it ); + + for( auto altIt = alt->getElements( ).begin( ); altIt != alt->getElements( ).end( ); altIt ++ ) + { + Concatenation * altElem = new Concatenation( ); + altElem->getElements( ).push_back( leftPart->clone( ) ); + altElem->getElements( ).push_back( * altIt ); + + * altIt = altElem; + } + + RegExpElement * optIt = optimize( * it ); + delete *it; + *it = optIt; + + delete leftPart; + it = node->getElements( ).erase( node->getElements( ).begin( ), it ); + + optimized = true; + it ++; + } + return optimized; } @@ -365,6 +399,43 @@ bool RegExpOptimize::A8( Concatenation * const & node ) bool RegExpOptimize::A9( Concatenation * const & node ) { bool optimized = false; + + for( auto it = node->getElements( ).begin( ); it != std::prev( node->getElements( ).end( ) ); ) + { + Alternation * alt = dynamic_cast<Alternation*>( * it ); + if( ! 
alt ) + { + it ++; + continue; + } + + // gather the pointers of everything to the right of the alternation, a clone of that suffix is appended to every alternative. + Concatenation * rest = new Concatenation( ); + rest->getElements( ).insert( rest->getElements( ).end( ), std::next( it ), node->getElements( ).end( ) ); + + for( auto altIt = alt->getElements( ).begin( ); altIt != alt->getElements( ).end( ); altIt ++ ) + { + Concatenation * altElem = new Concatenation( ); + altElem->getElements( ).push_back( * altIt ); + altElem->getElements( ).push_back( rest->clone( ) ); + + * altIt = altElem; + } + + RegExpElement * optIt = optimize( * it ); + delete *it; + *it = optIt; + + delete rest; + it = node->getElements( ).erase( std::next( it ), node->getElements( ).end( ) ); + optimized = true; + + // the rest of this expression was just erased, so the alternation is now the last element and the loop cannot do another round. Any further optimizations to be performed are in the subtree now. + // we do not handle that here, the original author expects method optimize(Alternation) to take care of it in the next iteration (NOTE(review): not verifiable from this hunk, confirm). + // it ++; + break; + } + return optimized; } @@ -1112,4 +1183,4 @@ bool RegExpOptimize::X1( Alternation * const & node ) } return false; -} \ No newline at end of file +}