From 83d755fa3d024c8c43f9296fdbc08c5c3d08e311 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <peckato1@fit.cvut.cz> Date: Fri, 9 May 2014 16:16:02 +0200 Subject: [PATCH] libaregexptree: optim: distributive rules --- libaregexptree/src/RegExpOptimize.cpp | 75 ++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 2 deletions(-) diff --git a/libaregexptree/src/RegExpOptimize.cpp b/libaregexptree/src/RegExpOptimize.cpp index 0454a913cf..175dc1e627 100644 --- a/libaregexptree/src/RegExpOptimize.cpp +++ b/libaregexptree/src/RegExpOptimize.cpp @@ -347,13 +347,47 @@ bool RegExpOptimize::A7( Concatenation * const & node ) } /** - * optimization A9: x.(y+z) = x.y + x.z + * optimization A8: x.(y+z) = x.y + x.z (NOTE(review): the loop starts at std::next( begin( ) ), which presumes node has at least one element; confirm with callers) * @param node Concatenation node * @return bool true if optimization applied else false */ bool RegExpOptimize::A8( Concatenation * const & node ) { bool optimized = false; + + for( auto it = std::next( node->getElements( ).begin( ) ); it != node->getElements( ).end( ); ) + { + Alternation * alt = dynamic_cast<Alternation*>( * it ); + if( ! alt ) + { + it ++; + continue; + } + + // take everything to the left and copy it as prefix of every element in alternation. NOTE(review): leftPart shares the element pointers of node (they are inserted, not cloned) and is deleted before the same range is erased from node; this presumes that deleting a Concatenation also deletes its elements, confirm. 
+ Concatenation * leftPart = new Concatenation( ); + leftPart->getElements( ).insert( leftPart->getElements( ).end( ), node->getElements( ).begin( ), it ); + + for( auto altIt = alt->getElements( ).begin( ); altIt != alt->getElements( ).end( ); altIt ++ ) + { + Concatenation * altElem = new Concatenation( ); + altElem->getElements( ).push_back( leftPart->clone( ) ); + altElem->getElements( ).push_back( * altIt ); + + * altIt = altElem; + } + + RegExpElement * optIt = optimize( * it ); + delete *it; + *it = optIt; + + delete leftPart; + it = node->getElements( ).erase( node->getElements( ).begin( ), it ); + + optimized = true; + it ++; + } + return optimized; } @@ -365,6 +399,43 @@ bool RegExpOptimize::A8( Concatenation * const & node ) bool RegExpOptimize::A9( Concatenation * const & node ) { bool optimized = false; + + for( auto it = node->getElements( ).begin( ); it != std::prev( node->getElements( ).end( ) ); ) + { + Alternation * alt = dynamic_cast<Alternation*>( * it ); + if( ! alt ) + { + it ++; + continue; + } + + // take everything to the right and copy it as suffix of every element in alternation. NOTE(review): rest shares the element pointers of node and is deleted before that range is erased, which presumes that deleting a Concatenation also deletes its elements; the loop bound std::prev( end( ) ) also presumes node is non-empty, confirm both. + Concatenation * rest = new Concatenation( ); + rest->getElements( ).insert( rest->getElements( ).end( ), std::next( it ), node->getElements( ).end( ) ); + + for( auto altIt = alt->getElements( ).begin( ); altIt != alt->getElements( ).end( ); altIt ++ ) + { + Concatenation * altElem = new Concatenation( ); + altElem->getElements( ).push_back( * altIt ); + altElem->getElements( ).push_back( rest->clone( ) ); + + * altIt = altElem; + } + + RegExpElement * optIt = optimize( * it ); + delete *it; + *it = optIt; + + delete rest; + it = node->getElements( ).erase( std::next( it ), node->getElements( ).end( ) ); + optimized = true; + + // as we move (delete) the rest of this expression, it surely will not do another round. More optimizations to be performed are in subtree now. 
+ // we do not care about this here as method optimize(Alternation) will take care of this in next iteration + // it ++; + break; + } + return optimized; } @@ -1112,4 +1183,4 @@ bool RegExpOptimize::X1( Alternation * const & node ) } return false; -} \ No newline at end of file +} -- GitLab