From 83d755fa3d024c8c43f9296fdbc08c5c3d08e311 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <peckato1@fit.cvut.cz>
Date: Fri, 9 May 2014 16:16:02 +0200
Subject: [PATCH] libaregexptree: optim: distributive rules

---
 libaregexptree/src/RegExpOptimize.cpp | 75 ++++++++++++++++++++++++++-
 1 file changed, 73 insertions(+), 2 deletions(-)

diff --git a/libaregexptree/src/RegExpOptimize.cpp b/libaregexptree/src/RegExpOptimize.cpp
index 0454a913cf..175dc1e627 100644
--- a/libaregexptree/src/RegExpOptimize.cpp
+++ b/libaregexptree/src/RegExpOptimize.cpp
@@ -347,13 +347,47 @@ bool RegExpOptimize::A7( Concatenation * const & node )
 }
 
 /**
-  * optimization A9: x.(y+z) = x.y + x.z
+  * optimization A8: x.(y+z) = x.y + x.z
   * @param node Concatenation node
   * @return bool true if optimization applied else false
   */
 bool RegExpOptimize::A8( Concatenation * const & node )
 {
     bool optimized = false;
+
+    for( auto it = std::next( node->getElements( ).begin( ) ); it != node->getElements( ).end( ); ) // starts at the 2nd element: the 1st has nothing to its left to distribute. NOTE(review): std::next( begin ) presumes node is non-empty -- confirm
+    {
+        Alternation * alt = dynamic_cast<Alternation*>( * it ); // nullptr unless the current element is an Alternation
+        if( ! alt )
+        {
+            it ++;
+            continue;
+        }
+
+        // Distribute the prefix: x.(y+z) becomes (x.y + x.z), where x is everything to the left of the alternation.
+        Concatenation * leftPart = new Concatenation( ); // temporary holder for the prefix [begin, it)
+        leftPart->getElements( ).insert( leftPart->getElements( ).end( ), node->getElements( ).begin( ), it ); // copies the pointers only; node still lists the same elements
+
+        for( auto altIt = alt->getElements( ).begin( ); altIt != alt->getElements( ).end( ); altIt ++ )
+        {
+            Concatenation * altElem = new Concatenation( ); // will hold prefix.alternative
+            altElem->getElements( ).push_back( leftPart->clone( ) ); // each alternative receives its own clone of the prefix
+            altElem->getElements( ).push_back( * altIt );
+
+            * altIt = altElem; // swap the alternative for the new concatenation in place
+        }
+
+        RegExpElement * optIt = optimize( * it ); // the result of optimize( ) replaces the rewritten alternation, which is deleted below
+        delete *it;
+        *it = optIt;
+
+        delete leftPart; // NOTE(review): presumably this also frees the prefix elements it holds, which is why the next line only erases them from node -- confirm against ~Concatenation
+        it = node->getElements( ).erase( node->getElements( ).begin( ), it ); // drop the prefix from node; for a standard container erase returns the position of the element that followed the prefix
+
+        optimized = true;
+        it ++;
+    }
+
     return optimized;
 }
 
@@ -365,6 +399,43 @@ bool RegExpOptimize::A8( Concatenation * const & node )
 bool RegExpOptimize::A9( Concatenation * const & node )
 {
     bool optimized = false;
+
+    for( auto it = node->getElements( ).begin( ); it != std::prev( node->getElements( ).end( ) ); ) // the last element is never visited: nothing follows it. NOTE(review): std::prev( end ) presumes node is non-empty -- confirm
+    {
+        Alternation * alt = dynamic_cast<Alternation*>( * it ); // nullptr unless the current element is an Alternation
+        if( ! alt )
+        {
+            it ++;
+            continue;
+        }
+
+        // Distribute the suffix: (y+z).x becomes (y.x + z.x), where x is everything to the right of the alternation.
+        Concatenation * rest = new Concatenation( ); // temporary holder for the suffix (it, end)
+        rest->getElements( ).insert( rest->getElements( ).end( ), std::next( it ), node->getElements( ).end( ) ); // copies the pointers only; node still lists the same elements
+
+        for( auto altIt = alt->getElements( ).begin( ); altIt != alt->getElements( ).end( ); altIt ++ )
+        {
+            Concatenation * altElem = new Concatenation( ); // will hold alternative.suffix
+            altElem->getElements( ).push_back( * altIt );
+            altElem->getElements( ).push_back( rest->clone( ) ); // each alternative receives its own clone of the suffix
+
+            * altIt = altElem; // swap the alternative for the new concatenation in place
+        }
+
+        RegExpElement * optIt = optimize( * it ); // the result of optimize( ) replaces the rewritten alternation, which is deleted below
+        delete *it;
+        *it = optIt;
+
+        delete rest; // NOTE(review): presumably this also frees the suffix elements it holds, which is why the next line only erases them from node -- confirm against ~Concatenation
+        it = node->getElements( ).erase( std::next( it ), node->getElements( ).end( ) ); // drop the suffix from node; for a standard container erase returns end( ) here
+        optimized = true;
+
+        // As the rest of this expression has been moved (deleted), there is surely nothing left for another round here. More optimizations to be performed are in the subtree now.
+        // We do not care about them here, as method optimize(Alternation) will take care of this in the next iteration.
+        // it ++;
+        break;
+    }
+
     return optimized;
 }
 
@@ -1112,4 +1183,4 @@ bool RegExpOptimize::X1( Alternation * const & node )
     }
 
     return false;
-}
\ No newline at end of file
+}
-- 
GitLab