From 435a16d313ace9a311d9ef537cb6a581b3a02dd4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <peckato1@fit.cvut.cz>
Date: Mon, 5 May 2014 12:50:20 +0200
Subject: [PATCH] libaregexptree: optim:Concat V9

---
 libaregexptree/makefile               |  2 +-
 libaregexptree/src/RegExpOptimize.cpp | 71 ++++++++++++++++++++++++---
 2 files changed, 66 insertions(+), 7 deletions(-)

diff --git a/libaregexptree/makefile b/libaregexptree/makefile
index 59349150b1..617d601ca0 100644
--- a/libaregexptree/makefile
+++ b/libaregexptree/makefile
@@ -1,6 +1,6 @@
 CC=g++
 LIBRARY=libaregexptree.so 
-CCFLAGS= -std=c++11 -O2 -c -Wall -fPIC -I../alib/src 
+CCFLAGS= -std=c++11 -O2 -c -g -Wall -fPIC -I../alib/src 
 LDFLAGS= -L../alib/lib -lalib -shared
 
 SOURCES=$(shell find src/ -name *cpp)
diff --git a/libaregexptree/src/RegExpOptimize.cpp b/libaregexptree/src/RegExpOptimize.cpp
index 4ff1638418..300f6ef02f 100644
--- a/libaregexptree/src/RegExpOptimize.cpp
+++ b/libaregexptree/src/RegExpOptimize.cpp
@@ -71,7 +71,11 @@ RegExpElement * RegExpOptimize::optimize( Alternation const * const & node )
         return ret;
     }
 
-    assert( alt->getElements( ).size( ) > 0 );
+    // assert( alt->getElements( ).size( ) > 0 );
+    if( alt->getElements( ).size( ) <= 0 )
+    {
+       std::cout << RegExp( node ) << std::endl;
+    }
     return alt;
 }
 
@@ -82,7 +86,16 @@ RegExpElement * RegExpOptimize::optimize( Concatenation const * const & node )
     for( const auto & child : node->getElements( ) )
         concat->getElements( ).push_back( optimize( child ) );
 
-    while( A5( concat ) || A6( concat ) || A7( concat ) || A8( concat ) || A9( concat ) || V8( concat ) || V9( concat ) );
+    do
+    {
+        // FIXME: A7 is inlined here; invoking A7( concat ) from the loop condition below ended in an infinite loop
+        if( std::any_of( concat->getElements( ).begin( ), concat->getElements( ).end( ), []( RegExpElement const * const & a ) -> bool{ return dynamic_cast<RegExpEmpty const *>( a ); } ) )
+        {
+            delete concat;
+            return new RegExpEmpty( );
+        }
+    }
+    while( A5( concat ) || A6( concat ) /*|| A7( concat ) */ || A8( concat ) || A9( concat ) || V8( concat ) || V9( concat ) );
 
     if( concat->getElements( ).size( ) == 1 )
     {
@@ -319,9 +332,7 @@ bool RegExpOptimize::A7( Concatenation * const & node )
 
     // FIXME: alib2 uses shared_ptrs, rewrite this using remove_if then
 
-    if( std::any_of( node->getElements( ).begin( ), node->getElements( ).end( ), []( RegExpElement const * const & a ) -> bool{
-        return dynamic_cast<RegExpEmpty const *>( a );
-    }))
+    if( std::any_of( node->getElements( ).begin( ), node->getElements( ).end( ), []( RegExpElement const * const & a ) -> bool{ return dynamic_cast<RegExpEmpty const *>( a ); } ) )
     {
         for( auto const& child : node->getElements( ) )
             delete child;
@@ -688,14 +699,62 @@ bool RegExpOptimize::V8( Concatenation * const & node )
   * @param node Concatenation node
   * @return bool true if optimization applied else false
   */
-
 bool RegExpOptimize::V9( Concatenation * const & node )
 {
     bool optimized = false;
 
+    // V9: when an Iteration wraps a Concatenation (C2) and the siblings that directly follow it in node (C1)
+    //      repeat the leading elements of C2, hoist those siblings in front: (axy)*axz = ax(yax)*z.
 
+    for( auto it = node->getElements( ).begin( ) ; it != node->getElements( ).end( ) ; )
+    {
+        Iteration * iter = dynamic_cast<Iteration*>( * it );
+        if ( ! iter )
+        {
+            ++it;
+            continue;
+        }
+        Concatenation * concat = dynamic_cast<Concatenation*>( iter->getElement ( ) );
+        if( ! concat )
+        {
+            ++it;
+            continue;
+        }
 
+        // find the longest equal run of <it+1;c1Iter) in node and <begin;c2Iter) in concat
+        auto c1Iter = std::next( it ), c2Iter = concat->getElements( ).begin( );
+        while( c1Iter != node->getElements( ).end() && c2Iter != concat->getElements( ).end( ) && **c1Iter == ** c2Iter )
+        {
+            ++c1Iter;
+            ++c2Iter;
+        }
 
+        if( c1Iter == std::next( it ) )
+        {
+            ++it;
+            continue;
+        }
+
+        // a non-empty common run exists, so the tree is rewritten below; report that through
+        // the return value (it used to stay false forever), so that callers which loop until
+        // no rule applies get to re-run the other optimizations on the changed node
+        optimized = true;
+
+        // remember the matched run <it+1;c1Iter), erase it from node and step back to the iteration node
+        list<RegExpElement*> copyRange;
+        copyRange.insert( copyRange.end(), std::next( it ), c1Iter );
+        it = node->getElements( ).erase( std::next( it ), c1Iter );
+        it = std::prev( it );
+
+        // re-insert the run in front of the iteration; it is reset to insert's result
+        it = node->getElements( ).insert( it, copyRange.begin( ), copyRange.end( ) );
+
+        // rotate the iteration's concat node: its matched prefix <begin;c2Iter) moves to its end
+        copyRange.clear( );
+        copyRange.insert( copyRange.end(), concat->getElements( ).begin( ), c2Iter );
+        concat->getElements( ).erase( concat->getElements( ).begin( ), c2Iter );
+        concat->getElements( ).insert( concat->getElements( ).end(), copyRange.begin( ), copyRange.end( ) );
+    }
 
     return optimized;
 }
-- 
GitLab