From cb103a4aed886d4f03cf7bfd527f8b3157efeff9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <peckato1@fit.cvut.cz>
Date: Sun, 4 May 2014 22:28:40 +0200
Subject: [PATCH] libaregexptree: optim: All alternations but V5,V6

---
 libaregexptree/src/RegExpOptimize.cpp | 645 ++++++++++++++++++++++----
 libaregexptree/src/RegExpOptimize.h   |  72 ++-
 2 files changed, 608 insertions(+), 109 deletions(-)

diff --git a/libaregexptree/src/RegExpOptimize.cpp b/libaregexptree/src/RegExpOptimize.cpp
index 36ea5c56ea..948e0016c2 100644
--- a/libaregexptree/src/RegExpOptimize.cpp
+++ b/libaregexptree/src/RegExpOptimize.cpp
@@ -7,211 +7,652 @@
 
 #include "RegExpOptimize.h"
 
+#include <cassert>
+#include <iostream>
+
 using namespace alib;
 using namespace regexp;
 
-RegExp RegExpOptimize::optimize( const RegExp & regexp )
+RegExp RegExpOptimize::optimize( RegExp const & regexp )
 {
     RegExpElement* optimized = optimize( regexp.getRegExp( ) );
+    // wrap the optimized element tree in a new RegExp, which is returned by value
     RegExp ret;
     ret.setRegExp( optimized );
 
     return ret;
 }
 
-RegExpElement* RegExpOptimize::optimize( const RegExpElement * node )
+RegExpElement* RegExpOptimize::optimize( RegExpElement const * const & node )
 {
     const Alternation * alternation = dynamic_cast<const Alternation*>( node );
-    const Concatenation * concatenation = dynamic_cast<const Concatenation*>( node );
-    const Iteration * iteration = dynamic_cast<const Iteration*>( node );
-    const RegExpSymbol * symbol = dynamic_cast<const RegExpSymbol*>( node );
-    const RegExpEmpty * empty= dynamic_cast<const RegExpEmpty*>( node );
-    const RegExpEpsilon * eps = dynamic_cast<const RegExpEpsilon*>( node );
-
     if( alternation )
         return optimize( alternation );
+    // each further cast is attempted only when the previous type test failed
+    const Concatenation * concatenation = dynamic_cast<const Concatenation*>( node );
     if( concatenation )
         return optimize( concatenation );
+
+    const Iteration * iteration = dynamic_cast<const Iteration*>( node );
     if( iteration )
         return optimize( iteration );
+
+    const RegExpSymbol * symbol = dynamic_cast<const RegExpSymbol*>( node );
     if( symbol )
         return optimize( symbol );
+
+    const RegExpEmpty * empty= dynamic_cast<const RegExpEmpty*>( node );
     if( empty )
         return optimize( empty );
+
+    const RegExpEpsilon * eps = dynamic_cast<const RegExpEpsilon*>( node );
     if( eps )
         return optimize( eps );
 
     throw AlibException( "RegExpOptimize::optimize - unknown RegExpElement node" );
 }
 
-RegExpElement * RegExpOptimize::optimize( const Alternation * node )
+
+RegExpElement * RegExpOptimize::optimize( Alternation const * const & node )
 {
     Alternation* alt = new Alternation( );
 
     for( const auto & child : node->getElements( ) )
         alt->getElements( ).push_back( optimize( child ) );
 
-    // Targets for optimization: Melichar, 2.87: Rule A1, A2, A3, A4, A8, A9
+    // apply the rules until none reports a change; || short-circuits, so a pass restarts at A1 as soon as one rule fires
+    while( A1( alt ) || A2( alt ) || A3( alt ) || A4( alt ) || A10( alt ) || V2( alt ) || V5( alt ) || V6( alt ) ); // NOTE: V5 and V6 are stubs that always return false
 
-    // A1: x + ( y + z ) = ( x + y ) + z
-    for( auto it = alt->getElements( ).begin( ) ; it != alt->getElements( ).end( ); )
+    if( alt->getElements( ).size( ) == 1 ) // an alternation of one element is replaced by that element
     {
-        Alternation * childAlt = dynamic_cast<Alternation*> ( * it );
-        if( childAlt )
+        RegExpElement* ret = alt->getElements( ).front( );
+        alt->getElements( ).pop_front( ); // detach the child first (presumably ~Alternation frees the elements it still holds - TODO confirm)
+        delete alt;
+        return ret;
+    }
+
+    assert( alt->getElements( ).size( ) > 0 ); // the result is expected to keep at least one element
+    return alt;
+}
+
+RegExpElement * RegExpOptimize::optimize( Concatenation const * const & node )
+{
+    Concatenation* concat = new Concatenation( );
+    // rebuild the node from recursively optimized copies of the children
+    for( const auto & child : node->getElements( ) )
+        concat->getElements( ).push_back( optimize( child ) );
+
+    /* the concatenation rules are not enabled yet (note that A7 is missing from this list):
+    while( A5( concat ) || A6( concat ) || A8( concat ) || A9( concat ) || V8( concat ) || V9( concat ) );
+    */
+
+    if( concat->getElements( ).size( ) == 1 ) // a concatenation of one element is replaced by that element
+    {
+        RegExpElement* ret = concat->getElements( ).front( );
+        concat->getElements( ).pop_front( ); // detach the child first (presumably ~Concatenation frees the elements it still holds - TODO confirm)
+        delete concat;
+        return ret;
+    }
+
+    assert( concat->getElements( ).size( ) > 0 ); // the result is expected to keep at least one element
+    return concat;
+}
+
+RegExpElement * RegExpOptimize::optimize( Iteration const * const & node )
+{
+    Iteration* iter = new Iteration( );
+    iter->setElement( optimize( node->getElement( ) ) ); // only the child is optimized; no iteration rule (A11, V1) is applied here
+    return iter;
+}
+
+RegExpElement * RegExpOptimize::optimize( RegExpSymbol const * const & node )
+{
+    return node->clone( ); // no rewrite is applied to a symbol; the result of clone( ) is returned
+}
+
+RegExpElement * RegExpOptimize::optimize( RegExpEmpty const * const & node )
+{
+    return node->clone( ); // no rewrite is applied to \0; the result of clone( ) is returned
+}
+
+RegExpElement * RegExpOptimize::optimize( RegExpEpsilon const * const & node )
+{
+    return node->clone( ); // no rewrite is applied to \e; the result of clone( ) is returned
+}
+
+// ----------------------------------------------------------------------------
+
+/**
+  * optimization A1: x + ( y + z ) = ( x + y ) + z = x + y + z (children of a nested alternation are spliced into the parent)
+  * @param node Alternation node, modified in place
+  * @return bool true if optimization applied else false
+  */
+bool RegExpOptimize::A1( Alternation * const & node )
+{
+    bool optimized = false;
+
+    for( auto it = node->getElements( ).begin( ); it != node->getElements( ).end( ); )
+    {
+        Alternation * const & childAlternation = dynamic_cast<Alternation *>( * it );
+
+        if( childAlternation )
         {
-            auto it2 = it;
-            it2 ++;
+            it = node->getElements( ).erase( it ); // unlink the nested node; "it" now marks the position the children are inserted before
 
-            alt->getElements( ).insert( it2, childAlt->getElements( ).begin( ), childAlt->getElements( ).end( ) );
+            std::copy( childAlternation->getElements( ).begin( ), childAlternation->getElements( ).end( ), inserter( node->getElements( ), it ) );
+            childAlternation->getElements( ).clear( ); // the children now belong to the parent, so they are detached before the delete
+            delete childAlternation;
 
-            childAlt->getElements( ).clear( );
-            delete childAlt;
-            it = alt->getElements( ).erase( it );
+            optimized = true; // the inserted children are not re-scanned in this call
         }
         else
+        {
             it ++;
+        }
     }
 
-    // A2: x + y = y + x
-    alt->getElements().sort([](RegExpElement const * const & a, RegExpElement const * const & b) -> bool {
-      return *a < *b;
-    });
+    return optimized;
+}
 
-    // A3: x + EMPTY = x
-    for( auto it = alt->getElements( ).begin( ) ; it != alt->getElements( ).end( ); )
+/**
+  * optimization A2: x + y = y + x (the children are sorted using operator< of the pointed-to elements)
+  * @param node Alternation node, modified in place
+  * @return bool true if the children had to be reordered, false if they were already sorted
+  */
+bool RegExpOptimize::A2( Alternation * const & node )
+{
+    std::function<bool( RegExpElement const * const & a, RegExpElement const * const & b )> cmp = [ ]( RegExpElement const * const & a, RegExpElement const * const & b ) -> bool { return *a < *b; };
+
+    if( std::is_sorted( node->getElements( ).begin( ), node->getElements( ).end( ), cmp ) )
+        return false; // already ordered: report "no change"
+
+    node->getElements( ).sort( cmp );
+    return true;
+}
+
+/**
+  * optimization A3: x + \0 = x (RegExpEmpty children are removed, but the last remaining element is always kept)
+  * @param node Alternation node, modified in place
+  * @return bool true if optimization applied else false
+  */
+bool RegExpOptimize::A3( Alternation * const & node )
+{
+    bool optimized = false;
+
+    // input can be \0 + \0, so at least one element must be preserved (hence the size( ) > 1 test below)
+
+    // FIXME: alib2 uses shared_ptrs, rewrite this using remove_if then
+
+    for( auto it = node->getElements( ).begin( ); it != node->getElements( ).end( ); )
     {
-        if ( dynamic_cast<RegExpEmpty*>( *it ) )
+        RegExpEmpty const * const & empty = dynamic_cast<RegExpEmpty const *>( * it );
+
+        if( empty && node->getElements( ).size( ) > 1 )
         {
-            delete *it;
-            it = alt->getElements( ).erase( it );
+            it = node->getElements( ).erase( it ); // erase( ) returns the iterator to the next element
+            delete empty;
+
+            optimized = true;
         }
         else
+        {
             it ++;
+        }
     }
 
-    // TODO: A4: x + x = x
-        // depends on RegExpElement::operator<
-
+    return optimized;
+}
 
-    // Empty subtree and alternation of single node
-    if( alt->getElements( ).size( ) == 0 )
+/**
+  * optimization A4: x + x = x (removes an element that equals its immediate predecessor)
+  * @param node Alternation node, modified in place; expected to be sorted already (see the note in the body)
+  * @return bool true if optimization applied else false
+  */
+bool RegExpOptimize::A4( Alternation * const & node )
+{
+    /*
+     * two ways of implementing this optimization:
+     * - sort and call std::unique ( O(n lg n) + O(n) ), but it also sorts...
+     * - check every element against other ( O(n*n) )
+     *
+     * As we always sort in optimization, we can use the first version, but A4 must be __always__ called __after__ A2
+     */
+
+    // uncomment if smart ptrs used
+    // node->getElements( ).unique( [ ]( RegExpElement const * const & a, RegExpElement const * const & b ) -> bool {
+    //     return *a == *b;
+    // } );
+
+    bool optimized = false;;
+    for( auto it = std::next( node->getElements( ).begin( ) ); it != node->getElements( ).end( ); ) // NOTE(review): std::next( begin( ) ) assumes a non-empty list - confirm callers guarantee it
     {
-        delete alt;
-        return new RegExpEmpty( );
+        if ( ** it == ** std::prev( it ) ) // only neighbouring elements are compared
+        {
+            delete * it;
+            it = node->getElements( ).erase( it );
+            optimized = true;
+        }
+        else
+        {
+            it ++;
+        }
     }
-    else if( alt->getElements( ).size( ) == 1 )
+
+    return optimized;
+}
+
+/**
+  * optimization A5: x.(y.z) = (x.y).z = x.y.z (children of a nested concatenation are spliced into the parent)
+  * @param node Concatenation node, modified in place
+  * @return bool true if optimization applied else false
+  */
+bool RegExpOptimize::A5( Concatenation * const & node )
+{
+    bool optimized = false;
+
+    for( auto it = node->getElements( ).begin( ); it != node->getElements( ).end( ); )
     {
-        RegExpElement * ret = * alt->getElements( ).begin( );
-        alt->getElements( ).clear( );
-        delete alt;
-        return ret;
+        Concatenation * const & childConcatenation = dynamic_cast<Concatenation *>( * it );
+
+        if( childConcatenation )
+        {
+            it = node->getElements( ).erase( it ); // unlink the nested node; "it" now marks the position the children are inserted before
+            std::copy( childConcatenation->getElements( ).begin( ), childConcatenation->getElements( ).end( ), inserter( node->getElements( ), it ) );
+            childConcatenation->getElements( ).clear( ); // the children now belong to the parent, so they are detached before the delete
+            delete childConcatenation;
+
+            optimized = true; // the inserted children are not re-scanned in this call
+        }
+        else
+            it ++;
     }
 
-    return alt;
+    return optimized;
 }
 
-RegExpElement * RegExpOptimize::optimize( const Concatenation * node )
+/**
+  * optimization A6: \e.x = x.\e = x (RegExpEpsilon children are removed, but the last remaining element is always kept)
+  * @param node Concatenation node, modified in place
+  * @return bool true if optimization applied else false
+  */
+bool RegExpOptimize::A6( Concatenation * const & node )
 {
-    Concatenation* concat = new Concatenation( );
-
-    for( const auto & child : node->getElements( ) )
-        concat->getElements( ).push_back( optimize( child ) );
+    bool optimized = false;
 
-    // Targets for optimization: Melichar, 2.87: Rule A5, A6, A7
+    // FIXME: alib2 uses shared_ptrs, rewrite this using remove_if then
 
-    // A5: a.(b.c) = (a.b).c
-    for( auto it = concat->getElements( ).begin( ) ; it != concat->getElements( ).end( ) ; )
+    for( auto it = node->getElements( ).begin( ); it != node->getElements( ).end( ); )
     {
-        Concatenation * childConcat = dynamic_cast<Concatenation*>( * it );
-        if( childConcat )
+        RegExpEpsilon* epsilon = dynamic_cast<RegExpEpsilon*>( * it );
+        if( epsilon && node->getElements( ).size( ) > 1 ) // size( ) > 1 keeps the last element, so \e.\e ends up as a single \e
         {
-            auto it2 = it;
-            it2 ++;
-
-            concat->getElements( ).insert( it2, childConcat->getElements( ).begin( ), childConcat->getElements( ).end( ) );
+            delete * it;
+            it = node->getElements( ).erase( it );
 
-            childConcat->getElements( ).clear( );
-            delete childConcat;
-            it = concat->getElements( ).erase( it );
+            optimized = true;
         }
         else
             it ++;
     }
 
-    // A7: EMPTY.x = EMPTY
-    for( const auto & childNode : concat->getElements( ) )
+    return optimized;
+}
+
+/**
+  * optimization A7: \0.x = x.\0 = \0 (if any child is \0, all children are replaced by a single new \0)
+  * @param node Concatenation node, modified in place
+  * @return bool true if optimization applied else false
+  */
+bool RegExpOptimize::A7( Concatenation * const & node )
+{
+    bool optimized = false;
+
+    // FIXME: alib2 uses shared_ptrs, rewrite this using remove_if then
+
+    if( std::any_of( node->getElements( ).begin( ), node->getElements( ).end( ), []( RegExpElement const * const & a ) -> bool{
+        return dynamic_cast<RegExpEmpty const *>( a );
+    }))
     {
-        if( dynamic_cast<RegExpEmpty*>( childNode ) )
-        {
-            delete concat;
-            return new RegExpEmpty( );
-        }
+        for( auto const& child : node->getElements( ) )
+            delete child;
+
+        node->getElements( ).clear( );
+        node->getElements( ).push_back( new RegExpEmpty( ) );
+        // NOTE(review): a node that already is a single \0 also reports true, so a "while( A7( x ) )" style loop would never stop
+        optimized = true;
     }
 
-    // A6: EPS.x = x.EPS = x
-    for( auto it = concat->getElements( ).begin( ) ; it != concat->getElements( ).end( ); )
+    return optimized;
+}
+
+/**
+  * optimization A8: x.(y+z) = x.y + x.z (not implemented yet)
+  * @param node Concatenation node
+  * @return bool true if optimization applied else false (currently always false)
+  */
+bool RegExpOptimize::A8( Concatenation * const & node )
+{
+    bool optimized = false; // stub: node is left untouched
+    return optimized;
+}
+
+/**
+  * optimization A9: (x+y).z = x.z + y.z (not implemented yet)
+  * @param node Concatenation node
+  * @return bool true if optimization applied else false (currently always false)
+  */
+bool RegExpOptimize::A9( Concatenation * const & node )
+{
+    bool optimized = false; // stub: node is left untouched
+    return optimized;
+}
+
+
+/**
+  * optimization A10: x* = \e + x*x (used right to left: a "\e" and an "x*x" child of the alternation are replaced by "x*")
+  * @param node Alternation node, modified in place
+  * @return bool true if optimization applied else false
+  */
+bool RegExpOptimize::A10( Alternation * const & node )
+{
+    bool optimized = false, optimizedIter = false;
+
+    for( auto it = node->getElements( ).begin( ); it != node->getElements( ).end( ); )
     {
-        // EPS.EPS.EPS.EPS = EPS
-        if( dynamic_cast<RegExpEpsilon*>( *it ) && concat->getElements( ).size( ) > 1 )
+        optimizedIter = false;
+
+        // check if we have some epsilon left, else nothing to do
+        auto eps = find_if( node->getElements( ).begin( ), node->getElements( ).end( ), [ ]( RegExpElement const * const & a ) -> bool {
+            return dynamic_cast<RegExpEpsilon const *>( a );
+        });
+        if( eps == node->getElements( ).end( ) )
+            break;
+
+        Concatenation const * const & childConcat = dynamic_cast<Concatenation const *>( *it );
+        if( childConcat )
         {
-            delete * it;
-            it = concat->getElements( ).erase( it );
+            // the rule only applies if an iteration is the first element of the concatenation
+            Iteration const * const & iter = dynamic_cast<Iteration const *>( childConcat->getElements( ).front( ) );
+
+            if( iter )
+            {
+                // temporary copy of the concatenation without the leading iteration node
+                Concatenation *tmpConcat = dynamic_cast<Concatenation *>( childConcat->clone( ) );
+                delete tmpConcat->getElements( ).front( );
+                tmpConcat->getElements( ).pop_front( );
+                RegExpElement * tmpConcatOpt = optimize( tmpConcat ); // optimize( ) returns the single child when only one element is left
+
+                // check if iteration element is the same subtree as rest of concatenation
+                if( * iter->getElement( ) == * tmpConcatOpt )
+                {
+                    optimized = optimizedIter = true;
+
+                    node->getElements( ).push_back( iter->clone( ) ); // the replacement x* is appended at the end of the alternation
+
+                    delete childConcat;
+                    it = node->getElements( ).erase( it );
+
+                    // look the eps up again before erasing it (the previous erase changed the list)
+                    eps = find_if( node->getElements( ).begin( ), node->getElements( ).end( ), [ ]( RegExpElement const * const & a ) -> bool {
+                        return dynamic_cast<RegExpEpsilon const *>( a );
+                    });
+                    delete *eps;
+                    it = node->getElements( ).erase( eps ); // NOTE(review): scanning resumes after the erased \e, not at the position of the removed concatenation
+                }
+                delete tmpConcat;
+                delete tmpConcatOpt;
+            }
         }
-        else
+
+        if( ! optimizedIter )
             it ++;
     }
 
-    if( concat->getElements( ).size( ) == 1 )
+    return optimized;
+}
+
+/**
+  * optimization A11: x* = (\e + x)* (removes the first \e found in the alternation below the iteration)
+  * @param node Iteration node; its child alternation is modified in place
+  * @return bool true if optimization applied else false
+  */
+bool RegExpOptimize::A11( Iteration * const & node )
+{
+    bool optimized = false;
+
+    Alternation * const & childAlt = dynamic_cast<Alternation *>( node->getElement( ) );
+
+    if( childAlt )
     {
-        RegExpElement * ret = * concat->getElements( ).begin( );
-        concat->getElements( ).clear( );
-        delete concat;
-        return ret;
+        // check if eps inside iteration's alternation
+        auto eps = find_if( childAlt->getElements( ).begin( ), childAlt->getElements( ).end( ), [ ]( RegExpElement const * const & a ) -> bool {
+            return dynamic_cast<RegExpEpsilon const *>( a );
+        });
+
+        // if no eps
+        if( eps == childAlt->getElements( ).end( ) )
+            return false;
+
+        // NOTE(review): the alternation may be left with one or zero elements; nothing here collapses it
+        // remove eps from alternation
+        optimized = true;
+        delete * eps;
+        childAlt->getElements( ).erase( eps );
     }
 
-    return concat;
+    return optimized;
 }
 
-RegExpElement * RegExpOptimize::optimize( const Iteration * node )
+/**
+  * optimization V1: \0* = \e (not implemented as a separate rule)
+  * @param node Iteration node
+  * @return bool true if optimization applied else false (currently always false)
+  */
+bool RegExpOptimize::V1( Iteration * const & node )
 {
-    Iteration* iter = new Iteration( );
-    iter->setElement( optimize( node->getElement( ) ) );
+    // stub: V1 handling only exists in the commented-out optimize( Iteration ) draft at the end of RegExpOptimize.cpp
 
-    // Targets for optimization: Melichar, 2.87: Rule A10, A11, a** = a*
+    return false;
+}
 
-    // a** = a*
-    Iteration* childIteration;
-    while( ( childIteration = dynamic_cast<Iteration*>( iter->getElement( ) ) ) )
+/**
+  * optimization V2: x* + x = x* (elements already covered by an iteration in the same alternation are removed)
+  * @param node Alternation node, modified in place
+  * @return bool true if optimization applied else false
+  */
+bool RegExpOptimize::V2( Alternation * const & node )
+{
+    bool optimized = false;
+
+    /*
+     * This is a bit tricky:
+     * besides x* + x we also need to cover cases like (a+b)* + a + b + c = (a+b)* + c
+     */
+
+    std::list<RegExpElement*> iterElements;
+    // cache the bodies of all iterations first: node's list is erased from below, which would invalidate an iterator over it
+    for( const auto & n : node->getElements( ) )
     {
-        iter->setElement( childIteration->getElement( ) );
-        childIteration->setElement( NULL );
-        delete childIteration;
+        Iteration* iter = dynamic_cast<Iteration*>( n );
+        if( iter )
+            iterElements.push_back( iter->getElement( ) ); // NOTE(review): non-owning pointer into a child of node; it would dangle if that Iteration is deleted below
     }
 
-    // EMPTY* = eps
-    if( dynamic_cast<RegExpEmpty*>( iter->getElement( ) ) )
+    for( const auto & n : iterElements )
     {
-        delete iter;
-        return new RegExpEpsilon( );
+        // if alternation is inside, we need to make sure that every element of alternation is inside node->getElements( ). if so, delete them all
+        Alternation * tmpAlt = dynamic_cast<Alternation*>( n );
+        if( tmpAlt )
+        {
+            bool every = true;
+            for( const auto & altElem : tmpAlt->getElements( ) )
+            {
+                auto it = find_if( node->getElements().begin( ), node->getElements().end( ), [ altElem ]( RegExpElement const * const & a ) -> bool {
+                    return *a == *altElem;
+                });
+
+                if( it == node->getElements( ).end( ) )
+                    every = false;
+            }
+
+            if ( every == true )
+            {
+                optimized = true;
+
+                for( const auto & altElem : tmpAlt->getElements( ) )
+                {
+                    auto it = find_if( node->getElements().begin( ), node->getElements().end( ), [ altElem ]( RegExpElement const * const & a ) -> bool {
+                        return *a == *altElem;
+                    });
+                    assert( it != node->getElements( ).end( ) );
+
+                    delete *it; // only the first match of each alternation element is removed here
+                    node->getElements( ).erase( it );
+                }
+            }
+        }
+
+        // NOTE: not an else-branch - this scan always runs and removes every element equal to the iteration body n
+        for( auto it = node->getElements( ).begin( ); it != node->getElements( ).end( ); )
+        {
+            if( *n == **it )
+            {
+                optimized = true;
+
+                delete *it;
+                it = node->getElements( ).erase( it );
+            }
+            else
+            {
+                it ++;
+            }
+        }
     }
 
-    // TODO: A10: x* = EPS + x*x    // prob must be done in Concatenation node
-    // TODO: A11: x* = ( EPS + x )*
+    return optimized;
+}
 
-    return iter;
+bool RegExpOptimize::V5( Alternation * const & node )
+{
+    bool optimized = false; // stub: V5 ( x*y = y + x*xy ) is not implemented yet, node is left untouched
+    return optimized;
 }
 
-RegExpElement * RegExpOptimize::optimize( const RegExpSymbol * node )
+bool RegExpOptimize::V6( Alternation * const & node )
 {
-    return node->clone( );
+    bool optimized = false; // stub: V6 ( x*y = y + xx*y ) is not implemented yet, node is left untouched
+    return optimized;
 }
 
-RegExpElement * RegExpOptimize::optimize( const RegExpEmpty * node )
+bool RegExpOptimize::V8( Concatenation * const & node )
 {
-    return node->clone( );
+    bool optimized = false;
+
+    // V8: if \e in h(x) => xx* = x*  (drops an x that directly precedes x* when x contains the empty string)
+    for( auto it = node->getElements( ).begin( ); it != node->getElements( ).end( ); )
+    {
+        if( it == node->getElements( ).begin( ) ) // no prev
+        {
+            it ++;
+            continue;
+        }
+
+        Iteration* iter = dynamic_cast<Iteration*>( * it );
+        auto prev = std::prev( it );
+
+        if( iter && *( iter->getElement( ) ) == **prev && (*prev)->containsEmptyString( ) )
+        {
+            delete * prev;
+            it = node->getElements( ).erase( prev ); // erase( prev ) returns the iterator to x* itself, so x* is checked again against its new predecessor
+
+            optimized = true;
+        }
+        else
+            it ++;
+    }
+
+    return optimized;
 }
 
-RegExpElement * RegExpOptimize::optimize( const RegExpEpsilon * node )
+bool RegExpOptimize::V9( Concatenation * const & node )
 {
-    return node->clone( );
+    bool optimized = false;
+    // stub: V9 ( (xy)*x = x(yx)* ) is not implemented yet, node is left untouched
+    return optimized;
+}
+
+
+/*
+RegExpElement * RegExpOptimize::optimize( Iteration * const & node )
+{
+    Iteration* iter = new Iteration( );
+    iter->setElement( optimize( node->getElement( ) ) );
+
+    // V3: x** = x*
+    Iteration* childIter = dynamic_cast<Iteration*>( iter->getElement( ) );
+    if( childIter )
+    {
+        iter->setElement( childIter->getElement( ) );
+        childIter->setElement( NULL );
+        delete childIter;
+    }
+
+    // A11: x* = ( \e + x )*
+    Alternation* childAlt = dynamic_cast<Alternation*>( iter->getElement( ) );
+    if( childAlt )
+    {
+        for( auto it = childAlt->getElements( ).begin( ); it != childAlt->getElements( ).end( ); )
+        {
+            if( dynamic_cast<RegExpEpsilon*>( * it ) )
+            {
+                delete * it;
+                it = childAlt->getElements( ).erase( it );
+            }
+            else
+                it++;
+        }
+    }
+    // TODO: If Alternation size is 0 or 1
+
+    // V1: ( \0 )* = \e
+    RegExpEmpty* childEmpty = dynamic_cast<RegExpEmpty*>( iter->getElement( ) );
+    RegExpEpsilon* childEps= dynamic_cast<RegExpEpsilon*>( iter->getElement( ) );
+    if( childEmpty || childEps )
+    {
+        delete iter;
+        return new RegExpEpsilon( );
+    }
+
+    // V4: ( x + y )* = (x*y*)*
+    Concatenation* childConcat = dynamic_cast<Concatenation*>( iter->getElement( ) );
+    if( childConcat and std::all_of(
+            childConcat->getElements( ).begin( ), childConcat->getElements( ).end( ),
+            []( RegExpElement const * const & a ) -> bool{ return dynamic_cast<const Iteration*>( a ); } ) )
+    {
+        Alternation* alt = new Alternation( );
+        for( const auto & n : childConcat->getElements( ) )
+            alt->getElements( ).push_back( dynamic_cast<Iteration*>( n )->getElement( )->clone( ) );
+
+        delete childConcat;
+        iter->setElement( optimize( alt ) );
+        delete alt;
+    }
+
+    // V10: ( x + y )* = ( x* + y* )*
+    if( childAlt and std::all_of(
+            childAlt->getElements( ).begin( ), childAlt->getElements( ).end( ),
+            []( RegExpElement const * const & a ) -> bool{ return dynamic_cast<const Iteration*>( a ); } ) )
+    {
+        Alternation* alt = new Alternation( );
+        for( const auto & n : childAlt->getElements( ) )
+            alt->getElements( ).push_back( dynamic_cast<Iteration*>( n )->getElement( )->clone( ) );
+
+        delete childAlt;
+        iter->setElement( optimize( alt ) );
+        delete alt;
+    }
+
+    return iter;
 }
+
+*/
\ No newline at end of file
diff --git a/libaregexptree/src/RegExpOptimize.h b/libaregexptree/src/RegExpOptimize.h
index 6408d86bc9..18532f26a8 100644
--- a/libaregexptree/src/RegExpOptimize.h
+++ b/libaregexptree/src/RegExpOptimize.h
@@ -8,22 +8,80 @@
 #ifndef REGEXPOPTIMIZE_H_
 #define REGEXPOPTIMIZE_H_
 
+#include <algorithm>
+#include <functional>
+
+
 #include <regexp/RegExp.h>
 #include <regexp/RegExpElements.h>
 
 #include <AlibException.h>
 
+/*
+ * Optimizes RegExp (or its subtree) using axioms defined in Melichar 2.87
+ * (A1 to A11) and Melichar 2.95 (V1 through V6 and V8, V9, V10).
+ * The optimize( ) overloads return a new tree; the private rule methods rewrite the given node in place.
+ *
+ * List of optimizations by the node type they operate on:
+ *    - Alternation: A1, A2, A3, A4, A10, V2, V5, V6
+ *    - Concatenation: A5, A6, A7, A8, A9, V8, V9
+ *    - Iteration: A11, V1, V3, V4, V10
+ *
+ * Details: ( id : direction of optim. : optim )
+ *  - A1 : -> : x + ( y + z ) = ( x + y ) + z = x + y + z
+ *  - A2 : <- : x + y = y + x
+ *  - A3 : -> : x + \0 = x
+ *  - A4 : -> : x + x = x
+ *  - A5 : -> : x(yz) = (xy)z = xyz
+ *  - A6 : -> : \ex = x\e = x
+ *  - A7 : -> : \0x = x\0 = \0
+ *  - A8 : -> : x( y + z ) = xy + xz
+ *  - A9 : -> : ( x + y )z = xz + yz
+ *  - A10: <- : x* = \e + x*x
+ *  - A11: <- : x* = ( \e + x )*
+ *  - V1 : -> : \0* = \e
+ *  - V2 : -> : x* + x = x*
+ *  - V3 : -> : x** = x*
+ *  - V4 : <- : ( x + y )* = (x*y*)*
+ *  - V5 : <- : x*y = y + x*xy
+ *  - V6 : <- : x*y = y + xx*y
+ *  - V7 :    : bleh
+ *  - V8 : -> : if \e in h(x) => xx* = x*
+ *  - V9 : -> : (xy)*x = x(yx)*
+ *  - V10: <- : ( x + y )* = ( x* + y* )*
+ */
 class RegExpOptimize
 {
 public:
     regexp::RegExp optimize( const regexp::RegExp & regexp );
-    regexp::RegExpElement * optimize( const regexp::RegExpElement* node );
-    regexp::RegExpElement * optimize( const regexp::Alternation * node );
-    regexp::RegExpElement * optimize( const regexp::Concatenation * node );
-    regexp::RegExpElement * optimize( const regexp::Iteration * node );
-    regexp::RegExpElement * optimize( const regexp::RegExpSymbol * node );
-    regexp::RegExpElement * optimize( const regexp::RegExpEpsilon * node );
-    regexp::RegExpElement * optimize( const regexp::RegExpEmpty * node );
+    regexp::RegExpElement * optimize( regexp::RegExpElement const * const & node ); // dispatches on the dynamic type of node
+    regexp::RegExpElement * optimize( regexp::Alternation const * const & node );
+    regexp::RegExpElement * optimize( regexp::Concatenation const * const & node );
+    regexp::RegExpElement * optimize( regexp::Iteration const * const & node );
+    regexp::RegExpElement * optimize( regexp::RegExpSymbol const * const & node );
+    regexp::RegExpElement * optimize( regexp::RegExpEpsilon const * const & node );
+    regexp::RegExpElement * optimize( regexp::RegExpEmpty const * const & node );
+
+
+    // rule helpers follow: each one rewrites the given node in place and returns true if it changed something
+private:
+    bool A1( regexp::Alternation * const & node );
+    bool A2( regexp::Alternation * const & node );
+    bool A3( regexp::Alternation * const & node );
+    bool A4( regexp::Alternation * const & node ); // compares neighbours only, so it relies on A2 having sorted the node
+    bool A5( regexp::Concatenation * const & node );
+    bool A6( regexp::Concatenation * const & node );
+    bool A7( regexp::Concatenation * const & node );
+    bool A8( regexp::Concatenation * const & node ); // not implemented yet
+    bool A9( regexp::Concatenation * const & node ); // not implemented yet
+    bool A10( regexp::Alternation * const & node );
+    bool A11( regexp::Iteration * const & node );
+    bool V1( regexp::Iteration * const & node ); // not implemented yet
+    bool V2( regexp::Alternation * const & node );
+    bool V5( regexp::Alternation * const & node ); // not implemented yet
+    bool V6( regexp::Alternation * const & node ); // not implemented yet
+    bool V8( regexp::Concatenation * const & node );
+    bool V9( regexp::Concatenation * const & node ); // not implemented yet
 };
 
 #endif /* REGEXPNORMALIZE_H_ */
-- 
GitLab