From cb103a4aed886d4f03cf7bfd527f8b3157efeff9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <peckato1@fit.cvut.cz> Date: Sun, 4 May 2014 22:28:40 +0200 Subject: [PATCH] libaregexptree: optim: All alternations but V5,V6 --- libaregexptree/src/RegExpOptimize.cpp | 645 ++++++++++++++++++++++---- libaregexptree/src/RegExpOptimize.h | 72 ++- 2 files changed, 608 insertions(+), 109 deletions(-) diff --git a/libaregexptree/src/RegExpOptimize.cpp b/libaregexptree/src/RegExpOptimize.cpp index 36ea5c56ea..948e0016c2 100644 --- a/libaregexptree/src/RegExpOptimize.cpp +++ b/libaregexptree/src/RegExpOptimize.cpp @@ -7,211 +7,652 @@ #include "RegExpOptimize.h" +#include <cassert> +#include <iostream> + using namespace alib; using namespace regexp; -RegExp RegExpOptimize::optimize( const RegExp & regexp ) +RegExp RegExpOptimize::optimize( RegExp const & regexp ) { RegExpElement* optimized = optimize( regexp.getRegExp( ) ); + RegExp ret; ret.setRegExp( optimized ); return ret; } -RegExpElement* RegExpOptimize::optimize( const RegExpElement * node ) +RegExpElement* RegExpOptimize::optimize( RegExpElement const * const & node ) { const Alternation * alternation = dynamic_cast<const Alternation*>( node ); - const Concatenation * concatenation = dynamic_cast<const Concatenation*>( node ); - const Iteration * iteration = dynamic_cast<const Iteration*>( node ); - const RegExpSymbol * symbol = dynamic_cast<const RegExpSymbol*>( node ); - const RegExpEmpty * empty= dynamic_cast<const RegExpEmpty*>( node ); - const RegExpEpsilon * eps = dynamic_cast<const RegExpEpsilon*>( node ); - if( alternation ) return optimize( alternation ); + + const Concatenation * concatenation = dynamic_cast<const Concatenation*>( node ); if( concatenation ) return optimize( concatenation ); + + const Iteration * iteration = dynamic_cast<const Iteration*>( node ); if( iteration ) return optimize( iteration ); + + const RegExpSymbol * symbol = dynamic_cast<const RegExpSymbol*>( node ); if( 
symbol ) return optimize( symbol ); + + const RegExpEmpty * empty= dynamic_cast<const RegExpEmpty*>( node ); if( empty ) return optimize( empty ); + + const RegExpEpsilon * eps = dynamic_cast<const RegExpEpsilon*>( node ); if( eps ) return optimize( eps ); throw AlibException( "RegExpOptimize::optimize - unknown RegExpElement node" ); } -RegExpElement * RegExpOptimize::optimize( const Alternation * node ) + +RegExpElement * RegExpOptimize::optimize( Alternation const * const & node ) { Alternation* alt = new Alternation( ); for( const auto & child : node->getElements( ) ) alt->getElements( ).push_back( optimize( child ) ); - // Targets for optimization: Melichar, 2.87: Rule A1, A2, A3, A4, A8, A9 + // optimize while you can + while( A1( alt ) || A2( alt ) || A3( alt ) || A4( alt ) || A10( alt ) || V2( alt ) || V5( alt ) || V6( alt ) ); - // A1: x + ( y + z ) = ( x + y ) + z - for( auto it = alt->getElements( ).begin( ) ; it != alt->getElements( ).end( ); ) + if( alt->getElements( ).size( ) == 1 ) { - Alternation * childAlt = dynamic_cast<Alternation*> ( * it ); - if( childAlt ) + RegExpElement* ret = alt->getElements( ).front( ); + alt->getElements( ).pop_front( ); + delete alt; + return ret; + } + + assert( alt->getElements( ).size( ) > 0 ); + return alt; +} + +RegExpElement * RegExpOptimize::optimize( Concatenation const * const & node ) +{ + Concatenation* concat = new Concatenation( ); + + for( const auto & child : node->getElements( ) ) + concat->getElements( ).push_back( optimize( child ) ); + + /* + while( A5( concat ) || A6( concat ) || A8( concat ) || A9( concat ) || V8( concat ) || V9( concat ) ); + */ + + if( concat->getElements( ).size( ) == 1 ) + { + RegExpElement* ret = concat->getElements( ).front( ); + concat->getElements( ).pop_front( ); + delete concat; + return ret; + } + + assert( concat->getElements( ).size( ) > 0 ); + return concat; +} + +RegExpElement * RegExpOptimize::optimize( Iteration const * const & node ) +{ + Iteration* iter = new 
Iteration( ); + iter->setElement( optimize( node->getElement( ) ) ); + return iter; +} + +RegExpElement * RegExpOptimize::optimize( RegExpSymbol const * const & node ) +{ + return node->clone( ); +} + +RegExpElement * RegExpOptimize::optimize( RegExpEmpty const * const & node ) +{ + return node->clone( ); +} + +RegExpElement * RegExpOptimize::optimize( RegExpEpsilon const * const & node ) +{ + return node->clone( ); +} + +// ---------------------------------------------------------------------------- + +/** + * optimization A1: x + ( y + z ) = ( x + y ) + z = x + y + z + * @param node Alternation node + * @return bool true if optimization applied else false + */ +bool RegExpOptimize::A1( Alternation * const & node ) +{ + bool optimized = false; + + for( auto it = node->getElements( ).begin( ); it != node->getElements( ).end( ); ) + { + Alternation * const & childAlternation = dynamic_cast<Alternation *>( * it ); + + if( childAlternation ) { - auto it2 = it; - it2 ++; + it = node->getElements( ).erase( it ); - alt->getElements( ).insert( it2, childAlt->getElements( ).begin( ), childAlt->getElements( ).end( ) ); + std::copy( childAlternation->getElements( ).begin( ), childAlternation->getElements( ).end( ), inserter( node->getElements( ), it ) ); + childAlternation->getElements( ).clear( ); + delete childAlternation; - childAlt->getElements( ).clear( ); - delete childAlt; - it = alt->getElements( ).erase( it ); + optimized = true; } else + { it ++; + } } - // A2: x + y = y + x - alt->getElements().sort([](RegExpElement const * const & a, RegExpElement const * const & b) -> bool { - return *a < *b; - }); + return optimized; +} - // A3: x + EMPTY = x - for( auto it = alt->getElements( ).begin( ) ; it != alt->getElements( ).end( ); ) +/** + * optimization A2: x + y = y + x (sort) + * @param node Alternation node + * @return bool true if optimization applied else false + */ +bool RegExpOptimize::A2( Alternation * const & node ) +{ + std::function<bool( RegExpElement 
const * const & a, RegExpElement const * const & b )> cmp = [ ]( RegExpElement const * const & a, RegExpElement const * const & b ) -> bool { return *a < *b; }; + + if( std::is_sorted( node->getElements( ).begin( ), node->getElements( ).end( ), cmp ) ) + return false; + + node->getElements( ).sort( cmp ); + return true; +} + +/** + * optimization A3: x + \0 = x + * @param node Alternation node + * @return bool true if optimization applied else false + */ +bool RegExpOptimize::A3( Alternation * const & node ) +{ + bool optimized = false; + + // input can be \0 + \0, so at least one element must be preserved + + // FIXME: alib2 uses shared_ptrs, rewrite this using remove_if then + + for( auto it = node->getElements( ).begin( ); it != node->getElements( ).end( ); ) { - if ( dynamic_cast<RegExpEmpty*>( *it ) ) + RegExpEmpty const * const & empty = dynamic_cast<RegExpEmpty const *>( * it ); + + if( empty && node->getElements( ).size( ) > 1 ) { - delete *it; - it = alt->getElements( ).erase( it ); + it = node->getElements( ).erase( it ); + delete empty; + + optimized = true; } else + { it ++; + } } - // TODO: A4: x + x = x - // depends on RegExpElement::operator< - + return optimized; +} - // Empty subtree and alternation of single node - if( alt->getElements( ).size( ) == 0 ) +/** + * optimization A4: x + x = x + * @param node Alternation node + * @return bool true if optimization applied else false + */ +bool RegExpOptimize::A4( Alternation * const & node ) +{ + /* + * two ways of implementing this optimization: + * - sort and call std::unique ( O(n lg n) + O(n) ), but it also sorts... 
+ * - check every element against other ( O(n*n) ) + * + * As we always sort in optimization, we can use the first version, but A4 must be __always__ called __after__ A2 + */ + + // uncomment if smart ptrs used + // node->getElements( ).unique( [ ]( RegExpElement const * const & a, RegExpElement const * const & b ) -> bool { + // return *a == *b; + // } ); + + bool optimized = false;; + for( auto it = std::next( node->getElements( ).begin( ) ); it != node->getElements( ).end( ); ) { - delete alt; - return new RegExpEmpty( ); + if ( ** it == ** std::prev( it ) ) + { + delete * it; + it = node->getElements( ).erase( it ); + optimized = true; + } + else + { + it ++; + } } - else if( alt->getElements( ).size( ) == 1 ) + + return optimized; +} + +/** + * optimization A5: x.(y.z) = (x.y).z = x.y.z + * @param node Concatenation node + * @return bool true if optimization applied else false + */ +bool RegExpOptimize::A5( Concatenation * const & node ) +{ + bool optimized = false; + + for( auto it = node->getElements( ).begin( ); it != node->getElements( ).end( ); ) { - RegExpElement * ret = * alt->getElements( ).begin( ); - alt->getElements( ).clear( ); - delete alt; - return ret; + Concatenation * const & childConcatenation = dynamic_cast<Concatenation *>( * it ); + + if( childConcatenation ) + { + it = node->getElements( ).erase( it ); + std::copy( childConcatenation->getElements( ).begin( ), childConcatenation->getElements( ).end( ), inserter( node->getElements( ), it ) ); + childConcatenation->getElements( ).clear( ); + delete childConcatenation; + + optimized = true; + } + else + it ++; } - return alt; + return optimized; } -RegExpElement * RegExpOptimize::optimize( const Concatenation * node ) +/** + * optimization A6: \e.x = x.\e = x + * @param node Concatenation node + * @return bool true if optimization applied else false + */ +bool RegExpOptimize::A6( Concatenation * const & node ) { - Concatenation* concat = new Concatenation( ); - - for( const auto & child : 
node->getElements( ) ) - concat->getElements( ).push_back( optimize( child ) ); + bool optimized = false; - // Targets for optimization: Melichar, 2.87: Rule A5, A6, A7 + // FIXME: alib2 uses shared_ptrs, rewrite this using remove_if then - // A5: a.(b.c) = (a.b).c - for( auto it = concat->getElements( ).begin( ) ; it != concat->getElements( ).end( ) ; ) + for( auto it = node->getElements( ).begin( ); it != node->getElements( ).end( ); ) { - Concatenation * childConcat = dynamic_cast<Concatenation*>( * it ); - if( childConcat ) + RegExpEpsilon* epsilon = dynamic_cast<RegExpEpsilon*>( * it ); + if( epsilon && node->getElements( ).size( ) > 1 ) { - auto it2 = it; - it2 ++; - - concat->getElements( ).insert( it2, childConcat->getElements( ).begin( ), childConcat->getElements( ).end( ) ); + delete * it; + it = node->getElements( ).erase( it ); - childConcat->getElements( ).clear( ); - delete childConcat; - it = concat->getElements( ).erase( it ); + optimized = true; } else it ++; } - // A7: EMPTY.x = EMPTY - for( const auto & childNode : concat->getElements( ) ) + return optimized; +} + +/** + * optimization A7: \0.x = x.\0 = \0 + * @param node Concatenation node + * @return bool true if optimization applied else false + */ +bool RegExpOptimize::A7( Concatenation * const & node ) +{ + bool optimized = false; + + // FIXME: alib2 uses shared_ptrs, rewrite this using remove_if then + + if( std::any_of( node->getElements( ).begin( ), node->getElements( ).end( ), []( RegExpElement const * const & a ) -> bool{ + return dynamic_cast<RegExpEmpty const *>( a ); + })) { - if( dynamic_cast<RegExpEmpty*>( childNode ) ) - { - delete concat; - return new RegExpEmpty( ); - } + for( auto const& child : node->getElements( ) ) + delete child; + + node->getElements( ).clear( ); + node->getElements( ).push_back( new RegExpEmpty( ) ); + + optimized = true; } - // A6: EPS.x = x.EPS = x - for( auto it = concat->getElements( ).begin( ) ; it != concat->getElements( ).end( ); ) + return 
optimized; +} + +/** + * optimization A8: x.(y+z) = x.y + x.z + * @param node Concatenation node + * @return bool true if optimization applied else false + */ +bool RegExpOptimize::A8( Concatenation * const & node ) +{ + bool optimized = false; + return optimized; +} + +/** + * optimization A9: (x+y).z = x.z + y.z + * @param node Concatenation node + * @return bool true if optimization applied else false + */ +bool RegExpOptimize::A9( Concatenation * const & node ) +{ + bool optimized = false; + return optimized; +} + + +/** + * optimization A10: x* = \e + x*x + * @param node Alternation node + * @return bool true if optimization applied else false + */ +bool RegExpOptimize::A10( Alternation * const & node ) +{ + bool optimized = false, optimizedIter = false; + + for( auto it = node->getElements( ).begin( ); it != node->getElements( ).end( ); ) { - // EPS.EPS.EPS.EPS = EPS - if( dynamic_cast<RegExpEpsilon*>( *it ) && concat->getElements( ).size( ) > 1 ) + optimizedIter = false; + + // check if we have some epsilon left, else nothing to do + auto eps = find_if( node->getElements( ).begin( ), node->getElements( ).end( ), [ ]( RegExpElement const * const & a ) -> bool { + return dynamic_cast<RegExpEpsilon const *>( a ); + }); + if( eps == node->getElements( ).end( ) ) + break; + + Concatenation const * const & childConcat = dynamic_cast<Concatenation const *>( *it ); + if( childConcat ) { - delete * it; - it = concat->getElements( ).erase( it ); + // if iteration is first element of concatenation + Iteration const * const & iter = dynamic_cast<Iteration const *>( childConcat->getElements( ).front( ) ); + + if( iter ) + { + // concatenation without the iteration node + Concatenation *tmpConcat = dynamic_cast<Concatenation *>( childConcat->clone( ) ); + delete tmpConcat->getElements( ).front( ); + tmpConcat->getElements( ).pop_front( ); + RegExpElement * tmpConcatOpt = optimize( tmpConcat ); + + // check if iteration element is the same subtree as rest of concatenation 
+ if( * iter->getElement( ) == * tmpConcatOpt ) + { + optimized = optimizedIter = true; + + node->getElements( ).push_back( iter->clone( ) ); + + delete childConcat; + it = node->getElements( ).erase( it ); + + // find the eps again - invalidated after prev erase + eps = find_if( node->getElements( ).begin( ), node->getElements( ).end( ), [ ]( RegExpElement const * const & a ) -> bool { + return dynamic_cast<RegExpEpsilon const *>( a ); + }); + delete *eps; + it = node->getElements( ).erase( eps ); + } + delete tmpConcat; + delete tmpConcatOpt; + } } - else + + if( ! optimizedIter ) it ++; } - if( concat->getElements( ).size( ) == 1 ) + return optimized; +} + +/** + * optimization A11: x* = (\e + x)* + * @param node Iteration node + * @return bool true if optimization applied else false + */ +bool RegExpOptimize::A11( Iteration * const & node ) +{ + bool optimized = false; + + Alternation * const & childAlt = dynamic_cast<Alternation *>( node->getElement( ) ); + + if( childAlt ) { - RegExpElement * ret = * concat->getElements( ).begin( ); - concat->getElements( ).clear( ); - delete concat; - return ret; + // check if eps inside iteration's alternation + auto eps = find_if( childAlt->getElements( ).begin( ), childAlt->getElements( ).end( ), [ ]( RegExpElement const * const & a ) -> bool { + return dynamic_cast<RegExpEpsilon const *>( a ); + }); + + // if no eps + if( eps == childAlt->getElements( ).end( ) ) + return false; + + + // remove eps from alternation + optimized = true; + delete * eps; + childAlt->getElements( ).erase( eps ); } - return concat; + return optimized; } -RegExpElement * RegExpOptimize::optimize( const Iteration * node ) +/** + * optimization V1: \0* = \e + * @param node Iteration node + * @return bool true if optimization applied else false + */ +bool RegExpOptimize::V1( Iteration * const & node ) { - Iteration* iter = new Iteration( ); - iter->setElement( optimize( node->getElement( ) ) ); + // implemented in optimize( Iteration ) - // Targets 
for optimization: Melichar, 2.87: Rule A10, A11, a** = a* + return false; +} - // a** = a* - Iteration* childIteration; - while( ( childIteration = dynamic_cast<Iteration*>( iter->getElement( ) ) ) ) +/** + * optimization V2: x* + x = x* + * @param node Alternation node + * @return bool true if optimization applied else false + */ +bool RegExpOptimize::V2( Alternation * const & node ) +{ + bool optimized = false; + + /* + * Bit tricky + * We need also to cover the cases like (a+b)* + a + b + c = (a+b)* + c + */ + + std::list<RegExpElement*> iterElements; + // cache iter elements because of operator invalidation after erase + for( const auto & n : node->getElements( ) ) { - iter->setElement( childIteration->getElement( ) ); - childIteration->setElement( NULL ); - delete childIteration; + Iteration* iter = dynamic_cast<Iteration*>( n ); + if( iter ) + iterElements.push_back( iter->getElement( ) ); } - // EMPTY* = eps - if( dynamic_cast<RegExpEmpty*>( iter->getElement( ) ) ) + for( const auto & n : iterElements ) { - delete iter; - return new RegExpEpsilon( ); + // if alternation is inside, we need to make sure that every element of alternation is inside node->getElements( ). 
if so, delete them all + Alternation * tmpAlt = dynamic_cast<Alternation*>( n ); + if( tmpAlt ) + { + bool every = true; + for( const auto & altElem : tmpAlt->getElements( ) ) + { + auto it = find_if( node->getElements().begin( ), node->getElements().end( ), [ altElem ]( RegExpElement const * const & a ) -> bool { + return *a == *altElem; + }); + + if( it == node->getElements( ).end( ) ) + every = false; + } + + if ( every == true ) + { + optimized = true; + + for( const auto & altElem : tmpAlt->getElements( ) ) + { + auto it = find_if( node->getElements().begin( ), node->getElements().end( ), [ altElem ]( RegExpElement const * const & a ) -> bool { + return *a == *altElem; + }); + assert( it != node->getElements( ).end( ) ); + + delete *it; + node->getElements( ).erase( it ); + } + } + } + + // else + for( auto it = node->getElements( ).begin( ); it != node->getElements( ).end( ); ) + { + if( *n == **it ) + { + optimized = true; + + delete *it; + it = node->getElements( ).erase( it ); + } + else + { + it ++; + } + } } - // TODO: A10: x* = EPS + x*x // prob must be done in Concatenation node - // TODO: A11: x* = ( EPS + x )* + return optimized; +} - return iter; +bool RegExpOptimize::V5( Alternation * const & node ) +{ + bool optimized = false; + return optimized; } -RegExpElement * RegExpOptimize::optimize( const RegExpSymbol * node ) +bool RegExpOptimize::V6( Alternation * const & node ) { - return node->clone( ); + bool optimized = false; + return optimized; } -RegExpElement * RegExpOptimize::optimize( const RegExpEmpty * node ) +bool RegExpOptimize::V8( Concatenation * const & node ) { - return node->clone( ); + bool optimized = false; + + // V8: if \e in h(x) => xx* = x* + for( auto it = node->getElements( ).begin( ); it != node->getElements( ).end( ); ) + { + if( it == node->getElements( ).begin( ) ) // no prev + { + it ++; + continue; + } + + Iteration* iter = dynamic_cast<Iteration*>( * it ); + auto prev = std::prev( it ); + + if( iter && *( 
iter->getElement( ) ) == **prev && (*prev)->containsEmptyString( ) ) + { + delete * prev; + it = node->getElements( ).erase( prev ); + + optimized = true; + } + else + it ++; + } + + return optimized; } -RegExpElement * RegExpOptimize::optimize( const RegExpEpsilon * node ) +bool RegExpOptimize::V9( Concatenation * const & node ) { - return node->clone( ); + bool optimized = false; + + return optimized; +} + + +/* +RegExpElement * RegExpOptimize::optimize( Iteration * const & node ) +{ + Iteration* iter = new Iteration( ); + iter->setElement( optimize( node->getElement( ) ) ); + + // V3: x** = x* + Iteration* childIter = dynamic_cast<Iteration*>( iter->getElement( ) ); + if( childIter ) + { + iter->setElement( childIter->getElement( ) ); + childIter->setElement( NULL ); + delete childIter; + } + + // A10: x* = ( \e + x )* + Alternation* childAlt = dynamic_cast<Alternation*>( iter->getElement( ) ); + if( childAlt ) + { + for( auto it = childAlt->getElements( ).begin( ); it != childAlt->getElements( ).end( ); ) + { + if( dynamic_cast<RegExpEpsilon*>( * it ) ) + { + delete * it; + it = childAlt->getElements( ).erase( it ); + } + else + it++; + } + } + // TODO: If Alternation size is 0 or 1 + + // V1: ( \0 )* = \e + RegExpEmpty* childEmpty = dynamic_cast<RegExpEmpty*>( iter->getElement( ) ); + RegExpEpsilon* childEps= dynamic_cast<RegExpEpsilon*>( iter->getElement( ) ); + if( childEmpty || childEps ) + { + delete iter; + return new RegExpEpsilon( ); + } + + // V4: ( x + y )* = (x*y*)* + Concatenation* childConcat = dynamic_cast<Concatenation*>( iter->getElement( ) ); + if( childConcat and std::all_of( + childConcat->getElements( ).begin( ), childConcat->getElements( ).end( ), + []( RegExpElement const * const & a ) -> bool{ return dynamic_cast<const Iteration*>( a ); } ) ) + { + Alternation* alt = new Alternation( ); + for( const auto & n : childConcat->getElements( ) ) + alt->getElements( ).push_back( dynamic_cast<Iteration*>( n )->getElement( )->clone( ) ); + + 
delete childConcat; + iter->setElement( optimize( alt ) ); + delete alt; + } + + // V10: ( x + y )* = ( x* + y* )* + if( childAlt and std::all_of( + childAlt->getElements( ).begin( ), childAlt->getElements( ).end( ), + []( RegExpElement const * const & a ) -> bool{ return dynamic_cast<const Iteration*>( a ); } ) ) + { + Alternation* alt = new Alternation( ); + for( const auto & n : childAlt->getElements( ) ) + alt->getElements( ).push_back( dynamic_cast<Iteration*>( n )->getElement( )->clone( ) ); + + delete childAlt; + iter->setElement( optimize( alt ) ); + delete alt; + } + + return iter; } + +*/ \ No newline at end of file diff --git a/libaregexptree/src/RegExpOptimize.h b/libaregexptree/src/RegExpOptimize.h index 6408d86bc9..18532f26a8 100644 --- a/libaregexptree/src/RegExpOptimize.h +++ b/libaregexptree/src/RegExpOptimize.h @@ -8,22 +8,80 @@ #ifndef REGEXPOPTIMIZE_H_ #define REGEXPOPTIMIZE_H_ +#include <algorithm> +#include <functional> + + #include <regexp/RegExp.h> #include <regexp/RegExpElements.h> #include <AlibException.h> +/* + * Optimizes RegExp (or its subtree) using axioms defined in Melichar 2.87 + * (A1 to A11) and Melichar 2.95 (V1 through V6 and V8, V9, V10) + * All methods return new tree. + * + * List of optimizations on nodes: + * - Alternation: A1, A2, A3, A4, A10, V2, V5, V6 + * - Concatenation: A5, A6, A7, A8, A9, V8, V9 + * - Iteration: A11, V1, V3, V4, V10 + * + * Details: ( id : direction of optim. 
: optim ) + * - A1 : -> : x + ( y + z ) = ( x + y ) + z = x + y + z + * - A2 : <- : x + y = y + x + * - A3 : -> : x + \0 = x + * - A4 : -> : x + x = x + * - A5 : -> : x(yz) = (xy)z = xyz + * - A6 : -> : \ex = x\e = x + * - A7 : -> : \0x = x\0 = \0 + * - A8 : -> : x( y + z ) = xy + xz + * - A9 : -> : ( x + y )z = xz + yz + * - A10: <- : x* = \e + x*x + * - A11: <- : x* = ( \e + x )* + * - V1 : -> : \0* = \e + * - V2 : -> : x* + x = x* + * - V3 : -> : x** = x* + * - V4 : <- : ( x + y )* = (x*y*)* + * - V5 : <- : x*y = y + x*xy + * - V6 : <- : x*y = y + xx*y + * - V7 : : bleh + * - V8 : -> : if \e in h(x) => xx* = x* + * - V9 : -> : (xy)*x = x(yx)* + * - V10: <- : ( x + y )* = ( x* + y* )* + */ class RegExpOptimize { public: regexp::RegExp optimize( const regexp::RegExp & regexp ); - regexp::RegExpElement * optimize( const regexp::RegExpElement* node ); - regexp::RegExpElement * optimize( const regexp::Alternation * node ); - regexp::RegExpElement * optimize( const regexp::Concatenation * node ); - regexp::RegExpElement * optimize( const regexp::Iteration * node ); - regexp::RegExpElement * optimize( const regexp::RegExpSymbol * node ); - regexp::RegExpElement * optimize( const regexp::RegExpEpsilon * node ); - regexp::RegExpElement * optimize( const regexp::RegExpEmpty * node ); + regexp::RegExpElement * optimize( regexp::RegExpElement const * const & node ); + regexp::RegExpElement * optimize( regexp::Alternation const * const & node ); + regexp::RegExpElement * optimize( regexp::Concatenation const * const & node ); + regexp::RegExpElement * optimize( regexp::Iteration const * const & node ); + regexp::RegExpElement * optimize( regexp::RegExpSymbol const * const & node ); + regexp::RegExpElement * optimize( regexp::RegExpEpsilon const * const & node ); + regexp::RegExpElement * optimize( regexp::RegExpEmpty const * const & node ); + + + +private: + bool A1( regexp::Alternation * const & node ); + bool A2( regexp::Alternation * const & node ); + bool A3( 
regexp::Alternation * const & node ); + bool A4( regexp::Alternation * const & node ); + bool A5( regexp::Concatenation * const & node ); + bool A6( regexp::Concatenation * const & node ); + bool A7( regexp::Concatenation * const & node ); + bool A8( regexp::Concatenation * const & node ); + bool A9( regexp::Concatenation * const & node ); + bool A10( regexp::Alternation * const & node ); + bool A11( regexp::Iteration * const & node ); + bool V1( regexp::Iteration * const & node ); + bool V2( regexp::Alternation * const & node ); + bool V5( regexp::Alternation * const & node ); + bool V6( regexp::Alternation * const & node ); + bool V8( regexp::Concatenation * const & node ); + bool V9( regexp::Concatenation * const & node ); }; #endif /* REGEXPNORMALIZE_H_ */ -- GitLab