diff --git a/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.cxx b/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.cxx index ba50634ed769165fab4dc346cd43470f01878727..7a073174a441c6621b40c39e7d31e7a6eca33bbe 100644 --- a/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.cxx +++ b/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.cxx @@ -166,17 +166,14 @@ UnboundedRegExpElement * RegExpOptimize::optimize( UnboundedRegExpEpsilon const */ bool RegExpOptimize::A1( UnboundedRegExpAlternation * const & node ) const { bool optimized = false; - for( auto it = node->getChildren ( ).begin( ); it != node->getChildren ( ).end( ); ) { if( dynamic_cast < UnboundedRegExpAlternation * > ( it->get ( ) ) ) { - UnboundedRegExpAlternation * childUnboundedRegExpAlternation = static_cast<UnboundedRegExpAlternation *>( it->release() ); + std::smart_ptr < UnboundedRegExpAlternation > childAlt ( static_cast < UnboundedRegExpAlternation * >( it->release() ) ); it = node->getChildren ( ).erase( it ); - it = node->insert( it, std::make_move_iterator(childUnboundedRegExpAlternation->getChildren ( ).begin( )), std::make_move_iterator(childUnboundedRegExpAlternation->getChildren ( ).end( ) )); - - delete childUnboundedRegExpAlternation; + it = node->insert( it, std::make_move_iterator(childAlt->getChildren ( ).begin ( ) ), std::make_move_iterator(childAlt->getChildren ( ).end ( ) ) ); - optimized = true; + optimized = true; } else it ++; } @@ -252,12 +249,10 @@ bool RegExpOptimize::A5( UnboundedRegExpConcatenation * const & node ) const { for( auto it = node->getChildren ( ).begin( ); it != node->getChildren ( ).end( ); ) { if( dynamic_cast<UnboundedRegExpConcatenation *>( it->get() ) ) { - UnboundedRegExpConcatenation * childUnboundedRegExpConcatenation = static_cast<UnboundedRegExpConcatenation *>( it->release() ); + std::smart_ptr < UnboundedRegExpConcatenation > childConcat ( static_cast<UnboundedRegExpConcatenation *>( it->release() ) ); it = node->getChildren ( ).erase( it ); - it = node->insert( it, std::make_move_iterator(childUnboundedRegExpConcatenation->getChildren ( ).begin( )), std::make_move_iterator(childUnboundedRegExpConcatenation->getChildren ( ).end( ) )); - - delete childUnboundedRegExpConcatenation; + it = node->insert( it, std::make_move_iterator(childConcat->getChildren ( ).begin( )), std::make_move_iterator(childConcat->getChildren ( ).end( ) )); optimized = true; } else @@ -295,18 +290,16 @@ bool RegExpOptimize::A6( UnboundedRegExpConcatenation * const & node ) const { * @return bool true if optimization applied else false */ bool RegExpOptimize::A7( UnboundedRegExpConcatenation * const & node ) const { - bool optimized = false; - if( std::any_of( node->getChildren ( ).begin( ), node->getChildren ( ).end( ), []( const std::smart_ptr < UnboundedRegExpElement > & a ) -> bool{ return dynamic_cast < UnboundedRegExpEmpty * >( a.get() ); } ) ) { if(node->getChildren ( ).size() == 1) return false; node->getChildren ( ).clear( ); node->pushBackChild( std::smart_ptr < UnboundedRegExpElement > ( new UnboundedRegExpEmpty( ) ) ); - optimized = true; + return true; } - return optimized; + return false; } /** @@ -413,9 +406,8 @@ bool RegExpOptimize::A9( UnboundedRegExpConcatenation * const & /* node */) cons * @param node UnboundedRegExpAlternation node * @return bool true if optimization applied else false */ -bool RegExpOptimize::A10( UnboundedRegExpAlternation * const & /* node */ ) const -{ - /* bool optimized = false, optimizedIter = false; */ +bool RegExpOptimize::A10( UnboundedRegExpAlternation * const & node ) const { + bool optimized = false; /* * problem: @@ -423,64 +415,40 @@ bool RegExpOptimize::A10( UnboundedRegExpAlternation * const & /* node */ ) cons * - but if we do not have the eps, but we do have iteration, then \e \in h(iter), therefore \e in h(node). */ -/* for( auto it = node->elements.begin( ); it != node->elements.end( ); ) - { - optimizedIter = false; + // check if we have some epsilon or iteration left, else nothing to do + auto eps = find_if( node->getElements().begin( ), node->getElements().end( ), [ ]( const std::smart_ptr < UnboundedRegExpElement > & a ) -> bool { + return dynamic_cast<UnboundedRegExpEpsilon const *>( a.get() ) || dynamic_cast<UnboundedRegExpIteration const*>( a.get() ); + }); - // check if we have some epsilon or iteration left, else nothing to do - auto eps = find_if( node->elements.begin( ), node->elements.end( ), [ ]( const std::smart_ptr < UnboundedRegExpElement > & a ) -> bool { - return dynamic_cast<UnboundedRegExpEpsilon const *>( a.get() ) || dynamic_cast<UnboundedRegExpIteration const*>( a.get() ); - }); - if( eps == node->elements.end( ) ) - break; + if( eps == node->getElements().end( ) ) + return false; - UnboundedRegExpConcatenation const * const & childConcat = dynamic_cast<UnboundedRegExpConcatenation const *>( it->get() ); - if( childConcat ) - { - // if iteration is first element of concatenation - UnboundedRegExpIteration const * const & iter = dynamic_cast<UnboundedRegExpIteration const *>( childConcat->elements.front( ).get() ); + for( unsigned i = 0; i < node->getChildren ( ).size ( ); i++ ) { + UnboundedRegExpConcatenation * childConcat = dynamic_cast<UnboundedRegExpConcatenation *>( node->getChildren ( ) [ i ].get ( ) ); + if( ! childConcat ) + continue; - if( iter ) - { - // concatenation without the iteration node - UnboundedRegExpConcatenation *tmpConcat = dynamic_cast<UnboundedRegExpConcatenation *>( childConcat->clone( ) ); - tmpConcat->elements.erase( tmpConcat->elements.begin( ) ); - UnboundedRegExpElement * tmpConcatOpt = optimize( tmpConcat ); + // if iteration is first element of concatenation + UnboundedRegExpIteration * iter = dynamic_cast<UnboundedRegExpIteration *>( childConcat->getElements().front( ).get() ); + if( ! iter ) + continue; - // check if iteration element is the same subtree as rest of concatenation - if( * iter->element == * tmpConcatOpt ) - { - optimized = optimizedIter = true; - - size_t off = it - node->elements.begin(); - node->elements.push_back( std::smart_ptr < UnboundedRegExpElement > ( iter->clone( ) ) ); - it = node->elements.begin() + off; - - it = node->elements.erase( it ); - - // find the eps again - invalidated after prev erase - eps = find_if( node->elements.begin( ), node->elements.end( ), [ ]( const std::smart_ptr < UnboundedRegExpElement > & a ) -> bool { - return dynamic_cast<UnboundedRegExpEpsilon const *>( a.get() ); - }); - // if it was eps, delete it - // if it was not the eps but iteration, keep it - if( eps != node->elements.end( ) ) - { - it = node->elements.erase( eps ); - } - } - delete tmpConcat; - delete tmpConcatOpt; - } + // concatenation without the iteration node + UnboundedRegExpConcatenation tmpConcat ( * childConcat ); + tmpConcat.getChildren ( ).erase( tmpConcat.getChildren ( ).begin( ) ); + + UnboundedRegExpElement * tmpConcatOpt = optimize( & tmpConcat ); + // check if the iteration element is the same as the rest of the concatenation + if( iter->getElement() == * tmpConcatOpt ) { + optimized = true; + + node->setChild ( std::move ( childConcat->getElements().front() ), i ); } - if( ! optimizedIter ) - it ++; + delete tmpConcatOpt; } - return optimized; */ - - return false; // FIXME + return optimized; } /** @@ -528,75 +496,43 @@ bool RegExpOptimize::V1( UnboundedRegExpIteration * const &) const * @param node UnboundedRegExpAlternation node * @return bool true if optimization applied else false */ -bool RegExpOptimize::V2( UnboundedRegExpAlternation * const & /* node */ ) const -{ -/* bool optimized = false; */ +bool RegExpOptimize::V2( UnboundedRegExpAlternation * const & node ) const { + bool optimized = false; /* * Bit tricky - * We need also to cover the cases like (a+b)* + a + b + c = (a+b)* + c + * We need also to cover the cases like ( a + b + d )* + ( e )* + a + b + c + e = ( a + b + d )* + ( e )* + c */ -/* std::list<UnboundedRegExpElement*> iterElements; + std::vector < UnboundedRegExpElement * > iterElements; // cache iter elements because of operator invalidation after erase - for( const auto & n : node->elements ) - { - UnboundedRegExpIteration* iter = dynamic_cast<UnboundedRegExpIteration*>( n.get() ); - if( iter ) - iterElements.push_back( iter->element.get() ); - } - - for( const auto & n : iterElements ) - { - // if alternation is inside, we need to make sure that every element of alternation is inside node->elements. if so, delete them all - UnboundedRegExpAlternation * tmpAlt = dynamic_cast<UnboundedRegExpAlternation*>( n ); - if( tmpAlt ) - { - bool every = true; - for( const auto & altElem : tmpAlt->elements ) - { - auto it = find_if( node->elements.begin( ), node->elements.end( ), [ &altElem ]( const std::smart_ptr < UnboundedRegExpElement > & a ) -> bool { - return *a == *altElem; - }); - - if( it == node->elements.end( ) ) - every = false; - } - - if ( every == true ) - { - optimized = true; - - for( const auto & altElem : tmpAlt->elements ) - { - auto it = find_if( node->elements.begin( ), node->elements.end( ), [ &altElem ]( const std::smart_ptr < UnboundedRegExpElement > & a ) -> bool { - return *a == *altElem; - }); - assert( it != node->elements.end( ) ); + for( const std::smart_ptr < UnboundedRegExpElement > & n : node->getElements ( ) ) { + UnboundedRegExpIteration * iter = dynamic_cast < UnboundedRegExpIteration * > ( n.get ( ) ); + if( iter ) { + UnboundedRegExpAlternation * inner = dynamic_cast < UnboundedRegExpAlternation * > ( iter->getChild ( ).get ( ) ); + if ( inner ) + for ( const std::smart_ptr < UnboundedRegExpElement > & innerElement : inner->getElements ( ) ) + iterElements.push_back ( innerElement.get() ); + else + iterElements.push_back ( iter->getChild ( ).get ( ) ); - node->elements.erase( it ); - } - } } + } - // else - for( auto it = node->elements.begin( ); it != node->elements.end( ); ) - { - if( *n == **it ) - { - optimized = true; + for( UnboundedRegExpElement * n : iterElements ) { + auto it = find_if( node->getChildren().begin( ), node->getChildren().end( ), [ n ] ( const std::smart_ptr < UnboundedRegExpElement > & a ) -> bool { + return *a == *n; + }); - it = node->elements.erase( it ); - } - else - { - it ++; - } + if( it == node->getChildren().end() ) { + continue; } + + optimized = true; + node->getChildren().erase( it ); } - return optimized; */ - return false; // FIXME + return optimized; } /** @@ -797,32 +733,27 @@ bool RegExpOptimize::V5( UnboundedRegExpAlternation * const & /* node */ ) const * @param node UnboundedRegExpAlternation node * @return bool true if optimization applied else false */ -bool RegExpOptimize::V6( UnboundedRegExpAlternation * const & /* node */ ) const -{ -/* bool optimized = false; */ +bool RegExpOptimize::V6( UnboundedRegExpAlternation * const & node ) const { + bool optimized = false; // reinterpretation: ax*y = ay+axx*y - // so, if we find iter, a = everything that is before it (prefix) - // x = iter's content - // before iter must be exactly iter's content - // y = rest (suffix) + // so, if we find iter + // a = everything that is before it (prefix) + // x = iter's content before iter must be exactly iter's content + // y = rest (suffix) // prefix.xx*.suffix + prefix.suffix = prefix.x*.suffix -/* for( auto itA = node->elements.begin( ); itA != node->elements.end( ); ) - { + for( auto itA = node->getChildren ( ).begin( ); itA != node->getChildren ( ).end( ); ) { UnboundedRegExpConcatenation * concat = dynamic_cast<UnboundedRegExpConcatenation*>( itA->get() ); - if( ! concat ) - { - itA ++; + if( ! concat ) { + ++ itA; continue; } - for( auto itC = std::next( concat->elements.begin( ) ); itC != concat->elements.end( ); ) - { + for( auto itC = std::next( concat->getChildren ( ).begin( ) ); itC != concat->getChildren ( ).end( ); ) { UnboundedRegExpIteration * iter = dynamic_cast<UnboundedRegExpIteration*>( itC->get() ); - if( ! iter ) - { - itC ++; + if( ! iter ) { + ++ itC; continue; } @@ -830,32 +761,24 @@ bool RegExpOptimize::V6( UnboundedRegExpAlternation * const & /* node */ ) const auto itStartX = itC; //itStartX points to first x in expression xx*, everything before is therefore prefix - regexp "a" // if iter's element is concat - if( dynamic_cast<UnboundedRegExpConcatenation*>( iter->element.get() ) ) - { - UnboundedRegExpConcatenation * iterConcat = dynamic_cast<UnboundedRegExpConcatenation*>( iter->element.get() ); + UnboundedRegExpConcatenation * iterConcat = dynamic_cast<UnboundedRegExpConcatenation*>( iter->getChild ( ).get() ); + if( iterConcat ) { - if( distance( concat->elements.begin( ), itC ) < (int)iterConcat->elements.size( ) ) - { - itC ++; + if( distance( concat->getChildren ( ).begin( ), itC ) < (int) iterConcat->getChildren ( ).size( ) ) { + ++ itC; continue; } - advance( itStartX, - (int)(iterConcat->elements.size( ) ) ); + advance( itStartX, - (int) iterConcat->getChildren().size( ) ); - if( distance( iterConcat->elements.begin( ), iterConcat->elements.end( ) ) != distance( itStartX, concat->elements.end( ) ) - || - ! equal( iterConcat->elements.begin( ), iterConcat->elements.end( ), itStartX, - []( const std::smart_ptr < UnboundedRegExpElement > & a, const std::smart_ptr < UnboundedRegExpElement > & b ) -> bool{ return *a == *b; } ) ) - { - itC++; + if( iterConcat->getChildren ( ).size( ) != ( unsigned ) distance( itStartX, concat->getChildren ( ).end( ) ) + || ! equal( iterConcat->getChildren().begin( ), iterConcat->getChildren().end( ), itStartX, + []( const std::smart_ptr < UnboundedRegExpElement > & a, const std::smart_ptr < UnboundedRegExpElement > & b ) -> bool{ return *a == *b; } ) ) { + ++ itC; continue; } - } - // else - else - { - if( * iter->element != ** std::prev( itC ) ) - { - itC ++; + } else { + if( * iter->getChild ( ) != ** std::prev( itC ) ) { + ++ itC; continue; } @@ -864,49 +787,39 @@ bool RegExpOptimize::V6( UnboundedRegExpAlternation * const & /* node */ ) const // store everything before x as "a" UnboundedRegExpElement * regexpA; - if( concat->elements.begin( ) == itStartX ) - { + if( concat->getChildren().begin( ) == itStartX ) { regexpA = new UnboundedRegExpEpsilon( ); - } - else - { + } else { UnboundedRegExpConcatenation* tmpA = new UnboundedRegExpConcatenation( ); - tmpA->elements.insert( tmpA->elements.end( ), concat->elements.begin( ), itStartX ); + tmpA->insert( tmpA->getChildren().end( ), concat->getChildren().begin( ), itStartX ); regexpA = optimize( tmpA ); - tmpA->elements.clear( ); delete tmpA; } // store everything behind iteration's followup element as "y" UnboundedRegExpElement * regexpY; - if( std::next( itC ) == concat->elements.end( ) ) - { + if( std::next( itC ) == concat->getChildren().end( ) ) { regexpY = new UnboundedRegExpEpsilon( ); - } - else - { + } else { UnboundedRegExpConcatenation* tmpY = new UnboundedRegExpConcatenation( ); - tmpY->elements.insert( tmpY->elements.end( ), std::next( itC ), concat->elements.end( ) ); + tmpY->insert( tmpY->getChildren().end( ), std::next( itC ), concat->getChildren ( ).end( ) ); regexpY = optimize( tmpY ); - tmpY->elements.clear( ); delete tmpY; } - // concatenate "a" and "y" and see if they exist somewhere in parent alternation ( node->elements ) + // concatenate "a" and "y" and see if they exist somewhere in parent alternation ( node->getElements() ) UnboundedRegExpConcatenation* tmpAY = new UnboundedRegExpConcatenation( ); - tmpAY->elements.push_back( std::smart_ptr < UnboundedRegExpElement > ( regexpA ) ); - tmpAY->elements.push_back( std::smart_ptr < UnboundedRegExpElement > ( regexpY ) ); + tmpAY->pushBackChild( std::smart_ptr < UnboundedRegExpElement > ( regexpA ) ); + tmpAY->pushBackChild( std::smart_ptr < UnboundedRegExpElement > ( regexpY ) ); UnboundedRegExpElement * regexpAY = optimize( tmpAY ); - regexpA = tmpAY->elements[0].release(); - regexpY = tmpAY->elements[1].release(); - tmpAY->elements.clear( ); + regexpA = tmpAY->getChildren()[0].release(); + regexpY = tmpAY->getChildren()[1].release(); delete tmpAY; - auto iterAY = find_if( node->elements.begin( ), node->elements.end( ), [ regexpAY ] ( const std::smart_ptr < UnboundedRegExpElement > & a ) -> bool{ return *a == *regexpAY; } ); + auto iterAY = find_if( node->getChildren().begin( ), node->getChildren().end( ), [ regexpAY ] ( const std::smart_ptr < UnboundedRegExpElement > & a ) -> bool{ return *a == *regexpAY; } ); delete regexpAY; - if( iterAY == node->elements.end( ) ) - { - itC ++; + if( iterAY == node->getChildren().end( ) ) { + ++ itC; delete regexpA; delete regexpY; @@ -914,34 +827,30 @@ bool RegExpOptimize::V6( UnboundedRegExpAlternation * const & /* node */ ) const } // if AY exists, then we can simply do this: - //iterator invalidated, need to backup concat node + // iterator invalidated, need to backup concat node UnboundedRegExpElement * tmpItA = itA->get(); - node->elements.erase( iterAY ); + node->getChildren().erase( iterAY ); - // iterator invalidated, need to recall before erase - itA = find_if( node->elements.begin( ), node->elements.end( ), [ tmpItA ]( const std::smart_ptr < UnboundedRegExpElement > & a ) -> bool { return *a == *tmpItA; } ); + // iterator invalidated, need to recall before erase + itA = find_if( node->getChildren().begin( ), node->getChildren().end( ), [ tmpItA ]( const std::smart_ptr < UnboundedRegExpElement > & a ) -> bool { return *a == *tmpItA; } ); UnboundedRegExpConcatenation * tmpAltered = new UnboundedRegExpConcatenation( ); - tmpAltered->elements.push_back( std::smart_ptr < UnboundedRegExpElement > ( regexpA ) ); - tmpAltered->elements.push_back( * itC ); - tmpAltered->elements.push_back( std::smart_ptr < UnboundedRegExpElement > ( regexpY ) ); + tmpAltered->pushBackChild( std::smart_ptr < UnboundedRegExpElement > ( regexpA ) ); + tmpAltered->pushBackChild( * itC ); + tmpAltered->pushBackChild( std::smart_ptr < UnboundedRegExpElement > ( regexpY ) ); UnboundedRegExpElement * regexpAltered = optimize( tmpAltered ); - tmpAltered->elements.clear( ); delete tmpAltered; - itA = node->elements.erase( itA ); - - node->elements.insert( itA, std::smart_ptr < UnboundedRegExpElement > ( regexpAltered ) ); + node->setChild( std::smart_ptr < UnboundedRegExpElement > ( regexpAltered ), itA ); optimized = true; break; } - itA ++; + ++ itA; } - return optimized; */ - return false; // FIXME + return optimized; } /** @@ -949,80 +858,60 @@ bool RegExpOptimize::V6( UnboundedRegExpAlternation * const & /* node */ ) const * @param node UnboundedRegExpConcatenation node * @return bool true if optimization applied else false */ -bool RegExpOptimize::V8( UnboundedRegExpConcatenation * const & /* node */ ) const -{ -/* bool optimized = false; */ +bool RegExpOptimize::V8( UnboundedRegExpConcatenation * const & node ) const { + bool optimized = false; // interpretation: if there is iteration in concatenation node, and element of iteration contains eps and is straight before this iteration, then this element can be omitted -/* for( auto it = next( node->elements.begin( ) ); it != node->elements.end( ); ) - { + if ( node->getChildren ( ).size ( ) == 0 ) + return false; + + for( auto it = next ( node->getChildren ( ).begin( ) ); it != node->getChildren ( ).end( ); ) { UnboundedRegExpIteration* iter = dynamic_cast<UnboundedRegExpIteration*>( it->get() ); - if( ! iter ) - { + if( ! iter ) { it ++; continue; } // if element of iteration is concatenation, we need to check this specially - UnboundedRegExpConcatenation * concat = dynamic_cast<UnboundedRegExpConcatenation*>( iter->element.get() ); + UnboundedRegExpConcatenation * concat = dynamic_cast<UnboundedRegExpConcatenation*>( iter->getChild ( ).get() ); - if( concat ) - { + if( concat ) { // check if not out of bounds - if( distance( node->elements.begin( ), it ) < distance( concat->elements.begin(), concat->elements.end() ) ) - { + if( ( unsigned ) distance( node->getChildren ( ).begin( ), it ) < concat->getChildren ( ).size ( ) ) { it ++; continue; } //FIXME: int cast auto it2 = it; - advance( it2, - (int)concat->elements.size( ) ); + advance( it2, - (int) concat->getChildren().size( ) ); if( regexp::properties::RegExpEpsilon::languageContainsEpsilon(*concat) && - distance( concat->elements.begin( ), concat->elements.end( )) == distance ( it2, node->elements.end( ) ) && - equal( concat->elements.begin( ), concat->elements.end( ), it2, [] ( const std::smart_ptr < UnboundedRegExpElement > & a, const std::smart_ptr < UnboundedRegExpElement > & b ) -> bool { return *a == *b; } ) ) - { + concat->getChildren().size ( ) == ( unsigned ) distance ( it2, node->getChildren ( ).end( ) ) && + equal( concat->getChildren ( ).begin( ), concat->getChildren ( ).end( ), it2, [] ( const std::smart_ptr < UnboundedRegExpElement > & a, const std::smart_ptr < UnboundedRegExpElement > & b ) -> bool { return *a == *b; } ) ) { optimized = true; - it = node->elements.erase( it2, it ); - } - else - { - it ++; - } - } - // else - else - { - if( it == node->elements.begin( ) ) - { - it++; - continue; - } - - auto prev = std::prev( it ); + it = node->getChildren().erase( it2, it ); + } else + ++ it; + } else { + auto prev = std::prev ( it ); - if( regexp::properties::RegExpEpsilon::languageContainsEpsilon(*(iter->element)) && *( iter->element ) == **prev ) - { - it = node->elements.erase( prev ); + if( regexp::properties::RegExpEpsilon::languageContainsEpsilon(iter->getElement()) && iter->getElement ( ) == **prev ) { + it = node->getChildren().erase( prev ); optimized = true; // in case xxx*, we need to stay on the iter element, not to go behind it - if( it != node->elements.begin( ) ) + if( it != node->getChildren().begin( ) ) it = std::prev( it ); - } - else - { - it ++; - } + } else + ++ it; } } - return optimized;*/ - return false; // FIXME + return optimized; } /**