diff --git a/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.cxx b/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.cxx index e6fe1e05908b54ceaec5be0fb16ed85be10e35b6..0de8982278e1a8931c6a8d28c613e61220688c82 100644 --- a/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.cxx +++ b/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.cxx @@ -218,7 +218,7 @@ bool RegExpOptimize::A4( UnboundedRegExpAlternation & node ) { auto cmp = [ ]( const std::smart_ptr < UnboundedRegExpElement > & a, const std::smart_ptr < UnboundedRegExpElement > & b ) -> bool { return *a == *b; }; size_t size = node.getChildren ( ).size ( ); - std::unique ( node.getChildren ( ).begin ( ), node.getChildren ( ).end ( ), cmp); + node.getChildren ( ).erase ( std::unique ( node.getChildren ( ).begin ( ), node.getChildren ( ).end ( ), cmp), node.getChildren ( ).end( ) ); return size != node.getChildren ( ).size ( ); } @@ -556,31 +556,27 @@ bool RegExpOptimize::V4( UnboundedRegExpIteration & node ) { * @param node UnboundedRegExpAlternation node * @return bool true if optimization applied else false */ -bool RegExpOptimize::V5( UnboundedRegExpAlternation & /* node */ ) { -/* bool optimized = false; */ +bool RegExpOptimize::V5( UnboundedRegExpAlternation & node ) { + bool optimized = false; // reinterpretation: ax*y = ay+ax*xy - // so, if we find iter, a = everything that is before it (prefix) - // x = iter's content - // behind iter must be exactly iter's content - // y = rest (suffix) + // so, if we find iter, + // a = everything that is before it (prefix) + // x = iter's content; behind iter must be exactly iter's content + // y = rest (suffix) // prefix.x*x.suffix + prefix.suffix = prefix.x*.suffix -/* for( auto itA = node->elements.begin( ); itA != node->elements.end( ); ) - { + for( auto itA = node.getChildren().begin( ); itA != node.getChildren().end( ); ) { UnboundedRegExpConcatenation * concat = dynamic_cast<UnboundedRegExpConcatenation*>( itA->get() ); - if( ! 
concat ) - { - itA ++; + if( ! concat ) { + ++ itA; continue; } - for( auto itC = concat->elements.begin( ); itC != std::prev( concat->elements.end( ) ); ) - { + for( auto itC = concat->getChildren().begin( ); itC != std::prev( concat->getChildren().end( ) ); ) { UnboundedRegExpIteration * iter = dynamic_cast<UnboundedRegExpIteration*>( itC->get() ); - if( ! iter ) - { - itC ++; + if( ! iter ) { + ++ itC; continue; } @@ -588,32 +584,19 @@ bool RegExpOptimize::V5( UnboundedRegExpAlternation & /* node */ ) { auto itStartY = std::next( itC ); //itStartY points to y in expression x*xy // if iter's element is concat - if( dynamic_cast<UnboundedRegExpConcatenation*>( iter->element.get() ) ) - { - UnboundedRegExpConcatenation * iterConcat = dynamic_cast<UnboundedRegExpConcatenation*>( iter->element.get() ); - - // std::cout << "....." << std::endl; - // std::cout << RegExp( concat ) << std::endl; - // std::cout << RegExp( iterConcat ) << std::endl; - // UnboundedRegExpConcatenation * tmp = new UnboundedRegExpConcatenation( ); - // tmp->elements.insert( tmp->elements.end( ), std::next( itC ), concat->elements.end( ) ); - // std::cout << RegExp( tmp) << std::endl; - - if( distance( iterConcat->elements.begin( ), iterConcat->elements.end( ) ) != distance( std::next( itC ), concat->elements.end( ) ) - || ! equal( iterConcat->elements.begin( ), iterConcat->elements.end( ), std::next( itC ), - [ ]( const std::smart_ptr < UnboundedRegExpElement > & a, const std::smart_ptr < UnboundedRegExpElement > & b ) -> bool{ return *a == *b; } ) ) - { + if( dynamic_cast<UnboundedRegExpConcatenation*>( iter->getChild().get() ) ) { + UnboundedRegExpConcatenation * iterConcat = dynamic_cast<UnboundedRegExpConcatenation*>( iter->getChild().get() ); + + if( iterConcat->getChildren().size( ) != ( unsigned ) distance( std::next( itC ), concat->getChildren().end( ) ) + || ! 
equal( iterConcat->getChildren().begin( ), iterConcat->getChildren().end( ), std::next( itC ), + [ ]( const std::smart_ptr < UnboundedRegExpElement > & a, const std::smart_ptr < UnboundedRegExpElement > & b ) -> bool{ return *a == *b; } ) ) { itC++; continue; } - advance( itStartY, (int)iterConcat->elements.size( ) ); - } - // else - else - { - if( * iter->element != ** std::next( itC ) ) - { - itC ++; + advance( itStartY, (int)iterConcat->getChildren().size( ) ); + } else { + if( * iter->getChild() != ** std::next( itC ) ) { + ++ itC; continue; } @@ -622,86 +605,60 @@ bool RegExpOptimize::V5( UnboundedRegExpAlternation & /* node */ ) { // store everything before iteration as "a" UnboundedRegExpElement * regexpA; - if( concat->elements.begin( ) == itC ) - { + if( concat->getChildren().begin( ) == itC ) { regexpA = new UnboundedRegExpEpsilon( ); - } - else - { - UnboundedRegExpConcatenation * tmpA = new UnboundedRegExpConcatenation( ); - tmpA->elements.insert( tmpA->elements.end( ), concat->elements.begin( ), itC ); - regexpA = optimize( tmpA ); - tmpA->elements.clear( ); - delete tmpA; + } else { + UnboundedRegExpConcatenation tmpA; + tmpA.insert( tmpA.getChildren().end( ), concat->getChildren().begin( ), itC ); + regexpA = optimizeInner( tmpA ); } // store everything behind iteration's followup element as "y" UnboundedRegExpElement * regexpY; - if( itStartY == concat->elements.end( ) ) - { + if( itStartY == concat->getChildren().end( ) ) { regexpY = new UnboundedRegExpEpsilon( ); - } - else - { - UnboundedRegExpConcatenation* tmpY = new UnboundedRegExpConcatenation( ); - tmpY->elements.insert( tmpY->elements.end( ), itStartY, concat->elements.end( ) ); - regexpY = optimize( tmpY ); - tmpY->elements.clear( ); - delete tmpY; + } else { + UnboundedRegExpConcatenation tmpY; + tmpY.insert( tmpY.getChildren().end( ), itStartY, concat->getChildren().end( ) ); + regexpY = optimizeInner( tmpY ); } - // concatenate "a" and "y" and see if they exist somewhere in parent 
alternation ( node->elements ) - UnboundedRegExpConcatenation* tmpAY = new UnboundedRegExpConcatenation( ); - tmpAY->elements.push_back( std::smart_ptr < UnboundedRegExpElement > ( regexpA ) ); - tmpAY->elements.push_back( std::smart_ptr < UnboundedRegExpElement > ( regexpY ) ); - UnboundedRegExpElement * regexpAY = optimize( tmpAY ); - regexpA = tmpAY->elements[0].release(); - regexpY = tmpAY->elements[1].release(); - tmpAY->elements.clear( ); - delete tmpAY; - - auto iterAY = find_if( node->elements.begin( ), node->elements.end( ), [ regexpAY ] ( const std::smart_ptr < UnboundedRegExpElement > & a ) -> bool{ return *a == *regexpAY; } ); + // concatenate "a" and "y" and see if they exist somewhere in parent alternation ( node.getChildren() ) + UnboundedRegExpConcatenation tmpAY; + tmpAY.pushBackChild( std::smart_ptr < UnboundedRegExpElement > ( regexpA ) ); + tmpAY.pushBackChild( std::smart_ptr < UnboundedRegExpElement > ( regexpY ) ); + UnboundedRegExpElement * regexpAY = optimizeInner( tmpAY ); + regexpA = tmpAY.getChildren()[0].release(); + regexpY = tmpAY.getChildren()[1].release(); + + auto iterAY = find_if( node.getChildren().begin( ), node.getChildren().end( ), [ regexpAY ] ( const std::smart_ptr < UnboundedRegExpElement > & a ) -> bool{ return *a == *regexpAY; } ); delete regexpAY; - if( iterAY == node->elements.end( ) ) - { - itC ++; + if( iterAY == node.getChildren().end( ) ) { + ++ itC; delete regexpA; delete regexpY; continue; } - // if AY exists, then we can simply do this: - //iterator invalidated, need to backup concat node - UnboundedRegExpElement * tmpItA = itA->get(); - - node->elements.erase( iterAY ); - - // iterator invalidated, need to recall before erase - itA = find_if( node->elements.begin( ), node->elements.end( ), [ tmpItA ]( const std::smart_ptr < UnboundedRegExpElement > & a ) -> bool { return *a == *tmpItA; } ); - - UnboundedRegExpConcatenation * tmpAltered = new UnboundedRegExpConcatenation( ); - tmpAltered->elements.push_back( 
std::smart_ptr < UnboundedRegExpElement > ( regexpA ) ); - tmpAltered->elements.push_back( * itC ); - tmpAltered->elements.push_back( std::smart_ptr < UnboundedRegExpElement > ( regexpY ) ); - UnboundedRegExpElement * regexpAltered = optimize( tmpAltered ); - - tmpAltered->elements.clear( ); - delete tmpAltered; + UnboundedRegExpConcatenation tmpAltered; + tmpAltered.pushBackChild( std::smart_ptr < UnboundedRegExpElement > ( regexpA ) ); + tmpAltered.pushBackChild( * itC ); + tmpAltered.pushBackChild( std::smart_ptr < UnboundedRegExpElement > ( regexpY ) ); + UnboundedRegExpElement * regexpAltered = optimizeInner( tmpAltered ); - itA = node->elements.erase( itA ); + node.setChild( std::smart_ptr < UnboundedRegExpElement > ( regexpAltered ), itA ); - node->elements.insert( itA, std::smart_ptr < UnboundedRegExpElement > ( regexpAltered ) ); + itA = node.getChildren().erase( iterAY ); optimized = true; break; } - itA ++; + ++ itA; } - return optimized; */ - return false; // FIXME + return optimized; } /** @@ -781,7 +738,7 @@ bool RegExpOptimize::V6( UnboundedRegExpAlternation & node ) { regexpY = optimizeInner( tmpY ); } - // concatenate "a" and "y" and see if they exist somewhere in parent alternation ( node->getElements() ) + // concatenate "a" and "y" and see if they exist somewhere in parent alternation ( node.getChildren() ) UnboundedRegExpConcatenation tmpAY; tmpAY.pushBackChild( std::smart_ptr < UnboundedRegExpElement > ( regexpA ) ); tmpAY.pushBackChild( std::smart_ptr < UnboundedRegExpElement > ( regexpY ) ); @@ -799,14 +756,6 @@ bool RegExpOptimize::V6( UnboundedRegExpAlternation & node ) { continue; } - // if AY exists, then we can simply do this: - // iterator invalidated, need to backup concat node - UnboundedRegExpElement * tmpItA = itA->get(); - node.getChildren().erase( iterAY ); - - // iterator invalidated, need to recall before erase - itA = find_if( node.getChildren().begin( ), node.getChildren().end( ), [ tmpItA ]( const std::smart_ptr < 
UnboundedRegExpElement > & a ) -> bool { return *a == *tmpItA; } ); - UnboundedRegExpConcatenation tmpAltered; tmpAltered.pushBackChild( std::smart_ptr < UnboundedRegExpElement > ( regexpA ) ); tmpAltered.pushBackChild( * itC ); @@ -814,6 +763,9 @@ bool RegExpOptimize::V6( UnboundedRegExpAlternation & node ) { UnboundedRegExpElement * regexpAltered = optimizeInner( tmpAltered ); node.setChild( std::smart_ptr < UnboundedRegExpElement > ( regexpAltered ), itA ); + + itA = node.getChildren().erase( iterAY ); + optimized = true; break; } diff --git a/alib2algo/test-src/regexp/simplify/RegExpOptimizeTest.cpp b/alib2algo/test-src/regexp/simplify/RegExpOptimizeTest.cpp index 53e4772a9fda056973a50180dde35c5a0629c1b5..c5f4a38916e326126b372025706b71c657c17f39 100644 --- a/alib2algo/test-src/regexp/simplify/RegExpOptimizeTest.cpp +++ b/alib2algo/test-src/regexp/simplify/RegExpOptimizeTest.cpp @@ -20,16 +20,47 @@ void RegExpOptimizeTest::tearDown() { } void RegExpOptimizeTest::testOptimize() { + { + std::string input = "a+a"; + regexp::UnboundedRegExp regexp( static_cast<const regexp::UnboundedRegExp &>( alib::StringDataFactory::fromString<regexp::RegExp>(input).getData() ) ); + + regexp::UnboundedRegExp res = regexp::simplify::RegExpOptimize::optimize(regexp); + + std::string inputRes = "a"; + regexp::UnboundedRegExp regexpRes( static_cast<const regexp::UnboundedRegExp &>( alib::StringDataFactory::fromString<regexp::RegExp>(inputRes).getData() ) ); + + std::cout << res << std::endl; + std::cout << regexpRes << std::endl; + + CPPUNIT_ASSERT ( regexpRes == res ); + } { std::string input = "(a+a)b + (#0 b + (#0 a + (#0 b + a)))"; regexp::UnboundedRegExp regexp( static_cast<const regexp::UnboundedRegExp &>( alib::StringDataFactory::fromString<regexp::RegExp>(input).getData() ) ); regexp::UnboundedRegExp res = regexp::simplify::RegExpOptimize::optimize(regexp); + + std::string inputRes = "a + ab"; + regexp::UnboundedRegExp regexpRes( static_cast<const regexp::UnboundedRegExp &>( 
alib::StringDataFactory::fromString<regexp::RegExp>(inputRes).getData() ) ); + + std::cout << res << std::endl; + std::cout << regexpRes << std::endl; + + CPPUNIT_ASSERT ( regexpRes == res ); } { - std::string input = "a+a* (b+a)* c"; + std::string input = "a z + a b* b z"; regexp::UnboundedRegExp regexp( static_cast<const regexp::UnboundedRegExp &>( alib::StringDataFactory::fromString<regexp::RegExp>(input).getData() ) ); + regexp::UnboundedRegExp res = regexp::simplify::RegExpOptimize::optimize(regexp); + + std::string inputRes = "a b* z"; + regexp::UnboundedRegExp regexpRes( static_cast<const regexp::UnboundedRegExp &>( alib::StringDataFactory::fromString<regexp::RegExp>(inputRes).getData() ) ); + + std::cout << res << std::endl; + std::cout << regexpRes << std::endl; + + CPPUNIT_ASSERT ( regexpRes == res ); } }