From 977a0f13ea3f242bb663bc5f9b83d08ffb2f5d28 Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Wed, 27 Feb 2019 17:15:30 +0100 Subject: [PATCH] reimplement V8 unbounded regexp optimisation pattern --- .../simplify/RegExpOptimizeUnboundedPart.hpp | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.hpp b/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.hpp index 5c971f0e8f..d2d3fce9c4 100644 --- a/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.hpp +++ b/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.hpp @@ -766,16 +766,16 @@ bool RegExpOptimize::V6( UnboundedRegExpAlternation < SymbolType > & /* node */ * @return bool true if optimization applied else false */ template < class SymbolType > -bool RegExpOptimize::V8( UnboundedRegExpConcatenation < SymbolType > & /* node */ ) { +bool RegExpOptimize::V8( UnboundedRegExpConcatenation < SymbolType > & node ) { bool optimized = false; // interpretation: if there is iteration in concatenation node, and element of iteration contains eps and is straight before this iteration, then this element can be omitted - /*if ( node.getChildren ( ).size ( ) == 0 ) + if ( node.getChildren ( ).size ( ) == 0 ) return false; - for( auto it = next ( node.getChildren ( ).begin( ) ); it != node.getChildren ( ).end( ); ) { - UnboundedRegExpIteration < SymbolType > * iter = dynamic_cast < UnboundedRegExpIteration < SymbolType > * > ( & * it ); + for( auto it = node.getChildren ( ).begin( ); it != node.getChildren ( ).end( ); ) { + const UnboundedRegExpIteration < SymbolType > * iter = dynamic_cast < const UnboundedRegExpIteration < SymbolType > * > ( & * it ); if( ! iter ) { ++ it; @@ -783,7 +783,7 @@ bool RegExpOptimize::V8( UnboundedRegExpConcatenation < SymbolType > & /* node * } // if element of iteration is concatenation, we need to check this specially - UnboundedRegExpConcatenation < SymbolType > * concat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & iter->getChild ( ) ); + const UnboundedRegExpConcatenation < SymbolType > * concat = dynamic_cast < const UnboundedRegExpConcatenation < SymbolType > * > ( & iter->getChild ( ) ); if( concat ) { // check if not out of bounds @@ -800,23 +800,25 @@ bool RegExpOptimize::V8( UnboundedRegExpConcatenation < SymbolType > & /* node * equal ( concat->getChildren ( ).begin( ), concat->getChildren ( ).end( ), it2, [ ] ( const UnboundedRegExpElement < SymbolType > & a, const UnboundedRegExpElement < SymbolType > & b ) -> bool { return a == b; } ) ) { optimized = true; - it = node.getChildren ( ).erase ( it2, it ); + it = node.erase ( it2, it ); } else ++ it; } else { + // check if not at the first node + if ( it == node.getChildren ( ).begin ( ) ) { + it ++; + continue; + } + auto prev = std::prev ( it ); if ( regexp::properties::RegExpEpsilon::languageContainsEpsilon ( iter->getElement ( ) ) && iter->getElement ( ) == * prev ) { - it = node.getChildren ( ).erase ( prev ); + it = node.erase ( prev ); optimized = true; - - // in case xxx*, we need to stay on the iter element, not to go behind it - if( it != node.getChildren().begin( ) ) - it = std::prev( it ); } else ++ it; } - }*/ + } return optimized; } -- GitLab