From 435a16d313ace9a311d9ef537cb6a581b3a02dd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <peckato1@fit.cvut.cz> Date: Mon, 5 May 2014 12:50:20 +0200 Subject: [PATCH] libaregexptree: optim:Concat V9 --- libaregexptree/makefile | 2 +- libaregexptree/src/RegExpOptimize.cpp | 71 ++++++++++++++++++++++++--- 2 files changed, 66 insertions(+), 7 deletions(-) diff --git a/libaregexptree/makefile b/libaregexptree/makefile index 59349150b1..617d601ca0 100644 --- a/libaregexptree/makefile +++ b/libaregexptree/makefile @@ -1,6 +1,6 @@ CC=g++ LIBRARY=libaregexptree.so -CCFLAGS= -std=c++11 -O2 -c -Wall -fPIC -I../alib/src +CCFLAGS= -std=c++11 -O2 -c -g -Wall -fPIC -I../alib/src LDFLAGS= -L../alib/lib -lalib -shared SOURCES=$(shell find src/ -name *cpp) diff --git a/libaregexptree/src/RegExpOptimize.cpp b/libaregexptree/src/RegExpOptimize.cpp index 4ff1638418..300f6ef02f 100644 --- a/libaregexptree/src/RegExpOptimize.cpp +++ b/libaregexptree/src/RegExpOptimize.cpp @@ -71,7 +71,11 @@ RegExpElement * RegExpOptimize::optimize( Alternation const * const & node ) return ret; } - assert( alt->getElements( ).size( ) > 0 ); + // assert( alt->getElements( ).size( ) > 0 ); + if( alt->getElements( ).size( ) <= 0 ) + { + std::cout << RegExp( node ) << std::endl; + } return alt; } @@ -82,7 +86,16 @@ RegExpElement * RegExpOptimize::optimize( Concatenation const * const & node ) for( const auto & child : node->getElements( ) ) concat->getElements( ).push_back( optimize( child ) ); - while( A5( concat ) || A6( concat ) || A7( concat ) || A8( concat ) || A9( concat ) || V8( concat ) || V9( concat ) ); + do + { + // A7 is implemented here ~ if not here, it went into infinite loop FIXME + if( std::any_of( concat->getElements( ).begin( ), concat->getElements( ).end( ), []( RegExpElement const * const & a ) -> bool{ return dynamic_cast<RegExpEmpty const *>( a ); } ) ) + { + delete concat; + return new RegExpEmpty( ); + } + } + while( A5( concat ) || A6( concat ) /*|| A7( concat ) */ || A8( concat ) || A9( concat ) || V8( concat ) || V9( concat ) ); if( concat->getElements( ).size( ) == 1 ) { @@ -319,9 +332,7 @@ bool RegExpOptimize::A7( Concatenation * const & node ) // FIXME: alib2 uses shared_ptrs, rewrite this using remove_if then - if( std::any_of( node->getElements( ).begin( ), node->getElements( ).end( ), []( RegExpElement const * const & a ) -> bool{ - return dynamic_cast<RegExpEmpty const *>( a ); - })) + if( std::any_of( node->getElements( ).begin( ), node->getElements( ).end( ), []( RegExpElement const * const & a ) -> bool{ return dynamic_cast<RegExpEmpty const *>( a ); } ) ) { for( auto const& child : node->getElements( ) ) delete child; @@ -688,14 +699,62 @@ bool RegExpOptimize::V8( Concatenation * const & node ) * @param node Concatenation node * @return bool true if optimization applied else false */ - bool RegExpOptimize::V9( Concatenation * const & node ) { bool optimized = false; + // interpretation: if concat (C1) with iter && iteration's element is concat (C2), then: + // simultaneously iterate through C1 and C2. (axy)*axz=ax(yax)*z -> get ax that is same and relocate them... + for( auto it = node->getElements( ).begin( ) ; it != node->getElements( ).end( ) ; ) + { + Iteration * iter = dynamic_cast<Iteration*>( * it ); + if ( ! iter ) + { + it++; + continue; + } + Concatenation * concat = dynamic_cast<Concatenation*>( iter->getElement ( ) ); + if( ! concat ) + { + it++; + continue; + } + // find range from <it+1;sth> and <concat.begin;sth> that is equal + auto c1Iter = std::next( it ), c2Iter = concat->getElements( ).begin( ); + while( c1Iter != node->getElements( ).end() && c2Iter != concat->getElements( ).end( ) && **c1Iter == ** c2Iter ) + { + c1Iter ++; + c2Iter ++; + } + if( c1Iter == std::next( it ) ) + { + it ++; + continue; + } + + // std::cout << "xy" << std::endl; + // Concatenation* tmp = new Concatenation( ); + // tmp->getElements( ).insert( tmp->getElements( ).end( ), std::next( it ), c1Iter ); + // std::cout << RegExp( tmp ) << std::endl; + + // copy the range <it;sth>, delete it and go back to the iter node + list<RegExpElement*> copyRange; + copyRange.insert( copyRange.end(), std::next( it ), c1Iter ); + it = node->getElements( ).erase( std::next( it ), c1Iter ); + it = std::prev( it ); + + // insert that range before it position + it = node->getElements( ).insert( it, copyRange.begin( ), copyRange.end( ) ); + + // alter the iteration's concat node + copyRange.clear( ); + copyRange.insert( copyRange.end(), concat->getElements( ).begin( ), c2Iter ); + concat->getElements( ).erase( concat->getElements( ).begin( ), c2Iter ); + concat->getElements( ).insert( concat->getElements( ).end(), copyRange.begin( ), copyRange.end( ) ); + } return optimized; } -- GitLab