From f81e77a2749a915ed8dec58c4da01a237d8967da Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Tue, 19 Mar 2019 09:32:24 +0100 Subject: [PATCH] enable A9 and some move semantics improvements --- .../simplify/RegExpOptimizeUnboundedPart.hpp | 276 ++++-------------- .../src/extensions/container/tree_base.hpp | 40 +++ 2 files changed, 91 insertions(+), 225 deletions(-) diff --git a/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.hpp b/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.hpp index 95d2501e16..00fb3cef1a 100644 --- a/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.hpp +++ b/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.hpp @@ -307,14 +307,14 @@ bool RegExpOptimize::Unbounded < SymbolType >::A8( UnboundedRegExpAlternation < innerAlt.appendElement ( UnboundedRegExpEpsilon < SymbolType > ( ) ); } } - innerConcat.appendElement ( std::move ( innerAlt ) ); + innerConcat.insert ( innerConcat.end ( ), std::move ( innerAlt ) ); res.appendElement ( Unbounded < SymbolType >::visit ( std::move ( innerConcat ), true ) ); } } - node = res; + node = std::move ( res ); - return false; + return true; } /** @@ -323,48 +323,51 @@ bool RegExpOptimize::Unbounded < SymbolType >::A8( UnboundedRegExpAlternation < * @return bool true if optimization applied else false */ template < class SymbolType > -bool RegExpOptimize::Unbounded < SymbolType >::A9( UnboundedRegExpAlternation < SymbolType > & /* node */) { -/* - bool optimized = false; +bool RegExpOptimize::Unbounded < SymbolType >::A9( UnboundedRegExpAlternation < SymbolType > & node ) { + std::map < ext::reference_wrapper < UnboundedRegExpElement < SymbolType > >, ext::vector < ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > > > data; - for( auto it = node->elements.begin( ); it != std::prev( node->elements.end( ) ); ) - { - UnboundedRegExpAlternation < SymbolType > * alt = dynamic_cast<UnboundedRegExpAlternation < SymbolType >*>( * it ); - 
if( ! alt ) - { - it ++; - continue; + for ( UnboundedRegExpElement < SymbolType > & element : node ) { + UnboundedRegExpConcatenation < SymbolType > * childConcat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & element ); + if ( childConcat ) { + data [ ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > ( childConcat->getChild ( childConcat->getElements ( ).size ( ) - 1 ) ) ].push_back ( ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > ( element ) ); + } else { + data [ ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > ( element ) ].push_back ( ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > ( element ) ); } + } - // take everything to the right and copy it as suffix of every element in alternation. - UnboundedRegExpConcatenation < SymbolType > rest; - rest.elements.insert( rest.elements.end( ), std::next( it ), node->elements.end( ) ); - - for( auto altIt = alt->elements.begin( ); altIt != alt->elements.end( ); altIt ++ ) - { - UnboundedRegExpConcatenation < SymbolType > * altElem = new UnboundedRegExpConcatenation < SymbolType >( ); - altElem->elements.push_back( * altIt ); - altElem->elements.push_back( rest ); + if ( data.size ( ) == node.getChildren ( ).size ( ) ) + return false; - * altIt = altElem; + UnboundedRegExpAlternation < SymbolType > res; + for ( std::pair < ext::reference_wrapper < UnboundedRegExpElement < SymbolType > >, ext::vector < ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > > > && entry : ext::make_mover ( data ) ) { + if ( entry.second.size ( ) == 1 ) { + res.appendElement ( std::move ( entry.second.front ( ).get ( ) ) ); + } else { + UnboundedRegExpConcatenation < SymbolType > innerConcat; + innerConcat.appendElement ( std::move ( entry.first.get ( ) ) ); + UnboundedRegExpAlternation < SymbolType > innerAlt; + for ( ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > & innerEntry : entry.second ) { + UnboundedRegExpElement < 
SymbolType > & innerEntryElement = innerEntry.get ( ); + UnboundedRegExpConcatenation < SymbolType > * innerEntryConcat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & innerEntryElement ); + if ( innerEntryConcat ) { + if ( innerEntryConcat->getElements ( ).size ( ) == 1 ) { + innerAlt.appendElement ( UnboundedRegExpEpsilon < SymbolType > ( ) ); + } else { + innerEntryConcat->erase ( innerEntryConcat->rbegin ( ) ); + innerAlt.appendElement ( std::move ( * innerEntryConcat ) ); + } + } else { + innerAlt.appendElement ( UnboundedRegExpEpsilon < SymbolType > ( ) ); + } + } + innerConcat.insert ( innerConcat.begin ( ), std::move ( innerAlt ) ); + res.appendElement ( Unbounded < SymbolType >::visit ( std::move ( innerConcat ), true ) ); } - - UnboundedRegExpElement < SymbolType > * optIt = optimize( * it ); - delete *it; - *it = optIt; - - it = node->elements.erase( std::next( it ), node->elements.end( ) ); - optimized = true; - - // as we move (delete) the rest of this expression, it surely wont do another round. More optimizations to be performerd are in subtree now. 
- // we do not care about this here as method optimize(UnboundedRegExpAlternation < SymbolType >) will take care of this in next iteration - // it ++; - break; } - return optimized; -*/ - return false; //TODO + node = std::move ( res ); + + return true; } /** @@ -531,7 +534,7 @@ bool RegExpOptimize::Unbounded < SymbolType >::V4( UnboundedRegExpIteration < Sy UnboundedRegExpAlternation < SymbolType > newAlt; - for ( UnboundedRegExpElement < SymbolType > && n : ext::make_mover ( std::move ( * cont ).getChildren ( ) ) ) + for ( UnboundedRegExpElement < SymbolType > & n : * cont ) newAlt.pushBackChild ( std::move ( static_cast < UnboundedRegExpIteration < SymbolType > & > ( n ).getChild ( ) ) ); node.setChild ( Unbounded < SymbolType >::visit ( std::move ( newAlt ), true ) ); @@ -546,95 +549,9 @@ bool RegExpOptimize::Unbounded < SymbolType >::V4( UnboundedRegExpIteration < Sy */ template < class SymbolType > bool RegExpOptimize::Unbounded < SymbolType >::V5( UnboundedRegExpAlternation < SymbolType > & /* node */ ) { - bool optimized = false; - - // reinterpretation: ax*y = ay+ax*xy - // so, if we find iter, - // a = everything that is before it (prefix) - // x = iter's content behind iter must be exactly iter's content - // y = rest (suffix) - // prefix.x*x.suffix + prefix.suffix = prefix.x*.suffix - - /* for( auto itA = node.getChildren().begin( ); itA != node.getChildren().end( ); ) { - UnboundedRegExpConcatenation < SymbolType > * concat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & * itA ); - if( ! concat ) { - ++ itA; - continue; - } + // implemented by combination of A9 and A10 - for( auto itC = concat->getChildren().begin( ); itC != std::prev( concat->getChildren().end( ) ); ) { - UnboundedRegExpIteration < SymbolType > * iter = dynamic_cast < UnboundedRegExpIteration < SymbolType > * > ( & * itC ); - if( ! 
iter ) { - ++ itC; - continue; - } - - // iteration's element must follow the iteration (x*x) - auto itStartY = std::next( itC ); //itStartY points to y in expression x*xy - - // if iter's element is concat - if( dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & iter->getChild ( ) ) ) { - UnboundedRegExpConcatenation < SymbolType > * iterConcat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & iter->getChild ( ) ); - - if( iterConcat->getChildren().size( ) != ( size_t ) distance ( std::next ( itC ), concat->getChildren ( ).end ( ) ) - || ! equal ( iterConcat->getChildren ( ).begin ( ), iterConcat->getChildren ( ).end ( ), std::next ( itC ), - [ ] ( const UnboundedRegExpElement < SymbolType > & a, const UnboundedRegExpElement < SymbolType > & b ) -> bool { return a == b; } ) ) { - ++ itC; - continue; - } - std::advance( itStartY, iterConcat->getChildren().size( ) ); - } else { - if( iter->getChild() != * std::next( itC ) ) { - ++ itC; - continue; - } - - std::advance( itStartY, 1 ); - } - - // store everything before iteration as "a" - UnboundedRegExpConcatenation < SymbolType > tmpAY; - if( concat->getChildren().begin( ) == itC ) { - tmpAY.pushBackChild ( UnboundedRegExpEpsilon < SymbolType > ( ) ); - } else { - UnboundedRegExpConcatenation < SymbolType > tmpA; - tmpA.insert( tmpA.getChildren().end( ), concat->getChildren().begin( ), itC ); - tmpAY.pushBackChild ( optimizeInner( tmpA ) ); - } - - // store everything behind iteration's followup element as "y" - if( itStartY == concat->getChildren().end( ) ) { - tmpAY.pushBackChild ( UnboundedRegExpEpsilon < SymbolType > ( ) ); - } else { - UnboundedRegExpConcatenation < SymbolType > tmpY; - tmpY.insert( tmpY.getChildren().end( ), itStartY, concat->getChildren().end( ) ); - tmpAY.pushBackChild ( optimizeInner ( tmpY ) ); - } - - // concatenate "a" and "y" and see if they exist somewhere in parent alternation ( node.getChildren() ) - ext::ptr_value < UnboundedRegExpElement < 
SymbolType > > regexpAY = optimizeInner( tmpAY ); - - auto iterAY = find_if ( node.getChildren().begin( ), node.getChildren().end( ), [ & ] ( const UnboundedRegExpElement < SymbolType > & a ) -> bool { return a == regexpAY.get ( ); } ); - if( iterAY == node.getChildren().end( ) ) { - ++ itC; - - continue; - } - - tmpAY.insert ( tmpAY.getChildren ( ).begin ( ) + 1, * itC ); - - node.setChild ( optimizeInner( tmpAY ), itA ); - - itA = node.getChildren().erase( iterAY ); - - optimized = true; - break; - } - - ++ itA; - }*/ - - return optimized; + return false; } /** @@ -644,100 +561,9 @@ bool RegExpOptimize::Unbounded < SymbolType >::V5( UnboundedRegExpAlternation < */ template < class SymbolType > bool RegExpOptimize::Unbounded < SymbolType >::V6( UnboundedRegExpAlternation < SymbolType > & /* node */ ) { - bool optimized = false; + // implemented by combination of A9 and A10R - // reinterpretation: ax*y = ay+axx*y - // so, if we find iter - // a = everything that is before it (prefix) - // x = iter's content before iter must be exactly iter's content - // y = rest (suffix) - // prefix.xx*.suffix + prefix.suffix = prefix.x*.suffix - - /* for( auto itA = node.getChildren ( ).begin( ); itA != node.getChildren ( ).end( ); ) { - UnboundedRegExpConcatenation < SymbolType > * concat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & * itA ); - if( ! concat ) { - ++ itA; - continue; - } - - for( auto itC = std::next( concat->getChildren ( ).begin( ) ); itC != concat->getChildren ( ).end( ); ) { - UnboundedRegExpIteration < SymbolType > * iter = dynamic_cast < UnboundedRegExpIteration < SymbolType > * > ( & * itC ); - if( ! 
iter ) { - ++ itC; - continue; - } - - // iteration's element must preceed the iteration (xx*) - auto itStartX = itC; //itStartX points to first x in expression xx*, everything before is therefore prefix - regexp "a" - - // if iter's element is concat - UnboundedRegExpConcatenation < SymbolType > * iterConcat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & iter->getChild ( ) ); - if( iterConcat ) { - - if( distance( concat->getChildren ( ).begin( ), itC ) < (int) iterConcat->getChildren ( ).size( ) ) { - ++ itC; - continue; - } - ext::retract ( itStartX, iterConcat->getChildren().size( ) ); - - if( iterConcat->getChildren ( ).size( ) != ( size_t ) distance( itStartX, concat->getChildren ( ).end( ) ) - || ! equal ( iterConcat->getChildren ( ).begin ( ), iterConcat->getChildren ( ).end ( ), itStartX, - [ ] ( const UnboundedRegExpElement < SymbolType > & a, const UnboundedRegExpElement < SymbolType > & b ) -> bool { return a == b; } ) ) { - ++ itC; - continue; - } - } else { - if( iter->getChild ( ) != * std::prev( itC ) ) { - ++ itC; - continue; - } - - std::advance( itStartX, -1 ); - } - - // concatenate "a" and "y" and see if they exist somewhere in parent alternation ( node->getChildren() ) - UnboundedRegExpConcatenation < SymbolType > tmpAY; - if( concat->getChildren ( ).begin ( ) == itStartX ) { - tmpAY.pushBackChild ( UnboundedRegExpEpsilon < SymbolType > ( ) ); - } else { - UnboundedRegExpConcatenation < SymbolType > tmpA; - tmpA.insert ( tmpA.getChildren ( ).end ( ), concat->getChildren().begin ( ), itStartX ); - tmpAY.pushBackChild ( optimizeInner ( tmpA ) ); - } - - if( std::next ( itC ) == concat->getChildren().end( ) ) { - tmpAY.pushBackChild ( UnboundedRegExpEpsilon < SymbolType >( ) ); - } else { - UnboundedRegExpConcatenation < SymbolType > tmpY; - tmpY.insert ( tmpY.getChildren().end( ), std::next( itC ), concat->getChildren ( ).end( ) ); - tmpAY.pushBackChild ( optimizeInner( tmpY ) ); - } - - ext::ptr_value < 
UnboundedRegExpElement < SymbolType > > regexpAY = optimizeInner ( tmpAY ); - - auto iterAY = find_if( node.getChildren().begin( ), node.getChildren().end( ), [ & ] ( const UnboundedRegExpElement < SymbolType > & a ) -> bool { return a == regexpAY.get ( ); } ); - if( iterAY == node.getChildren().end( ) ) { - ++ itC; - - continue; - } - - // if so make a x* y and replace a x x* y - tmpAY.insert( tmpAY.getChildren ( ).begin ( ) + 1, * itC ); - - node.setChild ( optimizeInner( tmpAY ), itA ); - - // remove a y - itA = node.getChildren().erase( iterAY ); - - optimized = true; - break; - } - - ++ itA; - } */ - - return optimized; + return false; } /** @@ -892,7 +718,7 @@ bool RegExpOptimize::Unbounded < SymbolType >::V9( UnboundedRegExpConcatenation continue; } - it = node.insert ( it, * std::next ( it ) ); + it = node.insert ( it, std::move ( * std::next ( it ) ) ); it = std::next ( it ); it = node.erase ( std::next ( it ) ); it = std::prev ( it ); @@ -916,18 +742,18 @@ bool RegExpOptimize::Unbounded < SymbolType >::V9( UnboundedRegExpConcatenation // copy the range <it;sth>, delete it and go back to the iter node ext::ptr_vector < UnboundedRegExpElement < SymbolType > > copyRange; - copyRange.insert ( copyRange.end(), std::next( it ), c1Iter ); + copyRange.insert ( copyRange.end(), std::make_move_iterator ( std::next( it ) ), std::make_move_iterator ( c1Iter ) ); it = node.erase ( std::next ( it ), c1Iter ); it = std::prev( it ); // insert that range before it position - it = node.insert( it, copyRange.begin( ), copyRange.end( ) ); + it = node.insert( it, std::make_move_iterator ( copyRange.begin( ) ), std::make_move_iterator ( copyRange.end( ) ) ); // alter the iteration's concat node copyRange.clear( ); - copyRange.insert ( copyRange.end(), concat->begin( ), c2Iter ); + copyRange.insert ( copyRange.end(), std::make_move_iterator ( concat->begin( ) ), std::make_move_iterator ( c2Iter ) ); concat->erase ( concat->begin( ), c2Iter ); - concat->insert ( concat->end(), 
copyRange.begin( ), copyRange.end( ) ); + concat->insert ( concat->end(), std::make_move_iterator ( copyRange.begin( ) ), std::make_move_iterator ( copyRange.end( ) ) ); } } diff --git a/alib2std/src/extensions/container/tree_base.hpp b/alib2std/src/extensions/container/tree_base.hpp index 7f648c3c93..786ffdcc83 100644 --- a/alib2std/src/extensions/container/tree_base.hpp +++ b/alib2std/src/extensions/container/tree_base.hpp @@ -1514,6 +1514,26 @@ public: return m_children.begin ( ); } + /** + * \brief + * Getter of a reverse iterator to the reverse beginning of children vector + * + * \return reverse begin iterator + */ + typename ext::ptr_vector < Data >::reverse_iterator rbegin ( ) { + return m_children.rbegin ( ); + } + + /** + * \brief + * Getter of a const reverse iterator to the reverse beginning of children vector + * + * \return const reverse begin iterator + */ + typename ext::ptr_vector < Data >::const_reverse_iterator rbegin ( ) const { + return m_children.rbegin ( ); + } + /** * \brief * Getter of an iterator to the end of children vector @@ -1534,6 +1554,26 @@ public: return m_children.end ( ); } + /** + * \brief + * Getter of a reverse iterator to the reverse end of children vector + * + * \return reverse end iterator + */ + typename ext::ptr_vector < Data >::reverse_iterator rend ( ) { + return m_children.rend ( ); + } + + /** + * \brief + * Getter of a const reverse iterator to the reverse end of children vector + * + * \return const reverse end iterator + */ + typename ext::ptr_vector < Data >::const_reverse_iterator rend ( ) const { + return m_children.rend ( ); + } + }; } /* namespace ext */ -- GitLab