Skip to content
Snippets Groups Projects
Commit f81e77a2 authored by Jan Trávníček's avatar Jan Trávníček
Browse files

enable A9 and some move semantics improvements

parent 14d6dd76
No related branches found
No related tags found
1 merge request!73Dev jt
......@@ -307,14 +307,14 @@ bool RegExpOptimize::Unbounded < SymbolType >::A8( UnboundedRegExpAlternation <
innerAlt.appendElement ( UnboundedRegExpEpsilon < SymbolType > ( ) );
innerConcat.appendElement ( std::move ( innerAlt ) );
innerConcat.insert ( innerConcat.end ( ), std::move ( innerAlt ) );
res.appendElement ( Unbounded < SymbolType >::visit ( std::move ( innerConcat ), true ) );
node = res;
node = std::move ( res );
return false;
return true;
......@@ -323,48 +323,51 @@ bool RegExpOptimize::Unbounded < SymbolType >::A8( UnboundedRegExpAlternation <
* @return bool true if optimization applied else false
template < class SymbolType >
bool RegExpOptimize::Unbounded < SymbolType >::A9( UnboundedRegExpAlternation < SymbolType > & /* node */) {
bool optimized = false;
bool RegExpOptimize::Unbounded < SymbolType >::A9( UnboundedRegExpAlternation < SymbolType > & node ) {
std::map < ext::reference_wrapper < UnboundedRegExpElement < SymbolType > >, ext::vector < ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > > > data;
for( auto it = node->elements.begin( ); it != std::prev( node->elements.end( ) ); )
UnboundedRegExpAlternation < SymbolType > * alt = dynamic_cast<UnboundedRegExpAlternation < SymbolType >*>( * it );
if( ! alt )
it ++;
for ( UnboundedRegExpElement < SymbolType > & element : node ) {
UnboundedRegExpConcatenation < SymbolType > * childConcat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & element );
if ( childConcat ) {
data [ ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > ( childConcat->getChild ( childConcat->getElements ( ).size ( ) - 1 ) ) ].push_back ( ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > ( element ) );
} else {
data [ ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > ( element ) ].push_back ( ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > ( element ) );
// take everything to the right and copy it as suffix of every element in alternation.
UnboundedRegExpConcatenation < SymbolType > rest;
rest.elements.insert( rest.elements.end( ), std::next( it ), node->elements.end( ) );
for( auto altIt = alt->elements.begin( ); altIt != alt->elements.end( ); altIt ++ )
UnboundedRegExpConcatenation < SymbolType > * altElem = new UnboundedRegExpConcatenation < SymbolType >( );
altElem->elements.push_back( * altIt );
altElem->elements.push_back( rest );
if ( data.size ( ) == node.getChildren ( ).size ( ) )
return false;
* altIt = altElem;
UnboundedRegExpAlternation < SymbolType > res;
for ( std::pair < ext::reference_wrapper < UnboundedRegExpElement < SymbolType > >, ext::vector < ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > > > && entry : ext::make_mover ( data ) ) {
if ( entry.second.size ( ) == 1 ) {
res.appendElement ( std::move ( entry.second.front ( ).get ( ) ) );
} else {
UnboundedRegExpConcatenation < SymbolType > innerConcat;
innerConcat.appendElement ( std::move ( entry.first.get ( ) ) );
UnboundedRegExpAlternation < SymbolType > innerAlt;
for ( ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > & innerEntry : entry.second ) {
UnboundedRegExpElement < SymbolType > & innerEntryElement = innerEntry.get ( );
UnboundedRegExpConcatenation < SymbolType > * innerEntryConcat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & innerEntryElement );
if ( innerEntryConcat ) {
if ( innerEntryConcat->getElements ( ).size ( ) == 1 ) {
innerAlt.appendElement ( UnboundedRegExpEpsilon < SymbolType > ( ) );
} else {
innerEntryConcat->erase ( innerEntryConcat->rbegin ( ) );
innerAlt.appendElement ( std::move ( * innerEntryConcat ) );
} else {
innerAlt.appendElement ( UnboundedRegExpEpsilon < SymbolType > ( ) );
innerConcat.insert ( innerConcat.begin ( ), std::move ( innerAlt ) );
res.appendElement ( Unbounded < SymbolType >::visit ( std::move ( innerConcat ), true ) );
UnboundedRegExpElement < SymbolType > * optIt = optimize( * it );
delete *it;
*it = optIt;
it = node->elements.erase( std::next( it ), node->elements.end( ) );
optimized = true;
// as we move (delete) the rest of this expression, it surely won't do another round. More optimizations to be performed are in the subtree now.
// we do not care about this here as method optimize(UnboundedRegExpAlternation < SymbolType >) will take care of this in next iteration
// it ++;
return optimized;
return false; //TODO
node = std::move ( res );
return true;
......@@ -531,7 +534,7 @@ bool RegExpOptimize::Unbounded < SymbolType >::V4( UnboundedRegExpIteration < Sy
UnboundedRegExpAlternation < SymbolType > newAlt;
for ( UnboundedRegExpElement < SymbolType > && n : ext::make_mover ( std::move ( * cont ).getChildren ( ) ) )
for ( UnboundedRegExpElement < SymbolType > & n : * cont )
newAlt.pushBackChild ( std::move ( static_cast < UnboundedRegExpIteration < SymbolType > & > ( n ).getChild ( ) ) );
node.setChild ( Unbounded < SymbolType >::visit ( std::move ( newAlt ), true ) );
......@@ -546,95 +549,9 @@ bool RegExpOptimize::Unbounded < SymbolType >::V4( UnboundedRegExpIteration < Sy
template < class SymbolType >
bool RegExpOptimize::Unbounded < SymbolType >::V5( UnboundedRegExpAlternation < SymbolType > & /* node */ ) {
bool optimized = false;
// reinterpretation: ax*y = ay+ax*xy
// so, if we find iter,
// a = everything that is before it (prefix)
// x = iter's content behind iter must be exactly iter's content
// y = rest (suffix)
// prefix.x*x.suffix + prefix.suffix = prefix.x*.suffix
/* for( auto itA = node.getChildren().begin( ); itA != node.getChildren().end( ); ) {
UnboundedRegExpConcatenation < SymbolType > * concat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & * itA );
if( ! concat ) {
++ itA;
// implemented by combination of A9 and A10
for( auto itC = concat->getChildren().begin( ); itC != std::prev( concat->getChildren().end( ) ); ) {
UnboundedRegExpIteration < SymbolType > * iter = dynamic_cast < UnboundedRegExpIteration < SymbolType > * > ( & * itC );
if( ! iter ) {
++ itC;
// iteration's element must follow the iteration (x*x)
auto itStartY = std::next( itC ); //itStartY points to y in expression x*xy
// if iter's element is concat
if( dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & iter->getChild ( ) ) ) {
UnboundedRegExpConcatenation < SymbolType > * iterConcat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & iter->getChild ( ) );
if( iterConcat->getChildren().size( ) != ( size_t ) distance ( std::next ( itC ), concat->getChildren ( ).end ( ) )
|| ! equal ( iterConcat->getChildren ( ).begin ( ), iterConcat->getChildren ( ).end ( ), std::next ( itC ),
[ ] ( const UnboundedRegExpElement < SymbolType > & a, const UnboundedRegExpElement < SymbolType > & b ) -> bool { return a == b; } ) ) {
++ itC;
std::advance( itStartY, iterConcat->getChildren().size( ) );
} else {
if( iter->getChild() != * std::next( itC ) ) {
++ itC;
std::advance( itStartY, 1 );
// store everything before iteration as "a"
UnboundedRegExpConcatenation < SymbolType > tmpAY;
if( concat->getChildren().begin( ) == itC ) {
tmpAY.pushBackChild ( UnboundedRegExpEpsilon < SymbolType > ( ) );
} else {
UnboundedRegExpConcatenation < SymbolType > tmpA;
tmpA.insert( tmpA.getChildren().end( ), concat->getChildren().begin( ), itC );
tmpAY.pushBackChild ( optimizeInner( tmpA ) );
// store everything behind iteration's followup element as "y"
if( itStartY == concat->getChildren().end( ) ) {
tmpAY.pushBackChild ( UnboundedRegExpEpsilon < SymbolType > ( ) );
} else {
UnboundedRegExpConcatenation < SymbolType > tmpY;
tmpY.insert( tmpY.getChildren().end( ), itStartY, concat->getChildren().end( ) );
tmpAY.pushBackChild ( optimizeInner ( tmpY ) );
// concatenate "a" and "y" and see if they exist somewhere in parent alternation ( node.getChildren() )
ext::ptr_value < UnboundedRegExpElement < SymbolType > > regexpAY = optimizeInner( tmpAY );
auto iterAY = find_if ( node.getChildren().begin( ), node.getChildren().end( ), [ & ] ( const UnboundedRegExpElement < SymbolType > & a ) -> bool { return a == regexpAY.get ( ); } );
if( iterAY == node.getChildren().end( ) ) {
++ itC;
tmpAY.insert ( tmpAY.getChildren ( ).begin ( ) + 1, * itC );
node.setChild ( optimizeInner( tmpAY ), itA );
itA = node.getChildren().erase( iterAY );
optimized = true;
++ itA;
return optimized;
return false;
......@@ -644,100 +561,9 @@ bool RegExpOptimize::Unbounded < SymbolType >::V5( UnboundedRegExpAlternation <
template < class SymbolType >
bool RegExpOptimize::Unbounded < SymbolType >::V6( UnboundedRegExpAlternation < SymbolType > & /* node */ ) {
bool optimized = false;
// implemented by combination of A9 and A10R
// reinterpretation: ax*y = ay+axx*y
// so, if we find iter
// a = everything that is before it (prefix)
// x = iter's content before iter must be exactly iter's content
// y = rest (suffix)
// prefix.xx*.suffix + prefix.suffix = prefix.x*.suffix
/* for( auto itA = node.getChildren ( ).begin( ); itA != node.getChildren ( ).end( ); ) {
UnboundedRegExpConcatenation < SymbolType > * concat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & * itA );
if( ! concat ) {
++ itA;
for( auto itC = std::next( concat->getChildren ( ).begin( ) ); itC != concat->getChildren ( ).end( ); ) {
UnboundedRegExpIteration < SymbolType > * iter = dynamic_cast < UnboundedRegExpIteration < SymbolType > * > ( & * itC );
if( ! iter ) {
++ itC;
// iteration's element must precede the iteration (xx*)
auto itStartX = itC; //itStartX points to first x in expression xx*, everything before is therefore prefix - regexp "a"
// if iter's element is concat
UnboundedRegExpConcatenation < SymbolType > * iterConcat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & iter->getChild ( ) );
if( iterConcat ) {
if( distance( concat->getChildren ( ).begin( ), itC ) < (int) iterConcat->getChildren ( ).size( ) ) {
++ itC;
ext::retract ( itStartX, iterConcat->getChildren().size( ) );
if( iterConcat->getChildren ( ).size( ) != ( size_t ) distance( itStartX, concat->getChildren ( ).end( ) )
|| ! equal ( iterConcat->getChildren ( ).begin ( ), iterConcat->getChildren ( ).end ( ), itStartX,
[ ] ( const UnboundedRegExpElement < SymbolType > & a, const UnboundedRegExpElement < SymbolType > & b ) -> bool { return a == b; } ) ) {
++ itC;
} else {
if( iter->getChild ( ) != * std::prev( itC ) ) {
++ itC;
std::advance( itStartX, -1 );
// concatenate "a" and "y" and see if they exist somewhere in parent alternation ( node->getChildren() )
UnboundedRegExpConcatenation < SymbolType > tmpAY;
if( concat->getChildren ( ).begin ( ) == itStartX ) {
tmpAY.pushBackChild ( UnboundedRegExpEpsilon < SymbolType > ( ) );
} else {
UnboundedRegExpConcatenation < SymbolType > tmpA;
tmpA.insert ( tmpA.getChildren ( ).end ( ), concat->getChildren().begin ( ), itStartX );
tmpAY.pushBackChild ( optimizeInner ( tmpA ) );
if( std::next ( itC ) == concat->getChildren().end( ) ) {
tmpAY.pushBackChild ( UnboundedRegExpEpsilon < SymbolType >( ) );
} else {
UnboundedRegExpConcatenation < SymbolType > tmpY;
tmpY.insert ( tmpY.getChildren().end( ), std::next( itC ), concat->getChildren ( ).end( ) );
tmpAY.pushBackChild ( optimizeInner( tmpY ) );
ext::ptr_value < UnboundedRegExpElement < SymbolType > > regexpAY = optimizeInner ( tmpAY );
auto iterAY = find_if( node.getChildren().begin( ), node.getChildren().end( ), [ & ] ( const UnboundedRegExpElement < SymbolType > & a ) -> bool { return a == regexpAY.get ( ); } );
if( iterAY == node.getChildren().end( ) ) {
++ itC;
// if so make a x* y and replace a x x* y
tmpAY.insert( tmpAY.getChildren ( ).begin ( ) + 1, * itC );
node.setChild ( optimizeInner( tmpAY ), itA );
// remove a y
itA = node.getChildren().erase( iterAY );
optimized = true;
++ itA;
} */
return optimized;
return false;
......@@ -892,7 +718,7 @@ bool RegExpOptimize::Unbounded < SymbolType >::V9( UnboundedRegExpConcatenation
it = node.insert ( it, * std::next ( it ) );
it = node.insert ( it, std::move ( * std::next ( it ) ) );
it = std::next ( it );
it = node.erase ( std::next ( it ) );
it = std::prev ( it );
......@@ -916,18 +742,18 @@ bool RegExpOptimize::Unbounded < SymbolType >::V9( UnboundedRegExpConcatenation
// copy the range <it;sth>, delete it and go back to the iter node
ext::ptr_vector < UnboundedRegExpElement < SymbolType > > copyRange;
copyRange.insert ( copyRange.end(), std::next( it ), c1Iter );
copyRange.insert ( copyRange.end(), std::make_move_iterator ( std::next( it ) ), std::make_move_iterator ( c1Iter ) );
it = node.erase ( std::next ( it ), c1Iter );
it = std::prev( it );
// insert that range before it position
it = node.insert( it, copyRange.begin( ), copyRange.end( ) );
it = node.insert( it, std::make_move_iterator ( copyRange.begin( ) ), std::make_move_iterator ( copyRange.end( ) ) );
// alter the iteration's concat node
copyRange.clear( );
copyRange.insert ( copyRange.end(), concat->begin( ), c2Iter );
copyRange.insert ( copyRange.end(), std::make_move_iterator ( concat->begin( ) ), std::make_move_iterator ( c2Iter ) );
concat->erase ( concat->begin( ), c2Iter );
concat->insert ( concat->end(), copyRange.begin( ), copyRange.end( ) );
concat->insert ( concat->end(), std::make_move_iterator ( copyRange.begin( ) ), std::make_move_iterator ( copyRange.end( ) ) );
......@@ -1514,6 +1514,26 @@ public:
return m_children.begin ( );
* \brief
* Getter of a reverse iterator to the beginning of the reversed children vector
* \return reverse begin iterator
typename ext::ptr_vector < Data >::reverse_iterator rbegin ( ) {
return m_children.rbegin ( );
* \brief
* Getter of a constant reverse iterator to the beginning of the reversed children vector
* \return constant reverse begin iterator
typename ext::ptr_vector < Data >::const_reverse_iterator rbegin ( ) const {
return m_children.rbegin ( );
* \brief
* Getter of an iterator to the end of children vector
......@@ -1534,6 +1554,26 @@ public:
return m_children.end ( );
* \brief
* Getter of a reverse iterator to the end of the reversed children vector
* \return reverse end iterator
typename ext::ptr_vector < Data >::reverse_iterator rend ( ) {
return m_children.rend ( );
* \brief
* Getter of a constant reverse iterator to the end of the reversed children vector
* \return constant reverse end iterator
typename ext::ptr_vector < Data >::const_reverse_iterator rend ( ) const {
return m_children.rend ( );
} /* namespace ext */
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment