Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
RegExpOptimizeUnboundedPart.hpp 33.75 KiB
/*
 * RegExpOptimize.cpp
 *
 *  Created on: 20. 1. 2014
 *	  Author: Tomas Pecka
 */

#include <iostream>

namespace regexp {

namespace simplify {

template < class SymbolType >
void RegExpOptimize::optimize( UnboundedRegExpAlternation < SymbolType > & alt ) {
	for( unsigned i = 0; i < alt.getChildren ( ).size ( ); i++ )
		alt.setChild ( optimizeInner ( alt.getChildren ( ) [ i ] ), i );

	while ( A1( alt ) || A2( alt ) || A3( alt ) || A4( alt ) || A10( alt ) || V2( alt ) || V5( alt ) || V6( alt ) || X1( alt ) );
}

template < class SymbolType >
void RegExpOptimize::optimize( UnboundedRegExpConcatenation < SymbolType > & concat ) {
	for( unsigned i = 0; i < concat.getChildren ( ).size ( ); i++ )
		concat.setChild ( optimizeInner ( concat.getChildren ( ) [ i ] ), i );

	while ( A5( concat ) || A6( concat ) || A7( concat ) || A8( concat ) || A9( concat ) || V8( concat ) || V9( concat ) );
}

template < class SymbolType >
void RegExpOptimize::optimize( UnboundedRegExpIteration < SymbolType > & iter ) {
	iter.setChild ( optimizeInner ( iter.getChild ( ) ) );

	while ( A11( iter ) || V1( iter ) || V3( iter ) || V4( iter ) || V10( iter ) );
}

template < class SymbolType >
ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > RegExpOptimize::optimizeInner( const UnboundedRegExpElement < SymbolType > & node ) {
	const UnboundedRegExpAlternation < SymbolType > * alternation = dynamic_cast<const UnboundedRegExpAlternation < SymbolType >*>( & node );
	if( alternation )
		return optimizeInner( * alternation );

	const UnboundedRegExpConcatenation < SymbolType > * concatenation = dynamic_cast<const UnboundedRegExpConcatenation < SymbolType >*>( & node );
	if( concatenation )
		return optimizeInner( * concatenation );

	const UnboundedRegExpIteration < SymbolType > * iteration = dynamic_cast<const UnboundedRegExpIteration < SymbolType >*>( & node );
	if( iteration )
		return optimizeInner( * iteration );

	const UnboundedRegExpSymbol < SymbolType > * symbol = dynamic_cast<const UnboundedRegExpSymbol < SymbolType >*>( & node );
	if( symbol )
		return optimizeInner( * symbol );

	const UnboundedRegExpEmpty < SymbolType > * empty = dynamic_cast<const UnboundedRegExpEmpty < SymbolType >*>( & node );
	if( empty )
		return optimizeInner( * empty );

	const UnboundedRegExpEpsilon < SymbolType > * eps = dynamic_cast<const UnboundedRegExpEpsilon < SymbolType >*>( & node );
	if( eps )
		return optimizeInner( * eps );

	throw exception::CommonException( "RegExpOptimize::optimize - unknown UnboundedRegExpElement < SymbolType > node" );
}

template < class SymbolType >
ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > RegExpOptimize::optimizeInner( const UnboundedRegExpAlternation < SymbolType > & node ) {
	UnboundedRegExpAlternation < SymbolType > alt;

	for( const UnboundedRegExpElement < SymbolType > & child : node.getElements ( ) )
		alt.pushBackChild ( optimizeInner( child ) );

	// optimize while you can
	while( A1 ( alt ) || A2 ( alt ) || A3 ( alt ) || A4 ( alt ) || A10 ( alt ) || V2 ( alt ) || V5 ( alt ) || V6 ( alt ) || X1 ( alt ) );

	if( alt.getElements ( ).size( ) == 1 )
		return ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > ( std::move ( alt.getChildren ( ).front ( ) ) );

	if( alt.getElements ( ).size( ) == 0 )
		return ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > ( UnboundedRegExpEmpty < SymbolType > ( ) );

	return ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > ( std::move ( alt ) );
}

template < class SymbolType >
ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > RegExpOptimize::optimizeInner( const UnboundedRegExpConcatenation < SymbolType > & node ) {
	UnboundedRegExpConcatenation < SymbolType > concat;

	for( const UnboundedRegExpElement < SymbolType > & child : node.getElements ( ) )
		concat.pushBackChild ( optimizeInner( child ) );

	while( A5 ( concat ) || A6 ( concat ) || A7 ( concat ) || A8 ( concat ) || A9 ( concat ) || V8 ( concat ) || V9 ( concat ) );

	if( concat.getElements ( ).size( ) == 1 )
		return ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > ( std::move ( concat.getChildren ( ).front ( ) ) );

	if( concat.getElements ( ).size( ) == 0 )
		return ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > ( UnboundedRegExpEpsilon < SymbolType > ( ) );

	return ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > ( std::move ( concat ) );
}

template < class SymbolType >
ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > RegExpOptimize::optimizeInner( const UnboundedRegExpIteration < SymbolType > & node ) {
	UnboundedRegExpIteration < SymbolType > iter ( optimizeInner ( node.getChild ( ) ) );

	while ( A11 ( iter ) || V1 ( iter ) || V3 ( iter ) || V4 ( iter ) || V10 ( iter ) );

	// V1 is implemented right here
	if( dynamic_cast < UnboundedRegExpEmpty < SymbolType > * > ( & iter.getChild ( ) ) )
		return ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > ( UnboundedRegExpEpsilon < SymbolType > ( ) );

	// T1 is implemented right here \e* = \e
	if ( dynamic_cast < UnboundedRegExpEpsilon < SymbolType > * > ( & iter.getChild ( ) ) )
		return ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > ( UnboundedRegExpEpsilon < SymbolType > ( ) );

	return ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > ( iter );
}

template < class SymbolType >
ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > RegExpOptimize::optimizeInner( const UnboundedRegExpSymbol < SymbolType > & node ) {
	return ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > ( node );
}

template < class SymbolType >
ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > RegExpOptimize::optimizeInner( const UnboundedRegExpEmpty < SymbolType > & node ) {
	return ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > ( node );
}

template < class SymbolType >
ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > RegExpOptimize::optimizeInner( const UnboundedRegExpEpsilon < SymbolType > & node ) {
	return ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > ( node );
}

/**
  * optimization A1: x + ( y + z ) = ( x + y ) + z = x + y + z
  * @param node UnboundedRegExpAlternation < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A1( UnboundedRegExpAlternation < SymbolType > & node ) {
	bool optimized = false;

	for( auto it = node.begin( ); it != node.end( ); ) {
		if( dynamic_cast < UnboundedRegExpAlternation < SymbolType > * > ( & * it ) ) {
			UnboundedRegExpAlternation < SymbolType > childAlt ( static_cast < UnboundedRegExpAlternation < SymbolType > && > ( std::move ( * it ) ) );
			it = node.erase( it );

			it = node.insert ( it, std::make_move_iterator ( childAlt.begin ( ) ), std::make_move_iterator ( childAlt.end ( ) ) );

 			optimized = true;
		} else
			it ++;
	}

	return optimized;
}

/**
  * optimization A2: x + y = y + x (sort)
  * @param node UnboundedRegExpAlternation < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A2( UnboundedRegExpAlternation < SymbolType > & node ) {

	auto cmp = [ ]( const UnboundedRegExpElement < SymbolType > * a, const UnboundedRegExpElement < SymbolType > * b ) -> bool { return * a < * b; };

	if ( std::is_sorted ( node.begin( ).base ( ), node.end( ).base ( ), cmp ) )
		return false;

	std::sort ( node.begin ( ).base ( ), node.end ( ).base ( ), cmp );
	return true;
}

/**
  * optimization A3: x + \0 = x
  * @param node UnboundedRegExpAlternation < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A3( UnboundedRegExpAlternation < SymbolType > & node ) {
	bool optimized = false;

	// alternation with no children is efectively \0

	for( auto it = node.begin ( ); it != node.end ( ); ) {
		if( dynamic_cast < UnboundedRegExpEmpty < SymbolType > * >( & * it ) ) {
			it = node.erase ( it );

			optimized = true;
		} else
			it ++;
	}

	return optimized;
}

/**
  * optimization A4: x + x = x
  * @param node UnboundedRegExpAlternation < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A4( UnboundedRegExpAlternation < SymbolType > & node ) {

	/*
	 * two ways of implementing this opitimization:
	 * - sort and call std::unique ( O(n lg n) + O(n) ), but it also sorts...
	 * - check every element against other ( O(n*n) )
	 *
	 * As we always sort in optimization, we can use the first version, but A4 must be __always__ called __after__ A2
	 */
	auto cmp = [ ]( const UnboundedRegExpElement < SymbolType > * a, const UnboundedRegExpElement < SymbolType > * b ) -> bool { return * a == * b; };

	size_t size = node.getChildren ( ).size ( );
	node.erase ( dereferencer ( ext::unique ( node.begin ( ).base ( ), node.end ( ).base ( ), cmp ) ), node.end( ) );

	return size != node.getChildren ( ).size ( );
}

/**
  * optimization A5: x.(y.z) = (x.y).z = x.y.z
  * @param node UnboundedRegExpConcatenation < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A5( UnboundedRegExpConcatenation < SymbolType > & node ) {
	bool optimized = false;

	for( auto it = node.begin( ); it != node.end( ); ) {
		if( dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & * it ) ) {

			UnboundedRegExpConcatenation < SymbolType > childConcat = static_cast < UnboundedRegExpConcatenation < SymbolType > && > ( std::move ( * it ) );
			it = node.erase ( it );

			it = node.insert ( it, std::make_move_iterator ( childConcat.begin( ) ), std::make_move_iterator ( childConcat.end( ) ) );

			optimized = true;
		} else
			++ it;
	}

	return optimized;
}

/**
  * optimization A6: \e.x = x.\e = x
  * @param node UnboundedRegExpConcatenation < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A6( UnboundedRegExpConcatenation < SymbolType > & node ) {
	bool optimized = false;

	// concatenation with no children is efectively \e

	for( auto it = node.begin( ); it != node.end( ); ) {
		if ( dynamic_cast < UnboundedRegExpEpsilon < SymbolType > * > ( & * it ) ) {
			it = node.erase ( it );

			optimized = true;
		} else
			++ it;
	}

	return optimized;
}

/**
  * optimization A7: \0.x = x.\0 = \0
  * @param node UnboundedRegExpConcatenation < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A7( UnboundedRegExpConcatenation < SymbolType > & node ) {

	if(node.getChildren ( ).size() == 1) return false;

	if( std::any_of ( node.begin( ), node.end( ), [ ] ( const UnboundedRegExpElement < SymbolType > & a ) -> bool { return dynamic_cast < const UnboundedRegExpEmpty < SymbolType > * > ( & a ); } ) ) {
		node.clear( );
		node.pushBackChild ( UnboundedRegExpEmpty < SymbolType > ( ) );

		return true;
	}

	return false;
}

/**
  * optimization A8: x.(y+z) = x.y + x.z
  * @param node UnboundedRegExpConcatenation < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A8( UnboundedRegExpConcatenation < SymbolType > & /* node */) {
/*
	bool optimized = false;

	for( auto it = std::next( node->elements.begin( ) ); it != node->elements.end( ); )
	{
		UnboundedRegExpAlternation < SymbolType > * alt = dynamic_cast<UnboundedRegExpAlternation < SymbolType >*>( * it );
		if( ! alt )
		{
			it ++;
			continue;
		}

		// take everything to the left and copy it as prefix of every element in alternation.
		UnboundedRegExpConcatenation < SymbolType > leftPart;
		leftPart.elements.insert( leftPart.elements.end( ), node->elements.begin( ), it );

		for( auto altIt = alt->elements.begin( ); altIt != alt->elements.end( ); altIt ++ )
		{
			UnboundedRegExpConcatenation < SymbolType > * altElem = new UnboundedRegExpConcatenation < SymbolType >( );
			altElem->elements.push_back( leftPart );
			altElem->elements.push_back( * altIt );

			* altIt = altElem;
		}

		UnboundedRegExpElement < SymbolType > * optIt = optimize( * it );
		delete *it;
		*it = optIt;

		it = node->elements.erase( node->elements.begin( ), it );

		optimized = true;
		it ++;
	}

	return optimized;
*/
	return false; //TODO
}

/**
  * optimization A9: (x+y).z = x.z + y.z
  * @param node UnboundedRegExpConcatenation < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A9( UnboundedRegExpConcatenation < SymbolType > & /* node */) {
/*
	bool optimized = false;

	for( auto it = node->elements.begin( ); it != std::prev( node->elements.end( ) ); )
	{
		UnboundedRegExpAlternation < SymbolType > * alt = dynamic_cast<UnboundedRegExpAlternation < SymbolType >*>( * it );
		if( ! alt )
		{
			it ++;
			continue;
		}

		// take everything to the right and copy it as suffix of every element in alternation.
		UnboundedRegExpConcatenation < SymbolType > rest;
		rest.elements.insert( rest.elements.end( ), std::next( it ), node->elements.end( ) );

		for( auto altIt = alt->elements.begin( ); altIt != alt->elements.end( ); altIt ++ )
		{
			UnboundedRegExpConcatenation < SymbolType > * altElem = new UnboundedRegExpConcatenation < SymbolType >( );
			altElem->elements.push_back( * altIt );
			altElem->elements.push_back( rest );

			* altIt = altElem;
		}

		UnboundedRegExpElement < SymbolType > * optIt = optimize( * it );
		delete *it;
		*it = optIt;

		it = node->elements.erase( std::next( it ), node->elements.end( ) );
		optimized = true;

		// as we move (delete) the rest of this expression, it surely wont do another round. More optimizations to be performerd are in subtree now.
		// we do not care about this here as method optimize(UnboundedRegExpAlternation < SymbolType >) will take care of this in next iteration
		// it ++;
		break;
	}

	return optimized;
*/
	return false; //TODO
}

/**
  * optimization A10: x* = \e + x*x
  * @param node UnboundedRegExpAlternation < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A10( UnboundedRegExpAlternation < SymbolType > & /* node */ ) {
	bool optimized = false;

	/*
	 * problem:
	 * - \e + x*x = x*
	 * - but if we do not have the eps, but we do have iteration, then \e \in h(iter), therefore \e in h(node).
	 */

	// check if we have some epsilon or iteration left, else nothing to do
	/*auto eps = find_if( node.getElements().begin( ), node.getElements().end( ), [ ]( const UnboundedRegExpElement < SymbolType > & a ) -> bool {
		return dynamic_cast < const UnboundedRegExpEpsilon < SymbolType > * > ( & a ) || dynamic_cast < const UnboundedRegExpIteration < SymbolType > * > ( & a  );
	});

	if( eps == node.getElements().end( ) )
		return false;

	for( unsigned i = 0; i < node.getChildren ( ).size ( ); i++ ) {
		UnboundedRegExpConcatenation < SymbolType > * childConcat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & node.getChildren ( ) [ i ]  );
		if( ! childConcat )
			continue;

		// if iteration is first element of concatenation
		UnboundedRegExpIteration < SymbolType > * iter = dynamic_cast < UnboundedRegExpIteration < SymbolType > * > ( & childConcat->getElements ( ).front( ) );
		if( ! iter )
			continue;

		// concatenation without the iteration node
		UnboundedRegExpConcatenation < SymbolType > tmpConcat ( * childConcat );
		tmpConcat.getChildren ( ).erase ( tmpConcat.getChildren ( ).begin( ) );
		ext::ptr_value < UnboundedRegExpElement < SymbolType > > tmpConcatOpt = optimizeInner ( tmpConcat );

		// check if the iteration element is the same as the rest of the concatenation
		if( iter->getElement() == tmpConcatOpt ) {
			optimized = true;

			node.setChild ( std::move ( childConcat->getElements().front() ), i );
		}
	} */

	return optimized;
}

/**
  * optimization A11: x* = (\e + x)*
  * @param node UnboundedRegExpIteration < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A11( UnboundedRegExpIteration < SymbolType > & node ) {

	UnboundedRegExpAlternation < SymbolType > * childAlt = dynamic_cast<UnboundedRegExpAlternation < SymbolType > * >( & node.getChild ( ) );

	if( childAlt ) {
		// check if eps inside iteration's alternation
		auto eps = find_if ( childAlt->begin( ), childAlt->end( ), [ ] ( const UnboundedRegExpElement < SymbolType > & a ) -> bool {
			return dynamic_cast < const UnboundedRegExpEpsilon < SymbolType > * > ( & a );
		});

		// if no eps
		if ( eps == childAlt->end( ) )
			return false;

		// remove eps from alternation
		childAlt->erase ( eps );
		return true;
	}

	return false;
}

/**
  * optimization V1: \0* = \e
  * @param node UnboundedRegExpIteration < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V1( UnboundedRegExpIteration < SymbolType > &) {
	// implemented in optimize( UnboundedRegExpIteration < SymbolType > )

	return false;
}

/**
  * optimization V2: x* + x = x*
  * @param node UnboundedRegExpAlternation < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V2( UnboundedRegExpAlternation < SymbolType > & /* node */ ) {
	bool optimized = false;

	/*
	 * Bit tricky
	 * We need also to cover the cases like ( a + b + d )* + ( e )* + a + b + c + e = ( a + b + d )* + ( e )* + c
	 */

	/*ext::vector < const UnboundedRegExpElement < SymbolType > * > iterElements;
	// cache iter elements because of operator invalidation after erase
	for( const UnboundedRegExpElement < SymbolType > & n : node.getElements ( ) ) {
		const UnboundedRegExpIteration < SymbolType > * iter = dynamic_cast < const UnboundedRegExpIteration < SymbolType > * > ( & n );
		if( iter ) {
			const UnboundedRegExpAlternation < SymbolType > * inner = dynamic_cast < const UnboundedRegExpAlternation < SymbolType > * > ( & iter->getChild ( ) );
			if ( inner )
				for ( const UnboundedRegExpElement < SymbolType > & innerElement : inner->getElements ( ) )
					iterElements.push_back ( & innerElement );
			else
				iterElements.push_back ( & iter->getChild ( ) );

		}
	}

	for( const UnboundedRegExpElement < SymbolType > * n : iterElements ) {
		auto it = find_if ( node.getChildren ( ).begin ( ), node.getChildren ( ).end ( ), [ n ] ( const UnboundedRegExpElement < SymbolType > & a ) -> bool {
			return a == *n;
		});

		if( it == node.getChildren().end() ) {
			continue;
		}

		optimized = true;
		node.getChildren ( ).erase( it );
	}*/

	return optimized;
}

/**
  * optimization V3: x** = x*
  * @param node UnboundedRegExpIteration < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V3 ( UnboundedRegExpIteration < SymbolType > & node ) {

	UnboundedRegExpIteration < SymbolType > * childIter = dynamic_cast < UnboundedRegExpIteration < SymbolType > * > ( & node.getChild ( ) );
	if ( childIter ) {
		node.setChild ( std::move ( childIter->getChild ( ) ) );

		return true;
	}

	return false;
}

/**
  * optimization V4: (x+y)* = (x*y*)*
  * @param node UnboundedRegExpIteration < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V4( UnboundedRegExpIteration < SymbolType > & node ) {

	// interpretation: if iteration's element is concat and every concat's element is iteration
	UnboundedRegExpConcatenation < SymbolType > * cont = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & node.getChild ( ) );
	if ( ! cont || ! all_of ( cont->begin( ), cont->end ( ), [ ] ( const UnboundedRegExpElement < SymbolType > & a ) -> bool { return dynamic_cast < const UnboundedRegExpIteration < SymbolType > * > ( & a ); } ) )
		return false;

	UnboundedRegExpAlternation < SymbolType > newAlt;

	for ( UnboundedRegExpElement < SymbolType > && n : ext::make_mover ( std::move ( * cont ).getChildren ( ) ) )
		newAlt.pushBackChild ( std::move ( static_cast < UnboundedRegExpIteration < SymbolType > & > ( n ).getChild ( ) ) );

	node.setChild ( optimizeInner ( newAlt ) );

	return true;
}

/**
  * optimization V5: x*y = y + x*xy
  * @param node UnboundedRegExpAlternation < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V5( UnboundedRegExpAlternation < SymbolType > & /* node */ ) {
	bool optimized = false;

	// reinterpretation: ax*y = ay+ax*xy
	// so, if we find iter,
	//	a = everything that is before it (prefix)
	//	x = iter's content behind iter must be exactly iter's content
	//	y = rest (suffix)
	// prefix.x*x.suffix + prefix.suffix = prefix.x*.suffix

	/* for( auto itA = node.getChildren().begin( ); itA != node.getChildren().end( ); ) {
		UnboundedRegExpConcatenation < SymbolType > * concat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & * itA );
		if( ! concat ) {
			++ itA;
			continue;
		}

		for( auto itC = concat->getChildren().begin( ); itC != std::prev( concat->getChildren().end( ) ); ) {
			UnboundedRegExpIteration < SymbolType > * iter = dynamic_cast < UnboundedRegExpIteration < SymbolType > * > ( & * itC );
			if( ! iter ) {
				++ itC;
				continue;
			}

			// iteration's element must follow the iteration (x*x)
			auto itStartY = std::next( itC ); //itStartY points to y in expression x*xy

			// if iter's element is concat
			if( dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & iter->getChild ( ) ) ) {
				UnboundedRegExpConcatenation < SymbolType > * iterConcat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & iter->getChild ( ) );

				if( iterConcat->getChildren().size( ) != ( unsigned ) distance ( std::next ( itC ), concat->getChildren ( ).end ( ) )
					|| ! equal ( iterConcat->getChildren ( ).begin ( ), iterConcat->getChildren ( ).end ( ), std::next ( itC ),
					[ ] ( const UnboundedRegExpElement < SymbolType > & a, const UnboundedRegExpElement < SymbolType > & b ) -> bool { return a == b; } ) ) {
					++ itC;
					continue;
				}
				std::advance( itStartY, iterConcat->getChildren().size( ) );
			} else {
				if( iter->getChild() != * std::next( itC ) ) {
					++ itC;
					continue;
				}

				std::advance( itStartY, 1 );
			}

			// store everything before iteration as "a"
			UnboundedRegExpConcatenation < SymbolType > tmpAY;
			if( concat->getChildren().begin( ) == itC ) {
				tmpAY.pushBackChild ( UnboundedRegExpEpsilon < SymbolType > ( ) );
			} else {
				UnboundedRegExpConcatenation < SymbolType > tmpA;
				tmpA.insert( tmpA.getChildren().end( ), concat->getChildren().begin( ), itC );
				tmpAY.pushBackChild ( optimizeInner( tmpA ) );
			}

			// store everything behind iteration's followup element as "y"
			if( itStartY == concat->getChildren().end( ) ) {
				tmpAY.pushBackChild ( UnboundedRegExpEpsilon < SymbolType > ( ) );
			} else {
				UnboundedRegExpConcatenation < SymbolType > tmpY;
				tmpY.insert( tmpY.getChildren().end( ), itStartY, concat->getChildren().end( ) );
				tmpAY.pushBackChild ( optimizeInner ( tmpY ) );
			}

			// concatenate "a" and "y" and see if they exist somewhere in parent alternation ( node.getChildren() )
			ext::ptr_value < UnboundedRegExpElement < SymbolType > > regexpAY = optimizeInner( tmpAY );

			auto iterAY = find_if ( node.getChildren().begin( ), node.getChildren().end( ), [ & ] ( const UnboundedRegExpElement < SymbolType > & a ) -> bool { return a == regexpAY.get ( ); } );
			if( iterAY == node.getChildren().end( ) ) {
				++ itC;

				continue;
			}

			tmpAY.insert ( tmpAY.getChildren ( ).begin ( ) + 1, * itC );

			node.setChild ( optimizeInner( tmpAY ), itA );

			itA = node.getChildren().erase( iterAY );

			optimized = true;
			break;
		}

		++ itA;
	}*/

	return optimized;
}

/**
  * optimization V6: x*y = y + xx*y
  * @param node UnboundedRegExpAlternation < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V6( UnboundedRegExpAlternation < SymbolType > & /* node */ ) {
	bool optimized = false;

	// reinterpretation: ax*y = ay+axx*y
	// so, if we find iter
	//	a = everything that is before it (prefix)
	//	x = iter's content before iter must be exactly iter's content
	//	y = rest (suffix)
	// prefix.xx*.suffix + prefix.suffix = prefix.x*.suffix

	/* for( auto itA = node.getChildren ( ).begin( ); itA != node.getChildren ( ).end( ); ) {
		UnboundedRegExpConcatenation < SymbolType > * concat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & * itA );
		if( ! concat ) {
			++ itA;
			continue;
		}

		for( auto itC = std::next( concat->getChildren ( ).begin( ) ); itC != concat->getChildren ( ).end( ); ) {
			UnboundedRegExpIteration < SymbolType > * iter = dynamic_cast < UnboundedRegExpIteration < SymbolType > * > ( & * itC );
			if( ! iter ) {
				++ itC;
				continue;
			}

			// iteration's element must preceed the iteration (xx*)
			auto itStartX = itC; //itStartX points to first x in expression xx*, everything before is therefore prefix - regexp "a"

			// if iter's element is concat
			UnboundedRegExpConcatenation < SymbolType > * iterConcat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & iter->getChild ( ) );
			if( iterConcat ) {

				if( distance( concat->getChildren ( ).begin( ), itC ) < (int) iterConcat->getChildren ( ).size( ) ) {
					++ itC;
					continue;
				}
				ext::retract ( itStartX, iterConcat->getChildren().size( ) );

				if( iterConcat->getChildren ( ).size( ) != ( unsigned ) distance( itStartX, concat->getChildren ( ).end( ) )
						|| ! equal ( iterConcat->getChildren ( ).begin ( ), iterConcat->getChildren ( ).end ( ), itStartX,
						[ ] ( const UnboundedRegExpElement < SymbolType > & a, const UnboundedRegExpElement < SymbolType > & b ) -> bool { return a == b; } ) ) {
					++ itC;
					continue;
				}
			} else {
				if( iter->getChild ( ) != * std::prev( itC ) ) {
					++ itC;
					continue;
				}

				std::advance( itStartX, -1 );
			}

			// concatenate "a" and "y" and see if they exist somewhere in parent alternation ( node->getChildren() )
			UnboundedRegExpConcatenation < SymbolType > tmpAY;
			if( concat->getChildren ( ).begin ( ) == itStartX ) {
				tmpAY.pushBackChild ( UnboundedRegExpEpsilon < SymbolType > ( ) );
			} else {
				UnboundedRegExpConcatenation < SymbolType > tmpA;
				tmpA.insert ( tmpA.getChildren ( ).end ( ), concat->getChildren().begin ( ), itStartX );
				tmpAY.pushBackChild ( optimizeInner ( tmpA ) );
			}

			if( std::next ( itC ) == concat->getChildren().end( ) ) {
				tmpAY.pushBackChild ( UnboundedRegExpEpsilon < SymbolType >( ) );
			} else {
				UnboundedRegExpConcatenation < SymbolType > tmpY;
				tmpY.insert ( tmpY.getChildren().end( ), std::next( itC ), concat->getChildren ( ).end( ) );
				tmpAY.pushBackChild ( optimizeInner( tmpY ) );
			}

			ext::ptr_value < UnboundedRegExpElement < SymbolType > > regexpAY = optimizeInner ( tmpAY );

			auto iterAY = find_if( node.getChildren().begin( ), node.getChildren().end( ), [ & ] ( const UnboundedRegExpElement < SymbolType > & a ) -> bool { return a == regexpAY.get ( ); } );
			if( iterAY == node.getChildren().end( ) ) {
				++ itC;

				continue;
			}

			// if so make a x* y and replace a x x* y
			tmpAY.insert( tmpAY.getChildren ( ).begin ( ) + 1, * itC );

			node.setChild ( optimizeInner( tmpAY ), itA );

			// remove a y
			itA = node.getChildren().erase( iterAY );

			optimized = true;
			break;
		}

		++ itA;
	} */

	return optimized;
}

/**
  * optimization V8: \e in h(x) => xx*=x*
  * @param node UnboundedRegExpConcatenation < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V8( UnboundedRegExpConcatenation < SymbolType > & node ) {
	bool optimized = false;

	// interpretation: if there is iteration in concatenation node, and element of iteration contains eps and is straight before this iteration, then this element can be omitted

	if ( node.getChildren ( ).size ( ) == 0 )
		return false;

	for( auto it = node.getChildren ( ).begin( ); it != node.getChildren ( ).end( ); ) {
		const UnboundedRegExpIteration < SymbolType > * iter = dynamic_cast < const UnboundedRegExpIteration < SymbolType > * > ( & * it );

		if( ! iter ) {
			++ it;
			continue;
		}

		// if element of iteration is concatenation, we need to check this specially
		const UnboundedRegExpConcatenation < SymbolType > * concat = dynamic_cast < const UnboundedRegExpConcatenation < SymbolType > * > ( & iter->getChild ( ) );

		if( concat ) {
			// check if not out of bounds
			if( ( unsigned ) distance( node.getChildren ( ).begin( ), it ) < concat->getChildren ( ).size ( ) ) {
				it ++;
				continue;
			}

			auto it2 = it;
			ext::retract ( it2, concat->getChildren ( ).size ( ) );

			if( regexp::properties::RegExpEpsilon::languageContainsEpsilon ( * concat ) &&
				concat->getChildren().size ( ) == ( unsigned ) distance ( it2, node.getChildren ( ).end( ) ) &&
				equal ( concat->getChildren ( ).begin( ), concat->getChildren ( ).end( ), it2, [ ] ( const UnboundedRegExpElement < SymbolType > & a, const UnboundedRegExpElement < SymbolType > & b ) -> bool { return a == b; } ) ) {
				optimized = true;

				it = node.erase ( it2, it );
			} else
				++ it;
		} else {
			// check if not at the first node
			if ( it == node.getChildren ( ).begin ( ) ) {
				it ++;
				continue;
			}

			auto prev = std::prev ( it );

			if ( regexp::properties::RegExpEpsilon::languageContainsEpsilon ( iter->getElement ( ) ) && iter->getElement ( ) == * prev ) {
				it = node.erase ( prev );
				optimized = true;
			} else
				++ it;
		}
	}

	return optimized;
}

/**
  * optimization V9: (xy)*x = x(yx)*
  * @param node UnboundedRegExpConcatenation < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V9( UnboundedRegExpConcatenation < SymbolType > & /*node*/ ) {
	bool optimized = false;

	// interpretation: if concat (C1) with iter && iteration's element is concat (C2), then:
	//	  simultaneously iterate through C1 and C2. (axy)*axz=ax(yax)*z -> get ax that is same and relocate them...

	/*for( auto it = node.getChildren().begin( ) ; it != node.getChildren().end( ) ; ) {
		UnboundedRegExpIteration < SymbolType > * iter = dynamic_cast<UnboundedRegExpIteration < SymbolType > * > ( & * it );
		if ( ! iter ) {
			++ it;
			continue;
		}
		UnboundedRegExpConcatenation < SymbolType > * concat = dynamic_cast<UnboundedRegExpConcatenation < SymbolType > * > ( & iter->getChild( ) );
		if( ! concat ) {
			++it;
			continue;
		}

		// find range from <it+1;sth> and <concat.begin;sth> that is equal
		auto c1Iter = std::next( it ), c2Iter = concat->getChildren().begin( );
		while( c1Iter != node.getChildren().end() && c2Iter != concat->getChildren().end( ) && *c1Iter == * c2Iter ) {
			++ c1Iter;
			++ c2Iter;
		}

		if( c1Iter == std::next( it ) ) {
			++ it;
			continue;
		}

		// common::Streams::out << "xy" << std::endl;
		// UnboundedRegExpConcatenation < SymbolType >* tmp = new UnboundedRegExpConcatenation < SymbolType >( );
		// tmp->insert( tmp->getChildren().end( ), std::next( it ), c1Iter );
		// common::Streams::out << RegExp( tmp ) << std::endl;

		// copy the range <it;sth>, delete it and go back to the iter node
		ext::ptr_vector < UnboundedRegExpElement < SymbolType > > copyRange;
		copyRange.insert ( copyRange.end(), std::next( it ), c1Iter );
		it = node.getChildren().erase( std::next( it ), c1Iter );
		it = std::prev( it );

		// insert that range before it position
		node.insert( it, copyRange.begin( ), copyRange.end( ) );

		// alter the iteration's concat node
		copyRange.clear( );
		copyRange.insert( copyRange.end(), concat->getChildren().begin( ), c2Iter );
		concat->getChildren().erase( concat->getChildren().begin( ), c2Iter );
		concat->insert( concat->getChildren().end(), copyRange.begin( ), copyRange.end( ) );
	}*/

	return optimized;
}

/**
  * optimization V10: (x+y)* = (x*+y*)*
  * generalized to (x+y)* = (x+y*)* = (x*+y)* = (x*+y*)*
  * @param node UnboundedRegExpIteration < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V10( UnboundedRegExpIteration < SymbolType > & node ) {

	// interpretation: if iter's child is alternation where some of its children are iteration, then they do not have to be iterations
	UnboundedRegExpAlternation < SymbolType > * alt = dynamic_cast < UnboundedRegExpAlternation < SymbolType > * > ( & node.getChild ( ) );
	if ( ! alt || ! any_of ( alt->begin( ), alt->end( ), [] ( const UnboundedRegExpElement < SymbolType > & a ) -> bool { return dynamic_cast < const UnboundedRegExpIteration < SymbolType > * > ( & a ); } ) )
		return false;

	for ( auto it = alt->begin( ); it != alt->end ( ); ++ it ) {
		if ( dynamic_cast < UnboundedRegExpIteration < SymbolType > * > ( & * it ) )
			alt->setChild ( std::move ( static_cast < UnboundedRegExpIteration < SymbolType > & > ( * it ).getChild ( ) ), it );
	}

	return true;
}

/**
  * optimization X1: a* + \e = a*
  * @param node UnboundedRegExpAlternation < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::X1( UnboundedRegExpAlternation < SymbolType > & node ) {

	// theorem: In regexp like a* + \e, \e is described twice, first in a*, second in \e.
	//  therefore we can delete the \e as it is redundant

	auto iter = find_if ( node.begin( ), node.end( ), [] ( const UnboundedRegExpElement < SymbolType > & a ) -> bool { return dynamic_cast < const UnboundedRegExpIteration < SymbolType > * > ( & a );} );
	auto eps = find_if ( node.begin( ), node.end( ), [] ( const UnboundedRegExpElement < SymbolType > & a ) -> bool { return dynamic_cast < const UnboundedRegExpEpsilon < SymbolType > * > ( & a );} );

	if( iter != node.end( ) && eps != node.end( ) ) {
		node.erase( eps );
		return true;
	}

	return false;
}

} /* namespace simplify */

} /* namespace regexp */