Skip to content
Snippets Groups Projects
RegExpOptimizeFormalPart.hpp 20.5 KiB
Newer Older
namespace regexp::simplify {

template < class SymbolType >
void RegExpOptimize::optimize( FormalRegExpElement < SymbolType > & element ) {
	ext::smart_ptr < FormalRegExpElement < SymbolType > > optimized = optimizeInner ( element );
Jan Travnicek's avatar
Jan Travnicek committed
	if ( FormalRegExpAlternation < SymbolType > * alternation = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( & element ) ) {
		FormalRegExpAlternation < SymbolType > * alternationOptimized = dynamic_cast<FormalRegExpAlternation < SymbolType > * > ( optimized.get ( ) );
		if( alternationOptimized ) {
			* alternation = std::move( * alternationOptimized );
		} else {
			* alternation = FormalRegExpAlternation < SymbolType > { * optimized, FormalRegExpEmpty < SymbolType > { } };
Jan Travnicek's avatar
Jan Travnicek committed
	} else if ( FormalRegExpConcatenation < SymbolType > * concatenation = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( & element ) ) {
		FormalRegExpConcatenation < SymbolType > * concatenationOptimized = dynamic_cast<FormalRegExpConcatenation < SymbolType > * > ( optimized.get ( ) );
		if( concatenationOptimized ) {
			* concatenation = std::move( * concatenationOptimized );
		} else {
			* concatenation = FormalRegExpConcatenation < SymbolType > { * optimized, FormalRegExpEpsilon < SymbolType > { } };
Jan Travnicek's avatar
Jan Travnicek committed
	} else if ( FormalRegExpIteration < SymbolType > * iteration = dynamic_cast<FormalRegExpIteration < SymbolType > *>( & element ) ) {
		FormalRegExpIteration < SymbolType > * iterationOptimized = dynamic_cast<FormalRegExpIteration < SymbolType > *>( optimized.get ( ) );
		if( iterationOptimized ) {
			* iteration = std::move( * iterationOptimized );
		} else {
			* iteration = FormalRegExpIteration < SymbolType > { optimized };
		}
	}

	// Nothing to optimize original element was FormalRegExpSymbol, FormalRegExpEpsilon, or FormalRegExpEmpty
}

template < class SymbolType >
ext::smart_ptr < FormalRegExpElement < SymbolType > > RegExpOptimize::optimizeInner( const FormalRegExpElement < SymbolType > & node ) {
	FormalRegExpElement < SymbolType > * elem = node.clone();

	// optimize while you can
	while(    A1( elem ) || A2( elem ) || A3( elem ) || A4( elem ) || A10( elem ) || V2( elem ) || V5( elem ) || V6( elem ) || X1( elem )
	       || A5( elem ) || A6( elem ) || A7( elem ) || A8( elem ) || A9( elem ) || V8( elem ) //|| V9( elem )
	       || A11( elem ) || V1( elem ) || V3( elem ) || V4( elem ) || V10( elem ) || S(elem) );

	return ext::smart_ptr < FormalRegExpElement < SymbolType > > ( elem );
}

template < class SymbolType >
bool RegExpOptimize::S( FormalRegExpElement < SymbolType > * & node ) {
	bool optimized = false;
	if( FormalRegExpAlternation < SymbolType > * alternation = dynamic_cast<FormalRegExpAlternation < SymbolType >*>( node ); alternation ) {
		if( ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( alternation->getLeftElement ( ) ); * tmp != alternation->getLeftElement ( ) ) {
			optimized = true;
			alternation->setLeftElement ( * tmp );
		if( ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( alternation->getRightElement ( ) ); * tmp != alternation->getRightElement ( ) ) {
			optimized = true;
			alternation->setRightElement ( * tmp );
		}

		return optimized;
	}

	if( FormalRegExpConcatenation < SymbolType > * concatenation = dynamic_cast<FormalRegExpConcatenation < SymbolType >*>( node ); concatenation ) {
		if( ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( concatenation->getLeftElement ( ) ); * tmp != concatenation->getLeftElement ( ) ) {
			optimized = true;
			concatenation->setLeftElement ( * tmp );
		if( ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( concatenation->getRightElement ( ) ); * tmp != concatenation->getRightElement ( ) ) {
			optimized = true;
			concatenation->setRightElement ( * tmp );
		}

		return optimized;
	}

	if( FormalRegExpIteration < SymbolType > * iteration = dynamic_cast<FormalRegExpIteration < SymbolType >*>( node ); iteration ) {
		if( ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( iteration->getElement() ); * tmp != iteration->getElement ( ) ) {
			optimized = true;
			iteration->setElement ( * tmp );
		}
		return optimized;
	}

	return optimized;
}


/**
  * optimization A1: ( x + y ) + z = x + ( y + z )
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A1( FormalRegExpElement < SymbolType > * & node ) {
	FormalRegExpAlternation < SymbolType > * n = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( node );
	if( ! n ) return false;
	if( dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & n->getLeft ( ) ) ) {
		FormalRegExpAlternation < SymbolType > leftAlt ( std::move ( static_cast < FormalRegExpAlternation < SymbolType > & > ( n->getLeft ( ) ) ) );
		n->setLeft ( std::move ( leftAlt.getLeft ( ) ) );
		leftAlt.setLeft ( std::move ( leftAlt.getRight ( ) ) );
		leftAlt.setRight ( std::move ( n->getRight ( ) ) );
		n->setRight ( std::move ( leftAlt ) );

		return true;
	}

	return false;
}

/**
  * optimization A2: x + y = y + x (sort)
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A2 ( FormalRegExpElement < SymbolType > * & node ) {
	FormalRegExpAlternation < SymbolType > * n = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( node );
	if( ! n ) return false;
	if ( dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & n->getRight ( ) ) ) {
		// deeper structure, like b + (a + c)
		FormalRegExpAlternation < SymbolType > & rightAlt = static_cast < FormalRegExpAlternation < SymbolType > & > ( n->getRight ( ) );
		if ( n->getLeft ( ) > rightAlt.getLeft ( ) ) {
			ext::ptr_value < FormalRegExpElement < SymbolType > > tmp ( std::move ( n->getLeft ( ) ) );
			n->setLeft ( std::move ( rightAlt.getLeft ( ) ) );
			rightAlt.setLeft ( std::move ( tmp ) );
			return true;
		} else {
			return false;
		}
	} else {
		// last two, like a + (c + b)
		if ( n->getLeft ( ) > n->getRight ( ) ) {
			ext::ptr_value < FormalRegExpElement < SymbolType > > tmp ( std::move ( n->getLeft ( ) ) );

			n->setLeft ( std::move ( n->getRight ( ) ) );
			n->setRight ( std::move ( tmp ) );
			return true;
		} else {
			return false;
		}
	}

	return false;
}

/**
  * optimization A3: x + \0 = x
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A3 ( FormalRegExpElement < SymbolType > * & node ) {
	FormalRegExpAlternation < SymbolType > * n = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( node );
	if( ! n ) return false;

	// input can be \0 + \0, so at least one element must be preserved

	if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & n->getRight ( ) ) ) {
		node = std::move ( n->getLeft ( ) ).clone ( );
		delete n;
		return true;
	}

	if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & n->getLeft ( ) ) ) {
		node = std::move ( n->getRight ( ) ).clone ( );
		delete n;
		return true;
	}

	return false;
}

/**
  * optimization A4: x + x = x
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A4( FormalRegExpElement < SymbolType > * & node ) {
	/*
	 * two ways of implementing this opitimization:
	 * - sort and call std::unique ( O(n lg n) + O(n) ), but it also sorts...
	 * - check every element against other ( O(n*n) )
	 *
	 * As we always sort in optimization, we can use the first version, but A4 must be __always__ called __after__ A2
	 */

	FormalRegExpAlternation < SymbolType > * n = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( node );
	if ( ! n ) return false;
	if ( n->getLeftElement() == n->getRightElement() ) {
		node = std::move ( n->getRight ( ) ).clone ( );
		return true;
	}

	return false;
}

/**
  * optimization A5: x.(y.z) = (x.y).z = x.y.z
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A5( FormalRegExpElement < SymbolType > * & node ) {
	FormalRegExpConcatenation < SymbolType > * n = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( node );
	if( ! n ) return false;
	if( dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & n->getLeft ( ) ) ) {
		FormalRegExpConcatenation < SymbolType > leftCon ( std::move ( static_cast < FormalRegExpConcatenation < SymbolType > & > ( n->getLeft ( ) ) ) );
		n->setLeft ( std::move ( leftCon.getLeft ( ) ) );
		leftCon.setLeft ( std::move ( leftCon.getRight ( ) ) );
		leftCon.setRight ( std::move ( n->getRight ( ) ) );
		n->setRight ( std::move ( leftCon ) );

		return true;
	}

	return false;
}

/**
  * optimization A6: \e.x = x.\e = x
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A6( FormalRegExpElement < SymbolType > * & node ) {
	FormalRegExpConcatenation < SymbolType > * n = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( node );
	if( ! n ) return false;

	// input can be \e + \e, so at least one element must be preserved

	if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & n->getRight ( ) ) ) {
		node = std::move ( n->getLeft ( ) ).clone ( );
		delete n;
		return true;
	}

	if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & n->getLeft ( ) ) ) {
		node = std::move ( n->getRight ( ) ).clone ( );
		delete n;
		return true;
	}

	return false;
}

/**
  * optimization A7: \0.x = x.\0 = \0
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A7( FormalRegExpElement < SymbolType > * & node ) {
	FormalRegExpConcatenation < SymbolType > * n = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( node );
	if( ! n ) return false;
	if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & n->getRight ( ) ) || dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & n->getLeft ( ) ) ) {
		delete n;
		node = new FormalRegExpEmpty < SymbolType > { };
		return true;
	}

	return false;
}

/**
  * optimization A8: x.(y+z) = x.y + x.z
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A8( FormalRegExpElement < SymbolType > * & /* node */) {
	return false; //TODO
}

/**
  * optimization A9: (x+y).z = x.z + y.z
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A9( FormalRegExpElement < SymbolType > * & /* node */) {
	return false; //TODO
}

/**
  * optimization A10: x* = \e + x*x
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A10( FormalRegExpElement < SymbolType > * & node ) {
	/*
	 * problem:
	 * - \e + x*x = x*
	 * - but if we do not have the eps, but we do have iteration, then \e \in h(iter), therefore \e in h(node).
	 */

	FormalRegExpAlternation < SymbolType > * n = dynamic_cast<FormalRegExpAlternation < SymbolType > * > ( node );
	if ( ! n ) return false;
	if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & n->getLeft ( ) ) ) {
		FormalRegExpConcatenation < SymbolType > * rightCon = dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & n->getRight ( ) );
		if ( ! rightCon ) return false;
		if ( FormalRegExpIteration < SymbolType > * rightLeftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & rightCon->getLeft ( ) ); rightLeftIte && rightLeftIte->getElement ( ) == rightCon->getRightElement ( ) ) {
			node = std::move ( rightCon->getLeft ( ) ).clone ( );
			delete n;
			return true;
		if ( FormalRegExpIteration < SymbolType > * rightRightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & rightCon->getRight ( ) ); rightRightIte && rightRightIte->getElement ( ) == rightCon->getLeftElement ( ) ) {
			node = std::move ( rightCon->getRight ( ) ).clone ( );
			delete n;
			return true;
	if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & n->getRight ( ) ) ) {
		FormalRegExpConcatenation < SymbolType > * leftCon = dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & n->getLeft ( ) );
		if ( ! leftCon ) return false;
		if ( FormalRegExpIteration < SymbolType > * leftLeftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & leftCon->getLeft ( ) ); leftLeftIte && leftLeftIte->getElement ( ) == leftCon->getRightElement ( ) ) {
			node = std::move ( leftCon->getLeft ( ) ).clone ( );
			delete n;
			return true;
		if ( FormalRegExpIteration < SymbolType > * leftRightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & leftCon->getRight ( ) ); leftRightIte && leftRightIte->getElement ( ) == leftCon->getLeftElement ( ) ) {
			node = std::move ( leftCon->getRight ( ) ).clone ( );
			delete n;
			return true;
		}
	}

	return false;
}

/**
  * optimization A11: x* = (\e + x)*
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A11( FormalRegExpElement < SymbolType > * & node ) {
	FormalRegExpIteration < SymbolType > * n = dynamic_cast<FormalRegExpIteration < SymbolType > *>( node );
	if( ! n ) return false;
	if ( FormalRegExpAlternation < SymbolType > * childAlt = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & n->getChild ( ) ); childAlt ) {
		if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & childAlt->getLeft ( ) ) ) {
			n->setChild ( std::move ( childAlt->getRight ( ) ) );
			return true;
		}
		if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & childAlt->getRight ( ) ) ) {
			n->setChild ( std::move ( childAlt->getLeft ( ) ) );
			return true;
		}
	}

	return false;
}

/**
  * optimization V1: \0* = \e
  * optimization T1: \e* = \e
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V1( FormalRegExpElement < SymbolType > * & node ) {
	FormalRegExpIteration < SymbolType > * n = dynamic_cast<FormalRegExpIteration < SymbolType > *>( node );
	if( ! n ) return false;
	if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & n->getChild ( ) ) ) {
		delete n;
		node = new FormalRegExpEpsilon < SymbolType > ( );
		return true;
	}
	if ( dynamic_cast<FormalRegExpEpsilon < SymbolType > * > ( & n->getChild ( ) ) ) {
		delete n;
		node = new FormalRegExpEpsilon < SymbolType > ( );
		return true;
	}
	return false;
}

/**
  * optimization V2: x* + x = x*
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V2( FormalRegExpElement < SymbolType > * & node ) {
	FormalRegExpAlternation < SymbolType > * n = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( node );
	if( ! n ) return false;
	if ( FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & n->getLeft ( ) ); leftIte && leftIte->getElement ( ) == n->getRightElement ( ) ) {
		node = std::move ( n->getLeft ( ) ).clone ( );
		delete n;
		return true;
	if ( FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & n->getRight ( ) ); rightIte && rightIte->getElement ( ) == n->getLeftElement ( ) ) {
		node = std::move ( n->getRight ( ) ).clone ( );
		delete n;
		return true;
	}

	return false;
}

/**
  * optimization V3: x** = x*
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V3( FormalRegExpElement < SymbolType > * & node ) {
	FormalRegExpIteration < SymbolType > * n = dynamic_cast<FormalRegExpIteration < SymbolType > *>( node );
	if( ! n ) return false;
	if( FormalRegExpIteration < SymbolType > * childIter = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & n->getChild ( ) ); childIter ) {
		n->setChild ( std::move ( childIter->getChild ( ) ) );
		return true;
	}

	return false;
}

/**
  * optimization V4: (x+y)* = (x*y*)*
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V4( FormalRegExpElement < SymbolType > * & node ) {
	FormalRegExpIteration < SymbolType > * n = dynamic_cast < FormalRegExpIteration < SymbolType > *>( node );
	if( ! n ) return false;
	if( FormalRegExpConcatenation < SymbolType > * child = dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & n->getChild ( ) ); child ) {
		if( FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & child->getLeft ( ) ); leftIte ) {
			if( FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & child->getRight ( ) ); rightIte ) {
				n->setChild ( FormalRegExpAlternation < SymbolType >( std::move ( leftIte->getElement ( ) ), std::move ( rightIte->getElement ( ) ) ) );
			}
		}
	}

	return true;
}

/**
  * optimization V5: x*y = y + x*xy
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V5( FormalRegExpElement < SymbolType > * & /* node */) {
	return false; //TODO
}

/**
  * optimization V6: x*y = y + xx*y
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V6( FormalRegExpElement < SymbolType > * & /* node */) {
	return false; //TODO
}

/**
  * optimization V8: \e in h(x) => xx*=x*
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V8( FormalRegExpElement < SymbolType > * & /* node */) {
	return false; //TODO
}

/**
  * optimization V9: (xy)*x = x(yx)*
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V9( FormalRegExpElement < SymbolType > * & /* node */) {
	return false; //TODO
}

/**
  * optimization V10: (x+y)* = (x*+y*)*
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V10( FormalRegExpElement < SymbolType > * & node ) {
	FormalRegExpIteration < SymbolType > * n = dynamic_cast < FormalRegExpIteration < SymbolType > *>( node );
	if( ! n ) return false;
	if( FormalRegExpAlternation < SymbolType > * alt = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & n->getChild ( ) ); alt ) {
		if( FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & alt->getLeft ( ) ); leftIte ) {
			if( FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & alt->getRight ( ) ); rightIte ) {
				alt->setLeft ( std::move ( leftIte->getChild ( ) ) );
				alt->setRight ( std::move ( rightIte->getChild ( ) ) );
			}
		}
	}

	return true;
}

/**
  * optimization X1: a* + \e = a*
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::X1( FormalRegExpElement < SymbolType > * & node ) {
	FormalRegExpAlternation < SymbolType > * n = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( node );
	if( ! n ) return false;
	if ( FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & n->getLeft ( ) ); leftIte && dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & n->getRight ( ) ) ) {
		node = std::move ( n->getLeft ( ) ).clone ( );
		delete n;
		return true;
	if ( FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & n->getRight ( ) ); rightIte && dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & n->getLeft ( ) ) ) {
		node = std::move ( n->getRight ( ) ).clone ( );
		delete n;
		return true;
} /* namespace regexp::simplify */