-
Jan Trávníček authoredJan Trávníček authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
RegExpOptimizeFormalPart.cxx 21.31 KiB
/*
* RegExpOptimize.cpp
*
* Created on: 20. 1. 2014
* Author: Jan Travnicek
*/
namespace regexp {
namespace simplify {
void RegExpOptimize::optimize( FormalRegExpElement < alphabet::Symbol > & element ) {
FormalRegExpElement < alphabet::Symbol >* optimized = optimizeInner( element );
FormalRegExpAlternation < alphabet::Symbol > * alternation = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( & element );
if( alternation ) {
FormalRegExpAlternation < alphabet::Symbol > * alternationOptimized = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( optimized );
if( alternationOptimized ) {
* alternation = std::move( * alternationOptimized );
delete alternationOptimized;
} else {
* alternation = FormalRegExpAlternation < alphabet::Symbol > { std::manage_move ( optimized ), FormalRegExpEmpty < alphabet::Symbol > { } };
}
return;
}
FormalRegExpConcatenation < alphabet::Symbol > * concatenation = dynamic_cast<FormalRegExpConcatenation < alphabet::Symbol > *>( & element );
if( concatenation ) {
FormalRegExpConcatenation < alphabet::Symbol > * concatenationOptimized = dynamic_cast<FormalRegExpConcatenation < alphabet::Symbol > *>( optimized );
if( concatenationOptimized ) {
* concatenation = std::move( * concatenationOptimized );
delete concatenationOptimized;
} else {
* concatenation = FormalRegExpConcatenation < alphabet::Symbol > { std::manage_move ( optimized ), FormalRegExpEpsilon < alphabet::Symbol > { } };
}
return;
}
FormalRegExpIteration < alphabet::Symbol > * iteration = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( & element );
if( iteration ) {
FormalRegExpIteration < alphabet::Symbol > * iterationOptimized = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( optimized );
if( iterationOptimized ) {
* iteration = std::move( * iterationOptimized );
delete iterationOptimized;
} else {
* iteration = FormalRegExpIteration < alphabet::Symbol > { std::manage_move ( optimized ) };
}
return;
}
// Nothing to optimize original element was FormalRegExpSymbol, FormalRegExpEpsilon, or FormalRegExpEmpty
return;
}
FormalRegExpElement < alphabet::Symbol >* RegExpOptimize::optimizeInner( const FormalRegExpElement < alphabet::Symbol > & node ) {
FormalRegExpElement < alphabet::Symbol >* elem = node.clone();
// optimize while you can
while( A1( elem ) || A2( elem ) || A3( elem ) || A4( elem ) || A10( elem ) || V2( elem ) || V5( elem ) || V6( elem ) || X1( elem )
|| A5( elem ) || A6( elem ) || A7( elem ) || A8( elem ) || A9( elem ) || V8( elem ) //|| V9( elem )
|| A11( elem ) || V1( elem ) || V3( elem ) || V4( elem ) || V10( elem ) || S(elem) );
return elem;
}
bool RegExpOptimize::S( FormalRegExpElement < alphabet::Symbol > * & node ) {
bool optimized = false;
FormalRegExpAlternation < alphabet::Symbol > * alternation = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol >*>( node );
if( alternation ) {
FormalRegExpElement < alphabet::Symbol > * tmp = optimizeInner ( alternation->getLeftElement ( ) );
if(* tmp != alternation->getLeftElement ( ) ) {
optimized = true;
alternation->setLeftElement ( * std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > ( tmp ) );
}
tmp = optimizeInner ( alternation->getRightElement ( ) );
if(* tmp != alternation->getRightElement ( ) ) {
optimized = true;
alternation->setRightElement ( * std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > ( tmp ) );
}
return optimized;
}
FormalRegExpConcatenation < alphabet::Symbol > * concatenation = dynamic_cast<FormalRegExpConcatenation < alphabet::Symbol >*>( node );
if( concatenation ) {
FormalRegExpElement < alphabet::Symbol >* tmp = optimizeInner ( concatenation->getLeftElement() );
if(* tmp != concatenation->getLeftElement ( ) ) {
optimized = true;
concatenation->setLeftElement ( * std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > ( tmp ) );
}
tmp = optimizeInner ( concatenation->getRightElement ( ));
if(* tmp != concatenation->getRightElement ( )) {
optimized = true;
concatenation->setRightElement ( * std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > ( tmp ) );
}
return optimized;
}
FormalRegExpIteration < alphabet::Symbol > * iteration = dynamic_cast<FormalRegExpIteration < alphabet::Symbol >*>( node );
if( iteration ) {
FormalRegExpElement < alphabet::Symbol >* tmp = optimizeInner ( iteration->getElement() );
if(* tmp != iteration->getElement ( ) ) {
optimized = true;
iteration->setElement ( * std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > ( tmp ) );
}
return optimized;
}
return optimized;
}
/**
* optimization A1: ( x + y ) + z = x + ( y + z )
* @param node FormalRegExpElement < alphabet::Symbol > node
* @return bool true if optimization applied else false
*/
bool RegExpOptimize::A1( FormalRegExpElement < alphabet::Symbol > * & n ) {
FormalRegExpAlternation < alphabet::Symbol > * node = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( n );
if( ! node ) return false;
if( dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( node->getLeft().get() ) ) {
std::smart_ptr < FormalRegExpAlternation < alphabet::Symbol > > leftAlt ( static_cast<FormalRegExpAlternation < alphabet::Symbol > *>( node->getLeft().release() ) );
std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > x = std::move ( leftAlt->getLeft() );
std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > y = std::move ( leftAlt->getRight() );
std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > z = std::move ( node->getRight() );
leftAlt->setLeft ( std::move ( y ) );
leftAlt->setRight ( std::move ( z ) );
node->setLeft ( std::move ( x ) );
node->setRight ( std::move ( leftAlt ) );
return true;
}
return false;
}
/**
* optimization A2: x + y = y + x (sort)
* @param node FormalRegExpElement < alphabet::Symbol > node
* @return bool true if optimization applied else false
*/
bool RegExpOptimize::A2( FormalRegExpElement < alphabet::Symbol > * & n ) {
FormalRegExpAlternation < alphabet::Symbol > * node = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( n );
if( ! node ) return false;
if( dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( node->getRight().get() ) ) {
FormalRegExpAlternation < alphabet::Symbol > * rightAlt = static_cast < FormalRegExpAlternation < alphabet::Symbol > * > ( node->getRight ( ).get ( ) );
std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > x = std::move ( node->getLeft ( ) );
std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > y = std::move ( rightAlt->getLeft ( ) );
if(*x > *y) {
node->setLeft ( std::move ( y ) );
rightAlt->setLeft ( std::move ( x ) );
return true;
} else {
node->setLeft ( std::move ( x ) );
rightAlt->setLeft ( std::move ( y ) );
return false;
}
}
return false;
}
/**
* optimization A3: x + \0 = x
* @param node FormalRegExpElement < alphabet::Symbol > node
* @return bool true if optimization applied else false
*/
bool RegExpOptimize::A3( FormalRegExpElement < alphabet::Symbol > * & n ) {
FormalRegExpAlternation < alphabet::Symbol > * node = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( n );
if( ! node ) return false;
// input can be \0 + \0, so at least one element must be preserved
if( dynamic_cast<FormalRegExpEmpty < alphabet::Symbol > *>( node->getRight().get() ) ) {
n = node->getLeft().release();
delete node;
return true;
}
if( dynamic_cast<FormalRegExpEmpty < alphabet::Symbol > *>( node->getLeft().get() ) ) {
n = node->getRight().release();
delete node;
return true;
}
return false;
}
/**
* optimization A4: x + x = x
* @param node FormalRegExpElement < alphabet::Symbol > node
* @return bool true if optimization applied else false
*/
bool RegExpOptimize::A4( FormalRegExpElement < alphabet::Symbol > * & n ) {
/*
* two ways of implementing this opitimization:
* - sort and call std::unique ( O(n lg n) + O(n) ), but it also sorts...
* - check every element against other ( O(n*n) )
*
* As we always sort in optimization, we can use the first version, but A4 must be __always__ called __after__ A2
*/
FormalRegExpAlternation < alphabet::Symbol > * node = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( n );
if( ! node ) return false;
if( node->getLeftElement() == node->getRightElement() ) {
n = node->getRight().release();
delete node;
return true;
}
return false;
}
/**
* optimization A5: x.(y.z) = (x.y).z = x.y.z
* @param node FormalRegExpElement < alphabet::Symbol > node
* @return bool true if optimization applied else false
*/
bool RegExpOptimize::A5( FormalRegExpElement < alphabet::Symbol > * & n ) {
FormalRegExpConcatenation < alphabet::Symbol > * node = dynamic_cast<FormalRegExpConcatenation < alphabet::Symbol > *>( n );
if( ! node ) return false;
if( dynamic_cast<FormalRegExpConcatenation < alphabet::Symbol > *>( node->getLeft().get() ) ) {
std::smart_ptr < FormalRegExpConcatenation < alphabet::Symbol > > leftCon ( static_cast<FormalRegExpConcatenation < alphabet::Symbol > *>( node->getLeft().release() ) );
std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > x = std::move ( leftCon->getLeft() );
std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > y = std::move ( leftCon->getRight() );
std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > z = std::move ( node->getRight() );
leftCon->setLeft ( std::move ( y ) );
leftCon->setRight ( std::move ( z ) );
node->setLeft ( std::move ( x ) );
node->setRight ( std::move ( leftCon ) );
return true;
}
return false;
}
/**
* optimization A6: \e.x = x.\e = x
* @param node FormalRegExpElement < alphabet::Symbol > node
* @return bool true if optimization applied else false
*/
bool RegExpOptimize::A6( FormalRegExpElement < alphabet::Symbol > * & n ) {
FormalRegExpConcatenation < alphabet::Symbol > * node = dynamic_cast<FormalRegExpConcatenation < alphabet::Symbol > *>( n );
if( ! node ) return false;
// input can be \e + \e, so at least one element must be preserved
if( dynamic_cast<FormalRegExpEpsilon < alphabet::Symbol > *>( node->getRight().get() ) ) {
n = node->getLeft().release();
delete node;
return true;
}
if( dynamic_cast<FormalRegExpEpsilon < alphabet::Symbol > *>( node->getLeft().get() ) ) {
n = node->getRight().release();
delete node;
return true;
}
return false;
}
/**
* optimization A7: \0.x = x.\0 = \0
* @param node FormalRegExpElement < alphabet::Symbol > node
* @return bool true if optimization applied else false
*/
bool RegExpOptimize::A7( FormalRegExpElement < alphabet::Symbol > * & n ) {
FormalRegExpConcatenation < alphabet::Symbol > * node = dynamic_cast<FormalRegExpConcatenation < alphabet::Symbol > *>( n );
if( ! node ) return false;
if( dynamic_cast<FormalRegExpEmpty < alphabet::Symbol > *>( node->getRight().get() ) || dynamic_cast<FormalRegExpEmpty < alphabet::Symbol > *>( node->getLeft().get() ) ) {
delete node;
n = new FormalRegExpEmpty < alphabet::Symbol > { };
return true;
}
return false;
}
/**
* optimization A8: x.(y+z) = x.y + x.z
* @param node FormalRegExpElement < alphabet::Symbol > node
* @return bool true if optimization applied else false
*/
bool RegExpOptimize::A8( FormalRegExpElement < alphabet::Symbol > * & /* n */) {
return false; //TODO
}
/**
* optimization A9: (x+y).z = x.z + y.z
* @param node FormalRegExpElement < alphabet::Symbol > node
* @return bool true if optimization applied else false
*/
bool RegExpOptimize::A9( FormalRegExpElement < alphabet::Symbol > * & /* n */) {
return false; //TODO
}
/**
* optimization A10: x* = \e + x*x
* @param node FormalRegExpElement < alphabet::Symbol > node
* @return bool true if optimization applied else false
*/
bool RegExpOptimize::A10( FormalRegExpElement < alphabet::Symbol > * & n ) {
/*
* problem:
* - \e + x*x = x*
* - but if we do not have the eps, but we do have iteration, then \e \in h(iter), therefore \e in h(node).
*/
FormalRegExpAlternation < alphabet::Symbol > * node = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( n );
if( ! node ) return false;
if( dynamic_cast<FormalRegExpEpsilon < alphabet::Symbol > *>( node->getLeft().get() ) ) {
FormalRegExpConcatenation < alphabet::Symbol > * rightCon = dynamic_cast<FormalRegExpConcatenation < alphabet::Symbol > *>( node->getRight().get() );
if( ! rightCon ) return false;
FormalRegExpIteration < alphabet::Symbol > * rightLeftIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( rightCon->getLeft().get() );
if( rightLeftIte ) {
if(rightLeftIte->getElement() == rightCon->getRightElement()) {
n = rightCon->getLeft().release();
delete node;
return true;
}
}
FormalRegExpIteration < alphabet::Symbol > * rightRightIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( rightCon->getRight().get() );
if( rightRightIte ) {
if(rightRightIte->getElement() == rightCon->getLeftElement()) {
n = rightCon->getRight().release();
delete node;
return true;
}
}
}
if( dynamic_cast<FormalRegExpEpsilon < alphabet::Symbol > *>( node->getRight().get() ) ) {
FormalRegExpConcatenation < alphabet::Symbol > * leftCon = dynamic_cast<FormalRegExpConcatenation < alphabet::Symbol > *>( node->getLeft().get() );
if( ! leftCon ) return false;
FormalRegExpIteration < alphabet::Symbol > * leftLeftIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( leftCon->getLeft().get() );
if( leftLeftIte ) {
if(leftLeftIte->getElement() == leftCon->getRightElement()) {
n = leftCon->getLeft().release();
delete node;
return true;
}
}
FormalRegExpIteration < alphabet::Symbol > * leftRightIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( leftCon->getRight().get() );
if( leftRightIte ) {
if(leftRightIte->getElement() == leftCon->getLeftElement()) {
n = leftCon->getRight().release();
delete node;
return true;
}
}
}
return false;
}
/**
* optimization A11: x* = (\e + x)*
* @param node FormalRegExpElement < alphabet::Symbol > node
* @return bool true if optimization applied else false
*/
bool RegExpOptimize::A11( FormalRegExpElement < alphabet::Symbol > * & n ) {
FormalRegExpIteration < alphabet::Symbol > * node = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( n );
if( ! node ) return false;
FormalRegExpAlternation < alphabet::Symbol > * childAlt = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( node->getChild().get() );
if( childAlt ) {
if( dynamic_cast < FormalRegExpEpsilon < alphabet::Symbol > * > ( childAlt->getLeft ( ).get ( ) ) ) {
node->setChild ( std::move ( childAlt->getRight ( ) ) );
return true;
}
if( dynamic_cast < FormalRegExpEpsilon < alphabet::Symbol > * > ( childAlt->getRight ( ).get ( ) ) ) {
node->setChild ( std::move ( childAlt->getLeft ( ) ) );
return true;
}
}
return false;
}
/**
* optimization V1: \0* = \e
* optimization T1: \e* = \e
* @param node FormalRegExpElement < alphabet::Symbol > node
* @return bool true if optimization applied else false
*/
bool RegExpOptimize::V1( FormalRegExpElement < alphabet::Symbol > * & n ) {
FormalRegExpIteration < alphabet::Symbol > * node = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( n );
if( ! node ) return false;
if( dynamic_cast<FormalRegExpEmpty< alphabet::Symbol > *>( node->getChild ( ).get() ) ) {
delete node;
n = new FormalRegExpEpsilon < alphabet::Symbol > ( );
return true;
}
if( dynamic_cast<FormalRegExpEpsilon < alphabet::Symbol > *>( node->getChild ( ).get() ) ) {
delete node;
n = new FormalRegExpEpsilon < alphabet::Symbol > ( );
return true;
}
return false;
}
/**
* optimization V2: x* + x = x*
* @param node FormalRegExpElement < alphabet::Symbol > node
* @return bool true if optimization applied else false
*/
bool RegExpOptimize::V2( FormalRegExpElement < alphabet::Symbol > * & n ) {
FormalRegExpAlternation < alphabet::Symbol > * node = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( n );
if( ! node ) return false;
FormalRegExpIteration < alphabet::Symbol > * leftIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( node->getLeft().get() );
if( leftIte ) {
if(leftIte->getElement() == node->getRightElement()) {
n = node->getLeft().release();
delete node;
return true;
}
}
FormalRegExpIteration < alphabet::Symbol > * rightIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( node->getRight().get() );
if( rightIte ) {
if(rightIte->getElement() == node->getLeftElement()) {
n = node->getRight().release();
delete node;
return true;
}
}
return false;
}
/**
* optimization V3: x** = x*
* @param node FormalRegExpElement < alphabet::Symbol > node
* @return bool true if optimization applied else false
*/
bool RegExpOptimize::V3( FormalRegExpElement < alphabet::Symbol > * & n ) {
FormalRegExpIteration < alphabet::Symbol > * node = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( n );
if( ! node ) return false;
FormalRegExpIteration < alphabet::Symbol >* childIter = dynamic_cast<FormalRegExpIteration < alphabet::Symbol >*>( node->getChild().get() );
if( childIter ) {
node->setChild ( std::move ( childIter->getChild ( ) ) );
return true;
}
return false;
}
/**
* optimization V4: (x+y)* = (x*y*)*
* @param node FormalRegExpElement < alphabet::Symbol > node
* @return bool true if optimization applied else false
*/
bool RegExpOptimize::V4( FormalRegExpElement < alphabet::Symbol > * & n ) {
FormalRegExpIteration < alphabet::Symbol > * node = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( n );
if( ! node ) return false;
FormalRegExpConcatenation < alphabet::Symbol > * child = dynamic_cast<FormalRegExpConcatenation < alphabet::Symbol > *>( node->getChild().get() );
if( ! child ) return false;
FormalRegExpIteration < alphabet::Symbol > * leftIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( child->getLeft().get() );
if( ! leftIte ) return false;
FormalRegExpIteration < alphabet::Symbol > * rightIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( child->getRight().get() );
if( ! rightIte ) return false;
n = new FormalRegExpIteration < alphabet::Symbol >( FormalRegExpAlternation < alphabet::Symbol >(std::move( leftIte->getElement ( ) ), std::move( rightIte->getElement ( ) ) ) );
delete node;
return true;
}
/**
* optimization V5: x*y = y + x*xy
* @param node FormalRegExpElement < alphabet::Symbol > node
* @return bool true if optimization applied else false
*/
bool RegExpOptimize::V5( FormalRegExpElement < alphabet::Symbol > * & /* n */) {
return false; //TODO
}
/**
* optimization V6: x*y = y + xx*y
* @param node FormalRegExpElement < alphabet::Symbol > node
* @return bool true if optimization applied else false
*/
bool RegExpOptimize::V6( FormalRegExpElement < alphabet::Symbol > * & /* n */) {
return false; //TODO
}
/**
* optimization V8: \e in h(x) => xx*=x*
* @param node FormalRegExpElement < alphabet::Symbol > node
* @return bool true if optimization applied else false
*/
bool RegExpOptimize::V8( FormalRegExpElement < alphabet::Symbol > * & /* n */) {
return false; //TODO
}
/**
* optimization V9: (xy)*x = x(yx)*
* @param node FormalRegExpElement < alphabet::Symbol > node
* @return bool true if optimization applied else false
*/
bool RegExpOptimize::V9( FormalRegExpElement < alphabet::Symbol > * & /* n */) {
return false; //TODO
}
/**
* optimization V10: (x+y)* = (x*+y*)*
* @param node FormalRegExpElement < alphabet::Symbol > node
* @return bool true if optimization applied else false
*/
bool RegExpOptimize::V10( FormalRegExpElement < alphabet::Symbol > * & n ) {
FormalRegExpAlternation < alphabet::Symbol > * node = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( n );
if( ! node ) return false;
FormalRegExpIteration < alphabet::Symbol > * leftIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( node->getLeft().get() );
if( ! leftIte ) return false;
FormalRegExpIteration < alphabet::Symbol > * rightIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( node->getRight().get() );
if( ! rightIte ) return false;
n = new FormalRegExpConcatenation < alphabet::Symbol >(std::move( leftIte->getElement() ), std::move( rightIte->getElement() ) );
delete node;
return true;
}
/**
* optimization X1: a* + \e = a*
* @param node FormalRegExpElement < alphabet::Symbol > node
* @return bool true if optimization applied else false
*/
bool RegExpOptimize::X1( FormalRegExpElement < alphabet::Symbol > * & n ) {
FormalRegExpAlternation < alphabet::Symbol > * node = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( n );
if( ! node ) return false;
FormalRegExpIteration < alphabet::Symbol > * leftIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( node->getLeft().get() );
if( leftIte ) {
if(dynamic_cast<FormalRegExpEpsilon < alphabet::Symbol > *>(node->getRight().get())) {
n = node->getLeft().release();
delete node;
return true;
}
}
FormalRegExpIteration < alphabet::Symbol > * rightIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( node->getRight().get() );
if( rightIte ) {
if(dynamic_cast<FormalRegExpEpsilon < alphabet::Symbol > *>(node->getLeft().get())) {
n = node->getRight().release();
delete node;
return true;
}
}
return false;
}
} /* namespace simplify */
} /* namespace regexp */