Newer
Older
namespace regexp::simplify {
template < class SymbolType >
void RegExpOptimize::optimize( FormalRegExpElement < SymbolType > & element ) {
ext::smart_ptr < FormalRegExpElement < SymbolType > > optimized = optimizeInner ( element );
if ( FormalRegExpAlternation < SymbolType > * alternation = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( & element ) ) {
FormalRegExpAlternation < SymbolType > * alternationOptimized = dynamic_cast<FormalRegExpAlternation < SymbolType > * > ( optimized.get ( ) );
if( alternationOptimized ) {
* alternation = std::move( * alternationOptimized );
} else {
* alternation = FormalRegExpAlternation < SymbolType > { * optimized, FormalRegExpEmpty < SymbolType > { } };
} else if ( FormalRegExpConcatenation < SymbolType > * concatenation = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( & element ) ) {
FormalRegExpConcatenation < SymbolType > * concatenationOptimized = dynamic_cast<FormalRegExpConcatenation < SymbolType > * > ( optimized.get ( ) );
if( concatenationOptimized ) {
* concatenation = std::move( * concatenationOptimized );
} else {
* concatenation = FormalRegExpConcatenation < SymbolType > { * optimized, FormalRegExpEpsilon < SymbolType > { } };
} else if ( FormalRegExpIteration < SymbolType > * iteration = dynamic_cast<FormalRegExpIteration < SymbolType > *>( & element ) ) {
FormalRegExpIteration < SymbolType > * iterationOptimized = dynamic_cast<FormalRegExpIteration < SymbolType > *>( optimized.get ( ) );
if( iterationOptimized ) {
* iteration = std::move( * iterationOptimized );
} else {
* iteration = FormalRegExpIteration < SymbolType > { optimized };
}
}
// Nothing to optimize original element was FormalRegExpSymbol, FormalRegExpEpsilon, or FormalRegExpEmpty
}
template < class SymbolType >
ext::smart_ptr < FormalRegExpElement < SymbolType > > RegExpOptimize::optimizeInner( const FormalRegExpElement < SymbolType > & node ) {
FormalRegExpElement < SymbolType > * elem = node.clone();
// optimize while you can
while( A1( elem ) || A2( elem ) || A3( elem ) || A4( elem ) || A10( elem ) || V2( elem ) || V5( elem ) || V6( elem ) || X1( elem )
|| A5( elem ) || A6( elem ) || A7( elem ) || A8( elem ) || A9( elem ) || V8( elem ) //|| V9( elem )
|| A11( elem ) || V1( elem ) || V3( elem ) || V4( elem ) || V10( elem ) || S(elem) );
return ext::smart_ptr < FormalRegExpElement < SymbolType > > ( elem );
}
template < class SymbolType >
bool RegExpOptimize::S( FormalRegExpElement < SymbolType > * & node ) {
bool optimized = false;
if( FormalRegExpAlternation < SymbolType > * alternation = dynamic_cast<FormalRegExpAlternation < SymbolType >*>( node ); alternation ) {
if( ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( alternation->getLeftElement ( ) ); * tmp != alternation->getLeftElement ( ) ) {
if( ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( alternation->getRightElement ( ) ); * tmp != alternation->getRightElement ( ) ) {
if( FormalRegExpConcatenation < SymbolType > * concatenation = dynamic_cast<FormalRegExpConcatenation < SymbolType >*>( node ); concatenation ) {
if( ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( concatenation->getLeftElement ( ) ); * tmp != concatenation->getLeftElement ( ) ) {
if( ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( concatenation->getRightElement ( ) ); * tmp != concatenation->getRightElement ( ) ) {
concatenation->setRightElement ( * tmp );
if( FormalRegExpIteration < SymbolType > * iteration = dynamic_cast<FormalRegExpIteration < SymbolType >*>( node ); iteration ) {
if( ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( iteration->getElement() ); * tmp != iteration->getElement ( ) ) {
}
return optimized;
}
return optimized;
}
/**
* optimization A1: ( x + y ) + z = x + ( y + z )
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::A1( FormalRegExpElement < SymbolType > * & node ) {
FormalRegExpAlternation < SymbolType > * n = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( node );
if( ! n ) return false;
if( dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & n->getLeft ( ) ) ) {
FormalRegExpAlternation < SymbolType > leftAlt ( std::move ( static_cast < FormalRegExpAlternation < SymbolType > & > ( n->getLeft ( ) ) ) );
n->setLeft ( std::move ( leftAlt.getLeft ( ) ) );
leftAlt.setLeft ( std::move ( leftAlt.getRight ( ) ) );
leftAlt.setRight ( std::move ( n->getRight ( ) ) );
n->setRight ( std::move ( leftAlt ) );
return true;
}
return false;
}
/**
* optimization A2: x + y = y + x (sort)
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::A2 ( FormalRegExpElement < SymbolType > * & node ) {
FormalRegExpAlternation < SymbolType > * n = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( node );
if( ! n ) return false;
if ( dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & n->getRight ( ) ) ) {
// deeper structure, like b + (a + c)
FormalRegExpAlternation < SymbolType > & rightAlt = static_cast < FormalRegExpAlternation < SymbolType > & > ( n->getRight ( ) );
if ( n->getLeft ( ) > rightAlt.getLeft ( ) ) {
ext::ptr_value < FormalRegExpElement < SymbolType > > tmp ( std::move ( n->getLeft ( ) ) );
n->setLeft ( std::move ( rightAlt.getLeft ( ) ) );
rightAlt.setLeft ( std::move ( tmp ) );
return true;
} else {
return false;
}
} else {
// last two, like a + (c + b)
if ( n->getLeft ( ) > n->getRight ( ) ) {
ext::ptr_value < FormalRegExpElement < SymbolType > > tmp ( std::move ( n->getLeft ( ) ) );
n->setLeft ( std::move ( n->getRight ( ) ) );
n->setRight ( std::move ( tmp ) );
return true;
} else {
return false;
}
}
return false;
}
/**
* optimization A3: x + \0 = x
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::A3 ( FormalRegExpElement < SymbolType > * & node ) {
FormalRegExpAlternation < SymbolType > * n = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( node );
if( ! n ) return false;
// input can be \0 + \0, so at least one element must be preserved
if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & n->getRight ( ) ) ) {
node = std::move ( n->getLeft ( ) ).clone ( );
delete n;
if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & n->getLeft ( ) ) ) {
node = std::move ( n->getRight ( ) ).clone ( );
delete n;
return true;
}
return false;
}
/**
* optimization A4: x + x = x
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::A4( FormalRegExpElement < SymbolType > * & node ) {
/*
* two ways of implementing this opitimization:
* - sort and call std::unique ( O(n lg n) + O(n) ), but it also sorts...
* - check every element against other ( O(n*n) )
*
* As we always sort in optimization, we can use the first version, but A4 must be __always__ called __after__ A2
*/
FormalRegExpAlternation < SymbolType > * n = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( node );
if ( ! n ) return false;
if ( n->getLeftElement() == n->getRightElement() ) {
node = std::move ( n->getRight ( ) ).clone ( );
return true;
}
return false;
}
/**
* optimization A5: x.(y.z) = (x.y).z = x.y.z
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::A5( FormalRegExpElement < SymbolType > * & node ) {
FormalRegExpConcatenation < SymbolType > * n = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( node );
if( dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & n->getLeft ( ) ) ) {
FormalRegExpConcatenation < SymbolType > leftCon ( std::move ( static_cast < FormalRegExpConcatenation < SymbolType > & > ( n->getLeft ( ) ) ) );
n->setLeft ( std::move ( leftCon.getLeft ( ) ) );
leftCon.setLeft ( std::move ( leftCon.getRight ( ) ) );
leftCon.setRight ( std::move ( n->getRight ( ) ) );
n->setRight ( std::move ( leftCon ) );
return true;
}
return false;
}
/**
* optimization A6: \e.x = x.\e = x
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::A6( FormalRegExpElement < SymbolType > * & node ) {
FormalRegExpConcatenation < SymbolType > * n = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( node );
if( ! n ) return false;
// input can be \e + \e, so at least one element must be preserved
if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & n->getRight ( ) ) ) {
node = std::move ( n->getLeft ( ) ).clone ( );
if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & n->getLeft ( ) ) ) {
node = std::move ( n->getRight ( ) ).clone ( );
return true;
}
return false;
}
/**
* optimization A7: \0.x = x.\0 = \0
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::A7( FormalRegExpElement < SymbolType > * & node ) {
FormalRegExpConcatenation < SymbolType > * n = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( node );
if( ! n ) return false;
if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & n->getRight ( ) ) || dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & n->getLeft ( ) ) ) {
delete n;
node = new FormalRegExpEmpty < SymbolType > { };
return true;
}
return false;
}
/**
* optimization A8: x.(y+z) = x.y + x.z
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::A8( FormalRegExpElement < SymbolType > * & /* node */) {
return false; //TODO
}
/**
* optimization A9: (x+y).z = x.z + y.z
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::A9( FormalRegExpElement < SymbolType > * & /* node */) {
return false; //TODO
}
/**
* optimization A10: x* = \e + x*x
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::A10( FormalRegExpElement < SymbolType > * & node ) {
/*
* problem:
* - \e + x*x = x*
* - but if we do not have the eps, but we do have iteration, then \e \in h(iter), therefore \e in h(node).
*/
FormalRegExpAlternation < SymbolType > * n = dynamic_cast<FormalRegExpAlternation < SymbolType > * > ( node );
if ( ! n ) return false;
if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & n->getLeft ( ) ) ) {
FormalRegExpConcatenation < SymbolType > * rightCon = dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & n->getRight ( ) );
if ( ! rightCon ) return false;
if ( FormalRegExpIteration < SymbolType > * rightLeftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & rightCon->getLeft ( ) ); rightLeftIte && rightLeftIte->getElement ( ) == rightCon->getRightElement ( ) ) {
node = std::move ( rightCon->getLeft ( ) ).clone ( );
delete n;
return true;
if ( FormalRegExpIteration < SymbolType > * rightRightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & rightCon->getRight ( ) ); rightRightIte && rightRightIte->getElement ( ) == rightCon->getLeftElement ( ) ) {
node = std::move ( rightCon->getRight ( ) ).clone ( );
delete n;
return true;
if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & n->getRight ( ) ) ) {
FormalRegExpConcatenation < SymbolType > * leftCon = dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & n->getLeft ( ) );
if ( ! leftCon ) return false;
if ( FormalRegExpIteration < SymbolType > * leftLeftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & leftCon->getLeft ( ) ); leftLeftIte && leftLeftIte->getElement ( ) == leftCon->getRightElement ( ) ) {
node = std::move ( leftCon->getLeft ( ) ).clone ( );
delete n;
return true;
if ( FormalRegExpIteration < SymbolType > * leftRightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & leftCon->getRight ( ) ); leftRightIte && leftRightIte->getElement ( ) == leftCon->getLeftElement ( ) ) {
node = std::move ( leftCon->getRight ( ) ).clone ( );
delete n;
return true;
}
}
return false;
}
/**
* optimization A11: x* = (\e + x)*
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::A11( FormalRegExpElement < SymbolType > * & node ) {
FormalRegExpIteration < SymbolType > * n = dynamic_cast<FormalRegExpIteration < SymbolType > *>( node );
if( ! n ) return false;
if ( FormalRegExpAlternation < SymbolType > * childAlt = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & n->getChild ( ) ); childAlt ) {
if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & childAlt->getLeft ( ) ) ) {
n->setChild ( std::move ( childAlt->getRight ( ) ) );
if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & childAlt->getRight ( ) ) ) {
n->setChild ( std::move ( childAlt->getLeft ( ) ) );
return true;
}
}
return false;
}
/**
* optimization V1: \0* = \e
* optimization T1: \e* = \e
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::V1( FormalRegExpElement < SymbolType > * & node ) {
FormalRegExpIteration < SymbolType > * n = dynamic_cast<FormalRegExpIteration < SymbolType > *>( node );
if( ! n ) return false;
if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & n->getChild ( ) ) ) {
delete n;
node = new FormalRegExpEpsilon < SymbolType > ( );
if ( dynamic_cast<FormalRegExpEpsilon < SymbolType > * > ( & n->getChild ( ) ) ) {
delete n;
node = new FormalRegExpEpsilon < SymbolType > ( );
return true;
}
return false;
}
/**
* optimization V2: x* + x = x*
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::V2( FormalRegExpElement < SymbolType > * & node ) {
FormalRegExpAlternation < SymbolType > * n = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( node );
if( ! n ) return false;
if ( FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & n->getLeft ( ) ); leftIte && leftIte->getElement ( ) == n->getRightElement ( ) ) {
node = std::move ( n->getLeft ( ) ).clone ( );
delete n;
return true;
if ( FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & n->getRight ( ) ); rightIte && rightIte->getElement ( ) == n->getLeftElement ( ) ) {
node = std::move ( n->getRight ( ) ).clone ( );
delete n;
return true;
}
return false;
}
/**
* optimization V3: x** = x*
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::V3( FormalRegExpElement < SymbolType > * & node ) {
FormalRegExpIteration < SymbolType > * n = dynamic_cast<FormalRegExpIteration < SymbolType > *>( node );
if( ! n ) return false;
if( FormalRegExpIteration < SymbolType > * childIter = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & n->getChild ( ) ); childIter ) {
n->setChild ( std::move ( childIter->getChild ( ) ) );
return true;
}
return false;
}
/**
* optimization V4: (x+y)* = (x*y*)*
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::V4( FormalRegExpElement < SymbolType > * & node ) {
FormalRegExpIteration < SymbolType > * n = dynamic_cast < FormalRegExpIteration < SymbolType > *>( node );
if( ! n ) return false;
if( FormalRegExpConcatenation < SymbolType > * child = dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & n->getChild ( ) ); child ) {
if( FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & child->getLeft ( ) ); leftIte ) {
if( FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & child->getRight ( ) ); rightIte ) {
n->setChild ( FormalRegExpAlternation < SymbolType >( std::move ( leftIte->getElement ( ) ), std::move ( rightIte->getElement ( ) ) ) );
}
}
}
return true;
}
/**
* optimization V5: x*y = y + x*xy
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::V5( FormalRegExpElement < SymbolType > * & /* node */) {
return false; //TODO
}
/**
* optimization V6: x*y = y + xx*y
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::V6( FormalRegExpElement < SymbolType > * & /* node */) {
return false; //TODO
}
/**
* optimization V8: \e in h(x) => xx*=x*
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::V8( FormalRegExpElement < SymbolType > * & /* node */) {
return false; //TODO
}
/**
* optimization V9: (xy)*x = x(yx)*
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::V9( FormalRegExpElement < SymbolType > * & /* node */) {
return false; //TODO
}
/**
* optimization V10: (x+y)* = (x*+y*)*
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::V10( FormalRegExpElement < SymbolType > * & node ) {
FormalRegExpIteration < SymbolType > * n = dynamic_cast < FormalRegExpIteration < SymbolType > *>( node );
if( ! n ) return false;
if( FormalRegExpAlternation < SymbolType > * alt = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & n->getChild ( ) ); alt ) {
if( FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & alt->getLeft ( ) ); leftIte ) {
if( FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & alt->getRight ( ) ); rightIte ) {
alt->setLeft ( std::move ( leftIte->getChild ( ) ) );
alt->setRight ( std::move ( rightIte->getChild ( ) ) );
}
}
}
return true;
}
/**
* optimization X1: a* + \e = a*
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::X1( FormalRegExpElement < SymbolType > * & node ) {
FormalRegExpAlternation < SymbolType > * n = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( node );
if( ! n ) return false;
if ( FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & n->getLeft ( ) ); leftIte && dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & n->getRight ( ) ) ) {
node = std::move ( n->getLeft ( ) ).clone ( );
delete n;
return true;
if ( FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & n->getRight ( ) ); rightIte && dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & n->getLeft ( ) ) ) {
node = std::move ( n->getRight ( ) ).clone ( );
delete n;
return true;
} /* namespace regexp::simplify */