Newer
Older
/*
* RegExpOptimize.cpp
*
* Created on: 20. 1. 2014
* Author: Jan Travnicek
*/
namespace regexp {
namespace simplify {
template < class SymbolType >
void RegExpOptimize::optimize( FormalRegExpElement < SymbolType > & element ) {
ext::smart_ptr < FormalRegExpElement < SymbolType > > optimized = optimizeInner ( element );
FormalRegExpAlternation < SymbolType > * alternation = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( & element );
if( alternation ) {
FormalRegExpAlternation < SymbolType > * alternationOptimized = dynamic_cast<FormalRegExpAlternation < SymbolType > * > ( optimized.get ( ) );
if( alternationOptimized ) {
* alternation = std::move( * alternationOptimized );
} else {
* alternation = FormalRegExpAlternation < SymbolType > { * optimized, FormalRegExpEmpty < SymbolType > { } };
}
return;
}
FormalRegExpConcatenation < SymbolType > * concatenation = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( & element );
if( concatenation ) {
FormalRegExpConcatenation < SymbolType > * concatenationOptimized = dynamic_cast<FormalRegExpConcatenation < SymbolType > * > ( optimized.get ( ) );
if( concatenationOptimized ) {
* concatenation = std::move( * concatenationOptimized );
} else {
* concatenation = FormalRegExpConcatenation < SymbolType > { * optimized, FormalRegExpEpsilon < SymbolType > { } };
}
return;
}
FormalRegExpIteration < SymbolType > * iteration = dynamic_cast<FormalRegExpIteration < SymbolType > *>( & element );
if( iteration ) {
FormalRegExpIteration < SymbolType > * iterationOptimized = dynamic_cast<FormalRegExpIteration < SymbolType > *>( optimized.get ( ) );
if( iterationOptimized ) {
* iteration = std::move( * iterationOptimized );
} else {
* iteration = FormalRegExpIteration < SymbolType > { optimized };
}
return;
}
// Nothing to optimize original element was FormalRegExpSymbol, FormalRegExpEpsilon, or FormalRegExpEmpty
return;
}
template < class SymbolType >
ext::smart_ptr < FormalRegExpElement < SymbolType > > RegExpOptimize::optimizeInner( const FormalRegExpElement < SymbolType > & node ) {
FormalRegExpElement < SymbolType > * elem = node.clone();
// optimize while you can
while( A1( elem ) || A2( elem ) || A3( elem ) || A4( elem ) || A10( elem ) || V2( elem ) || V5( elem ) || V6( elem ) || X1( elem )
|| A5( elem ) || A6( elem ) || A7( elem ) || A8( elem ) || A9( elem ) || V8( elem ) //|| V9( elem )
|| A11( elem ) || V1( elem ) || V3( elem ) || V4( elem ) || V10( elem ) || S(elem) );
return ext::smart_ptr < FormalRegExpElement < SymbolType > > ( elem );
}
template < class SymbolType >
bool RegExpOptimize::S( FormalRegExpElement < SymbolType > * & node ) {
bool optimized = false;
FormalRegExpAlternation < SymbolType > * alternation = dynamic_cast<FormalRegExpAlternation < SymbolType >*>( node );
if( alternation ) {
ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( alternation->getLeftElement ( ) );
if(* tmp != alternation->getLeftElement ( ) ) {
optimized = true;
}
tmp = optimizeInner ( alternation->getRightElement ( ) );
if(* tmp != alternation->getRightElement ( ) ) {
optimized = true;
}
return optimized;
}
FormalRegExpConcatenation < SymbolType > * concatenation = dynamic_cast<FormalRegExpConcatenation < SymbolType >*>( node );
if( concatenation ) {
ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( concatenation->getLeftElement() );
if(* tmp != concatenation->getLeftElement ( ) ) {
optimized = true;
}
tmp = optimizeInner ( concatenation->getRightElement ( ));
if(* tmp != concatenation->getRightElement ( )) {
optimized = true;
concatenation->setRightElement ( * tmp );
}
return optimized;
}
FormalRegExpIteration < SymbolType > * iteration = dynamic_cast<FormalRegExpIteration < SymbolType >*>( node );
if( iteration ) {
ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( iteration->getElement() );
if(* tmp != iteration->getElement ( ) ) {
optimized = true;
}
return optimized;
}
return optimized;
}
/**
* optimization A1: ( x + y ) + z = x + ( y + z )
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::A1( FormalRegExpElement < SymbolType > * & n ) {
FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n );
if( ! node ) return false;
if( dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & node->getLeft ( ) ) ) {
FormalRegExpAlternation < SymbolType > leftAlt ( std::move ( static_cast < FormalRegExpAlternation < SymbolType > & > ( node->getLeft ( ) ) ) );
node->setLeft ( std::move ( leftAlt.getLeft ( ) ) );
leftAlt.setLeft ( std::move ( leftAlt.getRight ( ) ) );
leftAlt.setRight ( std::move ( node->getRight ( ) ) );
node->setRight ( std::move ( leftAlt ) );
return true;
}
return false;
}
/**
* optimization A2: x + y = y + x (sort)
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::A2 ( FormalRegExpElement < SymbolType > * & n ) {
FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n );
if( ! node ) return false;
if ( dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & node->getRight ( ) ) ) {
FormalRegExpAlternation < SymbolType > & rightAlt = static_cast < FormalRegExpAlternation < SymbolType > & > ( node->getRight ( ) );
if ( node->getLeft ( ) > rightAlt.getLeft ( ) ) {
ext::ptr_value < FormalRegExpElement < SymbolType > > tmp ( std::move ( node->getLeft ( ) ) );
node->setLeft ( std::move ( rightAlt.getLeft ( ) ) );
rightAlt.setLeft ( std::move ( tmp ) );
return true;
} else {
return false;
}
}
return false;
}
/**
* optimization A3: x + \0 = x
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::A3 ( FormalRegExpElement < SymbolType > * & n ) {
FormalRegExpAlternation < SymbolType > * node = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( n );
if( ! node ) return false;
// input can be \0 + \0, so at least one element must be preserved
if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & node->getRight ( ) ) ) {
n = std::move ( node->getLeft ( ) ).clone ( );
if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & node->getLeft ( ) ) ) {
n = std::move ( node->getRight ( ) ).clone ( );
delete node;
return true;
}
return false;
}
/**
* optimization A4: x + x = x
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::A4( FormalRegExpElement < SymbolType > * & n ) {
/*
* two ways of implementing this opitimization:
* - sort and call std::unique ( O(n lg n) + O(n) ), but it also sorts...
* - check every element against other ( O(n*n) )
*
* As we always sort in optimization, we can use the first version, but A4 must be __always__ called __after__ A2
*/
FormalRegExpAlternation < SymbolType > * node = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( n );
if ( ! node ) return false;
if ( node->getLeftElement() == node->getRightElement() ) {
n = std::move ( node->getRight ( ) ).clone ( );
delete node;
return true;
}
return false;
}
/**
* optimization A5: x.(y.z) = (x.y).z = x.y.z
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::A5( FormalRegExpElement < SymbolType > * & n ) {
FormalRegExpConcatenation < SymbolType > * node = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( n );
if( ! node ) return false;
if( dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & node->getLeft ( ) ) ) {
FormalRegExpConcatenation < SymbolType > leftCon ( std::move ( static_cast < FormalRegExpConcatenation < SymbolType > & > ( node->getLeft ( ) ) ) );
node->setLeft ( std::move ( leftCon.getLeft ( ) ) );
leftCon.setLeft ( std::move ( leftCon.getRight ( ) ) );
leftCon.setRight ( std::move ( node->getRight ( ) ) );
node->setRight ( std::move ( leftCon ) );
return true;
}
return false;
}
/**
* optimization A6: \e.x = x.\e = x
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::A6( FormalRegExpElement < SymbolType > * & n ) {
FormalRegExpConcatenation < SymbolType > * node = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( n );
if( ! node ) return false;
// input can be \e + \e, so at least one element must be preserved
if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & node->getRight ( ) ) ) {
n = std::move ( node->getLeft ( ) ).clone ( );
if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & node->getLeft ( ) ) ) {
n = std::move ( node->getRight ( ) ).clone ( );
delete node;
return true;
}
return false;
}
/**
* optimization A7: \0.x = x.\0 = \0
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::A7( FormalRegExpElement < SymbolType > * & n ) {
FormalRegExpConcatenation < SymbolType > * node = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( n );
if( ! node ) return false;
if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & node->getRight ( ) ) || dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & node->getLeft ( ) ) ) {
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
delete node;
n = new FormalRegExpEmpty < SymbolType > { };
return true;
}
return false;
}
/**
* optimization A8: x.(y+z) = x.y + x.z
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::A8( FormalRegExpElement < SymbolType > * & /* n */) {
return false; //TODO
}
/**
* optimization A9: (x+y).z = x.z + y.z
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::A9( FormalRegExpElement < SymbolType > * & /* n */) {
return false; //TODO
}
/**
* optimization A10: x* = \e + x*x
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::A10( FormalRegExpElement < SymbolType > * & n ) {
/*
* problem:
* - \e + x*x = x*
* - but if we do not have the eps, but we do have iteration, then \e \in h(iter), therefore \e in h(node).
*/
FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > * > ( n );
if ( ! node ) return false;
if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & node->getLeft ( ) ) ) {
FormalRegExpConcatenation < SymbolType > * rightCon = dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & node->getRight ( ) );
if ( ! rightCon ) return false;
FormalRegExpIteration < SymbolType > * rightLeftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & rightCon->getLeft ( ) );
if ( rightLeftIte ) {
if ( rightLeftIte->getElement ( ) == rightCon->getRightElement ( ) ) {
n = std::move ( rightCon->getLeft ( ) ).clone ( );
FormalRegExpIteration < SymbolType > * rightRightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & rightCon->getRight ( ) );
if ( rightRightIte ) {
if ( rightRightIte->getElement ( ) == rightCon->getLeftElement ( ) ) {
n = std::move ( rightCon->getRight ( ) ).clone ( );
delete node;
return true;
}
}
}
if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & node->getRight ( ) ) ) {
FormalRegExpConcatenation < SymbolType > * leftCon = dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & node->getLeft ( ) );
if ( ! leftCon ) return false;
FormalRegExpIteration < SymbolType > * leftLeftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & leftCon->getLeft ( ) );
if ( leftLeftIte ) {
if ( leftLeftIte->getElement ( ) == leftCon->getRightElement ( ) ) {
n = std::move ( leftCon->getLeft ( ) ).clone ( );
FormalRegExpIteration < SymbolType > * leftRightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & leftCon->getRight ( ) );
if ( leftRightIte ) {
if ( leftRightIte->getElement ( ) == leftCon->getLeftElement ( ) ) {
n = std::move ( leftCon->getRight ( ) ).clone ( );
delete node;
return true;
}
}
}
return false;
}
/**
* optimization A11: x* = (\e + x)*
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::A11( FormalRegExpElement < SymbolType > * & n ) {
FormalRegExpIteration < SymbolType > * node = dynamic_cast<FormalRegExpIteration < SymbolType > *>( n );
if( ! node ) return false;
FormalRegExpAlternation < SymbolType > * childAlt = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & node->getChild ( ) );
if ( childAlt ) {
if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & childAlt->getLeft ( ) ) ) {
node->setChild ( std::move ( childAlt->getRight ( ) ) );
return true;
}
if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & childAlt->getRight ( ) ) ) {
node->setChild ( std::move ( childAlt->getLeft ( ) ) );
return true;
}
}
return false;
}
/**
* optimization V1: \0* = \e
* optimization T1: \e* = \e
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::V1( FormalRegExpElement < SymbolType > * & n ) {
FormalRegExpIteration < SymbolType > * node = dynamic_cast<FormalRegExpIteration < SymbolType > *>( n );
if( ! node ) return false;
if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & node->getChild ( ) ) ) {
delete node;
n = new FormalRegExpEpsilon < SymbolType > ( );
return true;
}
if ( dynamic_cast<FormalRegExpEpsilon < SymbolType > * > ( & node->getChild ( ) ) ) {
delete node;
n = new FormalRegExpEpsilon < SymbolType > ( );
return true;
}
return false;
}
/**
* optimization V2: x* + x = x*
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::V2( FormalRegExpElement < SymbolType > * & n ) {
FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n );
if( ! node ) return false;
FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & node->getLeft ( ) );
if ( leftIte ) {
if ( leftIte->getElement ( ) == node->getRightElement ( ) ) {
n = std::move ( node->getLeft ( ) ).clone ( );
FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & node->getRight ( ) );
if ( rightIte ) {
if ( rightIte->getElement ( ) == node->getLeftElement ( ) ) {
n = std::move ( node->getRight ( ) ).clone ( );
delete node;
return true;
}
}
return false;
}
/**
* optimization V3: x** = x*
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::V3( FormalRegExpElement < SymbolType > * & n ) {
FormalRegExpIteration < SymbolType > * node = dynamic_cast<FormalRegExpIteration < SymbolType > *>( n );
if( ! node ) return false;
FormalRegExpIteration < SymbolType > * childIter = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & node->getChild ( ) );
if( childIter ) {
node->setChild ( std::move ( childIter->getChild ( ) ) );
return true;
}
return false;
}
/**
* optimization V4: (x+y)* = (x*y*)*
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::V4( FormalRegExpElement < SymbolType > * & n ) {
FormalRegExpIteration < SymbolType > * node = dynamic_cast < FormalRegExpIteration < SymbolType > *>( n );
FormalRegExpConcatenation < SymbolType > * child = dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & node->getChild ( ) );
FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & child->getLeft ( ) );
FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & child->getRight ( ) );
node->setChild ( FormalRegExpAlternation < SymbolType >( std::move ( leftIte->getElement ( ) ), std::move ( rightIte->getElement ( ) ) ) );
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
return true;
}
/**
* optimization V5: x*y = y + x*xy
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::V5( FormalRegExpElement < SymbolType > * & /* n */) {
return false; //TODO
}
/**
* optimization V6: x*y = y + xx*y
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::V6( FormalRegExpElement < SymbolType > * & /* n */) {
return false; //TODO
}
/**
* optimization V8: \e in h(x) => xx*=x*
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::V8( FormalRegExpElement < SymbolType > * & /* n */) {
return false; //TODO
}
/**
* optimization V9: (xy)*x = x(yx)*
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::V9( FormalRegExpElement < SymbolType > * & /* n */) {
return false; //TODO
}
/**
* optimization V10: (x+y)* = (x*+y*)*
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::V10( FormalRegExpElement < SymbolType > * & n ) {
FormalRegExpIteration < SymbolType > * node = dynamic_cast < FormalRegExpIteration < SymbolType > *>( n );
FormalRegExpAlternation < SymbolType > * alt = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & node->getChild ( ) );
if( ! alt ) return false;
FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & alt->getLeft ( ) );
FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & alt->getRight ( ) );
alt->setLeft ( std::move ( leftIte->getChild ( ) ) );
alt->setRight ( std::move ( rightIte->getChild ( ) ) );
return true;
}
/**
* optimization X1: a* + \e = a*
* @param node FormalRegExpElement < SymbolType > node
* @return bool true if optimization applied else false
*/
template < class SymbolType >
bool RegExpOptimize::X1( FormalRegExpElement < SymbolType > * & n ) {
FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n );
if( ! node ) return false;
FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & node->getLeft ( ) );
if ( leftIte ) {
if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & node->getRight ( ) ) ) {
n = std::move ( node->getLeft ( ) ).clone ( );
FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & node->getRight ( ) );
if ( rightIte ) {
if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & node->getLeft ( ) ) ) {
n = std::move ( node->getRight ( ) ).clone ( );