/* * RegExpOptimize.cpp * * Created on: 20. 1. 2014 * Author: Jan Travnicek */ namespace regexp { namespace simplify { template < class SymbolType > void RegExpOptimize::optimize( FormalRegExpElement < SymbolType > & element ) { ext::smart_ptr < FormalRegExpElement < SymbolType > > optimized = optimizeInner ( element ); FormalRegExpAlternation < SymbolType > * alternation = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( & element ); if( alternation ) { FormalRegExpAlternation < SymbolType > * alternationOptimized = dynamic_cast<FormalRegExpAlternation < SymbolType > * > ( optimized.get ( ) ); if( alternationOptimized ) { * alternation = std::move( * alternationOptimized ); } else { * alternation = FormalRegExpAlternation < SymbolType > { * optimized, FormalRegExpEmpty < SymbolType > { } }; } return; } FormalRegExpConcatenation < SymbolType > * concatenation = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( & element ); if( concatenation ) { FormalRegExpConcatenation < SymbolType > * concatenationOptimized = dynamic_cast<FormalRegExpConcatenation < SymbolType > * > ( optimized.get ( ) ); if( concatenationOptimized ) { * concatenation = std::move( * concatenationOptimized ); } else { * concatenation = FormalRegExpConcatenation < SymbolType > { * optimized, FormalRegExpEpsilon < SymbolType > { } }; } return; } FormalRegExpIteration < SymbolType > * iteration = dynamic_cast<FormalRegExpIteration < SymbolType > *>( & element ); if( iteration ) { FormalRegExpIteration < SymbolType > * iterationOptimized = dynamic_cast<FormalRegExpIteration < SymbolType > *>( optimized.get ( ) ); if( iterationOptimized ) { * iteration = std::move( * iterationOptimized ); } else { * iteration = FormalRegExpIteration < SymbolType > { optimized }; } return; } // Nothing to optimize original element was FormalRegExpSymbol, FormalRegExpEpsilon, or FormalRegExpEmpty return; } template < class SymbolType > ext::smart_ptr < FormalRegExpElement < SymbolType > > RegExpOptimize::optimizeInner( const FormalRegExpElement < SymbolType > & node ) { FormalRegExpElement < SymbolType > * elem = node.clone(); // optimize while you can while( A1( elem ) || A2( elem ) || A3( elem ) || A4( elem ) || A10( elem ) || V2( elem ) || V5( elem ) || V6( elem ) || X1( elem ) || A5( elem ) || A6( elem ) || A7( elem ) || A8( elem ) || A9( elem ) || V8( elem ) //|| V9( elem ) || A11( elem ) || V1( elem ) || V3( elem ) || V4( elem ) || V10( elem ) || S(elem) ); return ext::smart_ptr < FormalRegExpElement < SymbolType > > ( elem ); } template < class SymbolType > bool RegExpOptimize::S( FormalRegExpElement < SymbolType > * & node ) { bool optimized = false; FormalRegExpAlternation < SymbolType > * alternation = dynamic_cast<FormalRegExpAlternation < SymbolType >*>( node ); if( alternation ) { ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( alternation->getLeftElement ( ) ); if(* tmp != alternation->getLeftElement ( ) ) { optimized = true; alternation->setLeftElement ( * tmp ); } tmp = optimizeInner ( alternation->getRightElement ( ) ); if(* tmp != alternation->getRightElement ( ) ) { optimized = true; alternation->setRightElement ( * tmp ); } return optimized; } FormalRegExpConcatenation < SymbolType > * concatenation = dynamic_cast<FormalRegExpConcatenation < SymbolType >*>( node ); if( concatenation ) { ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( concatenation->getLeftElement() ); if(* tmp != concatenation->getLeftElement ( ) ) { optimized = true; concatenation->setLeftElement ( * tmp ); } tmp = optimizeInner ( concatenation->getRightElement ( )); if(* tmp != concatenation->getRightElement ( )) { optimized = true; concatenation->setRightElement ( * tmp ); } return optimized; } FormalRegExpIteration < SymbolType > * iteration = dynamic_cast<FormalRegExpIteration < SymbolType >*>( node ); if( iteration ) { ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( iteration->getElement() ); if(* tmp != iteration->getElement ( ) ) { optimized = true; iteration->setElement ( * tmp ); } return optimized; } return optimized; } /** * optimization A1: ( x + y ) + z = x + ( y + z ) * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::A1( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n ); if( ! node ) return false; if( dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & node->getLeft ( ) ) ) { FormalRegExpAlternation < SymbolType > leftAlt ( std::move ( static_cast < FormalRegExpAlternation < SymbolType > & > ( node->getLeft ( ) ) ) ); node->setLeft ( std::move ( leftAlt.getLeft ( ) ) ); leftAlt.setLeft ( std::move ( leftAlt.getRight ( ) ) ); leftAlt.setRight ( std::move ( node->getRight ( ) ) ); node->setRight ( std::move ( leftAlt ) ); return true; } return false; } /** * optimization A2: x + y = y + x (sort) * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::A2 ( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n ); if( ! node ) return false; if ( dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & node->getRight ( ) ) ) { FormalRegExpAlternation < SymbolType > & rightAlt = static_cast < FormalRegExpAlternation < SymbolType > & > ( node->getRight ( ) ); if ( node->getLeft ( ) > rightAlt.getLeft ( ) ) { ext::ptr_value < FormalRegExpElement < SymbolType > > tmp ( std::move ( node->getLeft ( ) ) ); node->setLeft ( std::move ( rightAlt.getLeft ( ) ) ); rightAlt.setLeft ( std::move ( tmp ) ); return true; } else { return false; } } return false; } /** * optimization A3: x + \0 = x * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::A3 ( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpAlternation < SymbolType > * node = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( n ); if( ! node ) return false; // input can be \0 + \0, so at least one element must be preserved if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & node->getRight ( ) ) ) { n = std::move ( node->getLeft ( ) ).clone ( ); delete node; return true; } if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & node->getLeft ( ) ) ) { n = std::move ( node->getRight ( ) ).clone ( ); delete node; return true; } return false; } /** * optimization A4: x + x = x * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::A4( FormalRegExpElement < SymbolType > * & n ) { /* * two ways of implementing this opitimization: * - sort and call std::unique ( O(n lg n) + O(n) ), but it also sorts... * - check every element against other ( O(n*n) ) * * As we always sort in optimization, we can use the first version, but A4 must be __always__ called __after__ A2 */ FormalRegExpAlternation < SymbolType > * node = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( n ); if ( ! node ) return false; if ( node->getLeftElement() == node->getRightElement() ) { n = std::move ( node->getRight ( ) ).clone ( ); delete node; return true; } return false; } /** * optimization A5: x.(y.z) = (x.y).z = x.y.z * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::A5( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpConcatenation < SymbolType > * node = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( n ); if( ! node ) return false; if( dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & node->getLeft ( ) ) ) { FormalRegExpConcatenation < SymbolType > leftCon ( std::move ( static_cast < FormalRegExpConcatenation < SymbolType > & > ( node->getLeft ( ) ) ) ); node->setLeft ( std::move ( leftCon.getLeft ( ) ) ); leftCon.setLeft ( std::move ( leftCon.getRight ( ) ) ); leftCon.setRight ( std::move ( node->getRight ( ) ) ); node->setRight ( std::move ( leftCon ) ); return true; } return false; } /** * optimization A6: \e.x = x.\e = x * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::A6( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpConcatenation < SymbolType > * node = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( n ); if( ! node ) return false; // input can be \e + \e, so at least one element must be preserved if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & node->getRight ( ) ) ) { n = std::move ( node->getLeft ( ) ).clone ( ); delete node; return true; } if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & node->getLeft ( ) ) ) { n = std::move ( node->getRight ( ) ).clone ( ); delete node; return true; } return false; } /** * optimization A7: \0.x = x.\0 = \0 * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::A7( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpConcatenation < SymbolType > * node = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( n ); if( ! node ) return false; if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & node->getRight ( ) ) || dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & node->getLeft ( ) ) ) { delete node; n = new FormalRegExpEmpty < SymbolType > { }; return true; } return false; } /** * optimization A8: x.(y+z) = x.y + x.z * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::A8( FormalRegExpElement < SymbolType > * & /* n */) { return false; //TODO } /** * optimization A9: (x+y).z = x.z + y.z * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::A9( FormalRegExpElement < SymbolType > * & /* n */) { return false; //TODO } /** * optimization A10: x* = \e + x*x * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::A10( FormalRegExpElement < SymbolType > * & n ) { /* * problem: * - \e + x*x = x* * - but if we do not have the eps, but we do have iteration, then \e \in h(iter), therefore \e in h(node). */ FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > * > ( n ); if ( ! node ) return false; if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & node->getLeft ( ) ) ) { FormalRegExpConcatenation < SymbolType > * rightCon = dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & node->getRight ( ) ); if ( ! rightCon ) return false; FormalRegExpIteration < SymbolType > * rightLeftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & rightCon->getLeft ( ) ); if ( rightLeftIte ) { if ( rightLeftIte->getElement ( ) == rightCon->getRightElement ( ) ) { n = std::move ( rightCon->getLeft ( ) ).clone ( ); delete node; return true; } } FormalRegExpIteration < SymbolType > * rightRightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & rightCon->getRight ( ) ); if ( rightRightIte ) { if ( rightRightIte->getElement ( ) == rightCon->getLeftElement ( ) ) { n = std::move ( rightCon->getRight ( ) ).clone ( ); delete node; return true; } } } if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & node->getRight ( ) ) ) { FormalRegExpConcatenation < SymbolType > * leftCon = dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & node->getLeft ( ) ); if ( ! leftCon ) return false; FormalRegExpIteration < SymbolType > * leftLeftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & leftCon->getLeft ( ) ); if ( leftLeftIte ) { if ( leftLeftIte->getElement ( ) == leftCon->getRightElement ( ) ) { n = std::move ( leftCon->getLeft ( ) ).clone ( ); delete node; return true; } } FormalRegExpIteration < SymbolType > * leftRightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & leftCon->getRight ( ) ); if ( leftRightIte ) { if ( leftRightIte->getElement ( ) == leftCon->getLeftElement ( ) ) { n = std::move ( leftCon->getRight ( ) ).clone ( ); delete node; return true; } } } return false; } /** * optimization A11: x* = (\e + x)* * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::A11( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpIteration < SymbolType > * node = dynamic_cast<FormalRegExpIteration < SymbolType > *>( n ); if( ! node ) return false; FormalRegExpAlternation < SymbolType > * childAlt = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & node->getChild ( ) ); if ( childAlt ) { if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & childAlt->getLeft ( ) ) ) { node->setChild ( std::move ( childAlt->getRight ( ) ) ); return true; } if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & childAlt->getRight ( ) ) ) { node->setChild ( std::move ( childAlt->getLeft ( ) ) ); return true; } } return false; } /** * optimization V1: \0* = \e * optimization T1: \e* = \e * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::V1( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpIteration < SymbolType > * node = dynamic_cast<FormalRegExpIteration < SymbolType > *>( n ); if( ! node ) return false; if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & node->getChild ( ) ) ) { delete node; n = new FormalRegExpEpsilon < SymbolType > ( ); return true; } if ( dynamic_cast<FormalRegExpEpsilon < SymbolType > * > ( & node->getChild ( ) ) ) { delete node; n = new FormalRegExpEpsilon < SymbolType > ( ); return true; } return false; } /** * optimization V2: x* + x = x* * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::V2( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n ); if( ! node ) return false; FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & node->getLeft ( ) ); if ( leftIte ) { if ( leftIte->getElement ( ) == node->getRightElement ( ) ) { n = std::move ( node->getLeft ( ) ).clone ( ); delete node; return true; } } FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & node->getRight ( ) ); if ( rightIte ) { if ( rightIte->getElement ( ) == node->getLeftElement ( ) ) { n = std::move ( node->getRight ( ) ).clone ( ); delete node; return true; } } return false; } /** * optimization V3: x** = x* * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::V3( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpIteration < SymbolType > * node = dynamic_cast<FormalRegExpIteration < SymbolType > *>( n ); if( ! node ) return false; FormalRegExpIteration < SymbolType > * childIter = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & node->getChild ( ) ); if( childIter ) { node->setChild ( std::move ( childIter->getChild ( ) ) ); return true; } return false; } /** * optimization V4: (x+y)* = (x*y*)* * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::V4( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpIteration < SymbolType > * node = dynamic_cast < FormalRegExpIteration < SymbolType > *>( n ); if( ! node ) return false; FormalRegExpConcatenation < SymbolType > * child = dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & node->getChild ( ) ); if( ! child ) return false; FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & child->getLeft ( ) ); if( ! leftIte ) return false; FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & child->getRight ( ) ); if( ! rightIte ) return false; node->setChild ( FormalRegExpAlternation < SymbolType >( std::move ( leftIte->getElement ( ) ), std::move ( rightIte->getElement ( ) ) ) ); return true; } /** * optimization V5: x*y = y + x*xy * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::V5( FormalRegExpElement < SymbolType > * & /* n */) { return false; //TODO } /** * optimization V6: x*y = y + xx*y * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::V6( FormalRegExpElement < SymbolType > * & /* n */) { return false; //TODO } /** * optimization V8: \e in h(x) => xx*=x* * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::V8( FormalRegExpElement < SymbolType > * & /* n */) { return false; //TODO } /** * optimization V9: (xy)*x = x(yx)* * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::V9( FormalRegExpElement < SymbolType > * & /* n */) { return false; //TODO } /** * optimization V10: (x+y)* = (x*+y*)* * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::V10( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpIteration < SymbolType > * node = dynamic_cast < FormalRegExpIteration < SymbolType > *>( n ); if( ! node ) return false; FormalRegExpAlternation < SymbolType > * alt = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & node->getChild ( ) ); if( ! alt ) return false; FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & alt->getLeft ( ) ); if( ! leftIte ) return false; FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & alt->getRight ( ) ); if( ! rightIte ) return false; alt->setLeft ( std::move ( leftIte->getChild ( ) ) ); alt->setRight ( std::move ( rightIte->getChild ( ) ) ); return true; } /** * optimization X1: a* + \e = a* * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::X1( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n ); if( ! node ) return false; FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & node->getLeft ( ) ); if ( leftIte ) { if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & node->getRight ( ) ) ) { n = std::move ( node->getLeft ( ) ).clone ( ); delete node; return true; } } FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & node->getRight ( ) ); if ( rightIte ) { if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & node->getLeft ( ) ) ) { n = std::move ( node->getRight ( ) ).clone ( ); delete node; return true; } } return false; } } /* namespace simplify */ } /* namespace regexp */