/* * RegExpOptimize.cpp * * Created on: 20. 1. 2014 * Author: Jan Travnicek */ namespace regexp { namespace simplify { template < class SymbolType > void RegExpOptimize::optimize( FormalRegExpElement < SymbolType > & element ) { FormalRegExpElement < SymbolType >* optimized = optimizeInner( element ); FormalRegExpAlternation < SymbolType > * alternation = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( & element ); if( alternation ) { FormalRegExpAlternation < SymbolType > * alternationOptimized = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( optimized ); if( alternationOptimized ) { * alternation = std::move( * alternationOptimized ); delete alternationOptimized; } else { * alternation = FormalRegExpAlternation < SymbolType > { ext::manage_move ( optimized ), FormalRegExpEmpty < SymbolType > { } }; } return; } FormalRegExpConcatenation < SymbolType > * concatenation = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( & element ); if( concatenation ) { FormalRegExpConcatenation < SymbolType > * concatenationOptimized = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( optimized ); if( concatenationOptimized ) { * concatenation = std::move( * concatenationOptimized ); delete concatenationOptimized; } else { * concatenation = FormalRegExpConcatenation < SymbolType > { ext::manage_move ( optimized ), FormalRegExpEpsilon < SymbolType > { } }; } return; } FormalRegExpIteration < SymbolType > * iteration = dynamic_cast<FormalRegExpIteration < SymbolType > *>( & element ); if( iteration ) { FormalRegExpIteration < SymbolType > * iterationOptimized = dynamic_cast<FormalRegExpIteration < SymbolType > *>( optimized ); if( iterationOptimized ) { * iteration = std::move( * iterationOptimized ); delete iterationOptimized; } else { * iteration = FormalRegExpIteration < SymbolType > { ext::manage_move ( optimized ) }; } return; } // Nothing to optimize original element was FormalRegExpSymbol, FormalRegExpEpsilon, or FormalRegExpEmpty return; } template < class SymbolType > FormalRegExpElement < SymbolType >* RegExpOptimize::optimizeInner( const FormalRegExpElement < SymbolType > & node ) { FormalRegExpElement < SymbolType >* elem = node.clone(); // optimize while you can while( A1( elem ) || A2( elem ) || A3( elem ) || A4( elem ) || A10( elem ) || V2( elem ) || V5( elem ) || V6( elem ) || X1( elem ) || A5( elem ) || A6( elem ) || A7( elem ) || A8( elem ) || A9( elem ) || V8( elem ) //|| V9( elem ) || A11( elem ) || V1( elem ) || V3( elem ) || V4( elem ) || V10( elem ) || S(elem) ); return elem; } template < class SymbolType > bool RegExpOptimize::S( FormalRegExpElement < SymbolType > * & node ) { bool optimized = false; FormalRegExpAlternation < SymbolType > * alternation = dynamic_cast<FormalRegExpAlternation < SymbolType >*>( node ); if( alternation ) { FormalRegExpElement < SymbolType > * tmp = optimizeInner ( alternation->getLeftElement ( ) ); if(* tmp != alternation->getLeftElement ( ) ) { optimized = true; alternation->setLeftElement ( * ext::smart_ptr < FormalRegExpElement < SymbolType > > ( tmp ) ); } tmp = optimizeInner ( alternation->getRightElement ( ) ); if(* tmp != alternation->getRightElement ( ) ) { optimized = true; alternation->setRightElement ( * ext::smart_ptr < FormalRegExpElement < SymbolType > > ( tmp ) ); } return optimized; } FormalRegExpConcatenation < SymbolType > * concatenation = dynamic_cast<FormalRegExpConcatenation < SymbolType >*>( node ); if( concatenation ) { FormalRegExpElement < SymbolType >* tmp = optimizeInner ( concatenation->getLeftElement() ); if(* tmp != concatenation->getLeftElement ( ) ) { optimized = true; concatenation->setLeftElement ( * ext::smart_ptr < FormalRegExpElement < SymbolType > > ( tmp ) ); } tmp = optimizeInner ( concatenation->getRightElement ( )); if(* tmp != concatenation->getRightElement ( )) { optimized = true; concatenation->setRightElement ( * ext::smart_ptr < FormalRegExpElement < SymbolType > > ( tmp ) ); } return optimized; } FormalRegExpIteration < SymbolType > * iteration = dynamic_cast<FormalRegExpIteration < SymbolType >*>( node ); if( iteration ) { FormalRegExpElement < SymbolType >* tmp = optimizeInner ( iteration->getElement() ); if(* tmp != iteration->getElement ( ) ) { optimized = true; iteration->setElement ( * ext::smart_ptr < FormalRegExpElement < SymbolType > > ( tmp ) ); } return optimized; } return optimized; } /** * optimization A1: ( x + y ) + z = x + ( y + z ) * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::A1( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n ); if( ! node ) return false; if( dynamic_cast<FormalRegExpAlternation < SymbolType > *>( node->getLeft().get() ) ) { ext::smart_ptr < FormalRegExpAlternation < SymbolType > > leftAlt ( static_cast<FormalRegExpAlternation < SymbolType > *>( node->getLeft().release() ) ); ext::smart_ptr < FormalRegExpElement < SymbolType > > x = std::move ( leftAlt->getLeft() ); ext::smart_ptr < FormalRegExpElement < SymbolType > > y = std::move ( leftAlt->getRight() ); ext::smart_ptr < FormalRegExpElement < SymbolType > > z = std::move ( node->getRight() ); leftAlt->setLeft ( std::move ( y ) ); leftAlt->setRight ( std::move ( z ) ); node->setLeft ( std::move ( x ) ); node->setRight ( std::move ( leftAlt ) ); return true; } return false; } /** * optimization A2: x + y = y + x (sort) * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::A2( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n ); if( ! node ) return false; if( dynamic_cast<FormalRegExpAlternation < SymbolType > *>( node->getRight().get() ) ) { FormalRegExpAlternation < SymbolType > * rightAlt = static_cast < FormalRegExpAlternation < SymbolType > * > ( node->getRight ( ).get ( ) ); ext::smart_ptr < FormalRegExpElement < SymbolType > > x = std::move ( node->getLeft ( ) ); ext::smart_ptr < FormalRegExpElement < SymbolType > > y = std::move ( rightAlt->getLeft ( ) ); if(*x > *y) { node->setLeft ( std::move ( y ) ); rightAlt->setLeft ( std::move ( x ) ); return true; } else { node->setLeft ( std::move ( x ) ); rightAlt->setLeft ( std::move ( y ) ); return false; } } return false; } /** * optimization A3: x + \0 = x * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::A3( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n ); if( ! node ) return false; // input can be \0 + \0, so at least one element must be preserved if( dynamic_cast<FormalRegExpEmpty < SymbolType > *>( node->getRight().get() ) ) { n = node->getLeft().release(); delete node; return true; } if( dynamic_cast<FormalRegExpEmpty < SymbolType > *>( node->getLeft().get() ) ) { n = node->getRight().release(); delete node; return true; } return false; } /** * optimization A4: x + x = x * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::A4( FormalRegExpElement < SymbolType > * & n ) { /* * two ways of implementing this opitimization: * - sort and call std::unique ( O(n lg n) + O(n) ), but it also sorts... * - check every element against other ( O(n*n) ) * * As we always sort in optimization, we can use the first version, but A4 must be __always__ called __after__ A2 */ FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n ); if( ! node ) return false; if( node->getLeftElement() == node->getRightElement() ) { n = node->getRight().release(); delete node; return true; } return false; } /** * optimization A5: x.(y.z) = (x.y).z = x.y.z * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::A5( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpConcatenation < SymbolType > * node = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( n ); if( ! node ) return false; if( dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( node->getLeft().get() ) ) { ext::smart_ptr < FormalRegExpConcatenation < SymbolType > > leftCon ( static_cast<FormalRegExpConcatenation < SymbolType > *>( node->getLeft().release() ) ); ext::smart_ptr < FormalRegExpElement < SymbolType > > x = std::move ( leftCon->getLeft() ); ext::smart_ptr < FormalRegExpElement < SymbolType > > y = std::move ( leftCon->getRight() ); ext::smart_ptr < FormalRegExpElement < SymbolType > > z = std::move ( node->getRight() ); leftCon->setLeft ( std::move ( y ) ); leftCon->setRight ( std::move ( z ) ); node->setLeft ( std::move ( x ) ); node->setRight ( std::move ( leftCon ) ); return true; } return false; } /** * optimization A6: \e.x = x.\e = x * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::A6( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpConcatenation < SymbolType > * node = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( n ); if( ! node ) return false; // input can be \e + \e, so at least one element must be preserved if( dynamic_cast<FormalRegExpEpsilon < SymbolType > *>( node->getRight().get() ) ) { n = node->getLeft().release(); delete node; return true; } if( dynamic_cast<FormalRegExpEpsilon < SymbolType > *>( node->getLeft().get() ) ) { n = node->getRight().release(); delete node; return true; } return false; } /** * optimization A7: \0.x = x.\0 = \0 * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::A7( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpConcatenation < SymbolType > * node = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( n ); if( ! node ) return false; if( dynamic_cast<FormalRegExpEmpty < SymbolType > *>( node->getRight().get() ) || dynamic_cast<FormalRegExpEmpty < SymbolType > *>( node->getLeft().get() ) ) { delete node; n = new FormalRegExpEmpty < SymbolType > { }; return true; } return false; } /** * optimization A8: x.(y+z) = x.y + x.z * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::A8( FormalRegExpElement < SymbolType > * & /* n */) { return false; //TODO } /** * optimization A9: (x+y).z = x.z + y.z * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::A9( FormalRegExpElement < SymbolType > * & /* n */) { return false; //TODO } /** * optimization A10: x* = \e + x*x * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::A10( FormalRegExpElement < SymbolType > * & n ) { /* * problem: * - \e + x*x = x* * - but if we do not have the eps, but we do have iteration, then \e \in h(iter), therefore \e in h(node). */ FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n ); if( ! node ) return false; if( dynamic_cast<FormalRegExpEpsilon < SymbolType > *>( node->getLeft().get() ) ) { FormalRegExpConcatenation < SymbolType > * rightCon = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( node->getRight().get() ); if( ! rightCon ) return false; FormalRegExpIteration < SymbolType > * rightLeftIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( rightCon->getLeft().get() ); if( rightLeftIte ) { if(rightLeftIte->getElement() == rightCon->getRightElement()) { n = rightCon->getLeft().release(); delete node; return true; } } FormalRegExpIteration < SymbolType > * rightRightIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( rightCon->getRight().get() ); if( rightRightIte ) { if(rightRightIte->getElement() == rightCon->getLeftElement()) { n = rightCon->getRight().release(); delete node; return true; } } } if( dynamic_cast<FormalRegExpEpsilon < SymbolType > *>( node->getRight().get() ) ) { FormalRegExpConcatenation < SymbolType > * leftCon = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( node->getLeft().get() ); if( ! leftCon ) return false; FormalRegExpIteration < SymbolType > * leftLeftIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( leftCon->getLeft().get() ); if( leftLeftIte ) { if(leftLeftIte->getElement() == leftCon->getRightElement()) { n = leftCon->getLeft().release(); delete node; return true; } } FormalRegExpIteration < SymbolType > * leftRightIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( leftCon->getRight().get() ); if( leftRightIte ) { if(leftRightIte->getElement() == leftCon->getLeftElement()) { n = leftCon->getRight().release(); delete node; return true; } } } return false; } /** * optimization A11: x* = (\e + x)* * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::A11( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpIteration < SymbolType > * node = dynamic_cast<FormalRegExpIteration < SymbolType > *>( n ); if( ! node ) return false; FormalRegExpAlternation < SymbolType > * childAlt = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( node->getChild().get() ); if( childAlt ) { if( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( childAlt->getLeft ( ).get ( ) ) ) { node->setChild ( std::move ( childAlt->getRight ( ) ) ); return true; } if( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( childAlt->getRight ( ).get ( ) ) ) { node->setChild ( std::move ( childAlt->getLeft ( ) ) ); return true; } } return false; } /** * optimization V1: \0* = \e * optimization T1: \e* = \e * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::V1( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpIteration < SymbolType > * node = dynamic_cast<FormalRegExpIteration < SymbolType > *>( n ); if( ! node ) return false; if( dynamic_cast<FormalRegExpEmpty< SymbolType > *>( node->getChild ( ).get() ) ) { delete node; n = new FormalRegExpEpsilon < SymbolType > ( ); return true; } if( dynamic_cast<FormalRegExpEpsilon < SymbolType > *>( node->getChild ( ).get() ) ) { delete node; n = new FormalRegExpEpsilon < SymbolType > ( ); return true; } return false; } /** * optimization V2: x* + x = x* * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::V2( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n ); if( ! node ) return false; FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( node->getLeft().get() ); if( leftIte ) { if(leftIte->getElement() == node->getRightElement()) { n = node->getLeft().release(); delete node; return true; } } FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( node->getRight().get() ); if( rightIte ) { if(rightIte->getElement() == node->getLeftElement()) { n = node->getRight().release(); delete node; return true; } } return false; } /** * optimization V3: x** = x* * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::V3( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpIteration < SymbolType > * node = dynamic_cast<FormalRegExpIteration < SymbolType > *>( n ); if( ! node ) return false; FormalRegExpIteration < SymbolType >* childIter = dynamic_cast<FormalRegExpIteration < SymbolType >*>( node->getChild().get() ); if( childIter ) { node->setChild ( std::move ( childIter->getChild ( ) ) ); return true; } return false; } /** * optimization V4: (x+y)* = (x*y*)* * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::V4( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpIteration < SymbolType > * node = dynamic_cast<FormalRegExpIteration < SymbolType > *>( n ); if( ! node ) return false; FormalRegExpConcatenation < SymbolType > * child = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( node->getChild().get() ); if( ! child ) return false; FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( child->getLeft().get() ); if( ! leftIte ) return false; FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( child->getRight().get() ); if( ! rightIte ) return false; n = new FormalRegExpIteration < SymbolType >( FormalRegExpAlternation < SymbolType >(std::move( leftIte->getElement ( ) ), std::move( rightIte->getElement ( ) ) ) ); delete node; return true; } /** * optimization V5: x*y = y + x*xy * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::V5( FormalRegExpElement < SymbolType > * & /* n */) { return false; //TODO } /** * optimization V6: x*y = y + xx*y * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::V6( FormalRegExpElement < SymbolType > * & /* n */) { return false; //TODO } /** * optimization V8: \e in h(x) => xx*=x* * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::V8( FormalRegExpElement < SymbolType > * & /* n */) { return false; //TODO } /** * optimization V9: (xy)*x = x(yx)* * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::V9( FormalRegExpElement < SymbolType > * & /* n */) { return false; //TODO } /** * optimization V10: (x+y)* = (x*+y*)* * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::V10( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n ); if( ! node ) return false; FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( node->getLeft().get() ); if( ! leftIte ) return false; FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( node->getRight().get() ); if( ! rightIte ) return false; n = new FormalRegExpConcatenation < SymbolType >(std::move( leftIte->getElement() ), std::move( rightIte->getElement() ) ); delete node; return true; } /** * optimization X1: a* + \e = a* * @param node FormalRegExpElement < SymbolType > node * @return bool true if optimization applied else false */ template < class SymbolType > bool RegExpOptimize::X1( FormalRegExpElement < SymbolType > * & n ) { FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n ); if( ! node ) return false; FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( node->getLeft().get() ); if( leftIte ) { if(dynamic_cast<FormalRegExpEpsilon < SymbolType > *>(node->getRight().get())) { n = node->getLeft().release(); delete node; return true; } } FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( node->getRight().get() ); if( rightIte ) { if(dynamic_cast<FormalRegExpEpsilon < SymbolType > *>(node->getLeft().get())) { n = node->getRight().release(); delete node; return true; } } return false; } } /* namespace simplify */ } /* namespace regexp */