Skip to content
Snippets Groups Projects
RegExpOptimizeFormalPart.hpp 20.5 KiB
Newer Older
/*
 * RegExpOptimize.cpp
 *
 *  Created on: 20. 1. 2014
 *	  Author: Jan Travnicek
 */

namespace regexp {

namespace simplify {

template < class SymbolType >
void RegExpOptimize::optimize( FormalRegExpElement < SymbolType > & element ) {
	ext::smart_ptr < FormalRegExpElement < SymbolType > > optimized = optimizeInner ( element );

	FormalRegExpAlternation < SymbolType > * alternation = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( & element );
	if( alternation ) {
		FormalRegExpAlternation < SymbolType > * alternationOptimized = dynamic_cast<FormalRegExpAlternation < SymbolType > * > ( optimized.get ( ) );
		if( alternationOptimized ) {
			* alternation = std::move( * alternationOptimized );
		} else {
			* alternation = FormalRegExpAlternation < SymbolType > { * optimized, FormalRegExpEmpty < SymbolType > { } };
		}
		return;
	}

	FormalRegExpConcatenation < SymbolType > * concatenation = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( & element );
	if( concatenation ) {
		FormalRegExpConcatenation < SymbolType > * concatenationOptimized = dynamic_cast<FormalRegExpConcatenation < SymbolType > * > ( optimized.get ( ) );
		if( concatenationOptimized ) {
			* concatenation = std::move( * concatenationOptimized );
		} else {
			* concatenation = FormalRegExpConcatenation < SymbolType > { * optimized, FormalRegExpEpsilon < SymbolType > { } };
		}
		return;
	}

	FormalRegExpIteration < SymbolType > * iteration = dynamic_cast<FormalRegExpIteration < SymbolType > *>( & element );
	if( iteration ) {
		FormalRegExpIteration < SymbolType > * iterationOptimized = dynamic_cast<FormalRegExpIteration < SymbolType > *>( optimized.get ( ) );
		if( iterationOptimized ) {
			* iteration = std::move( * iterationOptimized );
		} else {
			* iteration = FormalRegExpIteration < SymbolType > { optimized };
		}
		return;
	}

	// Nothing to optimize original element was FormalRegExpSymbol, FormalRegExpEpsilon, or FormalRegExpEmpty
	return;
}

template < class SymbolType >
ext::smart_ptr < FormalRegExpElement < SymbolType > > RegExpOptimize::optimizeInner( const FormalRegExpElement < SymbolType > & node ) {
	FormalRegExpElement < SymbolType > * elem = node.clone();

	// optimize while you can
	while(    A1( elem ) || A2( elem ) || A3( elem ) || A4( elem ) || A10( elem ) || V2( elem ) || V5( elem ) || V6( elem ) || X1( elem )
	       || A5( elem ) || A6( elem ) || A7( elem ) || A8( elem ) || A9( elem ) || V8( elem ) //|| V9( elem )
	       || A11( elem ) || V1( elem ) || V3( elem ) || V4( elem ) || V10( elem ) || S(elem) );

	return ext::smart_ptr < FormalRegExpElement < SymbolType > > ( elem );
}

template < class SymbolType >
bool RegExpOptimize::S( FormalRegExpElement < SymbolType > * & node ) {
	bool optimized = false;
	FormalRegExpAlternation < SymbolType > * alternation = dynamic_cast<FormalRegExpAlternation < SymbolType >*>( node );
	if( alternation ) {
		ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( alternation->getLeftElement ( ) );
		if(* tmp != alternation->getLeftElement ( ) ) {
			optimized = true;
			alternation->setLeftElement ( * tmp );
		}

		tmp = optimizeInner ( alternation->getRightElement ( ) );
		if(* tmp != alternation->getRightElement ( ) ) {
			optimized = true;
			alternation->setRightElement ( * tmp );
		}

		return optimized;
	}

	FormalRegExpConcatenation < SymbolType > * concatenation = dynamic_cast<FormalRegExpConcatenation < SymbolType >*>( node );
	if( concatenation ) {
		ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( concatenation->getLeftElement() );
		if(* tmp != concatenation->getLeftElement ( ) ) {
			optimized = true;
			concatenation->setLeftElement ( * tmp );
		}

		tmp = optimizeInner ( concatenation->getRightElement ( ));
		if(* tmp != concatenation->getRightElement ( )) {
			optimized = true;
			concatenation->setRightElement ( * tmp );
		}

		return optimized;
	}

	FormalRegExpIteration < SymbolType > * iteration = dynamic_cast<FormalRegExpIteration < SymbolType >*>( node );
	if( iteration ) {
		ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( iteration->getElement() );

		if(* tmp != iteration->getElement ( ) ) {
			optimized = true;
			iteration->setElement ( * tmp );
		}
		return optimized;
	}

	return optimized;
}


/**
  * optimization A1: ( x + y ) + z = x + ( y + z )
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A1( FormalRegExpElement < SymbolType > * & n ) {
	FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n );
	if( ! node ) return false;

	if( dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & node->getLeft ( ) ) ) {
		FormalRegExpAlternation < SymbolType > leftAlt ( std::move ( static_cast < FormalRegExpAlternation < SymbolType > & > ( node->getLeft ( ) ) ) );
		node->setLeft ( std::move ( leftAlt.getLeft ( ) ) );
		leftAlt.setLeft ( std::move ( leftAlt.getRight ( ) ) );
		leftAlt.setRight ( std::move ( node->getRight ( ) ) );
		node->setRight ( std::move ( leftAlt ) );

		return true;
	}

	return false;
}

/**
  * optimization A2: x + y = y + x (sort)
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A2 ( FormalRegExpElement < SymbolType > * & n ) {
	FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n );
	if( ! node ) return false;

	if ( dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & node->getRight ( ) ) ) {
		FormalRegExpAlternation < SymbolType > & rightAlt = static_cast < FormalRegExpAlternation < SymbolType > & > ( node->getRight ( ) );
		if ( node->getLeft ( ) > rightAlt.getLeft ( ) ) {
			ext::ptr_value < FormalRegExpElement < SymbolType > > tmp ( std::move ( node->getLeft ( ) ) );
			node->setLeft ( std::move ( rightAlt.getLeft ( ) ) );
			rightAlt.setLeft ( std::move ( tmp ) );
			return true;
		} else {
			return false;
		}
	}

	return false;
}

/**
  * optimization A3: x + \0 = x
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A3 ( FormalRegExpElement < SymbolType > * & n ) {
	FormalRegExpAlternation < SymbolType > * node = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( n );
	if( ! node ) return false;

	// input can be \0 + \0, so at least one element must be preserved

	if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & node->getRight ( ) ) ) {
		n = std::move ( node->getLeft ( ) ).clone ( );
		delete node;
		return true;
	}

	if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & node->getLeft ( ) ) ) {
		n = std::move ( node->getRight ( ) ).clone ( );
		delete node;
		return true;
	}

	return false;
}

/**
  * optimization A4: x + x = x
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A4( FormalRegExpElement < SymbolType > * & n ) {
	/*
	 * two ways of implementing this opitimization:
	 * - sort and call std::unique ( O(n lg n) + O(n) ), but it also sorts...
	 * - check every element against other ( O(n*n) )
	 *
	 * As we always sort in optimization, we can use the first version, but A4 must be __always__ called __after__ A2
	 */

	FormalRegExpAlternation < SymbolType > * node = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( n );
	if ( ! node ) return false;
	if ( node->getLeftElement() == node->getRightElement() ) {
		n = std::move ( node->getRight ( ) ).clone ( );
		delete node;
		return true;
	}

	return false;
}

/**
  * optimization A5: x.(y.z) = (x.y).z = x.y.z
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A5( FormalRegExpElement < SymbolType > * & n ) {
	FormalRegExpConcatenation < SymbolType > * node = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( n );
	if( ! node ) return false;

	if( dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & node->getLeft ( ) ) ) {
		FormalRegExpConcatenation < SymbolType > leftCon ( std::move ( static_cast < FormalRegExpConcatenation < SymbolType > & > ( node->getLeft ( ) ) ) );
		node->setLeft ( std::move ( leftCon.getLeft ( ) ) );
		leftCon.setLeft ( std::move ( leftCon.getRight ( ) ) );
		leftCon.setRight ( std::move ( node->getRight ( ) ) );
		node->setRight ( std::move ( leftCon ) );

		return true;
	}

	return false;
}

/**
  * optimization A6: \e.x = x.\e = x
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A6( FormalRegExpElement < SymbolType > * & n ) {
	FormalRegExpConcatenation < SymbolType > * node = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( n );
	if( ! node ) return false;

	// input can be \e + \e, so at least one element must be preserved

	if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & node->getRight ( ) ) ) {
		n = std::move ( node->getLeft ( ) ).clone ( );
		delete node;
		return true;
	}

	if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & node->getLeft ( ) ) ) {
		n = std::move ( node->getRight ( ) ).clone ( );
		delete node;
		return true;
	}

	return false;
}

/**
  * optimization A7: \0.x = x.\0 = \0
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A7( FormalRegExpElement < SymbolType > * & n ) {
	FormalRegExpConcatenation < SymbolType > * node = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( n );
	if( ! node ) return false;

	if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & node->getRight ( ) ) || dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & node->getLeft ( ) ) ) {
		delete node;
		n = new FormalRegExpEmpty < SymbolType > { };
		return true;
	}

	return false;
}

/**
  * optimization A8: x.(y+z) = x.y + x.z
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A8( FormalRegExpElement < SymbolType > * & /* n */) {
	return false; //TODO
}

/**
  * optimization A9: (x+y).z = x.z + y.z
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A9( FormalRegExpElement < SymbolType > * & /* n */) {
	return false; //TODO
}

/**
  * optimization A10: x* = \e + x*x
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A10( FormalRegExpElement < SymbolType > * & n ) {
	/*
	 * problem:
	 * - \e + x*x = x*
	 * - but if we do not have the eps, but we do have iteration, then \e \in h(iter), therefore \e in h(node).
	 */

	FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > * > ( n );
	if ( ! node ) return false;
	if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & node->getLeft ( ) ) ) {
		FormalRegExpConcatenation < SymbolType > * rightCon = dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & node->getRight ( ) );
		if ( ! rightCon ) return false;
		FormalRegExpIteration < SymbolType > * rightLeftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & rightCon->getLeft ( ) );
		if ( rightLeftIte ) {
			if ( rightLeftIte->getElement ( ) == rightCon->getRightElement ( ) ) {
				n = std::move ( rightCon->getLeft ( ) ).clone ( );
				delete node;
				return true;
			}
		}

		FormalRegExpIteration < SymbolType > * rightRightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & rightCon->getRight ( ) );
		if ( rightRightIte ) {
			if ( rightRightIte->getElement ( ) == rightCon->getLeftElement ( ) ) {
				n = std::move ( rightCon->getRight ( ) ).clone ( );
				delete node;
				return true;
			}
		}
	}

	if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & node->getRight ( ) ) ) {
		FormalRegExpConcatenation < SymbolType > * leftCon = dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & node->getLeft ( ) );
		if ( ! leftCon ) return false;
		FormalRegExpIteration < SymbolType > * leftLeftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & leftCon->getLeft ( ) );
		if ( leftLeftIte ) {
			if ( leftLeftIte->getElement ( ) == leftCon->getRightElement ( ) ) {
				n = std::move ( leftCon->getLeft ( ) ).clone ( );
				delete node;
				return true;
			}
		}

		FormalRegExpIteration < SymbolType > * leftRightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & leftCon->getRight ( ) );
		if ( leftRightIte ) {
			if ( leftRightIte->getElement ( ) == leftCon->getLeftElement ( ) ) {
				n = std::move ( leftCon->getRight ( ) ).clone ( );
				delete node;
				return true;
			}
		}
	}

	return false;
}

/**
  * optimization A11: x* = (\e + x)*
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::A11( FormalRegExpElement < SymbolType > * & n ) {
	FormalRegExpIteration < SymbolType > * node = dynamic_cast<FormalRegExpIteration < SymbolType > *>( n );
	if( ! node ) return false;

	FormalRegExpAlternation < SymbolType > * childAlt = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & node->getChild ( ) );
	if ( childAlt ) {
		if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & childAlt->getLeft ( ) ) ) {
			node->setChild ( std::move ( childAlt->getRight ( ) ) );
			return true;
		}
		if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & childAlt->getRight ( ) ) ) {
			node->setChild ( std::move ( childAlt->getLeft ( ) ) );
			return true;
		}
	}

	return false;
}

/**
  * optimization V1: \0* = \e
  * optimization T1: \e* = \e
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V1( FormalRegExpElement < SymbolType > * & n ) {
	FormalRegExpIteration < SymbolType > * node = dynamic_cast<FormalRegExpIteration < SymbolType > *>( n );
	if( ! node ) return false;

	if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & node->getChild ( ) ) ) {
		delete node;
		n = new FormalRegExpEpsilon < SymbolType > ( );
		return true;
	}
	if ( dynamic_cast<FormalRegExpEpsilon < SymbolType > * > ( & node->getChild ( ) ) ) {
		delete node;
		n = new FormalRegExpEpsilon < SymbolType > ( );
		return true;
	}
	return false;
}

/**
  * optimization V2: x* + x = x*
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V2( FormalRegExpElement < SymbolType > * & n ) {
	FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n );
	if( ! node ) return false;

	FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & node->getLeft ( ) );
	if ( leftIte ) {
		if ( leftIte->getElement ( ) == node->getRightElement ( ) ) {
			n = std::move ( node->getLeft ( ) ).clone ( );
			delete node;
			return true;
		}
	}

	FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & node->getRight ( ) );
	if ( rightIte ) {
		if ( rightIte->getElement ( ) == node->getLeftElement ( ) ) {
			n = std::move ( node->getRight ( ) ).clone ( );
			delete node;
			return true;
		}
	}

	return false;
}

/**
  * optimization V3: x** = x*
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V3( FormalRegExpElement < SymbolType > * & n ) {
	FormalRegExpIteration < SymbolType > * node = dynamic_cast<FormalRegExpIteration < SymbolType > *>( n );
	if( ! node ) return false;

	FormalRegExpIteration < SymbolType > * childIter = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & node->getChild ( ) );
	if( childIter ) {
		node->setChild ( std::move ( childIter->getChild ( ) ) );
		return true;
	}

	return false;
}

/**
  * optimization V4: (x+y)* = (x*y*)*
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V4( FormalRegExpElement < SymbolType > * & n ) {
	FormalRegExpIteration < SymbolType > * node = dynamic_cast < FormalRegExpIteration < SymbolType > *>( n );
	if( ! node ) return false;

	FormalRegExpConcatenation < SymbolType > * child = dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & node->getChild ( ) );
	if( ! child ) return false;

	FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & child->getLeft ( ) );
	if( ! leftIte ) return false;

	FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & child->getRight ( ) );
	if( ! rightIte ) return false;

	node->setChild ( FormalRegExpAlternation < SymbolType >( std::move ( leftIte->getElement ( ) ), std::move ( rightIte->getElement ( ) ) ) );

	return true;
}

/**
  * optimization V5: x*y = y + x*xy
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V5( FormalRegExpElement < SymbolType > * & /* n */) {
	return false; //TODO
}

/**
  * optimization V6: x*y = y + xx*y
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V6( FormalRegExpElement < SymbolType > * & /* n */) {
	return false; //TODO
}

/**
  * optimization V8: \e in h(x) => xx*=x*
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V8( FormalRegExpElement < SymbolType > * & /* n */) {
	return false; //TODO
}

/**
  * optimization V9: (xy)*x = x(yx)*
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V9( FormalRegExpElement < SymbolType > * & /* n */) {
	return false; //TODO
}

/**
  * optimization V10: (x+y)* = (x*+y*)*
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::V10( FormalRegExpElement < SymbolType > * & n ) {
	FormalRegExpIteration < SymbolType > * node = dynamic_cast < FormalRegExpIteration < SymbolType > *>( n );
	if( ! node ) return false;

	FormalRegExpAlternation < SymbolType > * alt = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & node->getChild ( ) );
	if( ! alt ) return false;

	FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & alt->getLeft ( ) );
	if( ! leftIte ) return false;

	FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & alt->getRight ( ) );
	if( ! rightIte ) return false;

	alt->setLeft ( std::move ( leftIte->getChild ( ) ) );
	alt->setRight ( std::move ( rightIte->getChild ( ) ) );

	return true;
}

/**
  * optimization X1: a* + \e = a*
  * @param node FormalRegExpElement < SymbolType > node
  * @return bool true if optimization applied else false
  */
template < class SymbolType >
bool RegExpOptimize::X1( FormalRegExpElement < SymbolType > * & n ) {
	FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n );
	if( ! node ) return false;

	FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & node->getLeft ( ) );
	if ( leftIte ) {
		if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & node->getRight ( ) ) ) {
			n = std::move ( node->getLeft ( ) ).clone ( );
			delete node;
			return true;
		}
	}

	FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & node->getRight ( ) );
	if ( rightIte ) {
		if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & node->getLeft ( ) ) ) {
			n = std::move ( node->getRight ( ) ).clone ( );
			delete node;
			return true;
		}
	}

	return false;

}

} /* namespace simplify */

} /* namespace regexp */