From c36708af233c068ced1d704ea7a638539e89d273 Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Sun, 4 Dec 2016 19:55:24 +0100 Subject: [PATCH] template regexp optimize --- .../src/regexp/simplify/RegExpOptimize.cpp | 31 - .../src/regexp/simplify/RegExpOptimize.h | 220 +++++-- .../simplify/RegExpOptimizeFormalPart.cxx | 587 ----------------- .../simplify/RegExpOptimizeFormalPart.hpp | 611 ++++++++++++++++++ ...rt.cxx => RegExpOptimizeUnboundedPart.hpp} | 355 +++++----- 5 files changed, 959 insertions(+), 845 deletions(-) delete mode 100644 alib2algo/src/regexp/simplify/RegExpOptimizeFormalPart.cxx create mode 100644 alib2algo/src/regexp/simplify/RegExpOptimizeFormalPart.hpp rename alib2algo/src/regexp/simplify/{RegExpOptimizeUnboundedPart.cxx => RegExpOptimizeUnboundedPart.hpp} (55%) diff --git a/alib2algo/src/regexp/simplify/RegExpOptimize.cpp b/alib2algo/src/regexp/simplify/RegExpOptimize.cpp index 093c0806d8..89c3430354 100644 --- a/alib2algo/src/regexp/simplify/RegExpOptimize.cpp +++ b/alib2algo/src/regexp/simplify/RegExpOptimize.cpp @@ -7,13 +7,6 @@ #include "RegExpOptimize.h" -#include "../properties/RegExpEpsilon.h" -#include <exception/CommonException.h> - -#include <cassert> -#include <iostream> -#include <iostream> - namespace regexp { namespace simplify { @@ -22,33 +15,9 @@ regexp::RegExp RegExpOptimize::optimize(const regexp::RegExp& regexp) { return dispatch(regexp.getData()); } -FormalRegExp < > RegExpOptimize::optimize( const FormalRegExp < > & regexp ) { - return regexp::FormalRegExp < > ( RegExpOptimize::optimize ( regexp.getRegExp ( ) ) ); -} - auto RegExpOptimizeFormalRegExp = RegExpOptimize::RegistratorWrapper<FormalRegExp < >, FormalRegExp < > >(RegExpOptimize::optimize); - -FormalRegExpStructure < alphabet::Symbol > RegExpOptimize::optimize( const FormalRegExpStructure < alphabet::Symbol > & regexp ) { - FormalRegExpElement < alphabet::Symbol > * optimized = optimizeInner( regexp.getStructure ( ) ); - - return regexp::FormalRegExpStructure < alphabet::Symbol > ( std::manage_move ( optimized ) ); -} - -UnboundedRegExp < > RegExpOptimize::optimize( const UnboundedRegExp < > & regexp ) { - return regexp::UnboundedRegExp < > ( RegExpOptimize::optimize ( regexp.getRegExp ( ) ) ); -} - auto RegExpOptimizeUnboundedRegExp = RegExpOptimize::RegistratorWrapper<UnboundedRegExp < >, UnboundedRegExp < > >(RegExpOptimize::optimize); -UnboundedRegExpStructure < alphabet::Symbol > RegExpOptimize::optimize( const UnboundedRegExpStructure < alphabet::Symbol > & regexp ) { - UnboundedRegExpElement < alphabet::Symbol > * optimized = optimizeInner( regexp.getStructure( ) ); - - return regexp::UnboundedRegExpStructure < alphabet::Symbol > ( std::manage_move ( optimized ) ); -} - } /* namespace regexp */ } /* namespace simplify */ - -#include "RegExpOptimizeUnboundedPart.cxx" -#include "RegExpOptimizeFormalPart.cxx" diff --git a/alib2algo/src/regexp/simplify/RegExpOptimize.h b/alib2algo/src/regexp/simplify/RegExpOptimize.h index 640f031f1d..1455f12756 100644 --- a/alib2algo/src/regexp/simplify/RegExpOptimize.h +++ b/alib2algo/src/regexp/simplify/RegExpOptimize.h @@ -11,6 +11,7 @@ #include <algorithm> #include <functional> #include <iterator> +#include <iostream> #include <core/multipleDispatch.hpp> @@ -22,6 +23,9 @@ #include <regexp/formal/FormalRegExp.h> #include <regexp/formal/FormalRegExpElements.h> +#include <regexp/properties/RegExpEpsilon.h> +#include <exception/CommonException.h> + namespace regexp { namespace simplify { @@ -65,76 +69,162 @@ class RegExpOptimize : public std::SingleDispatch<RegExpOptimize, regexp::RegExp public: static regexp::RegExp optimize( const regexp::RegExp & regexp ); - static regexp::UnboundedRegExp < > optimize( const regexp::UnboundedRegExp < > & regexp ); - static regexp::UnboundedRegExpStructure < alphabet::Symbol > optimize( const regexp::UnboundedRegExpStructure < alphabet::Symbol > & regexp ); - static void optimize( regexp::UnboundedRegExpAlternation < alphabet::Symbol > & regexp ); - static void optimize( regexp::UnboundedRegExpConcatenation < alphabet::Symbol > & regexp ); - static void optimize( regexp::UnboundedRegExpIteration < alphabet::Symbol > & regexp ); - - static regexp::FormalRegExp < > optimize( const regexp::FormalRegExp < > & regexp ); - static regexp::FormalRegExpStructure < alphabet::Symbol > optimize( const regexp::FormalRegExpStructure < alphabet::Symbol > & regexp ); - static void optimize( regexp::FormalRegExpElement < alphabet::Symbol > & regexp ); + template < class SymbolType > + static regexp::UnboundedRegExp < SymbolType > optimize( const regexp::UnboundedRegExp < SymbolType > & regexp ); + template < class SymbolType > + static regexp::UnboundedRegExpStructure < SymbolType > optimize( const regexp::UnboundedRegExpStructure < SymbolType > & regexp ); + template < class SymbolType > + static void optimize( regexp::UnboundedRegExpAlternation < SymbolType > & regexp ); + template < class SymbolType > + static void optimize( regexp::UnboundedRegExpConcatenation < SymbolType > & regexp ); + template < class SymbolType > + static void optimize( regexp::UnboundedRegExpIteration < SymbolType > & regexp ); + + template < class SymbolType > + static regexp::FormalRegExp < SymbolType > optimize( const regexp::FormalRegExp < SymbolType > & regexp ); + template < class SymbolType > + static regexp::FormalRegExpStructure < SymbolType > optimize( const regexp::FormalRegExpStructure < SymbolType > & regexp ); + template < class SymbolType > + static void optimize( regexp::FormalRegExpElement < SymbolType > & regexp ); private: - static regexp::FormalRegExpElement < alphabet::Symbol > * optimizeInner( const regexp::FormalRegExpElement < alphabet::Symbol > & node ); - - static regexp::UnboundedRegExpElement < alphabet::Symbol > * optimizeInner( const regexp::UnboundedRegExpElement < alphabet::Symbol > & node ); - static regexp::UnboundedRegExpElement < alphabet::Symbol > * optimizeInner( const regexp::UnboundedRegExpAlternation < alphabet::Symbol > & node ); - static regexp::UnboundedRegExpElement < alphabet::Symbol > * optimizeInner( const regexp::UnboundedRegExpConcatenation < alphabet::Symbol > & node ); - static regexp::UnboundedRegExpElement < alphabet::Symbol > * optimizeInner( const regexp::UnboundedRegExpIteration < alphabet::Symbol > & node ); - static regexp::UnboundedRegExpElement < alphabet::Symbol > * optimizeInner( const regexp::UnboundedRegExpSymbol < alphabet::Symbol > & node ); - static regexp::UnboundedRegExpElement < alphabet::Symbol > * optimizeInner( const regexp::UnboundedRegExpEpsilon < alphabet::Symbol > & node ); - static regexp::UnboundedRegExpElement < alphabet::Symbol > * optimizeInner( const regexp::UnboundedRegExpEmpty < alphabet::Symbol > & node ); - - static bool A1( regexp::UnboundedRegExpAlternation < alphabet::Symbol > & node ); - static bool A2( regexp::UnboundedRegExpAlternation < alphabet::Symbol > & node ); - static bool A3( regexp::UnboundedRegExpAlternation < alphabet::Symbol > & node ); - static bool A4( regexp::UnboundedRegExpAlternation < alphabet::Symbol > & node ); - static bool A5( regexp::UnboundedRegExpConcatenation < alphabet::Symbol > & node ); - static bool A6( regexp::UnboundedRegExpConcatenation < alphabet::Symbol > & node ); - static bool A7( regexp::UnboundedRegExpConcatenation < alphabet::Symbol > & node ); - static bool A8( regexp::UnboundedRegExpConcatenation < alphabet::Symbol > & node ); - static bool A9( regexp::UnboundedRegExpConcatenation < alphabet::Symbol > & node ); - static bool A10( regexp::UnboundedRegExpAlternation < alphabet::Symbol > & node ); - static bool A11( regexp::UnboundedRegExpIteration < alphabet::Symbol > & node ); - static bool V1( regexp::UnboundedRegExpIteration < alphabet::Symbol > & node ); - static bool V2( regexp::UnboundedRegExpAlternation < alphabet::Symbol > & node ); - static bool V3( regexp::UnboundedRegExpIteration < alphabet::Symbol > & node ); - static bool V4( regexp::UnboundedRegExpIteration < alphabet::Symbol > & node ); - static bool V5( regexp::UnboundedRegExpAlternation < alphabet::Symbol > & node ); - static bool V6( regexp::UnboundedRegExpAlternation < alphabet::Symbol > & node ); - static bool V8( regexp::UnboundedRegExpConcatenation < alphabet::Symbol > & node ); - static bool V9( regexp::UnboundedRegExpConcatenation < alphabet::Symbol > & node ); - static bool V10( regexp::UnboundedRegExpIteration < alphabet::Symbol > & node ); - - static bool X1( regexp::UnboundedRegExpAlternation < alphabet::Symbol > & node ); - - static bool S( regexp::FormalRegExpElement < alphabet::Symbol > * & node ); - static bool A1( regexp::FormalRegExpElement < alphabet::Symbol > * & node ); - static bool A2( regexp::FormalRegExpElement < alphabet::Symbol > * & node ); - static bool A3( regexp::FormalRegExpElement < alphabet::Symbol > * & node ); - static bool A4( regexp::FormalRegExpElement < alphabet::Symbol > * & node ); - static bool A5( regexp::FormalRegExpElement < alphabet::Symbol > * & node ); - static bool A6( regexp::FormalRegExpElement < alphabet::Symbol > * & node ); - static bool A7( regexp::FormalRegExpElement < alphabet::Symbol > * & node ); - static bool A8( regexp::FormalRegExpElement < alphabet::Symbol > * & node ); - static bool A9( regexp::FormalRegExpElement < alphabet::Symbol > * & node ); - static bool A10( regexp::FormalRegExpElement < alphabet::Symbol > * & node ); - static bool A11( regexp::FormalRegExpElement < alphabet::Symbol > * & node ); - static bool V1( regexp::FormalRegExpElement < alphabet::Symbol > * & node ); - static bool V2( regexp::FormalRegExpElement < alphabet::Symbol > * & node ); - static bool V3( regexp::FormalRegExpElement < alphabet::Symbol > * & node ); - static bool V4( regexp::FormalRegExpElement < alphabet::Symbol > * & node ); - static bool V5( regexp::FormalRegExpElement < alphabet::Symbol > * & node ); - static bool V6( regexp::FormalRegExpElement < alphabet::Symbol > * & node ); - static bool V8( regexp::FormalRegExpElement < alphabet::Symbol > * & node ); - static bool V9( regexp::FormalRegExpElement < alphabet::Symbol > * & node ); - static bool V10( regexp::FormalRegExpElement < alphabet::Symbol > * & node ); - - static bool X1( regexp::FormalRegExpElement < alphabet::Symbol > * & node ); + template < class SymbolType > + static regexp::FormalRegExpElement < SymbolType > * optimizeInner( const regexp::FormalRegExpElement < SymbolType > & node ); + + template < class SymbolType > + static regexp::UnboundedRegExpElement < SymbolType > * optimizeInner( const regexp::UnboundedRegExpElement < SymbolType > & node ); + template < class SymbolType > + static regexp::UnboundedRegExpElement < SymbolType > * optimizeInner( const regexp::UnboundedRegExpAlternation < SymbolType > & node ); + template < class SymbolType > + static regexp::UnboundedRegExpElement < SymbolType > * optimizeInner( const regexp::UnboundedRegExpConcatenation < SymbolType > & node ); + template < class SymbolType > + static regexp::UnboundedRegExpElement < SymbolType > * optimizeInner( const regexp::UnboundedRegExpIteration < SymbolType > & node ); + template < class SymbolType > + static regexp::UnboundedRegExpElement < SymbolType > * optimizeInner( const regexp::UnboundedRegExpSymbol < SymbolType > & node ); + template < class SymbolType > + static regexp::UnboundedRegExpElement < SymbolType > * optimizeInner( const regexp::UnboundedRegExpEpsilon < SymbolType > & node ); + template < class SymbolType > + static regexp::UnboundedRegExpElement < SymbolType > * optimizeInner( const regexp::UnboundedRegExpEmpty < SymbolType > & node ); + + template < class SymbolType > + static bool A1( regexp::UnboundedRegExpAlternation < SymbolType > & node ); + template < class SymbolType > + static bool A2( regexp::UnboundedRegExpAlternation < SymbolType > & node ); + template < class SymbolType > + static bool A3( regexp::UnboundedRegExpAlternation < SymbolType > & node ); + template < class SymbolType > + static bool A4( regexp::UnboundedRegExpAlternation < SymbolType > & node ); + template < class SymbolType > + static bool A5( regexp::UnboundedRegExpConcatenation < SymbolType > & node ); + template < class SymbolType > + static bool A6( regexp::UnboundedRegExpConcatenation < SymbolType > & node ); + template < class SymbolType > + static bool A7( regexp::UnboundedRegExpConcatenation < SymbolType > & node ); + template < class SymbolType > + static bool A8( regexp::UnboundedRegExpConcatenation < SymbolType > & node ); + template < class SymbolType > + static bool A9( regexp::UnboundedRegExpConcatenation < SymbolType > & node ); + template < class SymbolType > + static bool A10( regexp::UnboundedRegExpAlternation < SymbolType > & node ); + template < class SymbolType > + static bool A11( regexp::UnboundedRegExpIteration < SymbolType > & node ); + template < class SymbolType > + static bool V1( regexp::UnboundedRegExpIteration < SymbolType > & node ); + template < class SymbolType > + static bool V2( regexp::UnboundedRegExpAlternation < SymbolType > & node ); + template < class SymbolType > + static bool V3( regexp::UnboundedRegExpIteration < SymbolType > & node ); + template < class SymbolType > + static bool V4( regexp::UnboundedRegExpIteration < SymbolType > & node ); + template < class SymbolType > + static bool V5( regexp::UnboundedRegExpAlternation < SymbolType > & node ); + template < class SymbolType > + static bool V6( regexp::UnboundedRegExpAlternation < SymbolType > & node ); + template < class SymbolType > + static bool V8( regexp::UnboundedRegExpConcatenation < SymbolType > & node ); + template < class SymbolType > + static bool V9( regexp::UnboundedRegExpConcatenation < SymbolType > & node ); + template < class SymbolType > + static bool V10( regexp::UnboundedRegExpIteration < SymbolType > & node ); + + template < class SymbolType > + static bool X1( regexp::UnboundedRegExpAlternation < SymbolType > & node ); + + template < class SymbolType > + static bool S( regexp::FormalRegExpElement < SymbolType > * & node ); + template < class SymbolType > + static bool A1( regexp::FormalRegExpElement < SymbolType > * & node ); + template < class SymbolType > + static bool A2( regexp::FormalRegExpElement < SymbolType > * & node ); + template < class SymbolType > + static bool A3( regexp::FormalRegExpElement < SymbolType > * & node ); + template < class SymbolType > + static bool A4( regexp::FormalRegExpElement < SymbolType > * & node ); + template < class SymbolType > + static bool A5( regexp::FormalRegExpElement < SymbolType > * & node ); + template < class SymbolType > + static bool A6( regexp::FormalRegExpElement < SymbolType > * & node ); + template < class SymbolType > + static bool A7( regexp::FormalRegExpElement < SymbolType > * & node ); + template < class SymbolType > + static bool A8( regexp::FormalRegExpElement < SymbolType > * & node ); + template < class SymbolType > + static bool A9( regexp::FormalRegExpElement < SymbolType > * & node ); + template < class SymbolType > + static bool A10( regexp::FormalRegExpElement < SymbolType > * & node ); + template < class SymbolType > + static bool A11( regexp::FormalRegExpElement < SymbolType > * & node ); + template < class SymbolType > + static bool V1( regexp::FormalRegExpElement < SymbolType > * & node ); + template < class SymbolType > + static bool V2( regexp::FormalRegExpElement < SymbolType > * & node ); + template < class SymbolType > + static bool V3( regexp::FormalRegExpElement < SymbolType > * & node ); + template < class SymbolType > + static bool V4( regexp::FormalRegExpElement < SymbolType > * & node ); + template < class SymbolType > + static bool V5( regexp::FormalRegExpElement < SymbolType > * & node ); + template < class SymbolType > + static bool V6( regexp::FormalRegExpElement < SymbolType > * & node ); + template < class SymbolType > + static bool V8( regexp::FormalRegExpElement < SymbolType > * & node ); + template < class SymbolType > + static bool V9( regexp::FormalRegExpElement < SymbolType > * & node ); + template < class SymbolType > + static bool V10( regexp::FormalRegExpElement < SymbolType > * & node ); + + template < class SymbolType > + static bool X1( regexp::FormalRegExpElement < SymbolType > * & node ); }; +template < class SymbolType > +FormalRegExp < SymbolType > RegExpOptimize::optimize( const FormalRegExp < SymbolType > & regexp ) { + return regexp::FormalRegExp < SymbolType > ( RegExpOptimize::optimize ( regexp.getRegExp ( ) ) ); +} + +template < class SymbolType > +FormalRegExpStructure < SymbolType > RegExpOptimize::optimize( const FormalRegExpStructure < SymbolType > & regexp ) { + FormalRegExpElement < SymbolType > * optimized = optimizeInner( regexp.getStructure ( ) ); + + return regexp::FormalRegExpStructure < SymbolType > ( std::manage_move ( optimized ) ); +} + +template < class SymbolType > +UnboundedRegExp < SymbolType > RegExpOptimize::optimize( const UnboundedRegExp < SymbolType > & regexp ) { + return regexp::UnboundedRegExp < SymbolType > ( RegExpOptimize::optimize ( regexp.getRegExp ( ) ) ); +} + +template < class SymbolType > +UnboundedRegExpStructure < SymbolType > RegExpOptimize::optimize( const UnboundedRegExpStructure < SymbolType > & regexp ) { + UnboundedRegExpElement < SymbolType > * optimized = optimizeInner( regexp.getStructure( ) ); + + return regexp::UnboundedRegExpStructure < SymbolType > ( std::manage_move ( optimized ) ); +} + } /* namespace simplify */ } /* namespace regexp */ +#include "RegExpOptimizeUnboundedPart.hpp" +#include "RegExpOptimizeFormalPart.hpp" + #endif /* REGEXPNORMALIZE_H_ */ diff --git a/alib2algo/src/regexp/simplify/RegExpOptimizeFormalPart.cxx b/alib2algo/src/regexp/simplify/RegExpOptimizeFormalPart.cxx deleted file mode 100644 index 88acbde88c..0000000000 --- a/alib2algo/src/regexp/simplify/RegExpOptimizeFormalPart.cxx +++ /dev/null @@ -1,587 +0,0 @@ -/* - * RegExpOptimize.cpp - * - * Created on: 20. 1. 2014 - * Author: Jan Travnicek - */ - -namespace regexp { - -namespace simplify { - -void RegExpOptimize::optimize( FormalRegExpElement < alphabet::Symbol > & element ) { - FormalRegExpElement < alphabet::Symbol >* optimized = optimizeInner( element ); - - FormalRegExpAlternation < alphabet::Symbol > * alternation = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( & element ); - if( alternation ) { - FormalRegExpAlternation < alphabet::Symbol > * alternationOptimized = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( optimized ); - if( alternationOptimized ) { - * alternation = std::move( * alternationOptimized ); - delete alternationOptimized; - } else { - * alternation = FormalRegExpAlternation < alphabet::Symbol > { std::manage_move ( optimized ), FormalRegExpEmpty < alphabet::Symbol > { } }; - } - return; - } - - FormalRegExpConcatenation < alphabet::Symbol > * concatenation = dynamic_cast<FormalRegExpConcatenation < alphabet::Symbol > *>( & element ); - if( concatenation ) { - FormalRegExpConcatenation < alphabet::Symbol > * concatenationOptimized = dynamic_cast<FormalRegExpConcatenation < alphabet::Symbol > *>( optimized ); - if( concatenationOptimized ) { - * concatenation = std::move( * concatenationOptimized ); - delete concatenationOptimized; - } else { - * concatenation = FormalRegExpConcatenation < alphabet::Symbol > { std::manage_move ( optimized ), FormalRegExpEpsilon < alphabet::Symbol > { } }; - } - return; - } - - FormalRegExpIteration < alphabet::Symbol > * iteration = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( & element ); - if( iteration ) { - FormalRegExpIteration < alphabet::Symbol > * iterationOptimized = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( optimized ); - if( iterationOptimized ) { - * iteration = std::move( * iterationOptimized ); - delete iterationOptimized; - } else { - * iteration = FormalRegExpIteration < alphabet::Symbol > { std::manage_move ( optimized ) }; - } - return; - } - - // Nothing to optimize original element was FormalRegExpSymbol, FormalRegExpEpsilon, or FormalRegExpEmpty - return; -} - -FormalRegExpElement < alphabet::Symbol >* RegExpOptimize::optimizeInner( const FormalRegExpElement < alphabet::Symbol > & node ) { - FormalRegExpElement < alphabet::Symbol >* elem = node.clone(); - - // optimize while you can - while( A1( elem ) || A2( elem ) || A3( elem ) || A4( elem ) || A10( elem ) || V2( elem ) || V5( elem ) || V6( elem ) || X1( elem ) - || A5( elem ) || A6( elem ) || A7( elem ) || A8( elem ) || A9( elem ) || V8( elem ) //|| V9( elem ) - || A11( elem ) || V1( elem ) || V3( elem ) || V4( elem ) || V10( elem ) || S(elem) ); - - return elem; -} - -bool RegExpOptimize::S( FormalRegExpElement < alphabet::Symbol > * & node ) { - bool optimized = false; - FormalRegExpAlternation < alphabet::Symbol > * alternation = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol >*>( node ); - if( alternation ) { - FormalRegExpElement < alphabet::Symbol > * tmp = optimizeInner ( alternation->getLeftElement ( ) ); - if(* tmp != alternation->getLeftElement ( ) ) { - optimized = true; - alternation->setLeftElement ( * std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > ( tmp ) ); - } - - tmp = optimizeInner ( alternation->getRightElement ( ) ); - if(* tmp != alternation->getRightElement ( ) ) { - optimized = true; - alternation->setRightElement ( * std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > ( tmp ) ); - } - - return optimized; - } - - FormalRegExpConcatenation < alphabet::Symbol > * concatenation = dynamic_cast<FormalRegExpConcatenation < alphabet::Symbol >*>( node ); - if( concatenation ) { - FormalRegExpElement < alphabet::Symbol >* tmp = optimizeInner ( concatenation->getLeftElement() ); - if(* tmp != concatenation->getLeftElement ( ) ) { - optimized = true; - concatenation->setLeftElement ( * std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > ( tmp ) ); - } - - tmp = optimizeInner ( concatenation->getRightElement ( )); - if(* tmp != concatenation->getRightElement ( )) { - optimized = true; - concatenation->setRightElement ( * std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > ( tmp ) ); - } - - return optimized; - } - - FormalRegExpIteration < alphabet::Symbol > * iteration = dynamic_cast<FormalRegExpIteration < alphabet::Symbol >*>( node ); - if( iteration ) { - FormalRegExpElement < alphabet::Symbol >* tmp = optimizeInner ( iteration->getElement() ); - - if(* tmp != iteration->getElement ( ) ) { - optimized = true; - iteration->setElement ( * std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > ( tmp ) ); - } - return optimized; - } - - return optimized; -} - - -/** - * optimization A1: ( x + y ) + z = x + ( y + z ) - * @param node FormalRegExpElement < alphabet::Symbol > node - * @return bool true if optimization applied else false - */ -bool RegExpOptimize::A1( FormalRegExpElement < alphabet::Symbol > * & n ) { - FormalRegExpAlternation < alphabet::Symbol > * node = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( n ); - if( ! node ) return false; - - if( dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( node->getLeft().get() ) ) { - std::smart_ptr < FormalRegExpAlternation < alphabet::Symbol > > leftAlt ( static_cast<FormalRegExpAlternation < alphabet::Symbol > *>( node->getLeft().release() ) ); - - std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > x = std::move ( leftAlt->getLeft() ); - std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > y = std::move ( leftAlt->getRight() ); - std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > z = std::move ( node->getRight() ); - - leftAlt->setLeft ( std::move ( y ) ); - leftAlt->setRight ( std::move ( z ) ); - node->setLeft ( std::move ( x ) ); - node->setRight ( std::move ( leftAlt ) ); - - return true; - } - - return false; -} - -/** - * optimization A2: x + y = y + x (sort) - * @param node FormalRegExpElement < alphabet::Symbol > node - * @return bool true if optimization applied else false - */ -bool RegExpOptimize::A2( FormalRegExpElement < alphabet::Symbol > * & n ) { - FormalRegExpAlternation < alphabet::Symbol > * node = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( n ); - if( ! node ) return false; - - if( dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( node->getRight().get() ) ) { - FormalRegExpAlternation < alphabet::Symbol > * rightAlt = static_cast < FormalRegExpAlternation < alphabet::Symbol > * > ( node->getRight ( ).get ( ) ); - - std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > x = std::move ( node->getLeft ( ) ); - std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > y = std::move ( rightAlt->getLeft ( ) ); - - if(*x > *y) { - node->setLeft ( std::move ( y ) ); - rightAlt->setLeft ( std::move ( x ) ); - return true; - } else { - node->setLeft ( std::move ( x ) ); - rightAlt->setLeft ( std::move ( y ) ); - return false; - } - } - - return false; -} - -/** - * optimization A3: x + \0 = x - * @param node FormalRegExpElement < alphabet::Symbol > node - * @return bool true if optimization applied else false - */ -bool RegExpOptimize::A3( FormalRegExpElement < alphabet::Symbol > * & n ) { - FormalRegExpAlternation < alphabet::Symbol > * node = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( n ); - if( ! node ) return false; - - // input can be \0 + \0, so at least one element must be preserved - - if( dynamic_cast<FormalRegExpEmpty < alphabet::Symbol > *>( node->getRight().get() ) ) { - n = node->getLeft().release(); - delete node; - return true; - } - - if( dynamic_cast<FormalRegExpEmpty < alphabet::Symbol > *>( node->getLeft().get() ) ) { - n = node->getRight().release(); - delete node; - return true; - } - - return false; -} - -/** - * optimization A4: x + x = x - * @param node FormalRegExpElement < alphabet::Symbol > node - * @return bool true if optimization applied else false - */ -bool RegExpOptimize::A4( FormalRegExpElement < alphabet::Symbol > * & n ) { - /* - * two ways of implementing this opitimization: - * - sort and call std::unique ( O(n lg n) + O(n) ), but it also sorts... - * - check every element against other ( O(n*n) ) - * - * As we always sort in optimization, we can use the first version, but A4 must be __always__ called __after__ A2 - */ - - FormalRegExpAlternation < alphabet::Symbol > * node = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( n ); - if( ! node ) return false; - - if( node->getLeftElement() == node->getRightElement() ) { - n = node->getRight().release(); - delete node; - return true; - } - - return false; -} - -/** - * optimization A5: x.(y.z) = (x.y).z = x.y.z - * @param node FormalRegExpElement < alphabet::Symbol > node - * @return bool true if optimization applied else false - */ -bool RegExpOptimize::A5( FormalRegExpElement < alphabet::Symbol > * & n ) { - FormalRegExpConcatenation < alphabet::Symbol > * node = dynamic_cast<FormalRegExpConcatenation < alphabet::Symbol > *>( n ); - if( ! node ) return false; - - if( dynamic_cast<FormalRegExpConcatenation < alphabet::Symbol > *>( node->getLeft().get() ) ) { - std::smart_ptr < FormalRegExpConcatenation < alphabet::Symbol > > leftCon ( static_cast<FormalRegExpConcatenation < alphabet::Symbol > *>( node->getLeft().release() ) ); - - std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > x = std::move ( leftCon->getLeft() ); - std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > y = std::move ( leftCon->getRight() ); - std::smart_ptr < FormalRegExpElement < alphabet::Symbol > > z = std::move ( node->getRight() ); - - leftCon->setLeft ( std::move ( y ) ); - leftCon->setRight ( std::move ( z ) ); - node->setLeft ( std::move ( x ) ); - node->setRight ( std::move ( leftCon ) ); - - return true; - } - - return false; -} - -/** - * optimization A6: \e.x = x.\e = x - * @param node FormalRegExpElement < alphabet::Symbol > node - * @return bool true if optimization applied else false - */ -bool RegExpOptimize::A6( FormalRegExpElement < alphabet::Symbol > * & n ) { - FormalRegExpConcatenation < alphabet::Symbol > * node = dynamic_cast<FormalRegExpConcatenation < alphabet::Symbol > *>( n ); - if( ! node ) return false; - - // input can be \e + \e, so at least one element must be preserved - - if( dynamic_cast<FormalRegExpEpsilon < alphabet::Symbol > *>( node->getRight().get() ) ) { - n = node->getLeft().release(); - delete node; - return true; - } - - if( dynamic_cast<FormalRegExpEpsilon < alphabet::Symbol > *>( node->getLeft().get() ) ) { - n = node->getRight().release(); - delete node; - return true; - } - - return false; -} - -/** - * optimization A7: \0.x = x.\0 = \0 - * @param node FormalRegExpElement < alphabet::Symbol > node - * @return bool true if optimization applied else false - */ -bool RegExpOptimize::A7( FormalRegExpElement < alphabet::Symbol > * & n ) { - FormalRegExpConcatenation < alphabet::Symbol > * node = dynamic_cast<FormalRegExpConcatenation < alphabet::Symbol > *>( n ); - if( ! node ) return false; - - if( dynamic_cast<FormalRegExpEmpty < alphabet::Symbol > *>( node->getRight().get() ) || dynamic_cast<FormalRegExpEmpty < alphabet::Symbol > *>( node->getLeft().get() ) ) { - delete node; - n = new FormalRegExpEmpty < alphabet::Symbol > { }; - return true; - } - - return false; -} - -/** - * optimization A8: x.(y+z) = x.y + x.z - * @param node FormalRegExpElement < alphabet::Symbol > node - * @return bool true if optimization applied else false - */ -bool RegExpOptimize::A8( FormalRegExpElement < alphabet::Symbol > * & /* n */) { - return false; //TODO -} - -/** - * optimization A9: (x+y).z = x.z + y.z - * @param node FormalRegExpElement < alphabet::Symbol > node - * @return bool true if optimization applied else false - */ -bool RegExpOptimize::A9( FormalRegExpElement < alphabet::Symbol > * & /* n */) { - return false; //TODO -} - -/** - * optimization A10: x* = \e + x*x - * @param node FormalRegExpElement < alphabet::Symbol > node - * @return bool true if optimization applied else false - */ -bool RegExpOptimize::A10( FormalRegExpElement < alphabet::Symbol > * & n ) { - /* - * problem: - * - \e + x*x = x* - * - but if we do not have the eps, but we do have iteration, then \e \in h(iter), therefore \e in h(node). - */ - - FormalRegExpAlternation < alphabet::Symbol > * node = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( n ); - if( ! node ) return false; - - if( dynamic_cast<FormalRegExpEpsilon < alphabet::Symbol > *>( node->getLeft().get() ) ) { - FormalRegExpConcatenation < alphabet::Symbol > * rightCon = dynamic_cast<FormalRegExpConcatenation < alphabet::Symbol > *>( node->getRight().get() ); - if( ! rightCon ) return false; - - FormalRegExpIteration < alphabet::Symbol > * rightLeftIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( rightCon->getLeft().get() ); - if( rightLeftIte ) { - if(rightLeftIte->getElement() == rightCon->getRightElement()) { - n = rightCon->getLeft().release(); - delete node; - return true; - } - } - - FormalRegExpIteration < alphabet::Symbol > * rightRightIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( rightCon->getRight().get() ); - if( rightRightIte ) { - if(rightRightIte->getElement() == rightCon->getLeftElement()) { - n = rightCon->getRight().release(); - delete node; - return true; - } - } - } - - if( dynamic_cast<FormalRegExpEpsilon < alphabet::Symbol > *>( node->getRight().get() ) ) { - FormalRegExpConcatenation < alphabet::Symbol > * leftCon = dynamic_cast<FormalRegExpConcatenation < alphabet::Symbol > *>( node->getLeft().get() ); - if( ! leftCon ) return false; - - FormalRegExpIteration < alphabet::Symbol > * leftLeftIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( leftCon->getLeft().get() ); - if( leftLeftIte ) { - if(leftLeftIte->getElement() == leftCon->getRightElement()) { - n = leftCon->getLeft().release(); - delete node; - return true; - } - } - - FormalRegExpIteration < alphabet::Symbol > * leftRightIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( leftCon->getRight().get() ); - if( leftRightIte ) { - if(leftRightIte->getElement() == leftCon->getLeftElement()) { - n = leftCon->getRight().release(); - delete node; - return true; - } - } - } - - return false; -} - -/** - * optimization A11: x* = (\e + x)* - * @param node FormalRegExpElement < alphabet::Symbol > node - * @return bool true if optimization applied else false - */ -bool RegExpOptimize::A11( FormalRegExpElement < alphabet::Symbol > * & n ) { - FormalRegExpIteration < alphabet::Symbol > * node = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( n ); - if( ! node ) return false; - - FormalRegExpAlternation < alphabet::Symbol > * childAlt = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( node->getChild().get() ); - if( childAlt ) { - if( dynamic_cast < FormalRegExpEpsilon < alphabet::Symbol > * > ( childAlt->getLeft ( ).get ( ) ) ) { - node->setChild ( std::move ( childAlt->getRight ( ) ) ); - return true; - } - if( dynamic_cast < FormalRegExpEpsilon < alphabet::Symbol > * > ( childAlt->getRight ( ).get ( ) ) ) { - node->setChild ( std::move ( childAlt->getLeft ( ) ) ); - return true; - } - } - - return false; -} - -/** - * optimization V1: \0* = \e - * optimization T1: \e* = \e - * @param node FormalRegExpElement < alphabet::Symbol > node - * @return bool true if optimization applied else false - */ -bool RegExpOptimize::V1( FormalRegExpElement < alphabet::Symbol > * & n ) { - FormalRegExpIteration < alphabet::Symbol > * node = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( n ); - if( ! node ) return false; - - if( dynamic_cast<FormalRegExpEmpty< alphabet::Symbol > *>( node->getChild ( ).get() ) ) { - delete node; - n = new FormalRegExpEpsilon < alphabet::Symbol > ( ); - return true; - } - if( dynamic_cast<FormalRegExpEpsilon < alphabet::Symbol > *>( node->getChild ( ).get() ) ) { - delete node; - n = new FormalRegExpEpsilon < alphabet::Symbol > ( ); - return true; - } - return false; -} - -/** - * optimization V2: x* + x = x* - * @param node FormalRegExpElement < alphabet::Symbol > node - * @return bool true if optimization applied else false - */ -bool RegExpOptimize::V2( FormalRegExpElement < alphabet::Symbol > * & n ) { - FormalRegExpAlternation < alphabet::Symbol > * node = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( n ); - if( ! node ) return false; - - FormalRegExpIteration < alphabet::Symbol > * leftIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( node->getLeft().get() ); - if( leftIte ) { - if(leftIte->getElement() == node->getRightElement()) { - n = node->getLeft().release(); - delete node; - return true; - } - } - - FormalRegExpIteration < alphabet::Symbol > * rightIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( node->getRight().get() ); - if( rightIte ) { - if(rightIte->getElement() == node->getLeftElement()) { - n = node->getRight().release(); - delete node; - return true; - } - } - - return false; -} - -/** - * optimization V3: x** = x* - * @param node FormalRegExpElement < alphabet::Symbol > node - * @return bool true if optimization applied else false - */ -bool RegExpOptimize::V3( FormalRegExpElement < alphabet::Symbol > * & n ) { - FormalRegExpIteration < alphabet::Symbol > * node = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( n ); - if( ! node ) return false; - - FormalRegExpIteration < alphabet::Symbol >* childIter = dynamic_cast<FormalRegExpIteration < alphabet::Symbol >*>( node->getChild().get() ); - if( childIter ) { - node->setChild ( std::move ( childIter->getChild ( ) ) ); - return true; - } - - return false; -} - -/** - * optimization V4: (x+y)* = (x*y*)* - * @param node FormalRegExpElement < alphabet::Symbol > node - * @return bool true if optimization applied else false - */ -bool RegExpOptimize::V4( FormalRegExpElement < alphabet::Symbol > * & n ) { - FormalRegExpIteration < alphabet::Symbol > * node = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( n ); - if( ! node ) return false; - - FormalRegExpConcatenation < alphabet::Symbol > * child = dynamic_cast<FormalRegExpConcatenation < alphabet::Symbol > *>( node->getChild().get() ); - if( ! child ) return false; - - FormalRegExpIteration < alphabet::Symbol > * leftIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( child->getLeft().get() ); - if( ! leftIte ) return false; - - FormalRegExpIteration < alphabet::Symbol > * rightIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( child->getRight().get() ); - if( ! rightIte ) return false; - - n = new FormalRegExpIteration < alphabet::Symbol >( FormalRegExpAlternation < alphabet::Symbol >(std::move( leftIte->getElement ( ) ), std::move( rightIte->getElement ( ) ) ) ); - - delete node; - return true; -} - -/** - * optimization V5: x*y = y + x*xy - * @param node FormalRegExpElement < alphabet::Symbol > node - * @return bool true if optimization applied else false - */ -bool RegExpOptimize::V5( FormalRegExpElement < alphabet::Symbol > * & /* n */) { - return false; //TODO -} - -/** - * optimization V6: x*y = y + xx*y - * @param node FormalRegExpElement < alphabet::Symbol > node - * @return bool true if optimization applied else false - */ -bool RegExpOptimize::V6( FormalRegExpElement < alphabet::Symbol > * & /* n */) { - return false; //TODO -} - -/** - * optimization V8: \e in h(x) => xx*=x* - * @param node FormalRegExpElement < alphabet::Symbol > node - * @return bool true if optimization applied else false - */ -bool RegExpOptimize::V8( FormalRegExpElement < alphabet::Symbol > * & /* n */) { - return false; //TODO -} - -/** - * optimization V9: (xy)*x = x(yx)* - * @param node FormalRegExpElement < alphabet::Symbol > node - * @return bool true if optimization applied else false - */ -bool RegExpOptimize::V9( FormalRegExpElement < alphabet::Symbol > * & /* n */) { - return false; //TODO -} - -/** - * optimization V10: (x+y)* = (x*+y*)* - * @param node FormalRegExpElement < alphabet::Symbol > node - * @return bool true if optimization applied else false - */ -bool RegExpOptimize::V10( FormalRegExpElement < alphabet::Symbol > * & n ) { - FormalRegExpAlternation < alphabet::Symbol > * node = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( n ); - if( ! node ) return false; - - FormalRegExpIteration < alphabet::Symbol > * leftIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( node->getLeft().get() ); - if( ! leftIte ) return false; - - FormalRegExpIteration < alphabet::Symbol > * rightIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( node->getRight().get() ); - if( ! rightIte ) return false; - - n = new FormalRegExpConcatenation < alphabet::Symbol >(std::move( leftIte->getElement() ), std::move( rightIte->getElement() ) ); - - delete node; - return true; -} - -/** - * optimization X1: a* + \e = a* - * @param node FormalRegExpElement < alphabet::Symbol > node - * @return bool true if optimization applied else false - */ -bool RegExpOptimize::X1( FormalRegExpElement < alphabet::Symbol > * & n ) { - FormalRegExpAlternation < alphabet::Symbol > * node = dynamic_cast<FormalRegExpAlternation < alphabet::Symbol > *>( n ); - if( ! node ) return false; - - FormalRegExpIteration < alphabet::Symbol > * leftIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( node->getLeft().get() ); - if( leftIte ) { - if(dynamic_cast<FormalRegExpEpsilon < alphabet::Symbol > *>(node->getRight().get())) { - n = node->getLeft().release(); - delete node; - return true; - } - } - - FormalRegExpIteration < alphabet::Symbol > * rightIte = dynamic_cast<FormalRegExpIteration < alphabet::Symbol > *>( node->getRight().get() ); - if( rightIte ) { - if(dynamic_cast<FormalRegExpEpsilon < alphabet::Symbol > *>(node->getLeft().get())) { - n = node->getRight().release(); - delete node; - return true; - } - } - - return false; - -} - -} /* namespace simplify */ - -} /* namespace regexp */ diff --git a/alib2algo/src/regexp/simplify/RegExpOptimizeFormalPart.hpp b/alib2algo/src/regexp/simplify/RegExpOptimizeFormalPart.hpp new file mode 100644 index 0000000000..0bdcc00213 --- /dev/null +++ b/alib2algo/src/regexp/simplify/RegExpOptimizeFormalPart.hpp @@ -0,0 +1,611 @@ +/* + * RegExpOptimize.cpp + * + * Created on: 20. 1. 2014 + * Author: Jan Travnicek + */ + +namespace regexp { + +namespace simplify { + +template < class SymbolType > +void RegExpOptimize::optimize( FormalRegExpElement < SymbolType > & element ) { + FormalRegExpElement < SymbolType >* optimized = optimizeInner( element ); + + FormalRegExpAlternation < SymbolType > * alternation = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( & element ); + if( alternation ) { + FormalRegExpAlternation < SymbolType > * alternationOptimized = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( optimized ); + if( alternationOptimized ) { + * alternation = std::move( * alternationOptimized ); + delete alternationOptimized; + } else { + * alternation = FormalRegExpAlternation < SymbolType > { std::manage_move ( optimized ), FormalRegExpEmpty < SymbolType > { } }; + } + return; + } + + FormalRegExpConcatenation < SymbolType > * concatenation = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( & element ); + if( concatenation ) { + FormalRegExpConcatenation < SymbolType > * concatenationOptimized = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( optimized ); + if( concatenationOptimized ) { + * concatenation = std::move( * concatenationOptimized ); + delete concatenationOptimized; + } else { + * concatenation = FormalRegExpConcatenation < SymbolType > { std::manage_move ( optimized ), FormalRegExpEpsilon < SymbolType > { } }; + } + return; + } + + FormalRegExpIteration < SymbolType > * iteration = dynamic_cast<FormalRegExpIteration < SymbolType > *>( & element ); + if( iteration ) { + FormalRegExpIteration < SymbolType > * iterationOptimized = dynamic_cast<FormalRegExpIteration < SymbolType > *>( optimized ); + if( iterationOptimized ) { + * iteration = std::move( * iterationOptimized ); + delete iterationOptimized; + } else { + * iteration = FormalRegExpIteration < SymbolType > { std::manage_move ( optimized ) }; + } + return; + } + + // Nothing to optimize original element was FormalRegExpSymbol, FormalRegExpEpsilon, or FormalRegExpEmpty + return; +} + +template < class SymbolType > +FormalRegExpElement < SymbolType >* RegExpOptimize::optimizeInner( const FormalRegExpElement < SymbolType > & node ) { + FormalRegExpElement < SymbolType >* elem = node.clone(); + + // optimize while you can + while( A1( elem ) || A2( elem ) || A3( elem ) || A4( elem ) || A10( elem ) || V2( elem ) || V5( elem ) || V6( elem ) || X1( elem ) + || A5( elem ) || A6( elem ) || A7( elem ) || A8( elem ) || A9( elem ) || V8( elem ) //|| V9( elem ) + || A11( elem ) || V1( elem ) || V3( elem ) || V4( elem ) || V10( elem ) || S(elem) ); + + return elem; +} + +template < class SymbolType > +bool RegExpOptimize::S( FormalRegExpElement < SymbolType > * & node ) { + bool optimized = false; + FormalRegExpAlternation < SymbolType > * alternation = dynamic_cast<FormalRegExpAlternation < SymbolType >*>( node ); + if( alternation ) { + FormalRegExpElement < SymbolType > * tmp = optimizeInner ( alternation->getLeftElement ( ) ); + if(* tmp != alternation->getLeftElement ( ) ) { + optimized = true; + alternation->setLeftElement ( * std::smart_ptr < FormalRegExpElement < SymbolType > > ( tmp ) ); + } + + tmp = optimizeInner ( alternation->getRightElement ( ) ); + if(* tmp != alternation->getRightElement ( ) ) { + optimized = true; + alternation->setRightElement ( * std::smart_ptr < FormalRegExpElement < SymbolType > > ( tmp ) ); + } + + return optimized; + } + + FormalRegExpConcatenation < SymbolType > * concatenation = dynamic_cast<FormalRegExpConcatenation < SymbolType >*>( node ); + if( concatenation ) { + FormalRegExpElement < SymbolType >* tmp = optimizeInner ( concatenation->getLeftElement() ); + if(* tmp != concatenation->getLeftElement ( ) ) { + optimized = true; + concatenation->setLeftElement ( * std::smart_ptr < FormalRegExpElement < SymbolType > > ( tmp ) ); + } + + tmp = optimizeInner ( concatenation->getRightElement ( )); + if(* tmp != concatenation->getRightElement ( )) { + optimized = true; + concatenation->setRightElement ( * std::smart_ptr < FormalRegExpElement < SymbolType > > ( tmp ) ); + } + + return optimized; + } + + FormalRegExpIteration < SymbolType > * iteration = dynamic_cast<FormalRegExpIteration < SymbolType >*>( node ); + if( iteration ) { + FormalRegExpElement < SymbolType >* tmp = optimizeInner ( iteration->getElement() ); + + if(* tmp != iteration->getElement ( ) ) { + optimized = true; + iteration->setElement ( * std::smart_ptr < FormalRegExpElement < SymbolType > > ( tmp ) ); + } + return optimized; + } + + return optimized; +} + + +/** + * optimization A1: ( x + y ) + z = x + ( y + z ) + * @param node FormalRegExpElement < SymbolType > node + * @return bool true if optimization applied else false + */ +template < class SymbolType > +bool RegExpOptimize::A1( FormalRegExpElement < SymbolType > * & n ) { + FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n ); + if( ! node ) return false; + + if( dynamic_cast<FormalRegExpAlternation < SymbolType > *>( node->getLeft().get() ) ) { + std::smart_ptr < FormalRegExpAlternation < SymbolType > > leftAlt ( static_cast<FormalRegExpAlternation < SymbolType > *>( node->getLeft().release() ) ); + + std::smart_ptr < FormalRegExpElement < SymbolType > > x = std::move ( leftAlt->getLeft() ); + std::smart_ptr < FormalRegExpElement < SymbolType > > y = std::move ( leftAlt->getRight() ); + std::smart_ptr < FormalRegExpElement < SymbolType > > z = std::move ( node->getRight() ); + + leftAlt->setLeft ( std::move ( y ) ); + leftAlt->setRight ( std::move ( z ) ); + node->setLeft ( std::move ( x ) ); + node->setRight ( std::move ( leftAlt ) ); + + return true; + } + + return false; +} + +/** + * optimization A2: x + y = y + x (sort) + * @param node FormalRegExpElement < SymbolType > node + * @return bool true if optimization applied else false + */ +template < class SymbolType > +bool RegExpOptimize::A2( FormalRegExpElement < SymbolType > * & n ) { + FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n ); + if( ! node ) return false; + + if( dynamic_cast<FormalRegExpAlternation < SymbolType > *>( node->getRight().get() ) ) { + FormalRegExpAlternation < SymbolType > * rightAlt = static_cast < FormalRegExpAlternation < SymbolType > * > ( node->getRight ( ).get ( ) ); + + std::smart_ptr < FormalRegExpElement < SymbolType > > x = std::move ( node->getLeft ( ) ); + std::smart_ptr < FormalRegExpElement < SymbolType > > y = std::move ( rightAlt->getLeft ( ) ); + + if(*x > *y) { + node->setLeft ( std::move ( y ) ); + rightAlt->setLeft ( std::move ( x ) ); + return true; + } else { + node->setLeft ( std::move ( x ) ); + rightAlt->setLeft ( std::move ( y ) ); + return false; + } + } + + return false; +} + +/** + * optimization A3: x + \0 = x + * @param node FormalRegExpElement < SymbolType > node + * @return bool true if optimization applied else false + */ +template < class SymbolType > +bool RegExpOptimize::A3( FormalRegExpElement < SymbolType > * & n ) { + FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n ); + if( ! node ) return false; + + // input can be \0 + \0, so at least one element must be preserved + + if( dynamic_cast<FormalRegExpEmpty < SymbolType > *>( node->getRight().get() ) ) { + n = node->getLeft().release(); + delete node; + return true; + } + + if( dynamic_cast<FormalRegExpEmpty < SymbolType > *>( node->getLeft().get() ) ) { + n = node->getRight().release(); + delete node; + return true; + } + + return false; +} + +/** + * optimization A4: x + x = x + * @param node FormalRegExpElement < SymbolType > node + * @return bool true if optimization applied else false + */ +template < class SymbolType > +bool RegExpOptimize::A4( FormalRegExpElement < SymbolType > * & n ) { + /* + * two ways of implementing this opitimization: + * - sort and call std::unique ( O(n lg n) + O(n) ), but it also sorts... + * - check every element against other ( O(n*n) ) + * + * As we always sort in optimization, we can use the first version, but A4 must be __always__ called __after__ A2 + */ + + FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n ); + if( ! node ) return false; + + if( node->getLeftElement() == node->getRightElement() ) { + n = node->getRight().release(); + delete node; + return true; + } + + return false; +} + +/** + * optimization A5: x.(y.z) = (x.y).z = x.y.z + * @param node FormalRegExpElement < SymbolType > node + * @return bool true if optimization applied else false + */ +template < class SymbolType > +bool RegExpOptimize::A5( FormalRegExpElement < SymbolType > * & n ) { + FormalRegExpConcatenation < SymbolType > * node = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( n ); + if( ! node ) return false; + + if( dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( node->getLeft().get() ) ) { + std::smart_ptr < FormalRegExpConcatenation < SymbolType > > leftCon ( static_cast<FormalRegExpConcatenation < SymbolType > *>( node->getLeft().release() ) ); + + std::smart_ptr < FormalRegExpElement < SymbolType > > x = std::move ( leftCon->getLeft() ); + std::smart_ptr < FormalRegExpElement < SymbolType > > y = std::move ( leftCon->getRight() ); + std::smart_ptr < FormalRegExpElement < SymbolType > > z = std::move ( node->getRight() ); + + leftCon->setLeft ( std::move ( y ) ); + leftCon->setRight ( std::move ( z ) ); + node->setLeft ( std::move ( x ) ); + node->setRight ( std::move ( leftCon ) ); + + return true; + } + + return false; +} + +/** + * optimization A6: \e.x = x.\e = x + * @param node FormalRegExpElement < SymbolType > node + * @return bool true if optimization applied else false + */ +template < class SymbolType > +bool RegExpOptimize::A6( FormalRegExpElement < SymbolType > * & n ) { + FormalRegExpConcatenation < SymbolType > * node = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( n ); + if( ! node ) return false; + + // input can be \e + \e, so at least one element must be preserved + + if( dynamic_cast<FormalRegExpEpsilon < SymbolType > *>( node->getRight().get() ) ) { + n = node->getLeft().release(); + delete node; + return true; + } + + if( dynamic_cast<FormalRegExpEpsilon < SymbolType > *>( node->getLeft().get() ) ) { + n = node->getRight().release(); + delete node; + return true; + } + + return false; +} + +/** + * optimization A7: \0.x = x.\0 = \0 + * @param node FormalRegExpElement < SymbolType > node + * @return bool true if optimization applied else false + */ +template < class SymbolType > +bool RegExpOptimize::A7( FormalRegExpElement < SymbolType > * & n ) { + FormalRegExpConcatenation < SymbolType > * node = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( n ); + if( ! node ) return false; + + if( dynamic_cast<FormalRegExpEmpty < SymbolType > *>( node->getRight().get() ) || dynamic_cast<FormalRegExpEmpty < SymbolType > *>( node->getLeft().get() ) ) { + delete node; + n = new FormalRegExpEmpty < SymbolType > { }; + return true; + } + + return false; +} + +/** + * optimization A8: x.(y+z) = x.y + x.z + * @param node FormalRegExpElement < SymbolType > node + * @return bool true if optimization applied else false + */ +template < class SymbolType > +bool RegExpOptimize::A8( FormalRegExpElement < SymbolType > * & /* n */) { + return false; //TODO +} + +/** + * optimization A9: (x+y).z = x.z + y.z + * @param node FormalRegExpElement < SymbolType > node + * @return bool true if optimization applied else false + */ +template < class SymbolType > +bool RegExpOptimize::A9( FormalRegExpElement < SymbolType > * & /* n */) { + return false; //TODO +} + +/** + * optimization A10: x* = \e + x*x + * @param node FormalRegExpElement < SymbolType > node + * @return bool true if optimization applied else false + */ +template < class SymbolType > +bool RegExpOptimize::A10( FormalRegExpElement < SymbolType > * & n ) { + /* + * problem: + * - \e + x*x = x* + * - but if we do not have the eps, but we do have iteration, then \e \in h(iter), therefore \e in h(node). + */ + + FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n ); + if( ! node ) return false; + + if( dynamic_cast<FormalRegExpEpsilon < SymbolType > *>( node->getLeft().get() ) ) { + FormalRegExpConcatenation < SymbolType > * rightCon = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( node->getRight().get() ); + if( ! rightCon ) return false; + + FormalRegExpIteration < SymbolType > * rightLeftIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( rightCon->getLeft().get() ); + if( rightLeftIte ) { + if(rightLeftIte->getElement() == rightCon->getRightElement()) { + n = rightCon->getLeft().release(); + delete node; + return true; + } + } + + FormalRegExpIteration < SymbolType > * rightRightIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( rightCon->getRight().get() ); + if( rightRightIte ) { + if(rightRightIte->getElement() == rightCon->getLeftElement()) { + n = rightCon->getRight().release(); + delete node; + return true; + } + } + } + + if( dynamic_cast<FormalRegExpEpsilon < SymbolType > *>( node->getRight().get() ) ) { + FormalRegExpConcatenation < SymbolType > * leftCon = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( node->getLeft().get() ); + if( ! leftCon ) return false; + + FormalRegExpIteration < SymbolType > * leftLeftIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( leftCon->getLeft().get() ); + if( leftLeftIte ) { + if(leftLeftIte->getElement() == leftCon->getRightElement()) { + n = leftCon->getLeft().release(); + delete node; + return true; + } + } + + FormalRegExpIteration < SymbolType > * leftRightIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( leftCon->getRight().get() ); + if( leftRightIte ) { + if(leftRightIte->getElement() == leftCon->getLeftElement()) { + n = leftCon->getRight().release(); + delete node; + return true; + } + } + } + + return false; +} + +/** + * optimization A11: x* = (\e + x)* + * @param node FormalRegExpElement < SymbolType > node + * @return bool true if optimization applied else false + */ +template < class SymbolType > +bool RegExpOptimize::A11( FormalRegExpElement < SymbolType > * & n ) { + FormalRegExpIteration < SymbolType > * node = dynamic_cast<FormalRegExpIteration < SymbolType > *>( n ); + if( ! node ) return false; + + FormalRegExpAlternation < SymbolType > * childAlt = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( node->getChild().get() ); + if( childAlt ) { + if( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( childAlt->getLeft ( ).get ( ) ) ) { + node->setChild ( std::move ( childAlt->getRight ( ) ) ); + return true; + } + if( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( childAlt->getRight ( ).get ( ) ) ) { + node->setChild ( std::move ( childAlt->getLeft ( ) ) ); + return true; + } + } + + return false; +} + +/** + * optimization V1: \0* = \e + * optimization T1: \e* = \e + * @param node FormalRegExpElement < SymbolType > node + * @return bool true if optimization applied else false + */ +template < class SymbolType > +bool RegExpOptimize::V1( FormalRegExpElement < SymbolType > * & n ) { + FormalRegExpIteration < SymbolType > * node = dynamic_cast<FormalRegExpIteration < SymbolType > *>( n ); + if( ! node ) return false; + + if( dynamic_cast<FormalRegExpEmpty< SymbolType > *>( node->getChild ( ).get() ) ) { + delete node; + n = new FormalRegExpEpsilon < SymbolType > ( ); + return true; + } + if( dynamic_cast<FormalRegExpEpsilon < SymbolType > *>( node->getChild ( ).get() ) ) { + delete node; + n = new FormalRegExpEpsilon < SymbolType > ( ); + return true; + } + return false; +} + +/** + * optimization V2: x* + x = x* + * @param node FormalRegExpElement < SymbolType > node + * @return bool true if optimization applied else false + */ +template < class SymbolType > +bool RegExpOptimize::V2( FormalRegExpElement < SymbolType > * & n ) { + FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n ); + if( ! node ) return false; + + FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( node->getLeft().get() ); + if( leftIte ) { + if(leftIte->getElement() == node->getRightElement()) { + n = node->getLeft().release(); + delete node; + return true; + } + } + + FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( node->getRight().get() ); + if( rightIte ) { + if(rightIte->getElement() == node->getLeftElement()) { + n = node->getRight().release(); + delete node; + return true; + } + } + + return false; +} + +/** + * optimization V3: x** = x* + * @param node FormalRegExpElement < SymbolType > node + * @return bool true if optimization applied else false + */ +template < class SymbolType > +bool RegExpOptimize::V3( FormalRegExpElement < SymbolType > * & n ) { + FormalRegExpIteration < SymbolType > * node = dynamic_cast<FormalRegExpIteration < SymbolType > *>( n ); + if( ! node ) return false; + + FormalRegExpIteration < SymbolType >* childIter = dynamic_cast<FormalRegExpIteration < SymbolType >*>( node->getChild().get() ); + if( childIter ) { + node->setChild ( std::move ( childIter->getChild ( ) ) ); + return true; + } + + return false; +} + +/** + * optimization V4: (x+y)* = (x*y*)* + * @param node FormalRegExpElement < SymbolType > node + * @return bool true if optimization applied else false + */ +template < class SymbolType > +bool RegExpOptimize::V4( FormalRegExpElement < SymbolType > * & n ) { + FormalRegExpIteration < SymbolType > * node = dynamic_cast<FormalRegExpIteration < SymbolType > *>( n ); + if( ! node ) return false; + + FormalRegExpConcatenation < SymbolType > * child = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( node->getChild().get() ); + if( ! child ) return false; + + FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( child->getLeft().get() ); + if( ! leftIte ) return false; + + FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( child->getRight().get() ); + if( ! rightIte ) return false; + + n = new FormalRegExpIteration < SymbolType >( FormalRegExpAlternation < SymbolType >(std::move( leftIte->getElement ( ) ), std::move( rightIte->getElement ( ) ) ) ); + + delete node; + return true; +} + +/** + * optimization V5: x*y = y + x*xy + * @param node FormalRegExpElement < SymbolType > node + * @return bool true if optimization applied else false + */ +template < class SymbolType > +bool RegExpOptimize::V5( FormalRegExpElement < SymbolType > * & /* n */) { + return false; //TODO +} + +/** + * optimization V6: x*y = y + xx*y + * @param node FormalRegExpElement < SymbolType > node + * @return bool true if optimization applied else false + */ +template < class SymbolType > +bool RegExpOptimize::V6( FormalRegExpElement < SymbolType > * & /* n */) { + return false; //TODO +} + +/** + * optimization V8: \e in h(x) => xx*=x* + * @param node FormalRegExpElement < SymbolType > node + * @return bool true if optimization applied else false + */ +template < class SymbolType > +bool RegExpOptimize::V8( FormalRegExpElement < SymbolType > * & /* n */) { + return false; //TODO +} + +/** + * optimization V9: (xy)*x = x(yx)* + * @param node FormalRegExpElement < SymbolType > node + * @return bool true if optimization applied else false + */ +template < class SymbolType > +bool RegExpOptimize::V9( FormalRegExpElement < SymbolType > * & /* n */) { + return false; //TODO +} + +/** + * optimization V10: (x+y)* = (x*+y*)* + * @param node FormalRegExpElement < SymbolType > node + * @return bool true if optimization applied else false + */ +template < class SymbolType > +bool RegExpOptimize::V10( FormalRegExpElement < SymbolType > * & n ) { + FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n ); + if( ! node ) return false; + + FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( node->getLeft().get() ); + if( ! leftIte ) return false; + + FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( node->getRight().get() ); + if( ! rightIte ) return false; + + n = new FormalRegExpConcatenation < SymbolType >(std::move( leftIte->getElement() ), std::move( rightIte->getElement() ) ); + + delete node; + return true; +} + +/** + * optimization X1: a* + \e = a* + * @param node FormalRegExpElement < SymbolType > node + * @return bool true if optimization applied else false + */ +template < class SymbolType > +bool RegExpOptimize::X1( FormalRegExpElement < SymbolType > * & n ) { + FormalRegExpAlternation < SymbolType > * node = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( n ); + if( ! node ) return false; + + FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( node->getLeft().get() ); + if( leftIte ) { + if(dynamic_cast<FormalRegExpEpsilon < SymbolType > *>(node->getRight().get())) { + n = node->getLeft().release(); + delete node; + return true; + } + } + + FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast<FormalRegExpIteration < SymbolType > *>( node->getRight().get() ); + if( rightIte ) { + if(dynamic_cast<FormalRegExpEpsilon < SymbolType > *>(node->getLeft().get())) { + n = node->getRight().release(); + delete node; + return true; + } + } + + return false; + +} + +} /* namespace simplify */ + +} /* namespace regexp */ diff --git a/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.cxx b/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.hpp similarity index 55% rename from alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.cxx rename to alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.hpp index 56bdafdc76..3e1dc7c7c2 100644 --- a/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.cxx +++ b/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.hpp @@ -9,150 +9,161 @@ namespace regexp { namespace simplify { -void RegExpOptimize::optimize( UnboundedRegExpAlternation < alphabet::Symbol > & alt ) { +template < class SymbolType > +void RegExpOptimize::optimize( UnboundedRegExpAlternation < SymbolType > & alt ) { for( unsigned i = 0; i < alt.getChildren ( ).size ( ); i++ ) - alt.setChild( std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > ( optimizeInner( * alt.getChildren ( ) [ i ] ) ), i ); + alt.setChild( std::smart_ptr < UnboundedRegExpElement < SymbolType > > ( optimizeInner( * alt.getChildren ( ) [ i ] ) ), i ); while( A1( alt ) || A2( alt ) || A3( alt ) || A4( alt ) || A10( alt ) || V2( alt ) || V5( alt ) || V6( alt ) || X1( alt ) ); } -void RegExpOptimize::optimize( UnboundedRegExpConcatenation < alphabet::Symbol > & concat ) { +template < class SymbolType > +void RegExpOptimize::optimize( UnboundedRegExpConcatenation < SymbolType > & concat ) { for( unsigned i = 0; i < concat.getChildren ( ).size ( ); i++ ) - concat.setChild( std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > ( optimizeInner( * concat.getChildren ( ) [ i ] ) ), i ); + concat.setChild( std::smart_ptr < UnboundedRegExpElement < SymbolType > > ( optimizeInner( * concat.getChildren ( ) [ i ] ) ), i ); while( A5( concat ) || A6( concat ) || A7( concat ) || A8( concat ) || A9( concat ) || V8( concat ) );//|| V9( concat ) ); } -void RegExpOptimize::optimize( UnboundedRegExpIteration < alphabet::Symbol > & iter ) { - iter.setChild ( std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > ( optimizeInner ( * iter.getChild ( ) ) ) ); +template < class SymbolType > +void RegExpOptimize::optimize( UnboundedRegExpIteration < SymbolType > & iter ) { + iter.setChild ( std::smart_ptr < UnboundedRegExpElement < SymbolType > > ( optimizeInner ( * iter.getChild ( ) ) ) ); do { // V1 is implemented right here - if( dynamic_cast<UnboundedRegExpEmpty < alphabet::Symbol >*>( iter.getChild ( ).get() ) ) { + if( dynamic_cast<UnboundedRegExpEmpty < SymbolType >*>( iter.getChild ( ).get() ) ) { return; } // T1 is implemented right here \e* = \e - if( dynamic_cast<UnboundedRegExpEpsilon < alphabet::Symbol >*>( iter.getChild ( ).get() ) ) { + if( dynamic_cast<UnboundedRegExpEpsilon < SymbolType >*>( iter.getChild ( ).get() ) ) { return; } } while( A11( iter ) || V1( iter ) || V3( iter ) || V4( iter ) || V10( iter ) ); } -UnboundedRegExpElement < alphabet::Symbol >* RegExpOptimize::optimizeInner( const UnboundedRegExpElement < alphabet::Symbol > & node ) { - const UnboundedRegExpAlternation < alphabet::Symbol > * alternation = dynamic_cast<const UnboundedRegExpAlternation < alphabet::Symbol >*>( & node ); +template < class SymbolType > +UnboundedRegExpElement < SymbolType >* RegExpOptimize::optimizeInner( const UnboundedRegExpElement < SymbolType > & node ) { + const UnboundedRegExpAlternation < SymbolType > * alternation = dynamic_cast<const UnboundedRegExpAlternation < SymbolType >*>( & node ); if( alternation ) return optimizeInner( * alternation ); - const UnboundedRegExpConcatenation < alphabet::Symbol > * concatenation = dynamic_cast<const UnboundedRegExpConcatenation < alphabet::Symbol >*>( & node ); + const UnboundedRegExpConcatenation < SymbolType > * concatenation = dynamic_cast<const UnboundedRegExpConcatenation < SymbolType >*>( & node ); if( concatenation ) return optimizeInner( * concatenation ); - const UnboundedRegExpIteration < alphabet::Symbol > * iteration = dynamic_cast<const UnboundedRegExpIteration < alphabet::Symbol >*>( & node ); + const UnboundedRegExpIteration < SymbolType > * iteration = dynamic_cast<const UnboundedRegExpIteration < SymbolType >*>( & node ); if( iteration ) return optimizeInner( * iteration ); - const UnboundedRegExpSymbol < alphabet::Symbol > * symbol = dynamic_cast<const UnboundedRegExpSymbol < alphabet::Symbol >*>( & node ); + const UnboundedRegExpSymbol < SymbolType > * symbol = dynamic_cast<const UnboundedRegExpSymbol < SymbolType >*>( & node ); if( symbol ) return optimizeInner( * symbol ); - const UnboundedRegExpEmpty < alphabet::Symbol > * empty = dynamic_cast<const UnboundedRegExpEmpty < alphabet::Symbol >*>( & node ); + const UnboundedRegExpEmpty < SymbolType > * empty = dynamic_cast<const UnboundedRegExpEmpty < SymbolType >*>( & node ); if( empty ) return optimizeInner( * empty ); - const UnboundedRegExpEpsilon < alphabet::Symbol > * eps = dynamic_cast<const UnboundedRegExpEpsilon < alphabet::Symbol >*>( & node ); + const UnboundedRegExpEpsilon < SymbolType > * eps = dynamic_cast<const UnboundedRegExpEpsilon < SymbolType >*>( & node ); if( eps ) return optimizeInner( * eps ); - throw exception::CommonException( "RegExpOptimize::optimize - unknown UnboundedRegExpElement < alphabet::Symbol > node" ); + throw exception::CommonException( "RegExpOptimize::optimize - unknown UnboundedRegExpElement < SymbolType > node" ); } -UnboundedRegExpElement < alphabet::Symbol > * RegExpOptimize::optimizeInner( const UnboundedRegExpAlternation < alphabet::Symbol > & node ) { - UnboundedRegExpAlternation < alphabet::Symbol >* alt = new UnboundedRegExpAlternation < alphabet::Symbol >( ); +template < class SymbolType > +UnboundedRegExpElement < SymbolType > * RegExpOptimize::optimizeInner( const UnboundedRegExpAlternation < SymbolType > & node ) { + UnboundedRegExpAlternation < SymbolType >* alt = new UnboundedRegExpAlternation < SymbolType >( ); for( const auto & child : node.getElements ( ) ) - alt->pushBackChild( std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > ( optimizeInner( * child ) ) ); + alt->pushBackChild( std::smart_ptr < UnboundedRegExpElement < SymbolType > > ( optimizeInner( * child ) ) ); // optimize while you can while( A1( * alt ) || A2( * alt ) || A3( * alt ) || A4( * alt ) || A10( * alt ) || V2( * alt ) || V5( * alt ) || V6( * alt ) || X1( * alt ) ); if( alt->getElements ( ).size( ) == 1 ) { - UnboundedRegExpElement < alphabet::Symbol >* ret = alt->getChildren ( ).front ( ).release ( ); + UnboundedRegExpElement < SymbolType >* ret = alt->getChildren ( ).front ( ).release ( ); delete alt; return ret; } if( alt->getElements ( ).size( ) == 0 ) { delete alt; - return new UnboundedRegExpEmpty < alphabet::Symbol >( ); + return new UnboundedRegExpEmpty < SymbolType >( ); } return alt; } -UnboundedRegExpElement < alphabet::Symbol > * RegExpOptimize::optimizeInner( const UnboundedRegExpConcatenation < alphabet::Symbol > & node ) { - UnboundedRegExpConcatenation < alphabet::Symbol >* concat = new UnboundedRegExpConcatenation < alphabet::Symbol >( ); +template < class SymbolType > +UnboundedRegExpElement < SymbolType > * RegExpOptimize::optimizeInner( const UnboundedRegExpConcatenation < SymbolType > & node ) { + UnboundedRegExpConcatenation < SymbolType >* concat = new UnboundedRegExpConcatenation < SymbolType >( ); for( const auto & child : node.getElements ( ) ) - concat->pushBackChild ( std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > ( optimizeInner( * child ) ) ); + concat->pushBackChild ( std::smart_ptr < UnboundedRegExpElement < SymbolType > > ( optimizeInner( * child ) ) ); while( A5( * concat ) || A6( * concat ) || A7( * concat ) || A8( * concat ) || A9( * concat ) || V8( * concat ) );//|| V9( * concat ) ); if( concat->getElements ( ).size( ) == 1 ) { - UnboundedRegExpElement < alphabet::Symbol >* ret = concat->getChildren ( ).front( ).release(); + UnboundedRegExpElement < SymbolType >* ret = concat->getChildren ( ).front( ).release(); delete concat; return ret; } if( concat->getElements ( ).size( ) == 0 ) { delete concat; - return new UnboundedRegExpEpsilon < alphabet::Symbol >( ); + return new UnboundedRegExpEpsilon < SymbolType >( ); } return concat; } -UnboundedRegExpElement < alphabet::Symbol > * RegExpOptimize::optimizeInner( const UnboundedRegExpIteration < alphabet::Symbol > & node ) { - UnboundedRegExpIteration < alphabet::Symbol >* iter = new UnboundedRegExpIteration < alphabet::Symbol >( UnboundedRegExpEmpty < alphabet::Symbol > {} ); - iter->setChild ( std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > ( optimizeInner ( * node.getChild ( ) ) ) ); +template < class SymbolType > +UnboundedRegExpElement < SymbolType > * RegExpOptimize::optimizeInner( const UnboundedRegExpIteration < SymbolType > & node ) { + UnboundedRegExpIteration < SymbolType >* iter = new UnboundedRegExpIteration < SymbolType >( UnboundedRegExpEmpty < SymbolType > {} ); + iter->setChild ( std::smart_ptr < UnboundedRegExpElement < SymbolType > > ( optimizeInner ( * node.getChild ( ) ) ) ); do { // V1 is implemented right here - if( dynamic_cast<UnboundedRegExpEmpty < alphabet::Symbol >*>( iter->getChild ( ).get() ) ) { + if( dynamic_cast<UnboundedRegExpEmpty < SymbolType >*>( iter->getChild ( ).get() ) ) { delete iter; - return new UnboundedRegExpEpsilon < alphabet::Symbol >( ); + return new UnboundedRegExpEpsilon < SymbolType >( ); } // T1 is implemented right here \e* = \e - if( dynamic_cast<UnboundedRegExpEpsilon < alphabet::Symbol >*>( iter->getChild ( ).get() ) ) { + if( dynamic_cast<UnboundedRegExpEpsilon < SymbolType >*>( iter->getChild ( ).get() ) ) { delete iter; - return new UnboundedRegExpEpsilon < alphabet::Symbol >( ); + return new UnboundedRegExpEpsilon < SymbolType >( ); } } while( A11( * iter ) || V1( * iter ) || V3( * iter ) || V4( * iter ) || V10( * iter ) ); return iter; } -UnboundedRegExpElement < alphabet::Symbol > * RegExpOptimize::optimizeInner( const UnboundedRegExpSymbol < alphabet::Symbol > & node ) { +template < class SymbolType > +UnboundedRegExpElement < SymbolType > * RegExpOptimize::optimizeInner( const UnboundedRegExpSymbol < SymbolType > & node ) { return node.clone( ); } -UnboundedRegExpElement < alphabet::Symbol > * RegExpOptimize::optimizeInner( const UnboundedRegExpEmpty < alphabet::Symbol > & node ) { +template < class SymbolType > +UnboundedRegExpElement < SymbolType > * RegExpOptimize::optimizeInner( const UnboundedRegExpEmpty < SymbolType > & node ) { return node.clone( ); } -UnboundedRegExpElement < alphabet::Symbol > * RegExpOptimize::optimizeInner( const UnboundedRegExpEpsilon < alphabet::Symbol > & node ) { +template < class SymbolType > +UnboundedRegExpElement < SymbolType > * RegExpOptimize::optimizeInner( const UnboundedRegExpEpsilon < SymbolType > & node ) { return node.clone( ); } /** * optimization A1: x + ( y + z ) = ( x + y ) + z = x + y + z - * @param node UnboundedRegExpAlternation < alphabet::Symbol > node + * @param node UnboundedRegExpAlternation < SymbolType > node * @return bool true if optimization applied else false */ -bool RegExpOptimize::A1( UnboundedRegExpAlternation < alphabet::Symbol > & node ) { +template < class SymbolType > +bool RegExpOptimize::A1( UnboundedRegExpAlternation < SymbolType > & node ) { bool optimized = false; for( auto it = node.getChildren ( ).begin( ); it != node.getChildren ( ).end( ); ) { - if( dynamic_cast < UnboundedRegExpAlternation < alphabet::Symbol > * > ( it->get ( ) ) ) { - std::smart_ptr < UnboundedRegExpAlternation < alphabet::Symbol > > childAlt ( static_cast < UnboundedRegExpAlternation < alphabet::Symbol > * >( it->release() ) ); + if( dynamic_cast < UnboundedRegExpAlternation < SymbolType > * > ( it->get ( ) ) ) { + std::smart_ptr < UnboundedRegExpAlternation < SymbolType > > childAlt ( static_cast < UnboundedRegExpAlternation < SymbolType > * >( it->release() ) ); it = node.getChildren ( ).erase( it ); it = node.insert( it, std::make_move_iterator(childAlt->getChildren ( ).begin ( ) ), std::make_move_iterator(childAlt->getChildren ( ).end ( ) ) ); @@ -167,11 +178,12 @@ bool RegExpOptimize::A1( UnboundedRegExpAlternation < alphabet::Symbol > & node /** * optimization A2: x + y = y + x (sort) - * @param node UnboundedRegExpAlternation < alphabet::Symbol > node + * @param node UnboundedRegExpAlternation < SymbolType > node * @return bool true if optimization applied else false */ -bool RegExpOptimize::A2( UnboundedRegExpAlternation < alphabet::Symbol > & node ) { - auto cmp = [ ]( const std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > & a, const std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > & b ) -> bool { return *a < *b; }; +template < class SymbolType > +bool RegExpOptimize::A2( UnboundedRegExpAlternation < SymbolType > & node ) { + auto cmp = [ ]( const std::smart_ptr < UnboundedRegExpElement < SymbolType > > & a, const std::smart_ptr < UnboundedRegExpElement < SymbolType > > & b ) -> bool { return *a < *b; }; if( std::is_sorted( node.getChildren ( ).begin( ), node.getChildren ( ).end( ), cmp ) ) return false; @@ -182,16 +194,17 @@ bool RegExpOptimize::A2( UnboundedRegExpAlternation < alphabet::Symbol > & node /** * optimization A3: x + \0 = x - * @param node UnboundedRegExpAlternation < alphabet::Symbol > node + * @param node UnboundedRegExpAlternation < SymbolType > node * @return bool true if optimization applied else false */ -bool RegExpOptimize::A3( UnboundedRegExpAlternation < alphabet::Symbol > & node ) { +template < class SymbolType > +bool RegExpOptimize::A3( UnboundedRegExpAlternation < SymbolType > & node ) { bool optimized = false; // alternation with no children is efectively \0 for( auto it = node.getChildren ( ).begin( ); it != node.getChildren ( ).end( ); ) { - if( dynamic_cast < UnboundedRegExpEmpty < alphabet::Symbol > * >( it->get ( ) ) ) { + if( dynamic_cast < UnboundedRegExpEmpty < SymbolType > * >( it->get ( ) ) ) { it = node.getChildren ( ).erase( it ); optimized = true; @@ -204,10 +217,11 @@ bool RegExpOptimize::A3( UnboundedRegExpAlternation < alphabet::Symbol > & node /** * optimization A4: x + x = x - * @param node UnboundedRegExpAlternation < alphabet::Symbol > node + * @param node UnboundedRegExpAlternation < SymbolType > node * @return bool true if optimization applied else false */ -bool RegExpOptimize::A4( UnboundedRegExpAlternation < alphabet::Symbol > & node ) { +template < class SymbolType > +bool RegExpOptimize::A4( UnboundedRegExpAlternation < SymbolType > & node ) { /* * two ways of implementing this opitimization: * - sort and call std::unique ( O(n lg n) + O(n) ), but it also sorts... @@ -215,7 +229,7 @@ bool RegExpOptimize::A4( UnboundedRegExpAlternation < alphabet::Symbol > & node * * As we always sort in optimization, we can use the first version, but A4 must be __always__ called __after__ A2 */ - auto cmp = [ ]( const std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > & a, const std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > & b ) -> bool { return *a == *b; }; + auto cmp = [ ]( const std::smart_ptr < UnboundedRegExpElement < SymbolType > > & a, const std::smart_ptr < UnboundedRegExpElement < SymbolType > > & b ) -> bool { return *a == *b; }; size_t size = node.getChildren ( ).size ( ); node.getChildren ( ).erase ( std::unique ( node.getChildren ( ).begin ( ), node.getChildren ( ).end ( ), cmp), node.getChildren ( ).end( ) ); @@ -225,15 +239,16 @@ bool RegExpOptimize::A4( UnboundedRegExpAlternation < alphabet::Symbol > & node /** * optimization A5: x.(y.z) = (x.y).z = x.y.z - * @param node UnboundedRegExpConcatenation < alphabet::Symbol > node + * @param node UnboundedRegExpConcatenation < SymbolType > node * @return bool true if optimization applied else false */ -bool RegExpOptimize::A5( UnboundedRegExpConcatenation < alphabet::Symbol > & node ) { +template < class SymbolType > +bool RegExpOptimize::A5( UnboundedRegExpConcatenation < SymbolType > & node ) { bool optimized = false; for( auto it = node.getChildren ( ).begin( ); it != node.getChildren ( ).end( ); ) { - if( dynamic_cast<UnboundedRegExpConcatenation < alphabet::Symbol > *>( it->get() ) ) { - std::smart_ptr < UnboundedRegExpConcatenation < alphabet::Symbol > > childConcat ( static_cast<UnboundedRegExpConcatenation < alphabet::Symbol > *>( it->release() ) ); + if( dynamic_cast<UnboundedRegExpConcatenation < SymbolType > *>( it->get() ) ) { + std::smart_ptr < UnboundedRegExpConcatenation < SymbolType > > childConcat ( static_cast<UnboundedRegExpConcatenation < SymbolType > *>( it->release() ) ); it = node.getChildren ( ).erase( it ); it = node.insert( it, std::make_move_iterator(childConcat->getChildren ( ).begin( )), std::make_move_iterator(childConcat->getChildren ( ).end( ) )); @@ -248,16 +263,17 @@ bool RegExpOptimize::A5( UnboundedRegExpConcatenation < alphabet::Symbol > & nod /** * optimization A6: \e.x = x.\e = x - * @param node UnboundedRegExpConcatenation < alphabet::Symbol > node + * @param node UnboundedRegExpConcatenation < SymbolType > node * @return bool true if optimization applied else false */ -bool RegExpOptimize::A6( UnboundedRegExpConcatenation < alphabet::Symbol > & node ) { +template < class SymbolType > +bool RegExpOptimize::A6( UnboundedRegExpConcatenation < SymbolType > & node ) { bool optimized = false; // concatenation with no children is efectively \e for( auto it = node.getChildren ( ).begin( ); it != node.getChildren ( ).end( ); ) { - if( dynamic_cast < UnboundedRegExpEpsilon < alphabet::Symbol > * >( it->get ( ) ) ) { + if( dynamic_cast < UnboundedRegExpEpsilon < SymbolType > * >( it->get ( ) ) ) { it = node.getChildren ( ).erase( it ); optimized = true; @@ -270,15 +286,16 @@ bool RegExpOptimize::A6( UnboundedRegExpConcatenation < alphabet::Symbol > & nod /** * optimization A7: \0.x = x.\0 = \0 - * @param node UnboundedRegExpConcatenation < alphabet::Symbol > node + * @param node UnboundedRegExpConcatenation < SymbolType > node * @return bool true if optimization applied else false */ -bool RegExpOptimize::A7( UnboundedRegExpConcatenation < alphabet::Symbol > & node ) { +template < class SymbolType > +bool RegExpOptimize::A7( UnboundedRegExpConcatenation < SymbolType > & node ) { if(node.getChildren ( ).size() == 1) return false; - if( std::any_of( node.getChildren ( ).begin( ), node.getChildren ( ).end( ), []( const std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > & a ) -> bool{ return dynamic_cast < UnboundedRegExpEmpty < alphabet::Symbol > * >( a.get() ); } ) ) { + if( std::any_of( node.getChildren ( ).begin( ), node.getChildren ( ).end( ), []( const std::smart_ptr < UnboundedRegExpElement < SymbolType > > & a ) -> bool{ return dynamic_cast < UnboundedRegExpEmpty < SymbolType > * >( a.get() ); } ) ) { node.getChildren ( ).clear( ); - node.pushBackChild( std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > ( new UnboundedRegExpEmpty < alphabet::Symbol >( ) ) ); + node.pushBackChild( std::smart_ptr < UnboundedRegExpElement < SymbolType > > ( new UnboundedRegExpEmpty < SymbolType >( ) ) ); return true; } @@ -288,16 +305,17 @@ bool RegExpOptimize::A7( UnboundedRegExpConcatenation < alphabet::Symbol > & nod /** * optimization A8: x.(y+z) = x.y + x.z - * @param node UnboundedRegExpConcatenation < alphabet::Symbol > node + * @param node UnboundedRegExpConcatenation < SymbolType > node * @return bool true if optimization applied else false */ -bool RegExpOptimize::A8( UnboundedRegExpConcatenation < alphabet::Symbol > & /* node */) { +template < class SymbolType > +bool RegExpOptimize::A8( UnboundedRegExpConcatenation < SymbolType > & /* node */) { /* bool optimized = false; for( auto it = std::next( node->elements.begin( ) ); it != node->elements.end( ); ) { - UnboundedRegExpAlternation < alphabet::Symbol > * alt = dynamic_cast<UnboundedRegExpAlternation < alphabet::Symbol >*>( * it ); + UnboundedRegExpAlternation < SymbolType > * alt = dynamic_cast<UnboundedRegExpAlternation < SymbolType >*>( * it ); if( ! alt ) { it ++; @@ -305,19 +323,19 @@ bool RegExpOptimize::A8( UnboundedRegExpConcatenation < alphabet::Symbol > & /* } // take everything to the left and copy it as prefix of every element in alternation. - UnboundedRegExpConcatenation < alphabet::Symbol > * leftPart = new UnboundedRegExpConcatenation < alphabet::Symbol >( ); + UnboundedRegExpConcatenation < SymbolType > * leftPart = new UnboundedRegExpConcatenation < SymbolType >( ); leftPart->elements.insert( leftPart->elements.end( ), node->elements.begin( ), it ); for( auto altIt = alt->elements.begin( ); altIt != alt->elements.end( ); altIt ++ ) { - UnboundedRegExpConcatenation < alphabet::Symbol > * altElem = new UnboundedRegExpConcatenation < alphabet::Symbol >( ); + UnboundedRegExpConcatenation < SymbolType > * altElem = new UnboundedRegExpConcatenation < SymbolType >( ); altElem->elements.push_back( leftPart->clone( ) ); altElem->elements.push_back( * altIt ); * altIt = altElem; } - UnboundedRegExpElement < alphabet::Symbol > * optIt = optimize( * it ); + UnboundedRegExpElement < SymbolType > * optIt = optimize( * it ); delete *it; *it = optIt; @@ -335,16 +353,17 @@ bool RegExpOptimize::A8( UnboundedRegExpConcatenation < alphabet::Symbol > & /* /** * optimization A9: (x+y).z = x.z + y.z - * @param node UnboundedRegExpConcatenation < alphabet::Symbol > node + * @param node UnboundedRegExpConcatenation < SymbolType > node * @return bool true if optimization applied else false */ -bool RegExpOptimize::A9( UnboundedRegExpConcatenation < alphabet::Symbol > & /* node */) { +template < class SymbolType > +bool RegExpOptimize::A9( UnboundedRegExpConcatenation < SymbolType > & /* node */) { /* bool optimized = false; for( auto it = node->elements.begin( ); it != std::prev( node->elements.end( ) ); ) { - UnboundedRegExpAlternation < alphabet::Symbol > * alt = dynamic_cast<UnboundedRegExpAlternation < alphabet::Symbol >*>( * it ); + UnboundedRegExpAlternation < SymbolType > * alt = dynamic_cast<UnboundedRegExpAlternation < SymbolType >*>( * it ); if( ! alt ) { it ++; @@ -352,19 +371,19 @@ bool RegExpOptimize::A9( UnboundedRegExpConcatenation < alphabet::Symbol > & /* } // take everything to the right and copy it as suffix of every element in alternation. - UnboundedRegExpConcatenation < alphabet::Symbol > * rest = new UnboundedRegExpConcatenation < alphabet::Symbol >( ); + UnboundedRegExpConcatenation < SymbolType > * rest = new UnboundedRegExpConcatenation < SymbolType >( ); rest->elements.insert( rest->elements.end( ), std::next( it ), node->elements.end( ) ); for( auto altIt = alt->elements.begin( ); altIt != alt->elements.end( ); altIt ++ ) { - UnboundedRegExpConcatenation < alphabet::Symbol > * altElem = new UnboundedRegExpConcatenation < alphabet::Symbol >( ); + UnboundedRegExpConcatenation < SymbolType > * altElem = new UnboundedRegExpConcatenation < SymbolType >( ); altElem->elements.push_back( * altIt ); altElem->elements.push_back( rest->clone( ) ); * altIt = altElem; } - UnboundedRegExpElement < alphabet::Symbol > * optIt = optimize( * it ); + UnboundedRegExpElement < SymbolType > * optIt = optimize( * it ); delete *it; *it = optIt; @@ -373,7 +392,7 @@ bool RegExpOptimize::A9( UnboundedRegExpConcatenation < alphabet::Symbol > & /* optimized = true; // as we move (delete) the rest of this expression, it surely wont do another round. More optimizations to be performerd are in subtree now. - // we do not care about this here as method optimize(UnboundedRegExpAlternation < alphabet::Symbol >) will take care of this in next iteration + // we do not care about this here as method optimize(UnboundedRegExpAlternation < SymbolType >) will take care of this in next iteration // it ++; break; } @@ -385,10 +404,11 @@ bool RegExpOptimize::A9( UnboundedRegExpConcatenation < alphabet::Symbol > & /* /** * optimization A10: x* = \e + x*x - * @param node UnboundedRegExpAlternation < alphabet::Symbol > node + * @param node UnboundedRegExpAlternation < SymbolType > node * @return bool true if optimization applied else false */ -bool RegExpOptimize::A10( UnboundedRegExpAlternation < alphabet::Symbol > & node ) { +template < class SymbolType > +bool RegExpOptimize::A10( UnboundedRegExpAlternation < SymbolType > & node ) { bool optimized = false; /* @@ -398,27 +418,27 @@ bool RegExpOptimize::A10( UnboundedRegExpAlternation < alphabet::Symbol > & node */ // check if we have some epsilon or iteration left, else nothing to do - auto eps = find_if( node.getElements().begin( ), node.getElements().end( ), [ ]( const std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > & a ) -> bool { - return dynamic_cast<UnboundedRegExpEpsilon < alphabet::Symbol > const *>( a.get() ) || dynamic_cast<UnboundedRegExpIteration < alphabet::Symbol > const*>( a.get() ); + auto eps = find_if( node.getElements().begin( ), node.getElements().end( ), [ ]( const std::smart_ptr < UnboundedRegExpElement < SymbolType > > & a ) -> bool { + return dynamic_cast<UnboundedRegExpEpsilon < SymbolType > const *>( a.get() ) || dynamic_cast<UnboundedRegExpIteration < SymbolType > const*>( a.get() ); }); if( eps == node.getElements().end( ) ) return false; for( unsigned i = 0; i < node.getChildren ( ).size ( ); i++ ) { - UnboundedRegExpConcatenation < alphabet::Symbol > * childConcat = dynamic_cast<UnboundedRegExpConcatenation < alphabet::Symbol > *>( node.getChildren ( ) [ i ].get ( ) ); + UnboundedRegExpConcatenation < SymbolType > * childConcat = dynamic_cast<UnboundedRegExpConcatenation < SymbolType > *>( node.getChildren ( ) [ i ].get ( ) ); if( ! childConcat ) continue; // if iteration is first element of concatenation - UnboundedRegExpIteration < alphabet::Symbol > * iter = dynamic_cast<UnboundedRegExpIteration < alphabet::Symbol > *>( childConcat->getElements().front( ).get() ); + UnboundedRegExpIteration < SymbolType > * iter = dynamic_cast<UnboundedRegExpIteration < SymbolType > *>( childConcat->getElements().front( ).get() ); if( ! iter ) continue; // concatenation without the iteration node - UnboundedRegExpConcatenation < alphabet::Symbol > tmpConcat ( * childConcat ); + UnboundedRegExpConcatenation < SymbolType > tmpConcat ( * childConcat ); tmpConcat.getChildren ( ).erase( tmpConcat.getChildren ( ).begin( ) ); - UnboundedRegExpElement < alphabet::Symbol > * tmpConcatOpt = optimizeInner ( tmpConcat ); + UnboundedRegExpElement < SymbolType > * tmpConcatOpt = optimizeInner ( tmpConcat ); // check if the iteration element is the same as the rest of the concatenation if( iter->getElement() == * tmpConcatOpt ) { @@ -435,16 +455,17 @@ bool RegExpOptimize::A10( UnboundedRegExpAlternation < alphabet::Symbol > & node /** * optimization A11: x* = (\e + x)* - * @param node UnboundedRegExpIteration < alphabet::Symbol > node + * @param node UnboundedRegExpIteration < SymbolType > node * @return bool true if optimization applied else false */ -bool RegExpOptimize::A11( UnboundedRegExpIteration < alphabet::Symbol > & node ) { - UnboundedRegExpAlternation < alphabet::Symbol > * childAlt = dynamic_cast<UnboundedRegExpAlternation < alphabet::Symbol > *>( node.getChild ( ).get() ); +template < class SymbolType > +bool RegExpOptimize::A11( UnboundedRegExpIteration < SymbolType > & node ) { + UnboundedRegExpAlternation < SymbolType > * childAlt = dynamic_cast<UnboundedRegExpAlternation < SymbolType > *>( node.getChild ( ).get() ); if( childAlt ) { // check if eps inside iteration's alternation - auto eps = find_if( childAlt->getChildren ( ).begin( ), childAlt->getChildren ( ).end( ), [ ]( const std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > & a ) -> bool { - return dynamic_cast<UnboundedRegExpEpsilon < alphabet::Symbol > *>( a.get() ); + auto eps = find_if( childAlt->getChildren ( ).begin( ), childAlt->getChildren ( ).end( ), [ ]( const std::smart_ptr < UnboundedRegExpElement < SymbolType > > & a ) -> bool { + return dynamic_cast<UnboundedRegExpEpsilon < SymbolType > *>( a.get() ); }); // if no eps @@ -461,21 +482,23 @@ bool RegExpOptimize::A11( UnboundedRegExpIteration < alphabet::Symbol > & node ) /** * optimization V1: \0* = \e - * @param node UnboundedRegExpIteration < alphabet::Symbol > node + * @param node UnboundedRegExpIteration < SymbolType > node * @return bool true if optimization applied else false */ -bool RegExpOptimize::V1( UnboundedRegExpIteration < alphabet::Symbol > &) { - // implemented in optimize( UnboundedRegExpIteration < alphabet::Symbol > ) +template < class SymbolType > +bool RegExpOptimize::V1( UnboundedRegExpIteration < SymbolType > &) { + // implemented in optimize( UnboundedRegExpIteration < SymbolType > ) return false; } /** * optimization V2: x* + x = x* - * @param node UnboundedRegExpAlternation < alphabet::Symbol > node + * @param node UnboundedRegExpAlternation < SymbolType > node * @return bool true if optimization applied else false */ -bool RegExpOptimize::V2( UnboundedRegExpAlternation < alphabet::Symbol > & node ) { +template < class SymbolType > +bool RegExpOptimize::V2( UnboundedRegExpAlternation < SymbolType > & node ) { bool optimized = false; /* @@ -483,14 +506,14 @@ bool RegExpOptimize::V2( UnboundedRegExpAlternation < alphabet::Symbol > & node * We need also to cover the cases like ( a + b + d )* + ( e )* + a + b + c + e = ( a + b + d )* + ( e )* + c */ - std::vector < UnboundedRegExpElement < alphabet::Symbol > * > iterElements; + std::vector < UnboundedRegExpElement < SymbolType > * > iterElements; // cache iter elements because of operator invalidation after erase - for( const std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > & n : node.getElements ( ) ) { - UnboundedRegExpIteration < alphabet::Symbol > * iter = dynamic_cast < UnboundedRegExpIteration < alphabet::Symbol > * > ( n.get ( ) ); + for( const std::smart_ptr < UnboundedRegExpElement < SymbolType > > & n : node.getElements ( ) ) { + UnboundedRegExpIteration < SymbolType > * iter = dynamic_cast < UnboundedRegExpIteration < SymbolType > * > ( n.get ( ) ); if( iter ) { - UnboundedRegExpAlternation < alphabet::Symbol > * inner = dynamic_cast < UnboundedRegExpAlternation < alphabet::Symbol > * > ( iter->getChild ( ).get ( ) ); + UnboundedRegExpAlternation < SymbolType > * inner = dynamic_cast < UnboundedRegExpAlternation < SymbolType > * > ( iter->getChild ( ).get ( ) ); if ( inner ) - for ( const std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > & innerElement : inner->getElements ( ) ) + for ( const std::smart_ptr < UnboundedRegExpElement < SymbolType > > & innerElement : inner->getElements ( ) ) iterElements.push_back ( innerElement.get() ); else iterElements.push_back ( iter->getChild ( ).get ( ) ); @@ -498,8 +521,8 @@ bool RegExpOptimize::V2( UnboundedRegExpAlternation < alphabet::Symbol > & node } } - for( UnboundedRegExpElement < alphabet::Symbol > * n : iterElements ) { - auto it = find_if( node.getChildren().begin( ), node.getChildren().end( ), [ n ] ( const std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > & a ) -> bool { + for( UnboundedRegExpElement < SymbolType > * n : iterElements ) { + auto it = find_if( node.getChildren().begin( ), node.getChildren().end( ), [ n ] ( const std::smart_ptr < UnboundedRegExpElement < SymbolType > > & a ) -> bool { return *a == *n; }); @@ -516,11 +539,12 @@ bool RegExpOptimize::V2( UnboundedRegExpAlternation < alphabet::Symbol > & node /** * optimization V3: x** = x* - * @param node UnboundedRegExpIteration < alphabet::Symbol > node + * @param node UnboundedRegExpIteration < SymbolType > node * @return bool true if optimization applied else false */ -bool RegExpOptimize::V3( UnboundedRegExpIteration < alphabet::Symbol > & node ) { - UnboundedRegExpIteration < alphabet::Symbol >* childIter = dynamic_cast<UnboundedRegExpIteration < alphabet::Symbol >*>( node.getChild ( ).get() ); +template < class SymbolType > +bool RegExpOptimize::V3( UnboundedRegExpIteration < SymbolType > & node ) { + UnboundedRegExpIteration < SymbolType >* childIter = dynamic_cast<UnboundedRegExpIteration < SymbolType >*>( node.getChild ( ).get() ); if( childIter ) { node.setChild ( std::move ( childIter->getChild ( ) ) ); @@ -532,31 +556,33 @@ bool RegExpOptimize::V3( UnboundedRegExpIteration < alphabet::Symbol > & node ) /** * optimization V4: (x+y)* = (x*y*)* - * @param node UnboundedRegExpIteration < alphabet::Symbol > node + * @param node UnboundedRegExpIteration < SymbolType > node * @return bool true if optimization applied else false */ -bool RegExpOptimize::V4( UnboundedRegExpIteration < alphabet::Symbol > & node ) { +template < class SymbolType > +bool RegExpOptimize::V4( UnboundedRegExpIteration < SymbolType > & node ) { // interpretation: if iteration's element is concat and every concat's element is iteration - UnboundedRegExpConcatenation < alphabet::Symbol >* cont = dynamic_cast<UnboundedRegExpConcatenation < alphabet::Symbol >*>( node.getChild ( ).get() ); - if( ! cont || ! all_of( cont->getChildren ( ).begin( ), cont->getChildren ( ).end( ), [] ( const std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > & a ) -> bool{ return dynamic_cast<UnboundedRegExpIteration < alphabet::Symbol > * >( a.get() ); } ) ) + UnboundedRegExpConcatenation < SymbolType >* cont = dynamic_cast<UnboundedRegExpConcatenation < SymbolType >*>( node.getChild ( ).get() ); + if( ! cont || ! all_of( cont->getChildren ( ).begin( ), cont->getChildren ( ).end( ), [] ( const std::smart_ptr < UnboundedRegExpElement < SymbolType > > & a ) -> bool{ return dynamic_cast<UnboundedRegExpIteration < SymbolType > * >( a.get() ); } ) ) return false; - UnboundedRegExpAlternation < alphabet::Symbol > newAlt; + UnboundedRegExpAlternation < SymbolType > newAlt; for( const auto & n : cont->getChildren ( ) ) - newAlt.pushBackChild ( std::move ( static_cast < UnboundedRegExpIteration < alphabet::Symbol > * > ( n.get ( ) )->getChild ( ) ) ); + newAlt.pushBackChild ( std::move ( static_cast < UnboundedRegExpIteration < SymbolType > * > ( n.get ( ) )->getChild ( ) ) ); - node.setChild ( std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > ( optimizeInner ( newAlt ) ) ); + node.setChild ( std::smart_ptr < UnboundedRegExpElement < SymbolType > > ( optimizeInner ( newAlt ) ) ); return true; } /** * optimization V5: x*y = y + x*xy - * @param node UnboundedRegExpAlternation < alphabet::Symbol > node + * @param node UnboundedRegExpAlternation < SymbolType > node * @return bool true if optimization applied else false */ -bool RegExpOptimize::V5( UnboundedRegExpAlternation < alphabet::Symbol > & node ) { +template < class SymbolType > +bool RegExpOptimize::V5( UnboundedRegExpAlternation < SymbolType > & node ) { bool optimized = false; // reinterpretation: ax*y = ay+ax*xy @@ -567,14 +593,14 @@ bool RegExpOptimize::V5( UnboundedRegExpAlternation < alphabet::Symbol > & node // prefix.x*x.suffix + prefix.suffix = prefix.x*.suffix for( auto itA = node.getChildren().begin( ); itA != node.getChildren().end( ); ) { - UnboundedRegExpConcatenation < alphabet::Symbol > * concat = dynamic_cast<UnboundedRegExpConcatenation < alphabet::Symbol >*>( itA->get() ); + UnboundedRegExpConcatenation < SymbolType > * concat = dynamic_cast<UnboundedRegExpConcatenation < SymbolType >*>( itA->get() ); if( ! concat ) { ++ itA; continue; } for( auto itC = concat->getChildren().begin( ); itC != std::prev( concat->getChildren().end( ) ); ) { - UnboundedRegExpIteration < alphabet::Symbol > * iter = dynamic_cast<UnboundedRegExpIteration < alphabet::Symbol >*>( itC->get() ); + UnboundedRegExpIteration < SymbolType > * iter = dynamic_cast<UnboundedRegExpIteration < SymbolType >*>( itC->get() ); if( ! iter ) { ++ itC; continue; @@ -584,12 +610,12 @@ bool RegExpOptimize::V5( UnboundedRegExpAlternation < alphabet::Symbol > & node auto itStartY = std::next( itC ); //itStartY points to y in expression x*xy // if iter's element is concat - if( dynamic_cast<UnboundedRegExpConcatenation < alphabet::Symbol >*>( iter->getChild().get() ) ) { - UnboundedRegExpConcatenation < alphabet::Symbol > * iterConcat = dynamic_cast<UnboundedRegExpConcatenation < alphabet::Symbol >*>( iter->getChild().get() ); + if( dynamic_cast<UnboundedRegExpConcatenation < SymbolType >*>( iter->getChild().get() ) ) { + UnboundedRegExpConcatenation < SymbolType > * iterConcat = dynamic_cast<UnboundedRegExpConcatenation < SymbolType >*>( iter->getChild().get() ); if( iterConcat->getChildren().size( ) != ( unsigned ) distance( std::next( itC ), concat->getChildren().end( ) ) || ! equal( iterConcat->getChildren().begin( ), iterConcat->getChildren().end( ), std::next( itC ), - [ ]( const std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > & a, const std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > & b ) -> bool{ return *a == *b; } ) ) { + [ ]( const std::smart_ptr < UnboundedRegExpElement < SymbolType > > & a, const std::smart_ptr < UnboundedRegExpElement < SymbolType > > & b ) -> bool{ return *a == *b; } ) ) { ++ itC; continue; } @@ -604,28 +630,28 @@ bool RegExpOptimize::V5( UnboundedRegExpAlternation < alphabet::Symbol > & node } // store everything before iteration as "a" - UnboundedRegExpConcatenation < alphabet::Symbol > tmpAY; + UnboundedRegExpConcatenation < SymbolType > tmpAY; if( concat->getChildren().begin( ) == itC ) { - tmpAY.pushBackChild( std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > ( new UnboundedRegExpEpsilon < alphabet::Symbol >( ) ) ); + tmpAY.pushBackChild( std::smart_ptr < UnboundedRegExpElement < SymbolType > > ( new UnboundedRegExpEpsilon < SymbolType >( ) ) ); } else { - UnboundedRegExpConcatenation < alphabet::Symbol > tmpA; + UnboundedRegExpConcatenation < SymbolType > tmpA; tmpA.insert( tmpA.getChildren().end( ), concat->getChildren().begin( ), itC ); - tmpAY.pushBackChild( std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > ( optimizeInner( tmpA ) ) ); + tmpAY.pushBackChild( std::smart_ptr < UnboundedRegExpElement < SymbolType > > ( optimizeInner( tmpA ) ) ); } // store everything behind iteration's followup element as "y" if( itStartY == concat->getChildren().end( ) ) { - tmpAY.pushBackChild( std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > ( new UnboundedRegExpEpsilon < alphabet::Symbol >( ) ) ); + tmpAY.pushBackChild( std::smart_ptr < UnboundedRegExpElement < SymbolType > > ( new UnboundedRegExpEpsilon < SymbolType >( ) ) ); } else { - UnboundedRegExpConcatenation < alphabet::Symbol > tmpY; + UnboundedRegExpConcatenation < SymbolType > tmpY; tmpY.insert( tmpY.getChildren().end( ), itStartY, concat->getChildren().end( ) ); - tmpAY.pushBackChild( std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > ( optimizeInner( tmpY ) ) ); + tmpAY.pushBackChild( std::smart_ptr < UnboundedRegExpElement < SymbolType > > ( optimizeInner( tmpY ) ) ); } // concatenate "a" and "y" and see if they exist somewhere in parent alternation ( node.getChildren() ) - UnboundedRegExpElement < alphabet::Symbol > * regexpAY = optimizeInner( tmpAY ); + UnboundedRegExpElement < SymbolType > * regexpAY = optimizeInner( tmpAY ); - auto iterAY = find_if( node.getChildren().begin( ), node.getChildren().end( ), [ regexpAY ] ( const std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > & a ) -> bool{ return *a == *regexpAY; } ); + auto iterAY = find_if( node.getChildren().begin( ), node.getChildren().end( ), [ regexpAY ] ( const std::smart_ptr < UnboundedRegExpElement < SymbolType > > & a ) -> bool{ return *a == *regexpAY; } ); delete regexpAY; if( iterAY == node.getChildren().end( ) ) { ++ itC; @@ -635,7 +661,7 @@ bool RegExpOptimize::V5( UnboundedRegExpAlternation < alphabet::Symbol > & node tmpAY.insert( tmpAY.getChildren ( ).begin ( ) + 1, * itC ); - node.setChild( std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > ( optimizeInner( tmpAY ) ), itA ); + node.setChild( std::smart_ptr < UnboundedRegExpElement < SymbolType > > ( optimizeInner( tmpAY ) ), itA ); itA = node.getChildren().erase( iterAY ); @@ -651,10 +677,11 @@ bool RegExpOptimize::V5( UnboundedRegExpAlternation < alphabet::Symbol > & node /** * optimization V6: x*y = y + xx*y - * @param node UnboundedRegExpAlternation < alphabet::Symbol > node + * @param node UnboundedRegExpAlternation < SymbolType > node * @return bool true if optimization applied else false */ -bool RegExpOptimize::V6( UnboundedRegExpAlternation < alphabet::Symbol > & node ) { +template < class SymbolType > +bool RegExpOptimize::V6( UnboundedRegExpAlternation < SymbolType > & node ) { bool optimized = false; // reinterpretation: ax*y = ay+axx*y @@ -665,14 +692,14 @@ bool RegExpOptimize::V6( UnboundedRegExpAlternation < alphabet::Symbol > & node // prefix.xx*.suffix + prefix.suffix = prefix.x*.suffix for( auto itA = node.getChildren ( ).begin( ); itA != node.getChildren ( ).end( ); ) { - UnboundedRegExpConcatenation < alphabet::Symbol > * concat = dynamic_cast<UnboundedRegExpConcatenation < alphabet::Symbol >*>( itA->get() ); + UnboundedRegExpConcatenation < SymbolType > * concat = dynamic_cast<UnboundedRegExpConcatenation < SymbolType >*>( itA->get() ); if( ! concat ) { ++ itA; continue; } for( auto itC = std::next( concat->getChildren ( ).begin( ) ); itC != concat->getChildren ( ).end( ); ) { - UnboundedRegExpIteration < alphabet::Symbol > * iter = dynamic_cast<UnboundedRegExpIteration < alphabet::Symbol >*>( itC->get() ); + UnboundedRegExpIteration < SymbolType > * iter = dynamic_cast<UnboundedRegExpIteration < SymbolType >*>( itC->get() ); if( ! iter ) { ++ itC; continue; @@ -682,7 +709,7 @@ bool RegExpOptimize::V6( UnboundedRegExpAlternation < alphabet::Symbol > & node auto itStartX = itC; //itStartX points to first x in expression xx*, everything before is therefore prefix - regexp "a" // if iter's element is concat - UnboundedRegExpConcatenation < alphabet::Symbol > * iterConcat = dynamic_cast<UnboundedRegExpConcatenation < alphabet::Symbol >*>( iter->getChild ( ).get() ); + UnboundedRegExpConcatenation < SymbolType > * iterConcat = dynamic_cast<UnboundedRegExpConcatenation < SymbolType >*>( iter->getChild ( ).get() ); if( iterConcat ) { if( distance( concat->getChildren ( ).begin( ), itC ) < (int) iterConcat->getChildren ( ).size( ) ) { @@ -693,7 +720,7 @@ bool RegExpOptimize::V6( UnboundedRegExpAlternation < alphabet::Symbol > & node if( iterConcat->getChildren ( ).size( ) != ( unsigned ) distance( itStartX, concat->getChildren ( ).end( ) ) || ! equal( iterConcat->getChildren().begin( ), iterConcat->getChildren().end( ), itStartX, - []( const std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > & a, const std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > & b ) -> bool{ return *a == *b; } ) ) { + []( const std::smart_ptr < UnboundedRegExpElement < SymbolType > > & a, const std::smart_ptr < UnboundedRegExpElement < SymbolType > > & b ) -> bool{ return *a == *b; } ) ) { ++ itC; continue; } @@ -707,26 +734,26 @@ bool RegExpOptimize::V6( UnboundedRegExpAlternation < alphabet::Symbol > & node } // concatenate "a" and "y" and see if they exist somewhere in parent alternation ( node->getChildren() ) - UnboundedRegExpConcatenation < alphabet::Symbol > tmpAY; + UnboundedRegExpConcatenation < SymbolType > tmpAY; if( concat->getChildren().begin( ) == itStartX ) { - tmpAY.pushBackChild( std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > ( new UnboundedRegExpEpsilon < alphabet::Symbol >( ) ) ); + tmpAY.pushBackChild( std::smart_ptr < UnboundedRegExpElement < SymbolType > > ( new UnboundedRegExpEpsilon < SymbolType >( ) ) ); } else { - UnboundedRegExpConcatenation < alphabet::Symbol > tmpA; + UnboundedRegExpConcatenation < SymbolType > tmpA; tmpA.insert( tmpA.getChildren().end( ), concat->getChildren().begin( ), itStartX ); - tmpAY.pushBackChild( std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > ( optimizeInner( tmpA ) ) ); + tmpAY.pushBackChild( std::smart_ptr < UnboundedRegExpElement < SymbolType > > ( optimizeInner( tmpA ) ) ); } if( std::next( itC ) == concat->getChildren().end( ) ) { - tmpAY.pushBackChild( std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > ( new UnboundedRegExpEpsilon < alphabet::Symbol >( ) ) ); + tmpAY.pushBackChild( std::smart_ptr < UnboundedRegExpElement < SymbolType > > ( new UnboundedRegExpEpsilon < SymbolType >( ) ) ); } else { - UnboundedRegExpConcatenation < alphabet::Symbol > tmpY; + UnboundedRegExpConcatenation < SymbolType > tmpY; tmpY.insert( tmpY.getChildren().end( ), std::next( itC ), concat->getChildren ( ).end( ) ); - tmpAY.pushBackChild( std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > ( optimizeInner( tmpY ) ) ); + tmpAY.pushBackChild( std::smart_ptr < UnboundedRegExpElement < SymbolType > > ( optimizeInner( tmpY ) ) ); } - UnboundedRegExpElement < alphabet::Symbol > * regexpAY = optimizeInner( tmpAY ); + UnboundedRegExpElement < SymbolType > * regexpAY = optimizeInner( tmpAY ); - auto iterAY = find_if( node.getChildren().begin( ), node.getChildren().end( ), [ regexpAY ] ( const std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > & a ) -> bool{ return *a == *regexpAY; } ); + auto iterAY = find_if( node.getChildren().begin( ), node.getChildren().end( ), [ regexpAY ] ( const std::smart_ptr < UnboundedRegExpElement < SymbolType > > & a ) -> bool{ return *a == *regexpAY; } ); delete regexpAY; if( iterAY == node.getChildren().end( ) ) { ++ itC; @@ -737,7 +764,7 @@ bool RegExpOptimize::V6( UnboundedRegExpAlternation < alphabet::Symbol > & node // if so make a x* y and replace a x x* y tmpAY.insert( tmpAY.getChildren ( ).begin ( ) + 1, * itC ); - node.setChild( std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > ( optimizeInner( tmpAY ) ), itA ); + node.setChild( std::smart_ptr < UnboundedRegExpElement < SymbolType > > ( optimizeInner( tmpAY ) ), itA ); // remove a y itA = node.getChildren().erase( iterAY ); @@ -754,10 +781,11 @@ bool RegExpOptimize::V6( UnboundedRegExpAlternation < alphabet::Symbol > & node /** * optimization V8: \e in h(x) => xx*=x* - * @param node UnboundedRegExpConcatenation < alphabet::Symbol > node + * @param node UnboundedRegExpConcatenation < SymbolType > node * @return bool true if optimization applied else false */ -bool RegExpOptimize::V8( UnboundedRegExpConcatenation < alphabet::Symbol > & node ) { +template < class SymbolType > +bool RegExpOptimize::V8( UnboundedRegExpConcatenation < SymbolType > & node ) { bool optimized = false; // interpretation: if there is iteration in concatenation node, and element of iteration contains eps and is straight before this iteration, then this element can be omitted @@ -766,7 +794,7 @@ bool RegExpOptimize::V8( UnboundedRegExpConcatenation < alphabet::Symbol > & nod return false; for( auto it = next ( node.getChildren ( ).begin( ) ); it != node.getChildren ( ).end( ); ) { - UnboundedRegExpIteration < alphabet::Symbol >* iter = dynamic_cast<UnboundedRegExpIteration < alphabet::Symbol >*>( it->get() ); + UnboundedRegExpIteration < SymbolType >* iter = dynamic_cast<UnboundedRegExpIteration < SymbolType >*>( it->get() ); if( ! iter ) { ++ it; @@ -774,7 +802,7 @@ bool RegExpOptimize::V8( UnboundedRegExpConcatenation < alphabet::Symbol > & nod } // if element of iteration is concatenation, we need to check this specially - UnboundedRegExpConcatenation < alphabet::Symbol > * concat = dynamic_cast<UnboundedRegExpConcatenation < alphabet::Symbol >*>( iter->getChild ( ).get() ); + UnboundedRegExpConcatenation < SymbolType > * concat = dynamic_cast<UnboundedRegExpConcatenation < SymbolType >*>( iter->getChild ( ).get() ); if( concat ) { // check if not out of bounds @@ -789,7 +817,7 @@ bool RegExpOptimize::V8( UnboundedRegExpConcatenation < alphabet::Symbol > & nod if( regexp::properties::RegExpEpsilon::languageContainsEpsilon(*concat) && concat->getChildren().size ( ) == ( unsigned ) distance ( it2, node.getChildren ( ).end( ) ) && - equal( concat->getChildren ( ).begin( ), concat->getChildren ( ).end( ), it2, [] ( const std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > & a, const std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > & b ) -> bool { return *a == *b; } ) ) { + equal( concat->getChildren ( ).begin( ), concat->getChildren ( ).end( ), it2, [] ( const std::smart_ptr < UnboundedRegExpElement < SymbolType > > & a, const std::smart_ptr < UnboundedRegExpElement < SymbolType > > & b ) -> bool { return *a == *b; } ) ) { optimized = true; it = node.getChildren().erase( it2, it ); @@ -815,22 +843,23 @@ bool RegExpOptimize::V8( UnboundedRegExpConcatenation < alphabet::Symbol > & nod /** * optimization V9: (xy)*x = x(yx)* - * @param node UnboundedRegExpConcatenation < alphabet::Symbol > node + * @param node UnboundedRegExpConcatenation < SymbolType > node * @return bool true if optimization applied else false */ -bool RegExpOptimize::V9( UnboundedRegExpConcatenation < alphabet::Symbol > & node ) { +template < class SymbolType > +bool RegExpOptimize::V9( UnboundedRegExpConcatenation < SymbolType > & node ) { bool optimized = false; // interpretation: if concat (C1) with iter && iteration's element is concat (C2), then: // simultaneously iterate through C1 and C2. (axy)*axz=ax(yax)*z -> get ax that is same and relocate them... for( auto it = node.getChildren().begin( ) ; it != node.getChildren().end( ) ; ) { - UnboundedRegExpIteration < alphabet::Symbol > * iter = dynamic_cast<UnboundedRegExpIteration < alphabet::Symbol >*>( it->get() ); + UnboundedRegExpIteration < SymbolType > * iter = dynamic_cast<UnboundedRegExpIteration < SymbolType >*>( it->get() ); if ( ! iter ) { ++ it; continue; } - UnboundedRegExpConcatenation < alphabet::Symbol > * concat = dynamic_cast<UnboundedRegExpConcatenation < alphabet::Symbol >*>( iter->getChild().get() ); + UnboundedRegExpConcatenation < SymbolType > * concat = dynamic_cast<UnboundedRegExpConcatenation < SymbolType >*>( iter->getChild().get() ); if( ! concat ) { ++it; continue; @@ -849,12 +878,12 @@ bool RegExpOptimize::V9( UnboundedRegExpConcatenation < alphabet::Symbol > & nod } // std::cout << "xy" << std::endl; - // UnboundedRegExpConcatenation < alphabet::Symbol >* tmp = new UnboundedRegExpConcatenation < alphabet::Symbol >( ); + // UnboundedRegExpConcatenation < SymbolType >* tmp = new UnboundedRegExpConcatenation < SymbolType >( ); // tmp->insert( tmp->getChildren().end( ), std::next( it ), c1Iter ); // std::cout << RegExp( tmp ) << std::endl; // copy the range <it;sth>, delete it and go back to the iter node - std::vector<std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > > copyRange; + std::vector<std::smart_ptr < UnboundedRegExpElement < SymbolType > > > copyRange; copyRange.insert( copyRange.end(), std::next( it ), c1Iter ); it = node.getChildren().erase( std::next( it ), c1Iter ); it = std::prev( it ); @@ -875,18 +904,19 @@ bool RegExpOptimize::V9( UnboundedRegExpConcatenation < alphabet::Symbol > & nod /** * optimization V10: (x+y)* = (x*+y*)* * generalized to (x+y)* = (x+y*)* = (x*+y)* = (x*+y*)* - * @param node UnboundedRegExpIteration < alphabet::Symbol > node + * @param node UnboundedRegExpIteration < SymbolType > node * @return bool true if optimization applied else false */ -bool RegExpOptimize::V10( UnboundedRegExpIteration < alphabet::Symbol > & node ) { +template < class SymbolType > +bool RegExpOptimize::V10( UnboundedRegExpIteration < SymbolType > & node ) { // interpretation: if iter's child is alternation where some of its children are iteration, then they do not have to be iterations - UnboundedRegExpAlternation < alphabet::Symbol >* alt = dynamic_cast<UnboundedRegExpAlternation < alphabet::Symbol >*>( node.getChild ( ).get() ); - if( ! alt || ! any_of( alt->getChildren ( ).begin( ), alt->getChildren ( ).end( ), [] ( std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > & a ) -> bool{ return dynamic_cast<UnboundedRegExpIteration < alphabet::Symbol > * >( a.get() ); } ) ) + UnboundedRegExpAlternation < SymbolType >* alt = dynamic_cast<UnboundedRegExpAlternation < SymbolType >*>( node.getChild ( ).get() ); + if( ! alt || ! any_of( alt->getChildren ( ).begin( ), alt->getChildren ( ).end( ), [] ( std::smart_ptr < UnboundedRegExpElement < SymbolType > > & a ) -> bool{ return dynamic_cast<UnboundedRegExpIteration < SymbolType > * >( a.get() ); } ) ) return false; for( auto it = alt->getChildren ( ).begin( ); it != alt->getChildren ( ).end( ); ++it ) { - if ( dynamic_cast < UnboundedRegExpIteration < alphabet::Symbol > * > ( it->get ( ) ) ) - alt->setChild ( std::move ( static_cast < UnboundedRegExpIteration < alphabet::Symbol > * >( it->get ( ) )->getChild ( ) ), it ); + if ( dynamic_cast < UnboundedRegExpIteration < SymbolType > * > ( it->get ( ) ) ) + alt->setChild ( std::move ( static_cast < UnboundedRegExpIteration < SymbolType > * >( it->get ( ) )->getChild ( ) ), it ); } return true; @@ -894,15 +924,16 @@ bool RegExpOptimize::V10( UnboundedRegExpIteration < alphabet::Symbol > & node ) /** * optimization X1: a* + \e = a* - * @param node UnboundedRegExpAlternation < alphabet::Symbol > node + * @param node UnboundedRegExpAlternation < SymbolType > node * @return bool true if optimization applied else false */ -bool RegExpOptimize::X1( UnboundedRegExpAlternation < alphabet::Symbol > & node ) { +template < class SymbolType > +bool RegExpOptimize::X1( UnboundedRegExpAlternation < SymbolType > & node ) { // theorem: In regexp like a* + \e, \e is described twice, first in a*, second in \e. // therefore we can delete the \e as it is redundant - auto iter = find_if( node.getChildren ( ).begin( ), node.getChildren ( ).end( ), [] (const std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > & a ) -> bool { return dynamic_cast<UnboundedRegExpIteration < alphabet::Symbol > * >( a.get() );} ); - auto eps = find_if( node.getChildren ( ).begin( ), node.getChildren ( ).end( ), [] (const std::smart_ptr < UnboundedRegExpElement < alphabet::Symbol > > & a ) -> bool { return dynamic_cast<UnboundedRegExpEpsilon < alphabet::Symbol > * >( a.get() );} ); + auto iter = find_if( node.getChildren ( ).begin( ), node.getChildren ( ).end( ), [] (const std::smart_ptr < UnboundedRegExpElement < SymbolType > > & a ) -> bool { return dynamic_cast<UnboundedRegExpIteration < SymbolType > * >( a.get() );} ); + auto eps = find_if( node.getChildren ( ).begin( ), node.getChildren ( ).end( ), [] (const std::smart_ptr < UnboundedRegExpElement < SymbolType > > & a ) -> bool { return dynamic_cast<UnboundedRegExpEpsilon < SymbolType > * >( a.get() );} ); if( iter != node.getChildren ( ).end( ) && eps != node.getChildren ( ).end( ) ) { node.getChildren ( ).erase( eps ); -- GitLab