From 8b40dffe534cf0e0eae3e7c92250069cc75ee586 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <peckato1@fit.cvut.cz> Date: Tue, 4 Feb 2014 12:19:48 +0100 Subject: [PATCH] RegExpNormalize adaptation to latest regexp changes --- .../src/derivatives/BrzozowskiDerivative.cpp | 7 +- .../src/derivatives/RegExpNormalize.cpp | 191 ++++++++++++------ .../src/derivatives/RegExpNormalize.h | 14 +- 3 files changed, 137 insertions(+), 75 deletions(-) diff --git a/aconversions/src/derivatives/BrzozowskiDerivative.cpp b/aconversions/src/derivatives/BrzozowskiDerivative.cpp index bfa61df775..6bf15d2e57 100644 --- a/aconversions/src/derivatives/BrzozowskiDerivative.cpp +++ b/aconversions/src/derivatives/BrzozowskiDerivative.cpp @@ -36,10 +36,9 @@ RegExp BrzozowskiDerivative::derivative ( const list<RegExpElement*> & dString ) throw ConversionException( "BrzozowskiDerivative::derivative - invalid/unknown RegExpElement node passed in dString." ); } - // RegExpNormalize norm; - // return norm.normalize( expression ); - - return expression; + RegExpNormalize norm; + return norm.normalize( expression ); + // return expression; } RegExpElement * BrzozowskiDerivative::derivative( RegExpElement * node, const RegExpSymbol & dSymbol ) const diff --git a/aconversions/src/derivatives/RegExpNormalize.cpp b/aconversions/src/derivatives/RegExpNormalize.cpp index d3ba54110a..bdc27918d6 100644 --- a/aconversions/src/derivatives/RegExpNormalize.cpp +++ b/aconversions/src/derivatives/RegExpNormalize.cpp @@ -19,130 +19,189 @@ RegExp RegExpNormalize::normalize( RegExpElement * element ) } -RegExpElement* RegExpNormalize::optimize( RegExpElement * element ) +RegExpElement* RegExpNormalize::optimize( RegExpElement * node ) { - if( element == NULL ) - return NULL; - - Alternation* alternation = dynamic_cast<Alternation*>( element ); - Concatenation* concatenation = dynamic_cast<Concatenation*>( element ); - Iteration* iteration = dynamic_cast<Iteration*>( element ); - RegExpSymbol* symbol = dynamic_cast<RegExpSymbol*>( element ); + Alternation * alternation = dynamic_cast<Alternation*>( node ); + Concatenation * concatenation = dynamic_cast<Concatenation*>( node ); + Iteration * iteration = dynamic_cast<Iteration*>( node ); + RegExpSymbol * symbol = dynamic_cast<RegExpSymbol*>( node ); + RegExpEmpty * empty= dynamic_cast<RegExpEmpty*>( node ); + RegExpEpsilon * eps = dynamic_cast<RegExpEpsilon*>( node ); if( alternation ) return optimize( alternation ); - else if( concatenation ) + if( concatenation ) return optimize( concatenation ); - else if( iteration ) + if( iteration ) return optimize( iteration ); - else if( symbol ) + if( symbol ) return optimize( symbol ); + if( empty ) + return optimize( empty); + if( eps ) + return optimize( eps ); - throw ConversionException( "RegExpNormalize::optimize - unknown RegExpElement type" ); + throw ConversionException( "RegExpNormalize::optimize - unknown RegExpElement node" ); } -RegExpElement * RegExpNormalize::optimize( Alternation * element ) +RegExpElement * RegExpNormalize::optimize( Alternation * node ) { - for( auto it = element->getElements( ).begin( ) ; it != element->getElements( ).end( ); it ++ ) + auto & nodeElements = node->getElements( ); + + for( auto it = nodeElements.begin( ) ; it != nodeElements.end( ); it ++ ) * it = optimize( * it ); - // 0 + x = x - erases all empty sets from Alternation - for( auto it = element->getElements( ).begin( ) ; it != element->getElements( ).end( ); ) + // Targets for optimization: Melichar, 2.87: Rule A1, A2, A3, A4, A8, A9 + + // A1: x + ( y + z ) = ( x + y ) + z + for( auto it = nodeElements.begin( ) ; it != nodeElements.end( ); ) + { + Alternation * childAlt = dynamic_cast<Alternation*> ( * it ); + if( childAlt ) + { + auto it2 = it; + it2 ++; + + nodeElements.insert( it2, childAlt->getElements( ).begin( ), childAlt->getElements( ).end( ) ); + + childAlt->getElements( ).clear( ); + delete childAlt; + it = nodeElements.erase( it ); + } + else + it ++; + } + + // TODO: A2: x + y = y + x + // reorder list by RegExpElement::operator< ? + + // A3: x + EMPTY = x + for( auto it = nodeElements.begin( ) ; it != nodeElements.end( ); ) { - if ( * it == NULL ) - it = element->getElements( ).erase( it ); + if ( dynamic_cast<RegExpEmpty*>( *it ) ) + { + delete *it; + it = nodeElements.erase( it ); + } else it ++; } - if( element->getElements( ).size( ) == 0 ) + // TODO: A4: x + x = x + // depends on RegExpElement::operator< + + + // Empty subtree and alternation of single node + if( node->getElements( ).size( ) == 0 ) { - delete element; - return NULL; + delete node; + return new RegExpEmpty( ); } - else if( element->getElements( ).size( ) == 1 ) + else if( node->getElements( ).size( ) == 1 ) { - RegExpElement * ret = * element->getElements( ).begin( ); - element->getElements( ).clear( ); - delete element; + RegExpElement * ret = * node->getElements( ).begin( ); + node->getElements( ).clear( ); + delete node; return ret; } - return element; + return node; } -RegExpElement * RegExpNormalize::optimize( Concatenation * element ) +RegExpElement * RegExpNormalize::optimize( Concatenation * node) { - for( auto it = element->getElements( ).begin( ) ; it != element->getElements( ).end( ); it ++ ) + auto & nodeElements = node->getElements( ); + + for( auto it = nodeElements.begin( ) ; it != nodeElements.end( ); it ++ ) * it = optimize( * it ); - // a.(b.c) = (a.b).c - for( auto it = element->getElements( ).begin( ) ; it != element->getElements( ).end( ); it ++ ) + // Targets for optimization: Melichar, 2.87: Rule A5, A6, A7 + + // A5: a.(b.c) = (a.b).c + for( auto it = nodeElements.begin( ) ; it != nodeElements.end( ); ) { - Concatenation* c = dynamic_cast<Concatenation*>( * it ); - if( ! c ) - continue; + Concatenation * childConcat = dynamic_cast<Concatenation*>( * it ); + if( childConcat ) + { + auto it2 = it; + it2 ++; - for( const auto & e : c->getElements( ) ) - element->getElements( ).insert( it, e ); + nodeElements.insert( it2, childConcat->getElements( ).begin( ), childConcat->getElements( ).end( ) ); - c->getElements( ).clear( ); - delete c; - it = element->getElements( ).erase( it ); + childConcat->getElements( ).clear( ); + delete childConcat; + it = nodeElements.erase( it ); + } + else + it ++; } - // 0.x = 0 - for( auto const & e : element->getElements( ) ) + // A7: EMPTY.x = EMPTY + for( auto const & childNode : nodeElements ) { - if( e == NULL ) + if( dynamic_cast<RegExpEmpty*>( childNode ) ) { - element->getElements( ).clear( ); - delete element; - return NULL; + nodeElements.clear( ); + delete node; + return new RegExpEmpty( ); } } - // delete all redundant epsilons - for( auto it = element->getElements( ).begin( ) ; it != element->getElements( ).end( ); ) + // A6: EPS.x = x.EPS = x + for( auto it = nodeElements.begin( ) ; it != nodeElements.end( ); ) { - RegExpSymbol* symbol = dynamic_cast<RegExpSymbol*>( * it ); - - if( symbol && symbol->getSymbol( ) == "" && element->getElements( ).size( ) > 1 ) - it = element->getElements( ).erase( it ); + // EPS.EPS.EPS.EPS = EPS + if( dynamic_cast<RegExpEpsilon*>( *it ) && nodeElements.size( ) > 1 ) + it = nodeElements.erase( it ); else it ++; } - if( element->getElements( ).size( ) == 1 ) + if( nodeElements.size( ) == 1 ) { - RegExpElement * ret = * element->getElements( ).begin( ); - element->getElements( ).clear( ); - delete element; + RegExpElement * ret = * nodeElements.begin( ); + nodeElements.clear( ); + delete node; return ret; } - return element; + return node; } -RegExpElement * RegExpNormalize::optimize( Iteration * element ) +RegExpElement * RegExpNormalize::optimize( Iteration * node ) { - element->setElement( optimize( element->getElement( ) ) ); + node->setElement( optimize( node->getElement( ) ) ); - // a********* = a* - Iteration* iteration; - while( ( iteration = dynamic_cast<Iteration*>( element->getElement( ) ) ) != NULL ) + // Targets for optimization: Melichar, 2.87: Rule A10, A11, a** = a* + + // a** = a* + Iteration* childIteration; + while( ( childIteration = dynamic_cast<Iteration*>( node->getElement( ) ) ) ) { - element->setElement( iteration->getElement( ) ); - iteration->setElement( NULL ); - delete iteration; + node->setElement( childIteration->getElement( ) ); + childIteration->setElement( NULL ); + delete childIteration; } - return element; + // TODO: A10: x* = EPS + x*x // prob must be done in Concatenation node + // TODO: A11: x* = ( EPS + x )* + + return node; +} + +RegExpElement * RegExpNormalize::optimize( RegExpSymbol * node ) +{ + return node; +} + +RegExpElement * RegExpNormalize::optimize( RegExpEmpty * node ) +{ + return node; } -RegExpElement * RegExpNormalize::optimize( RegExpSymbol * element ) +RegExpElement * RegExpNormalize::optimize( RegExpEpsilon * node ) { - return element; + return node; } diff --git a/aconversions/src/derivatives/RegExpNormalize.h b/aconversions/src/derivatives/RegExpNormalize.h index 9ebad0e398..7c16f28bc4 100644 --- a/aconversions/src/derivatives/RegExpNormalize.h +++ b/aconversions/src/derivatives/RegExpNormalize.h @@ -14,6 +14,8 @@ #include <regexp/Concatenation.h> #include <regexp/Iteration.h> #include <regexp/RegExpSymbol.h> +#include <regexp/RegExpEmpty.h> +#include <regexp/RegExpEpsilon.h> #include "../utils/ConversionException.h" @@ -26,11 +28,13 @@ public: regexp::RegExp normalize( regexp::RegExpElement* element ); private: - regexp::RegExpElement * optimize( regexp::RegExpElement* element ); - regexp::RegExpElement * optimize( regexp::Alternation * element ); - regexp::RegExpElement * optimize( regexp::Concatenation * element ); - regexp::RegExpElement * optimize( regexp::Iteration * element ); - regexp::RegExpElement * optimize( regexp::RegExpSymbol * element); + regexp::RegExpElement * optimize( regexp::RegExpElement* node ); + regexp::RegExpElement * optimize( regexp::Alternation * node ); + regexp::RegExpElement * optimize( regexp::Concatenation * node ); + regexp::RegExpElement * optimize( regexp::Iteration * node ); + regexp::RegExpElement * optimize( regexp::RegExpSymbol * node ); + regexp::RegExpElement * optimize( regexp::RegExpEpsilon * node ); + regexp::RegExpElement * optimize( regexp::RegExpEmpty * node ); }; } /* namespace conversions */ -- GitLab