From 516322c34ba54cb84a0c4350e639acd0ec605f11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <tomaspecka@gmail.com> Date: Mon, 20 Jan 2014 20:56:05 +0100 Subject: [PATCH] first steps at RE derivatives --- .../src/conversions/re2fa/Brzozowski.cpp | 8 + .../src/conversions/re2fa/Brzozowski.h | 1 + .../re2fa/BrzozowskiDerivative.cpp | 164 ++++++++++++++++++ .../conversions/re2fa/BrzozowskiDerivative.h | 54 ++++++ .../re2fa/BrzozowskiDerivativeNormalize.cpp | 66 +++++++ .../re2fa/BrzozowskiDerivativeNormalize.h | 36 ++++ 6 files changed, 329 insertions(+) create mode 100644 aconversions/src/conversions/re2fa/BrzozowskiDerivative.cpp create mode 100644 aconversions/src/conversions/re2fa/BrzozowskiDerivative.h create mode 100644 aconversions/src/conversions/re2fa/BrzozowskiDerivativeNormalize.cpp create mode 100644 aconversions/src/conversions/re2fa/BrzozowskiDerivativeNormalize.h diff --git a/aconversions/src/conversions/re2fa/Brzozowski.cpp b/aconversions/src/conversions/re2fa/Brzozowski.cpp index ba42a73b5d..d7ad39b0b8 100644 --- a/aconversions/src/conversions/re2fa/Brzozowski.cpp +++ b/aconversions/src/conversions/re2fa/Brzozowski.cpp @@ -10,6 +10,8 @@ using namespace automaton; using namespace regexp; +#include <iostream> + namespace conversions { @@ -20,6 +22,12 @@ Brzozowski::Brzozowski( const RegExp & re ) : AbstractREtoFAConverter( re ) const FSM Brzozowski::convert( void ) { + BrzozowskiDerivative bd( m_re ); + list<RegExpSymbol> string; + string.push_back( RegExpSymbol( "0" ) ); + bd.derivative( string ).toXML( cout ); + + return m_fsm; } diff --git a/aconversions/src/conversions/re2fa/Brzozowski.h b/aconversions/src/conversions/re2fa/Brzozowski.h index 3da6f85cd9..b3b3cc73de 100644 --- a/aconversions/src/conversions/re2fa/Brzozowski.h +++ b/aconversions/src/conversions/re2fa/Brzozowski.h @@ -9,6 +9,7 @@ #define BRZOZOWSKI_H_ #include "AbstractREtoFAConverter.h" +#include "BrzozowskiDerivative.h" namespace conversions { diff --git 
a/aconversions/src/conversions/re2fa/BrzozowskiDerivative.cpp b/aconversions/src/conversions/re2fa/BrzozowskiDerivative.cpp
new file mode 100644
index 0000000000..d2c9f896b6
--- /dev/null
+++ b/aconversions/src/conversions/re2fa/BrzozowskiDerivative.cpp
@@ -0,0 +1,230 @@
+/*
+ * BrzozowskiDerivative.cpp
+ *
+ * Created on: 19. 1. 2014
+ * Author: tomas
+ */
+
+#include "BrzozowskiDerivative.h"
+
+#include <memory>
+
+using namespace regexp;
+using namespace std;
+
+namespace conversions
+{
+
+/**
+ * Keeps a reference to the regular expression and caches the pointer to its root element.
+ * Only the reference is stored (m_re), so re has to outlive this object.
+ */
+BrzozowskiDerivative::BrzozowskiDerivative( const RegExp & re ) : m_re( re )
+{
+    //FIXME in alib!
+    // NOTE(review): presumably getRegExp( ) has no const overload, hence the const_cast - confirm.
+    m_regexpRoot = const_cast<RegExp&>( m_re ).getRegExp( );
+}
+
+/**
+ * Derives the stored regular expression by a whole string, one symbol at a time:
+ * dV/d(eps) = V and dV/d(ab) = d( dV/da )/db.
+ *
+ * @param string symbols to derive by, in order
+ * @return RegExp built from the resulting tree; the tree pointer is null when the result is the empty set
+ * @throws ConversionException if an element of unknown type is encountered
+ */
+RegExp BrzozowskiDerivative::derivative ( const list<RegExpSymbol> & string ) const
+{
+    // Start from a private copy so that every intermediate tree can be freed without touching m_re.
+    // NOTE(review): freeing through RegExpElement* assumes a virtual destructor - confirm in alib.
+    unique_ptr<RegExpElement> expression( m_regexpRoot->clone( ) );
+
+    for( const auto & symbol : string )
+    {
+        // reset( ) frees the operand only after its derivative has been computed.
+        expression.reset( derivative( expression.get( ), symbol ) );
+
+        // The empty set stays the empty set under any further derivation.
+        if( ! expression )
+            break;
+    }
+
+    // BrzozowskiDerivativeNormalize normalizer;
+    // expression = normalizer.normalize( expression );
+
+    return RegExp( expression.release( ) );
+}
+
+/**
+ * Applies the derivative rule that matches the dynamic type of element.
+ *
+ * @param element subtree to derive; it is not modified
+ * @param dSymbol symbol to derive by
+ * @return newly allocated tree owned by the caller, or null for the empty set
+ * @throws ConversionException if element is none of Alternation, Concatenation, Iteration, RegExpSymbol
+ */
+RegExpElement * BrzozowskiDerivative::derivative( RegExpElement * element, const RegExpSymbol & dSymbol ) const
+{
+    if( Alternation * alternation = dynamic_cast<Alternation*>( element ) )
+        return derivativeAlternation( alternation, dSymbol );
+
+    if( Concatenation * concatenation = dynamic_cast<Concatenation*>( element ) )
+        return derivativeConcatenation( concatenation, dSymbol );
+
+    if( Iteration * iteration = dynamic_cast<Iteration*>( element ) )
+        return derivativeIteration( iteration, dSymbol );
+
+    if( RegExpSymbol * symbol = dynamic_cast<RegExpSymbol*>( element ) )
+        return derivativeSymbol( symbol, dSymbol );
+
+    throw ConversionException( "BrzozowskiDerivative::derivative - unknown RegExpElement type" );
+}
+
+/**
+ * d(E1 + ... + En)/da = d(E1)/da + ... + d(En)/da; empty-set operands are dropped (rule A3, Melichar 2.87).
+ *
+ * @return new Alternation of the non-empty operand derivatives, or null if all of them are the empty set
+ */
+RegExpElement * BrzozowskiDerivative::derivativeAlternation( Alternation * element, const RegExpSymbol & dSymbol ) const
+{
+    unique_ptr<Alternation> ret( new Alternation( ) );
+    list<RegExpElement*> & l = ret->getElements( );
+
+    for( auto e : element->getElements( ) )
+    {
+        RegExpElement * d = derivative( e, dSymbol );
+        if( d != nullptr ) // rule A3, Melichar 2.87
+            l.push_back( d );
+    }
+
+    // Report the empty set as null like the other rules do, not as an alternation without operands.
+    if( l.empty( ) )
+        return nullptr;
+
+    return ret.release( );
+}
+
+/**
+ * d(E1 E2 ... En)/da = d(E1)/da E2 ... En, plus d(E2 ... En)/da if E1 can match epsilon.
+ *
+ * The recursion is unrolled: operand Ei contributes the term d(Ei)/da E(i+1) ... En as long as
+ * every operand in front of it can match epsilon. A term whose leading derivative is the empty
+ * set vanishes (rule A7, Melichar 2.87).
+ *
+ * @return null if no term is left; the single Concatenation term if E1 cannot match epsilon;
+ *         otherwise an Alternation of the remaining terms
+ */
+RegExpElement * BrzozowskiDerivative::derivativeConcatenation( Concatenation * element, const RegExpSymbol & dSymbol ) const
+{
+    const list<RegExpElement*> & operands = element->getElements( );
+
+    // Nothing to derive; also keeps front( ) below away from an empty list.
+    if( operands.empty( ) )
+        return nullptr;
+
+    const bool firstNullable = containsEpsilon( operands.front( ) );
+    unique_ptr<Alternation> terms( new Alternation( ) );
+
+    for( auto it = operands.begin( ); it != operands.end( ); ++it )
+    {
+        RegExpElement * d = derivative( *it, dSymbol );
+        if( d != nullptr )
+        {
+            Concatenation * term = new Concatenation( );
+            terms->getElements( ).push_back( term );
+
+            list<RegExpElement*> & l = term->getElements( );
+            l.push_back( d );
+
+            auto rest = it;
+            for( ++rest; rest != operands.end( ); ++rest )
+                l.push_back( ( *rest )->clone( ) );
+        }
+
+        // Operands further right are reachable only while everything before them can be skipped.
+        const bool nullable = ( it == operands.begin( ) ) ? firstNullable : containsEpsilon( *it );
+        if( ! nullable )
+            break;
+    }
+
+    list<RegExpElement*> & collected = terms->getElements( );
+    if( collected.empty( ) )
+        return nullptr;
+
+    if( ! firstNullable )
+    {
+        // Exactly one term exists; hand it out bare and unlink it so that terms does not free it.
+        RegExpElement * only = collected.front( );
+        collected.pop_front( );
+        return only;
+    }
+
+    return terms.release( );
+}
+
+/**
+ * d(E*)/da = d(E)/da E*.
+ *
+ * @return new Concatenation of d(E)/da and a copy of the iteration, or null if d(E)/da is the empty set
+ */
+RegExpElement * BrzozowskiDerivative::derivativeIteration( Iteration * element, const RegExpSymbol & dSymbol ) const
+{
+    RegExpElement * d = derivative( element->getElement( ), dSymbol );
+    if( d == nullptr )
+        return nullptr;
+
+    Concatenation * ret = new Concatenation( );
+    list<RegExpElement*> & l = ret->getElements( );
+    l.push_back( d );
+    l.push_back( element->clone( ) );
+    return ret;
+}
+
+/**
+ * d(a)/da = epsilon; d(b)/da = empty set for every other symbol b.
+ *
+ * @return new RegExpSymbol( "" ) (the form containsEpsilon( ) recognizes) on a match, null otherwise
+ */
+RegExpElement * BrzozowskiDerivative::derivativeSymbol( RegExpSymbol * element, const RegExpSymbol & dSymbol ) const
+{
+    if( dSymbol == element->getSymbol( ) )
+        return new RegExpSymbol( "" );
+
+    return nullptr;
+}
+
+/**
+ * Tells whether element can match the empty string.
+ *
+ * Alternation: some operand can. Concatenation: every operand can (so an empty one can).
+ * Iteration: always. RegExpSymbol: only the symbol equal to RegExpSymbol( "" ). Anything else: no.
+ */
+bool BrzozowskiDerivative::containsEpsilon( RegExpElement * element ) const
+{
+    if( Alternation * alternation = dynamic_cast<Alternation*>( element ) )
+    {
+        for( auto e : alternation->getElements( ) )
+            if( containsEpsilon( e ) )
+                return true;
+
+        return false;
+    }
+
+    if( Concatenation * concatenation = dynamic_cast<Concatenation*>( element ) )
+    {
+        for( auto e : concatenation->getElements( ) )
+            if( ! containsEpsilon( e ) )
+                return false;
+
+        return true;
+    }
+
+    if( dynamic_cast<Iteration*>( element ) )
+        return true;
+
+    RegExpSymbol * symbol = dynamic_cast<RegExpSymbol*>( element );
+    return symbol && *symbol == RegExpSymbol( "" );
+}
+
+} /* namespace conversions */
diff --git a/aconversions/src/conversions/re2fa/BrzozowskiDerivative.h b/aconversions/src/conversions/re2fa/BrzozowskiDerivative.h
new file mode 100644
index 0000000000..aaf55c4d13
--- /dev/null
+++ b/aconversions/src/conversions/re2fa/BrzozowskiDerivative.h
@@ -0,0 +1,54 @@
+/*
+ * BrzozowskiDerivative.h
+ *
+ * Created on: 19. 1. 2014
+ * Author: tomas
+ */
+
+#ifndef BRZOZOWSKIDERIVATIVE_H_
+#define BRZOZOWSKIDERIVATIVE_H_
+
+#include <regexp/RegExp.h>
+#include <regexp/RegExpElement.h>
+#include <regexp/Alternation.h>
+#include <regexp/Concatenation.h>
+#include <regexp/Iteration.h>
+#include <regexp/RegExpSymbol.h>
+
+#include <list>
+
+#include "BrzozowskiDerivativeNormalize.h"
+#include "../../utils/ConversionException.h"
+
+namespace conversions
+{
+
+/**
+ * Calculates derivative of regular expression re by string passed in derivative( ).
+ * Implements Melichar, definition 2.91 in chapter 2.4.3
+ */
+class BrzozowskiDerivative
+{
+public:
+    BrzozowskiDerivative( const regexp::RegExp & re );
+    regexp::RegExp derivative ( const std::list<regexp::RegExpSymbol> & string ) const;
+
+private:
+    regexp::RegExpElement * derivative( regexp::RegExpElement * element, const regexp::RegExpSymbol & dSymbol ) const;
+    regexp::RegExpElement * derivativeAlternation( regexp::Alternation * element, const regexp::RegExpSymbol & dSymbol ) const;
+    regexp::RegExpElement * derivativeConcatenation( regexp::Concatenation * element, const regexp::RegExpSymbol & dSymbol ) const;
+    regexp::RegExpElement * derivativeIteration( regexp::Iteration * element, const regexp::RegExpSymbol & dSymbol ) const;
+    regexp::RegExpElement * derivativeSymbol( regexp::RegExpSymbol * element, const regexp::RegExpSymbol & dSymbol ) const;
+
+    /**
+     * No warranty for this one, unsourced, created by me.
+     */
+    bool containsEpsilon( regexp::RegExpElement * element ) const;
+
+    const regexp::RegExp & m_re;
+    regexp::RegExpElement* m_regexpRoot;
+};
+
+} /* namespace conversions */
+
+#endif /* BRZOZOWSKIDERIVATIVE_H_ */
diff --git a/aconversions/src/conversions/re2fa/BrzozowskiDerivativeNormalize.cpp b/aconversions/src/conversions/re2fa/BrzozowskiDerivativeNormalize.cpp
new file mode 100644
index 0000000000..50a8af1400
--- /dev/null
+++ b/aconversions/src/conversions/re2fa/BrzozowskiDerivativeNormalize.cpp
@@ -0,0 +1,82 @@
+/*
+ * BrzozowskiDerivativeNormalize.cpp
+ *
+ * Created on: 20. 1. 2014
+ * Author: tomas
+ */
+
+#include "BrzozowskiDerivativeNormalize.h"
+
+#include <list>
+
+using namespace regexp;
+
+namespace conversions
+{
+
+/**
+ * Normalizes element with the rule that matches its dynamic type.
+ *
+ * @param element tree to normalize; it may be deleted, so continue with the returned pointer only
+ * @return element itself, or the operand that replaces it
+ * @throws ConversionException if element is none of Alternation, Concatenation, Iteration, RegExpSymbol
+ */
+RegExpElement* BrzozowskiDerivativeNormalize::normalize( RegExpElement * element ) const
+{
+    if( Alternation * alternation = dynamic_cast<Alternation*>( element ) )
+        return normalizeAlternation( alternation );
+
+    if( Concatenation * concatenation = dynamic_cast<Concatenation*>( element ) )
+        return normalizeConcatenation( concatenation );
+
+    if( Iteration * iteration = dynamic_cast<Iteration*>( element ) )
+        return normalizeIteration( iteration );
+
+    if( RegExpSymbol * symbol = dynamic_cast<RegExpSymbol*>( element ) )
+        return normalizeSymbol( symbol );
+
+    throw ConversionException( "BrzozowskiDerivativeNormalize::normalize - unknown RegExpElement type" );
+}
+
+/**
+ * Replaces an alternation that has exactly one operand by that operand.
+ *
+ * @return the sole operand after deleting element, or element itself if it has any other operand count
+ */
+RegExpElement * BrzozowskiDerivativeNormalize::normalizeAlternation( Alternation * element ) const
+{
+    // Bind by reference: the operand must be unlinked from element itself, not from a copy of its list.
+    // NOTE(review): assumes ~Alternation( ) deletes the operands that are still linked - confirm in alib.
+    std::list<RegExpElement*> & l = element->getElements( );
+
+    if( l.size( ) == 1 )
+    {
+        RegExpElement* child = l.front( );
+        l.pop_front( );
+        delete element;
+        return child;
+    }
+
+    return element;
+}
+
+/** Placeholder: returns element unchanged. */
+RegExpElement * BrzozowskiDerivativeNormalize::normalizeConcatenation( Concatenation * element ) const
+{
+    return element;
+}
+
+/** Placeholder: returns element unchanged; the recursive step is still commented out. */
+RegExpElement * BrzozowskiDerivativeNormalize::normalizeIteration( Iteration * element ) const
+{
+    // element->setElement( normalize( element->getElement( ) ) );
+    return element;
+}
+
+/** Returns element unchanged. */
+RegExpElement * BrzozowskiDerivativeNormalize::normalizeSymbol( RegExpSymbol * element ) const
+{
+    return element;
+}
+
+} /* namespace conversions */
diff --git a/aconversions/src/conversions/re2fa/BrzozowskiDerivativeNormalize.h b/aconversions/src/conversions/re2fa/BrzozowskiDerivativeNormalize.h
new file mode 100644
index 0000000000..11b1dbf8c6
--- /dev/null
+++ b/aconversions/src/conversions/re2fa/BrzozowskiDerivativeNormalize.h
@@ -0,0 +1,44 @@
+/*
+ * BrzozowskiDerivativeNormalize.h
+ *
+ * Created on: 20. 1. 2014
+ * Author: tomas
+ */
+
+#ifndef BRZOZOWSKIDERIVATIVENORMALIZE_H_
+#define BRZOZOWSKIDERIVATIVENORMALIZE_H_
+
+#include <regexp/RegExpElement.h>
+#include <regexp/Alternation.h>
+#include <regexp/Concatenation.h>
+#include <regexp/Iteration.h>
+#include <regexp/RegExpSymbol.h>
+
+#include "../../utils/ConversionException.h"
+
+namespace conversions
+{
+
+/**
+ * Simplifies regular expression trees; meant for the results of BrzozowskiDerivative.
+ * So far only an alternation with a single operand is replaced by that operand.
+ */
+class BrzozowskiDerivativeNormalize
+{
+public:
+    /**
+     * Normalizes the node element points to.
+     * The node may be deleted in the process; continue with the returned pointer only.
+     */
+    regexp::RegExpElement * normalize( regexp::RegExpElement* element ) const;
+
+private:
+    regexp::RegExpElement * normalizeAlternation( regexp::Alternation * element ) const;
+    regexp::RegExpElement * normalizeConcatenation( regexp::Concatenation * element ) const;
+    regexp::RegExpElement * normalizeIteration( regexp::Iteration * element ) const;
+    regexp::RegExpElement * normalizeSymbol( regexp::RegExpSymbol * element) const;
+};
+
+} /* namespace conversions */
+
+#endif /* BRZOZOWSKIDERIVATIVENORMALIZE_H_ */
-- 
GitLab