From 516322c34ba54cb84a0c4350e639acd0ec605f11 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <tomaspecka@gmail.com>
Date: Mon, 20 Jan 2014 20:56:05 +0100
Subject: [PATCH] first steps at RE derivatives

---
 .../src/conversions/re2fa/Brzozowski.cpp      |   8 +
 .../src/conversions/re2fa/Brzozowski.h        |   1 +
 .../re2fa/BrzozowskiDerivative.cpp            | 164 ++++++++++++++++++
 .../conversions/re2fa/BrzozowskiDerivative.h  |  54 ++++++
 .../re2fa/BrzozowskiDerivativeNormalize.cpp   |  66 +++++++
 .../re2fa/BrzozowskiDerivativeNormalize.h     |  36 ++++
 6 files changed, 329 insertions(+)
 create mode 100644 aconversions/src/conversions/re2fa/BrzozowskiDerivative.cpp
 create mode 100644 aconversions/src/conversions/re2fa/BrzozowskiDerivative.h
 create mode 100644 aconversions/src/conversions/re2fa/BrzozowskiDerivativeNormalize.cpp
 create mode 100644 aconversions/src/conversions/re2fa/BrzozowskiDerivativeNormalize.h

diff --git a/aconversions/src/conversions/re2fa/Brzozowski.cpp b/aconversions/src/conversions/re2fa/Brzozowski.cpp
index ba42a73b5d..d7ad39b0b8 100644
--- a/aconversions/src/conversions/re2fa/Brzozowski.cpp
+++ b/aconversions/src/conversions/re2fa/Brzozowski.cpp
@@ -10,6 +10,8 @@
 using namespace automaton;
 using namespace regexp;
 
+#include <iostream>
+
 namespace conversions
 {
 
@@ -20,6 +22,12 @@ Brzozowski::Brzozowski( const RegExp & re ) : AbstractREtoFAConverter( re )
 
 const FSM Brzozowski::convert( void )
 {
+    // Work in progress: no automaton is constructed yet. For now the derivative of m_re
+    // by the one-symbol word "0" is computed and dumped as XML to standard output.
+    list<RegExpSymbol> word( 1, RegExpSymbol( "0" ) );
+    BrzozowskiDerivative differentiator( m_re );
+    differentiator.derivative( word ).toXML( cout );
+
     return m_fsm;
 }
 
diff --git a/aconversions/src/conversions/re2fa/Brzozowski.h b/aconversions/src/conversions/re2fa/Brzozowski.h
index 3da6f85cd9..b3b3cc73de 100644
--- a/aconversions/src/conversions/re2fa/Brzozowski.h
+++ b/aconversions/src/conversions/re2fa/Brzozowski.h
@@ -9,6 +9,7 @@
 #define BRZOZOWSKI_H_
 
 #include "AbstractREtoFAConverter.h"
+#include "BrzozowskiDerivative.h"
 
 namespace conversions
 {
diff --git a/aconversions/src/conversions/re2fa/BrzozowskiDerivative.cpp b/aconversions/src/conversions/re2fa/BrzozowskiDerivative.cpp
new file mode 100644
index 0000000000..d2c9f896b6
--- /dev/null
+++ b/aconversions/src/conversions/re2fa/BrzozowskiDerivative.cpp
@@ -0,0 +1,164 @@
+/*
+ * BrzozowskiDerivative.cpp
+ *
+ *  Created on: 19. 1. 2014
+ *      Author: tomas
+ */
+
+#include "BrzozowskiDerivative.h"
+
+using namespace regexp;
+using namespace std;
+
+namespace conversions
+{
+
+BrzozowskiDerivative::BrzozowskiDerivative( const RegExp & re )
+    : m_re( re ), m_regexpRoot( const_cast<RegExp&>( re ).getRegExp( ) )
+{
+    // FIXME in alib! The const_cast is presumably needed because getRegExp( ) is not const - confirm.
+}
+
+// Derives the regexp by each symbol of string in turn: dV/d(eps) = V, dV/d(ab) = d( dV/da )/db.
+// Stops at the first NULL step (the empty set); the returned RegExp may therefore wrap NULL.
+RegExp BrzozowskiDerivative::derivative ( const list<RegExpSymbol> & string ) const
+{
+    RegExpElement * expression = m_regexpRoot->clone( ); // owned copy, so every step can be freed alike
+    for( auto it = string.begin( ); expression != NULL && it != string.end( ); ++ it )
+    {
+        RegExpElement * next = derivative( expression, * it );
+        delete expression; // the new tree is built from fresh nodes and clones only
+        expression = next;
+    }
+    // TODO: run BrzozowskiDerivativeNormalize over the result once it is ready
+    return RegExp( expression );
+}
+
+// Picks the derivative rule matching the dynamic type of element; NULL results mean the empty set.
+// Throws ConversionException when element is none of the four handled node types.
+RegExpElement * BrzozowskiDerivative::derivative( RegExpElement * element, const RegExpSymbol & dSymbol ) const
+{
+    if( Alternation* asAlternation = dynamic_cast<Alternation*>( element ) )
+        return derivativeAlternation( asAlternation, dSymbol );
+
+    if( Concatenation* asConcatenation = dynamic_cast<Concatenation*>( element ) )
+        return derivativeConcatenation( asConcatenation, dSymbol);
+
+    if( Iteration* asIteration = dynamic_cast<Iteration*>( element ) )
+        return derivativeIteration( asIteration, dSymbol );
+
+    if( RegExpSymbol* asSymbol = dynamic_cast<RegExpSymbol*>( element ) )
+        return derivativeSymbol( asSymbol, dSymbol );
+
+    throw ConversionException( "BrzozowskiDerivative::derivative - unknown RegExpElement type" );
+}
+
+// d(x1 + x2 + ...)/da = d(x1)/da + d(x2)/da + ...; empty-set terms are dropped (rule A3, Melichar 2.87).
+// Returns NULL (the empty set) when no alternative survives, like the other derivative helpers do.
+RegExpElement * BrzozowskiDerivative::derivativeAlternation( Alternation * element, const RegExpSymbol & dSymbol ) const
+{
+    Alternation* ret = new Alternation( );
+    for( auto e : element->getElements( ) )
+        if( RegExpElement* d = derivative( e, dSymbol ) )
+            ret->getElements( ).push_back( d );
+
+    if( ! ret->getElements( ).empty( ) )
+        return ret;
+    delete ret; // every alternative derived to the empty set
+    return NULL;
+}
+
+// d(x y ...)/da = d(x)/da . (y ...)  +  [ eps in x ] d(y ...)/da
+// Returns a newly allocated tree, or NULL when the derivative is the empty set. The second
+// term is evaluated even when d(x)/da is empty: e.g. d(a*b)/db = eps although d(a*)/db is empty.
+RegExpElement * BrzozowskiDerivative::derivativeConcatenation( Concatenation * element, const RegExpSymbol & dSymbol ) const
+{
+    const list<RegExpElement*> & parts = element->getElements( );
+    if( parts.empty( ) ) // no factor can consume a symbol; also keeps front( ) below valid
+        return NULL;
+
+    RegExpElement* first = parts.front( );
+    list<RegExpElement*>::const_iterator restBegin = parts.begin( );
+    ++ restBegin;
+    // First term: d(first)/da followed by copies of the remaining factors.
+    // If d(first)/da is the empty set the whole term is empty as well (rule A7, Melichar 2.87).
+    Concatenation* head = NULL;
+    if( RegExpElement* d = derivative( first, dSymbol ) )
+    {
+        head = new Concatenation( );
+        head->getElements( ).push_back( d );
+        for( auto it = restBegin; it != parts.end( ); ++ it )
+            head->getElements( ).push_back( ( * it )->clone( ) );
+    }
+
+    if( ! containsEpsilon( first ) )
+        return head;
+
+    // Second term: derive the concatenation without its first factor. The temporary copy is
+    // released afterwards; the derivative helpers only clone from their input.
+    Concatenation* rest = new Concatenation( );
+    for( auto it = restBegin; it != parts.end( ); ++ it )
+        rest->getElements( ).push_back( ( * it )->clone( ) );
+    RegExpElement* tail = derivative( rest, dSymbol );
+    delete rest;
+
+    if( head == NULL || tail == NULL ) // at most one term is left, no alternation needed
+        return head != NULL ? head : tail;
+
+    Alternation* alt = new Alternation( );
+    alt->getElements( ).push_back( head );
+    alt->getElements( ).push_back( tail );
+    return alt;
+}
+
+// d(x*)/da = d(x)/da . x*
+// Returns NULL (the empty set) when the derivative of the iterated body is the empty set,
+// otherwise a new Concatenation of that derivative followed by a clone of the iteration.
+RegExpElement * BrzozowskiDerivative::derivativeIteration( Iteration * element, const RegExpSymbol & dSymbol ) const
+{
+    RegExpElement* inner = derivative( element->getElement( ), dSymbol );
+    if( inner == NULL )
+        return NULL;
+
+    Concatenation* product = new Concatenation( );
+    list<RegExpElement*> & factors = product->getElements( );
+    factors.push_back( inner );
+    factors.push_back( element->clone( ) );
+
+    return product;
+}
+
+RegExpElement * BrzozowskiDerivative::derivativeSymbol( RegExpSymbol * element, const RegExpSymbol & dSymbol ) const
+{   // d(a)/da = eps, encoded as the symbol ""; a non-matching symbol derives to the empty set (NULL)
+    return dSymbol == element->getSymbol( ) ? new RegExpSymbol( "" ) : NULL;
+}
+
+// Tells whether the language of element contains the empty word:
+// an alternation does if any alternative does, a concatenation if all of its factors do
+// (vacuously true without factors), an iteration always, and a symbol only when it is
+// the epsilon symbol RegExpSymbol( "" ). Any other input, NULL included, yields false.
+bool BrzozowskiDerivative::containsEpsilon( RegExpElement * element ) const
+{
+    if( Alternation* sum = dynamic_cast<Alternation*>( element ) )
+    {
+        for( RegExpElement* term : sum->getElements( ) )
+            if( containsEpsilon( term ) )
+                return true;
+        return false;
+    }
+
+    if( Concatenation* product = dynamic_cast<Concatenation*>( element ) )
+    {
+        for( RegExpElement* factor : product->getElements( ) )
+            if( ! containsEpsilon( factor ) )
+                return false;
+        return true;
+    }
+
+    if( dynamic_cast<Iteration*>( element ) )
+        return true;
+    RegExpSymbol* leaf = dynamic_cast<RegExpSymbol*>( element );
+    return leaf && *leaf == RegExpSymbol( "" );
+}
+
+} /* namespace conversions */
diff --git a/aconversions/src/conversions/re2fa/BrzozowskiDerivative.h b/aconversions/src/conversions/re2fa/BrzozowskiDerivative.h
new file mode 100644
index 0000000000..aaf55c4d13
--- /dev/null
+++ b/aconversions/src/conversions/re2fa/BrzozowskiDerivative.h
@@ -0,0 +1,54 @@
+/*
+ * BrzozowskiDerivative.h
+ *
+ *  Created on: 19. 1. 2014
+ *      Author: tomas
+ */
+
+#ifndef BRZOZOWSKIDERIVATIVE_H_
+#define BRZOZOWSKIDERIVATIVE_H_
+
+#include <regexp/RegExp.h>
+#include <regexp/RegExpElement.h>
+#include <regexp/Alternation.h>
+#include <regexp/Concatenation.h>
+#include <regexp/Iteration.h>
+#include <regexp/RegExpSymbol.h>
+
+#include <list>
+
+#include "BrzozowskiDerivativeNormalize.h"
+#include "../../utils/ConversionException.h"
+
+namespace conversions
+{
+
+/**
+ * Calculates derivative of regular expression re by string passed in derivative( ).
+ * Implements Melichar, definition 2.91 in chapter 2.4.3
+ */
+class BrzozowskiDerivative
+{
+public:
+    // Stores a reference to re (no copy is made), so re has to outlive this object.
+    BrzozowskiDerivative( const regexp::RegExp & re );
+    // Derivative of re by the word string, taken symbol by symbol; an empty word yields a copy of re.
+    regexp::RegExp derivative ( const std::list<regexp::RegExpSymbol> & string ) const;
+
+private:
+    // Derivative of a single node by a single symbol; a NULL result stands for the empty set.
+    regexp::RegExpElement * derivative( regexp::RegExpElement * element, const regexp::RegExpSymbol & dSymbol ) const;
+    regexp::RegExpElement * derivativeAlternation( regexp::Alternation * element, const regexp::RegExpSymbol & dSymbol ) const;
+    regexp::RegExpElement * derivativeConcatenation( regexp::Concatenation * element, const regexp::RegExpSymbol & dSymbol ) const;
+    regexp::RegExpElement * derivativeIteration( regexp::Iteration * element, const regexp::RegExpSymbol & dSymbol ) const;
+    regexp::RegExpElement * derivativeSymbol( regexp::RegExpSymbol * element, const regexp::RegExpSymbol & dSymbol ) const;
+    // True if the language of element contains the empty word. Author's note: unsourced, no warranty.
+    bool containsEpsilon( regexp::RegExpElement * element ) const;
+
+    const regexp::RegExp & m_re; // expression to derive, referenced only
+    regexp::RegExpElement* m_regexpRoot; // root element obtained from m_re in the constructor
+};
+
+} /* namespace conversions */
+
+#endif /* BRZOZOWSKIDERIVATIVE_H_ */
diff --git a/aconversions/src/conversions/re2fa/BrzozowskiDerivativeNormalize.cpp b/aconversions/src/conversions/re2fa/BrzozowskiDerivativeNormalize.cpp
new file mode 100644
index 0000000000..50a8af1400
--- /dev/null
+++ b/aconversions/src/conversions/re2fa/BrzozowskiDerivativeNormalize.cpp
@@ -0,0 +1,66 @@
+/*
+ * BrzozowskiDerivativeNormalize.cpp
+ *
+ *  Created on: 20. 1. 2014
+ *      Author: tomas
+ */
+
+#include "BrzozowskiDerivativeNormalize.h"
+
+using namespace regexp;
+
+namespace conversions
+{
+
+// Routes element to the normalization routine of its dynamic type and returns that routine's result.
+// Throws ConversionException when element is none of the four handled node types.
+RegExpElement* BrzozowskiDerivativeNormalize::normalize( RegExpElement * element ) const
+{
+    if( Alternation* asAlternation = dynamic_cast<Alternation*>( element ) )
+        return normalizeAlternation( asAlternation );
+
+    if( Concatenation* asConcatenation = dynamic_cast<Concatenation*>( element ) )
+        return normalizeConcatenation( asConcatenation );
+
+    if( Iteration* asIteration = dynamic_cast<Iteration*>( element ) )
+        return normalizeIteration( asIteration );
+
+    if( RegExpSymbol* asSymbol = dynamic_cast<RegExpSymbol*>( element ) )
+        return normalizeSymbol( asSymbol );
+
+    throw ConversionException( "BrzozowskiDerivativeNormalize::normalize - unknown RegExpElement type" );
+}
+
+// Collapses an alternation that has exactly one alternative into that alternative.
+// The child is unlinked from element before element is deleted, so the returned subtree
+// cannot be destroyed along with it; any other alternation is returned unchanged.
+RegExpElement * BrzozowskiDerivativeNormalize::normalizeAlternation( Alternation * element ) const
+{
+    // Must be a reference: popping from a copy would leave the child attached to element.
+    std::list<RegExpElement*> & l = element->getElements( );
+    if( l.size( ) != 1 )
+        return element;
+
+    RegExpElement* child = l.front( );
+    l.pop_front( );
+    delete element;
+    return child;
+}
+
+RegExpElement * BrzozowskiDerivativeNormalize::normalizeConcatenation( Concatenation * element ) const
+{   // Placeholder: no simplification is applied yet, the concatenation is returned untouched.
+    return element;
+}
+
+RegExpElement * BrzozowskiDerivativeNormalize::normalizeIteration( Iteration * element ) const
+{   // Placeholder: the iteration is returned untouched; normalizing its body is still disabled below.
+    // element->setElement( normalize( element->getElement( ) ) );
+    return element;
+}
+
+RegExpElement * BrzozowskiDerivativeNormalize::normalizeSymbol( RegExpSymbol * element ) const
+{   // A symbol is returned as it is.
+    return element;
+}
+
+} /* namespace conversions */
diff --git a/aconversions/src/conversions/re2fa/BrzozowskiDerivativeNormalize.h b/aconversions/src/conversions/re2fa/BrzozowskiDerivativeNormalize.h
new file mode 100644
index 0000000000..11b1dbf8c6
--- /dev/null
+++ b/aconversions/src/conversions/re2fa/BrzozowskiDerivativeNormalize.h
@@ -0,0 +1,36 @@
+/*
+ * BrzozowskiDerivativeNormalize.h
+ *
+ *  Created on: 20. 1. 2014
+ *      Author: tomas
+ */
+
+#ifndef BRZOZOWSKIDERIVATIVENORMALIZE_H_
+#define BRZOZOWSKIDERIVATIVENORMALIZE_H_
+
+#include <regexp/RegExpElement.h>
+#include <regexp/Alternation.h>
+#include <regexp/Concatenation.h>
+#include <regexp/Iteration.h>
+#include <regexp/RegExpSymbol.h>
+
+#include "../../utils/ConversionException.h"
+
+namespace conversions
+{
+
+class BrzozowskiDerivativeNormalize
+{
+public:
+    // Hands element to the matching normalize*( ) member below; throws ConversionException for an unknown node type.
+    regexp::RegExpElement * normalize( regexp::RegExpElement* element ) const;
+private:
+    regexp::RegExpElement * normalizeAlternation( regexp::Alternation * element ) const;
+    regexp::RegExpElement * normalizeConcatenation( regexp::Concatenation * element ) const;
+    regexp::RegExpElement * normalizeIteration( regexp::Iteration * element ) const;
+    regexp::RegExpElement * normalizeSymbol( regexp::RegExpSymbol * element) const;
+};
+
+} /* namespace conversions */
+
+#endif /* BRZOZOWSKIDERIVATIVENORMALIZE_H_ */
-- 
GitLab