From 4bed3eeea4272f2cef3467fa08357b2e2f5a7774 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <peckato1@fit.cvut.cz>
Date: Fri, 7 Feb 2014 11:20:55 +0100
Subject: [PATCH] Working version of Brzozowski NFA. Infinite loops for some
 REs.

---
 .../src/conversions/re2fa/Brzozowski.cpp      |  88 +++++-----
 .../src/conversions/re2fa/Brzozowski.h        |  16 +-
 aconversions/src/conversions/re2fa/Makefile   |   7 +-
 .../conversions/re2fa/RegExpComparator.cpp    | 157 ++++++++++++++++++
 .../src/conversions/re2fa/RegExpComparator.h  |  46 +++++
 .../conversions/re2fa/are2fa.brzozowski.cpp   |   2 +-
 .../src/derivatives/BrzozowskiDerivative.cpp  |   7 +
 .../src/derivatives/BrzozowskiDerivative.h    |   1 +
 aconversions/src/utils/RegExpUtils.cpp        |   2 +-
 9 files changed, 270 insertions(+), 56 deletions(-)
 create mode 100644 aconversions/src/conversions/re2fa/RegExpComparator.cpp
 create mode 100644 aconversions/src/conversions/re2fa/RegExpComparator.h

diff --git a/aconversions/src/conversions/re2fa/Brzozowski.cpp b/aconversions/src/conversions/re2fa/Brzozowski.cpp
index 801b20e2ef..9fe6f6bf78 100644
--- a/aconversions/src/conversions/re2fa/Brzozowski.cpp
+++ b/aconversions/src/conversions/re2fa/Brzozowski.cpp
@@ -6,15 +6,16 @@
  */
 
 #include "Brzozowski.h"
+#include <iostream>
 
 using namespace automaton;
 using namespace regexp;
 
-#include <iostream>
-
 namespace conversions
 {
 
+#define DEBUG false
+
 Brzozowski::Brzozowski( const RegExp & re ) : AbstractREtoFAConverter( re )
 {
 
@@ -22,29 +23,40 @@ Brzozowski::Brzozowski( const RegExp & re ) : AbstractREtoFAConverter( re )
 
 const FSM Brzozowski::convert( void )
 {
-    /*
-    BrzozowskiDerivative bd( m_re );
-    list<RegExpSymbol> string( 1, RegExpSymbol( "0" ) );
-    bd.derivative( string ).toXML( cout );
-    */
-
-    /*
-    list<RegExpSymbol*> alphabet = RegExpUtils::getRegExpSymbols( m_re );
-    set<RegExp> Q = { m_re }, Qprev = { m_re }, Qcurr;
+    set<RegExpSymbol> alphabet = RegExpUtils::getRegExpAlphabet( m_re ); // convert( ): derives m_re round by round, then fills and returns m_fsm
+    set<RegExp, RegExpComparator> Q = { m_re }, Qprev = { m_re }, Qcurr; // Q: every state found so far; Qprev: states added in the previous round; Qcurr: states added in this round
 
     int i = 1;
     while( true )
     {
+        if( DEBUG ) std::cout << "Round " << i << std::endl; // NOTE: the loop only ends when a round adds no new state (see the break below)
         for( const auto & regexp : Qprev )
         {
+            // Derive this state by each alphabet symbol; every derivative that is not empty yields a transition.
+            auto itFrom = Q.find( regexp ); // transitions hold references, so use the copy stored in Q: Qprev is replaced after every round
+            if( DEBUG ) std::cout << "DERIVUJI:" << std::endl; // Czech for "deriving"
+            if( DEBUG ) const_cast<RegExp&>(regexp).toXML( cout );
             BrzozowskiDerivative bd( regexp );
+            if( DEBUG ) std::cout << "----" << std::endl;
             for( const auto & symbol : alphabet )
             {
-                const RegExp re = bd.derivative( list<RegExpSymbol>( 1, * symbol ) );
+                if( DEBUG ) std::cout << "Podle: " << symbol.getSymbol() << std::endl; // Czech for "by" (the symbol derived by)
+                const RegExp re = bd.derivative( list<RegExpElement*>( 1, new RegExpSymbol( symbol.getSymbol( ) ) ) ); // NOTE(review): the RegExpSymbol allocated here is not freed in this function - confirm who owns it
+                if( DEBUG ) const_cast<RegExp&>(re).toXML( cout );
                 if( ! RegExpUtils::isRegExpEmpty( re ) )
                 {
-                    Qcurr.insert( re );
-                    m_transitions.push_back( BrzozowskiTransition( regexp, symbol, re ) );
+                    if( ! isInSet( re, Q ) ) // derivative not seen before: a new state, expanded in the next round
+                    {
+                        Qcurr.insert( re );
+                        Q.insert( re );
+                        auto itTo = Q.find( re );
+                        m_transitions.push_back( BrzozowskiTransition( * itFrom, symbol, * itTo ) ); // NOTE(review): stores references into the locals Q and alphabet; they dangle once convert( ) returns
+                    }
+                    else // state already known: only the transition is recorded
+                    {
+                        auto itTo = Q.find( re );
+                        m_transitions.push_back( BrzozowskiTransition( * itFrom, symbol, * itTo ) );
+                    }
                 }
             }
         }
@@ -52,70 +64,60 @@ const FSM Brzozowski::convert( void )
         if( Qcurr.size( ) == 0 )
             break;
 
-        set<RegExp> setunion;
-        set_union( Q.begin( ), Q.end( ), Qcurr.begin( ), Qcurr.end( ), setunion.begin( ) );
-
-        Q = setunion;
         Qprev = Qcurr;
         Qcurr.clear( );
 
+        if( DEBUG ) std::cout << "-------------------------------------------------------" << std::endl;
         i += 1;
+
     }
 
+    // ---- Assemble m_fsm from the collected states (Q) and transitions (m_transitions) ----
 
     StateBuilder builder;
 
-    for( const auto & regexp : Q )
-    {
-        const State s = builder.constructState( regexp );
-        m_fsm.addState( s );
-        if( RegExpUtils::containsEpsilon( regexp ) )
-            m_fsm.addFinalState( s );
-    }
-    m_fsm.addInitialState( builder.constructState ( m_re ) );
+    for( const auto & r : Q ) // one FSM state per regular expression in Q
+        m_fsm.addState( builder.getState( r ) );
 
     for( const auto & symbol : alphabet )
-        m_fsm.addInputSymbol( Symbol( symbol->getSymbol( ) ) );
+        m_fsm.addInputSymbol( symbol.getSymbol( ) );
 
     for( const auto & t : m_transitions )
-    {
-        const State from = builder.constructState( t.m_from );
-        const State to = builder.constructState( t.m_to );
-        const Symbol symb = Symbol( t.m_regexpSymbol->getSymbol( ) );
+        m_fsm.addTransition( TransitionFSM( builder.getState( t.m_from ), Symbol( t.m_regexpSymbol.getSymbol( ) ), builder.getState( t.m_to ) ) );
 
-        m_fsm.addTransition( from, symb, to );
-    }
+    m_fsm.addInitialState( builder.getState( m_re ) ); // the state built for the input expression is the initial state
 
-    */
+    for( const auto & r : Q )
+        if( RegExpUtils::containsEpsilon( r ) ) // presumably: r matches the empty string - TODO confirm against RegExpUtils
+            m_fsm.addFinalState( builder.getState( r ) );
 
     return m_fsm;
 }
 
 // ----------------------------------------------------------------------------
 
-Brzozowski::BrzozowskiTransition::BrzozowskiTransition( const RegExp & from, const RegExpSymbol * symb, const RegExp & to )
+Brzozowski::BrzozowskiTransition::BrzozowskiTransition( const RegExp & from, const RegExpSymbol & symb, const RegExp & to ) // only binds references: from, symb and to must outlive the transition
     : m_from( from ), m_to( to ), m_regexpSymbol( symb )
 {
 
 }
 
 // ----------------------------------------------------------------------------
-/*
 
 Brzozowski::StateBuilder::StateBuilder( void )
 {
     m_stateId = 0;
 }
 
-const State & Brzozowski::StateBuilder::constructState( const RegExp & re )
+const State & Brzozowski::StateBuilder::getState( const RegExp & re ) // returns the state registered for re, creating and naming it on the first request
 {
     if( ! isKeyInMap( re, m_states ) )
-        m_states[ re ] = State( createNewName( ) );
+        m_states[ re ] = new State( createNewName( ) ); // NOTE(review): no matching delete is visible in this patch - confirm who frees the states
 
-    return m_states[ re ];
+    return * m_states[ re ]; // the key exists at this point, so operator[] does not insert a null pointer
 }
 
-const string Brzozowski::StateBuilder::createNewName( void )
+string Brzozowski::StateBuilder::createNewName( void )
 {
     // http://en.wikipedia.org/wiki/Hexavigesimal
 
@@ -124,13 +126,11 @@ const string Brzozowski::StateBuilder::createNewName( void )
     do
     {
         unsigned int remainder = n % 26;
-        name.insert( 0, ( char )( remainder + 'A' ), 1 );
+        name += ( char )( remainder + 'A' );
         n = (n - remainder) / 26;
     } while (n > 0);
 
-    return name;
+    return string( name.rbegin( ), name.rend( ) );
 }
 
-*/
-
 } /* namespace conversions */
diff --git a/aconversions/src/conversions/re2fa/Brzozowski.h b/aconversions/src/conversions/re2fa/Brzozowski.h
index 7e36f8f585..2a07024236 100644
--- a/aconversions/src/conversions/re2fa/Brzozowski.h
+++ b/aconversions/src/conversions/re2fa/Brzozowski.h
@@ -12,10 +12,12 @@
 #include <map>
 #include <set>
 
+#include <automaton/State.h>
+
 #include "AbstractREtoFAConverter.h"
+#include "RegExpComparator.h"
 #include "../../derivatives/BrzozowskiDerivative.h"
 #include "../../utils/RegExpUtils.h"
-#include "../../utils/AutomatonUtils.h"
 
 namespace conversions
 {
@@ -30,27 +32,25 @@ public:
     const automaton::FSM convert( void );
 
 private:
-    /*
     class StateBuilder
     {
     public:
         StateBuilder( void );
-        const automaton::State & constructState( const regexp::RegExp & re );
+        const automaton::State & getState( const regexp::RegExp & re ); // state registered for re; created on the first request
 
     private:
-        const std::string createNewName( void );
+        std::string createNewName( void ); // builds a name from the letters A-Z; presumably derived from m_stateId - TODO confirm
 
-        std::map<const regexp::RegExp, automaton::State> m_states;
+        std::map<const regexp::RegExp, automaton::State*, RegExpComparator> m_states; // regexp -> state; values are allocated with new in getState( ), no destructor is declared here
         unsigned int m_stateId;
     };
-    */
 
     struct BrzozowskiTransition
     {
         const regexp::RegExp & m_from, & m_to;
-        const regexp::RegExpSymbol * m_regexpSymbol;
+        const regexp::RegExpSymbol & m_regexpSymbol; // a reference like m_from / m_to: the referenced objects must outlive the transition
 
-        BrzozowskiTransition( const regexp::RegExp & from, const regexp::RegExpSymbol * symb, const regexp::RegExp & to );
+        BrzozowskiTransition( const regexp::RegExp & from, const regexp::RegExpSymbol & symb, const regexp::RegExp & to ); // binds the three references, copies nothing
     };
 
     std::list<BrzozowskiTransition> m_transitions;
diff --git a/aconversions/src/conversions/re2fa/Makefile b/aconversions/src/conversions/re2fa/Makefile
index a9def9363d..7d9ba70052 100644
--- a/aconversions/src/conversions/re2fa/Makefile
+++ b/aconversions/src/conversions/re2fa/Makefile
@@ -6,7 +6,7 @@ all: are2fa.glushkov are2fa.brzozowski are2fa.thompson
 are2fa.glushkov: are2fa.glushkov.o Glushkov.o AbstractREtoFAConverter.o RegExpUtils.o ConversionException.o
 	$(LD) $(LDFLAGS) $^ -o $@
 
-are2fa.brzozowski: are2fa.brzozowski.o Brzozowski.o AbstractREtoFAConverter.o BrzozowskiDerivative.o ConversionException.o RegExpNormalize.o RegExpUtils.o
+are2fa.brzozowski: are2fa.brzozowski.o Brzozowski.o AbstractREtoFAConverter.o BrzozowskiDerivative.o ConversionException.o RegExpComparator.o RegExpNormalize.o RegExpUtils.o
 	$(LD) $(LDFLAGS) $^ -o $@
 
 are2fa.thompson: are2fa.thompson.o Thompson.o AbstractREtoFAConverter.o AutomatonUtils.o ConversionException.o RegExpUtils.o
@@ -26,7 +26,10 @@ are2fa.thompson.o: are2fa.thompson.cpp Thompson.h AbstractREtoFAConverter.h
 AbstractREtoFAConverter.o: AbstractREtoFAConverter.cpp AbstractREtoFAConverter.h
 	$(CXX) $(CXXFLAGS) $< -o $@
 
-Brzozowski.o: Brzozowski.cpp Brzozowski.h AbstractREtoFAConverter.h
+Brzozowski.o: Brzozowski.cpp Brzozowski.h AbstractREtoFAConverter.h RegExpComparator.h
+	$(CXX) $(CXXFLAGS) $< -o $@
+
+RegExpComparator.o: RegExpComparator.cpp RegExpComparator.h
 	$(CXX) $(CXXFLAGS) $< -o $@
 
 Glushkov.o: Glushkov.cpp Glushkov.h AbstractREtoFAConverter.h
diff --git a/aconversions/src/conversions/re2fa/RegExpComparator.cpp b/aconversions/src/conversions/re2fa/RegExpComparator.cpp
new file mode 100644
index 0000000000..0f2313bbdb
--- /dev/null
+++ b/aconversions/src/conversions/re2fa/RegExpComparator.cpp
@@ -0,0 +1,157 @@
+/*
+ * RegExpComparator.cpp
+ *
+ *  Created on: 5. 2. 2014
+ *      Author: tomas
+ */
+
+#include "RegExpComparator.h"
+#include <typeinfo>
+#include <iostream>
+using namespace regexp;
+
+namespace conversions
+{
+
+RegExpComparator::RegExpComparator( void )
+{
+    // The class declares no data members, so there is nothing to initialise.
+}
+
+bool RegExpComparator::operator() ( const RegExp & lhs, const RegExp & rhs ) //const
+{   // Ordering predicate: true iff lhs sorts strictly before rhs according to compare( ).
+    RegExpElement * leftRoot = const_cast<RegExp&>( lhs ).getRegExp( );
+    RegExpElement * rightRoot = const_cast<RegExp&>( rhs ).getRegExp( );
+    // A missing root is only reported, and on stderr, so the diagnostics cannot mix into output written to stdout.
+    if( ! leftRoot ){
+        std::cerr << "NOLEFT" << std::endl;
+        const_cast<RegExp&>(lhs).toXML( std::cerr );
+    }
+    if( ! rightRoot ){
+        std::cerr << "NORIGHT" << std::endl;
+        const_cast<RegExp&>(rhs).toXML( std::cerr );
+    }
+    // compare( ) gives a null root rank 0, below every recognised node kind, so the call below is safe.
+    return compare( leftRoot, rightRoot ) == -1;
+}
+
+int RegExpComparator::compare( RegExpElement * lhs, RegExpElement * rhs ) const
+{
+    // Three-way structural comparison of two expression nodes; returns -1, 0 or 1.
+    // Nodes of the same kind go to the matching typed overload. Nodes of different
+    // kinds are ordered by a fixed rank: unrecognised or null 0, Alternation 1,
+    // Concatenation 2, Iteration 3, RegExpSymbol 4, RegExpEmpty 5, RegExpEpsilon 6.
+
+    // Probe the dynamic type of the left operand ...
+    Alternation * const altL = dynamic_cast<Alternation*>( lhs );
+    Concatenation * const catL = dynamic_cast<Concatenation*>( lhs );
+    Iteration * const iterL = dynamic_cast<Iteration*>( lhs );
+    RegExpSymbol * const symL = dynamic_cast<RegExpSymbol*>( lhs );
+    RegExpEmpty * const emptyL = dynamic_cast<RegExpEmpty*>( lhs );
+    RegExpEpsilon * const epsL = dynamic_cast<RegExpEpsilon*>( lhs );
+
+    // ... and of the right operand.
+    Alternation * const altR = dynamic_cast<Alternation*>( rhs );
+    Concatenation * const catR = dynamic_cast<Concatenation*>( rhs );
+    Iteration * const iterR = dynamic_cast<Iteration*>( rhs );
+    RegExpSymbol * const symR = dynamic_cast<RegExpSymbol*>( rhs );
+    RegExpEmpty * const emptyR = dynamic_cast<RegExpEmpty*>( rhs );
+    RegExpEpsilon * const epsR = dynamic_cast<RegExpEpsilon*>( rhs );
+
+    const bool sameKind = ( altL && altR ) || ( catL && catR ) || ( iterL && iterR )
+            || ( symL && symR ) || ( emptyL && emptyR ) || ( epsL && epsR );
+
+    if( ! sameKind )
+    {
+        // The highest matching rank wins, as with a run of overwriting assignments.
+        const int rankL = epsL ? 6 : emptyL ? 5 : symL ? 4 : iterL ? 3 : catL ? 2 : altL ? 1 : 0;
+        const int rankR = epsR ? 6 : emptyR ? 5 : symR ? 4 : iterR ? 3 : catR ? 2 : altR ? 1 : 0;
+
+        if( rankL < rankR )
+            return -1;
+        if( rankL > rankR )
+            return 1;
+        return 0;
+    }
+
+    // Same kind on both sides: hand over to the overload for that kind.
+    if( altL ) return compare( altL, altR );
+    if( catL ) return compare( catL, catR );
+    if( iterL ) return compare( iterL, iterR );
+    if( symL ) return compare( symL, symR );
+    if( emptyL ) return compare( emptyL, emptyR );
+    if( epsL ) return compare( epsL, epsR );
+
+    // Not reachable while sameKind holds; kept as a guard.
+    throw ConversionException( "such exception, many errors" );
+}
+
+int RegExpComparator::compare( Alternation * lhs, Alternation * rhs ) const
+{
+    // Positional, element-wise comparison: the first differing pair decides and a
+    // proper prefix orders first. Operand order matters: a+b and b+a compare unequal.
+    auto itL = lhs->getElements( ).begin( );
+    auto itR = rhs->getElements( ).begin( );
+    const auto endL = lhs->getElements( ).end( );
+    const auto endR = rhs->getElements( ).end( );
+
+    while( itL != endL && itR != endR )
+    {
+        const int order = compare( *itL, *itR );
+        if( order != 0 )
+            return order;
+        ++itL;
+        ++itR;
+    }
+
+    if( itL != endL ) return 1;      // rhs ran out first
+    return ( itR != endR ) ? -1 : 0; // lhs ran out first, or both ended together
+}
+
+int RegExpComparator::compare( Concatenation * lhs, Concatenation * rhs ) const
+{
+    // Positional, element-wise comparison of the concatenated parts: the first
+    // differing pair decides; if one sequence is a proper prefix, it orders first.
+    auto itL = lhs->getElements( ).begin( );
+    auto itR = rhs->getElements( ).begin( );
+    const auto endL = lhs->getElements( ).end( );
+    const auto endR = rhs->getElements( ).end( );
+
+    while( itL != endL && itR != endR )
+    {
+        const int order = compare( *itL, *itR );
+        if( order != 0 )
+            return order;
+        ++itL;
+        ++itR;
+    }
+
+    if( itL != endL ) return 1;      // rhs ran out first
+    return ( itR != endR ) ? -1 : 0; // lhs ran out first, or both ended together
+}
+
+int RegExpComparator::compare( Iteration * lhs, Iteration * rhs ) const
+{   // Two iterations compare exactly as the elements returned by their getElement( ).
+    return compare( lhs->getElement( ), rhs->getElement( ) );
+}
+
+int RegExpComparator::compare( RegExpSymbol * lhs, RegExpSymbol * rhs ) const
+{
+    // Symbols are ordered by the value of getSymbol( ), using that type's own < and >.
+    const auto & left = lhs->getSymbol( );
+    const auto & right = rhs->getSymbol( );
+    if( left < right ) return -1;
+    return ( left > right ) ? 1 : 0;
+}
+
+int RegExpComparator::compare( RegExpEmpty * lhs, RegExpEmpty * rhs ) const
+{   // Both arguments are ignored: any two RegExpEmpty nodes compare equal.
+    return 0;
+}
+
+int RegExpComparator::compare( RegExpEpsilon * lhs, RegExpEpsilon * rhs ) const
+{   // Both arguments are ignored: any two RegExpEpsilon nodes compare equal.
+    return 0;
+}
+
+} /* namespace conversions */
diff --git a/aconversions/src/conversions/re2fa/RegExpComparator.h b/aconversions/src/conversions/re2fa/RegExpComparator.h
new file mode 100644
index 0000000000..dc267d5293
--- /dev/null
+++ b/aconversions/src/conversions/re2fa/RegExpComparator.h
@@ -0,0 +1,46 @@
+/*
+ * RegExpComparator.h
+ *
+ *  Created on: 5. 2. 2014
+ *      Author: tomas
+ */
+
+#ifndef REGEXPCOMPARATOR_H_
+#define REGEXPCOMPARATOR_H_
+
+#include <map>
+#include <string>
+
+#include <regexp/RegExp.h>
+#include <regexp/RegExpElement.h>
+#include <regexp/Alternation.h>
+#include <regexp/Concatenation.h>
+#include <regexp/Iteration.h>
+#include <regexp/RegExpSymbol.h>
+#include <regexp/RegExpEmpty.h>
+#include <regexp/RegExpEpsilon.h>
+
+#include "../../utils/ConversionException.h"
+
+namespace conversions
+{
+
+class RegExpComparator // ordering functor for regexp::RegExp; used in this patch as the comparator of std::set and std::map
+{
+public:
+    RegExpComparator( void );
+    bool operator() ( const regexp::RegExp & lhs, const regexp::RegExp & rhs ); // true iff lhs orders before rhs; TODO(review): should be const-qualified
+
+private:
+    int compare( regexp::RegExpElement * lhs, regexp::RegExpElement * rhs ) const; // returns -1, 0 or 1; dispatches on the dynamic node type
+    int compare( regexp::Alternation * lhs, regexp::Alternation * rhs ) const; // element by element, in order
+    int compare( regexp::Concatenation * lhs, regexp::Concatenation * rhs ) const; // element by element, in order
+    int compare( regexp::Iteration * lhs, regexp::Iteration * rhs ) const; // by the iterated element
+    int compare( regexp::RegExpSymbol * lhs, regexp::RegExpSymbol * rhs ) const; // by getSymbol( )
+    int compare( regexp::RegExpEmpty * lhs, regexp::RegExpEmpty * rhs ) const; // always 0
+    int compare( regexp::RegExpEpsilon * lhs, regexp::RegExpEpsilon * rhs ) const; // always 0
+};
+
+} /* namespace conversions */
+
+#endif /* REGEXPCOMPARATOR_H_ */
diff --git a/aconversions/src/conversions/re2fa/are2fa.brzozowski.cpp b/aconversions/src/conversions/re2fa/are2fa.brzozowski.cpp
index 63efce4a2e..e23912e899 100644
--- a/aconversions/src/conversions/re2fa/are2fa.brzozowski.cpp
+++ b/aconversions/src/conversions/re2fa/are2fa.brzozowski.cpp
@@ -25,7 +25,7 @@ int main(int argc, char** argv)
         RegExp re = RegExpParser::parse(tokens);
 
         Brzozowski conv( re );
-        conv.convert( );//.toXML( cout );
+        conv.convert( ).toXML( cout );
     }
     catch( AlibException & e )
     {
diff --git a/aconversions/src/derivatives/BrzozowskiDerivative.cpp b/aconversions/src/derivatives/BrzozowskiDerivative.cpp
index 6bf15d2e57..c74652bb99 100644
--- a/aconversions/src/derivatives/BrzozowskiDerivative.cpp
+++ b/aconversions/src/derivatives/BrzozowskiDerivative.cpp
@@ -17,12 +17,19 @@ namespace conversions
 BrzozowskiDerivative::BrzozowskiDerivative( const RegExp & re ) : m_re( re )
 {
     //FIXME in alib!
+    RegExpNormalize norm;
     m_regexpRoot = const_cast<RegExp&>( m_re ).getRegExp( );
+    // Normalize the input once; from here on m_regexpRoot is the root of the member m_reNorm, not of m_re.
+    m_reNorm = norm.normalize( m_regexpRoot );
+    m_regexpRoot = m_reNorm.getRegExp( );
+
+    // Debug aid, disabled: const_cast<RegExp&>(m_reNorm).toXML( cout );
 }
 
 RegExp BrzozowskiDerivative::derivative ( const list<RegExpElement*> & dString ) const
 {
     RegExpElement * expression = m_regexpRoot;
+
     for( const auto & dSymbol : dString ) // dV/d(ab) = d( dV/da )/db
     {
         // FIXME: memory leak
diff --git a/aconversions/src/derivatives/BrzozowskiDerivative.h b/aconversions/src/derivatives/BrzozowskiDerivative.h
index d9cc458e47..135bdf4aff 100644
--- a/aconversions/src/derivatives/BrzozowskiDerivative.h
+++ b/aconversions/src/derivatives/BrzozowskiDerivative.h
@@ -48,6 +48,7 @@ private:
     regexp::RegExpElement * derivative( regexp::RegExpEmpty * element, const regexp::RegExpSymbol & dSymbol ) const;
 
     const regexp::RegExp & m_re;
+    regexp::RegExp m_reNorm;
     regexp::RegExpElement* m_regexpRoot;
 };
 
diff --git a/aconversions/src/utils/RegExpUtils.cpp b/aconversions/src/utils/RegExpUtils.cpp
index 86b444a438..431ba67ba8 100644
--- a/aconversions/src/utils/RegExpUtils.cpp
+++ b/aconversions/src/utils/RegExpUtils.cpp
@@ -11,7 +11,7 @@ namespace conversions
 
 bool RegExpUtils::isRegExpEmpty( const RegExp & re )
 {
-    return const_cast<RegExp&>( re ).getRegExp( ) == NULL;
+    return dynamic_cast<RegExpEmpty*>( const_cast<RegExp&>( re ).getRegExp( ) ) != NULL; // true iff the root node is a RegExpEmpty; a null root now yields false
 }
 
 set<RegExpSymbol> RegExpUtils::getRegExpAlphabet( const RegExp & re )
-- 
GitLab