From b07e8e3ac7aaae062ce25ff7d88e2df159140381 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <peckato1@fit.cvut.cz>
Date: Mon, 14 Apr 2014 13:31:16 +0200
Subject: [PATCH] aconversions: Brzozowski Derivation Method - follow math
 precisely

---
 aconversions/src/re2fa/Brzozowski.cpp         | 45 +++++++----------
 aconversions/src/re2fa/Brzozowski.h           | 24 ---------
 .../re2rg/re2rrg/BrzozowskiDerivationRRG.cpp  | 50 +++++++------------
 .../re2rg/re2rrg/BrzozowskiDerivationRRG.h    | 11 ----
 4 files changed, 35 insertions(+), 95 deletions(-)

diff --git a/aconversions/src/re2fa/Brzozowski.cpp b/aconversions/src/re2fa/Brzozowski.cpp
index 90e268a227..24415f7739 100644
--- a/aconversions/src/re2fa/Brzozowski.cpp
+++ b/aconversions/src/re2fa/Brzozowski.cpp
@@ -54,14 +54,9 @@ FSM Brzozowski::convert( void )
                 RegExp derived = deriv.derivation( a );
                 derived = opt.optimize( derived );
 
-                // no "trash state" (though algorithm probably considers it)
-                if( ! derived.isEmpty( ) )
-                {
-                    if( ! isInSet( derived, Q ) ) // if this state has already been found, do not add
-                        Qi.at( i ).insert( derived );
-
-                    m_transitions.insert( Transition( regexp, Symbol( a.getSymbol( ) ), derived ) );
-                }
+                // \emptyset is no longer filtered out: it is collected like any other derivative (and so becomes an FA state)
+                if( ! isInSet( derived, Q ) ) // if this state has already been found, do not add
+                    Qi.at( i ).insert( derived );
 
             }
         }
@@ -82,8 +77,20 @@ FSM Brzozowski::convert( void )
     for( const auto & a : alphabet )
         m_fsm.addInputSymbol( a.getSymbol( ) );
 
-    for( const auto & t : m_transitions )
-        m_fsm.addTransition( builder.getState( t.m_from ), t.m_symbol, builder.getState( t.m_to ) );
+    for( const auto & r : Q ) // build the transitions: visit every regexp collected in Q
+    {
+        RegExpDerivation deriv( r );
+
+        for( const auto & a: m_fsm.getInputAlphabet( ) )
+        {
+            RegExp derived = deriv.derivation( a ); // derive r by input symbol a
+            derived = opt.optimize( derived ); // same optimize step as in the loop that collects the regexps
+
+            TransitionFSM t( builder.getState( r ), a, builder.getState( derived ) ); // state of r --a--> state of the derivative
+            if( ! isInSet( t, m_fsm.getTransitions( ) ) ) // skip transitions the automaton already has
+                m_fsm.addTransition( t );
+        }
+    }
 
     m_fsm.addInitialState( builder.getState( V ) );
 
@@ -96,24 +103,6 @@ FSM Brzozowski::convert( void )
 
 // ----------------------------------------------------------------------------
 
-Brzozowski::Transition::Transition( const RegExp & from, const Symbol & symb, const RegExp & to )
-    : m_from( from ), m_to( to ), m_symbol( symb )
-{
-
-}
-
-bool Brzozowski::Transition::operator<( const Transition & x ) const
-{
-    if( m_from != x.m_from )
-        return m_from < x.m_from;
-    else if( m_symbol != x.m_symbol )
-        return m_symbol < x.m_symbol;
-    else
-        return m_to < x.m_to;
-}
-
-// ----------------------------------------------------------------------------
-
 Brzozowski::StateBuilder::StateBuilder( const set<RegExp> & Q )
 {
     m_stateId = 0;
diff --git a/aconversions/src/re2fa/Brzozowski.h b/aconversions/src/re2fa/Brzozowski.h
index d455ff5308..c858dfb5c7 100644
--- a/aconversions/src/re2fa/Brzozowski.h
+++ b/aconversions/src/re2fa/Brzozowski.h
@@ -77,30 +77,6 @@ private:
 
         unsigned int m_stateId;
     };
-
-    /**
-     * Stores original regular epxression (m_from), and result of derivation (m_to) over m_symbol.
-     */
-    struct Transition
-    {
-        const regexp::RegExp m_from, m_to;
-        const alphabet::Symbol m_symbol;
-
-        /**
-         * @param from original regexp
-         * @param to resulting regexp
-         * @param symb
-         */
-        Transition( const regexp::RegExp & from, const alphabet::Symbol & symb, const regexp::RegExp & to );
-
-        bool operator<( const Transition & other ) const;
-    };
-
-    /**
-     * Set of transitions.
-     * @see Brzozowski::Transition
-     */
-    std::set<Transition> m_transitions;
 };
 
 } /* namespace conversions */
diff --git a/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp b/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp
index eff5b812bf..0797f640ce 100644
--- a/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp
+++ b/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp
@@ -56,13 +56,8 @@ RightRegularGrammar BrzozowskiDerivationRRG::convert( void )
                 RegExp derived = deriv.derivation( a );
                 derived = opt.optimize( derived );
 
-                if( ! derived.isEmpty( ) )
-                {
-                    if( ! isInSet( derived, N ) ) // if this state has already been found, do not add
-                        Ni.at( i ).insert( derived );
-
-                    m_transitions.insert( Transition( regexp, Symbol( a.getSymbol( ) ), derived ) );
-                }
+                if( ! isInSet( derived, N ) ) // if this state has already been found, do not add
+                    Ni.at( i ).insert( derived );
             }
         }
 
@@ -77,18 +72,27 @@ RightRegularGrammar BrzozowskiDerivationRRG::convert( void )
 
     NonTerminalBuilder builder( N, m_grammar );
 
-    for( const auto & t : m_transitions )
+    for( const auto & lhsRegexp : N ) // each regexp in N yields the rules of one nonterminal
     {
-        list<Symbol> leftSide = { builder.getNonTerminal( t.m_from ) };
-        list<Symbol> rightSide = { t.m_symbol, builder.getNonTerminal( t.m_to ) };
-        Rule r( leftSide, rightSide );
-        m_grammar.addRule( r );
+        RegExpDerivation deriv( lhsRegexp );
 
-        if( t.m_to.containsEmptyString( ) )
+        for( const auto & a : alphabet )
         {
-            list<Symbol> rightSide = { t.m_symbol };
+            RegExp derived = deriv.derivation( a ); // derive lhsRegexp by symbol a
+            derived = opt.optimize( derived ); // same optimize step as in the loop that collects N
+
+            list<Symbol> leftSide = { builder.getNonTerminal( lhsRegexp ) };
+            list<Symbol> rightSide = { a, builder.getNonTerminal( derived ) };
+
             Rule r( leftSide, rightSide );
             m_grammar.addRule( r );
+
+            if( derived.containsEmptyString( ) ) // additionally emit the rule whose right side is just a
+            {
+                list<Symbol> terminalOnly = { a };
+                Rule terminalRule( leftSide, terminalOnly );
+                m_grammar.addRule( terminalRule );
+            }
         }
     }
 
@@ -122,24 +126,6 @@ RightRegularGrammar BrzozowskiDerivationRRG::convert( void )
 
 // ----------------------------------------------------------------------------
 
-BrzozowskiDerivationRRG::Transition::Transition( const RegExp & from, const Symbol & symb, const RegExp & to )
-    : m_from( from ), m_to( to ), m_symbol( symb )
-{
-
-}
-
-bool BrzozowskiDerivationRRG::Transition::operator<( const Transition & x ) const
-{
-    if( m_from != x.m_from )
-        return m_from < x.m_from;
-    else if( m_symbol != x.m_symbol )
-        return m_symbol < x.m_symbol;
-    else
-        return m_to < x.m_to;
-}
-
-// ----------------------------------------------------------------------------
-
 BrzozowskiDerivationRRG::NonTerminalBuilder::NonTerminalBuilder( const set<RegExp> & Q, RightRegularGrammar & grammar ) : m_grammar( grammar )
 {
     m_nonTerminalId = 0;
diff --git a/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.h b/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.h
index 47b1546dcc..90182cf0cc 100644
--- a/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.h
+++ b/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.h
@@ -45,15 +45,6 @@ public:
     grammar::RightRegularGrammar convert( void );
 
 protected:
-    struct Transition
-    {
-        const regexp::RegExp m_from, m_to;
-        const alphabet::Symbol m_symbol;
-
-        Transition( const regexp::RegExp & from, const alphabet::Symbol & symb, const regexp::RegExp & to );
-        bool operator<( const Transition & other ) const;
-    };
-
     class NonTerminalBuilder
     {
     public:
@@ -66,8 +57,6 @@ protected:
         std::string createNewName( void );
         unsigned int m_nonTerminalId;
     };
-
-    std::set<Transition> m_transitions;
 };
 
 } /* namespace conversions */
-- 
GitLab