From eb00bf77f770eaee57b07ec40864700d1b03c5ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <peckato1@fit.cvut.cz>
Date: Sun, 7 Sep 2014 21:36:35 +0200
Subject: [PATCH] algo: re2rg: Brzozowski

---
 .../re2rg/re2rrg/BrzozowskiDerivationRRG.cpp  | 134 ------------------
 .../re2rg/re2rrg/BrzozowskiDerivationRRG.h    |  58 --------
 aconversions2/src/ConversionHandler.cpp       |  11 +-
 .../re2rg/re2rrg/BrzozowskiDerivationRRG.cpp  | 132 +++++++++++++++++
 .../re2rg/re2rrg/BrzozowskiDerivationRRG.h    |  42 ++++++
 5 files changed, 179 insertions(+), 198 deletions(-)
 delete mode 100644 aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp
 delete mode 100644 aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.h
 create mode 100644 alib2algo/src/conversions/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp
 create mode 100644 alib2algo/src/conversions/re2rg/re2rrg/BrzozowskiDerivationRRG.h

diff --git a/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp b/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp
deleted file mode 100644
index 246199f696..0000000000
--- a/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * BrzozowskiDerivationRRG.cpp
- *
- *  Created on: 6. 3. 2014
- *      Author: tomas
- */
-
-#include "BrzozowskiDerivationRRG.h"
-
-using namespace alib;
-using namespace alphabet;
-using namespace grammar;
-using namespace regexp;
-
-namespace conversions
-{
-
-BrzozowskiDerivationRRG::BrzozowskiDerivationRRG( const RegExp & re ) : m_re( re )
-{
-
-}
-
-BrzozowskiDerivationRRG::~BrzozowskiDerivationRRG( void )
-{
-
-}
-
-RightRegularGrammar BrzozowskiDerivationRRG::convert( void )
-{
-    RegExpOptimize opt;
-
-    // 1.
-    RegExp V = opt.optimize( m_re );
-
-    set<Symbol> alphabet = m_re.getAlphabet( );
-
-    set<RegExp> N = { V };
-    deque<set<RegExp>> Ni;
-
-    Ni.push_back( set<RegExp>( ) );
-    Ni.at( 0 ).insert( V );
-
-    int i = 1;
-
-    // 2.
-    while( ! Ni.at( i - 1 ).empty( ) )
-    {
-        Ni.push_back( set<RegExp>( ) ); // initialize set Q_i
-
-        for( const auto & regexp : Ni.at( i - 1 ) )
-        {
-            RegExpDerivation deriv( regexp );
-
-            for( const auto & a : alphabet )
-            {
-                RegExp derived = deriv.derivation( a );
-                derived = opt.optimize( derived );
-
-                if( ! isInSet( derived, N ) ) // if this state has already been found, do not add
-                    Ni.at( i ).insert( derived );
-            }
-        }
-
-        N.insert( Ni.at( i ).begin( ), Ni.at( i ).end( ) );
-        i += 1;
-    }
-
-    // 3.
-    RightRegularGrammar grammar;
-    map<RegExp, Symbol> nonterminalMap;
-    int nonterminalId = 0;
-
-    for( const auto & s : alphabet )
-        grammar.addTerminalSymbol( s.getSymbol( ) );
-
-    for( const auto & r : N )
-    {
-        Symbol nt = grammar.createUniqueNonTerminalSymbol( toBase26( nonterminalId ++ ) );
-        nonterminalMap.insert( pair<RegExp, Symbol>( r, nt ) );
-    }
-
-    for( const auto & r : N )
-    {
-        RegExpDerivation deriv( r );
-
-        for( const auto & a : alphabet )
-        {
-            RegExp derived = deriv.derivation( a );
-            derived = opt.optimize( derived );
-
-            list<Symbol> leftSide = { nonterminalMap.find( r )->second };
-            list<Symbol> rightSide = { a, nonterminalMap.find( derived )->second };
-
-            Rule r( leftSide, rightSide );
-            grammar.addRule( r );
-
-            if( derived.containsEmptyString( ) )
-            {
-                list<Symbol> rightSide = { a };
-                Rule r( leftSide, rightSide );
-                grammar.addRule( r );
-            }
-        }
-    }
-
-    grammar.setStartSymbol( nonterminalMap.find( V )->second );
-
-    if( V.containsEmptyString( ) )
-    {
-        list<Symbol> leftSide = { nonterminalMap.find( V )->second };
-        list<Symbol> rightSide;
-
-        if( grammar.isNonTerminalOnRightSideOfAnyRule( grammar.getStartSymbol( ) ) )
-        {
-            Symbol newStart = grammar.createUniqueNonTerminalSymbol( grammar.getStartSymbol( ).getSymbol( ), false );
-
-            list<Symbol> leftSideNewStart = { newStart };
-            for( const auto & rule : grammar.getRules( ) )
-                if( rule.getLeftSide( ).front( ) == grammar.getStartSymbol( ) )
-                    grammar.addRule( Rule( leftSideNewStart, rule.getRightSide( ) ) );
-
-            grammar.setStartSymbol( newStart );
-            grammar.addRule( Rule( leftSideNewStart, rightSide ) );
-        }
-        else
-        {
-            grammar.addRule( Rule ( leftSide, rightSide ) );
-        }
-    }
-
-    return grammar;
-}
-
-} /* namespace conversions */
diff --git a/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.h b/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.h
deleted file mode 100644
index 28be812b35..0000000000
--- a/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * BrzozowskiDerivationRRG.h
- *
- *  Created on: 6. 3. 2014
- *      Author: tomas
- */
-
-#ifndef BRZOZOWSKIDERIVATIONRRG_H_
-#define BRZOZOWSKIDERIVATIONRRG_H_
-
-#include <deque>
-#include <set>
-#include <map>
-
-#include <alphabet/Symbol.h>
-#include <grammar/Regular/RightRegularGrammar.h>
-#include <regexp/RegExp.h>
-
-#include "../../include/macros.h"
-#include "../../interface/IConversionRRG.h"
-#include "../../shared/Hexavigesimal.h"
-
-#include "RegExpOptimize.h"
-#include "RegExpDerivation.h"
-
-namespace conversions
-{
-
-/**
- * Converts reg. expression to right regular grammar using brzozowski derivation algorithm.
- * Source: Melichar 2.137
- */
-class BrzozowskiDerivationRRG : public IConversionRRG
-{
-public:
-    /**
-     * @param re Source regular expression.
-     */
-    BrzozowskiDerivationRRG( const regexp::RegExp & re );
-
-    ~BrzozowskiDerivationRRG( void );
-
-    /**
-     * Performs conversion.
-     * @return right regular grammar equivalent to source regexp.
-     */
-    grammar::RightRegularGrammar convert( void );
-
-protected:
-    /*
-     * input regexp
-     */
-    const regexp::RegExp & m_re;
-};
-
-} /* namespace conversions */
-
-#endif /* BRZOZOWSKIDERIVATIONRRG_H_ */
diff --git a/aconversions2/src/ConversionHandler.cpp b/aconversions2/src/ConversionHandler.cpp
index 18673ede36..a3c7a957a7 100644
--- a/aconversions2/src/ConversionHandler.cpp
+++ b/aconversions2/src/ConversionHandler.cpp
@@ -24,7 +24,7 @@
 //#include "conversions/rg2re/lrg2re/LRGAlgebraic.h"
 
 //#include "conversions/re2rg/re2rrg/GlushkovRRG.h"
-//#include "conversions/re2rg/re2rrg/BrzozowskiDerivationRRG.h"
+#include "conversions/re2rg/re2rrg/BrzozowskiDerivationRRG.h"
 
 #include "conversions/rg2rg/lrg2rrg/LeftToRightRegularGrammar.h"
 #include "conversions/rg2rg/rrg2lrg/RightToLeftRegularGrammar.h"
@@ -208,14 +208,13 @@ void ConversionHandler::convertREtoRG( void )
 
 void ConversionHandler::convertREtoRRG( void )
 {
-	const regexp::UnboundedRegExp regexp = alib::DataFactory::fromTokens<regexp::UnboundedRegExp>( m_tokens );
+	const regexp::RegExp regexp = alib::DataFactory::fromTokens<regexp::RegExp>(m_tokens);
 
-	switch( m_algorithm )
+	switch(m_algorithm)
 	{
 	case BRZOZOWSKI_DERIVATION: {
-/*			re2rg::BrzozowskiDerivationRRG conv( regexp );
-			grammar::RightRG rrg = conv.convert();
-			alib::DataFactory::toStdout(rrg);*/
+			re2rg::BrzozowskiDerivationRRG conv;
+			alib::DataFactory::toStdout(conv.convert(regexp));
 			break;
 		}
 	default: {
diff --git a/alib2algo/src/conversions/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp b/alib2algo/src/conversions/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp
new file mode 100644
index 0000000000..af145f5ca2
--- /dev/null
+++ b/alib2algo/src/conversions/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp
@@ -0,0 +1,132 @@
+/*
+ * BrzozowskiDerivationRRG.cpp
+ *
+ *  Created on: 6. 3. 2014
+ *      Author: tomas
+ */
+
+#include "BrzozowskiDerivationRRG.h"
+
+#include <deque>
+#include <map>
+#include <set>
+#include <vector>
+
+#include <label/StringLabel.h>
+#include <std/hexavigesimal.h>
+
+#include "../../../regexp/RegExpOptimize.h"
+#include "../../../regexp/RegExpDerivation.h"
+
+namespace re2rg
+{
+
+BrzozowskiDerivationRRG::BrzozowskiDerivationRRG() = default;  // nothing to initialise
+BrzozowskiDerivationRRG::~BrzozowskiDerivationRRG() = default; // nothing to release
+
+
+void BrzozowskiDerivationRRG::Visit(void* userData, const regexp::FormalRegExp& regexp)
+{
+    auto* result = static_cast<std::pair<std::set<alphabet::Symbol>, bool>*>(userData); // (alphabet, contains-epsilon) pair passed in by convert()
+    result->first = regexp.getAlphabet();
+    result->second = regexp.containsEmptyString();
+}
+void BrzozowskiDerivationRRG::Visit(void* userData, const regexp::UnboundedRegExp& regexp)
+{
+    auto* result = static_cast<std::pair<std::set<alphabet::Symbol>, bool>*>(userData); // same out-parameter as the FormalRegExp overload
+    result->first = regexp.getAlphabet();
+    result->second = regexp.containsEmptyString();
+}
+
+grammar::RightRG BrzozowskiDerivationRRG::convert(const regexp::RegExp& regexp)
+{
+    // Step 1: V is the start expression. Optimisation is currently disabled, so V is the input as given:
+    // regexp::RegExpOptimize opt;
+    // RegExp V = opt.optimize(regexp);
+    regexp::RegExp V = regexp;
+
+    std::pair<std::set<alphabet::Symbol>, bool> out({}, false);
+    regexp.getData().Accept(static_cast<void*>(&out), *this);
+    const std::set<alphabet::Symbol> alphabet = out.first; // deliberate copy: later Accept() calls reassign out.first while the alphabet is being iterated
+
+    std::set<regexp::RegExp> N = { V };
+    std::deque<std::set<regexp::RegExp>> Ni;
+
+    Ni.push_back(std::set<regexp::RegExp>());
+    Ni.at(0).insert(V);
+
+    int i = 1;
+    // Step 2: collect every distinct derivative reachable from V; Ni.at(i) holds the expressions first seen in round i.
+    // NOTE(review): with optimize() disabled, termination relies on derivation() yielding finitely many distinct expressions - confirm.
+    while(! Ni.at(i - 1).empty())
+    {
+        Ni.push_back(std::set<regexp::RegExp>()); // initialize set Q_i
+
+        for(const auto & dregexp : Ni.at( i - 1 ))
+        {
+            regexp::RegExpDerivation deriv;
+
+            for(const auto & a : alphabet)
+            {
+                string::LinearString string(std::vector<alphabet::Symbol>{a});
+                regexp::RegExp derived = deriv.derivation(dregexp, string);
+                // derived = opt.optimize(derived);
+
+                // this will also add \emptyset as a regexp (and as FA state)
+                if(N.count(derived) == 0) // if this state has already been found, do not add
+                    Ni.at(i).insert(derived);
+            }
+        }
+
+        N.insert(Ni.at(i).begin(), Ni.at(i).end());
+        i += 1;
+    }
+
+    // Step 3: map every expression in N to a nonterminal and emit, for each expression r and symbol a,
+    // the rule r -> a d(r, a), plus r -> a when the derivative d(r, a) contains the empty string.
+
+    int nonterminalId = 0;
+    std::map<regexp::RegExp, alphabet::Symbol> nonterminalMap;
+    // NOTE(review): ntV is built directly, not via createUniqueSymbol() like the other nonterminals - presumably it could clash with a terminal; verify.
+    alphabet::Symbol ntV(alphabet::LabeledSymbol(label::Label(label::StringLabel(std::toBase26(nonterminalId++)))));
+    nonterminalMap.insert(std::make_pair(V, ntV));
+
+    grammar::RightRG grammar(ntV);
+    grammar.setTerminalAlphabet(alphabet);
+    // NOTE(review): nt is only recorded in nonterminalMap below; nothing visible registers it in the grammar's nonterminal alphabet - confirm addRule() accepts that.
+    for(const auto & r : N)
+    {
+        if(V == r) continue; // V is already mapped to ntV
+
+        alphabet::Symbol nt = alphabet::createUniqueSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel(std::toBase26(nonterminalId++))))), grammar.getTerminalAlphabet(), grammar.getNonterminalAlphabet());
+        nonterminalMap.insert(std::make_pair(r, nt));
+    }
+
+    for(const auto & r : N)
+    {
+        regexp::RegExpDerivation deriv;
+
+        for(const auto & a : alphabet)
+        {
+            string::LinearString string(std::vector<alphabet::Symbol>{a});
+            regexp::RegExp derived = deriv.derivation(r, string);
+            // derived = opt.optimize(derived);
+
+            grammar.addRule(nonterminalMap.find(r)->second, std::make_pair(a, nonterminalMap.find(derived)->second));
+
+            derived.getData().Accept(static_cast<void*>(&out), *this); // refreshes out (both members) for the derivative; only out.second is read
+            if(out.second) // if(derived.containsEmptyString())
+                grammar.addRule(nonterminalMap.find(r)->second, a);
+        }
+    }
+
+    grammar.setInitialSymbol(nonterminalMap.find(V)->second);
+
+    V.getData().Accept(static_cast<void*>(&out), *this);
+    if(out.second) // if(V.containsEmptyString())
+        grammar.setGeneratesEpsilon(true); // okay, because of this feature we do not have to bother with extending the grammar with new rules and nonterminals. YAY!
+
+    return grammar;
+}
+
+} /* namespace re2rg */
diff --git a/alib2algo/src/conversions/re2rg/re2rrg/BrzozowskiDerivationRRG.h b/alib2algo/src/conversions/re2rg/re2rrg/BrzozowskiDerivationRRG.h
new file mode 100644
index 0000000000..b61df215fa
--- /dev/null
+++ b/alib2algo/src/conversions/re2rg/re2rrg/BrzozowskiDerivationRRG.h
@@ -0,0 +1,42 @@
+/*
+ * BrzozowskiDerivationRRG.h
+ *
+ *  Created on: 6. 3. 2014
+ *      Author: tomas
+ */
+
+#ifndef BRZOZOWSKIDERIVATIONRRG_H_
+#define BRZOZOWSKIDERIVATIONRRG_H_
+
+#include <grammar/Regular/RightRG.h>
+#include <regexp/RegExp.h>
+#include <regexp/formal/FormalRegExp.h>
+#include <regexp/unbounded/UnboundedRegExp.h>
+
+namespace re2rg
+{
+
+/**
+ * Regular expression -> right regular grammar conversion based on Brzozowski derivatives
+ * (source: Melichar 2.137). Derives from the regexp visitor type to inspect the concrete regexp kinds.
+ */
+class BrzozowskiDerivationRRG : public regexp::VisitableRegExpBase::visitor_type
+{
+public:
+    BrzozowskiDerivationRRG();
+    ~BrzozowskiDerivationRRG();
+
+    /**
+     * @param regexp regular expression to convert
+     * @return right regular grammar equivalent to source regexp.
+     */
+    grammar::RightRG convert(const regexp::RegExp& regexp);
+
+private:
+    void Visit(void* userData, const regexp::FormalRegExp& regexp);    // visitor callback for the formal representation
+    void Visit(void* userData, const regexp::UnboundedRegExp& regexp); // visitor callback for the unbounded representation
+};
+
+} /* namespace re2rg */
+
+#endif /* BRZOZOWSKIDERIVATIONRRG_H_ */
-- 
GitLab