From 5d9af68bec5280f64b7fe30d50c417598d32cf46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <tomaspecka@gmail.com>
Date: Sat, 18 Jan 2014 20:23:45 +0100
Subject: [PATCH] Glushkov - symbol numbering

---
 .../src/conversions/re2fa/Glushkov.cpp        | 35 ++++++++++++++++++
 aconversions/src/conversions/re2fa/Glushkov.h | 22 +++++++++--
 aconversions/src/conversions/re2fa/Makefile   | 10 +++--
 aconversions/src/utils/RegExpUtils.cpp        | 37 +++++++++++++++++--
 aconversions/src/utils/RegExpUtils.h          | 11 +++++-
 aconversions/src/utils/utils.h                |  1 +
 6 files changed, 104 insertions(+), 12 deletions(-)

diff --git a/aconversions/src/conversions/re2fa/Glushkov.cpp b/aconversions/src/conversions/re2fa/Glushkov.cpp
index c784a226a5..024e034ce5 100644
--- a/aconversions/src/conversions/re2fa/Glushkov.cpp
+++ b/aconversions/src/conversions/re2fa/Glushkov.cpp
@@ -20,7 +20,42 @@ Glushkov::Glushkov( const RegExp & re ) : AbstractREtoFAConverter( re )
 
 const FSM Glushkov::convert( void )
 {
+    // Step 1: give every symbol occurrence in the regexp its own per-symbol index.
+    initNumberSymbols( );
+    /* Remaining steps, still commented out in this revision:
+    constructBeginSymbolSet( );
+    constructNeighbourSet( );
+    constructEndSymbolSet( );
+     */
     return m_fsm;
 }
 
+void Glushkov::initNumberSymbols( void )
+{
+    // Occurrence counter per symbol; operator[] value-initializes a new key to 0,
+    // so the k-th occurrence (0-based) of a symbol is numbered k.
+    map<RegExpSymbol, int> occurrences;
+
+    // NOTE(review): the iterated list is a temporary that dies after the loop.
+    for( const auto & symb : RegExpUtils::getRegExpSymbols( m_re ) )
+    {
+        m_numberedSymbols.push_back( NumberedSymbol( symb, occurrences[ symb ] ++ ) );
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+Glushkov::NumberedSymbol::NumberedSymbol( const RegExpSymbol & symbol, int i ) : m_symbol( symbol ), m_i( i )
+{
+    // Intentionally empty: both members are set by the initializer list above.
+}
+
+std::string Glushkov::NumberedSymbol::constructStateName( void )
+{
+    // The name is the symbol text, an underscore, then the occurrence index.
+    ostringstream nameBuilder;
+    nameBuilder << m_symbol.getSymbol( ) << "_" << m_i;
+    return nameBuilder.str( );
+}
+
 } /* namespace conversions */
diff --git a/aconversions/src/conversions/re2fa/Glushkov.h b/aconversions/src/conversions/re2fa/Glushkov.h
index 5da8240f33..c528a333f1 100644
--- a/aconversions/src/conversions/re2fa/Glushkov.h
+++ b/aconversions/src/conversions/re2fa/Glushkov.h
@@ -8,9 +8,13 @@
 #ifndef GLUSHKOV_H_
 #define GLUSHKOV_H_
 
+#include <sstream>
+#include <map>
+
+#include <alphabet/Symbol.h>
 #include <regexp/RegExp.h>
-#include <regexp/RegExpSymbol.h>
 
+#include "../../utils/RegExpUtils.h"
 #include "AbstractREtoFAConverter.h"
 
 namespace conversions
@@ -18,6 +22,9 @@ namespace conversions
 
 /**
  * Converts regular expression to finite automata using Glushkov's NFA construction algorithm.
+ * Sources:
+ *  - Melichar 2.107
+ *
  */
 class Glushkov : public AbstractREtoFAConverter
 {
@@ -26,13 +33,20 @@ public:
     const automaton::FSM convert( void );
 
 private:
-    struct Struct
+    struct NumberedSymbol // one occurrence of a regexp symbol together with its per-symbol index
     {
-        regexp::RegExpSymbol & m_symbol;
+        regexp::RegExpSymbol m_symbol; // owned copy: a reference here dangled once the source symbol was destroyed
         int m_i;
 
-        std::string constructStateName();
+        NumberedSymbol( const regexp::RegExpSymbol & symb, int i );
+        std::string constructStateName( void );
     };
+
+
+    void initNumberSymbols( void );
+
+    std::list<NumberedSymbol> m_numberedSymbols;
+
 };
 
 } /* namespace conversions */
diff --git a/aconversions/src/conversions/re2fa/Makefile b/aconversions/src/conversions/re2fa/Makefile
index e8758d4474..d1943d04da 100644
--- a/aconversions/src/conversions/re2fa/Makefile
+++ b/aconversions/src/conversions/re2fa/Makefile
@@ -1,10 +1,10 @@
 all: are2fa.glushkov are2fa.brzozowski are2fa.thompson
-	#mv are2fa.glushkov $(BIN_DIR)
+	mv are2fa.glushkov $(BIN_DIR)
 	#mv are2fa.brzozowski $(BIN_DIR)
 	mv are2fa.thompson $(BIN_DIR)
 
-are2fa.glushkov: are2fa.glushkov.o Glushkov.o AbstractREtoFAConverter.o
-	#$(LD) $(LDFLAGS) $^ -o $@
+are2fa.glushkov: are2fa.glushkov.o Glushkov.o AbstractREtoFAConverter.o RegExpUtils.o ConversionException.o
+	$(LD) $(LDFLAGS) $^ -o $@
 
 are2fa.brzozowski: are2fa.brzozowski.o Brzozowski.o AbstractREtoFAConverter.o
 	#$(LD) $(LDFLAGS) $^ -o $@
@@ -22,7 +22,6 @@ are2fa.glushkov.o: are2fa.glushkov.cpp Glushkov.h AbstractREtoFAConverter.h
 are2fa.thompson.o: are2fa.thompson.cpp Thompson.h AbstractREtoFAConverter.h
 	$(CXX) $(CXXFLAGS) $< -o $@
 
-
 AbstractREtoFAConverter.o: AbstractREtoFAConverter.cpp AbstractREtoFAConverter.h
 	$(CXX) $(CXXFLAGS) $< -o $@
 
@@ -39,6 +38,9 @@ Thompson.o: Thompson.cpp Thompson.h AbstractREtoFAConverter.h ../../utils/Automa
 AutomatonUtils.o: ../../utils/AutomatonUtils.cpp ../../utils/AutomatonUtils.h ../../utils/utils.h
 	$(CXX) $(CXXFLAGS) $< -o $@
 
+RegExpUtils.o: ../../utils/RegExpUtils.cpp ../../utils/RegExpUtils.h ../../utils/utils.h ../../utils/ConversionException.h
+	$(CXX) $(CXXFLAGS) $< -o $@
+
 ConversionException.o: ../../utils/ConversionException.cpp ../../utils/ConversionException.h
 	$(CXX) $(CXXFLAGS) $< -o $@
 
diff --git a/aconversions/src/utils/RegExpUtils.cpp b/aconversions/src/utils/RegExpUtils.cpp
index cc4532bf52..6d7feab78c 100644
--- a/aconversions/src/utils/RegExpUtils.cpp
+++ b/aconversions/src/utils/RegExpUtils.cpp
@@ -1,17 +1,48 @@
 #include "RegExpUtils.h"
 
 using namespace alphabet;
+using namespace regexp;
 using namespace std;
 
 namespace conversions
 {
 
-set<Symbol> getAlphabet( void )
+list<RegExpSymbol> RegExpUtils::getRegExpSymbols( const RegExp & re )
 {
-    std::set<Symbol> alphabet;
-    // iterate through
+    // A list is returned to preserve the ordering of symbols in the regexp tree;
+    // every occurrence is appended, so repeated symbols appear repeatedly.
+    list<RegExpSymbol> alphabet;
+
+    // NOTE(review): const is cast away, presumably because getRegExp() is non-const - confirm.
+    traverseSymbols( const_cast<RegExp&>( re ).getRegExp(), alphabet );
 
     return alphabet;
 }
 
+
+void RegExpUtils::traverseSymbols( RegExpElement * element, list<RegExpSymbol> & alphabet )
+{
+    // Appends every RegExpSymbol found under element to alphabet, visiting children
+    // in the order getElements() yields them.
+    // Throws ConversionException when element is none of the four handled node kinds.
+
+    if( RegExpSymbol* symbol = dynamic_cast<RegExpSymbol*>( element ) )
+        alphabet.push_back( * symbol );
+
+    else if( Alternation* alternation = dynamic_cast<Alternation*>( element ) )
+        for( auto child : alternation->getElements() )
+            traverseSymbols( child, alphabet );
+
+    else if( Concatenation* concatenation = dynamic_cast<Concatenation*>( element ) )
+        for( auto child : concatenation->getElements() )
+            traverseSymbols( child, alphabet );
+
+    else if( Iteration* iteration = dynamic_cast<Iteration*>( element ) )
+        traverseSymbols( iteration->getElement(), alphabet );
+
+    // Only reached for an unrecognized (or null) element; the throw must stay inside this else.
+    else
+        throw ConversionException( "Captain's log. Stardate 3413.6. Approaching TraverseUtils, class RegExpUtils planet. Encountered invalid RegExpElement. Sending away team to explore." );
+}
+
 } /* namespace conversions */
diff --git a/aconversions/src/utils/RegExpUtils.h b/aconversions/src/utils/RegExpUtils.h
index aff77de130..e67b227bf3 100644
--- a/aconversions/src/utils/RegExpUtils.h
+++ b/aconversions/src/utils/RegExpUtils.h
@@ -1,7 +1,15 @@
 #ifndef REGEXPUTILS_H_
 #define REGEXPUTILS_H_
 
+#include <regexp/RegExp.h>
+#include <regexp/RegExpElement.h>
+#include <regexp/Alternation.h>
+#include <regexp/Concatenation.h>
+#include <regexp/Iteration.h>
+#include <regexp/RegExpSymbol.h>
+
 #include "utils.h"
+#include "ConversionException.h"
 
 namespace conversions
 {
@@ -9,9 +17,10 @@ namespace conversions
 class RegExpUtils
 {
 public:
-    static std::set<alphabet::Symbol> getAlphabet( void );
+    static std::list<regexp::RegExpSymbol> getRegExpSymbols( const regexp::RegExp & re ); // all symbol occurrences of re, in traversal order
 
 private:
+    static void traverseSymbols( regexp::RegExpElement * element, std::list<regexp::RegExpSymbol> & alphabet ); // recursive helper; appends to alphabet
 };
 
 } /* namespace conversions */
diff --git a/aconversions/src/utils/utils.h b/aconversions/src/utils/utils.h
index 4d14be70ae..609a4e61b1 100644
--- a/aconversions/src/utils/utils.h
+++ b/aconversions/src/utils/utils.h
@@ -12,6 +12,7 @@ namespace conversions
 {
 
 #define isInSet(x,set) ( (set).find((x)) != (set).end())
+#define isKeyInMap(key,map) ( (map).find((key)) != (map).end()) /* true when key is present in map; note that map is evaluated twice */
 
 enum SuffixType
 {
-- 
GitLab