From f6ea746bfab66d6bb43e155ba3dea4808d89b87e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <peckato1@fit.cvut.cz>
Date: Sat, 6 Sep 2014 22:58:43 +0200
Subject: [PATCH] algo: brzozowski, thompson

---
 aconversions/src/re2fa/Brzozowski.cpp         | 111 --------
 aconversions/src/re2fa/Brzozowski.h           |  58 ----
 aconversions/src/re2fa/Thompson.cpp           | 148 ----------
 aconversions/src/re2fa/Thompson.h             |  82 ------
 aconversions2/src/ConversionHandler.cpp       |  21 +-
 .../src/conversions/re2fa/Brzozowski.cpp      | 128 +++++++++
 alib2algo/src/conversions/re2fa/Brzozowski.h  |  42 +++
 alib2algo/src/conversions/re2fa/Thompson.cpp  | 261 ++++++++++++++++++
 alib2algo/src/conversions/re2fa/Thompson.h    |  59 ++++
 9 files changed, 499 insertions(+), 411 deletions(-)
 delete mode 100644 aconversions/src/re2fa/Brzozowski.cpp
 delete mode 100644 aconversions/src/re2fa/Brzozowski.h
 delete mode 100644 aconversions/src/re2fa/Thompson.cpp
 delete mode 100644 aconversions/src/re2fa/Thompson.h
 create mode 100644 alib2algo/src/conversions/re2fa/Brzozowski.cpp
 create mode 100644 alib2algo/src/conversions/re2fa/Brzozowski.h
 create mode 100644 alib2algo/src/conversions/re2fa/Thompson.cpp
 create mode 100644 alib2algo/src/conversions/re2fa/Thompson.h

diff --git a/aconversions/src/re2fa/Brzozowski.cpp b/aconversions/src/re2fa/Brzozowski.cpp
deleted file mode 100644
index 8009aec9a1..0000000000
--- a/aconversions/src/re2fa/Brzozowski.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Brzozowski.cpp
- *
- *  Created on: 11. 1. 2014
- *      Author: tomas
- */
-
-#include "Brzozowski.h"
-
-using namespace std;
-using namespace alib;
-using namespace automaton;
-using namespace regexp;
-
-namespace conversions
-{
-
-Brzozowski::Brzozowski( const RegExp & re ) : m_re( re )
-{
-
-}
-
-Brzozowski::~Brzozowski( void )
-{
-
-}
-
-FSM Brzozowski::convert( void )
-{
-    RegExpOptimize opt;
-
-    // 1.
-    RegExp V = opt.optimize( m_re );
-    set<alphabet::Symbol> alphabet = m_re.getAlphabet( );
-
-    set<RegExp> Q = { V };
-    deque<set<RegExp>> Qi;
-
-    Qi.push_back( set<RegExp>( ) );
-    Qi.at( 0 ).insert( V );
-
-    int i = 1;
-
-    // 2.
-    while( ! Qi.at( i - 1 ).empty( ) )
-    {
-        Qi.push_back( set<RegExp>( ) ); // initialize set Q_i
-
-        for( const auto & regexp : Qi.at( i - 1 ) )
-        {
-            RegExpDerivation deriv( regexp );
-
-            for( const auto & a : alphabet )
-            {
-                RegExp derived = deriv.derivation( a );
-                derived = opt.optimize( derived );
-
-                // this will also add \emptyset as a regexp (and as FA state)
-                if( ! isInSet( derived, Q ) ) // if this state has already been found, do not add
-                    Qi.at( i ).insert( derived );
-
-            }
-        }
-
-        Q.insert( Qi.at( i ).begin( ), Qi.at( i ).end( ) );
-
-        i += 1;
-    }
-
-    // ------------------------------------------------------------------------
-    // 3.
-
-    FSM automaton;
-    int stateId = 0;
-    map<RegExp, State> stateMap;
-
-    for( const auto & r : Q )
-    {
-        State q( toBase26( stateId ++ ) );
-        stateMap.insert( std::pair<RegExp,State>( r, q ) );
-        automaton.addState( q );
-    }
-
-    for( const auto & a : alphabet )
-        automaton.addInputSymbol( a.getSymbol( ) );
-
-    for( const auto & r : Q )
-    {
-        RegExpDerivation deriv( r );
-
-        for( const auto & a: automaton.getInputAlphabet( ) )
-        {
-            RegExp derived = deriv.derivation( a );
-            derived = opt.optimize( derived );
-
-            TransitionFSM t( stateMap.find( r )->second, a, stateMap.find( derived )->second );
-            if( ! isInSet( t, automaton.getTransitions( ) ) )
-                automaton.addTransition( t );
-        }
-    }
-
-    automaton.addInitialState( stateMap.find( V )->second );
-
-    for( const auto & U : Q )
-        if( U.containsEmptyString( ) )
-            automaton.addFinalState( stateMap.find( U )->second );
-
-    return automaton;
-}
-
-} /* namespace conversions */
diff --git a/aconversions/src/re2fa/Brzozowski.h b/aconversions/src/re2fa/Brzozowski.h
deleted file mode 100644
index 732c6b3851..0000000000
--- a/aconversions/src/re2fa/Brzozowski.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Brzozowski.h
- *
- *  Created on: 11. 1. 2014
- *      Author: tomas
- */
-
-#ifndef BRZOZOWSKI_H_
-#define BRZOZOWSKI_H_
-
-#include <map>
-#include <set>
-#include <string>
-#include <deque>
-
-#include <automaton/State.h>
-#include <AlibException.h>
-
-#include "../interface/IConversionFSM.h"
-#include "../shared/Hexavigesimal.h"
-#include "../include/macros.h"
-
-#include "RegExpDerivation.h"
-#include "RegExpOptimize.h"
-
-namespace conversions
-{
-
-/**
- * Converts regular expression to finite automaton using Brzozowski algorithm (derivations of regular expressions).
- * Source: Melichar 2.110
- */
-class Brzozowski : public IConversionFSM
-{
-public:
-    /**
-     * @param re Source regular expression.
-     */
-    Brzozowski( const regexp::RegExp & re );
-
-    ~Brzozowski( void );
-
-    /**
-     * Performs conversion.
-     * @return FSM equivalent to original regular expression.
-     */
-    automaton::FSM convert( void );
-
-private:
-    /**
-     * input regexp
-     */
-    const regexp::RegExp & m_re;
-};
-
-} /* namespace conversions */
-
-#endif /* BRZOZOWSKI_H_ */
diff --git a/aconversions/src/re2fa/Thompson.cpp b/aconversions/src/re2fa/Thompson.cpp
deleted file mode 100644
index b23f79c9e4..0000000000
--- a/aconversions/src/re2fa/Thompson.cpp
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Thompson.cpp
- *
- *  Created on: 11. 1. 2014
- *      Author: tomas
- */
-#include "Thompson.h"
-
-using namespace alib;
-using namespace automaton;
-using namespace regexp;
-
-namespace conversions
-{
-
-Thompson::Thompson( const RegExp & re ) : m_re( re )
-{
-
-}
-
-Thompson::~Thompson( void )
-{
-
-}
-
-
-FSM Thompson::convert( void )
-{
-    m_fsm = FSM( );
-    m_stateId = 0;
-
-    for( const auto & symbol : m_re.getAlphabet( ) )
-        m_fsm.addInputSymbol( symbol.getSymbol( ) );
-
-    SubexpressionTails st = processRegExpNode( m_re.getRegExp( ) );
-
-    m_fsm.addInitialState( st.m_head );
-    m_fsm.addFinalState( st.m_tail );
-
-    return m_fsm;
-}
-
-Thompson::SubexpressionTails Thompson::processRegExpNode( const RegExpElement * node )
-{
-    const Alternation* alternation = dynamic_cast<const Alternation*>( node );
-    const Concatenation* concatenation = dynamic_cast<const Concatenation*>( node );
-    const Iteration* iteration = dynamic_cast<const Iteration*>( node );
-    const RegExpSymbol* symbol = dynamic_cast<const RegExpSymbol*>( node );
-    const RegExpEmpty* empty = dynamic_cast<const RegExpEmpty*>( node );
-    const RegExpEpsilon* eps = dynamic_cast<const RegExpEpsilon*>( node );
-
-    if( alternation )
-        return processRegExpNode( alternation );
-    else if( concatenation )
-        return processRegExpNode( concatenation );
-    else if( iteration )
-        return processRegExpNode( iteration );
-    else if( symbol )
-        return processRegExpNode( symbol );
-    else if( eps )
-        return processRegExpNode( eps );
-    else if( empty )
-        return processRegExpNode( empty );
-
-     throw AlibException( "Thompson::process - invalid RegExpElement node." );
-}
-
-Thompson::SubexpressionTails Thompson::processRegExpNode( const Iteration * node )
-{
-    State head = m_fsm.createUniqueState( toBase26( m_stateId    ) + "0", true );
-    State tail = m_fsm.createUniqueState( toBase26( m_stateId ++ ) + "1", true );
-
-    SubexpressionTails st = processRegExpNode( node->getElement( ) );
-
-    m_fsm.addTransition( head, Symbol( "" ), st.m_head );
-    m_fsm.addTransition( head, Symbol( "" ), tail );
-    m_fsm.addTransition( st.m_tail, Symbol( "" ), tail );
-    m_fsm.addTransition( st.m_tail, Symbol( "" ), st.m_head );
-
-    return SubexpressionTails( head, tail );
-}
-
-Thompson::SubexpressionTails Thompson::processRegExpNode( const Alternation * node )
-{
-    State head = m_fsm.createUniqueState( toBase26( m_stateId    ) + "0", true );
-    State tail = m_fsm.createUniqueState( toBase26( m_stateId ++ ) + "1", true );
-
-    for( const auto & element : node->getElements( ) )
-    {
-        SubexpressionTails st = processRegExpNode( element );
-
-        m_fsm.addTransition( head, Symbol( "" ), st.m_head );
-        m_fsm.addTransition( st.m_tail, Symbol( "" ), tail );
-    }
-
-    return SubexpressionTails( head, tail );
-}
-
-Thompson::SubexpressionTails Thompson::processRegExpNode( const Concatenation * node )
-{
-    vector<SubexpressionTails> st;
-    for( const auto & element : node->getElements( ) )
-        st.push_back( processRegExpNode( element ) );
-
-    for( size_t i = 1; i < st.size( ); i ++ )
-        m_fsm.addTransition( st[ i - 1 ].m_tail, Symbol( "" ), st[ i ].m_head );
-
-    return SubexpressionTails( st[ 0 ].m_head, st[ st.size( ) - 1 ].m_tail );
-}
-
-Thompson::SubexpressionTails Thompson::processRegExpNode( const RegExpSymbol * node )
-{
-    Symbol symb( node->getSymbol( ) );
-    State head = m_fsm.createUniqueState( toBase26( m_stateId    ) + "0", true );
-    State tail = m_fsm.createUniqueState( toBase26( m_stateId ++ ) + "1", true );
-
-    m_fsm.addTransition( head, symb, tail );
-
-    return SubexpressionTails( head, tail );
-}
-
-Thompson::SubexpressionTails Thompson::processRegExpNode( const RegExpEpsilon * node )
-{
-    Symbol symb( "" );
-    State head = m_fsm.createUniqueState( toBase26( m_stateId    ) + "0", true );
-    State tail = m_fsm.createUniqueState( toBase26( m_stateId ++ ) + "1", true );
-
-    m_fsm.addTransition( head, symb, tail );
-
-    return SubexpressionTails( head, tail );
-}
-
-Thompson::SubexpressionTails Thompson::processRegExpNode( const RegExpEmpty * node )
-{
-    State head = m_fsm.createUniqueState( toBase26( m_stateId    ) + "0", true );
-    State tail = m_fsm.createUniqueState( toBase26( m_stateId ++ ) + "1", true );
-
-    return SubexpressionTails( head, tail );
-}
-
-// ----------------------------------------------------------------------------
-
-Thompson::SubexpressionTails::SubexpressionTails( const State & head, const State & tail ) : m_head( head ), m_tail ( tail )
-{
-
-}
-
-} /* namespace conversions */
diff --git a/aconversions/src/re2fa/Thompson.h b/aconversions/src/re2fa/Thompson.h
deleted file mode 100644
index ae2f5452bb..0000000000
--- a/aconversions/src/re2fa/Thompson.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Thompson.h
- *
- *  Created on: 11. 1. 2014
- *      Author: tomas
- */
-
-#ifndef THOMPSON_H_
-#define THOMPSON_H_
-
-#include <set>
-#include <vector>
-
-#include <AlibException.h>
-#include <automaton/FSM/FSM.h>
-#include <regexp/RegExp.h>
-#include <regexp/RegExpElements.h>
-
-#include "../interface/IConversionFSM.h"
-#include "../include/macros.h"
-#include "../shared/Hexavigesimal.h"
-
-
-namespace conversions
-{
-
-/**
- * Converts regular expression to finite automaton using Thompson's Construction Algorithm (TCA).
- * Sources:
- *  Hopcroft, section 3.2.3
- *  http://www.eecis.udel.edu/~cavazos/cisc672/lectures/Lecture-04.pdf
- *  http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.21.7450&rep=rep1&type=ps
- *  Melichar 2.112
- */
-class Thompson : public IConversionFSM
-{
-public:
-    /**
-     * @param re Source regular expression.
-     */
-    Thompson( const regexp::RegExp & re );
-    ~Thompson( void );
-
-    /**
-     * Performs conversion.
-     * @return FSM equivalent to original regular expression.
-     */
-    automaton::FSM convert( void );
-
-private:
-    /**
-     * input regexp
-     */
-    const regexp::RegExp & m_re;
-
-    /**
-     * output FSM ($\varepsilon$--NFA)
-     */
-    automaton::FSM m_fsm;
-    int m_stateId;
-
-    /**
-     * Stores head and tail state of "subautomaton" created in regexp subtree.
-     */
-    struct SubexpressionTails
-    {
-        SubexpressionTails( const automaton::State & head, const automaton::State & tail );
-        automaton::State m_head, m_tail;
-    };
-
-    SubexpressionTails processRegExpNode( const regexp::RegExpElement * node );
-    SubexpressionTails processRegExpNode( const regexp::Alternation * node );
-    SubexpressionTails processRegExpNode( const regexp::Concatenation * node );
-    SubexpressionTails processRegExpNode( const regexp::Iteration * node );
-    SubexpressionTails processRegExpNode( const regexp::RegExpSymbol * node );
-    SubexpressionTails processRegExpNode( const regexp::RegExpEmpty * node );
-    SubexpressionTails processRegExpNode( const regexp::RegExpEpsilon * node );
-};
-
-} /* namespace conversions */
-
-#endif /* THOMPSON_H_ */
diff --git a/aconversions2/src/ConversionHandler.cpp b/aconversions2/src/ConversionHandler.cpp
index 814185c436..2ad49e893a 100644
--- a/aconversions2/src/ConversionHandler.cpp
+++ b/aconversions2/src/ConversionHandler.cpp
@@ -11,8 +11,8 @@
 #include "conversions/fa2re/BrzozowskiAlgebraic.h"
 
 #include "conversions/re2fa/Glushkov.h"
-//#include "conversions/re2fa/Thompson.h"
-//#include "conversions/re2fa/Brzozowski.h"
+#include "conversions/re2fa/Thompson.h"
+#include "conversions/re2fa/Brzozowski.h"
 
 #include "conversions/fa2rg/fa2lrg/FAtoLRGConverter.h"
 #include "conversions/fa2rg/fa2rrg/FAtoRRGConverter.h"
@@ -173,27 +173,24 @@ void ConversionHandler::convertFSMtoLRG( void )
 
 void ConversionHandler::convertREtoFSM( void )
 {
-	const regexp::UnboundedRegExp regexp = alib::DataFactory::fromTokens<regexp::UnboundedRegExp>( m_tokens );
+    const regexp::RegExp regexp = alib::DataFactory::fromTokens<regexp::RegExp>(m_tokens);
 
 	switch( m_algorithm )
 	{
 	case BRZOZOWSKI_DERIVATION: {
-/*			re2fa::Brzozowski conv( regexp );
-			automaton::DFA dfa = conv.convert();
-			alib::DataFactory::toStdout(dfa);*/
+			re2fa::Brzozowski conv;
+			alib::DataFactory::toStdout(conv.convert(regexp));
 			break;
 		}
 	case THOMPSON_NFA: {
-/*			re2fa::Thompson conv( regexp );
-			autoamton::EpsilonNFA enfa = conv.convert();
-			alib::DataFactory::toStdout(enfa);*/
+			re2fa::Thompson conv;
+			alib::DataFactory::toStdout(conv.convert(regexp));
 			break;
 		}
 	case GLUSHKOV_NFA:
 	default: {
-			re2fa::Glushkov conv( regexp );
-			automaton::NFA nfa = conv.convert();
-			alib::DataFactory::toStdout(nfa);
+			//re2fa::Glushkov conv;
+			//alib::DataFactory::toStdout(conv.convert(regexp));
 			break;
 		}
 	}
diff --git a/alib2algo/src/conversions/re2fa/Brzozowski.cpp b/alib2algo/src/conversions/re2fa/Brzozowski.cpp
new file mode 100644
index 0000000000..d420f67ab6
--- /dev/null
+++ b/alib2algo/src/conversions/re2fa/Brzozowski.cpp
@@ -0,0 +1,128 @@
+/*
+ * Brzozowski.cpp
+ *
+ *  Created on: 11. 1. 2014
+ *      Author: tomas
+ */
+
+#include "Brzozowski.h"
+
+#include <set>
+#include <deque>
+#include <queue>
+#include <vector>
+
+#include <string/LinearString.h>
+#include <std/hexavigesimal.h>
+#include <label/StringLabel.h>
+
+#include "../../regexp/RegExpDerivation.h"
+//#include "regexp/RegExpOptimize.h"
+
+namespace re2fa
+{
+
+Brzozowski::Brzozowski(void){}
+Brzozowski::~Brzozowski(void){}
+
+
+void Brzozowski::Visit(void* userData, const regexp::FormalRegExp& regexp)
+{ // userData points at a (alphabet, contains-empty-string) pair that is filled in here
+    auto* result = static_cast<std::pair<std::set<alphabet::Symbol>, bool>*>(userData);
+    result->first = regexp.getAlphabet();
+    result->second = regexp.containsEmptyString();
+}
+void Brzozowski::Visit(void* userData, const regexp::UnboundedRegExp& regexp)
+{ // userData points at a (alphabet, contains-empty-string) pair that is filled in here
+    auto* result = static_cast<std::pair<std::set<alphabet::Symbol>, bool>*>(userData);
+    result->first = regexp.getAlphabet();
+    result->second = regexp.containsEmptyString();
+}
+
+automaton::NFA Brzozowski::convert(const regexp::RegExp& regexp)
+{
+    // 1. the initial state is the input regexp itself (optimization is currently disabled)
+    // regexp::RegExpOptimize opt;
+    // regexp::RegExp V = opt.optimize(regexp);
+    regexp::RegExp V = regexp;
+
+    std::pair<std::set<alphabet::Symbol>, bool> out({}, false);
+    regexp.getData().Accept((void*) &out, *this);
+    const std::set<alphabet::Symbol> alphabet = out.first; // copy: out is reused as visitor scratch space below
+
+    std::set<regexp::RegExp> Q = { V };
+    std::deque<std::set<regexp::RegExp>> Qi;
+
+    Qi.push_back(std::set<regexp::RegExp>());
+    Qi.at(0).insert(V);
+
+    int i = 1;
+
+    // 2. derive every regexp found in the previous round by every symbol until no unseen regexp appears
+    while(! Qi.at(i - 1).empty())
+    {
+        Qi.push_back(std::set<regexp::RegExp>()); // initialize set Q_i
+
+        for(const auto& dregexp : Qi.at(i - 1))
+        {
+            regexp::RegExpDerivation deriv;
+
+            for(const auto& a : alphabet)
+            {
+                string::LinearString string(std::vector<alphabet::Symbol>{a});
+                regexp::RegExp derived = deriv.derivation(dregexp, string);
+                // derived = opt.optimize(derived);
+
+                // this will also add \emptyset as a regexp (and as FA state)
+                if(Q.count(derived) == 0) // if this state has already been found, do not add
+                    Qi.at(i).insert(derived);
+            }
+        }
+
+        Q.insert(Qi.at(i).begin(), Qi.at(i).end());
+        i += 1;
+    }
+
+    // ------------------------------------------------------------------------
+    // 3. build the automaton: one state per regexp in Q, one transition per (regexp, symbol) derivation
+
+    automaton::NFA automaton;
+    int stateId = 0;
+    std::map<regexp::RegExp, automaton::State> stateMap;
+
+    for(const auto& r : Q)
+    {
+        automaton::State q(label::Label(label::StringLabel(std::toBase26(stateId++))));
+        stateMap.insert(std::make_pair(r, q));
+        automaton.addState(q);
+    }
+
+    automaton.setInputSymbols(alphabet);
+
+    for(const auto& r : Q)
+    {
+        regexp::RegExpDerivation deriv;
+
+        for(const auto& a: alphabet)
+        {
+            string::LinearString string(std::vector<alphabet::Symbol>{a});
+            regexp::RegExp derived = deriv.derivation(r, string);
+            // derived = opt.optimize(derived);
+
+            automaton.addTransition(stateMap.find(r)->second, a, stateMap.find(derived)->second);
+        }
+    }
+
+    automaton.addInitialState(stateMap.find(V)->second);
+
+    for(const auto& r : Q)
+    {
+        r.getData().Accept((void*) &out, *this); // visit r itself (not the input regexp) to refresh out.second
+        if(out.second) // r.containsEmptyString() => the state for r is accepting
+            automaton.addFinalState(stateMap.find(r)->second);
+    }
+
+    return automaton;
+}
+
+} /* namespace re2fa */
diff --git a/alib2algo/src/conversions/re2fa/Brzozowski.h b/alib2algo/src/conversions/re2fa/Brzozowski.h
new file mode 100644
index 0000000000..58d4d0aaa5
--- /dev/null
+++ b/alib2algo/src/conversions/re2fa/Brzozowski.h
@@ -0,0 +1,42 @@
+/*
+ * Brzozowski.h
+ *
+ *  Created on: 11. 1. 2014
+ *      Author: tomas
+ */
+
+#ifndef BRZOZOWSKI_H_
+#define BRZOZOWSKI_H_
+
+#include <regexp/RegExp.h>
+#include <regexp/formal/FormalRegExp.h>
+#include <regexp/unbounded/UnboundedRegExp.h>
+#include <automaton/FSM/NFA.h>
+
+namespace re2fa
+{
+
+/**
+ * Converts regular expression to finite automaton using Brzozowski algorithm (derivations of regular expressions).
+ * Source: Melichar 2.110
+ */
+class Brzozowski : public regexp::VisitableRegExpBase::visitor_type
+{
+public:
+    Brzozowski(void);
+    ~Brzozowski(void);
+
+    /**
+     * Performs conversion.
+     * @return FSM equivalent to original regular expression.
+     */
+    automaton::NFA convert(const regexp::RegExp& regexp);
+
+private:
+    void Visit(void* , const regexp::FormalRegExp& regexp);
+    void Visit(void* , const regexp::UnboundedRegExp& regexp);
+};
+
+} /* namespace re2fa */
+
+#endif /* BRZOZOWSKI_H_ */
diff --git a/alib2algo/src/conversions/re2fa/Thompson.cpp b/alib2algo/src/conversions/re2fa/Thompson.cpp
new file mode 100644
index 0000000000..1b3520fe04
--- /dev/null
+++ b/alib2algo/src/conversions/re2fa/Thompson.cpp
@@ -0,0 +1,261 @@
+/*
+ * Thompson.cpp
+ *
+ *  Created on: 11. 1. 2014
+ *      Author: tomas
+ */
+#include "Thompson.h"
+#include <tuple>
+#include <label/Label.h>
+#include <label/IntegerLabel.h>
+
+namespace re2fa
+{
+
+Thompson::Thompson(void){}
+Thompson::~Thompson(void){}
+
+automaton::EpsilonNFA Thompson::convert(const regexp::RegExp& regexp)
+{
+    std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*> out(automaton::EpsilonNFA(), 0, nullptr, nullptr);
+    automaton::EpsilonNFA& automaton = std::get<0>(out);
+
+    regexp.getData().Accept((void*) &out, *this);
+
+    automaton.setInitialStates({*std::get<2>(out)});
+    automaton.setFinalStates(std::set<automaton::State>{*std::get<3>(out)});
+
+    return std::get<0>(out);
+}
+
+void Thompson::Visit(void* userData, const regexp::FormalRegExp& regexp)
+{
+    std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*> &out = *(std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*>*) userData;
+    automaton::EpsilonNFA& automaton = std::get<0>(out);
+
+    automaton.setInputSymbols(regexp.getAlphabet());
+    regexp.getRegExp().Accept((void*) &out, *this);
+}
+
+void Thompson::Visit(void* userData, const regexp::FormalRegExpAlternation& alternation)
+{
+    std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*> &out = *(std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*>*) userData;
+    automaton::EpsilonNFA& automaton = std::get<0>(out);
+
+    automaton::State head = automaton::State(label::Label(label::IntegerLabel(std::get<1>(out)++)));
+    automaton::State tail = automaton::State(label::Label(label::IntegerLabel(std::get<1>(out)++)));
+    automaton.addState(head);
+    automaton.addState(tail);
+
+    static_cast<const regexp::FormalRegExpElement&>(alternation.getLeftElement()).Accept(userData, *this);
+    automaton.addTransition(head, string::Epsilon::EPSILON, *std::get<2>(out));
+    automaton.addTransition(*std::get<3>(out), string::Epsilon::EPSILON, tail);
+
+    static_cast<const regexp::FormalRegExpElement&>(alternation.getRightElement()).Accept(userData, *this);
+    automaton.addTransition(head, string::Epsilon::EPSILON, *std::get<2>(out));
+    automaton.addTransition(*std::get<3>(out), string::Epsilon::EPSILON, tail);
+
+    std::get<2>(out) = &(*automaton.getStates().find(head));
+    std::get<3>(out) = &(*automaton.getStates().find(tail));
+}
+
+void Thompson::Visit(void* userData, const regexp::FormalRegExpConcatenation& concatenation)
+{
+    std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*> &out = *(std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*>*) userData;
+
+    automaton::EpsilonNFA& automaton = std::get<0>(out);
+
+    static_cast<const regexp::FormalRegExpElement&>(concatenation.getLeftElement()).Accept(userData, *this);
+    const automaton::State* leftHead = std::get<2>(out);
+    const automaton::State* leftTail = std::get<3>(out);
+
+    static_cast<const regexp::FormalRegExpElement&>(concatenation.getRightElement()).Accept(userData, *this);
+    automaton.addTransition(*leftTail, string::Epsilon::EPSILON, *std::get<2>(out));
+
+    std::get<2>(out) = &(*automaton.getStates().find(*leftHead));
+    // std::get<3>(out) = std::get<3>(out);
+}
+
+void Thompson::Visit(void* userData, const regexp::FormalRegExpIteration& iteration)
+{
+    std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*> &out = *(std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*>*) userData;
+
+    automaton::EpsilonNFA& automaton = std::get<0>(out);
+
+    automaton::State head = automaton::State(label::Label(label::IntegerLabel(std::get<1>(out)++)));
+    automaton::State tail = automaton::State(label::Label(label::IntegerLabel(std::get<1>(out)++)));
+    automaton.addState(head);
+    automaton.addState(tail);
+
+    static_cast<const regexp::FormalRegExpElement&>(iteration.getElement()).Accept(userData, *this);
+    automaton.addTransition(head, string::Epsilon::EPSILON, *std::get<2>(out));
+    automaton.addTransition(head, string::Epsilon::EPSILON, tail);
+    automaton.addTransition(*std::get<3>(out), string::Epsilon::EPSILON, tail);
+    automaton.addTransition(*std::get<3>(out), string::Epsilon::EPSILON, *std::get<2>(out));
+
+    std::get<2>(out) = &(*automaton.getStates().find(head));
+    std::get<3>(out) = &(*automaton.getStates().find(tail));
+}
+
+void Thompson::Visit(void* userData, const regexp::FormalRegExpSymbol& symbol)
+{
+    std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*> &out = *(std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*>*) userData;
+
+    automaton::EpsilonNFA& automaton = std::get<0>(out);
+
+    automaton::State head = automaton::State(label::Label(label::IntegerLabel(std::get<1>(out)++)));
+    automaton::State tail = automaton::State(label::Label(label::IntegerLabel(std::get<1>(out)++)));
+    automaton.addState(head);
+    automaton.addState(tail);
+
+    automaton.addTransition(head, symbol.getSymbol(), tail);
+    std::get<2>(out) = &(*automaton.getStates().find(head));
+    std::get<3>(out) = &(*automaton.getStates().find(tail));
+}
+
+void Thompson::Visit(void* userData, const regexp::FormalRegExpEpsilon& epsilon)
+{
+    std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*> &out = *(std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*>*) userData;
+
+    automaton::EpsilonNFA& automaton = std::get<0>(out);
+
+    automaton::State head = automaton::State(label::Label(label::IntegerLabel(std::get<1>(out)++)));
+    automaton::State tail = automaton::State(label::Label(label::IntegerLabel(std::get<1>(out)++)));
+    automaton.addState(head);
+    automaton.addState(tail);
+
+    automaton.addTransition(head, string::Epsilon::EPSILON, tail);
+    std::get<2>(out) = &(*automaton.getStates().find(head));
+    std::get<3>(out) = &(*automaton.getStates().find(tail));
+}
+
+void Thompson::Visit(void* userData, const regexp::FormalRegExpEmpty& empty)
+{
+    std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*> &out = *(std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*>*) userData;
+
+    automaton::EpsilonNFA& automaton = std::get<0>(out);
+
+    automaton::State head = automaton::State(label::Label(label::IntegerLabel(std::get<1>(out)++)));
+
+    automaton::State tail = automaton::State(label::Label(label::IntegerLabel(std::get<1>(out)++)));
+    automaton.addState(head);
+    automaton.addState(tail);
+
+    std::get<2>(out) = &(*automaton.getStates().find(head));
+    std::get<3>(out) = &(*automaton.getStates().find(tail));
+}
+
+void Thompson::Visit(void* userData, const regexp::UnboundedRegExp& regexp)
+{
+    std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*> &out = *(std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*>*) userData;
+    automaton::EpsilonNFA& automaton = std::get<0>(out);
+
+    automaton.setInputSymbols(regexp.getAlphabet());
+    regexp.getRegExp().Accept((void*) &out, *this);
+}
+
+void Thompson::Visit(void* userData, const regexp::UnboundedRegExpAlternation& alternation)
+{
+    std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*> &out = *(std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*>*) userData;
+    automaton::EpsilonNFA& automaton = std::get<0>(out);
+
+    automaton::State head = automaton::State(label::Label(label::IntegerLabel(std::get<1>(out)++)));
+    automaton::State tail = automaton::State(label::Label(label::IntegerLabel(std::get<1>(out)++)));
+    automaton.addState(head);
+    automaton.addState(tail);
+
+    for(const auto& element : alternation.getElements())
+    {
+        static_cast<const regexp::UnboundedRegExpElement&>(*element).Accept(userData, *this);
+        automaton.addTransition(head, string::Epsilon::EPSILON, *std::get<2>(out));
+        automaton.addTransition(*std::get<3>(out), string::Epsilon::EPSILON, tail);
+    }
+
+    std::get<2>(out) = &(*automaton.getStates().find(head));
+    std::get<3>(out) = &(*automaton.getStates().find(tail));
+}
+
+void Thompson::Visit(void* userData, const regexp::UnboundedRegExpConcatenation& concatenation)
+{
+    std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*> &out = *(std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*>*) userData;
+    automaton::EpsilonNFA& automaton = std::get<0>(out);
+
+    std::vector<std::pair<const automaton::State*, const automaton::State*>> tails;
+    for(const auto& element : concatenation.getElements())
+    {
+        static_cast<const regexp::UnboundedRegExpElement&>(*element).Accept(userData, *this);
+        tails.push_back(std::make_pair(std::get<2>(out), std::get<3>(out)));
+    }
+
+    for(size_t i = 1; i < tails.size(); i++)
+        automaton.addTransition(*tails[i-1].second, string::Epsilon::EPSILON, *tails[i].first);
+
+    std::get<2>(out) = tails[0].first;
+    std::get<3>(out) = tails[tails.size()-1].second;
+}
+
+void Thompson::Visit(void* userData, const regexp::UnboundedRegExpIteration& iteration)
+{
+    std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*> &out = *(std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*>*) userData;
+    automaton::EpsilonNFA& automaton = std::get<0>(out);
+
+    automaton::State head = automaton::State(label::Label(label::IntegerLabel(std::get<1>(out)++)));
+    automaton::State tail = automaton::State(label::Label(label::IntegerLabel(std::get<1>(out)++)));
+    automaton.addState(head);
+    automaton.addState(tail);
+
+    static_cast<const regexp::UnboundedRegExpElement&>(iteration.getElement()).Accept(userData, *this);
+    automaton.addTransition(head, string::Epsilon::EPSILON, *std::get<2>(out));
+    automaton.addTransition(head, string::Epsilon::EPSILON, tail);
+    automaton.addTransition(*std::get<3>(out), string::Epsilon::EPSILON, tail);
+    automaton.addTransition(*std::get<3>(out), string::Epsilon::EPSILON, *std::get<2>(out));
+
+    std::get<2>(out) = &(*automaton.getStates().find(head));
+    std::get<3>(out) = &(*automaton.getStates().find(tail));
+}
+
+void Thompson::Visit(void* userData, const regexp::UnboundedRegExpSymbol& symbol)
+{
+    std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*> &out = *(std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*>*) userData;
+    automaton::EpsilonNFA& automaton = std::get<0>(out);
+
+    automaton::State head = automaton::State(label::Label(label::IntegerLabel(std::get<1>(out)++)));
+    automaton::State tail = automaton::State(label::Label(label::IntegerLabel(std::get<1>(out)++)));
+    automaton.addState(head);
+    automaton.addState(tail);
+
+    automaton.addTransition(head, symbol.getSymbol(), tail);
+    std::get<2>(out) = &(*automaton.getStates().find(head));
+    std::get<3>(out) = &(*automaton.getStates().find(tail));
+}
+
+void Thompson::Visit(void* userData, const regexp::UnboundedRegExpEpsilon& epsilon)
+{
+    std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*> &out = *(std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*>*) userData;
+    automaton::EpsilonNFA& automaton = std::get<0>(out);
+
+    automaton::State head = automaton::State(label::Label(label::IntegerLabel(std::get<1>(out)++)));
+    automaton::State tail = automaton::State(label::Label(label::IntegerLabel(std::get<1>(out)++)));
+    automaton.addState(head);
+    automaton.addState(tail);
+
+    automaton.addTransition(head, string::Epsilon::EPSILON, tail);
+    std::get<2>(out) = &(*automaton.getStates().find(head));
+    std::get<3>(out) = &(*automaton.getStates().find(tail));
+}
+
+void Thompson::Visit(void* userData, const regexp::UnboundedRegExpEmpty& empty)
+{
+    std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*> &out = *(std::tuple<automaton::EpsilonNFA, int, const automaton::State*, const automaton::State*>*) userData;
+    automaton::EpsilonNFA& automaton = std::get<0>(out);
+
+    automaton::State head = automaton::State(label::Label(label::IntegerLabel(std::get<1>(out)++)));
+    automaton::State tail = automaton::State(label::Label(label::IntegerLabel(std::get<1>(out)++)));
+    automaton.addState(head);
+    automaton.addState(tail);
+
+    std::get<2>(out) = &(*automaton.getStates().find(head));
+    std::get<3>(out) = &(*automaton.getStates().find(tail));
+}
+
+} /* namespace re2fa */
diff --git a/alib2algo/src/conversions/re2fa/Thompson.h b/alib2algo/src/conversions/re2fa/Thompson.h
new file mode 100644
index 0000000000..ace518cd99
--- /dev/null
+++ b/alib2algo/src/conversions/re2fa/Thompson.h
@@ -0,0 +1,59 @@
+/*
+ * Thompson.h
+ *
+ *  Created on: 11. 1. 2014
+ *      Author: tomas
+ */
+
+#ifndef THOMPSON_H_
+#define THOMPSON_H_
+
+#include <regexp/RegExp.h>
+#include <regexp/formal/FormalRegExpElements.h>
+#include <regexp/unbounded/UnboundedRegExpElements.h>
+#include <automaton/FSM/EpsilonNFA.h>
+
+namespace re2fa
+{
+
+/**
+ * Converts regular expression to finite automaton using Thompson's Construction Algorithm (TCA).
+ * Sources:
+ *  Hopcroft, section 3.2.3
+ *  http://www.eecis.udel.edu/~cavazos/cisc672/lectures/Lecture-04.pdf
+ *  http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.21.7450&rep=rep1&type=ps
+ *  Melichar 2.112
+ */
+class Thompson : public regexp::VisitableRegExpBase::visitor_type, regexp::FormalRegExpElement::visitor_type, regexp::UnboundedRegExpElement::visitor_type
+{
+public:
+    Thompson(void);
+    ~Thompson(void);
+    /**
+     * Performs conversion of the given regular expression.
+     * @return nondeterministic finite automaton with epsilon transitions accepting language described by the regexp
+     */
+    automaton::EpsilonNFA convert(const regexp::RegExp& regexp);
+
+private:
+     void Visit(void*, const regexp::UnboundedRegExp& regexp);
+     void Visit(void*, const regexp::FormalRegExp& regexp);
+
+     void Visit(void*, const regexp::UnboundedRegExpAlternation& alternation);
+     void Visit(void*, const regexp::UnboundedRegExpConcatenation& concatenation);
+     void Visit(void*, const regexp::UnboundedRegExpIteration& iteration);
+     void Visit(void*, const regexp::UnboundedRegExpSymbol& symbol);
+     void Visit(void*, const regexp::UnboundedRegExpEpsilon& epsilon);
+     void Visit(void*, const regexp::UnboundedRegExpEmpty& empty);
+
+     void Visit(void*, const regexp::FormalRegExpAlternation& alternation);
+     void Visit(void*, const regexp::FormalRegExpConcatenation& concatenation);
+     void Visit(void*, const regexp::FormalRegExpIteration& iteration);
+     void Visit(void*, const regexp::FormalRegExpSymbol& symbol);
+     void Visit(void*, const regexp::FormalRegExpEpsilon& epsilon);
+     void Visit(void*, const regexp::FormalRegExpEmpty& empty);
+};
+
+} /* namespace re2fa */
+
+#endif /* THOMPSON_H_ */
-- 
GitLab