From b9be7075fd8b901edbb39e5db85db07decb5f556 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <peckato1@fit.cvut.cz>
Date: Thu, 1 May 2014 22:47:11 +0200
Subject: [PATCH] aconversion: fix duplicate LRG rules, rename generated states/nonterminals (see details)

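 - ConversionHandler: the "rg" and "grammar" targets now map to RIGHT_REGULAR_GRAMMAR; the FSM -> RG and RE -> RG dispatch now tests only m_target == RIGHT_REGULAR_GRAMMAR (the old "|| REGULAR_GRAMMAR" operand was a bare constant, not a comparison against m_target)
 - FA -> LRG: fix double addition of rules (build the right-hand side before adding the rule, and add it only if it is not already in the grammar)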
 - Glushkov: use different state/nonterminal names. Each name is a hexavigesimal (base-26) counter starting at 0, with the symbol id appended
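 - Thompson: use different state names. Each head/tail pair shares one hexavigesimal (base-26) counter value, suffixed with "0" for the head and "1" for the tail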
 - Brzozowski: fix numbering - start at 0 so hexavigesimal starts properly at A
 - RRG -> FA: if createUniqueState has to pick a unique name for the additional state "A", then use A' instead of A0

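 - tests: correct the exit status used to tell a crashed process from a timeout: check for 134 (128 + SIGABRT) instead of 139 (128 + SIGSEGV)
 - tests: lower TESTCASE_ITERATIONS from 200 to 100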
---
 .../src/aconversion/ConversionHandler.cpp     |  9 +++-----
 .../src/fa2rg/fa2lrg/FAtoLRGConverter.cpp     | 10 +++++++--
 aconversions/src/re2fa/Brzozowski.cpp         |  2 +-
 aconversions/src/re2fa/Glushkov.cpp           |  3 ++-
 aconversions/src/re2fa/Glushkov.h             |  1 +
 aconversions/src/re2fa/Thompson.cpp           | 21 ++++++++++---------
 aconversions/src/re2fa/Thompson.h             |  2 ++
 .../re2rg/re2rrg/BrzozowskiDerivationRRG.cpp  |  2 +-
 aconversions/src/re2rg/re2rrg/GlushkovRRG.cpp |  4 +++-
 aconversions/src/re2rg/re2rrg/GlushkovRRG.h   |  2 ++
 .../src/rg2fa/rrg2fa/RRGtoFAConverter.cpp     |  2 +-
 tests.aconversion.sh                          |  4 ++--
 12 files changed, 37 insertions(+), 25 deletions(-)

diff --git a/aconversions/src/aconversion/ConversionHandler.cpp b/aconversions/src/aconversion/ConversionHandler.cpp
index 85b86fa486..983e8944ba 100644
--- a/aconversions/src/aconversion/ConversionHandler.cpp
+++ b/aconversions/src/aconversion/ConversionHandler.cpp
@@ -103,7 +103,7 @@ void ConversionHandler::convertFSMtoRE( void )
 
 void ConversionHandler::convertFSMtoRG( void )
 {
-    if( m_target == RIGHT_REGULAR_GRAMMAR || REGULAR_GRAMMAR )
+    if( m_target == RIGHT_REGULAR_GRAMMAR )
         convertFSMtoRRG( );
     else if( m_target == LEFT_REGULAR_GRAMMAR )
         convertFSMtoLRG( );
@@ -172,7 +172,7 @@ void ConversionHandler::convertREtoFSM( void )
 
 void ConversionHandler::convertREtoRG( void )
 {
-    if( m_target == RIGHT_REGULAR_GRAMMAR || REGULAR_GRAMMAR )
+    if( m_target == RIGHT_REGULAR_GRAMMAR )
         convertREtoRRG( );
     else if( m_target == LEFT_REGULAR_GRAMMAR )
         throw AlibException( "ConversionHandler:: RE to LRG is not implemented. Please convert to RRG and then to LRG." );
@@ -361,10 +361,7 @@ ConversionHandler::TFormalism ConversionHandler::parseFormalismFromString( const
     if( target == "re" || target == "regexp" || target == "regex" )
         return REGULAR_EXPRESSION;
 
-    if( target == "rg" || target == "grammar" )
-        return REGULAR_GRAMMAR;
-
-    if( target == "rrg" )
+    if( target == "rrg" || target == "rg" || target == "grammar" )
         return RIGHT_REGULAR_GRAMMAR;
 
     if( target == "lrg" )
diff --git a/aconversions/src/fa2rg/fa2lrg/FAtoLRGConverter.cpp b/aconversions/src/fa2rg/fa2lrg/FAtoLRGConverter.cpp
index 7cc7fe416a..253fcf940a 100644
--- a/aconversions/src/fa2rg/fa2lrg/FAtoLRGConverter.cpp
+++ b/aconversions/src/fa2rg/fa2lrg/FAtoLRGConverter.cpp
@@ -51,9 +51,12 @@ LeftRegularGrammar FAtoLRGConverter::convert( void )
         {
             list<Symbol> leftSide, rightSide;
             leftSide.push_back( grammar.getStartSymbol( ) );
-            grammar.addRule( Rule( leftSide, rightSide ) );
             rightSide.push_back( nonterminalMap.find( transition.getFrom( ) )->second );
             rightSide.push_back( transition.getInput( ) );
+
+            Rule r( leftSide, rightSide );
+            if( ! isInSet( r, grammar.getRules( ) ) )
+                grammar.addRule( Rule( leftSide, rightSide ) );
         }
 
 
@@ -69,7 +72,10 @@ LeftRegularGrammar FAtoLRGConverter::convert( void )
                 list<Symbol> leftSide, rightSide;
                 leftSide.push_back( grammar.getStartSymbol( ) );
                 rightSide.push_back( transition.getInput( ) );
-                grammar.addRule( Rule( leftSide, rightSide ) );
+
+                Rule r( leftSide, rightSide );
+                if( ! isInSet( r, grammar.getRules( ) ) )
+                    grammar.addRule( Rule( leftSide, rightSide ) );
             }
         }
     }
diff --git a/aconversions/src/re2fa/Brzozowski.cpp b/aconversions/src/re2fa/Brzozowski.cpp
index e6779762db..8009aec9a1 100644
--- a/aconversions/src/re2fa/Brzozowski.cpp
+++ b/aconversions/src/re2fa/Brzozowski.cpp
@@ -76,7 +76,7 @@ FSM Brzozowski::convert( void )
 
     for( const auto & r : Q )
     {
-        State q( toBase26( ++ stateId ) );
+        State q( toBase26( stateId ++ ) );
         stateMap.insert( std::pair<RegExp,State>( r, q ) );
         automaton.addState( q );
     }
diff --git a/aconversions/src/re2fa/Glushkov.cpp b/aconversions/src/re2fa/Glushkov.cpp
index 9c86441c79..b2a847223b 100644
--- a/aconversions/src/re2fa/Glushkov.cpp
+++ b/aconversions/src/re2fa/Glushkov.cpp
@@ -50,9 +50,10 @@ FSM Glushkov::convert( void )
     State q0( "q0" );
     automaton.addState( q0 );
     automaton.addInitialState( q0 );
+    int stateId = 0;
     for( auto const& symbol : GlushkovTraversal::getSymbols( m_re ) )
     {
-        State q( to_string( symbol.getId( ) ) );
+        State q( toBase26( stateId ++ ) + to_string( symbol.getId( ) ) );
 
         m_stateMap.insert( std::pair<GlushkovSymbol, State>( symbol, q ) );
         automaton.addState( q );
diff --git a/aconversions/src/re2fa/Glushkov.h b/aconversions/src/re2fa/Glushkov.h
index b6ec1109e8..a1ba5e329a 100644
--- a/aconversions/src/re2fa/Glushkov.h
+++ b/aconversions/src/re2fa/Glushkov.h
@@ -17,6 +17,7 @@
 
 #include "../interface/IConversionFSM.h"
 #include "../shared/glushkov/GlushkovTraversal.h"
+#include "../shared/Hexavigesimal.h"
 
 namespace conversions
 {
diff --git a/aconversions/src/re2fa/Thompson.cpp b/aconversions/src/re2fa/Thompson.cpp
index c92832ec6e..b23f79c9e4 100644
--- a/aconversions/src/re2fa/Thompson.cpp
+++ b/aconversions/src/re2fa/Thompson.cpp
@@ -27,6 +27,7 @@ Thompson::~Thompson( void )
 FSM Thompson::convert( void )
 {
     m_fsm = FSM( );
+    m_stateId = 0;
 
     for( const auto & symbol : m_re.getAlphabet( ) )
         m_fsm.addInputSymbol( symbol.getSymbol( ) );
@@ -66,8 +67,8 @@ Thompson::SubexpressionTails Thompson::processRegExpNode( const RegExpElement *
 
 Thompson::SubexpressionTails Thompson::processRegExpNode( const Iteration * node )
 {
-    State head = m_fsm.createUniqueState( "iter__head", true );
-    State tail = m_fsm.createUniqueState( "iter__tail", true );
+    State head = m_fsm.createUniqueState( toBase26( m_stateId    ) + "0", true );
+    State tail = m_fsm.createUniqueState( toBase26( m_stateId ++ ) + "1", true );
 
     SubexpressionTails st = processRegExpNode( node->getElement( ) );
 
@@ -81,8 +82,8 @@ Thompson::SubexpressionTails Thompson::processRegExpNode( const Iteration * node
 
 Thompson::SubexpressionTails Thompson::processRegExpNode( const Alternation * node )
 {
-    State head = m_fsm.createUniqueState( "alt__head", true );
-    State tail = m_fsm.createUniqueState( "alt__tail", true );
+    State head = m_fsm.createUniqueState( toBase26( m_stateId    ) + "0", true );
+    State tail = m_fsm.createUniqueState( toBase26( m_stateId ++ ) + "1", true );
 
     for( const auto & element : node->getElements( ) )
     {
@@ -110,8 +111,8 @@ Thompson::SubexpressionTails Thompson::processRegExpNode( const Concatenation *
 Thompson::SubexpressionTails Thompson::processRegExpNode( const RegExpSymbol * node )
 {
     Symbol symb( node->getSymbol( ) );
-    State head = m_fsm.createUniqueState( "sym__start", true );
-    State tail = m_fsm.createUniqueState( "sym__end", true );
+    State head = m_fsm.createUniqueState( toBase26( m_stateId    ) + "0", true );
+    State tail = m_fsm.createUniqueState( toBase26( m_stateId ++ ) + "1", true );
 
     m_fsm.addTransition( head, symb, tail );
 
@@ -121,8 +122,8 @@ Thompson::SubexpressionTails Thompson::processRegExpNode( const RegExpSymbol * n
 Thompson::SubexpressionTails Thompson::processRegExpNode( const RegExpEpsilon * node )
 {
     Symbol symb( "" );
-    State head = m_fsm.createUniqueState( "epssym__start", true );
-    State tail = m_fsm.createUniqueState( "epssym__end", true );
+    State head = m_fsm.createUniqueState( toBase26( m_stateId    ) + "0", true );
+    State tail = m_fsm.createUniqueState( toBase26( m_stateId ++ ) + "1", true );
 
     m_fsm.addTransition( head, symb, tail );
 
@@ -131,8 +132,8 @@ Thompson::SubexpressionTails Thompson::processRegExpNode( const RegExpEpsilon *
 
 Thompson::SubexpressionTails Thompson::processRegExpNode( const RegExpEmpty * node )
 {
-    State head = m_fsm.createUniqueState( "empty__start", true );
-    State tail = m_fsm.createUniqueState( "empty__end", true );
+    State head = m_fsm.createUniqueState( toBase26( m_stateId    ) + "0", true );
+    State tail = m_fsm.createUniqueState( toBase26( m_stateId ++ ) + "1", true );
 
     return SubexpressionTails( head, tail );
 }
diff --git a/aconversions/src/re2fa/Thompson.h b/aconversions/src/re2fa/Thompson.h
index 970c77d399..ae2f5452bb 100644
--- a/aconversions/src/re2fa/Thompson.h
+++ b/aconversions/src/re2fa/Thompson.h
@@ -18,6 +18,7 @@
 
 #include "../interface/IConversionFSM.h"
 #include "../include/macros.h"
+#include "../shared/Hexavigesimal.h"
 
 
 namespace conversions
@@ -56,6 +57,7 @@ private:
      * output FSM ($\varepsilon$--NFA)
      */
     automaton::FSM m_fsm;
+    int m_stateId;
 
     /**
      * Stores head and tail state of "subautomaton" created in regexp subtree.
diff --git a/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp b/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp
index 8ba4a892b5..246199f696 100644
--- a/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp
+++ b/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp
@@ -75,7 +75,7 @@ RightRegularGrammar BrzozowskiDerivationRRG::convert( void )
 
     for( const auto & r : N )
     {
-        Symbol nt = grammar.createUniqueNonTerminalSymbol( toBase26( ++ nonterminalId ) );
+        Symbol nt = grammar.createUniqueNonTerminalSymbol( toBase26( nonterminalId ++ ) );
         nonterminalMap.insert( pair<RegExp, Symbol>( r, nt ) );
     }
 
diff --git a/aconversions/src/re2rg/re2rrg/GlushkovRRG.cpp b/aconversions/src/re2rg/re2rrg/GlushkovRRG.cpp
index e7dc539e77..f28ec8c927 100644
--- a/aconversions/src/re2rg/re2rrg/GlushkovRRG.cpp
+++ b/aconversions/src/re2rg/re2rrg/GlushkovRRG.cpp
@@ -46,9 +46,11 @@ RightRegularGrammar GlushkovRRG::convert( void )
     Symbol S = grammar.createUniqueNonTerminalSymbol( "S" );
     grammar.setStartSymbol( S );
 
+    int nonterminalId = 0;
+
     for( auto const& symbol : GlushkovTraversal::getSymbols( m_re ) )
     {
-        Symbol a = grammar.createUniqueNonTerminalSymbol( symbol.getInputSymbol( ).getSymbol( ) + to_string( symbol.getId( ) ) );
+        Symbol a = grammar.createUniqueNonTerminalSymbol( toBase26( nonterminalId ++ ) + to_string( symbol.getId( ) ) );
 
         m_symbolMap.insert( std::pair<GlushkovSymbol, Symbol>( symbol, a ) );
     }
diff --git a/aconversions/src/re2rg/re2rrg/GlushkovRRG.h b/aconversions/src/re2rg/re2rrg/GlushkovRRG.h
index fa9b747470..f464aad0d7 100644
--- a/aconversions/src/re2rg/re2rrg/GlushkovRRG.h
+++ b/aconversions/src/re2rg/re2rrg/GlushkovRRG.h
@@ -16,6 +16,8 @@
 #include "../../interface/IConversionRRG.h"
 #include "../../shared/glushkov/GlushkovTraversal.h"
 
+#include "../../shared/Hexavigesimal.h"
+
 namespace conversions
 {
 
diff --git a/aconversions/src/rg2fa/rrg2fa/RRGtoFAConverter.cpp b/aconversions/src/rg2fa/rrg2fa/RRGtoFAConverter.cpp
index ea121d2fdb..8c81927b11 100644
--- a/aconversions/src/rg2fa/rrg2fa/RRGtoFAConverter.cpp
+++ b/aconversions/src/rg2fa/rrg2fa/RRGtoFAConverter.cpp
@@ -28,7 +28,7 @@ FSM RRGtoFAConverter::convert( void )
     for( const auto & symbol : m_grammar.getNonTerminalSymbols( ) )
         automaton.addState( State( symbol.getSymbol( ) ) );
 
-    const State & AState = automaton.createUniqueState( "A", true );
+    const State & AState = automaton.createUniqueState( "A", false );
 
     for( const auto & rule : m_grammar.getRules( ) )
     {
diff --git a/tests.aconversion.sh b/tests.aconversion.sh
index 67d0c0975d..60d3d62f4a 100755
--- a/tests.aconversion.sh
+++ b/tests.aconversion.sh
@@ -2,7 +2,7 @@
 
 set -o pipefail
 
-TESTCASE_ITERATIONS=200
+TESTCASE_ITERATIONS=100
 TESTCASE_TIMEOUT=5
 LOGFILE="log_tests.txt"
 
@@ -58,7 +58,7 @@ function runTest2 {
 	RETTMP=$?
 
 	# segfault
-	if [ $RETTMP -eq 139 ]; then
+	if [ $RETTMP -eq 134 ]; then
 		return 3
 	fi
 
-- 
GitLab