From b9be7075fd8b901edbb39e5db85db07decb5f556 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <peckato1@fit.cvut.cz> Date: Thu, 1 May 2014 22:47:11 +0200 Subject: [PATCH] aconversion: changes (see details) - FA -> LRG: fix double addition of rules - Glushkov: use different state/nonterminal names. Names are generated via hexavigesimal with symbol id appended - Thompson: use different state names - Brzozowski: fix numbering - start at 0 so hexavigesimal starts properly at A - RRG -> FA: if createUnique is used, then use A' instead of A0 - tests: correct exit values to determine if process reached timeout or segfault --- .../src/aconversion/ConversionHandler.cpp | 9 +++----- .../src/fa2rg/fa2lrg/FAtoLRGConverter.cpp | 10 +++++++-- aconversions/src/re2fa/Brzozowski.cpp | 2 +- aconversions/src/re2fa/Glushkov.cpp | 3 ++- aconversions/src/re2fa/Glushkov.h | 1 + aconversions/src/re2fa/Thompson.cpp | 21 ++++++++++--------- aconversions/src/re2fa/Thompson.h | 2 ++ .../re2rg/re2rrg/BrzozowskiDerivationRRG.cpp | 2 +- aconversions/src/re2rg/re2rrg/GlushkovRRG.cpp | 4 +++- aconversions/src/re2rg/re2rrg/GlushkovRRG.h | 2 ++ .../src/rg2fa/rrg2fa/RRGtoFAConverter.cpp | 2 +- tests.aconversion.sh | 4 ++-- 12 files changed, 37 insertions(+), 25 deletions(-) diff --git a/aconversions/src/aconversion/ConversionHandler.cpp b/aconversions/src/aconversion/ConversionHandler.cpp index 85b86fa486..983e8944ba 100644 --- a/aconversions/src/aconversion/ConversionHandler.cpp +++ b/aconversions/src/aconversion/ConversionHandler.cpp @@ -103,7 +103,7 @@ void ConversionHandler::convertFSMtoRE( void ) void ConversionHandler::convertFSMtoRG( void ) { - if( m_target == RIGHT_REGULAR_GRAMMAR || REGULAR_GRAMMAR ) + if( m_target == RIGHT_REGULAR_GRAMMAR ) convertFSMtoRRG( ); else if( m_target == LEFT_REGULAR_GRAMMAR ) convertFSMtoLRG( ); @@ -172,7 +172,7 @@ void ConversionHandler::convertREtoFSM( void ) void ConversionHandler::convertREtoRG( void ) { - if( m_target == RIGHT_REGULAR_GRAMMAR || REGULAR_GRAMMAR ) + if( m_target == RIGHT_REGULAR_GRAMMAR ) convertREtoRRG( ); else if( m_target == LEFT_REGULAR_GRAMMAR ) throw AlibException( "ConversionHandler:: RE to LRG is not implemented. Please convert to RRG and then to LRG." ); @@ -361,10 +361,7 @@ ConversionHandler::TFormalism ConversionHandler::parseFormalismFromString( const if( target == "re" || target == "regexp" || target == "regex" ) return REGULAR_EXPRESSION; - if( target == "rg" || target == "grammar" ) - return REGULAR_GRAMMAR; - - if( target == "rrg" ) + if( target == "rrg" || target == "rg" || target == "grammar" ) return RIGHT_REGULAR_GRAMMAR; if( target == "lrg" ) diff --git a/aconversions/src/fa2rg/fa2lrg/FAtoLRGConverter.cpp b/aconversions/src/fa2rg/fa2lrg/FAtoLRGConverter.cpp index 7cc7fe416a..253fcf940a 100644 --- a/aconversions/src/fa2rg/fa2lrg/FAtoLRGConverter.cpp +++ b/aconversions/src/fa2rg/fa2lrg/FAtoLRGConverter.cpp @@ -51,9 +51,12 @@ LeftRegularGrammar FAtoLRGConverter::convert( void ) { list<Symbol> leftSide, rightSide; leftSide.push_back( grammar.getStartSymbol( ) ); - grammar.addRule( Rule( leftSide, rightSide ) ); rightSide.push_back( nonterminalMap.find( transition.getFrom( ) )->second ); rightSide.push_back( transition.getInput( ) ); + + Rule r( leftSide, rightSide ); + if( ! isInSet( r, grammar.getRules( ) ) ) + grammar.addRule( Rule( leftSide, rightSide ) ); } @@ -69,7 +72,10 @@ LeftRegularGrammar FAtoLRGConverter::convert( void ) list<Symbol> leftSide, rightSide; leftSide.push_back( grammar.getStartSymbol( ) ); rightSide.push_back( transition.getInput( ) ); - grammar.addRule( Rule( leftSide, rightSide ) ); + + Rule r( leftSide, rightSide ); + if( ! isInSet( r, grammar.getRules( ) ) ) + grammar.addRule( Rule( leftSide, rightSide ) ); } } } diff --git a/aconversions/src/re2fa/Brzozowski.cpp b/aconversions/src/re2fa/Brzozowski.cpp index e6779762db..8009aec9a1 100644 --- a/aconversions/src/re2fa/Brzozowski.cpp +++ b/aconversions/src/re2fa/Brzozowski.cpp @@ -76,7 +76,7 @@ FSM Brzozowski::convert( void ) for( const auto & r : Q ) { - State q( toBase26( ++ stateId ) ); + State q( toBase26( stateId ++ ) ); stateMap.insert( std::pair<RegExp,State>( r, q ) ); automaton.addState( q ); } diff --git a/aconversions/src/re2fa/Glushkov.cpp b/aconversions/src/re2fa/Glushkov.cpp index 9c86441c79..b2a847223b 100644 --- a/aconversions/src/re2fa/Glushkov.cpp +++ b/aconversions/src/re2fa/Glushkov.cpp @@ -50,9 +50,10 @@ FSM Glushkov::convert( void ) State q0( "q0" ); automaton.addState( q0 ); automaton.addInitialState( q0 ); + int stateId = 0; for( auto const& symbol : GlushkovTraversal::getSymbols( m_re ) ) { - State q( to_string( symbol.getId( ) ) ); + State q( toBase26( stateId ++ ) + to_string( symbol.getId( ) ) ); m_stateMap.insert( std::pair<GlushkovSymbol, State>( symbol, q ) ); automaton.addState( q ); diff --git a/aconversions/src/re2fa/Glushkov.h b/aconversions/src/re2fa/Glushkov.h index b6ec1109e8..a1ba5e329a 100644 --- a/aconversions/src/re2fa/Glushkov.h +++ b/aconversions/src/re2fa/Glushkov.h @@ -17,6 +17,7 @@ #include "../interface/IConversionFSM.h" #include "../shared/glushkov/GlushkovTraversal.h" +#include "../shared/Hexavigesimal.h" namespace conversions { diff --git a/aconversions/src/re2fa/Thompson.cpp b/aconversions/src/re2fa/Thompson.cpp index c92832ec6e..b23f79c9e4 100644 --- a/aconversions/src/re2fa/Thompson.cpp +++ b/aconversions/src/re2fa/Thompson.cpp @@ -27,6 +27,7 @@ Thompson::~Thompson( void ) FSM Thompson::convert( void ) { m_fsm = FSM( ); + m_stateId = 0; for( const auto & symbol : m_re.getAlphabet( ) ) m_fsm.addInputSymbol( symbol.getSymbol( ) ); @@ -66,8 +67,8 @@ Thompson::SubexpressionTails Thompson::processRegExpNode( const RegExpElement * Thompson::SubexpressionTails Thompson::processRegExpNode( const Iteration * node ) { - State head = m_fsm.createUniqueState( "iter__head", true ); - State tail = m_fsm.createUniqueState( "iter__tail", true ); + State head = m_fsm.createUniqueState( toBase26( m_stateId ) + "0", true ); + State tail = m_fsm.createUniqueState( toBase26( m_stateId ++ ) + "1", true ); SubexpressionTails st = processRegExpNode( node->getElement( ) ); @@ -81,8 +82,8 @@ Thompson::SubexpressionTails Thompson::processRegExpNode( const Iteration * node Thompson::SubexpressionTails Thompson::processRegExpNode( const Alternation * node ) { - State head = m_fsm.createUniqueState( "alt__head", true ); - State tail = m_fsm.createUniqueState( "alt__tail", true ); + State head = m_fsm.createUniqueState( toBase26( m_stateId ) + "0", true ); + State tail = m_fsm.createUniqueState( toBase26( m_stateId ++ ) + "1", true ); for( const auto & element : node->getElements( ) ) { @@ -110,8 +111,8 @@ Thompson::SubexpressionTails Thompson::processRegExpNode( const Concatenation * Thompson::SubexpressionTails Thompson::processRegExpNode( const RegExpSymbol * node ) { Symbol symb( node->getSymbol( ) ); - State head = m_fsm.createUniqueState( "sym__start", true ); - State tail = m_fsm.createUniqueState( "sym__end", true ); + State head = m_fsm.createUniqueState( toBase26( m_stateId ) + "0", true ); + State tail = m_fsm.createUniqueState( toBase26( m_stateId ++ ) + "1", true ); m_fsm.addTransition( head, symb, tail ); @@ -121,8 +122,8 @@ Thompson::SubexpressionTails Thompson::processRegExpNode( const RegExpSymbol * n Thompson::SubexpressionTails Thompson::processRegExpNode( const RegExpEpsilon * node ) { Symbol symb( "" ); - State head = m_fsm.createUniqueState( "epssym__start", true ); - State tail = m_fsm.createUniqueState( "epssym__end", true ); + State head = m_fsm.createUniqueState( toBase26( m_stateId ) + "0", true ); + State tail = m_fsm.createUniqueState( toBase26( m_stateId ++ ) + "1", true ); m_fsm.addTransition( head, symb, tail ); @@ -131,8 +132,8 @@ Thompson::SubexpressionTails Thompson::processRegExpNode( const RegExpEpsilon * Thompson::SubexpressionTails Thompson::processRegExpNode( const RegExpEmpty * node ) { - State head = m_fsm.createUniqueState( "empty__start", true ); - State tail = m_fsm.createUniqueState( "empty__end", true ); + State head = m_fsm.createUniqueState( toBase26( m_stateId ) + "0", true ); + State tail = m_fsm.createUniqueState( toBase26( m_stateId ++ ) + "1", true ); return SubexpressionTails( head, tail ); } diff --git a/aconversions/src/re2fa/Thompson.h b/aconversions/src/re2fa/Thompson.h index 970c77d399..ae2f5452bb 100644 --- a/aconversions/src/re2fa/Thompson.h +++ b/aconversions/src/re2fa/Thompson.h @@ -18,6 +18,7 @@ #include "../interface/IConversionFSM.h" #include "../include/macros.h" +#include "../shared/Hexavigesimal.h" namespace conversions @@ -56,6 +57,7 @@ private: * output FSM ($\varepsilon$--NFA) */ automaton::FSM m_fsm; + int m_stateId; /** * Stores head and tail state of "subautomaton" created in regexp subtree. diff --git a/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp b/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp index 8ba4a892b5..246199f696 100644 --- a/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp +++ b/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp @@ -75,7 +75,7 @@ RightRegularGrammar BrzozowskiDerivationRRG::convert( void ) for( const auto & r : N ) { - Symbol nt = grammar.createUniqueNonTerminalSymbol( toBase26( ++ nonterminalId ) ); + Symbol nt = grammar.createUniqueNonTerminalSymbol( toBase26( nonterminalId ++ ) ); nonterminalMap.insert( pair<RegExp, Symbol>( r, nt ) ); } diff --git a/aconversions/src/re2rg/re2rrg/GlushkovRRG.cpp b/aconversions/src/re2rg/re2rrg/GlushkovRRG.cpp index e7dc539e77..f28ec8c927 100644 --- a/aconversions/src/re2rg/re2rrg/GlushkovRRG.cpp +++ b/aconversions/src/re2rg/re2rrg/GlushkovRRG.cpp @@ -46,9 +46,11 @@ RightRegularGrammar GlushkovRRG::convert( void ) Symbol S = grammar.createUniqueNonTerminalSymbol( "S" ); grammar.setStartSymbol( S ); + int nonterminalId = 0; + for( auto const& symbol : GlushkovTraversal::getSymbols( m_re ) ) { - Symbol a = grammar.createUniqueNonTerminalSymbol( symbol.getInputSymbol( ).getSymbol( ) + to_string( symbol.getId( ) ) ); + Symbol a = grammar.createUniqueNonTerminalSymbol( toBase26( nonterminalId ++ ) + to_string( symbol.getId( ) ) ); m_symbolMap.insert( std::pair<GlushkovSymbol, Symbol>( symbol, a ) ); } diff --git a/aconversions/src/re2rg/re2rrg/GlushkovRRG.h b/aconversions/src/re2rg/re2rrg/GlushkovRRG.h index fa9b747470..f464aad0d7 100644 --- a/aconversions/src/re2rg/re2rrg/GlushkovRRG.h +++ b/aconversions/src/re2rg/re2rrg/GlushkovRRG.h @@ -16,6 +16,8 @@ #include "../../interface/IConversionRRG.h" #include "../../shared/glushkov/GlushkovTraversal.h" +#include "../../shared/Hexavigesimal.h" + namespace conversions { diff --git a/aconversions/src/rg2fa/rrg2fa/RRGtoFAConverter.cpp b/aconversions/src/rg2fa/rrg2fa/RRGtoFAConverter.cpp index ea121d2fdb..8c81927b11 100644 --- a/aconversions/src/rg2fa/rrg2fa/RRGtoFAConverter.cpp +++ b/aconversions/src/rg2fa/rrg2fa/RRGtoFAConverter.cpp @@ -28,7 +28,7 @@ FSM RRGtoFAConverter::convert( void ) for( const auto & symbol : m_grammar.getNonTerminalSymbols( ) ) automaton.addState( State( symbol.getSymbol( ) ) ); - const State & AState = automaton.createUniqueState( "A", true ); + const State & AState = automaton.createUniqueState( "A", false ); for( const auto & rule : m_grammar.getRules( ) ) { diff --git a/tests.aconversion.sh b/tests.aconversion.sh index 67d0c0975d..60d3d62f4a 100755 --- a/tests.aconversion.sh +++ b/tests.aconversion.sh @@ -2,7 +2,7 @@ set -o pipefail -TESTCASE_ITERATIONS=200 +TESTCASE_ITERATIONS=100 TESTCASE_TIMEOUT=5 LOGFILE="log_tests.txt" @@ -58,7 +58,7 @@ function runTest2 { RETTMP=$? # segfault - if [ $RETTMP -eq 139 ]; then + if [ $RETTMP -eq 134 ]; then return 3 fi -- GitLab