From 5d9af68bec5280f64b7fe30d50c417598d32cf46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <tomaspecka@gmail.com>
Date: Sat, 18 Jan 2014 20:23:45 +0100
Subject: [PATCH] Glushkov - symbol numbering

---
 .../src/conversions/re2fa/Glushkov.cpp        | 43 ++++++++++++++++
 aconversions/src/conversions/re2fa/Glushkov.h | 26 ++++++++--
 aconversions/src/conversions/re2fa/Makefile   | 10 ++--
 aconversions/src/utils/RegExpUtils.cpp        | 51 +++++++++++++++++--
 aconversions/src/utils/RegExpUtils.h          | 11 +++-
 aconversions/src/utils/utils.h                |  1 +
 6 files changed, 130 insertions(+), 12 deletions(-)

diff --git a/aconversions/src/conversions/re2fa/Glushkov.cpp b/aconversions/src/conversions/re2fa/Glushkov.cpp
index c784a226a5..024e034ce5 100644
--- a/aconversions/src/conversions/re2fa/Glushkov.cpp
+++ b/aconversions/src/conversions/re2fa/Glushkov.cpp
@@ -20,7 +20,50 @@ Glushkov::Glushkov( const RegExp & re ) : AbstractREtoFAConverter( re )
 
 const FSM Glushkov::convert( void )
 {
+	initNumberSymbols( );
+	/*
+	constructBeginSymbolSet( );
+	constructNeighbourSet( );
+	constructEndSymbolSet( );
+	*/
+
 	return m_fsm;
 }
 
+/**
+ * Numbers the symbol occurrences of m_re: every symbol returned by
+ * RegExpUtils::getRegExpSymbols is appended to m_numberedSymbols together with
+ * a counter that starts at 0 and is kept separately for each distinct symbol.
+ */
+void Glushkov::initNumberSymbols( void )
+{
+	map<RegExpSymbol, int> iter;
+
+	for( const auto & symb : RegExpUtils::getRegExpSymbols( m_re ) )
+	{
+		if( ! isKeyInMap( symb, iter ) )
+			iter.insert( std::pair<RegExpSymbol,int>( symb, 0 ) );
+
+		m_numberedSymbols.push_back( NumberedSymbol( symb, iter[ symb ] ++ ) );
+	}
+}
+
+// ----------------------------------------------------------------------------
+
+Glushkov::NumberedSymbol::NumberedSymbol( const RegExpSymbol & symbol, int i ) : m_symbol( symbol ), m_i( i )
+{
+
+}
+
+/**
+ * Returns m_symbol.getSymbol( ) and m_i joined by an underscore.
+ */
+std::string Glushkov::NumberedSymbol::constructStateName( void )
+{
+	ostringstream oss;
+	oss << m_symbol.getSymbol( ) << "_" << m_i;
+
+	return oss.str( );
+}
+
 } /* namespace conversions */
diff --git a/aconversions/src/conversions/re2fa/Glushkov.h b/aconversions/src/conversions/re2fa/Glushkov.h
index 5da8240f33..c528a333f1 100644
--- a/aconversions/src/conversions/re2fa/Glushkov.h
+++ b/aconversions/src/conversions/re2fa/Glushkov.h
@@ -8,9 +8,13 @@
 #ifndef GLUSHKOV_H_
 #define GLUSHKOV_H_
 
+#include <sstream>
+#include <map>
+
+#include <alphabet/Symbol.h>
 #include <regexp/RegExp.h>
-#include <regexp/RegExpSymbol.h>
 
+#include "../../utils/RegExpUtils.h"
 #include "AbstractREtoFAConverter.h"
 
 namespace conversions
@@ -18,6 +22,9 @@ namespace conversions
 
 /**
  * Converts regular expression to finite automata using Glushkov's NFA construction algorithm.
+ * Sources:
+ *  - Melichar 2.107
+ *
  */
 class Glushkov : public AbstractREtoFAConverter
 {
@@ -26,13 +33,24 @@ public:
 	const automaton::FSM convert( void );
 
 private:
-	struct Struct
+	/**
+	 * A single symbol occurrence paired with its per-symbol index.
+	 */
+	struct NumberedSymbol
 	{
-		regexp::RegExpSymbol & m_symbol;
+		// Held by value: the symbols passed in by initNumberSymbols do not outlive its loop, so a reference member would dangle.
+		regexp::RegExpSymbol m_symbol;
 		int m_i;
 
-		std::string constructStateName();
+		NumberedSymbol( const regexp::RegExpSymbol & symb, int i );
+		std::string constructStateName( void );
 	};
+
+
+	void initNumberSymbols( void );
+
+	std::list<NumberedSymbol> m_numberedSymbols;
+
 };
 
 } /* namespace conversions */
diff --git a/aconversions/src/conversions/re2fa/Makefile b/aconversions/src/conversions/re2fa/Makefile
index e8758d4474..d1943d04da 100644
--- a/aconversions/src/conversions/re2fa/Makefile
+++ b/aconversions/src/conversions/re2fa/Makefile
@@ -1,10 +1,10 @@
 all: are2fa.glushkov are2fa.brzozowski are2fa.thompson
-	#mv are2fa.glushkov $(BIN_DIR)
+	mv are2fa.glushkov $(BIN_DIR)
 	#mv are2fa.brzozowski $(BIN_DIR)
 	mv are2fa.thompson $(BIN_DIR)
 
-are2fa.glushkov: are2fa.glushkov.o Glushkov.o AbstractREtoFAConverter.o
-	#$(LD) $(LDFLAGS) $^ -o $@
+are2fa.glushkov: are2fa.glushkov.o Glushkov.o AbstractREtoFAConverter.o RegExpUtils.o ConversionException.o
+	$(LD) $(LDFLAGS) $^ -o $@
 
 are2fa.brzozowski: are2fa.brzozowski.o Brzozowski.o AbstractREtoFAConverter.o
 	#$(LD) $(LDFLAGS) $^ -o $@
@@ -22,7 +22,6 @@ are2fa.glushkov.o: are2fa.glushkov.cpp Glushkov.h AbstractREtoFAConverter.h
 are2fa.thompson.o: are2fa.thompson.cpp Thompson.h AbstractREtoFAConverter.h
 	$(CXX) $(CXXFLAGS) $< -o $@
 
-
 AbstractREtoFAConverter.o: AbstractREtoFAConverter.cpp AbstractREtoFAConverter.h
 	$(CXX) $(CXXFLAGS) $< -o $@
 
@@ -39,6 +38,9 @@ Thompson.o: Thompson.cpp Thompson.h AbstractREtoFAConverter.h ../../utils/Automa
 AutomatonUtils.o: ../../utils/AutomatonUtils.cpp ../../utils/AutomatonUtils.h ../../utils/utils.h
 	$(CXX) $(CXXFLAGS) $< -o $@
 
+RegExpUtils.o: ../../utils/RegExpUtils.cpp ../../utils/RegExpUtils.h ../../utils/utils.h ../../utils/ConversionException.h
+	$(CXX) $(CXXFLAGS) $< -o $@
+
 ConversionException.o: ../../utils/ConversionException.cpp ../../utils/ConversionException.h
 	$(CXX) $(CXXFLAGS) $< -o $@
 
diff --git a/aconversions/src/utils/RegExpUtils.cpp b/aconversions/src/utils/RegExpUtils.cpp
index cc4532bf52..6d7feab78c 100644
--- a/aconversions/src/utils/RegExpUtils.cpp
+++ b/aconversions/src/utils/RegExpUtils.cpp
@@ -1,17 +1,62 @@
 #include "RegExpUtils.h"
 
 using namespace alphabet;
+using namespace regexp;
 using namespace std;
 
 namespace conversions
 {
 
-set<Symbol> getAlphabet( void )
+/**
+ * Collects the symbols of a regular expression.
+ *
+ * @param re regular expression whose tree is traversed
+ * @return the symbols in the order in which traverseSymbols visits them
+ */
+list<RegExpSymbol> RegExpUtils::getRegExpSymbols( const RegExp & re )
 {
-	std::set<Symbol> alphabet;
-	// iterate through
+	// returning list to preserve ordering of symbols in the regexp tree
+
+	list<RegExpSymbol> alphabet;
+
+	RegExp& r = const_cast<RegExp&>( re );
+	traverseSymbols( r.getRegExp(), alphabet );
 
 	return alphabet;
 }
 
+
+/**
+ * Appends every RegExpSymbol found in the subtree rooted at element to alphabet.
+ *
+ * @param element node of the regexp tree to process
+ * @param alphabet list the symbols are appended to
+ * @throws ConversionException if element is not a RegExpSymbol, Alternation, Concatenation or Iteration
+ */
+void RegExpUtils::traverseSymbols( RegExpElement * element, list<RegExpSymbol> & alphabet )
+{
+	Alternation* alternation = dynamic_cast<Alternation*>(element);
+	Concatenation* concatenation = dynamic_cast<Concatenation*>(element);
+	Iteration* iteration = dynamic_cast<Iteration*>(element);
+	RegExpSymbol* symbol = dynamic_cast<RegExpSymbol*>(element);
+
+	if( symbol )
+		alphabet.push_back( * symbol );
+
+	else if( alternation )
+		for( auto child : alternation->getElements() )
+			traverseSymbols( child, alphabet );
+
+	else if( concatenation )
+		for( auto child : concatenation->getElements() )
+			traverseSymbols( child, alphabet );
+
+	else if( iteration )
+		traverseSymbols( iteration->getElement(), alphabet );
+
+	else
+		// element matched none of the known node types
+		throw ConversionException( "Captain's log. Stardate 3413.6. Approaching TraverseUtils, class RegExpUtils planet. Encountered invalid RegExpElement. Sending away team to explore." );
+}
+
 } /* namespace conversions */
diff --git a/aconversions/src/utils/RegExpUtils.h b/aconversions/src/utils/RegExpUtils.h
index aff77de130..e67b227bf3 100644
--- a/aconversions/src/utils/RegExpUtils.h
+++ b/aconversions/src/utils/RegExpUtils.h
@@ -1,7 +1,15 @@
 #ifndef REGEXPUTILS_H_
 #define REGEXPUTILS_H_
 
+#include <regexp/RegExp.h>
+#include <regexp/RegExpElement.h>
+#include <regexp/Alternation.h>
+#include <regexp/Concatenation.h>
+#include <regexp/Iteration.h>
+#include <regexp/RegExpSymbol.h>
+
 #include "utils.h"
+#include "ConversionException.h"
 
 namespace conversions
 {
@@ -9,9 +17,10 @@ namespace conversions
 class RegExpUtils
 {
 public:
-	static std::set<alphabet::Symbol> getAlphabet( void );
+	static std::list<regexp::RegExpSymbol> getRegExpSymbols( const regexp::RegExp & re );
 
 private:
+	static void traverseSymbols( regexp::RegExpElement * element, std::list<regexp::RegExpSymbol> & alphabet );
 };
 
 } /* namespace conversions */
diff --git a/aconversions/src/utils/utils.h b/aconversions/src/utils/utils.h
index 4d14be70ae..609a4e61b1 100644
--- a/aconversions/src/utils/utils.h
+++ b/aconversions/src/utils/utils.h
@@ -12,6 +12,7 @@ namespace conversions
 {
 
 #define isInSet(x,set) ( (set).find((x)) != (set).end())
+#define isKeyInMap(key,map) ( (map).find((key)) != (map).end())
 
 enum SuffixType
 {
-- 
GitLab