From 354f07f28f117108af1c42f762c1b29945f81c5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <tomaspecka@gmail.com> Date: Fri, 24 Jan 2014 14:03:29 +0100 Subject: [PATCH] Move RE derivatives, rewrite, aderivative executable, RE normalize --- aconversions/Makefile | 7 + aconversions/src/conversions/Makefile | 6 - .../src/conversions/re2fa/Brzozowski.cpp | 105 +++++++++++- .../src/conversions/re2fa/Brzozowski.h | 34 +++- aconversions/src/conversions/re2fa/Makefile | 12 +- .../src/conversions/re2fa/RegExpNormalize.cpp | 66 -------- .../src/conversions/re2fa/RegExpNormalize.h | 36 ----- .../BrzozowskiDerivative.cpp | 38 +++-- .../BrzozowskiDerivative.h | 4 +- aconversions/src/derivatives/Makefile | 26 +++ .../src/derivatives/RegExpNormalize.cpp | 149 ++++++++++++++++++ .../src/derivatives/RegExpNormalize.h | 38 +++++ aconversions/src/derivatives/aderivative.cpp | 50 ++++++ aconversions/src/utils/RegExpUtils.cpp | 20 ++- aconversions/src/utils/RegExpUtils.h | 2 + 15 files changed, 449 insertions(+), 144 deletions(-) delete mode 100644 aconversions/src/conversions/re2fa/RegExpNormalize.cpp delete mode 100644 aconversions/src/conversions/re2fa/RegExpNormalize.h rename aconversions/src/{conversions/re2fa => derivatives}/BrzozowskiDerivative.cpp (76%) rename aconversions/src/{conversions/re2fa => derivatives}/BrzozowskiDerivative.h (95%) create mode 100644 aconversions/src/derivatives/Makefile create mode 100644 aconversions/src/derivatives/RegExpNormalize.cpp create mode 100644 aconversions/src/derivatives/RegExpNormalize.h create mode 100644 aconversions/src/derivatives/aderivative.cpp diff --git a/aconversions/Makefile b/aconversions/Makefile index e914767b00..93ed9c830f 100644 --- a/aconversions/Makefile +++ b/aconversions/Makefile @@ -3,14 +3,21 @@ export ALIB_SRC = $(realpath ../alib/src) export ALIB_LIB = $(realpath ../alib/Debug) export BIN_DIR = $(addsuffix /$(OUT_DIR), $(realpath .)) +export CXX=g++ +export CXXFLAGS=-O2 -g -std=c++11 -c -Wall 
-pedantic -I/usr/include/libxml2 -I$(ALIB_SRC) +export LD=g++ +export LDFLAGS=-L$(ALIB_LIB) -lalib + .PHONY: clean all: @mkdir -p $(OUT_DIR) @$(MAKE) -C src/conversions + @$(MAKE) -C src/derivatives clean: @echo "[Clean] Cleaning up" @$(MAKE) -C src/conversions clean + @$(MAKE) -C src/derivatives clean @rm -f $(OUT_DIR)/* @rm -rf $(OUT_DIR) \ No newline at end of file diff --git a/aconversions/src/conversions/Makefile b/aconversions/src/conversions/Makefile index 0b61d66a9b..d8910754dc 100644 --- a/aconversions/src/conversions/Makefile +++ b/aconversions/src/conversions/Makefile @@ -1,9 +1,3 @@ -export CXX=g++ -export CXXFLAGS=-O3 -std=c++11 -c -Wall -pedantic -I/usr/include/libxml2 -I$(ALIB_SRC) -export LD=g++ -export LDFLAGS=-L$(ALIB_LIB) -lalib -EXECUTABLE= - CONVERSIONS_DIRS = fa2re fa2rg re2fa re2rg rg2fa rg2re .PHONY: clean $(CONVERSIONS_DIRS) diff --git a/aconversions/src/conversions/re2fa/Brzozowski.cpp b/aconversions/src/conversions/re2fa/Brzozowski.cpp index d7ad39b0b8..801b20e2ef 100644 --- a/aconversions/src/conversions/re2fa/Brzozowski.cpp +++ b/aconversions/src/conversions/re2fa/Brzozowski.cpp @@ -22,14 +22,115 @@ Brzozowski::Brzozowski( const RegExp & re ) : AbstractREtoFAConverter( re ) const FSM Brzozowski::convert( void ) { + /* BrzozowskiDerivative bd( m_re ); - list<RegExpSymbol> string; - string.push_back( RegExpSymbol( "0" ) ); + list<RegExpSymbol> string( 1, RegExpSymbol( "0" ) ); bd.derivative( string ).toXML( cout ); + */ + /* + list<RegExpSymbol*> alphabet = RegExpUtils::getRegExpSymbols( m_re ); + set<RegExp> Q = { m_re }, Qprev = { m_re }, Qcurr; + + int i = 1; + while( true ) + { + for( const auto & regexp : Qprev ) + { + BrzozowskiDerivative bd( regexp ); + for( const auto & symbol : alphabet ) + { + const RegExp re = bd.derivative( list<RegExpSymbol>( 1, * symbol ) ); + if( ! 
RegExpUtils::isRegExpEmpty( re ) ) + { + Qcurr.insert( re ); + m_transitions.push_back( BrzozowskiTransition( regexp, symbol, re ) ); + } + } + } + + if( Qcurr.size( ) == 0 ) + break; + + set<RegExp> setunion; + set_union( Q.begin( ), Q.end( ), Qcurr.begin( ), Qcurr.end( ), setunion.begin( ) ); + + Q = setunion; + Qprev = Qcurr; + Qcurr.clear( ); + + i += 1; + } + + + StateBuilder builder; + + for( const auto & regexp : Q ) + { + const State s = builder.constructState( regexp ); + m_fsm.addState( s ); + if( RegExpUtils::containsEpsilon( regexp ) ) + m_fsm.addFinalState( s ); + } + m_fsm.addInitialState( builder.constructState ( m_re ) ); + + for( const auto & symbol : alphabet ) + m_fsm.addInputSymbol( Symbol( symbol->getSymbol( ) ) ); + + for( const auto & t : m_transitions ) + { + const State from = builder.constructState( t.m_from ); + const State to = builder.constructState( t.m_to ); + const Symbol symb = Symbol( t.m_regexpSymbol->getSymbol( ) ); + + m_fsm.addTransition( from, symb, to ); + } + + */ return m_fsm; } +// ---------------------------------------------------------------------------- + +Brzozowski::BrzozowskiTransition::BrzozowskiTransition( const RegExp & from, const RegExpSymbol * symb, const RegExp & to ) + : m_from( from ), m_to( to ), m_regexpSymbol( symb ) +{ + +} + +// ---------------------------------------------------------------------------- +/* + +Brzozowski::StateBuilder::StateBuilder( void ) +{ + m_stateId = 0; +} + +const State & Brzozowski::StateBuilder::constructState( const RegExp & re ) +{ + if( ! 
isKeyInMap( re, m_states ) ) + m_states[ re ] = State( createNewName( ) ); + + return m_states[ re ]; +} + +const string Brzozowski::StateBuilder::createNewName( void ) +{ + // http://en.wikipedia.org/wiki/Hexavigesimal + + unsigned int n = m_stateId ++; + string name; + do + { + unsigned int remainder = n % 26; + name.insert( 0, ( char )( remainder + 'A' ), 1 ); + n = (n - remainder) / 26; + } while (n > 0); + + return name; +} + +*/ } /* namespace conversions */ diff --git a/aconversions/src/conversions/re2fa/Brzozowski.h b/aconversions/src/conversions/re2fa/Brzozowski.h index b3b3cc73de..7e36f8f585 100644 --- a/aconversions/src/conversions/re2fa/Brzozowski.h +++ b/aconversions/src/conversions/re2fa/Brzozowski.h @@ -8,8 +8,14 @@ #ifndef BRZOZOWSKI_H_ #define BRZOZOWSKI_H_ +#include <algorithm> +#include <map> +#include <set> + #include "AbstractREtoFAConverter.h" -#include "BrzozowskiDerivative.h" +#include "../../derivatives/BrzozowskiDerivative.h" +#include "../../utils/RegExpUtils.h" +#include "../../utils/AutomatonUtils.h" namespace conversions { @@ -22,6 +28,32 @@ class Brzozowski : public AbstractREtoFAConverter public: Brzozowski( const regexp::RegExp & re ); const automaton::FSM convert( void ); + +private: + /* + class StateBuilder + { + public: + StateBuilder( void ); + const automaton::State & constructState( const regexp::RegExp & re ); + + private: + const std::string createNewName( void ); + + std::map<const regexp::RegExp, automaton::State> m_states; + unsigned int m_stateId; + }; + */ + + struct BrzozowskiTransition + { + const regexp::RegExp & m_from, & m_to; + const regexp::RegExpSymbol * m_regexpSymbol; + + BrzozowskiTransition( const regexp::RegExp & from, const regexp::RegExpSymbol * symb, const regexp::RegExp & to ); + }; + + std::list<BrzozowskiTransition> m_transitions; }; } /* namespace conversions */ diff --git a/aconversions/src/conversions/re2fa/Makefile b/aconversions/src/conversions/re2fa/Makefile index 862fa69b94..a681699b99 100644 
--- a/aconversions/src/conversions/re2fa/Makefile +++ b/aconversions/src/conversions/re2fa/Makefile @@ -13,7 +13,7 @@ are2fa.thompson: are2fa.thompson.o Thompson.o AbstractREtoFAConverter.o Automato $(LD) $(LDFLAGS) $^ -o $@ -are2fa.brzozowski.o: are2fa.brzozowski.cpp Brzozowski.h BrzozowskiDerivative.h RegExpNormalize.h AbstractREtoFAConverter.h +are2fa.brzozowski.o: are2fa.brzozowski.cpp Brzozowski.h ../../derivatives/BrzozowskiDerivative.h ../../derivatives/RegExpNormalize.h AbstractREtoFAConverter.h $(CXX) $(CXXFLAGS) $< -o $@ are2fa.glushkov.o: are2fa.glushkov.cpp Glushkov.h AbstractREtoFAConverter.h @@ -29,18 +29,18 @@ AbstractREtoFAConverter.o: AbstractREtoFAConverter.cpp AbstractREtoFAConverter.h Brzozowski.o: Brzozowski.cpp Brzozowski.h AbstractREtoFAConverter.h $(CXX) $(CXXFLAGS) $< -o $@ -BrzozowskiDerivative.o: BrzozowskiDerivative.cpp BrzozowskiDerivative.h RegExpNormalize.h ../../utils/ConversionException.h ../../utils/RegExpUtils.h +Glushkov.o: Glushkov.cpp Glushkov.h AbstractREtoFAConverter.h $(CXX) $(CXXFLAGS) $< -o $@ -RegExpNormalize.o: RegExpNormalize.cpp RegExpNormalize.h ../../utils/ConversionException.h +Thompson.o: Thompson.cpp Thompson.h AbstractREtoFAConverter.h ../../utils/AutomatonUtils.h ../../utils/ConversionException.h $(CXX) $(CXXFLAGS) $< -o $@ -Glushkov.o: Glushkov.cpp Glushkov.h AbstractREtoFAConverter.h - $(CXX) $(CXXFLAGS) $< -o $@ -Thompson.o: Thompson.cpp Thompson.h AbstractREtoFAConverter.h ../../utils/AutomatonUtils.h ../../utils/ConversionException.h +BrzozowskiDerivative.o: ../../derivatives/BrzozowskiDerivative.cpp ../../derivatives/BrzozowskiDerivative.h ../../derivatives/RegExpNormalize.h ../../utils/ConversionException.h ../../utils/RegExpUtils.h $(CXX) $(CXXFLAGS) $< -o $@ +RegExpNormalize.o: ../../derivatives/RegExpNormalize.cpp ../../derivatives/RegExpNormalize.h ../../utils/ConversionException.h ../../utils/RegExpUtils.h + $(CXX) $(CXXFLAGS) $< -o $@ AutomatonUtils.o: ../../utils/AutomatonUtils.cpp ../../utils/AutomatonUtils.h ../../utils/utils.h
$(CXX) $(CXXFLAGS) $< -o $@ diff --git a/aconversions/src/conversions/re2fa/RegExpNormalize.cpp b/aconversions/src/conversions/re2fa/RegExpNormalize.cpp deleted file mode 100644 index 7257c3c32b..0000000000 --- a/aconversions/src/conversions/re2fa/RegExpNormalize.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/* - * RegExpNormalize.cpp - * - * Created on: 20. 1. 2014 - * Author: tomas - */ - -#include "RegExpNormalize.h" - -using namespace regexp; - -namespace conversions -{ - -RegExpElement* RegExpNormalize::normalize( RegExpElement * element ) const -{ - Alternation* alternation = dynamic_cast<Alternation*>( element ); - Concatenation* concatenation = dynamic_cast<Concatenation*>( element ); - Iteration* iteration = dynamic_cast<Iteration*>( element ); - RegExpSymbol* symbol = dynamic_cast<RegExpSymbol*>( element ); - - if( alternation ) - return normalizeAlternation( alternation ); - else if( concatenation ) - return normalizeConcatenation( concatenation ); - else if( iteration ) - return normalizeIteration( iteration ); - else if( symbol ) - return normalizeSymbol( symbol ); - - throw ConversionException( "BrzozowskiDerivativeNormalize::normalize - unknown RegExpElement type" ); -} - -RegExpElement * RegExpNormalize::normalizeAlternation( Alternation * element ) const -{ - list<RegExpElement*> l = element->getElements( ); - - if( l.size( ) == 1 ) - { - RegExpElement* child = * l.begin( ); - l.pop_front( ); - delete element; - return child; - } - - return element; - -} - -RegExpElement * RegExpNormalize::normalizeConcatenation( Concatenation * element ) const -{ - return element; -} - -RegExpElement * RegExpNormalize::normalizeIteration( Iteration * element ) const -{ - // element->setElement( normalize( element->getElement( ) ) ); - return element; -} - -RegExpElement * RegExpNormalize::normalizeSymbol( RegExpSymbol * element ) const -{ - return element; -} - -} /* namespace conversions */ diff --git a/aconversions/src/conversions/re2fa/RegExpNormalize.h 
b/aconversions/src/conversions/re2fa/RegExpNormalize.h deleted file mode 100644 index 2069ef88fd..0000000000 --- a/aconversions/src/conversions/re2fa/RegExpNormalize.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * RegExpNormalize.h - * - * Created on: 20. 1. 2014 - * Author: tomas - */ - -#ifndef REGEXPNORMALIZE_H_ -#define REGEXPNORMALIZE_H_ - -#include <regexp/RegExpElement.h> -#include <regexp/Alternation.h> -#include <regexp/Concatenation.h> -#include <regexp/Iteration.h> -#include <regexp/RegExpSymbol.h> - -#include "../../utils/ConversionException.h" - -namespace conversions -{ - -class RegExpNormalize -{ -public: - regexp::RegExpElement * normalize( regexp::RegExpElement* element ) const; - -private: - regexp::RegExpElement * normalizeAlternation( regexp::Alternation * element ) const; - regexp::RegExpElement * normalizeConcatenation( regexp::Concatenation * element ) const; - regexp::RegExpElement * normalizeIteration( regexp::Iteration * element ) const; - regexp::RegExpElement * normalizeSymbol( regexp::RegExpSymbol * element) const; -}; - -} /* namespace conversions */ - -#endif /* REGEXPNORMALIZE_H_ */ diff --git a/aconversions/src/conversions/re2fa/BrzozowskiDerivative.cpp b/aconversions/src/derivatives/BrzozowskiDerivative.cpp similarity index 76% rename from aconversions/src/conversions/re2fa/BrzozowskiDerivative.cpp rename to aconversions/src/derivatives/BrzozowskiDerivative.cpp index de8fc86a58..f73e38eb4d 100644 --- a/aconversions/src/conversions/re2fa/BrzozowskiDerivative.cpp +++ b/aconversions/src/derivatives/BrzozowskiDerivative.cpp @@ -22,17 +22,17 @@ BrzozowskiDerivative::BrzozowskiDerivative( const RegExp & re ) : m_re( re ) RegExp BrzozowskiDerivative::derivative ( const list<RegExpSymbol> & string ) const { - if( string.size( ) == 0 ) // dV/d(eps) = V - return RegExp ( m_regexpRoot->clone( ) ); - RegExpElement * expression = m_regexpRoot; - for( auto symbol : string ) // dV/d(ab) = d( dV/da )/db - expression = derivative( expression, symbol ); - 
- - // BrzozowskiDerivativeNormalize normalizer; - // expression = normalizer.normalize( expression ); + for( const auto & dSymbol : string ) // dV/d(ab) = d( dV/da )/db + { + if( dSymbol.getSymbol( ) == "" ) + expression = expression->clone( ); + else + expression = derivative( expression, dSymbol ); + } - return RegExp( expression ); + RegExpNormalize norm; // normalize( ) rewrites the tree it is given in place, so the stored root itself must never be passed (empty derivative string: dV/d(eps) = V) + return norm.normalize( expression == m_regexpRoot ? expression->clone( ) : expression ); } RegExpElement * BrzozowskiDerivative::derivative( RegExpElement * element, const RegExpSymbol & dSymbol ) const @@ -79,25 +79,23 @@ RegExpElement * BrzozowskiDerivative::derivative( Concatenation * element, const for( auto elem = element->getElements( ).begin( ); elem != element->getElements( ).end( ); elem ++ ) { - RegExpElement* d = derivative( * elem, dSymbol ); - if( d != NULL ) // if d(elem)/d(dSym) is 0, then whole concat is 0 (Melichar, 2.87, A7) - { - Concatenation* concat = new Concatenation( ); - list<RegExpElement*> & concatElements = concat->getElements( ); + Concatenation* concat = new Concatenation( ); + list<RegExpElement*> & concatElements = concat->getElements( ); - concatElements.push_back( d ); + concatElements.push_back( derivative( * elem, dSymbol ) ); - auto succeedingElem = elem; - while( ++ succeedingElem != element->getElements( ).end( ) ) - concatElements.push_back( ( * succeedingElem )->clone( ) ); + auto succeedingElem = elem; + while( ++ succeedingElem != element->getElements( ).end( ) ) + concatElements.push_back( ( * succeedingElem )->clone( ) ); - altElements.push_back( concat ); - } + // altElements.push_back( RegExpNormalize::normalize( concat ) ); + altElements.push_back( concat ); if( !
RegExpUtils::containsEpsilon( * elem ) ) break; } + // return RegExpNormalize::normalize( alt ); return alt; } diff --git a/aconversions/src/conversions/re2fa/BrzozowskiDerivative.h b/aconversions/src/derivatives/BrzozowskiDerivative.h similarity index 95% rename from aconversions/src/conversions/re2fa/BrzozowskiDerivative.h rename to aconversions/src/derivatives/BrzozowskiDerivative.h index bce8cee9cc..c04521fea0 100644 --- a/aconversions/src/conversions/re2fa/BrzozowskiDerivative.h +++ b/aconversions/src/derivatives/BrzozowskiDerivative.h @@ -18,8 +18,8 @@ #include <list> #include "RegExpNormalize.h" -#include "../../utils/ConversionException.h" -#include "../../utils/RegExpUtils.h" +#include "../utils/ConversionException.h" +#include "../utils/RegExpUtils.h" namespace conversions { diff --git a/aconversions/src/derivatives/Makefile b/aconversions/src/derivatives/Makefile new file mode 100644 index 0000000000..9483ace132 --- /dev/null +++ b/aconversions/src/derivatives/Makefile @@ -0,0 +1,26 @@ +all: aderivative + mv aderivative $(BIN_DIR) + +aderivative: aderivative.o BrzozowskiDerivative.o RegExpNormalize.o ConversionException.o RegExpUtils.o + $(LD) $(LDFLAGS) $^ -o aderivative + + +aderivative.o: aderivative.cpp BrzozowskiDerivative.h + $(CXX) $(CXXFLAGS) $< -o $@ + +BrzozowskiDerivative.o: BrzozowskiDerivative.cpp BrzozowskiDerivative.h RegExpNormalize.h ../utils/RegExpUtils.h + $(CXX) $(CXXFLAGS) $< -o $@ + +RegExpNormalize.o: RegExpNormalize.cpp RegExpNormalize.h ../utils/ConversionException.h ../utils/RegExpUtils.h + $(CXX) $(CXXFLAGS) $< -o $@ + +ConversionException.o: ../utils/ConversionException.cpp ../utils/ConversionException.h + $(CXX) $(CXXFLAGS) $< -o $@ + +RegExpUtils.o: ../utils/RegExpUtils.cpp ../utils/RegExpUtils.h + $(CXX) $(CXXFLAGS) $< -o $@ + + +.PHONY: clean +clean: + rm -f *.o diff --git a/aconversions/src/derivatives/RegExpNormalize.cpp b/aconversions/src/derivatives/RegExpNormalize.cpp new file mode 100644 index 0000000000..d3ba54110a 
--- /dev/null +++ b/aconversions/src/derivatives/RegExpNormalize.cpp @@ -0,0 +1,149 @@ +/* + * RegExpNormalize.cpp + * + * Created on: 20. 1. 2014 + * Author: tomas + */ + +#include "RegExpNormalize.h" +#include <iostream> + +using namespace regexp; + +namespace conversions +{ + +RegExp RegExpNormalize::normalize( RegExpElement * element ) +{ + return RegExp( optimize( element ) ); +} + + +RegExpElement* RegExpNormalize::optimize( RegExpElement * element ) +{ + if( element == NULL ) + return NULL; + + Alternation* alternation = dynamic_cast<Alternation*>( element ); + Concatenation* concatenation = dynamic_cast<Concatenation*>( element ); + Iteration* iteration = dynamic_cast<Iteration*>( element ); + RegExpSymbol* symbol = dynamic_cast<RegExpSymbol*>( element ); + + if( alternation ) + return optimize( alternation ); + else if( concatenation ) + return optimize( concatenation ); + else if( iteration ) + return optimize( iteration ); + else if( symbol ) + return optimize( symbol ); + + throw ConversionException( "RegExpNormalize::optimize - unknown RegExpElement type" ); +} + +RegExpElement * RegExpNormalize::optimize( Alternation * element ) +{ + for( auto it = element->getElements( ).begin( ) ; it != element->getElements( ).end( ); it ++ ) + * it = optimize( * it ); + + // 0 + x = x - erases all empty sets from Alternation + for( auto it = element->getElements( ).begin( ) ; it != element->getElements( ).end( ); ) + { + if ( * it == NULL ) + it = element->getElements( ).erase( it ); + else + it ++; + } + + if( element->getElements( ).size( ) == 0 ) + { + delete element; + return NULL; + } + else if( element->getElements( ).size( ) == 1 ) + { + RegExpElement * ret = * element->getElements( ).begin( ); + element->getElements( ).clear( ); + delete element; + return ret; + } + + return element; +} + +RegExpElement * RegExpNormalize::optimize( Concatenation * element ) +{ + for( auto it = element->getElements( ).begin( ) ; it != element->getElements( ).end( ); it ++ ) 
+ * it = optimize( * it ); + + // a.(b.c) = (a.b).c + for( auto it = element->getElements( ).begin( ) ; it != element->getElements( ).end( ); ) // advanced inside the body: erase( ) already returns the following element + { + Concatenation* c = dynamic_cast<Concatenation*>( * it ); + if( ! c ) + { it ++; continue; } + + for( const auto & e : c->getElements( ) ) + element->getElements( ).insert( it, e ); + + c->getElements( ).clear( ); + delete c; + it = element->getElements( ).erase( it ); + } + + // 0.x = 0 + for( auto const & e : element->getElements( ) ) + { + if( e == NULL ) + { + element->getElements( ).clear( ); + delete element; + return NULL; + } + } + + // delete all redundant epsilons + for( auto it = element->getElements( ).begin( ) ; it != element->getElements( ).end( ); ) + { + RegExpSymbol* symbol = dynamic_cast<RegExpSymbol*>( * it ); + + if( symbol && symbol->getSymbol( ) == "" && element->getElements( ).size( ) > 1 ) + { delete symbol; it = element->getElements( ).erase( it ); } // free the dropped epsilon node, not only its list slot + else + it ++; + } + + if( element->getElements( ).size( ) == 1 ) + { + RegExpElement * ret = * element->getElements( ).begin( ); + element->getElements( ).clear( ); + delete element; + return ret; + } + + return element; +} + +RegExpElement * RegExpNormalize::optimize( Iteration * element ) +{ + element->setElement( optimize( element->getElement( ) ) ); + + // a********* = a* + Iteration* iteration; + while( ( iteration = dynamic_cast<Iteration*>( element->getElement( ) ) ) != NULL ) + { + element->setElement( iteration->getElement( ) ); + iteration->setElement( NULL ); + delete iteration; + } + + return element; +} + +RegExpElement * RegExpNormalize::optimize( RegExpSymbol * element ) +{ + return element; +} + + +} /* namespace conversions */ diff --git a/aconversions/src/derivatives/RegExpNormalize.h b/aconversions/src/derivatives/RegExpNormalize.h new file mode 100644 index 0000000000..9ebad0e398 --- /dev/null +++ b/aconversions/src/derivatives/RegExpNormalize.h @@ -0,0 +1,38 @@ +/* + * RegExpNormalize.h + * + * Created on: 20. 1. 
2014 + * Author: tomas + */ + +#ifndef REGEXPNORMALIZE_H_ +#define REGEXPNORMALIZE_H_ + +#include <regexp/RegExp.h> +#include <regexp/RegExpElement.h> +#include <regexp/Alternation.h> +#include <regexp/Concatenation.h> +#include <regexp/Iteration.h> +#include <regexp/RegExpSymbol.h> + +#include "../utils/ConversionException.h" + +namespace conversions +{ + +class RegExpNormalize +{ +public: + regexp::RegExp normalize( regexp::RegExpElement* element ); + +private: + regexp::RegExpElement * optimize( regexp::RegExpElement* element ); + regexp::RegExpElement * optimize( regexp::Alternation * element ); + regexp::RegExpElement * optimize( regexp::Concatenation * element ); + regexp::RegExpElement * optimize( regexp::Iteration * element ); + regexp::RegExpElement * optimize( regexp::RegExpSymbol * element); +}; + +} /* namespace conversions */ + +#endif /* REGEXPNORMALIZE_H_ */ diff --git a/aconversions/src/derivatives/aderivative.cpp b/aconversions/src/derivatives/aderivative.cpp new file mode 100644 index 0000000000..9f8570bd4b --- /dev/null +++ b/aconversions/src/derivatives/aderivative.cpp @@ -0,0 +1,50 @@ +#include <iostream> +#include <string> + +#include <AlibException.h> +#include <regexp/RegExp.h> +#include <regexp/RegExpParser.h> +#include <regexp/RegExpSymbol.h> +#include <sax/SaxInterface.h> + +#include "BrzozowskiDerivative.h" + + +using namespace alib; +using namespace conversions; +using namespace regexp; +using namespace std; + +/* + * Usage: + * aderivative "a" "b" "cc" < regexp.xml + * ; aderivative regexp.xml "a" "b" "cc" + */ + +int main(int argc, char** argv) +{ + try + { + list<Token> tokens; + string input(istreambuf_iterator<char>(cin), (istreambuf_iterator<char>())); + SaxInterface::parseMemory(input, tokens); + RegExp re = RegExpParser::parse(tokens); + + list<RegExpSymbol> dString; + for( int i = 1; i < argc ; i++ ) + { + string symbol( argv[ i ] ); + // cout << "'" << symbol << "'" << endl; + dString.push_back( RegExpSymbol( symbol ) ); + } + 
BrzozowskiDerivative d( re ); + d.derivative( dString ).toXML( cout ); + } + catch( AlibException & e ) + { + cout << e.what() << endl; + return 1; + } + + return 0; +} diff --git a/aconversions/src/utils/RegExpUtils.cpp b/aconversions/src/utils/RegExpUtils.cpp index c38ac80ee0..259a4f019f 100644 --- a/aconversions/src/utils/RegExpUtils.cpp +++ b/aconversions/src/utils/RegExpUtils.cpp @@ -7,11 +7,16 @@ using namespace std; namespace conversions { +bool RegExpUtils::isRegExpEmpty( const RegExp & re ) +{ + return const_cast<RegExp&>( re ).getRegExp( ) == NULL; +} + list<RegExpSymbol*> RegExpUtils::getRegExpSymbols( const RegExp & re ) { // returning list to preserver ordering of symbols in the regexp tree list<RegExpSymbol*> alphabet; - traverseSymbols( const_cast<RegExp&>( re ).getRegExp(), alphabet ); + traverseSymbols( const_cast<RegExp&>( re ).getRegExp( ), alphabet ); return alphabet; } @@ -31,14 +36,14 @@ void RegExpUtils::traverseSymbols( RegExpElement * element, list<RegExpSymbol*> else if( alternation ) { - for( auto element : alternation->getElements() ) + for( const auto & element : alternation->getElements() ) traverseSymbols( element, alphabet ); return; } else if( concatenation ) { - for( auto element : concatenation->getElements() ) + for( const auto & element : concatenation->getElements() ) traverseSymbols( element, alphabet ); return; } @@ -53,6 +58,11 @@ void RegExpUtils::traverseSymbols( RegExpElement * element, list<RegExpSymbol*> throw ConversionException( "Captain's log. Stardate 3413.6. Approaching TraverseSymbols, class RegExpUtils planet. Encountered invalid RegExpElement. Sending away team to explore." 
); } +bool RegExpUtils::containsEpsilon( const RegExp & re ) +{ + return containsEpsilon( const_cast<RegExp&>( re ).getRegExp( ) ); +} + bool RegExpUtils::containsEpsilon( RegExpElement * element ) { Alternation* alternation = dynamic_cast<Alternation*>( element ); @@ -62,7 +72,7 @@ bool RegExpUtils::containsEpsilon( RegExpElement * element ) if( alternation ) { - for( auto e : alternation->getElements( ) ) + for( const auto & e : alternation->getElements( ) ) if( containsEpsilon( e ) ) return true; @@ -71,7 +81,7 @@ bool RegExpUtils::containsEpsilon( RegExpElement * element ) if( concatenation ) { - for( auto e : concatenation->getElements( ) ) + for( const auto & e : concatenation->getElements( ) ) if( ! containsEpsilon( e ) ) return false; diff --git a/aconversions/src/utils/RegExpUtils.h b/aconversions/src/utils/RegExpUtils.h index 684f02e6e7..cdfd791ab3 100644 --- a/aconversions/src/utils/RegExpUtils.h +++ b/aconversions/src/utils/RegExpUtils.h @@ -19,6 +19,8 @@ class RegExpUtils public: static std::list<regexp::RegExpSymbol*> getRegExpSymbols( const regexp::RegExp & re ); static bool containsEpsilon( regexp::RegExpElement * element ); + static bool containsEpsilon( const regexp::RegExp & re ); + static bool isRegExpEmpty( const regexp::RegExp & re ); private: static void traverseSymbols( regexp::RegExpElement * element, std::list<regexp::RegExpSymbol*> & alphabet ); -- GitLab