From 4bed3eeea4272f2cef3467fa08357b2e2f5a7774 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <peckato1@fit.cvut.cz> Date: Fri, 7 Feb 2014 11:20:55 +0100 Subject: [PATCH] Working version of Brzozowski NFA. Infinite loops for some REs. --- .../src/conversions/re2fa/Brzozowski.cpp | 88 +++++----- .../src/conversions/re2fa/Brzozowski.h | 16 +- aconversions/src/conversions/re2fa/Makefile | 7 +- .../conversions/re2fa/RegExpComparator.cpp | 157 ++++++++++++++++++ .../src/conversions/re2fa/RegExpComparator.h | 46 +++++ .../conversions/re2fa/are2fa.brzozowski.cpp | 2 +- .../src/derivatives/BrzozowskiDerivative.cpp | 7 + .../src/derivatives/BrzozowskiDerivative.h | 1 + aconversions/src/utils/RegExpUtils.cpp | 2 +- 9 files changed, 270 insertions(+), 56 deletions(-) create mode 100644 aconversions/src/conversions/re2fa/RegExpComparator.cpp create mode 100644 aconversions/src/conversions/re2fa/RegExpComparator.h diff --git a/aconversions/src/conversions/re2fa/Brzozowski.cpp b/aconversions/src/conversions/re2fa/Brzozowski.cpp index 801b20e2ef..9fe6f6bf78 100644 --- a/aconversions/src/conversions/re2fa/Brzozowski.cpp +++ b/aconversions/src/conversions/re2fa/Brzozowski.cpp @@ -6,15 +6,16 @@ */ #include "Brzozowski.h" +#include <iostream> using namespace automaton; using namespace regexp; -#include <iostream> - namespace conversions { +#define DEBUG false + Brzozowski::Brzozowski( const RegExp & re ) : AbstractREtoFAConverter( re ) { @@ -22,29 +23,40 @@ Brzozowski::Brzozowski( const RegExp & re ) : AbstractREtoFAConverter( re ) const FSM Brzozowski::convert( void ) { - /* - BrzozowskiDerivative bd( m_re ); - list<RegExpSymbol> string( 1, RegExpSymbol( "0" ) ); - bd.derivative( string ).toXML( cout ); - */ - - /* - list<RegExpSymbol*> alphabet = RegExpUtils::getRegExpSymbols( m_re ); - set<RegExp> Q = { m_re }, Qprev = { m_re }, Qcurr; + set<RegExpSymbol> alphabet = RegExpUtils::getRegExpAlphabet( m_re ); + set<RegExp, RegExpComparator> Q = { m_re }, Qprev = { m_re }, Qcurr; int i = 1; while( true ) { + if( DEBUG ) std::cout << "Round " << i << std::endl; for( const auto & regexp : Qprev ) { + + auto itFrom = Q.find( regexp ); + if( DEBUG ) std::cout << "DERIVUJI:" << std::endl; + if( DEBUG ) const_cast<RegExp&>(regexp).toXML( cout ); BrzozowskiDerivative bd( regexp ); + if( DEBUG ) std::cout << "----" << std::endl; for( const auto & symbol : alphabet ) { - const RegExp re = bd.derivative( list<RegExpSymbol>( 1, * symbol ) ); + if( DEBUG ) std::cout << "Podle: " << symbol.getSymbol() << std::endl; + const RegExp re = bd.derivative( list<RegExpElement*>( 1, new RegExpSymbol( symbol.getSymbol( ) ) ) ); + if( DEBUG ) const_cast<RegExp&>(re).toXML( cout ); if( ! RegExpUtils::isRegExpEmpty( re ) ) { - Qcurr.insert( re ); - m_transitions.push_back( BrzozowskiTransition( regexp, symbol, re ) ); + if( ! isInSet( re, Q ) ) + { + Qcurr.insert( re ); + Q.insert( re ); + auto itTo = Q.find( re ); + m_transitions.push_back( BrzozowskiTransition( * itFrom, symbol, * itTo ) ); + } + else + { + auto itTo = Q.find( re ); + m_transitions.push_back( BrzozowskiTransition( * itFrom, symbol, * itTo ) ); + } } } } @@ -52,70 +64,60 @@ const FSM Brzozowski::convert( void ) if( Qcurr.size( ) == 0 ) break; - set<RegExp> setunion; - set_union( Q.begin( ), Q.end( ), Qcurr.begin( ), Qcurr.end( ), setunion.begin( ) ); - - Q = setunion; Qprev = Qcurr; Qcurr.clear( ); + if( DEBUG ) std::cout << "-------------------------------------------------------" << std::endl; i += 1; + } + // -------------------------------- StateBuilder builder; - for( const auto & regexp : Q ) - { - const State s = builder.constructState( regexp ); - m_fsm.addState( s ); - if( RegExpUtils::containsEpsilon( regexp ) ) - m_fsm.addFinalState( s ); - } - m_fsm.addInitialState( builder.constructState ( m_re ) ); + for( const auto & r : Q ) + m_fsm.addState( builder.getState( r ) ); for( const auto & symbol : alphabet ) - m_fsm.addInputSymbol( Symbol( symbol->getSymbol( ) ) ); + m_fsm.addInputSymbol( symbol.getSymbol( ) ); for( const auto & t : m_transitions ) - { - const State from = builder.constructState( t.m_from ); - const State to = builder.constructState( t.m_to ); - const Symbol symb = Symbol( t.m_regexpSymbol->getSymbol( ) ); + m_fsm.addTransition( TransitionFSM( builder.getState( t.m_from ), Symbol( t.m_regexpSymbol.getSymbol( ) ), builder.getState( t.m_to ) ) ); - m_fsm.addTransition( from, symb, to ); - } + m_fsm.addInitialState( builder.getState( m_re ) ); - */ + for( const auto & r : Q ) + if( RegExpUtils::containsEpsilon( r ) ) + m_fsm.addFinalState( builder.getState( r ) ); return m_fsm; } // ---------------------------------------------------------------------------- -Brzozowski::BrzozowskiTransition::BrzozowskiTransition( const RegExp & from, const RegExpSymbol * symb, const RegExp & to ) +Brzozowski::BrzozowskiTransition::BrzozowskiTransition( const RegExp & from, const RegExpSymbol & symb, const RegExp & to ) : m_from( from ), m_to( to ), m_regexpSymbol( symb ) { } // ---------------------------------------------------------------------------- -/* Brzozowski::StateBuilder::StateBuilder( void ) { m_stateId = 0; } -const State & Brzozowski::StateBuilder::constructState( const RegExp & re ) +const State & Brzozowski::StateBuilder::getState( const RegExp & re ) { if( ! isKeyInMap( re, m_states ) ) - m_states[ re ] = State( createNewName( ) ); + m_states[ re ] = new State( createNewName( ) ); - return m_states[ re ]; + return * m_states[ re ]; } -const string Brzozowski::StateBuilder::createNewName( void ) +string Brzozowski::StateBuilder::createNewName( void ) { // http://en.wikipedia.org/wiki/Hexavigesimal @@ -124,13 +126,11 @@ const string Brzozowski::StateBuilder::createNewName( void ) do { unsigned int remainder = n % 26; - name.insert( 0, ( char )( remainder + 'A' ), 1 ); + name += ( char )( remainder + 'A' ); n = (n - remainder) / 26; } while (n > 0); - return name; + return string( name.rbegin( ), name.rend( ) ); } -*/ - } /* namespace conversions */ diff --git a/aconversions/src/conversions/re2fa/Brzozowski.h b/aconversions/src/conversions/re2fa/Brzozowski.h index 7e36f8f585..2a07024236 100644 --- a/aconversions/src/conversions/re2fa/Brzozowski.h +++ b/aconversions/src/conversions/re2fa/Brzozowski.h @@ -12,10 +12,12 @@ #include <map> #include <set> +#include <automaton/State.h> + #include "AbstractREtoFAConverter.h" +#include "RegExpComparator.h" #include "../../derivatives/BrzozowskiDerivative.h" #include "../../utils/RegExpUtils.h" -#include "../../utils/AutomatonUtils.h" namespace conversions { @@ -30,27 +32,25 @@ public: const automaton::FSM convert( void ); private: - /* class StateBuilder { public: StateBuilder( void ); - const automaton::State & constructState( const regexp::RegExp & re ); + const automaton::State & getState( const regexp::RegExp & re ); private: - const std::string createNewName( void ); + std::string createNewName( void ); - std::map<const regexp::RegExp, automaton::State> m_states; + std::map<const regexp::RegExp, automaton::State*, RegExpComparator> m_states; unsigned int m_stateId; }; - */ struct BrzozowskiTransition { const regexp::RegExp & m_from, & m_to; - const regexp::RegExpSymbol * m_regexpSymbol; + const regexp::RegExpSymbol & m_regexpSymbol; - BrzozowskiTransition( const regexp::RegExp & from, const regexp::RegExpSymbol * symb, const regexp::RegExp & to ); + BrzozowskiTransition( const regexp::RegExp & from, const regexp::RegExpSymbol & symb, const regexp::RegExp & to ); }; std::list<BrzozowskiTransition> m_transitions; diff --git a/aconversions/src/conversions/re2fa/Makefile b/aconversions/src/conversions/re2fa/Makefile index a9def9363d..7d9ba70052 100644 --- a/aconversions/src/conversions/re2fa/Makefile +++ b/aconversions/src/conversions/re2fa/Makefile @@ -6,7 +6,7 @@ all: are2fa.glushkov are2fa.brzozowski are2fa.thompson are2fa.glushkov: are2fa.glushkov.o Glushkov.o AbstractREtoFAConverter.o RegExpUtils.o ConversionException.o $(LD) $(LDFLAGS) $^ -o $@ -are2fa.brzozowski: are2fa.brzozowski.o Brzozowski.o AbstractREtoFAConverter.o BrzozowskiDerivative.o ConversionException.o RegExpNormalize.o RegExpUtils.o +are2fa.brzozowski: are2fa.brzozowski.o Brzozowski.o AbstractREtoFAConverter.o BrzozowskiDerivative.o ConversionException.o RegExpComparator.o RegExpNormalize.o RegExpUtils.o $(LD) $(LDFLAGS) $^ -o $@ are2fa.thompson: are2fa.thompson.o Thompson.o AbstractREtoFAConverter.o AutomatonUtils.o ConversionException.o RegExpUtils.o @@ -26,7 +26,10 @@ are2fa.thompson.o: are2fa.thompson.cpp Thompson.h AbstractREtoFAConverter.h AbstractREtoFAConverter.o: AbstractREtoFAConverter.cpp AbstractREtoFAConverter.h $(CXX) $(CXXFLAGS) $< -o $@ -Brzozowski.o: Brzozowski.cpp Brzozowski.h AbstractREtoFAConverter.h +Brzozowski.o: Brzozowski.cpp Brzozowski.h AbstractREtoFAConverter.h RegExpComparator.h + $(CXX) $(CXXFLAGS) $< -o $@ + +RegExpComparator.o: RegExpComparator.cpp RegExpComparator.h $(CXX) $(CXXFLAGS) $< -o $@ Glushkov.o: Glushkov.cpp Glushkov.h AbstractREtoFAConverter.h diff --git a/aconversions/src/conversions/re2fa/RegExpComparator.cpp b/aconversions/src/conversions/re2fa/RegExpComparator.cpp new file mode 100644 index 0000000000..0f2313bbdb --- /dev/null +++ b/aconversions/src/conversions/re2fa/RegExpComparator.cpp @@ -0,0 +1,157 @@ +/* + * RegExpComparator.cpp + * + * Created on: 5. 2. 2014 + * Author: tomas + */ + +#include "RegExpComparator.h" +#include <typeinfo> +#include <iostream> +using namespace regexp; + +namespace conversions +{ + +RegExpComparator::RegExpComparator( void ) +{ + +} + +bool RegExpComparator::operator() ( const RegExp & lhs, const RegExp & rhs ) //const +{ + RegExpElement * leftRoot = const_cast<RegExp&>( lhs ).getRegExp( ); + RegExpElement * rightRoot = const_cast<RegExp&>( rhs ).getRegExp( ); + + if( ! leftRoot ){ + std::cout << "NOLEFT" << std::endl; + const_cast<RegExp&>(lhs).toXML( cout ); + } + if( ! rightRoot ){ + std::cout << "NORIGHT" << std::endl; + const_cast<RegExp&>(rhs).toXML( cout ); + } + + return compare( leftRoot, rightRoot ) == -1; +} + +int RegExpComparator::compare( RegExpElement * lhs, RegExpElement * rhs ) const +{ + Alternation* lhsAlt = dynamic_cast<Alternation*>( lhs ), *rhsAlt = dynamic_cast<Alternation*>( rhs ); + Concatenation* lhsConcat = dynamic_cast<Concatenation*>( lhs ), *rhsConcat = dynamic_cast<Concatenation*>( rhs ); + Iteration* lhsIter = dynamic_cast<Iteration*>( lhs ), *rhsIter = dynamic_cast<Iteration*>( rhs ); + RegExpSymbol* lhsSymb = dynamic_cast<RegExpSymbol*>( lhs ), *rhsSymb = dynamic_cast<RegExpSymbol*>( rhs ); + RegExpEmpty* lhsEmpty = dynamic_cast<RegExpEmpty*>( lhs ), *rhsEmpty = dynamic_cast<RegExpEmpty*>( rhs ); + RegExpEpsilon* lhsEps = dynamic_cast<RegExpEpsilon*>( lhs ), *rhsEps = dynamic_cast<RegExpEpsilon*>( rhs ); + + if( ( lhsAlt && rhsAlt ) || ( lhsConcat && rhsConcat ) || ( lhsIter && rhsIter ) || ( lhsSymb && rhsSymb ) || + ( lhsEmpty && rhsEmpty ) || ( lhsEps && rhsEps ) ) + { + if( lhsAlt ) + return compare( lhsAlt, rhsAlt ); + if( lhsConcat ) + return compare( lhsConcat, rhsConcat ); + if( lhsIter ) + return compare( lhsIter, rhsIter ); + if( lhsSymb ) + return compare( lhsSymb, rhsSymb ); + if( lhsEmpty ) + return compare( lhsEmpty, rhsEmpty ); + if( lhsEps ) + return compare( lhsEps, rhsEps ); + } + else + { + int leftOrder = 0, rightOrder = 0; + + if( lhsAlt ) leftOrder = 1; + if( lhsConcat ) leftOrder = 2; + if( lhsIter ) leftOrder = 3; + if( lhsSymb ) leftOrder = 4; + if( lhsEmpty ) leftOrder = 5; + if( lhsEps ) leftOrder = 6; + + if( rhsAlt ) rightOrder = 1; + if( rhsConcat ) rightOrder = 2; + if( rhsIter ) rightOrder = 3; + if( rhsSymb ) rightOrder = 4; + if( rhsEmpty ) rightOrder = 5; + if( rhsEps ) rightOrder = 6; + + if ( leftOrder < rightOrder ) return -1; + if ( leftOrder > rightOrder ) return 1; + return 0; + } + + throw ConversionException( "such exception, many errors" ); +} + +int RegExpComparator::compare( Alternation * lhs, Alternation * rhs ) const +{ + auto lhsEnd = lhs->getElements( ).end( ); + auto rhsEnd = rhs->getElements( ).end( ); + + for( auto lhsIt = lhs->getElements( ).begin( ), rhsIt = rhs->getElements( ).begin( ); ; lhsIt ++, rhsIt ++ ) + { + if( lhsIt == lhsEnd && rhsIt != rhsEnd ) + return -1; + if( lhsIt != lhsEnd && rhsIt == rhsEnd ) + return 1; + if( lhsIt == lhsEnd && rhsIt == rhsEnd ) + return 0; + + int res = compare ( *lhsIt, *rhsIt ); + if( res != 0 ) + return res; + } + + return 0; +} + +int RegExpComparator::compare( Concatenation * lhs, Concatenation * rhs ) const +{ + auto lhsEnd = lhs->getElements( ).end( ); + auto rhsEnd = rhs->getElements( ).end( ); + + for( auto lhsIt = lhs->getElements( ).begin( ), rhsIt = rhs->getElements( ).begin( ); ; lhsIt ++, rhsIt ++ ) + { + if( lhsIt == lhsEnd && rhsIt != rhsEnd ) + return -1; + if( lhsIt != lhsEnd && rhsIt == rhsEnd ) + return 1; + if( lhsIt == lhsEnd && rhsIt == rhsEnd ) + return 0; + + int res = compare ( *lhsIt, *rhsIt ); + if( res != 0 ) + return res; + } + + return 0; +} + +int RegExpComparator::compare( Iteration * lhs, Iteration * rhs ) const +{ + return compare( lhs->getElement( ), rhs->getElement( ) ); +} + +int RegExpComparator::compare( RegExpSymbol * lhs, RegExpSymbol * rhs ) const +{ + if( lhs->getSymbol( ) < rhs->getSymbol( ) ) + return -1; + if( lhs->getSymbol( ) > rhs->getSymbol( ) ) + return 1; + return 0; +} + +int RegExpComparator::compare( RegExpEmpty * lhs, RegExpEmpty * rhs ) const +{ + return 0; +} + +int RegExpComparator::compare( RegExpEpsilon * lhs, RegExpEpsilon * rhs ) const +{ + return 0; +} + +} /* namespace conversions */ diff --git a/aconversions/src/conversions/re2fa/RegExpComparator.h b/aconversions/src/conversions/re2fa/RegExpComparator.h new file mode 100644 index 0000000000..dc267d5293 --- /dev/null +++ b/aconversions/src/conversions/re2fa/RegExpComparator.h @@ -0,0 +1,46 @@ +/* + * RegExpComparator.h + * + * Created on: 5. 2. 2014 + * Author: tomas + */ + +#ifndef REGEXPCOMPARATOR_H_ +#define REGEXPCOMPARATOR_H_ + +#include <map> +#include <string> + +#include <regexp/RegExp.h> +#include <regexp/RegExpElement.h> +#include <regexp/Alternation.h> +#include <regexp/Concatenation.h> +#include <regexp/Iteration.h> +#include <regexp/RegExpSymbol.h> +#include <regexp/RegExpEmpty.h> +#include <regexp/RegExpEpsilon.h> + +#include "../../utils/ConversionException.h" + +namespace conversions +{ + +class RegExpComparator +{ +public: + RegExpComparator( void ); + bool operator() ( const regexp::RegExp & lhs, const regexp::RegExp & rhs ); //const; + +private: + int compare( regexp::RegExpElement * lhs, regexp::RegExpElement * rhs ) const; + int compare( regexp::Alternation * lhs, regexp::Alternation * rhs ) const; + int compare( regexp::Concatenation * lhs, regexp::Concatenation * rhs ) const; + int compare( regexp::Iteration * lhs, regexp::Iteration * rhs ) const; + int compare( regexp::RegExpSymbol * lhs, regexp::RegExpSymbol * rhs ) const; + int compare( regexp::RegExpEmpty * lhs, regexp::RegExpEmpty * rhs ) const; + int compare( regexp::RegExpEpsilon * lhs, regexp::RegExpEpsilon * rhs ) const; +}; + +} /* namespace conversions */ + +#endif /* REGEXPCOMPARATOR_H_ */ diff --git a/aconversions/src/conversions/re2fa/are2fa.brzozowski.cpp b/aconversions/src/conversions/re2fa/are2fa.brzozowski.cpp index 63efce4a2e..e23912e899 100644 --- a/aconversions/src/conversions/re2fa/are2fa.brzozowski.cpp +++ b/aconversions/src/conversions/re2fa/are2fa.brzozowski.cpp @@ -25,7 +25,7 @@ int main(int argc, char** argv) RegExp re = RegExpParser::parse(tokens); Brzozowski conv( re ); - conv.convert( );//.toXML( cout ); + conv.convert( ).toXML( cout ); } catch( AlibException & e ) { diff --git a/aconversions/src/derivatives/BrzozowskiDerivative.cpp b/aconversions/src/derivatives/BrzozowskiDerivative.cpp index 6bf15d2e57..c74652bb99 100644 --- a/aconversions/src/derivatives/BrzozowskiDerivative.cpp +++ b/aconversions/src/derivatives/BrzozowskiDerivative.cpp @@ -17,12 +17,19 @@ namespace conversions BrzozowskiDerivative::BrzozowskiDerivative( const RegExp & re ) : m_re( re ) { //FIXME in alib! + RegExpNormalize norm; m_regexpRoot = const_cast<RegExp&>( m_re ).getRegExp( ); + + m_reNorm = norm.normalize( m_regexpRoot ); + m_regexpRoot = m_reNorm.getRegExp( ); + + // const_cast<RegExp&>(m_reNorm).toXML( cout ); } RegExp BrzozowskiDerivative::derivative ( const list<RegExpElement*> & dString ) const { RegExpElement * expression = m_regexpRoot; + for( const auto & dSymbol : dString ) // dV/d(ab) = d( dV/da )/db { // FIXME: memory leak diff --git a/aconversions/src/derivatives/BrzozowskiDerivative.h b/aconversions/src/derivatives/BrzozowskiDerivative.h index d9cc458e47..135bdf4aff 100644 --- a/aconversions/src/derivatives/BrzozowskiDerivative.h +++ b/aconversions/src/derivatives/BrzozowskiDerivative.h @@ -48,6 +48,7 @@ private: regexp::RegExpElement * derivative( regexp::RegExpEmpty * element, const regexp::RegExpSymbol & dSymbol ) const; const regexp::RegExp & m_re; + regexp::RegExp m_reNorm; regexp::RegExpElement* m_regexpRoot; }; diff --git a/aconversions/src/utils/RegExpUtils.cpp b/aconversions/src/utils/RegExpUtils.cpp index 86b444a438..431ba67ba8 100644 --- a/aconversions/src/utils/RegExpUtils.cpp +++ b/aconversions/src/utils/RegExpUtils.cpp @@ -11,7 +11,7 @@ namespace conversions bool RegExpUtils::isRegExpEmpty( const RegExp & re ) { - return const_cast<RegExp&>( re ).getRegExp( ) == NULL; + return dynamic_cast<RegExpEmpty*>( const_cast<RegExp&>( re ).getRegExp( ) ) != NULL; } set<RegExpSymbol> RegExpUtils::getRegExpAlphabet( const RegExp & re ) -- GitLab