From eb00bf77f770eaee57b07ec40864700d1b03c5ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <peckato1@fit.cvut.cz> Date: Sun, 7 Sep 2014 21:36:35 +0200 Subject: [PATCH] algo: re2rg: Brzozowski --- .../re2rg/re2rrg/BrzozowskiDerivationRRG.cpp | 134 ------------------ .../re2rg/re2rrg/BrzozowskiDerivationRRG.h | 58 -------- aconversions2/src/ConversionHandler.cpp | 11 +- .../re2rg/re2rrg/BrzozowskiDerivationRRG.cpp | 132 +++++++++++++++++ .../re2rg/re2rrg/BrzozowskiDerivationRRG.h | 42 ++++++ 5 files changed, 179 insertions(+), 198 deletions(-) delete mode 100644 aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp delete mode 100644 aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.h create mode 100644 alib2algo/src/conversions/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp create mode 100644 alib2algo/src/conversions/re2rg/re2rrg/BrzozowskiDerivationRRG.h diff --git a/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp b/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp deleted file mode 100644 index 246199f696..0000000000 --- a/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp +++ /dev/null @@ -1,134 +0,0 @@ -/* - * BrzozowskiDerivationRRG.cpp - * - * Created on: 6. 3. 2014 - * Author: tomas - */ - -#include "BrzozowskiDerivationRRG.h" - -using namespace alib; -using namespace alphabet; -using namespace grammar; -using namespace regexp; - -namespace conversions -{ - -BrzozowskiDerivationRRG::BrzozowskiDerivationRRG( const RegExp & re ) : m_re( re ) -{ - -} - -BrzozowskiDerivationRRG::~BrzozowskiDerivationRRG( void ) -{ - -} - -RightRegularGrammar BrzozowskiDerivationRRG::convert( void ) -{ - RegExpOptimize opt; - - // 1. - RegExp V = opt.optimize( m_re ); - - set<Symbol> alphabet = m_re.getAlphabet( ); - - set<RegExp> N = { V }; - deque<set<RegExp>> Ni; - - Ni.push_back( set<RegExp>( ) ); - Ni.at( 0 ).insert( V ); - - int i = 1; - - // 2. - while( ! 
Ni.at( i - 1 ).empty( ) ) - { - Ni.push_back( set<RegExp>( ) ); // initialize set Q_i - - for( const auto & regexp : Ni.at( i - 1 ) ) - { - RegExpDerivation deriv( regexp ); - - for( const auto & a : alphabet ) - { - RegExp derived = deriv.derivation( a ); - derived = opt.optimize( derived ); - - if( ! isInSet( derived, N ) ) // if this state has already been found, do not add - Ni.at( i ).insert( derived ); - } - } - - N.insert( Ni.at( i ).begin( ), Ni.at( i ).end( ) ); - i += 1; - } - - // 3. - RightRegularGrammar grammar; - map<RegExp, Symbol> nonterminalMap; - int nonterminalId = 0; - - for( const auto & s : alphabet ) - grammar.addTerminalSymbol( s.getSymbol( ) ); - - for( const auto & r : N ) - { - Symbol nt = grammar.createUniqueNonTerminalSymbol( toBase26( nonterminalId ++ ) ); - nonterminalMap.insert( pair<RegExp, Symbol>( r, nt ) ); - } - - for( const auto & r : N ) - { - RegExpDerivation deriv( r ); - - for( const auto & a : alphabet ) - { - RegExp derived = deriv.derivation( a ); - derived = opt.optimize( derived ); - - list<Symbol> leftSide = { nonterminalMap.find( r )->second }; - list<Symbol> rightSide = { a, nonterminalMap.find( derived )->second }; - - Rule r( leftSide, rightSide ); - grammar.addRule( r ); - - if( derived.containsEmptyString( ) ) - { - list<Symbol> rightSide = { a }; - Rule r( leftSide, rightSide ); - grammar.addRule( r ); - } - } - } - - grammar.setStartSymbol( nonterminalMap.find( V )->second ); - - if( V.containsEmptyString( ) ) - { - list<Symbol> leftSide = { nonterminalMap.find( V )->second }; - list<Symbol> rightSide; - - if( grammar.isNonTerminalOnRightSideOfAnyRule( grammar.getStartSymbol( ) ) ) - { - Symbol newStart = grammar.createUniqueNonTerminalSymbol( grammar.getStartSymbol( ).getSymbol( ), false ); - - list<Symbol> leftSideNewStart = { newStart }; - for( const auto & rule : grammar.getRules( ) ) - if( rule.getLeftSide( ).front( ) == grammar.getStartSymbol( ) ) - grammar.addRule( Rule( leftSideNewStart, 
rule.getRightSide( ) ) ); - - grammar.setStartSymbol( newStart ); - grammar.addRule( Rule( leftSideNewStart, rightSide ) ); - } - else - { - grammar.addRule( Rule ( leftSide, rightSide ) ); - } - } - - return grammar; -} - -} /* namespace conversions */ diff --git a/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.h b/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.h deleted file mode 100644 index 28be812b35..0000000000 --- a/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * BrzozowskiDerivationRRG.h - * - * Created on: 6. 3. 2014 - * Author: tomas - */ - -#ifndef BRZOZOWSKIDERIVATIONRRG_H_ -#define BRZOZOWSKIDERIVATIONRRG_H_ - -#include <deque> -#include <set> -#include <map> - -#include <alphabet/Symbol.h> -#include <grammar/Regular/RightRegularGrammar.h> -#include <regexp/RegExp.h> - -#include "../../include/macros.h" -#include "../../interface/IConversionRRG.h" -#include "../../shared/Hexavigesimal.h" - -#include "RegExpOptimize.h" -#include "RegExpDerivation.h" - -namespace conversions -{ - -/** - * Converts reg. expression to right regular grammar using brzozowski derivation algorithm. - * Source: Melichar 2.137 - */ -class BrzozowskiDerivationRRG : public IConversionRRG -{ -public: - /** - * @param re Source regular expression. - */ - BrzozowskiDerivationRRG( const regexp::RegExp & re ); - - ~BrzozowskiDerivationRRG( void ); - - /** - * Performs conversion. - * @return right regular grammar equivalent to source regexp. 
- */ - grammar::RightRegularGrammar convert( void ); - -protected: - /* - * input regexp - */ - const regexp::RegExp & m_re; -}; - -} /* namespace conversions */ - -#endif /* BRZOZOWSKIDERIVATIONRRG_H_ */ diff --git a/aconversions2/src/ConversionHandler.cpp b/aconversions2/src/ConversionHandler.cpp index 18673ede36..a3c7a957a7 100644 --- a/aconversions2/src/ConversionHandler.cpp +++ b/aconversions2/src/ConversionHandler.cpp @@ -24,7 +24,7 @@ //#include "conversions/rg2re/lrg2re/LRGAlgebraic.h" //#include "conversions/re2rg/re2rrg/GlushkovRRG.h" -//#include "conversions/re2rg/re2rrg/BrzozowskiDerivationRRG.h" +#include "conversions/re2rg/re2rrg/BrzozowskiDerivationRRG.h" #include "conversions/rg2rg/lrg2rrg/LeftToRightRegularGrammar.h" #include "conversions/rg2rg/rrg2lrg/RightToLeftRegularGrammar.h" @@ -208,14 +208,13 @@ void ConversionHandler::convertREtoRG( void ) void ConversionHandler::convertREtoRRG( void ) { - const regexp::UnboundedRegExp regexp = alib::DataFactory::fromTokens<regexp::UnboundedRegExp>( m_tokens ); + const regexp::RegExp regexp = alib::DataFactory::fromTokens<regexp::RegExp>(m_tokens); - switch( m_algorithm ) + switch(m_algorithm) { case BRZOZOWSKI_DERIVATION: { -/* re2rg::BrzozowskiDerivationRRG conv( regexp ); - grammar::RightRG rrg = conv.convert(); - alib::DataFactory::toStdout(rrg);*/ + re2rg::BrzozowskiDerivationRRG conv; + alib::DataFactory::toStdout(conv.convert(regexp)); break; } default: { diff --git a/alib2algo/src/conversions/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp b/alib2algo/src/conversions/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp new file mode 100644 index 0000000000..af145f5ca2 --- /dev/null +++ b/alib2algo/src/conversions/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp @@ -0,0 +1,132 @@ +/* + * BrzozowskiDerivationRRG.cpp + * + * Created on: 6. 3. 
2014
+ * Author: tomas
+ */
+
+#include "BrzozowskiDerivationRRG.h"
+
+#include <set>
+#include <deque>
+#include <map>
+#include <vector>
+
+#include <label/StringLabel.h>
+#include <std/hexavigesimal.h>
+
+#include "../../../regexp/RegExpOptimize.h"
+#include "../../../regexp/RegExpDerivation.h"
+
+namespace re2rg
+{
+
+BrzozowskiDerivationRRG::BrzozowskiDerivationRRG(void){}
+BrzozowskiDerivationRRG::~BrzozowskiDerivationRRG(void){}
+
+// Visitor callbacks: userData points to a std::pair<std::set<alphabet::Symbol>, bool>; both members are overwritten with the visited regexp's alphabet and containsEmptyString().
+void BrzozowskiDerivationRRG::Visit(void* userData, const regexp::FormalRegExp& regexp)
+{
+	std::pair<std::set<alphabet::Symbol>, bool>& out = *(std::pair<std::set<alphabet::Symbol>, bool>*) userData;
+	out.first = regexp.getAlphabet();
+	out.second = regexp.containsEmptyString();
+}
+void BrzozowskiDerivationRRG::Visit(void* userData, const regexp::UnboundedRegExp& regexp)
+{
+	std::pair<std::set<alphabet::Symbol>, bool>& out = *(std::pair<std::set<alphabet::Symbol>, bool>*) userData;
+	out.first = regexp.getAlphabet();
+	out.second = regexp.containsEmptyString();
+}
+// Builds a right regular grammar with one nonterminal per distinct derivative of regexp; returns that grammar.
+grammar::RightRG BrzozowskiDerivationRRG::convert(const regexp::RegExp& regexp)
+{
+	// 1. The initial regexp V is the input itself (the optimization step below is currently disabled).
+	// regexp::RegExpOptimize opt;
+	// RegExp V = opt.optimize(regexp);
+	regexp::RegExp V = regexp;
+
+	std::pair<std::set<alphabet::Symbol>, bool> out({}, false); // scratch result filled by Visit(): (alphabet, contains epsilon)
+	regexp.getData().Accept((void*) &out, *this);
+	const std::set<alphabet::Symbol> alphabet = out.first; // copy on purpose: out is reused by the Accept() calls below, which overwrite out.first while alphabet is being iterated
+
+	std::set<regexp::RegExp> N = { V }; // every regexp found so far
+	std::deque<std::set<regexp::RegExp>> Ni; // Ni[i] = regexps first found in round i
+
+	Ni.push_back(std::set<regexp::RegExp>());
+	Ni.at(0).insert(V);
+
+	int i = 1;
+
+	// 2. Derive every regexp of the previous round by every symbol until a round finds nothing new.
+	while(! Ni.at(i - 1).empty())
+	{
+		Ni.push_back(std::set<regexp::RegExp>()); // initialize set Q_i
+
+		for(const auto & dregexp : Ni.at( i - 1 ))
+		{
+			regexp::RegExpDerivation deriv;
+
+			for(const auto & a : alphabet)
+			{
+				string::LinearString string(std::vector<alphabet::Symbol>{a});
+				regexp::RegExp derived = deriv.derivation(dregexp, string);
+				// derived = opt.optimize(derived);
+
+				// this will also add \emptyset as a regexp (and as FA state)
+				if(N.count(derived) == 0) // if this state has already been found, do not add
+					Ni.at(i).insert(derived);
+			}
+		}
+
+		N.insert(Ni.at(i).begin(), Ni.at(i).end());
+		i += 1;
+	}
+
+	// ------------------------------------------------------------------------
+	// 3. Assign a nonterminal to every regexp in N and emit the rules.
+
+	int nonterminalId = 0;
+	std::map<regexp::RegExp, alphabet::Symbol> nonterminalMap;
+
+	alphabet::Symbol ntV(alphabet::LabeledSymbol(label::Label(label::StringLabel(std::toBase26(nonterminalId++)))));
+	nonterminalMap.insert(std::make_pair(V, ntV));
+
+	grammar::RightRG grammar(ntV);
+	grammar.setTerminalAlphabet(alphabet);
+
+	for(const auto & r : N)
+	{
+		if(V == r) continue; // V is already mapped to ntV above
+
+		alphabet::Symbol nt = alphabet::createUniqueSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel(std::toBase26(nonterminalId++))))), grammar.getTerminalAlphabet(), grammar.getNonterminalAlphabet());
+		nonterminalMap.insert(std::make_pair(r, nt)); // NOTE(review): nt is only stored in the map here, not registered with the grammar - confirm addRule() does not require that
+	}
+
+	for(const auto & r : N)
+	{
+		regexp::RegExpDerivation deriv;
+
+		for(const auto & a : alphabet)
+		{
+			string::LinearString string(std::vector<alphabet::Symbol>{a});
+			regexp::RegExp derived = deriv.derivation(r, string);
+			// derived = opt.optimize(derived);
+
+			grammar.addRule(nonterminalMap.find(r)->second, std::make_pair(a, nonterminalMap.find(derived)->second));
+
+			derived.getData().Accept((void*) &out, *this); // overwrites out (both members); only out.second is used
+			if(out.second) // if(derived.containsEmptyString())
+				grammar.addRule(nonterminalMap.find(r)->second, a);
+		}
+	}
+
+	grammar.setInitialSymbol(nonterminalMap.find(V)->second);
+
+	V.getData().Accept((void*) &out, *this);
+	if(out.second) // if(V.containsEmptyString())
+		grammar.setGeneratesEpsilon(true); // okay, because of this feature we do not have to bother with extending the grammar with new rules and nonterminals. YAY!
+
+	return grammar;
+}
+
+} /* namespace re2rg */
diff --git a/alib2algo/src/conversions/re2rg/re2rrg/BrzozowskiDerivationRRG.h b/alib2algo/src/conversions/re2rg/re2rrg/BrzozowskiDerivationRRG.h
new file mode 100644
index 0000000000..b61df215fa
--- /dev/null
+++ b/alib2algo/src/conversions/re2rg/re2rrg/BrzozowskiDerivationRRG.h
@@ -0,0 +1,42 @@
+/*
+ * BrzozowskiDerivationRRG.h
+ *
+ * Created on: 6. 3. 2014
+ * Author: tomas
+ */
+
+#ifndef BRZOZOWSKIDERIVATIONRRG_H_
+#define BRZOZOWSKIDERIVATIONRRG_H_
+
+#include <grammar/Regular/RightRG.h>
+#include <regexp/RegExp.h>
+#include <regexp/formal/FormalRegExp.h>
+#include <regexp/unbounded/UnboundedRegExp.h>
+
+namespace re2rg
+{
+
+/**
+ * Converts a regular expression to a right regular grammar using the Brzozowski derivative algorithm.
+ * Source: Melichar 2.137
+ */
+class BrzozowskiDerivationRRG : public regexp::VisitableRegExpBase::visitor_type
+{
+public:
+	BrzozowskiDerivationRRG(void);
+	~BrzozowskiDerivationRRG(void);
+
+	/**
+	 * Performs conversion. @param regexp source regular expression (formal or unbounded).
+	 * @return right regular grammar equivalent to source regexp.
+	 */
+	grammar::RightRG convert(const regexp::RegExp& regexp);
+
+private:
+	void Visit(void*, const regexp::FormalRegExp& regexp); // stores alphabet and containsEmptyString() of regexp into the pair passed as user data
+	void Visit(void*, const regexp::UnboundedRegExp& regexp); // same, for the unbounded representation
+};
+
+} /* namespace re2rg */
+
+#endif /* BRZOZOWSKIDERIVATIONRRG_H_ */
-- 
GitLab