Skip to content
Snippets Groups Projects
Commit eb00bf77 authored by Tomáš Pecka's avatar Tomáš Pecka
Browse files

algo: re2rg: Brzozowski

parent 2509b901
No related branches found
No related tags found
No related merge requests found
/*
* BrzozowskiDerivationRRG.cpp
*
* Created on: 6. 3. 2014
* Author: tomas
*/
#include "BrzozowskiDerivationRRG.h"
using namespace alib;
using namespace alphabet;
using namespace grammar;
using namespace regexp;
namespace conversions
{
/*
 * Binds the converter to the regular expression it will convert.
 * The argument is stored in m_re; nothing else is set up here.
 */
BrzozowskiDerivationRRG::BrzozowskiDerivationRRG( const RegExp & re )
	: m_re( re )
{
}
/*
 * Destructor with an empty body; no explicit cleanup is performed.
 */
BrzozowskiDerivationRRG::~BrzozowskiDerivationRRG( )
{
}
/*
 * Converts the stored regular expression (m_re) to a right regular grammar
 * by repeatedly deriving it by every symbol of its alphabet.
 *
 * 1. V, the optimized source expression, is the initial expression.
 * 2. Every newly discovered expression is derived by each alphabet symbol and
 *    optimized; derivatives not seen before are queued for the next round.
 *    N accumulates every expression found.
 * 3. Each expression in N gets its own nonterminal. For an expression A,
 *    a symbol a and B = optimized derivative of A by a, the rule A -> aB is
 *    added, plus A -> a when B contains the empty string.
 * Finally, if V contains the empty string, an epsilon rule is added for the
 * start symbol; when the start symbol occurs on some right-hand side a fresh
 * start symbol (with copies of the old start symbol's rules) is created first.
 *
 * @return the grammar built by the steps above
 */
RightRegularGrammar BrzozowskiDerivationRRG::convert( void )
{
	RegExpOptimize opt;

	// 1.
	RegExp V = opt.optimize( m_re );
	set<Symbol> alphabet = m_re.getAlphabet( );

	set<RegExp> N = { V };
	deque<set<RegExp>> Ni;
	Ni.push_back( set<RegExp>( ) );
	Ni.at( 0 ).insert( V );

	int i = 1;

	// 2.
	while( ! Ni.at( i - 1 ).empty( ) )
	{
		Ni.push_back( set<RegExp>( ) ); // initialize set Q_i

		for( const auto & regexp : Ni.at( i - 1 ) )
		{
			RegExpDerivation deriv( regexp );

			for( const auto & a : alphabet )
			{
				RegExp derived = deriv.derivation( a );
				derived = opt.optimize( derived );

				if( ! isInSet( derived, N ) ) // if this state has already been found, do not add
					Ni.at( i ).insert( derived );
			}
		}

		N.insert( Ni.at( i ).begin( ), Ni.at( i ).end( ) );
		i += 1;
	}

	// 3.
	RightRegularGrammar grammar;
	map<RegExp, Symbol> nonterminalMap;
	int nonterminalId = 0;

	for( const auto & s : alphabet )
		grammar.addTerminalSymbol( s.getSymbol( ) );

	for( const auto & r : N )
	{
		Symbol nt = grammar.createUniqueNonTerminalSymbol( toBase26( nonterminalId ++ ) );
		nonterminalMap.insert( pair<RegExp, Symbol>( r, nt ) );
	}

	for( const auto & r : N )
	{
		RegExpDerivation deriv( r );

		for( const auto & a : alphabet )
		{
			RegExp derived = deriv.derivation( a );
			derived = opt.optimize( derived );

			// Every optimized derivative was collected into N in step 2, so both
			// lookups are expected to succeed.
			list<Symbol> leftSide = { nonterminalMap.find( r )->second };
			list<Symbol> rightSide = { a, nonterminalMap.find( derived )->second };

			// Named so that it does not shadow the loop variable r.
			Rule derivationRule( leftSide, rightSide );
			grammar.addRule( derivationRule );

			if( derived.containsEmptyString( ) )
			{
				list<Symbol> terminalOnly = { a };
				Rule terminatingRule( leftSide, terminalOnly );
				grammar.addRule( terminatingRule );
			}
		}
	}

	grammar.setStartSymbol( nonterminalMap.find( V )->second );

	if( V.containsEmptyString( ) )
	{
		list<Symbol> leftSide = { nonterminalMap.find( V )->second };
		list<Symbol> rightSide; // empty right-hand side

		if( grammar.isNonTerminalOnRightSideOfAnyRule( grammar.getStartSymbol( ) ) )
		{
			Symbol newStart = grammar.createUniqueNonTerminalSymbol( grammar.getStartSymbol( ).getSymbol( ), false );
			list<Symbol> leftSideNewStart = { newStart };

			// Collect the copies first and add them afterwards, so the rule
			// collection is never modified while it is being iterated.
			list<Rule> copiedRules;
			for( const auto & rule : grammar.getRules( ) )
				if( rule.getLeftSide( ).front( ) == grammar.getStartSymbol( ) )
					copiedRules.push_back( Rule( leftSideNewStart, rule.getRightSide( ) ) );

			for( const auto & rule : copiedRules )
				grammar.addRule( rule );

			grammar.setStartSymbol( newStart );
			grammar.addRule( Rule( leftSideNewStart, rightSide ) );
		}
		else
		{
			grammar.addRule( Rule ( leftSide, rightSide ) );
		}
	}

	return grammar;
}
} /* namespace conversions */
......@@ -24,7 +24,7 @@
//#include "conversions/rg2re/lrg2re/LRGAlgebraic.h"
 
//#include "conversions/re2rg/re2rrg/GlushkovRRG.h"
//#include "conversions/re2rg/re2rrg/BrzozowskiDerivationRRG.h"
#include "conversions/re2rg/re2rrg/BrzozowskiDerivationRRG.h"
 
#include "conversions/rg2rg/lrg2rrg/LeftToRightRegularGrammar.h"
#include "conversions/rg2rg/rrg2lrg/RightToLeftRegularGrammar.h"
......@@ -208,14 +208,13 @@ void ConversionHandler::convertREtoRG( void )
 
void ConversionHandler::convertREtoRRG( void )
{
const regexp::UnboundedRegExp regexp = alib::DataFactory::fromTokens<regexp::UnboundedRegExp>( m_tokens );
const regexp::RegExp regexp = alib::DataFactory::fromTokens<regexp::RegExp>(m_tokens);
 
switch( m_algorithm )
switch(m_algorithm)
{
case BRZOZOWSKI_DERIVATION: {
/* re2rg::BrzozowskiDerivationRRG conv( regexp );
grammar::RightRG rrg = conv.convert();
alib::DataFactory::toStdout(rrg);*/
re2rg::BrzozowskiDerivationRRG conv;
alib::DataFactory::toStdout(conv.convert(regexp));
break;
}
default: {
......
/*
* BrzozowskiDerivationRRG.cpp
*
* Created on: 6. 3. 2014
* Author: tomas
*/
#include "BrzozowskiDerivationRRG.h"
#include <set>
#include <deque>
#include <set>
#include <vector>
#include <label/StringLabel.h>
#include <std/hexavigesimal.h>
#include "../../../regexp/RegExpOptimize.h"
#include "../../../regexp/RegExpDerivation.h"
namespace re2rg
{
/* Constructor with an empty body; nothing is initialised here. */
BrzozowskiDerivationRRG::BrzozowskiDerivationRRG()
{
}
/* Destructor with an empty body; no explicit cleanup is performed. */
BrzozowskiDerivationRRG::~BrzozowskiDerivationRRG()
{
}
/*
 * Visitor callback for a formal regular expression.
 *
 * userData is treated as a pointer to
 * std::pair<std::set<alphabet::Symbol>, bool>. The pair's first member is
 * overwritten with the expression's alphabet and its second member with the
 * result of containsEmptyString().
 */
void BrzozowskiDerivationRRG::Visit(void* userData, const regexp::FormalRegExp& regexp)
{
	using Result = std::pair<std::set<alphabet::Symbol>, bool>;

	Result* const result = static_cast<Result*>(userData);
	result->first = regexp.getAlphabet();
	result->second = regexp.containsEmptyString();
}
/*
 * Visitor callback for an unbounded regular expression.
 *
 * userData is treated as a pointer to
 * std::pair<std::set<alphabet::Symbol>, bool>. The pair's first member is
 * overwritten with the expression's alphabet and its second member with the
 * result of containsEmptyString().
 */
void BrzozowskiDerivationRRG::Visit(void* userData, const regexp::UnboundedRegExp& regexp)
{
	using Result = std::pair<std::set<alphabet::Symbol>, bool>;

	Result* const result = static_cast<Result*>(userData);
	result->first = regexp.getAlphabet();
	result->second = regexp.containsEmptyString();
}
/*
 * Converts a regular expression to a right regular grammar by repeatedly
 * deriving it by every symbol of its alphabet.
 *
 * 1. V (a copy of the input; optimisation is currently disabled) is the
 *    initial expression. Its alphabet is obtained through the Visit callbacks.
 * 2. Every newly discovered expression is derived by each alphabet symbol;
 *    derivatives not yet in N are queued for the next round.
 * 3. Each expression in N is mapped to a nonterminal. For an expression A,
 *    a symbol a and B = derivative of A by a, the rule A -> aB is added,
 *    plus A -> a when B contains the empty string.
 * If V itself contains the empty string the grammar is flagged as generating
 * epsilon.
 *
 * @param regexp source regular expression
 * @return the grammar built by the steps above
 */
grammar::RightRG BrzozowskiDerivationRRG::convert(const regexp::RegExp& regexp)
{
	// Payload filled in by Visit(): (alphabet, contains-empty-string).
	typedef std::pair<std::set<alphabet::Symbol>, bool> RegExpInfo;

	// 1.
	// regexp::RegExpOptimize opt;
	// RegExp V = opt.optimize(regexp);
	regexp::RegExp V = regexp;

	RegExpInfo out({}, false);
	regexp.getData().Accept(static_cast<void*>(&out), *this);

	// Owning copy on purpose: Visit() assigns to the pair it is handed, so the
	// alphabet iterated below must not alias any pair that is passed to a
	// later Accept() call (that would invalidate the loop iterators).
	const std::set<alphabet::Symbol> alphabet = out.first;

	std::set<regexp::RegExp> N = { V };
	std::deque<std::set<regexp::RegExp>> Ni;
	Ni.push_back(std::set<regexp::RegExp>());
	Ni.at(0).insert(V);

	int i = 1;

	// 2.
	while(! Ni.at(i - 1).empty())
	{
		Ni.push_back(std::set<regexp::RegExp>()); // initialize set Q_i

		for(const auto & dregexp : Ni.at( i - 1 ))
		{
			regexp::RegExpDerivation deriv;

			for(const auto & a : alphabet)
			{
				string::LinearString string(std::vector<alphabet::Symbol>{a});
				regexp::RegExp derived = deriv.derivation(dregexp, string);
				// derived = opt.optimize(derived);

				// this will also add \emptyset as a regexp (and as FA state)
				if(N.count(derived) == 0) // if this state has already been found, do not add
					Ni.at(i).insert(derived);
			}
		}

		N.insert(Ni.at(i).begin(), Ni.at(i).end());
		i += 1;
	}

	// ------------------------------------------------------------------------
	// 3.
	int nonterminalId = 0;
	std::map<regexp::RegExp, alphabet::Symbol> nonterminalMap;

	alphabet::Symbol ntV(alphabet::LabeledSymbol(label::Label(label::StringLabel(std::toBase26(nonterminalId++)))));
	nonterminalMap.insert(std::make_pair(V, ntV));

	grammar::RightRG grammar(ntV);
	grammar.setTerminalAlphabet(alphabet);

	for(const auto & r : N)
	{
		if(V == r) continue; // V already owns ntV

		alphabet::Symbol nt = alphabet::createUniqueSymbol(alphabet::Symbol(alphabet::LabeledSymbol(label::Label(label::StringLabel(std::toBase26(nonterminalId++))))), grammar.getTerminalAlphabet(), grammar.getNonterminalAlphabet());
		nonterminalMap.insert(std::make_pair(r, nt));
		// NOTE(review): nt is only recorded in nonterminalMap and never handed
		// to the grammar here - confirm that addRule() accepts nonterminals
		// that were not registered beforehand.
	}

	for(const auto & r : N)
	{
		regexp::RegExpDerivation deriv;
		const alphabet::Symbol& leftSide = nonterminalMap.find(r)->second;

		for(const auto & a : alphabet)
		{
			string::LinearString string(std::vector<alphabet::Symbol>{a});
			regexp::RegExp derived = deriv.derivation(r, string);
			// derived = opt.optimize(derived);

			// Every derivative was collected into N in step 2, so the lookup
			// is expected to succeed.
			grammar.addRule(leftSide, std::make_pair(a, nonterminalMap.find(derived)->second));

			// Separate scratch pair: the visit overwrites both members.
			RegExpInfo derivedInfo({}, false);
			derived.getData().Accept(static_cast<void*>(&derivedInfo), *this);
			if(derivedInfo.second) // if(derived.containsEmptyString())
				grammar.addRule(leftSide, a);
		}
	}

	grammar.setInitialSymbol(nonterminalMap.find(V)->second);

	RegExpInfo startInfo({}, false);
	V.getData().Accept(static_cast<void*>(&startInfo), *this);
	if(startInfo.second) // if(V.containsEmptyString())
		grammar.setGeneratesEpsilon(true); // okay, because of this feature we do not have to bother with extending the grammar with new rules and nonterminals. YAY!

	return grammar;
}
} /* namespace re2rg */
......@@ -8,51 +8,35 @@
#ifndef BRZOZOWSKIDERIVATIONRRG_H_
#define BRZOZOWSKIDERIVATIONRRG_H_
 
#include <deque>
#include <set>
#include <map>
#include <alphabet/Symbol.h>
#include <grammar/Regular/RightRegularGrammar.h>
#include <grammar/Regular/RightRG.h>
#include <regexp/RegExp.h>
#include <regexp/formal/FormalRegExp.h>
#include <regexp/unbounded/UnboundedRegExp.h>
 
#include "../../include/macros.h"
#include "../../interface/IConversionRRG.h"
#include "../../shared/Hexavigesimal.h"
#include "RegExpOptimize.h"
#include "RegExpDerivation.h"
namespace conversions
namespace re2rg
{
 
/**
* Converts reg. expression to right regular grammar using brzozowski derivation algorithm.
* Source: Melichar 2.137
*/
class BrzozowskiDerivationRRG : public IConversionRRG
class BrzozowskiDerivationRRG : public regexp::VisitableRegExpBase::visitor_type
{
public:
/**
* @param re Source regular expression.
*/
BrzozowskiDerivationRRG( const regexp::RegExp & re );
~BrzozowskiDerivationRRG( void );
BrzozowskiDerivationRRG(void);
~BrzozowskiDerivationRRG(void);
 
/**
* Performs conversion.
* @return right regular grammar equivalent to source regexp.
*/
grammar::RightRegularGrammar convert( void );
grammar::RightRG convert(const regexp::RegExp& regexp);
 
protected:
/*
* input regexp
*/
const regexp::RegExp & m_re;
private:
void Visit(void*, const regexp::FormalRegExp& regexp);
void Visit(void*, const regexp::UnboundedRegExp& regexp);
};
 
} /* namespace conversions */
} /* namespace re2rg */
 
#endif /* BRZOZOWSKIDERIVATIONRRG_H_ */
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment