Skip to content
Snippets Groups Projects
Commit 15781e4c authored by Tomáš Pecka's avatar Tomáš Pecka
Browse files

algo: re2rrg: Glushkov

parent 9daecb34
No related branches found
No related tags found
No related merge requests found
/*
* GlushkovRRG.cpp
*
* Created on: 11. 1. 2014
* Author: tomas
*/
#include "GlushkovRRG.h"
using namespace alib;
using namespace alphabet;
using namespace grammar;
using namespace regexp;
using namespace std;
namespace conversions
{
/**
 * Binds the converter to the regular expression it will convert.
 *
 * Only a reference to the expression is kept (m_re), so the caller has to
 * keep the expression alive for as long as this converter is used.
 *
 * @param re Source regular expression.
 */
GlushkovRRG::GlushkovRRG( const RegExp & re )
	: m_re( re )
{
}
/**
 * Destructor. Performs no explicit cleanup.
 */
GlushkovRRG::~GlushkovRRG( )
{
}
/**
 * Converts the bound regular expression (m_re) into a right regular grammar
 * following the Glushkov construction.
 *
 * The intermediate sets are stored in the members m_first, m_last, m_pairs
 * and m_symbolMap while the grammar is being built.
 *
 * @return Right regular grammar built from the source regexp.
 */
RightRegularGrammar GlushkovRRG::convert( void )
{
	RightRegularGrammar rrg;

	// step 1: every letter of the regexp alphabet becomes a terminal
	for( auto const& letter : m_re.getAlphabet( ) )
	{
		rrg.addTerminalSymbol( letter.getSymbol( ) );
	}

	// steps 2, 3, 4: first set, last set and the set of (symbol, follower) pairs
	m_first = GlushkovTraversal::first( m_re );
	m_last = GlushkovTraversal::last( m_re );
	for( auto const& from : GlushkovTraversal::getSymbols( m_re ) )
	{
		for( auto const& to : GlushkovTraversal::follow( m_re, from ) )
		{
			m_pairs.insert( GlushkovPair( from, to ) );
		}
	}
	// the "regexp contains the empty string" case is handled at the end of step 7

	// step 5: start nonterminal plus one nonterminal per Glushkov symbol,
	// named by the input symbol followed by the symbol's id
	Symbol S = rrg.createUniqueNonTerminalSymbol( "S" );
	rrg.setStartSymbol( S );
	for( auto const& position : GlushkovTraversal::getSymbols( m_re ) )
	{
		Symbol nonTerminal = rrg.createUniqueNonTerminalSymbol( position.getInputSymbol().getSymbol() + to_string( position.getId( ) ) );
		m_symbolMap.insert( std::pair<GlushkovSymbol, Symbol>( position, nonTerminal ) );
	}

	// step 6a: S -> x X for every symbol in the first set
	for( auto const& position : m_first )
	{
		Symbol const& target = m_symbolMap.find( position )->second;

		list<Symbol> lhs{ S };
		list<Symbol> rhs{ position.getInputSymbol( ), target };
		rrg.addRule( Rule( lhs, rhs ) );
	}

	// step 6b: A -> b B for every pair (a, b) where b may follow a
	for( auto const& follower : m_pairs )
	{
		Symbol const& source = m_symbolMap.find( follower.getFirst( ) )->second;
		Symbol const& target = m_symbolMap.find( follower.getSecond( ) )->second;

		list<Symbol> lhs{ source };
		list<Symbol> rhs{ follower.getSecond( ).getInputSymbol( ), target };
		rrg.addRule( Rule( lhs, rhs ) );
	}

	// step 7: for every symbol in the last set, each rule whose right side
	// mentions that symbol's nonterminal gets a sibling rule with the same
	// left side and only the input symbol on the right, unless already present
	for( auto const& position : m_last )
	{
		Symbol const& target = m_symbolMap.find( position )->second;

		for( auto const & rule : rrg.getRules( ) )
		{
			if( ! isInList( target, rule.getRightSide( ) ) )
				continue;

			list<Symbol> lhs = rule.getLeftSide( );
			list<Symbol> rhs{ position.getInputSymbol( ) };
			Rule terminating( lhs, rhs );

			if( isInSet( terminating, rrg.getRules( ) ) )
				continue;

			rrg.addRule( terminating );
		}
	}

	// the start symbol gets a rule with an empty right side when the regexp
	// contains the empty string
	if( m_re.containsEmptyString( ) )
	{
		list<Symbol> lhs{ S };
		list<Symbol> rhs;
		rrg.addRule( Rule( lhs, rhs ) );
	}

	return rrg;
}
} /* namespace conversions */
......@@ -24,7 +24,7 @@
//#include "conversions/rg2re/rrg2re/RRGAlgebraic.h"
//#include "conversions/rg2re/lrg2re/LRGAlgebraic.h"
 
//#include "conversions/re2rg/re2rrg/GlushkovRRG.h"
#include "conversions/re2rg/re2rrg/GlushkovRRG.h"
#include "conversions/re2rg/re2rrg/BrzozowskiDerivationRRG.h"
 
#include "conversions/rg2rg/lrg2rrg/LeftToRightRegularGrammar.h"
......@@ -249,9 +249,7 @@ void ConversionHandler::convertREtoRRG( void )
break;
}
default: {
/* re2rg::GlushkoRRG conv( regexp );
grammar::RightRG rrg = conv.convert();
alib::DataFactory::toStdout(rrg);*/
alib::DataFactory::toStdout(re2rg::GlushkovRRG::convert(regexp));
break;
}
}
......
/*
* GlushkovRRG.cpp
*
* Created on: 11. 1. 2014
* Author: Tomas Pecka
*/
#include "GlushkovRRG.h"
#include <algorithm>
#include <memory>
#include <utility>
#include <alphabet/LabeledSymbol.h>
#include <label/CharacterLabel.h>
#include <label/IntegerLabel.h>
#include <label/LabelPairLabel.h>
#include <label/StringLabel.h>
#include "../../../regexp/GlushkovTraversal.h"
#include "../../../regexp/GlushkovPair.h"
#include "../../../regexp/GlushkovSymbol.h"
namespace re2rg
{
/**
 * Converts any regular expression to a grammar by dispatching on the
 * concrete expression type through the visitor interface.
 *
 * The Visit() overloads store a heap-allocated grammar through the userData
 * pointer; this function takes ownership of it and returns it by value.
 *
 * @param regexp Source regular expression.
 * @return Grammar produced by the Visit() overload that was dispatched to.
 * @throws exception::AlibException if no grammar was produced; exceptions
 *         thrown by the Visit() overloads (e.g. for FormalRegExp) propagate.
 */
grammar::Grammar GlushkovRRG::convert(const regexp::RegExp& regexp)
{
	grammar::Grammar* out = nullptr;
	regexp.getData().Accept(static_cast<void*>(&out), GlushkovRRG::GLUSHKOV_RRG);

	// Take ownership right away so the allocation is released even if the
	// move construction of the return value throws.
	const std::unique_ptr<grammar::Grammar> owned(out);
	if(!owned)
		throw exception::AlibException("GlushkovRRG: conversion produced no grammar");

	return std::move(*owned);
}
/**
 * Converts an unbounded regular expression to a right regular grammar
 * following the Glushkov construction.
 *
 * The start nonterminal is labelled "S"; every Glushkov symbol gets a
 * nonterminal labelled by the pair (input symbol, symbol id).
 *
 * @param regexp Source regular expression.
 * @return Right regular grammar built from the source regexp.
 */
grammar::RightRG GlushkovRRG::convert(const regexp::UnboundedRegExp& regexp)
{
	alphabet::Symbol start(alphabet::LabeledSymbol(label::Label(label::StringLabel("S"))));
	grammar::RightRG rrg(start);

	// step 1: terminals are exactly the regexp alphabet
	rrg.setTerminalAlphabet(regexp.getAlphabet());

	// steps 2, 3, 4: first set, last set and the set of (symbol, follower) pairs
	const std::set<regexp::GlushkovSymbol> first = regexp::GlushkovTraversal::first(regexp);
	const std::set<regexp::GlushkovSymbol> last = regexp::GlushkovTraversal::last(regexp);
	std::set<regexp::GlushkovPair> pairs;
	for(const auto& from : regexp::GlushkovTraversal::getSymbols(regexp))
	{
		for(const auto& to : regexp::GlushkovTraversal::follow(regexp, from))
		{
			pairs.insert(regexp::GlushkovPair(from, to));
		}
	}
	// the "regexp contains the empty string" case is handled after step 7

	// step 5: one nonterminal per Glushkov symbol
	std::map<regexp::GlushkovSymbol, alphabet::Symbol> symbolMap;
	for(const auto& position : regexp::GlushkovTraversal::getSymbols(regexp))
	{
		const label::Label inputPart = label::Label(label::StringLabel(position.getInputSymbol()));
		const label::Label idPart = label::Label(label::IntegerLabel(position.getId()));
		alphabet::Symbol nonterminal(alphabet::LabeledSymbol(label::Label(label::LabelPairLabel(std::make_pair(inputPart, idPart)))));

		symbolMap.insert(std::make_pair(position, nonterminal));
		rrg.addNonterminalSymbol(nonterminal);
	}

	// step 6a: S -> x X for every symbol in the first set
	for(const auto& position : first)
	{
		const alphabet::Symbol& target = symbolMap.find(position)->second;
		rrg.addRule(start, std::make_pair(position.getInputSymbol(), target));
	}

	// step 6b: A -> b B for every pair (a, b) where b may follow a
	for(const auto& follower : pairs)
	{
		const alphabet::Symbol& source = symbolMap.find(follower.getFirst())->second;
		const alphabet::Symbol& target = symbolMap.find(follower.getSecond())->second;
		rrg.addRule(source, std::make_pair(follower.getSecond().getInputSymbol(), target));
	}

	// step 7: for every symbol in the last set, each right-hand side that
	// mentions that symbol's nonterminal gets a sibling rule with the same
	// left side and only the first symbol of that right-hand side
	for(const auto& position : last)
	{
		const alphabet::Symbol& target = symbolMap.find(position)->second;

		for(const auto& rule : rrg.getRawRules())
		{
			for(const auto& rhs : rule.second)
			{
				if(std::find(rhs.begin(), rhs.end(), target) == rhs.end())
					continue;

				rrg.addRule(rule.first, rhs.at(0));
			}
		}
	}

	if(regexp.containsEmptyString())
	{
		rrg.setGeneratesEpsilon(true);
	}

	return rrg;
}
void GlushkovRRG::Visit(void* userData, const regexp::FormalRegExp& regexp) const
{
/*
grammar::Grammar* & out = *((grammar::Grammar**) userData);
out = new grammar::Grammar(this->convert(regexp));
*/
throw exception::AlibException("GlushkovRRG: Converting FormalRegExp NYI"); // TODO
}
/**
 * Visitor entry point for unbounded regular expressions.
 *
 * Converts the expression and stores a newly allocated grammar through the
 * user-data pointer; the code that reads it back is responsible for
 * deleting it.
 *
 * @param userData Points to a grammar::Grammar* that receives the result.
 * @param regexp   Expression to convert.
 */
void GlushkovRRG::Visit(void* userData, const regexp::UnboundedRegExp& regexp) const
{
	grammar::Grammar** const slot = static_cast<grammar::Grammar**>(userData);
	*slot = new grammar::Grammar(GlushkovRRG::convert(regexp));
}
// Visitor instance that convert(const regexp::RegExp&) passes to Accept().
const GlushkovRRG GlushkovRRG::GLUSHKOV_RRG;
} /* namespace re2rg */
......@@ -2,56 +2,46 @@
* GlushkovRRG.h
*
* Created on: 11. 1. 2014
* Author: tomas
* Author: Tomas Pecka
*/
 
#ifndef GLUSHKOVRRG_H_
#define GLUSHKOVRRG_H_
 
#include <map>
#include <grammar/Grammar.h>
#include <grammar/Regular/RightRG.h>
 
#include <grammar/Regular/RightRegularGrammar.h>
#include <regexp/RegExp.h>
#include <regexp/formal/FormalRegExp.h>
#include <regexp/unbounded/UnboundedRegExp.h>
 
#include "../../interface/IConversionRRG.h"
#include "../../shared/glushkov/GlushkovTraversal.h"
 
#include "../../shared/Hexavigesimal.h"
namespace conversions
namespace re2rg
{
 
/**
* Converts regular expression to right regular grammar using Glushkov algorithm.
* Source: None yet.
*/
class GlushkovRRG : public IConversionRRG
class GlushkovRRG : public regexp::VisitableRegExpBase::const_visitor_type
{
public:
/**
* @param re Source regular expression.
*/
GlushkovRRG( const regexp::RegExp & re );
~GlushkovRRG( void );
/**
* Performs conversion.
* @return right regular grammar equivalent to source regexp.
*/
grammar::RightRegularGrammar convert( void );
static grammar::Grammar convert(const regexp::RegExp& regexp);
static grammar::RightRG convert(const regexp::FormalRegExp& regexp);
static grammar::RightRG convert(const regexp::UnboundedRegExp& regexp);
 
private:
std::map<GlushkovSymbol, alphabet::Symbol> m_symbolMap;
std::set<GlushkovSymbol> m_first, m_last;
std::set<GlushkovPair> m_pairs;
void Visit(void*, const regexp::FormalRegExp& regexp) const;
void Visit(void*, const regexp::UnboundedRegExp& regexp) const;
 
/*
* input regexp
*/
const regexp::RegExp & m_re;
static const GlushkovRRG GLUSHKOV_RRG;
};
 
} /* namespace conversions */
} /* namespace re2rg */
 
#endif /* GLUSHKOVRRG_H_ */
......@@ -174,9 +174,9 @@ runTest "./aconversions2 -t RE -a elimination | ./aconversions2 -t FA -a glushko
# FA -> RE -> RRG -> LRG -> FA
# covers: FA -> RE (Brz. algebraic, elimination), RE -> RRG ( Brz. derivation, Glushkov), RRG -> LRG, LRG -> FA
runTest "./aconversions2 -t RE -a algebraic | ./aconversions2 -t RRG -a brzozowski | ./aconversions2 -t LRG | ./aconversions2 -t FA"
#runTest "./aconversions2 -ts2-t RE -a algebraic | ./aconversions2 -t RRG -a glushkov | ./aconversions2 -t LRG | ./aconversions2 -t FA"
runTest "./aconversions2 -t RE -a algebraic | ./aconversions2 -t RRG -a glushkov | ./aconversions2 -t LRG | ./aconversions2 -t FA"
runTest "./aconversions2 -t RE -a elimination | ./aconversions2 -t RRG -a brzozowski | ./aconversions2 -t LRG | ./aconversions2 -t FA"
#runTest "./aconversions2 -ts2-t RE -a elimination | ./aconversions2 -t RRG -a glushkov | ./aconversions2 -t LRG | ./aconversions2 -t FA"
runTest "./aconversions2 -t RE -a elimination | ./aconversions2 -t RRG -a glushkov | ./aconversions2 -t LRG | ./aconversions2 -t FA"
 
# FA -> RRG -> RE -> FA
# covers: FA -> RRG, FA -> LRG, RRG -> RE, LRG -> RE, RE -> FA (Brz. derivation, Thompson, Glushkov)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment