-
Jan Trávníček authoredJan Trávníček authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
ToGrammarRightRGDerivation.h 3.13 KiB
/*
* ToGrammarRightRGDerivation.h
*
* Created on: 6. 3. 2014
* Author: Tomas Pecka
*/
#ifndef TO_GRAMMAR_RIGHT_RG_DERIVATION_H_
#define TO_GRAMMAR_RIGHT_RG_DERIVATION_H_
#include <grammar/Regular/RightRG.h>
#include <regexp/RegExp.h>
#include <regexp/formal/FormalRegExp.h>
#include <regexp/unbounded/UnboundedRegExp.h>
#include <alib/set>
#include <alib/deque>
#include <alib/vector>
#include <alib/hexavigesimal>
#include <common/createUnique.hpp>
#include <regexp/simplify/RegExpOptimize.h>
#include <regexp/transform/RegExpDerivation.h>
#include <regexp/properties/RegExpEpsilon.h>
namespace regexp {
namespace convert {
/**
* Converts regular expression to right regular grammar using Brzozowski's derivation algorithm.
* Source: Melichar 2.137
*/
class ToGrammarRightRGDerivation {
public:
/**
* Implements conversion of regular expressions to regular grammars usign Brzozowski's derivation algorithm.
*
* \tparam T the type of regular expression to convert
* \tparam SymbolType the type of symbols in the regular expression
*
* \param regexp the regexp to convert
*
* \return right regular grammar generating the language described by the original regular expression
*/
template < class T, class SymbolType = typename regexp::SymbolTypeOfRegexp < T > >
static grammar::RightRG < SymbolType, unsigned > convert ( const T & regexp );
};
template < class T, class SymbolType >
grammar::RightRG < SymbolType, unsigned > ToGrammarRightRGDerivation::convert ( const T & regexp ) {
// 1.
T V = regexp::simplify::RegExpOptimize::optimize(regexp);
// 2., 3.
unsigned nonterminalId = 0;
ext::map < T, unsigned > nonterminalMap;
unsigned ntV = common::createUnique ( nonterminalId ++, regexp.getAlphabet ( ) );
nonterminalMap.insert ( std::make_pair ( V, ntV ) );
grammar::RightRG < SymbolType, unsigned > grammar(ntV);
grammar.setTerminalAlphabet ( regexp.getAlphabet ( ) );
ext::deque < T > Ni;
Ni.push_back ( V );
while(! Ni.empty()) {
T r = std::move ( Ni.back ( ) );
Ni.pop_back ( );
for(const auto & a : regexp.getAlphabet()) {
T derived = regexp::RegExpDerivation::derivation(r, a);
derived = regexp::simplify::RegExpOptimize::optimize(derived);
// this will also add \emptyset as a regexp (and as FA state)
if ( nonterminalMap.count(derived) == 0) { // if this state has already been found, do not add
Ni.push_back(derived);
unsigned nt = common::createUnique ( nonterminalId ++, grammar.getTerminalAlphabet ( ), grammar.getNonterminalAlphabet ( ) );
grammar.addNonterminalSymbol ( nt );
nonterminalMap.insert ( derived, nt );
}
if(regexp::properties::RegExpEpsilon::languageContainsEpsilon(derived))
grammar.addRule(nonterminalMap.at(r), a);
grammar.addRule(nonterminalMap.at(r), ext::make_pair(a, nonterminalMap.at(derived)));
}
}
if(regexp::properties::RegExpEpsilon::languageContainsEpsilon(V))
grammar.setGeneratesEpsilon(true); // okay, because of this feature we do not have to bother with extending the grammar with new rules and nonterminals. YAY!
return grammar;
}
} /* namespace convert */
} /* namespace regexp */
#endif /* TO_GRAMMAR_RIGHT_RG_DERIVATION_H_ */