Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
ToGrammarRightRGDerivation.h 3.13 KiB
/*
 * ToGrammarRightRGDerivation.h
 *
 *  Created on: 6. 3. 2014
 *	  Author: Tomas Pecka
 */

#ifndef TO_GRAMMAR_RIGHT_RG_DERIVATION_H_
#define TO_GRAMMAR_RIGHT_RG_DERIVATION_H_

#include <grammar/Regular/RightRG.h>
#include <regexp/RegExp.h>
#include <regexp/formal/FormalRegExp.h>
#include <regexp/unbounded/UnboundedRegExp.h>

#include <alib/set>
#include <alib/deque>
#include <alib/vector>
#include <alib/hexavigesimal>

#include <common/createUnique.hpp>

#include <regexp/simplify/RegExpOptimize.h>
#include <regexp/transform/RegExpDerivation.h>
#include <regexp/properties/RegExpEpsilon.h>

namespace regexp {

namespace convert {

/**
 * Converts regular expression to right regular grammar using Brzozowski's derivation algorithm.
 * Source: Melichar 2.137
 */
class ToGrammarRightRGDerivation {
public:
	/**
	 * Implements conversion of regular expressions to regular grammars usign Brzozowski's derivation algorithm.
	 *
	 * \tparam T the type of regular expression to convert
	 * \tparam SymbolType the type of symbols in the regular expression
	 *
	 * \param regexp the regexp to convert
	 *
	 * \return right regular grammar generating the language described by the original regular expression
	 */
	template  < class T, class SymbolType = typename regexp::SymbolTypeOfRegexp < T > >
	static grammar::RightRG < SymbolType, unsigned > convert ( const T & regexp );

};

template < class T, class SymbolType >
grammar::RightRG < SymbolType, unsigned > ToGrammarRightRGDerivation::convert ( const T & regexp ) {
	// 1.
	T V = regexp::simplify::RegExpOptimize::optimize(regexp);

	// 2., 3.
	unsigned nonterminalId = 0;
	ext::map < T, unsigned > nonterminalMap;

	unsigned ntV = common::createUnique ( nonterminalId ++, regexp.getAlphabet ( ) );
	nonterminalMap.insert ( std::make_pair ( V, ntV ) );

	grammar::RightRG < SymbolType, unsigned > grammar(ntV);
	grammar.setTerminalAlphabet ( regexp.getAlphabet ( ) );

	ext::deque < T > Ni;

	Ni.push_back ( V );
	while(! Ni.empty()) {
		T r = std::move ( Ni.back ( ) );
		Ni.pop_back ( );

		for(const auto & a : regexp.getAlphabet()) {
			T derived = regexp::RegExpDerivation::derivation(r, a);
			derived = regexp::simplify::RegExpOptimize::optimize(derived);

			// this will also add \emptyset as a regexp (and as FA state)
			if ( nonterminalMap.count(derived) == 0) { // if this state has already been found, do not add
				Ni.push_back(derived);
				unsigned nt = common::createUnique ( nonterminalId ++, grammar.getTerminalAlphabet ( ), grammar.getNonterminalAlphabet ( ) );
				grammar.addNonterminalSymbol ( nt );
				nonterminalMap.insert ( derived, nt );
			}

			if(regexp::properties::RegExpEpsilon::languageContainsEpsilon(derived))
				grammar.addRule(nonterminalMap.at(r), a);

			grammar.addRule(nonterminalMap.at(r), ext::make_pair(a, nonterminalMap.at(derived)));
		}
	}

	if(regexp::properties::RegExpEpsilon::languageContainsEpsilon(V))
		grammar.setGeneratesEpsilon(true); // okay, because of this feature we do not have to bother with extending the grammar with new rules and nonterminals. YAY!

	return grammar;
}

} /* namespace convert */

} /* namespace regexp */

#endif /* TO_GRAMMAR_RIGHT_RG_DERIVATION_H_ */