Skip to content
Snippets Groups Projects
Commit fe65fdb2 authored by Jan Trávníček's avatar Jan Trávníček
Browse files

improve ToCNF algo

parent e85826a3
No related branches found
No related tags found
No related merge requests found
......@@ -15,120 +15,124 @@
 
#include <grammar/RawRules.h>
#include <grammar/AddRawRule.h>
#include <object/ObjectFactory.h>
 
namespace grammar {
 
namespace simplify {
 
template<class T>
ext::pair<DefaultSymbolType, DefaultSymbolType> splitToPairs(T& grammar, const ext::vector<DefaultSymbolType>& rhs, unsigned from, unsigned size, ext::map<DefaultSymbolType, DefaultSymbolType>& createdSymbols) {
if(size == 2) {
return ext::make_pair(rhs[from], rhs[from + 1]);
} else if(size == 3) {
DefaultSymbolType firstLhs {rhs[from]};
auto second = splitToPairs(grammar, rhs, from + 1, 2, createdSymbols);
DefaultSymbolType secondProposal{DefaultSymbolsPairType(second)};
if(!createdSymbols.count(secondProposal)) {
createdSymbols.insert(std::make_pair(secondProposal, common::createUnique(secondProposal, grammar.getTerminalAlphabet(), grammar.getNonterminalAlphabet())));
void splitRule ( const DefaultSymbolType & lhs, ext::vector < DefaultSymbolType > rhs, grammar::CNF < DefaultSymbolType > & result ) { //After templating the CNF make rhs const reference
switch ( rhs.size ( ) ) {
case 2: {
result.addRule ( lhs, ext::make_pair ( std::move ( rhs [ 0 ] ), std::move ( rhs [ 1 ] ) ) );
break;
}
grammar.addNonterminalSymbol(createdSymbols.find(secondProposal)->second);
grammar::AddRawRule::addRawRule(grammar,createdSymbols.find(secondProposal)->second, {std::move(second.first), std::move(second.second)});
return ext::make_pair(std::move(firstLhs), createdSymbols.find(secondProposal)->second);
} else {
auto first = splitToPairs(grammar, rhs, from, size / 2, createdSymbols);
DefaultSymbolType firstProposal{DefaultSymbolsPairType(first)};
if(!createdSymbols.count(firstProposal)) {
createdSymbols.insert(std::make_pair(firstProposal, common::createUnique(firstProposal, grammar.getTerminalAlphabet(), grammar.getNonterminalAlphabet())));
case 3: {
ext::vector < DefaultSymbolType > right ( std::make_move_iterator ( std::next ( rhs.begin ( ) ) ), std::make_move_iterator ( rhs.end ( ) ) );
DefaultSymbolType second = object::ObjectFactory::make ( right );
if ( result.addNonterminalSymbol ( second ) )
splitRule ( second, std::move ( right ), result );
result.addRule ( lhs, ext::make_pair ( std::move ( rhs [ 0 ] ), std::move ( second ) ) );
break;
}
grammar.addNonterminalSymbol(createdSymbols.find(firstProposal)->second);
grammar::AddRawRule::addRawRule ( grammar, createdSymbols.find(firstProposal)->second, {std::move(first.first), std::move(first.second)});
default: {
ext::vector < DefaultSymbolType > left;
for ( unsigned i = 0; i < rhs.size ( ) / 2; ++ i )
left.push_back ( std::move ( rhs [ i ] ) );
 
auto second = splitToPairs(grammar, rhs, from + size / 2, size - size / 2, createdSymbols);
DefaultSymbolType secondProposal{DefaultSymbolsPairType(second)};
if(!createdSymbols.count(secondProposal)) {
createdSymbols.insert(std::make_pair(secondProposal, common::createUnique(secondProposal, grammar.getTerminalAlphabet(), grammar.getNonterminalAlphabet())));
}
grammar.addNonterminalSymbol(createdSymbols.find(secondProposal)->second);
grammar::AddRawRule::addRawRule(grammar,createdSymbols.find(secondProposal)->second, {std::move(second.first), std::move(second.second)});
DefaultSymbolType first = object::ObjectFactory::make ( left );
if ( result.addNonterminalSymbol ( first ) )
splitRule ( first, std::move ( left ), result );
ext::vector < DefaultSymbolType > right;
for ( unsigned i = rhs.size ( ) / 2; i < rhs.size ( ); ++ i )
right.push_back ( std::move ( rhs [ i ] ) );
 
return ext::make_pair(createdSymbols.find(firstProposal)->second, createdSymbols.find(secondProposal)->second);
DefaultSymbolType second = object::ObjectFactory::make ( right );
if ( result.addNonterminalSymbol ( second ) )
splitRule ( second, std::move ( right ), result );
result.addRule ( lhs, ext::make_pair ( std::move ( first ), std::move ( second ) ) );
}
}
}
 
template<class T>
grammar::CNF < > convertInternal( const T & origGrammar ) {
T grammarTmp(origGrammar.getInitialSymbol());
grammarTmp.setNonterminalAlphabet(origGrammar.getNonterminalAlphabet() );
grammarTmp.setTerminalAlphabet( origGrammar.getTerminalAlphabet() );
ext::map<DefaultSymbolType, DefaultSymbolType> createdSymbols;
auto origRules = grammar::RawRules::getRawRules(origGrammar);
for( const auto & origRule : origRules ) {
for( const auto& origRhs : origRule.second ) {
if(origRhs.size() == 1 || origRhs.size() == 2)
grammar::AddRawRule::addRawRule(grammarTmp,origRule.first, origRhs);
else if(origRhs.size() > 2) {
auto second = splitToPairs(grammarTmp, origRhs, 0, origRhs.size(), createdSymbols);
grammar::AddRawRule::addRawRule(grammarTmp,origRule.first, {std::move(second.first), std::move(second.second)});
}
}
}
grammarTmp.setGeneratesEpsilon(origGrammar.getGeneratesEpsilon());
grammar::CNF < > result ( origGrammar.getInitialSymbol ( ) );
 
grammar::CNF < > grammar(grammarTmp.getInitialSymbol());
result.setNonterminalAlphabet ( origGrammar.getNonterminalAlphabet ( ) );
result.setTerminalAlphabet ( origGrammar.getTerminalAlphabet ( ) );
 
grammar.setNonterminalAlphabet( grammarTmp.getNonterminalAlphabet() );
grammar.setTerminalAlphabet( grammarTmp.getTerminalAlphabet() );
ext::map<DefaultSymbolType, DefaultSymbolType> terminalToShadowNonterminal;
for( const auto & symbol : grammarTmp.getTerminalAlphabet() ) {
DefaultSymbolType shadowSymbol = common::createUnique(symbol, grammar.getTerminalAlphabet(), grammar.getNonterminalAlphabet());
terminalToShadowNonterminal.insert( std::make_pair( symbol, shadowSymbol ));
grammar.addNonterminalSymbol( shadowSymbol );
grammar.addRule(std::move(shadowSymbol), symbol);
ext::map < DefaultSymbolType, DefaultSymbolType > terminalToShadowNonterminal;
for ( const DefaultSymbolType & symbol : origGrammar.getTerminalAlphabet ( ) ) {
DefaultSymbolType shadowSymbol = common::createUnique ( symbol, result.getTerminalAlphabet ( ), result.getNonterminalAlphabet ( ) );
terminalToShadowNonterminal.insert ( std::make_pair ( symbol, shadowSymbol ) );
result.addNonterminalSymbol ( shadowSymbol );
result.addRule ( std::move ( shadowSymbol ), symbol );
}
 
auto tmpRules = grammar::RawRules::getRawRules(grammarTmp);
for( const auto & tmpRule : tmpRules ) {
for( const auto& tmpRhs : tmpRule.second ) {
if(tmpRhs.size() == 2) {
if(grammarTmp.getNonterminalAlphabet().count(tmpRhs[0])) {
if(grammarTmp.getNonterminalAlphabet().count(tmpRhs[1])) {
grammar::AddRawRule::addRawRule(grammar,tmpRule.first, tmpRhs);
} else {
grammar::AddRawRule::addRawRule(grammar,tmpRule.first, {tmpRhs[0], terminalToShadowNonterminal.find(tmpRhs[1])->second});
}
} else {
if(grammarTmp.getNonterminalAlphabet().count(tmpRhs[1])) {
grammar::AddRawRule::addRawRule(grammar,tmpRule.first, {terminalToShadowNonterminal.find(tmpRhs[0])->second, tmpRhs[1]});
} else {
grammar::AddRawRule::addRawRule(grammar,tmpRule.first, {terminalToShadowNonterminal.find(tmpRhs[0])->second, terminalToShadowNonterminal.find(tmpRhs[1])->second });
}
for ( const auto & rules : origGrammar.getRules ( ) ) {
for ( const ext::vector < DefaultSymbolType > & rhs : rules.second ) {
if ( rhs.size ( ) == 1 ) {
result.addRule ( rules.first, rhs [ 0 ] );
} else {
ext::vector < DefaultSymbolType > rawRule;
for ( const DefaultSymbolType & symbol : rhs ) {
if ( origGrammar.getTerminalAlphabet ( ).count ( symbol ) )
rawRule.push_back ( terminalToShadowNonterminal.at ( symbol ) );
else
rawRule.push_back ( symbol );
}
} else // tmpRhs.size() == 1
grammar::AddRawRule::addRawRule(grammar,tmpRule.first, tmpRhs);
splitRule ( rules.first, std::move ( rawRule ), result );
}
}
}
 
grammar.setGeneratesEpsilon(grammarTmp.getGeneratesEpsilon());
result.setGeneratesEpsilon ( origGrammar.getGeneratesEpsilon ( ) );
 
return grammar;
return result;
}
 
grammar::CNF < > ToCNF::convert(const grammar::CFG < > & origGrammar) {
return convertInternal(grammar::simplify::SimpleRulesRemover::remove(grammar::simplify::EpsilonRemover::remove(origGrammar)));
return convert(grammar::simplify::EpsilonRemover::remove(origGrammar));
}
 
/**
 * Builds a grammar::CNF from a grammar::EpsilonFreeCFG.
 *
 * The input is first run through SimpleRulesRemover::remove and the outcome
 * is handed to convertInternal, which produces the returned grammar.
 *
 * \param origGrammar the grammar to convert
 * \return the grammar produced by convertInternal
 */
grammar::CNF < > ToCNF::convert ( const grammar::EpsilonFreeCFG < > & origGrammar ) {
	const auto & withoutSimpleRules = grammar::simplify::SimpleRulesRemover::remove ( origGrammar );
	return convertInternal ( withoutSimpleRules );
}
 
grammar::CNF < > ToCNF::convert(const grammar::GNF < > & origGrammar) {
return convertInternal(origGrammar);
/**
 * Builds a grammar::CNF from a grammar::GNF.
 *
 * The result starts with the initial symbol and both alphabets of the input.
 * For every terminal a fresh "shadow" nonterminal (obtained via
 * common::createUnique) is added together with the rule shadow -> terminal.
 * Each input rule is then transferred:
 *  - a rule whose tail (the symbols after the leading one) is empty is added
 *    unchanged as lhs -> leading symbol;
 *  - otherwise the leading symbol is replaced by its shadow nonterminal, the
 *    tail is appended, and the sequence is passed to splitRule, which adds
 *    the corresponding rule(s) to the result.
 * Finally the generates-epsilon flag is copied from the input.
 *
 * \param origGrammar the grammar to convert
 * \return the constructed grammar
 */
grammar::CNF < > ToCNF::convert ( const grammar::GNF < > & origGrammar ) {
	grammar::CNF < > cnf ( origGrammar.getInitialSymbol ( ) );

	cnf.setNonterminalAlphabet ( origGrammar.getNonterminalAlphabet ( ) );
	cnf.setTerminalAlphabet ( origGrammar.getTerminalAlphabet ( ) );

	// terminal -> nonterminal that derives exactly that terminal
	ext::map < DefaultSymbolType, DefaultSymbolType > shadowOf;
	for ( const DefaultSymbolType & terminal : origGrammar.getTerminalAlphabet ( ) ) {
		// Uniqueness is checked against the alphabets of the result, which grow as shadows are added.
		DefaultSymbolType shadow = common::createUnique ( terminal, cnf.getTerminalAlphabet ( ), cnf.getNonterminalAlphabet ( ) );
		shadowOf.insert ( std::make_pair ( terminal, shadow ) );
		cnf.addNonterminalSymbol ( shadow );
		cnf.addRule ( std::move ( shadow ), terminal );
	}

	for ( const auto & lhsWithAlternatives : origGrammar.getRules ( ) ) {
		const auto & lhs = lhsWithAlternatives.first;

		for ( const ext::pair < DefaultSymbolType, ext::vector < DefaultSymbolType > > & alternative : lhsWithAlternatives.second ) {
			const auto & leading = alternative.first;
			const auto & tail = alternative.second;

			if ( tail.empty ( ) ) {
				// Single-symbol right-hand side: copy as is.
				cnf.addRule ( lhs, leading );
				continue;
			}

			// Longer right-hand side: swap the leading symbol for its shadow and let splitRule break it up.
			ext::vector < DefaultSymbolType > sequence { shadowOf.at ( leading ) };
			sequence.insert ( sequence.end ( ), tail.begin ( ), tail.end ( ) );
			splitRule ( lhs, std::move ( sequence ), cnf );
		}
	}

	cnf.setGeneratesEpsilon ( origGrammar.getGeneratesEpsilon ( ) );
	return cnf;
}
 
grammar::CNF < > ToCNF::convert(const grammar::LG < > & origGrammar) {
......
......@@ -79,6 +79,8 @@ grammar::CNF < SymbolType > ToCNF::convert ( const grammar::LeftRG < SymbolType
}
}
 
result.setGeneratesEpsilon ( origGrammar.getGeneratesEpsilon ( ) );
return result;
}
 
......@@ -108,6 +110,8 @@ grammar::CNF < SymbolType > ToCNF::convert ( const grammar::RightRG < SymbolType
}
}
 
result.setGeneratesEpsilon ( origGrammar.getGeneratesEpsilon ( ) );
return result;
}
 
......
......@@ -74,9 +74,9 @@ void GrammarToCNFTest::testToCNFRules2() {
DefaultSymbolType aP = DefaultSymbolType("a'");
DefaultSymbolType bP = DefaultSymbolType("b'");
DefaultSymbolType cP = DefaultSymbolType("c'");
DefaultSymbolType Xb = DefaultSymbolType(DefaultSymbolType(DefaultSymbolsPairType(ext::make_pair(X, b))));
DefaultSymbolType aX = DefaultSymbolType(DefaultSymbolType(DefaultSymbolsPairType(ext::make_pair(a, X))));
DefaultSymbolType bX = DefaultSymbolType(DefaultSymbolType(DefaultSymbolsPairType(ext::make_pair(b, X))));
DefaultSymbolType Xb = object::ObjectFactory::make ( ext::vector < DefaultSymbolType > { X, bP } );
DefaultSymbolType aX = object::ObjectFactory::make ( ext::vector < DefaultSymbolType > { aP, X } );
DefaultSymbolType bX = object::ObjectFactory::make ( ext::vector < DefaultSymbolType > { bP, X } );
 
grammar::CNF < > grammar3(S);
grammar3.setNonterminalAlphabet({S, X, Y, aP, bP, cP, Xb, aX, bX});
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment