From fe65fdb2c4cf77fe68e915b3fe590555e15519be Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Wed, 12 Sep 2018 21:39:53 +0200 Subject: [PATCH] improve ToCNF algo --- alib2algo/src/grammar/simplify/ToCNF.cpp | 168 +++++++++--------- alib2algo/src/grammar/simplify/ToCNF.h | 4 + .../grammar/simplify/GrammarToCNFTest.cpp | 6 +- 3 files changed, 93 insertions(+), 85 deletions(-) diff --git a/alib2algo/src/grammar/simplify/ToCNF.cpp b/alib2algo/src/grammar/simplify/ToCNF.cpp index 4c658b89f0..903a644e95 100644 --- a/alib2algo/src/grammar/simplify/ToCNF.cpp +++ b/alib2algo/src/grammar/simplify/ToCNF.cpp @@ -15,120 +15,124 @@ #include <grammar/RawRules.h> #include <grammar/AddRawRule.h> +#include <object/ObjectFactory.h> namespace grammar { namespace simplify { -template<class T> -ext::pair<DefaultSymbolType, DefaultSymbolType> splitToPairs(T& grammar, const ext::vector<DefaultSymbolType>& rhs, unsigned from, unsigned size, ext::map<DefaultSymbolType, DefaultSymbolType>& createdSymbols) { - if(size == 2) { - return ext::make_pair(rhs[from], rhs[from + 1]); - } else if(size == 3) { - DefaultSymbolType firstLhs {rhs[from]}; - - auto second = splitToPairs(grammar, rhs, from + 1, 2, createdSymbols); - DefaultSymbolType secondProposal{DefaultSymbolsPairType(second)}; - if(!createdSymbols.count(secondProposal)) { - createdSymbols.insert(std::make_pair(secondProposal, common::createUnique(secondProposal, grammar.getTerminalAlphabet(), grammar.getNonterminalAlphabet()))); +void splitRule ( const DefaultSymbolType & lhs, ext::vector < DefaultSymbolType > rhs, grammar::CNF < DefaultSymbolType > & result ) { //After templating the CNF make rhs const reference + switch ( rhs.size ( ) ) { + case 2: { + result.addRule ( lhs, ext::make_pair ( std::move ( rhs [ 0 ] ), std::move ( rhs [ 1 ] ) ) ); + break; } - grammar.addNonterminalSymbol(createdSymbols.find(secondProposal)->second); - grammar::AddRawRule::addRawRule(grammar,createdSymbols.find(secondProposal)->second, {std::move(second.first), std::move(second.second)}); - - return ext::make_pair(std::move(firstLhs), createdSymbols.find(secondProposal)->second); - } else { - auto first = splitToPairs(grammar, rhs, from, size / 2, createdSymbols); - DefaultSymbolType firstProposal{DefaultSymbolsPairType(first)}; - if(!createdSymbols.count(firstProposal)) { - createdSymbols.insert(std::make_pair(firstProposal, common::createUnique(firstProposal, grammar.getTerminalAlphabet(), grammar.getNonterminalAlphabet()))); + case 3: { + ext::vector < DefaultSymbolType > right ( std::make_move_iterator ( std::next ( rhs.begin ( ) ) ), std::make_move_iterator ( rhs.end ( ) ) ); + DefaultSymbolType second = object::ObjectFactory::make ( right ); + if ( result.addNonterminalSymbol ( second ) ) + splitRule ( second, std::move ( right ), result ); + + result.addRule ( lhs, ext::make_pair ( std::move ( rhs [ 0 ] ), std::move ( second ) ) ); + break; } - grammar.addNonterminalSymbol(createdSymbols.find(firstProposal)->second); - grammar::AddRawRule::addRawRule ( grammar, createdSymbols.find(firstProposal)->second, {std::move(first.first), std::move(first.second)}); + default: { + ext::vector < DefaultSymbolType > left; + for ( unsigned i = 0; i < rhs.size ( ) / 2; ++ i ) + left.push_back ( std::move ( rhs [ i ] ) ); - auto second = splitToPairs(grammar, rhs, from + size / 2, size - size / 2, createdSymbols); - DefaultSymbolType secondProposal{DefaultSymbolsPairType(second)}; - if(!createdSymbols.count(secondProposal)) { - createdSymbols.insert(std::make_pair(secondProposal, common::createUnique(secondProposal, grammar.getTerminalAlphabet(), grammar.getNonterminalAlphabet()))); - } - grammar.addNonterminalSymbol(createdSymbols.find(secondProposal)->second); - grammar::AddRawRule::addRawRule(grammar,createdSymbols.find(secondProposal)->second, {std::move(second.first), std::move(second.second)}); + DefaultSymbolType first = object::ObjectFactory::make ( left ); + if ( result.addNonterminalSymbol ( first ) ) + splitRule ( first, std::move ( left ), result ); + + ext::vector < DefaultSymbolType > right; + for ( unsigned i = rhs.size ( ) / 2; i < rhs.size ( ); ++ i ) + right.push_back ( std::move ( rhs [ i ] ) ); - return ext::make_pair(createdSymbols.find(firstProposal)->second, createdSymbols.find(secondProposal)->second); + DefaultSymbolType second = object::ObjectFactory::make ( right ); + if ( result.addNonterminalSymbol ( second ) ) + splitRule ( second, std::move ( right ), result ); + + result.addRule ( lhs, ext::make_pair ( std::move ( first ), std::move ( second ) ) ); + } } } template<class T> grammar::CNF < > convertInternal( const T & origGrammar ) { - T grammarTmp(origGrammar.getInitialSymbol()); - - grammarTmp.setNonterminalAlphabet(origGrammar.getNonterminalAlphabet() ); - grammarTmp.setTerminalAlphabet( origGrammar.getTerminalAlphabet() ); - - ext::map<DefaultSymbolType, DefaultSymbolType> createdSymbols; - auto origRules = grammar::RawRules::getRawRules(origGrammar); - for( const auto & origRule : origRules ) { - for( const auto& origRhs : origRule.second ) { - if(origRhs.size() == 1 || origRhs.size() == 2) - grammar::AddRawRule::addRawRule(grammarTmp,origRule.first, origRhs); - else if(origRhs.size() > 2) { - auto second = splitToPairs(grammarTmp, origRhs, 0, origRhs.size(), createdSymbols); - grammar::AddRawRule::addRawRule(grammarTmp,origRule.first, {std::move(second.first), std::move(second.second)}); - } - } - } - - grammarTmp.setGeneratesEpsilon(origGrammar.getGeneratesEpsilon()); + grammar::CNF < > result ( origGrammar.getInitialSymbol ( ) ); - grammar::CNF < > grammar(grammarTmp.getInitialSymbol()); + result.setNonterminalAlphabet ( origGrammar.getNonterminalAlphabet ( ) ); + result.setTerminalAlphabet ( origGrammar.getTerminalAlphabet ( ) ); - grammar.setNonterminalAlphabet( grammarTmp.getNonterminalAlphabet() ); - grammar.setTerminalAlphabet( grammarTmp.getTerminalAlphabet() ); - - ext::map<DefaultSymbolType, DefaultSymbolType> terminalToShadowNonterminal; - for( const auto & symbol : grammarTmp.getTerminalAlphabet() ) { - DefaultSymbolType shadowSymbol = common::createUnique(symbol, grammar.getTerminalAlphabet(), grammar.getNonterminalAlphabet()); - terminalToShadowNonterminal.insert( std::make_pair( symbol, shadowSymbol )); - grammar.addNonterminalSymbol( shadowSymbol ); - grammar.addRule(std::move(shadowSymbol), symbol); + ext::map < DefaultSymbolType, DefaultSymbolType > terminalToShadowNonterminal; + for ( const DefaultSymbolType & symbol : origGrammar.getTerminalAlphabet ( ) ) { + DefaultSymbolType shadowSymbol = common::createUnique ( symbol, result.getTerminalAlphabet ( ), result.getNonterminalAlphabet ( ) ); + terminalToShadowNonterminal.insert ( std::make_pair ( symbol, shadowSymbol ) ); + result.addNonterminalSymbol ( shadowSymbol ); + result.addRule ( std::move ( shadowSymbol ), symbol ); } - auto tmpRules = grammar::RawRules::getRawRules(grammarTmp); - for( const auto & tmpRule : tmpRules ) { - for( const auto& tmpRhs : tmpRule.second ) { - if(tmpRhs.size() == 2) { - if(grammarTmp.getNonterminalAlphabet().count(tmpRhs[0])) { - if(grammarTmp.getNonterminalAlphabet().count(tmpRhs[1])) { - grammar::AddRawRule::addRawRule(grammar,tmpRule.first, tmpRhs); - } else { - grammar::AddRawRule::addRawRule(grammar,tmpRule.first, {tmpRhs[0], terminalToShadowNonterminal.find(tmpRhs[1])->second}); - } - } else { - if(grammarTmp.getNonterminalAlphabet().count(tmpRhs[1])) { - grammar::AddRawRule::addRawRule(grammar,tmpRule.first, {terminalToShadowNonterminal.find(tmpRhs[0])->second, tmpRhs[1]}); - } else { - grammar::AddRawRule::addRawRule(grammar,tmpRule.first, {terminalToShadowNonterminal.find(tmpRhs[0])->second, terminalToShadowNonterminal.find(tmpRhs[1])->second }); - } + for ( const auto & rules : origGrammar.getRules ( ) ) { + for ( const ext::vector < DefaultSymbolType > & rhs : rules.second ) { + if ( rhs.size ( ) == 1 ) { + result.addRule ( rules.first, rhs [ 0 ] ); + } else { + ext::vector < DefaultSymbolType > rawRule; + for ( const DefaultSymbolType & symbol : rhs ) { + if ( origGrammar.getTerminalAlphabet ( ).count ( symbol ) ) + rawRule.push_back ( terminalToShadowNonterminal.at ( symbol ) ); + else + rawRule.push_back ( symbol ); } - } else // tmpRhs.size() == 1 - grammar::AddRawRule::addRawRule(grammar,tmpRule.first, tmpRhs); + + splitRule ( rules.first, std::move ( rawRule ), result ); + } } } - grammar.setGeneratesEpsilon(grammarTmp.getGeneratesEpsilon()); + result.setGeneratesEpsilon ( origGrammar.getGeneratesEpsilon ( ) ); - return grammar; + return result; } grammar::CNF < > ToCNF::convert(const grammar::CFG < > & origGrammar) { - return convertInternal(grammar::simplify::SimpleRulesRemover::remove(grammar::simplify::EpsilonRemover::remove(origGrammar))); + return convert(grammar::simplify::EpsilonRemover::remove(origGrammar)); } grammar::CNF < > ToCNF::convert(const grammar::EpsilonFreeCFG < > & origGrammar) { return convertInternal(grammar::simplify::SimpleRulesRemover::remove(origGrammar)); } -grammar::CNF < > ToCNF::convert(const grammar::GNF < > & origGrammar) { - return convertInternal(origGrammar); +grammar::CNF < > ToCNF::convert ( const grammar::GNF < > & origGrammar ) { + grammar::CNF < > result ( origGrammar.getInitialSymbol ( ) ); + + result.setNonterminalAlphabet ( origGrammar.getNonterminalAlphabet ( ) ); + result.setTerminalAlphabet ( origGrammar.getTerminalAlphabet ( ) ); + + ext::map < DefaultSymbolType, DefaultSymbolType > terminalToShadowNonterminal; + for ( const DefaultSymbolType & symbol : origGrammar.getTerminalAlphabet ( ) ) { + DefaultSymbolType shadowSymbol = common::createUnique ( symbol, result.getTerminalAlphabet ( ), result.getNonterminalAlphabet ( ) ); + terminalToShadowNonterminal.insert ( std::make_pair ( symbol, shadowSymbol ) ); + result.addNonterminalSymbol ( shadowSymbol ); + result.addRule ( std::move ( shadowSymbol ), symbol ); + } + + for ( const auto & rules : origGrammar.getRules ( ) ) { + for ( const ext::pair < DefaultSymbolType, ext::vector < DefaultSymbolType > > & rhs : rules.second ) { + if ( rhs.second.size ( ) == 0 ) + result.addRule ( rules.first, rhs.first ); + else { + ext::vector < DefaultSymbolType > rawRule { terminalToShadowNonterminal.at ( rhs.first ) }; + rawRule.insert ( rawRule.end ( ), rhs.second.begin ( ), rhs.second.end ( ) ); + splitRule ( rules.first, std::move ( rawRule ), result ); + } + } + } + + result.setGeneratesEpsilon ( origGrammar.getGeneratesEpsilon ( ) ); + + return result; } grammar::CNF < > ToCNF::convert(const grammar::LG < > & origGrammar) { diff --git a/alib2algo/src/grammar/simplify/ToCNF.h b/alib2algo/src/grammar/simplify/ToCNF.h index 88165da160..8b39636ea3 100644 --- a/alib2algo/src/grammar/simplify/ToCNF.h +++ b/alib2algo/src/grammar/simplify/ToCNF.h @@ -79,6 +79,8 @@ grammar::CNF < SymbolType > ToCNF::convert ( const grammar::LeftRG < SymbolType } } + result.setGeneratesEpsilon ( origGrammar.getGeneratesEpsilon ( ) ); + return result; } @@ -108,6 +110,8 @@ grammar::CNF < SymbolType > ToCNF::convert ( const grammar::RightRG < SymbolType } } + result.setGeneratesEpsilon ( origGrammar.getGeneratesEpsilon ( ) ); + return result; } diff --git a/alib2algo/test-src/grammar/simplify/GrammarToCNFTest.cpp b/alib2algo/test-src/grammar/simplify/GrammarToCNFTest.cpp index 4f8c030352..fb2951651c 100644 --- a/alib2algo/test-src/grammar/simplify/GrammarToCNFTest.cpp +++ b/alib2algo/test-src/grammar/simplify/GrammarToCNFTest.cpp @@ -74,9 +74,9 @@ void GrammarToCNFTest::testToCNFRules2() { DefaultSymbolType aP = DefaultSymbolType("a'"); DefaultSymbolType bP = DefaultSymbolType("b'"); DefaultSymbolType cP = DefaultSymbolType("c'"); - DefaultSymbolType Xb = DefaultSymbolType(DefaultSymbolType(DefaultSymbolsPairType(ext::make_pair(X, b)))); - DefaultSymbolType aX = DefaultSymbolType(DefaultSymbolType(DefaultSymbolsPairType(ext::make_pair(a, X)))); - DefaultSymbolType bX = DefaultSymbolType(DefaultSymbolType(DefaultSymbolsPairType(ext::make_pair(b, X)))); + DefaultSymbolType Xb = object::ObjectFactory::make ( ext::vector < DefaultSymbolType > { X, bP } ); + DefaultSymbolType aX = object::ObjectFactory::make ( ext::vector < DefaultSymbolType > { aP, X } ); + DefaultSymbolType bX = object::ObjectFactory::make ( ext::vector < DefaultSymbolType > { bP, X } ); grammar::CNF < > grammar3(S); grammar3.setNonterminalAlphabet({S, X, Y, aP, bP, cP, Xb, aX, bX}); -- GitLab