From ece08f3dc1fc2468579cb46c3b20cb1ffb585439 Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Sat, 3 Dec 2016 09:14:08 +0100 Subject: [PATCH] template LeftRecursionRemover algorithm --- .../grammar/simplify/LeftRecursionRemover.cpp | 160 --------------- .../grammar/simplify/LeftRecursionRemover.h | 189 +++++++++++++++++- 2 files changed, 182 insertions(+), 167 deletions(-) diff --git a/alib2algo/src/grammar/simplify/LeftRecursionRemover.cpp b/alib2algo/src/grammar/simplify/LeftRecursionRemover.cpp index dea34db7fa..5758a2a683 100644 --- a/alib2algo/src/grammar/simplify/LeftRecursionRemover.cpp +++ b/alib2algo/src/grammar/simplify/LeftRecursionRemover.cpp @@ -7,176 +7,16 @@ #include "LeftRecursionRemover.h" -#include "../convert/ToGrammarRightRG.h" -#include <exception/CommonException.h> - -#include <vector> -#include <common/createUnique.hpp> - namespace grammar { namespace simplify { -grammar::EpsilonFreeCFG < > directLeftRecursionRemoveAsOrder(const grammar::EpsilonFreeCFG < > & origGrammar) { - grammar::EpsilonFreeCFG < > res(origGrammar.getInitialSymbol()); - res.setNonterminalAlphabet(origGrammar.getNonterminalAlphabet()); - res.setTerminalAlphabet(origGrammar.getTerminalAlphabet()); - res.setGeneratesEpsilon(origGrammar.getGeneratesEpsilon()); - - for(const auto& nonterminal : origGrammar.getNonterminalAlphabet()) { - if(origGrammar.getRules().find(nonterminal) == origGrammar.getRules().end()) continue; - - if(std::any_of(origGrammar.getRules().find(nonterminal)->second.begin(), origGrammar.getRules().find(nonterminal)->second.end(), [&](const std::vector<alphabet::Symbol>& singleRHS) { - return singleRHS[0] == nonterminal; // is there a direct left recursion? - } ) && std::all_of(origGrammar.getRules().find(nonterminal)->second.begin(), origGrammar.getRules().find(nonterminal)->second.end(), [&](const std::vector<alphabet::Symbol>& singleRHS) { - return origGrammar.getTerminalAlphabet().count(singleRHS[0]) || singleRHS[0] >= nonterminal; // only remove left recursion when all nonterminals are bigger than the left hand side - })) { - alphabet::Symbol primed = common::createUnique(nonterminal, res.getTerminalAlphabet(), res.getNonterminalAlphabet()); - res.addNonterminalSymbol(primed); - for(const std::vector<alphabet::Symbol>& singleRHS : origGrammar.getRules().find(nonterminal)->second) { // do the removal - if(singleRHS[0] == nonterminal) { // A -> A alpha - std::vector<alphabet::Symbol> tmpRHS(singleRHS.begin() + 1, singleRHS.end()); - - res.addRule(primed, tmpRHS); // A' -> alpha - - tmpRHS.push_back(primed); - res.addRule(primed, tmpRHS); // A' -> alpha A' - } else { // a -> beta - std::vector<alphabet::Symbol> tmpRHS(singleRHS); - - res.addRule(nonterminal, tmpRHS); // A -> beta - - tmpRHS.push_back(primed); - res.addRule(nonterminal, tmpRHS); // A -> beta A' - } - } - } else { - for(const std::vector<alphabet::Symbol>& singleRHS : origGrammar.getRules().find(nonterminal)->second) { - res.addRule(nonterminal, singleRHS); - } - } - } - return res; -} - -grammar::EpsilonFreeCFG < > assignAsOrder(const grammar::EpsilonFreeCFG < > & origGrammar, unsigned i, const std::set<alphabet::Symbol>& origNonterminals) { - grammar::EpsilonFreeCFG < > res(origGrammar.getInitialSymbol()); - res.setNonterminalAlphabet(origGrammar.getNonterminalAlphabet()); - res.setTerminalAlphabet(origGrammar.getTerminalAlphabet()); - res.setGeneratesEpsilon(origGrammar.getGeneratesEpsilon()); - - for(const alphabet::Symbol& lhs : origGrammar.getNonterminalAlphabet()) { - if(i > 0) { - if(origGrammar.getRules().find(lhs) == origGrammar.getRules().end()) continue; - for(const std::vector<alphabet::Symbol>& rule : origGrammar.getRules().find(lhs)->second) { - res.addRule(lhs, rule); - } - - i--; - continue; // substitue only in i-th up to n-th nonterminals - } - if(origGrammar.getRules().find(lhs) == origGrammar.getRules().end()) continue; - if(!origNonterminals.count(lhs)) { // do not subsitute in nonoriginal nonterminals - for(const std::vector<alphabet::Symbol>& rule : origGrammar.getRules().find(lhs)->second) { - res.addRule(lhs, rule); - } - continue; - } - - const std::pair<alphabet::Symbol, std::set<std::vector<alphabet::Symbol>>>& rule = *origGrammar.getRules().find(lhs); - - for(const std::vector<alphabet::Symbol>& singleRHS : rule.second) { - if(res.getTerminalAlphabet().count(singleRHS[0])) { //do not substitute terminals - res.addRule(lhs, singleRHS); - continue; - } - const alphabet::Symbol& secondLHS = singleRHS[0]; - if(secondLHS >= lhs) { // substitute only by 0th up to i-th nonterminals right hand sides - res.addRule(lhs, singleRHS); - continue; - } - if(origGrammar.getRules().find(secondLHS) == origGrammar.getRules().end()) { //is there any right hand side to substitue with? - //if not well this rule does not generate anything anyway - continue; - } - - for(const std::vector<alphabet::Symbol>& secondSingleRHS : origGrammar.getRules().find(secondLHS)->second) { // do the substitution - std::vector<alphabet::Symbol> newRHS(secondSingleRHS); - newRHS.insert(newRHS.end(), singleRHS.begin() + 1, singleRHS.end()); - res.addRule(lhs, newRHS); - } - } - } - return res; -} - -grammar::EpsilonFreeCFG < > LeftRecursionRemover::remove(const grammar::EpsilonFreeCFG < > & origGrammar) { - grammar::EpsilonFreeCFG < > step = origGrammar; - for(const alphabet::Symbol& nonterminal : step.getNonterminalAlphabet()) { // remove identities - step.removeRule(nonterminal, std::vector<alphabet::Symbol>{nonterminal}); - } - unsigned i = 0; - while(i < origGrammar.getNonterminalAlphabet().size()) { - grammar::EpsilonFreeCFG < > nextStep = assignAsOrder(directLeftRecursionRemoveAsOrder(step), i, origGrammar.getNonterminalAlphabet()); - - if(step == nextStep) break; - step = std::move(nextStep); - i++; - }; - - return step; -} - auto LeftRecursionRemoverEpsilonFreeCFG = LeftRecursionRemover::RegistratorWrapper<grammar::EpsilonFreeCFG < >, grammar::EpsilonFreeCFG < > >(LeftRecursionRemover::remove); - -grammar::EpsilonFreeCFG < > LeftRecursionRemover::remove(const grammar::CNF < > & origGrammar) { - EpsilonFreeCFG < > tmp(origGrammar.getInitialSymbol()); - tmp.setTerminalAlphabet(origGrammar.getTerminalAlphabet()); - tmp.setNonterminalAlphabet(origGrammar.getNonterminalAlphabet()); - tmp.setGeneratesEpsilon(origGrammar.getGeneratesEpsilon()); - for(const auto& rule : origGrammar.getRules()) { - for(const auto& rhs : rule.second) { - if(rhs.is<alphabet::Symbol>()) { - tmp.addRule(rule.first, {rhs.get<alphabet::Symbol>()}); - } else { - const auto& rhsPair = rhs.get<std::pair<alphabet::Symbol, alphabet::Symbol>>(); - tmp.addRule(rule.first, {rhsPair.first, rhsPair.second}); - } - } - } - return remove(tmp); -} - auto LeftRecursionRemoverCNF = LeftRecursionRemover::RegistratorWrapper<grammar::EpsilonFreeCFG < >, grammar::CNF < > >(LeftRecursionRemover::remove); - -grammar::GNF < > LeftRecursionRemover::remove(const grammar::GNF < > & origGrammar) { - return origGrammar; -} - auto LeftRecursionRemoverGNF = LeftRecursionRemover::RegistratorWrapper<grammar::GNF < >, grammar::GNF < > >(LeftRecursionRemover::remove); - -grammar::RightRG < > LeftRecursionRemover::remove(const grammar::RightRG < > & origGrammar) { - return origGrammar; -} - auto LeftRecursionRemoverRightRG = LeftRecursionRemover::RegistratorWrapper<grammar::RightRG < >, grammar::RightRG < > >(LeftRecursionRemover::remove); - -grammar::RightLG < > LeftRecursionRemover::remove(const grammar::RightLG < > & origGrammar) { - return origGrammar; -} - auto LeftRecursionRemoverRightLG = LeftRecursionRemover::RegistratorWrapper<grammar::RightLG < >, grammar::RightLG < > >(LeftRecursionRemover::remove); - -grammar::RightRG < > LeftRecursionRemover::remove(const grammar::LeftRG < > & origGrammar) { - return convert::ToGrammarRightRG::convert(origGrammar); -} - auto LeftRecursionRemoverLeftRG = LeftRecursionRemover::RegistratorWrapper<grammar::RightRG < >, grammar::LeftRG < > >(LeftRecursionRemover::remove); - -grammar::RightLG < > LeftRecursionRemover::remove(const grammar::LeftLG < > & /* origGrammar */) { - throw exception::CommonException("LeftRecursionRemover: Removing from LeftLG NYI"); // TODO -} - auto LeftRecursionRemoverLeftLG = LeftRecursionRemover::RegistratorWrapper<grammar::RightLG < >, grammar::LeftLG < > >(LeftRecursionRemover::remove); grammar::Grammar LeftRecursionRemover::remove(const grammar::Grammar& grammar) { diff --git a/alib2algo/src/grammar/simplify/LeftRecursionRemover.h b/alib2algo/src/grammar/simplify/LeftRecursionRemover.h index 0eb65ca59b..f70d028150 100644 --- a/alib2algo/src/grammar/simplify/LeftRecursionRemover.h +++ b/alib2algo/src/grammar/simplify/LeftRecursionRemover.h @@ -24,23 +24,198 @@ #include <grammar/Regular/RightLG.h> #include <grammar/Regular/RightRG.h> +#include "../convert/ToGrammarRightRG.h" +#include <exception/CommonException.h> + +#include <vector> +#include <common/createUnique.hpp> + namespace grammar { namespace simplify { class LeftRecursionRemover : public std::SingleDispatch<LeftRecursionRemover, grammar::Grammar, const grammar::GrammarBase &> { + template < class SymbolType > + static grammar::EpsilonFreeCFG < SymbolType > directLeftRecursionRemoveAsOrder ( const grammar::EpsilonFreeCFG < SymbolType > & origGrammar ); + + template < class SymbolType > + static grammar::EpsilonFreeCFG < SymbolType > assignAsOrder ( const grammar::EpsilonFreeCFG < SymbolType > & origGrammar, unsigned i, const std::set< SymbolType >& origNonterminals ); public: static grammar::Grammar remove( const grammar::Grammar & grammar ); - static grammar::EpsilonFreeCFG < > remove( const grammar::EpsilonFreeCFG < > & grammar ); - static grammar::EpsilonFreeCFG < > remove( const grammar::CNF < > & grammar ); - static grammar::GNF < > remove( const grammar::GNF < > & grammar ); - static grammar::RightRG < > remove( const grammar::RightRG < > & grammar ); - static grammar::RightLG < > remove( const grammar::RightLG < > & grammar ); - static grammar::RightRG < > remove( const grammar::LeftRG < > & grammar ); - static grammar::RightLG < > remove( const grammar::LeftLG < > & grammar ); + template < class SymbolType > + static grammar::EpsilonFreeCFG < SymbolType > remove( const grammar::EpsilonFreeCFG < SymbolType > & grammar ); + template < class SymbolType > + static grammar::EpsilonFreeCFG < SymbolType > remove( const grammar::CNF < SymbolType > & grammar ); + template < class SymbolType > + static grammar::GNF < SymbolType > remove( const grammar::GNF < SymbolType > & grammar ); + template < class SymbolType > + static grammar::RightRG < SymbolType > remove( const grammar::RightRG < SymbolType > & grammar ); + template < class SymbolType > + static grammar::RightLG < SymbolType > remove( const grammar::RightLG < SymbolType > & grammar ); + template < class SymbolType > + static grammar::RightRG < SymbolType > remove( const grammar::LeftRG < SymbolType > & grammar ); + template < class SymbolType > + static grammar::RightLG < SymbolType > remove( const grammar::LeftLG < SymbolType > & grammar ); }; +template < class SymbolType > +grammar::EpsilonFreeCFG < SymbolType > LeftRecursionRemover::directLeftRecursionRemoveAsOrder(const grammar::EpsilonFreeCFG < SymbolType > & origGrammar) { + grammar::EpsilonFreeCFG < SymbolType > res(origGrammar.getInitialSymbol()); + res.setNonterminalAlphabet(origGrammar.getNonterminalAlphabet()); + res.setTerminalAlphabet(origGrammar.getTerminalAlphabet()); + res.setGeneratesEpsilon(origGrammar.getGeneratesEpsilon()); + + for(const auto& nonterminal : origGrammar.getNonterminalAlphabet()) { + if(origGrammar.getRules().find(nonterminal) == origGrammar.getRules().end()) continue; + + if(std::any_of(origGrammar.getRules().find(nonterminal)->second.begin(), origGrammar.getRules().find(nonterminal)->second.end(), [&](const std::vector<SymbolType>& singleRHS) { + return singleRHS[0] == nonterminal; // is there a direct left recursion? + } ) && std::all_of(origGrammar.getRules().find(nonterminal)->second.begin(), origGrammar.getRules().find(nonterminal)->second.end(), [&](const std::vector<SymbolType>& singleRHS) { + return origGrammar.getTerminalAlphabet().count(singleRHS[0]) || singleRHS[0] >= nonterminal; // only remove left recursion when all nonterminals are bigger than the left hand side + })) { + SymbolType primed = common::createUnique(nonterminal, res.getTerminalAlphabet(), res.getNonterminalAlphabet()); + res.addNonterminalSymbol(primed); + for(const std::vector<SymbolType>& singleRHS : origGrammar.getRules().find(nonterminal)->second) { // do the removal + if(singleRHS[0] == nonterminal) { // A -> A alpha + std::vector<SymbolType> tmpRHS(singleRHS.begin() + 1, singleRHS.end()); + + res.addRule(primed, tmpRHS); // A' -> alpha + + tmpRHS.push_back(primed); + res.addRule(primed, tmpRHS); // A' -> alpha A' + } else { // a -> beta + std::vector<SymbolType> tmpRHS(singleRHS); + + res.addRule(nonterminal, tmpRHS); // A -> beta + + tmpRHS.push_back(primed); + res.addRule(nonterminal, tmpRHS); // A -> beta A' + } + } + } else { + for(const std::vector<SymbolType>& singleRHS : origGrammar.getRules().find(nonterminal)->second) { + res.addRule(nonterminal, singleRHS); + } + } + } + return res; +} + +template < class SymbolType > +grammar::EpsilonFreeCFG < SymbolType > LeftRecursionRemover::assignAsOrder(const grammar::EpsilonFreeCFG < SymbolType > & origGrammar, unsigned i, const std::set<SymbolType>& origNonterminals) { + grammar::EpsilonFreeCFG < SymbolType > res(origGrammar.getInitialSymbol()); + res.setNonterminalAlphabet(origGrammar.getNonterminalAlphabet()); + res.setTerminalAlphabet(origGrammar.getTerminalAlphabet()); + res.setGeneratesEpsilon(origGrammar.getGeneratesEpsilon()); + + for(const SymbolType& lhs : origGrammar.getNonterminalAlphabet()) { + if(i > 0) { + if(origGrammar.getRules().find(lhs) == origGrammar.getRules().end()) continue; + for(const std::vector<SymbolType>& rule : origGrammar.getRules().find(lhs)->second) { + res.addRule(lhs, rule); + } + + i--; + continue; // substitue only in i-th up to n-th nonterminals + } + if(origGrammar.getRules().find(lhs) == origGrammar.getRules().end()) continue; + if(!origNonterminals.count(lhs)) { // do not subsitute in nonoriginal nonterminals + for(const std::vector<SymbolType>& rule : origGrammar.getRules().find(lhs)->second) { + res.addRule(lhs, rule); + } + continue; + } + + const std::pair<SymbolType, std::set<std::vector<SymbolType>>>& rule = *origGrammar.getRules().find(lhs); + + for(const std::vector<SymbolType>& singleRHS : rule.second) { + if(res.getTerminalAlphabet().count(singleRHS[0])) { //do not substitute terminals + res.addRule(lhs, singleRHS); + continue; + } + const SymbolType& secondLHS = singleRHS[0]; + if(secondLHS >= lhs) { // substitute only by 0th up to i-th nonterminals right hand sides + res.addRule(lhs, singleRHS); + continue; + } + if(origGrammar.getRules().find(secondLHS) == origGrammar.getRules().end()) { //is there any right hand side to substitue with? + //if not well this rule does not generate anything anyway + continue; + } + + for(const std::vector<SymbolType>& secondSingleRHS : origGrammar.getRules().find(secondLHS)->second) { // do the substitution + std::vector<SymbolType> newRHS(secondSingleRHS); + newRHS.insert(newRHS.end(), singleRHS.begin() + 1, singleRHS.end()); + res.addRule(lhs, newRHS); + } + } + } + return res; +} + +template < class SymbolType > +grammar::EpsilonFreeCFG < SymbolType > LeftRecursionRemover::remove(const grammar::EpsilonFreeCFG < SymbolType > & origGrammar) { + grammar::EpsilonFreeCFG < SymbolType > step = origGrammar; + for(const SymbolType& nonterminal : step.getNonterminalAlphabet()) { // remove identities + step.removeRule(nonterminal, std::vector<SymbolType>{nonterminal}); + } + unsigned i = 0; + while(i < origGrammar.getNonterminalAlphabet().size()) { + grammar::EpsilonFreeCFG < SymbolType > nextStep = assignAsOrder(directLeftRecursionRemoveAsOrder(step), i, origGrammar.getNonterminalAlphabet()); + + if(step == nextStep) break; + step = std::move(nextStep); + i++; + }; + + return step; +} + +template < class SymbolType > +grammar::EpsilonFreeCFG < SymbolType > LeftRecursionRemover::remove(const grammar::CNF < SymbolType > & origGrammar) { + EpsilonFreeCFG < SymbolType > tmp(origGrammar.getInitialSymbol()); + tmp.setTerminalAlphabet(origGrammar.getTerminalAlphabet()); + tmp.setNonterminalAlphabet(origGrammar.getNonterminalAlphabet()); + tmp.setGeneratesEpsilon(origGrammar.getGeneratesEpsilon()); + for(const auto& rule : origGrammar.getRules()) { + for(const auto& rhs : rule.second) { + if(rhs.template is<SymbolType>()) { + tmp.addRule(rule.first, {rhs.template get<SymbolType>()}); + } else { + const auto& rhsPair = rhs.template get<std::pair<SymbolType, SymbolType>>(); + tmp.addRule(rule.first, {rhsPair.first, rhsPair.second}); + } + } + } + return remove(tmp); +} + +template < class SymbolType > +grammar::GNF < SymbolType > LeftRecursionRemover::remove(const grammar::GNF < SymbolType > & origGrammar) { + return origGrammar; +} + +template < class SymbolType > +grammar::RightRG < SymbolType > LeftRecursionRemover::remove(const grammar::RightRG < SymbolType > & origGrammar) { + return origGrammar; +} + +template < class SymbolType > +grammar::RightLG < SymbolType > LeftRecursionRemover::remove(const grammar::RightLG < SymbolType > & origGrammar) { + return origGrammar; +} + +template < class SymbolType > +grammar::RightRG < SymbolType > LeftRecursionRemover::remove(const grammar::LeftRG < SymbolType > & origGrammar) { + return convert::ToGrammarRightRG::convert(origGrammar); +} + +template < class SymbolType > +grammar::RightLG < SymbolType > LeftRecursionRemover::remove(const grammar::LeftLG < SymbolType > & /* origGrammar */) { + throw exception::CommonException("LeftRecursionRemover: Removing from LeftLG NYI"); // TODO +} + } /* namespace simplify */ } /* namespace grammar */ -- GitLab