From bc7014ef4b78d3d04ac653b41db6da8854744049 Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Sun, 9 Apr 2017 16:30:33 +0200 Subject: [PATCH] normalize grammars --- alib2data/src/grammar/ContextFree/CFG.h | 37 +++++ alib2data/src/grammar/ContextFree/CNF.h | 49 +++++++ .../src/grammar/ContextFree/EpsilonFreeCFG.h | 43 ++++++ alib2data/src/grammar/ContextFree/GNF.h | 43 ++++++ alib2data/src/grammar/ContextFree/LG.h | 55 ++++++++ alib2data/src/grammar/ContextSensitive/CSG.h | 54 ++++++++ .../ContextSensitive/NonContractingGrammar.h | 54 ++++++++ alib2data/src/grammar/Regular/LeftLG.h | 52 ++++++++ alib2data/src/grammar/Regular/LeftRG.h | 52 ++++++++ alib2data/src/grammar/Regular/RightLG.h | 49 +++++++ alib2data/src/grammar/Regular/RightRG.h | 48 +++++++ .../ContextPreservingUnrestrictedGrammar.h | 47 +++++++ .../Unrestricted/UnrestrictedGrammar.h | 43 ++++++ .../src/grammar/common/GrammarNormalize.h | 126 ++++++++++++++++++ 14 files changed, 752 insertions(+) create mode 100644 alib2data/src/grammar/common/GrammarNormalize.h diff --git a/alib2data/src/grammar/ContextFree/CFG.h b/alib2data/src/grammar/ContextFree/CFG.h index 6b668c145b..471953b408 100644 --- a/alib2data/src/grammar/ContextFree/CFG.h +++ b/alib2data/src/grammar/ContextFree/CFG.h @@ -22,6 +22,7 @@ #include "../GrammarException.h" #include "../common/GrammarFromXMLParser.h" #include "../common/GrammarToXMLComposer.h" +#include "../common/GrammarNormalize.h" namespace grammar { @@ -49,6 +50,8 @@ public: bool addRule ( SymbolType leftHandSide, std::vector < SymbolType > rightHandSide ); + void addRules ( SymbolType leftHandSide, std::set < std::vector < SymbolType > > rightHandSide ); + const std::map < SymbolType, std::set < std::vector < SymbolType > > > & getRules ( ) const; bool removeRule ( const SymbolType & leftHandSide, const std::vector < SymbolType > & rightHandSide ); @@ -116,6 +119,27 @@ public: void composeRules ( std::deque < sax::Token > & out ) const; virtual 
alib::ObjectBase * inc ( ) &&; + + virtual GrammarBase * normalize ( ) && { + std::set < DefaultSymbolType > nonterminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < NonterminalAlphabet > ( ).get ( ) ) ); + std::set < DefaultSymbolType > terminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < TerminalAlphabet > ( ).get ( ) ) ); + DefaultSymbolType initialSymbol = GrammarNormalize::normalizeSymbol ( std::move ( this->template accessElement < InitialSymbol > ( ).get ( ) ) ); + + CFG < > * res = new CFG < > ( std::move ( nonterminals ), std::move ( terminals ), std::move ( initialSymbol ) ); + + for ( std::pair < SymbolType, std::set < std::vector < SymbolType > > > && rule : std::make_moveable_map ( rules ) ) { + + std::set < std::vector < DefaultSymbolType > > rhs; + for ( std::vector < SymbolType > && target : std::make_moveable_set ( rule.second ) ) + rhs.insert ( GrammarNormalize::normalizeSymbols ( std::move ( target ) ) ); + + DefaultSymbolType lhs = GrammarNormalize::normalizeSymbol ( std::move ( rule.first ) ); + + res->addRules ( std::move ( lhs ), std::move ( rhs ) ); + } + + return res; + } }; } /* namespace grammar */ @@ -164,6 +188,19 @@ bool CFG < SymbolType >::addRule ( SymbolType leftHandSide, std::vector < Symbol return rules[std::move ( leftHandSide )].insert ( std::move ( rightHandSide ) ).second; } +template < class SymbolType > +void CFG < SymbolType >::addRules ( SymbolType leftHandSide, std::set < std::vector < SymbolType > > rightHandSide ) { + if ( !getNonterminalAlphabet ( ).count ( leftHandSide ) ) + throw GrammarException ( "Rule must rewrite nonterminal symbol" ); + + for ( const std::vector < SymbolType > & rhs : rightHandSide ) + for ( const SymbolType & symbol : rhs ) + if ( ! getTerminalAlphabet ( ).count ( symbol ) && ! 
getNonterminalAlphabet ( ).count ( symbol ) ) + throw GrammarException ( "Symbol \"" + std::to_string ( symbol ) + "\" is not neither terminal nor nonterminal symbol" ); + + rules [ std::move ( leftHandSide ) ].insert ( std::make_moveable_set ( rightHandSide ).begin ( ), std::make_moveable_set ( rightHandSide ).end ( ) ); +} + template < class SymbolType > const std::map < SymbolType, std::set < std::vector < SymbolType > > > & CFG < SymbolType >::getRules ( ) const { return rules; diff --git a/alib2data/src/grammar/ContextFree/CNF.h b/alib2data/src/grammar/ContextFree/CNF.h index d237a00925..d9ca2276ef 100644 --- a/alib2data/src/grammar/ContextFree/CNF.h +++ b/alib2data/src/grammar/ContextFree/CNF.h @@ -23,6 +23,7 @@ #include "../GrammarException.h" #include "../common/GrammarFromXMLParser.h" #include "../common/GrammarToXMLComposer.h" +#include "../common/GrammarNormalize.h" namespace grammar { @@ -51,6 +52,8 @@ public: bool addRule ( SymbolType leftHandSide, SymbolType rightHandSide ); bool addRule ( SymbolType leftHandSide, std::pair < SymbolType, SymbolType > rightHandSide ); + void addRules ( SymbolType leftHandSide, std::set < std::variant < SymbolType, std::pair < SymbolType, SymbolType > > > rightHandSide ); + const std::map < SymbolType, std::set < std::variant < SymbolType, std::pair < SymbolType, SymbolType > > > > & getRules ( ) const; bool removeRule ( const SymbolType & leftHandSide, const std::variant < SymbolType, std::pair < SymbolType, SymbolType > > & rightHandSide ); @@ -123,6 +126,29 @@ public: void composeRules ( std::deque < sax::Token > & out ) const; virtual alib::ObjectBase * inc ( ) &&; + + virtual GrammarBase * normalize ( ) && { + std::set < DefaultSymbolType > nonterminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < NonterminalAlphabet > ( ).get ( ) ) ); + std::set < DefaultSymbolType > terminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < 
TerminalAlphabet > ( ).get ( ) ) ); + DefaultSymbolType initialSymbol = GrammarNormalize::normalizeSymbol ( std::move ( this->template accessElement < InitialSymbol > ( ).get ( ) ) ); + + CNF < > * res = new CNF < > ( std::move ( nonterminals ), std::move ( terminals ), std::move ( initialSymbol ) ); + + for ( std::pair < SymbolType, std::set < std::variant < SymbolType, std::pair < SymbolType, SymbolType > > > > && rule : std::make_moveable_map ( rules ) ) { + + std::set < std::variant < DefaultSymbolType, std::pair < DefaultSymbolType, DefaultSymbolType > > > rhs; + for ( std::variant < SymbolType, std::pair < SymbolType, SymbolType > > && target : std::make_moveable_set ( rule.second ) ) + rhs.insert ( GrammarNormalize::normalizeRHS ( std::move ( target ) ) ); + + DefaultSymbolType lhs = GrammarNormalize::normalizeSymbol ( std::move ( rule.first ) ); + + res->addRules ( std::move ( lhs ), std::move ( rhs ) ); + } + + res->setGeneratesEpsilon ( getGeneratesEpsilon ( ) ); + + return res; + } }; template < class SymbolType > @@ -178,6 +204,29 @@ bool CNF < SymbolType >::addRule ( SymbolType leftHandSide, std::pair < SymbolTy return addRule ( std::move ( leftHandSide ), std::move ( rhs ) ); } +template < class SymbolType > +void CNF < SymbolType >::addRules ( SymbolType leftHandSide, std::set < std::variant < SymbolType, std::pair < SymbolType, SymbolType > > > rightHandSide ) { /* Adds every alternative of rightHandSide as a rule of leftHandSide. All alternatives are validated before anything is inserted; GrammarException is thrown when leftHandSide is not a nonterminal, a single-symbol alternative is not a terminal, or a pair alternative contains a symbol that is not a nonterminal. */ + if ( !getNonterminalAlphabet ( ).count ( leftHandSide ) ) + throw GrammarException ( "Rule must rewrite nonterminal symbol" ); + + for ( const std::variant < SymbolType, std::pair < SymbolType, SymbolType > > & element : rightHandSide ) { + if ( element.template is < SymbolType > ( ) ) { + if ( !getTerminalAlphabet ( ).count ( element.template get < SymbolType > ( ) ) ) + throw GrammarException ( "Rule must rewrite to terminal symbol" ); + } else { + const std::pair < SymbolType, SymbolType > & rhs = element.template get < std::pair < SymbolType, SymbolType > > ( ); /* bound by reference: validation only reads the pair, so no copy is needed */ + + if ( 
!getNonterminalAlphabet ( ).count ( rhs.first ) ) + throw GrammarException ( "Symbol \"" + std::to_string ( rhs.first ) + "\" is not a nonterminal symbol" ); + + if ( !getNonterminalAlphabet ( ).count ( rhs.second ) ) + throw GrammarException ( "Symbol \"" + std::to_string ( rhs.second ) + "\" is not a nonterminal symbol" ); + } + } + + rules[std::move ( leftHandSide )].insert ( std::make_moveable_set ( rightHandSide ).begin ( ), std::make_moveable_set ( rightHandSide ).end ( ) ); +} + template < class SymbolType > const std::map < SymbolType, std::set < std::variant < SymbolType, std::pair < SymbolType, SymbolType > > > > & CNF < SymbolType >::getRules ( ) const { return rules; diff --git a/alib2data/src/grammar/ContextFree/EpsilonFreeCFG.h b/alib2data/src/grammar/ContextFree/EpsilonFreeCFG.h index 4c136ba725..ab81ed281c 100644 --- a/alib2data/src/grammar/ContextFree/EpsilonFreeCFG.h +++ b/alib2data/src/grammar/ContextFree/EpsilonFreeCFG.h @@ -22,6 +22,7 @@ #include "../GrammarException.h" #include "../common/GrammarFromXMLParser.h" #include "../common/GrammarToXMLComposer.h" +#include "../common/GrammarNormalize.h" namespace grammar { @@ -48,6 +49,8 @@ public: bool addRule ( SymbolType leftHandSide, std::vector < SymbolType > rightHandSide ); + void addRules ( SymbolType leftHandSide, std::set < std::vector < SymbolType > > rightHandSide ); + const std::map < SymbolType, std::set < std::vector < SymbolType > > > & getRules ( ) const; bool removeRule ( const SymbolType & leftHandSide, const std::vector < SymbolType > & rightHandSide ); @@ -118,6 +121,29 @@ public: void composeRules ( std::deque < sax::Token > & out ) const; virtual alib::ObjectBase * inc ( ) &&; + + virtual GrammarBase * normalize ( ) && { + std::set < DefaultSymbolType > nonterminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < NonterminalAlphabet > ( ).get ( ) ) ); + std::set < DefaultSymbolType > terminals = GrammarNormalize::normalizeAlphabet ( std::move 
( this->template accessComponent < TerminalAlphabet > ( ).get ( ) ) ); + DefaultSymbolType initialSymbol = GrammarNormalize::normalizeSymbol ( std::move ( this->template accessElement < InitialSymbol > ( ).get ( ) ) ); + + EpsilonFreeCFG < > * res = new EpsilonFreeCFG < > ( std::move ( nonterminals ), std::move ( terminals ), std::move ( initialSymbol ) ); + + for ( std::pair < SymbolType, std::set < std::vector < SymbolType > > > && rule : std::make_moveable_map ( rules ) ) { + + std::set < std::vector < DefaultSymbolType > > rhs; + for ( std::vector < SymbolType > && target : std::make_moveable_set ( rule.second ) ) + rhs.insert ( GrammarNormalize::normalizeSymbols ( std::move ( target ) ) ); + + DefaultSymbolType lhs = GrammarNormalize::normalizeSymbol ( std::move ( rule.first ) ); + + res->addRules ( std::move ( lhs ), std::move ( rhs ) ); + } + + res->setGeneratesEpsilon ( getGeneratesEpsilon ( ) ); + + return res; + } }; template < class SymbolType > @@ -153,6 +179,23 @@ bool EpsilonFreeCFG < SymbolType >::addRule ( SymbolType leftHandSide, std::vect return rules[std::move ( leftHandSide )].insert ( std::move ( rightHandSide ) ).second; } +template < class SymbolType > +void EpsilonFreeCFG < SymbolType >::addRules ( SymbolType leftHandSide, std::set < std::vector < SymbolType > > rightHandSide ) { + if ( !getNonterminalAlphabet ( ).count ( leftHandSide ) ) + throw GrammarException ( "Rule must rewrite nonterminal symbol" ); + + for ( const std::vector < SymbolType > & rhs : rightHandSide ) { + if ( rhs.size ( ) == 0 ) + throw GrammarException ( "Epsilon rule is not allowed" ); + + for ( const SymbolType & symbol : rhs ) + if ( ! getTerminalAlphabet ( ).count ( symbol ) && ! 
getNonterminalAlphabet ( ).count ( symbol ) ) + throw GrammarException ( "Symbol \"" + std::to_string ( symbol ) + "\" is not neither terminal nor nonterminal symbol" ); + } + + rules [ std::move ( leftHandSide ) ].insert ( std::make_moveable_set ( rightHandSide ).begin ( ), std::make_moveable_set ( rightHandSide ).end ( ) ); +} + template < class SymbolType > const std::map < SymbolType, std::set < std::vector < SymbolType > > > & EpsilonFreeCFG < SymbolType >::getRules ( ) const { return rules; diff --git a/alib2data/src/grammar/ContextFree/GNF.h b/alib2data/src/grammar/ContextFree/GNF.h index a16cc04f48..1266da511c 100644 --- a/alib2data/src/grammar/ContextFree/GNF.h +++ b/alib2data/src/grammar/ContextFree/GNF.h @@ -22,6 +22,7 @@ #include "../GrammarException.h" #include "../common/GrammarFromXMLParser.h" #include "../common/GrammarToXMLComposer.h" +#include "../common/GrammarNormalize.h" namespace grammar { @@ -48,6 +49,8 @@ public: bool addRule ( SymbolType leftHandSide, std::pair < SymbolType, std::vector < SymbolType > > rightHandSide ); + void addRules ( SymbolType leftHandSide, std::set < std::pair < SymbolType, std::vector < SymbolType > > > rightHandSide ); + const std::map < SymbolType, std::set < std::pair < SymbolType, std::vector < SymbolType > > > > & getRules ( ) const; bool removeRule ( const SymbolType & leftHandSide, const std::pair < SymbolType, std::vector < SymbolType > > & rightHandSide ); @@ -119,6 +122,29 @@ public: void composeRules ( std::deque < sax::Token > & out ) const; virtual alib::ObjectBase * inc ( ) &&; + + virtual GrammarBase * normalize ( ) && { + std::set < DefaultSymbolType > nonterminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < NonterminalAlphabet > ( ).get ( ) ) ); + std::set < DefaultSymbolType > terminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < TerminalAlphabet > ( ).get ( ) ) ); + DefaultSymbolType initialSymbol = 
GrammarNormalize::normalizeSymbol ( std::move ( this->template accessElement < InitialSymbol > ( ).get ( ) ) ); + + GNF < > * res = new GNF < > ( std::move ( nonterminals ), std::move ( terminals ), std::move ( initialSymbol ) ); + + for ( std::pair < SymbolType, std::set < std::pair < SymbolType, std::vector < SymbolType > > > > && rule : std::make_moveable_map ( rules ) ) { + + std::set < std::pair < DefaultSymbolType, std::vector < DefaultSymbolType > > > rhs; + for ( std::pair < SymbolType, std::vector < SymbolType > > && target : std::make_moveable_set ( rule.second ) ) + rhs.insert ( GrammarNormalize::normalizeRHS ( std::move ( target ) ) ); + + DefaultSymbolType lhs = GrammarNormalize::normalizeSymbol ( std::move ( rule.first ) ); + + res->addRules ( std::move ( lhs ), std::move ( rhs ) ); + } + + res->setGeneratesEpsilon ( getGeneratesEpsilon ( ) ); + + return res; + } }; template < class SymbolType > @@ -154,6 +180,23 @@ bool GNF < SymbolType >::addRule ( SymbolType leftHandSide, std::pair < SymbolTy return rules[std::move ( leftHandSide )].insert ( std::move ( rightHandSide ) ).second; } +template < class SymbolType > +void GNF < SymbolType >::addRules ( SymbolType leftHandSide, std::set < std::pair < SymbolType, std::vector < SymbolType > > > rightHandSide ) { + if ( !getNonterminalAlphabet ( ).count ( leftHandSide ) ) + throw GrammarException ( "Rule must rewrite nonterminal symbol" ); + + for ( const std::pair < SymbolType, std::vector < SymbolType > > & rhs : rightHandSide ) { + if ( ! 
getTerminalAlphabet ( ).count ( rhs.first ) ) + throw GrammarException ( "Rule must rewrite to terminal symbol" ); + + for ( const SymbolType & rhsNTs : rhs.second ) + if ( !getNonterminalAlphabet ( ).count ( rhsNTs ) ) + throw GrammarException ( "Symbol \"" + std::to_string ( rhsNTs ) + "\" is not a nonterminal symbol" ); + } + + rules[std::move ( leftHandSide )].insert ( std::make_moveable_set ( rightHandSide ).begin ( ), std::make_moveable_set ( rightHandSide ).end ( ) ); +} + template < class SymbolType > const std::map < SymbolType, std::set < std::pair < SymbolType, std::vector < SymbolType > > > > & GNF < SymbolType >::getRules ( ) const { return rules; diff --git a/alib2data/src/grammar/ContextFree/LG.h b/alib2data/src/grammar/ContextFree/LG.h index 6fa87be0b9..c27b3c7927 100644 --- a/alib2data/src/grammar/ContextFree/LG.h +++ b/alib2data/src/grammar/ContextFree/LG.h @@ -24,6 +24,7 @@ #include "../GrammarException.h" #include "../common/GrammarFromXMLParser.h" #include "../common/GrammarToXMLComposer.h" +#include "../common/GrammarNormalize.h" namespace grammar { @@ -51,6 +52,8 @@ public: bool addRule ( SymbolType leftHandSide, std::vector < SymbolType > rightHandSide ); bool addRule ( SymbolType leftHandSide, std::tuple < std::vector < SymbolType >, SymbolType, std::vector < SymbolType > > rightHandSide ); + void addRules ( SymbolType leftHandSide, std::set < std::variant < std::vector < SymbolType >, std::tuple < std::vector < SymbolType >, SymbolType, std::vector < SymbolType > > > > rightHandSide ); + const std::map < SymbolType, std::set < std::variant < std::vector < SymbolType >, std::tuple < std::vector < SymbolType >, SymbolType, std::vector < SymbolType > > > > > & getRules ( ) const; bool removeRule ( const SymbolType & leftHandSide, const std::variant < std::vector < SymbolType >, std::tuple < std::vector < SymbolType >, SymbolType, std::vector < SymbolType > > > & rightHandSide ); @@ -120,6 +123,27 @@ public: void composeRules ( std::deque < 
sax::Token > & out ) const; virtual alib::ObjectBase * inc ( ) &&; + + virtual GrammarBase * normalize ( ) && { + std::set < DefaultSymbolType > nonterminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < NonterminalAlphabet > ( ).get ( ) ) ); + std::set < DefaultSymbolType > terminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < TerminalAlphabet > ( ).get ( ) ) ); + DefaultSymbolType initialSymbol = GrammarNormalize::normalizeSymbol ( std::move ( this->template accessElement < InitialSymbol > ( ).get ( ) ) ); + + LG < > * res = new LG < > ( std::move ( nonterminals ), std::move ( terminals ), std::move ( initialSymbol ) ); + + for ( std::pair < SymbolType, std::set < std::variant < std::vector < SymbolType >, std::tuple < std::vector < SymbolType >, SymbolType, std::vector < SymbolType > > > > > && rule : std::make_moveable_map ( rules ) ) { + + std::set < std::variant < std::vector < DefaultSymbolType >, std::tuple < std::vector < DefaultSymbolType >, DefaultSymbolType, std::vector < DefaultSymbolType > > > > rhs; + for ( std::variant < std::vector < SymbolType >, std::tuple < std::vector < SymbolType >, SymbolType, std::vector < SymbolType > > > && target : std::make_moveable_set ( rule.second ) ) + rhs.insert ( GrammarNormalize::normalizeRHS ( std::move ( target ) ) ); + + DefaultSymbolType lhs = GrammarNormalize::normalizeSymbol ( std::move ( rule.first ) ); + + res->addRules ( std::move ( lhs ), std::move ( rhs ) ); + } + + return res; + } }; template < class SymbolType > @@ -183,6 +207,37 @@ bool LG < SymbolType >::addRule ( SymbolType leftHandSide, std::tuple < std::vec return addRule ( std::move ( leftHandSide ), std::move ( rhs ) ); } +template < class SymbolType > +void LG < SymbolType >::addRules ( SymbolType leftHandSide, std::set < std::variant < std::vector < SymbolType >, std::tuple < std::vector < SymbolType >, SymbolType, std::vector < SymbolType > > > > rightHandSide ) 
{ + if ( !getNonterminalAlphabet ( ).count ( leftHandSide ) ) + throw GrammarException ( "Rule must rewrite nonterminal symbol" ); + + for ( const std::variant < std::vector < SymbolType >, std::tuple < std::vector < SymbolType >, SymbolType, std::vector < SymbolType > > > & element : rightHandSide ) { + if ( element.template is < std::vector < SymbolType > > ( ) ) { + const std::vector < SymbolType > & rhs = element.template get < std::vector < SymbolType > > ( ); + + for ( const SymbolType & symbol : rhs ) + if ( !getTerminalAlphabet ( ).count ( symbol ) ) + throw GrammarException ( "Symbol " + std::to_string ( symbol ) + " is not a terminal symbol" ); + } else { + const std::tuple < std::vector < SymbolType >, SymbolType, std::vector < SymbolType > > & rhs = element.template get < std::tuple < std::vector < SymbolType >, SymbolType, std::vector < SymbolType > > > ( ); + + for ( const SymbolType & symbol : std::get < 0 > ( rhs ) ) + if ( !getTerminalAlphabet ( ).count ( symbol ) ) + throw GrammarException ( "Symbol " + std::to_string ( symbol ) + " is not a terminal symbol" ); + + if ( !getNonterminalAlphabet ( ).count ( std::get < 1 > ( rhs ) ) ) + throw GrammarException ( "Symbol " + std::to_string ( std::get < 1 > ( rhs ) ) + " is not a nonterminal symbol" ); + + for ( const SymbolType & symbol : std::get < 2 > ( rhs ) ) + if ( !getTerminalAlphabet ( ).count ( symbol ) ) + throw GrammarException ( "Symbol " + std::to_string ( symbol ) + " is not a terminal symbol" ); + } + } + + rules [ std::move ( leftHandSide ) ].insert ( std::make_moveable_set ( rightHandSide ).begin ( ), std::make_moveable_set ( rightHandSide ).end ( ) ); +} + template < class SymbolType > const std::map < SymbolType, std::set < std::variant < std::vector < SymbolType >, std::tuple < std::vector < SymbolType >, SymbolType, std::vector < SymbolType > > > > > & LG < SymbolType >::getRules ( ) const { return rules; diff --git a/alib2data/src/grammar/ContextSensitive/CSG.h 
b/alib2data/src/grammar/ContextSensitive/CSG.h index 13cf1f4555..13de8fa26f 100644 --- a/alib2data/src/grammar/ContextSensitive/CSG.h +++ b/alib2data/src/grammar/ContextSensitive/CSG.h @@ -22,6 +22,7 @@ #include "../GrammarException.h" #include "../common/GrammarFromXMLParser.h" #include "../common/GrammarToXMLComposer.h" +#include "../common/GrammarNormalize.h" namespace grammar { @@ -48,6 +49,8 @@ public: bool addRule ( std::vector < SymbolType > lContext, SymbolType leftHandSide, std::vector < SymbolType > rContext, std::vector < SymbolType > rightHandSide ); + void addRules ( std::vector < SymbolType > lContext, SymbolType leftHandSide, std::vector < SymbolType > rContext, std::set < std::vector < SymbolType > > rightHandSide ); + const std::map < std::tuple < std::vector < SymbolType >, SymbolType, std::vector < SymbolType > >, std::set < std::vector < SymbolType > > > & getRules ( ) const; bool removeRule ( const std::vector < SymbolType > & lContext, const SymbolType & leftHandSide, const std::vector < SymbolType > & rContext, const std::vector < SymbolType > & rightHandSide ); @@ -112,6 +115,31 @@ public: void composeRules ( std::deque < sax::Token > & out ) const; virtual alib::ObjectBase * inc ( ) &&; + + virtual GrammarBase * normalize ( ) && { + std::set < DefaultSymbolType > nonterminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < NonterminalAlphabet > ( ).get ( ) ) ); + std::set < DefaultSymbolType > terminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < TerminalAlphabet > ( ).get ( ) ) ); + DefaultSymbolType initialSymbol = GrammarNormalize::normalizeSymbol ( std::move ( this->template accessElement < InitialSymbol > ( ).get ( ) ) ); + + CSG < > * res = new CSG < > ( std::move ( nonterminals ), std::move ( terminals ), std::move ( initialSymbol ) ); + + for ( std::pair < std::tuple < std::vector < SymbolType >, SymbolType, std::vector < SymbolType > >, std::set < 
std::vector < SymbolType > > > && rule : std::make_moveable_map ( rules ) ) { + + std::set < std::vector < DefaultSymbolType > > rhs; + for ( std::vector < SymbolType > && target : std::make_moveable_set ( rule.second ) ) + rhs.insert ( GrammarNormalize::normalizeSymbols ( std::move ( target ) ) ); + + std::vector < DefaultSymbolType > lContext = GrammarNormalize::normalizeSymbols ( std::move ( std::get < 0 > ( rule.first ) ) ); + DefaultSymbolType lhs = GrammarNormalize::normalizeSymbol ( std::move ( std::get < 1 > ( rule.first ) ) ); + std::vector < DefaultSymbolType > rContext = GrammarNormalize::normalizeSymbols ( std::move ( std::get < 2 > ( rule.first ) ) ); + + res->addRules ( std::move ( lContext ), std::move ( lhs ), std::move ( rContext ), std::move ( rhs ) ); + } + + res->setGeneratesEpsilon ( getGeneratesEpsilon ( ) ); + + return res; + } }; template < class SymbolType > @@ -156,6 +184,32 @@ bool CSG < SymbolType >::addRule ( std::vector < SymbolType > lContext, SymbolTy return rules[make_tuple ( std::move ( lContext ), std::move ( leftHandSide ), std::move ( rContext ) )].insert ( std::move ( rightHandSide ) ).second; } +template < class SymbolType > +void CSG < SymbolType >::addRules ( std::vector < SymbolType > lContext, SymbolType leftHandSide, std::vector < SymbolType > rContext, std::set < std::vector < SymbolType > > rightHandSide ) { + for ( const SymbolType & symbol : lContext ) + if ( !getTerminalAlphabet ( ).count ( symbol ) && !getNonterminalAlphabet ( ).count ( symbol ) ) + throw GrammarException ( "Symbol \"" + std::to_string ( symbol ) + "\" is not neither terminal nor nonterminal symbol" ); + + if ( !getNonterminalAlphabet ( ).count ( leftHandSide ) ) + throw GrammarException ( "Rule must rewrite nonterminal symbol" ); + + for ( const SymbolType & symbol : rContext ) + if ( !getTerminalAlphabet ( ).count ( symbol ) && !getNonterminalAlphabet ( ).count ( symbol ) ) + throw GrammarException ( "Symbol \"" + std::to_string ( symbol ) + "\" 
is not neither terminal nor nonterminal symbol" ); + + for ( const std::vector < SymbolType > & rhs : rightHandSide ) { + if ( rhs.size ( ) == 0 ) { + throw GrammarException ( "Epsilon rule is not allowed" ); + } else { + for ( const SymbolType & symbol : rhs ) + if ( !getTerminalAlphabet ( ).count ( symbol ) && !getNonterminalAlphabet ( ).count ( symbol ) ) + throw GrammarException ( "Symbol \"" + std::to_string ( symbol ) + "\" is not neither terminal nor nonterminal symbol" ); + } + } + + rules [ make_tuple ( std::move ( lContext ), std::move ( leftHandSide ), std::move ( rContext ) ) ].insert ( std::make_moveable_set ( rightHandSide ).begin ( ), std::make_moveable_set ( rightHandSide ).end ( ) ); +} + template < class SymbolType > const std::map < std::tuple < std::vector < SymbolType >, SymbolType, std::vector < SymbolType > >, std::set < std::vector < SymbolType > > > & CSG < SymbolType >::getRules ( ) const { return rules; diff --git a/alib2data/src/grammar/ContextSensitive/NonContractingGrammar.h b/alib2data/src/grammar/ContextSensitive/NonContractingGrammar.h index 05988372ac..578f13c127 100644 --- a/alib2data/src/grammar/ContextSensitive/NonContractingGrammar.h +++ b/alib2data/src/grammar/ContextSensitive/NonContractingGrammar.h @@ -22,6 +22,7 @@ #include "../GrammarException.h" #include "../common/GrammarFromXMLParser.h" #include "../common/GrammarToXMLComposer.h" +#include "../common/GrammarNormalize.h" namespace grammar { @@ -48,6 +49,8 @@ public: bool addRule ( std::vector < SymbolType > leftHandSide, std::vector < SymbolType > rightHandSide ); + void addRules ( std::vector < SymbolType > leftHandSide, std::set < std::vector < SymbolType > > rightHandSide ); + const std::map < std::vector < SymbolType >, std::set < std::vector < SymbolType > > > & getRules ( ) const; bool removeRule ( const std::vector < SymbolType > & leftHandSide, const std::vector < SymbolType > & rightHandSide ); @@ -112,6 +115,29 @@ public: void composeRules ( std::deque < 
sax::Token > & out ) const; virtual alib::ObjectBase * inc ( ) &&; + + virtual GrammarBase * normalize ( ) && { + std::set < DefaultSymbolType > nonterminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < NonterminalAlphabet > ( ).get ( ) ) ); + std::set < DefaultSymbolType > terminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < TerminalAlphabet > ( ).get ( ) ) ); + DefaultSymbolType initialSymbol = GrammarNormalize::normalizeSymbol ( std::move ( this->template accessElement < InitialSymbol > ( ).get ( ) ) ); + + NonContractingGrammar < > * res = new NonContractingGrammar < > ( std::move ( nonterminals ), std::move ( terminals ), std::move ( initialSymbol ) ); + + for ( std::pair < std::vector < SymbolType >, std::set < std::vector < SymbolType > > > && rule : std::make_moveable_map ( rules ) ) { + + std::set < std::vector < DefaultSymbolType > > rhs; + for ( std::vector < SymbolType > && target : std::make_moveable_set ( rule.second ) ) + rhs.insert ( GrammarNormalize::normalizeSymbols ( std::move ( target ) ) ); + + std::vector < DefaultSymbolType > lhs = GrammarNormalize::normalizeSymbols ( std::move ( rule.first ) ); + + res->addRules ( std::move ( lhs ), std::move ( rhs ) ); + } + + res->setGeneratesEpsilon ( getGeneratesEpsilon ( ) ); + + return res; + } }; template < class SymbolType > @@ -157,6 +183,34 @@ bool NonContractingGrammar < SymbolType >::addRule ( std::vector < SymbolType > return rules[std::move ( leftHandSide )].insert ( std::move ( rightHandSide ) ).second; } +template < class SymbolType > +void NonContractingGrammar < SymbolType >::addRules ( std::vector < SymbolType > leftHandSide, std::set < std::vector < SymbolType > > rightHandSide ) { /* Adds every alternative of rightHandSide as a rule leftHandSide -> alternative. All checks run before the insertion; GrammarException is thrown when leftHandSide contains no nonterminal, when any symbol is in neither alphabet, or when an alternative is shorter than leftHandSide. */ + int lSize = leftHandSide.size ( ); + + if ( std::all_of ( leftHandSide.begin ( ), leftHandSide.end ( ), [this] ( const SymbolType & symbol ) { + return !getNonterminalAlphabet ( ).count ( symbol ); + } ) ) + throw GrammarException 
( "Rule must rewrite nonterminal symbol" ); + + for ( const SymbolType & symbol : leftHandSide ) + if ( !getTerminalAlphabet ( ).count ( symbol ) && !getNonterminalAlphabet ( ).count ( symbol ) ) + throw GrammarException ( "Symbol \"" + std::to_string ( symbol ) + "\" is not neither terminal nor nonterminal symbol" ); + + for ( const std::vector < SymbolType > & rhs : rightHandSide ) { + + int rSize = rhs.size ( ); /* length of this alternative, not the number of alternatives in the set */ + + if ( lSize > rSize ) /* a non-contracting rule must not shrink the sentential form */ + throw GrammarException ( "Invalid size of right hand side of a rule" ); + + for ( const SymbolType & symbol : rhs ) + if ( !getTerminalAlphabet ( ).count ( symbol ) && !getNonterminalAlphabet ( ).count ( symbol ) ) + throw GrammarException ( "Symbol \"" + std::to_string ( symbol ) + "\" is not neither terminal nor nonterminal symbol" ); + } + + rules [ std::move ( leftHandSide ) ].insert ( std::make_moveable_set ( rightHandSide ).begin ( ), std::make_moveable_set ( rightHandSide ).end ( ) ); +} + template < class SymbolType > const std::map < std::vector < SymbolType >, std::set < std::vector < SymbolType > > > & NonContractingGrammar < SymbolType >::getRules ( ) const { return rules; diff --git a/alib2data/src/grammar/Regular/LeftLG.h b/alib2data/src/grammar/Regular/LeftLG.h index a9d357117a..9fbb34ee60 100644 --- a/alib2data/src/grammar/Regular/LeftLG.h +++ b/alib2data/src/grammar/Regular/LeftLG.h @@ -23,6 +23,7 @@ #include "../GrammarException.h" #include "../common/GrammarFromXMLParser.h" #include "../common/GrammarToXMLComposer.h" +#include "../common/GrammarNormalize.h" namespace grammar { @@ -50,6 +51,8 @@ public: bool addRule ( SymbolType leftHandSide, std::vector < SymbolType > rightHandSide ); bool addRule ( SymbolType leftHandSide, std::pair < SymbolType, std::vector < SymbolType > > rightHandSide ); + void addRules ( SymbolType leftHandSide, std::set < std::variant < std::vector < SymbolType >, std::pair < SymbolType, std::vector < SymbolType > > > > rightHandSide ); + const std::map < SymbolType, std::set 
< std::variant < std::vector < SymbolType >, std::pair < SymbolType, std::vector < SymbolType > > > > > & getRules ( ) const; bool removeRule ( const SymbolType & leftHandSide, const std::variant < std::vector < SymbolType >, std::pair < SymbolType, std::vector < SymbolType > > > & rightHandSide ); @@ -119,6 +122,28 @@ public: void composeRules ( std::deque < sax::Token > & out ) const; virtual alib::ObjectBase * inc ( ) &&; + + virtual GrammarBase * normalize ( ) && { + std::set < DefaultSymbolType > nonterminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < NonterminalAlphabet > ( ).get ( ) ) ); + std::set < DefaultSymbolType > terminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < TerminalAlphabet > ( ).get ( ) ) ); + DefaultSymbolType initialSymbol = GrammarNormalize::normalizeSymbol ( std::move ( this->template accessElement < InitialSymbol > ( ).get ( ) ) ); + + LeftLG < > * res = new LeftLG < > ( std::move ( nonterminals ), std::move ( terminals ), std::move ( initialSymbol ) ); + + for ( std::pair < SymbolType, std::set < std::variant < std::vector < SymbolType >, std::pair < SymbolType, std::vector < SymbolType > > > > > && rule : std::make_moveable_map ( rules ) ) { + + std::set < std::variant < std::vector < DefaultSymbolType >, std::pair < DefaultSymbolType, std::vector < DefaultSymbolType > > > > rhs; + for ( std::variant < std::vector < SymbolType >, std::pair < SymbolType, std::vector < SymbolType > > > && target : std::make_moveable_set ( rule.second ) ) + rhs.insert ( GrammarNormalize::normalizeRHS ( std::move ( target ) ) ); + + DefaultSymbolType lhs = GrammarNormalize::normalizeSymbol ( std::move ( rule.first ) ); + + res->addRules ( std::move ( lhs ), std::move ( rhs ) ); + } + + return res; + } + }; template < class SymbolType > @@ -176,6 +201,33 @@ bool LeftLG < SymbolType >::addRule ( SymbolType leftHandSide, std::pair < Symbo return addRule ( std::move ( 
leftHandSide ), std::move ( rhs ) ); } +template < class SymbolType > +void LeftLG < SymbolType >::addRules ( SymbolType leftHandSide, std::set < std::variant < std::vector < SymbolType >, std::pair < SymbolType, std::vector < SymbolType > > > > rightHandSide ) { + if ( !getNonterminalAlphabet ( ).count ( leftHandSide ) ) + throw GrammarException ( "Rule must rewrite nonterminal symbol" ); + + for ( const std::variant < std::vector < SymbolType >, std::pair < SymbolType, std::vector < SymbolType > > > & element : rightHandSide ) { + if ( element.template is < std::vector < SymbolType > > ( ) ) { + for ( const auto & symbol : element.template get < std::vector < SymbolType > > ( ) ) + if ( !getTerminalAlphabet ( ).count ( symbol ) ) + throw GrammarException ( "Symbol " + std::to_string ( symbol ) + " is not a terminal symbol" ); + + } else { + const std::pair < SymbolType, std::vector < SymbolType > > & rhs = element.template get < std::pair < SymbolType, std::vector < SymbolType > > > ( ); + + if ( !getNonterminalAlphabet ( ).count ( rhs.first ) ) + throw GrammarException ( "Symbol " + std::to_string ( rhs.first ) + " is not a nonterminal symbol" ); + + for ( const auto & symbol : rhs.second ) + if ( !getTerminalAlphabet ( ).count ( symbol ) ) + throw GrammarException ( "Symbol " + std::to_string ( symbol ) + " is not a terminal symbol" ); + + } + } + + rules [ std::move ( leftHandSide ) ].insert ( std::make_moveable_set ( rightHandSide ).begin ( ), std::make_moveable_set ( rightHandSide ).end ( ) ); +} + template < class SymbolType > const std::map < SymbolType, std::set < std::variant < std::vector < SymbolType >, std::pair < SymbolType, std::vector < SymbolType > > > > > & LeftLG < SymbolType >::getRules ( ) const { return rules; diff --git a/alib2data/src/grammar/Regular/LeftRG.h b/alib2data/src/grammar/Regular/LeftRG.h index 47fc58e385..c29b4f6ffc 100644 --- a/alib2data/src/grammar/Regular/LeftRG.h +++ b/alib2data/src/grammar/Regular/LeftRG.h @@ -23,6 +23,7 @@ 
#include "../GrammarException.h" #include "../common/GrammarFromXMLParser.h" #include "../common/GrammarToXMLComposer.h" +#include "../common/GrammarNormalize.h" namespace grammar { @@ -90,6 +91,11 @@ public: */ bool addRule ( SymbolType leftHandSide, std::pair < SymbolType, SymbolType > rightHandSide ); + /** + * Add new rules of a grammar in form of A -> Ba | Cb | ... | a | b | ..., where A, B, C ... \in N and a, b ... \in T + */ + void addRules ( SymbolType leftHandSide, std::set < std::variant < SymbolType, std::pair < SymbolType, SymbolType > > > rightHandSide ); + /** * Get rules of the grammar */ @@ -197,6 +203,29 @@ public: void composeRules ( std::deque < sax::Token > & out ) const; virtual alib::ObjectBase * inc ( ) &&; + + virtual GrammarBase * normalize ( ) && { + std::set < DefaultSymbolType > nonterminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < NonterminalAlphabet > ( ).get ( ) ) ); + std::set < DefaultSymbolType > terminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < TerminalAlphabet > ( ).get ( ) ) ); + DefaultSymbolType initialSymbol = GrammarNormalize::normalizeSymbol ( std::move ( this->template accessElement < InitialSymbol > ( ).get ( ) ) ); + + LeftRG < > * res = new LeftRG < > ( std::move ( nonterminals ), std::move ( terminals ), std::move ( initialSymbol ) ); + + for ( std::pair < SymbolType, std::set < std::variant < SymbolType, std::pair < SymbolType, SymbolType > > > > && rule : std::make_moveable_map ( rules ) ) { + + std::set < std::variant < DefaultSymbolType, std::pair < DefaultSymbolType, DefaultSymbolType > > > rhs; + for ( std::variant < SymbolType, std::pair < SymbolType, SymbolType > > && target : std::make_moveable_set ( rule.second ) ) + rhs.insert ( GrammarNormalize::normalizeRHS ( std::move ( target ) ) ); + + DefaultSymbolType lhs = GrammarNormalize::normalizeSymbol ( std::move ( rule.first ) ); + + res->addRules ( std::move ( lhs ),
std::move ( rhs ) ); + } + + res->setGeneratesEpsilon ( getGeneratesEpsilon ( ) ); + + return res; + } }; template < class SymbolType > @@ -237,6 +266,29 @@ bool LeftRG < SymbolType >::addRule ( SymbolType leftHandSide, std::variant < Sy return rules [ std::move ( leftHandSide ) ].insert ( std::move ( rightHandSide ) ).second; } +template < class SymbolType > +void LeftRG < SymbolType >::addRules ( SymbolType leftHandSide, std::set < std::variant < SymbolType, std::pair < SymbolType, SymbolType > > > rightHandSide ) { + if ( !getNonterminalAlphabet ( ).count ( leftHandSide ) ) + throw GrammarException ( "Rule must rewrite nonterminal symbol" ); + + for ( const std::variant < SymbolType, std::pair < SymbolType, SymbolType > > & element : rightHandSide ) { + if ( element.template is < SymbolType > ( ) ) { + const SymbolType & rhs = element.template get < SymbolType > ( ); + + if ( ! getTerminalAlphabet ( ).count ( rhs ) ) + throw GrammarException ( "Rule must rewrite to terminal symbol" ); + + } else { + const std::pair < SymbolType, SymbolType > & rhs = element.template get < std::pair < SymbolType, SymbolType > > ( ); + + if ( ! getNonterminalAlphabet ( ).count ( rhs.first ) || ! 
getTerminalAlphabet ( ).count ( rhs.second ) ) + throw GrammarException ( "Rule must rewrite to nonterminal symbol followed by terminal symbol" ); /* rhs is ( nonterminal, terminal ) here, as checked by the condition above */ + } + } + + rules [ std::move ( leftHandSide ) ].insert ( std::make_moveable_set ( rightHandSide ).begin ( ), std::make_moveable_set ( rightHandSide ).end ( ) ); +} + template < class SymbolType > bool LeftRG < SymbolType >::addRule ( SymbolType leftHandSide, SymbolType rightHandSide ) { std::variant < SymbolType, std::pair < SymbolType, SymbolType > > rhs ( std::move ( rightHandSide ) ); diff --git a/alib2data/src/grammar/Regular/RightLG.h b/alib2data/src/grammar/Regular/RightLG.h index c3d430c30b..7f3f14ea8a 100644 --- a/alib2data/src/grammar/Regular/RightLG.h +++ b/alib2data/src/grammar/Regular/RightLG.h @@ -23,6 +23,7 @@ #include "../GrammarException.h" #include "../common/GrammarFromXMLParser.h" #include "../common/GrammarToXMLComposer.h" +#include "../common/GrammarNormalize.h" namespace grammar { @@ -50,6 +51,8 @@ public: bool addRule ( SymbolType leftHandSide, std::vector < SymbolType > rightHandSide ); bool addRule ( SymbolType leftHandSide, std::pair < std::vector < SymbolType >, SymbolType > rightHandSide ); + void addRules ( SymbolType leftHandSide, std::set < std::variant < std::vector < SymbolType >, std::pair < std::vector < SymbolType >, SymbolType > > > rightHandSide ); + const std::map < SymbolType, std::set < std::variant < std::vector < SymbolType >, std::pair < std::vector < SymbolType >, SymbolType > > > > & getRules ( ) const; bool removeRule ( const SymbolType & leftHandSide, const std::variant < std::vector < SymbolType >, std::pair < std::vector < SymbolType >, SymbolType > > & rightHandSide ); @@ -119,6 +122,27 @@ public: void composeRules ( std::deque < sax::Token > & out ) const; virtual alib::ObjectBase * inc ( ) &&; + + virtual GrammarBase * normalize ( ) && { + std::set < DefaultSymbolType > nonterminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent <
NonterminalAlphabet > ( ).get ( ) ) ); + std::set < DefaultSymbolType > terminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < TerminalAlphabet > ( ).get ( ) ) ); + DefaultSymbolType initialSymbol = GrammarNormalize::normalizeSymbol ( std::move ( this->template accessElement < InitialSymbol > ( ).get ( ) ) ); + + RightLG < > * res = new RightLG < > ( std::move ( nonterminals ), std::move ( terminals ), std::move ( initialSymbol ) ); + + for ( std::pair < SymbolType, std::set < std::variant < std::vector < SymbolType >, std::pair < std::vector < SymbolType >, SymbolType > > > > && rule : std::make_moveable_map ( rules ) ) { + + std::set < std::variant < std::vector < DefaultSymbolType >, std::pair < std::vector < DefaultSymbolType >, DefaultSymbolType > > > rhs; + for ( std::variant < std::vector < SymbolType >, std::pair < std::vector < SymbolType >, SymbolType > > && target : std::make_moveable_set ( rule.second ) ) + rhs.insert ( GrammarNormalize::normalizeRHS ( std::move ( target ) ) ); + + DefaultSymbolType lhs = GrammarNormalize::normalizeSymbol ( std::move ( rule.first ) ); + + res->addRules ( std::move ( lhs ), std::move ( rhs ) ); + } + + return res; + } }; template < class SymbolType > @@ -176,6 +200,31 @@ bool RightLG < SymbolType >::addRule ( SymbolType leftHandSide, std::pair < std: return addRule ( std::move ( leftHandSide ), std::move ( rhs ) ); } +template < class SymbolType > +void RightLG < SymbolType >::addRules ( SymbolType leftHandSide, std::set < std::variant < std::vector < SymbolType >, std::pair < std::vector < SymbolType >, SymbolType > > > rightHandSide ) { + if ( !getNonterminalAlphabet ( ).count ( leftHandSide ) ) + throw GrammarException ( "Rule must rewrite nonterminal symbol" ); + + for ( const std::variant < std::vector < SymbolType >, std::pair < std::vector < SymbolType >, SymbolType > > & element : rightHandSide ) { + if ( element.template is < std::vector < SymbolType > > ( ) ) { + for ( 
const auto & symbol : element.template get < std::vector < SymbolType > > ( ) ) + if ( !getTerminalAlphabet ( ).count ( symbol ) ) + throw GrammarException ( "Symbol " + std::to_string ( symbol ) + " is not a terminal symbol" ); + } else { + const std::pair < std::vector < SymbolType >, SymbolType > & rhs = element.template get < std::pair < std::vector < SymbolType >, SymbolType > > ( ); + + if ( !getNonterminalAlphabet ( ).count ( rhs.second ) ) + throw GrammarException ( "Symbol " + std::to_string ( rhs.second ) + " is not a nonterminal symbol" ); + + for ( const auto & symbol : rhs.first ) + if ( !getTerminalAlphabet ( ).count ( symbol ) ) + throw GrammarException ( "Symbol " + std::to_string ( symbol ) + " is not a terminal symbol" ); + } + } + + rules [ std::move ( leftHandSide ) ].insert ( std::make_moveable_set ( rightHandSide ).begin ( ), std::make_moveable_set ( rightHandSide ).end ( ) ); +} + template < class SymbolType > const std::map < SymbolType, std::set < std::variant < std::vector < SymbolType >, std::pair < std::vector < SymbolType >, SymbolType > > > > & RightLG < SymbolType >::getRules ( ) const { return rules; diff --git a/alib2data/src/grammar/Regular/RightRG.h b/alib2data/src/grammar/Regular/RightRG.h index de42427fa2..f1f7f171dc 100644 --- a/alib2data/src/grammar/Regular/RightRG.h +++ b/alib2data/src/grammar/Regular/RightRG.h @@ -23,6 +23,7 @@ #include "../GrammarException.h" #include "../common/GrammarFromXMLParser.h" #include "../common/GrammarToXMLComposer.h" +#include "../common/GrammarNormalize.h" namespace grammar { @@ -68,6 +69,8 @@ public: bool addRule ( SymbolType leftHandSide, SymbolType rightHandSide ); bool addRule ( SymbolType leftHandSide, std::pair < SymbolType, SymbolType > rightHandSide ); + void addRules ( SymbolType leftHandSide, std::set < std::variant < SymbolType, std::pair < SymbolType, SymbolType > > > rightHandSide ); + const std::map < SymbolType, std::set < std::variant < SymbolType, std::pair < SymbolType, 
SymbolType > > > > & getRules ( ) const; bool removeRule ( const SymbolType & leftHandSide, const std::variant < SymbolType, std::pair < SymbolType, SymbolType > > & rightHandSide ); @@ -140,6 +143,29 @@ public: void composeRules ( std::deque < sax::Token > & out ) const; virtual alib::ObjectBase * inc ( ) &&; + + virtual GrammarBase * normalize ( ) && { + std::set < DefaultSymbolType > nonterminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < NonterminalAlphabet > ( ).get ( ) ) ); + std::set < DefaultSymbolType > terminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < TerminalAlphabet > ( ).get ( ) ) ); + DefaultSymbolType initialSymbol = GrammarNormalize::normalizeSymbol ( std::move ( this->template accessElement < InitialSymbol > ( ).get ( ) ) ); + + RightRG < > * res = new RightRG < > ( std::move ( nonterminals ), std::move ( terminals ), std::move ( initialSymbol ) ); + + for ( std::pair < SymbolType, std::set < std::variant < SymbolType, std::pair < SymbolType, SymbolType > > > > && rule : std::make_moveable_map ( rules ) ) { + + std::set < std::variant < DefaultSymbolType, std::pair < DefaultSymbolType, DefaultSymbolType > > > rhs; + for ( std::variant < SymbolType, std::pair < SymbolType, SymbolType > > && target : std::make_moveable_set ( rule.second ) ) + rhs.insert ( GrammarNormalize::normalizeRHS ( std::move ( target ) ) ); + + DefaultSymbolType lhs = GrammarNormalize::normalizeSymbol ( std::move ( rule.first ) ); + + res->addRules ( std::move ( lhs ), std::move ( rhs ) ); + } + + res->setGeneratesEpsilon ( getGeneratesEpsilon ( ) ); + + return res; + } }; template < class SymbolType > @@ -194,6 +220,28 @@ bool RightRG < SymbolType >::addRule ( SymbolType leftHandSide, std::pair < Symb return addRule ( std::move ( leftHandSide ), std::move ( rhs ) ); } +template < class SymbolType > +void RightRG < SymbolType >::addRules ( SymbolType leftHandSide, std::set < std::variant < 
SymbolType, std::pair < SymbolType, SymbolType > > > rightHandSide ) { + if ( !getNonterminalAlphabet ( ).count ( leftHandSide ) ) + throw GrammarException ( "Rule must rewrite nonterminal symbol" ); + + for ( const std::variant < SymbolType, std::pair < SymbolType, SymbolType > > & element : rightHandSide ) { + if ( element.template is < SymbolType > ( ) ) { + const SymbolType & rhs = element.template get < SymbolType > ( ); + + if ( ! getTerminalAlphabet ( ).count ( rhs ) ) + throw GrammarException ( "Rule must rewrite to terminal symbol" ); + } else { + const std::pair < SymbolType, SymbolType > & rhs = element.template get < std::pair < SymbolType, SymbolType > > ( ); + + if ( ! getTerminalAlphabet ( ).count ( rhs.first ) || ! getNonterminalAlphabet ( ).count ( rhs.second ) ) + throw GrammarException ( "Rule must rewrite to terminal symbol followed by nonterminal symbol" ); + } + } + + rules[std::move ( leftHandSide )].insert ( std::make_moveable_set ( rightHandSide ).begin ( ), std::make_moveable_set ( rightHandSide ).end ( ) ); +} + template < class SymbolType > const std::map < SymbolType, std::set < std::variant < SymbolType, std::pair < SymbolType, SymbolType > > > > & RightRG < SymbolType >::getRules ( ) const { return rules; diff --git a/alib2data/src/grammar/Unrestricted/ContextPreservingUnrestrictedGrammar.h b/alib2data/src/grammar/Unrestricted/ContextPreservingUnrestrictedGrammar.h index dbdb089aad..5ff718d0bf 100644 --- a/alib2data/src/grammar/Unrestricted/ContextPreservingUnrestrictedGrammar.h +++ b/alib2data/src/grammar/Unrestricted/ContextPreservingUnrestrictedGrammar.h @@ -22,6 +22,7 @@ #include "../GrammarException.h" #include "../common/GrammarFromXMLParser.h" #include "../common/GrammarToXMLComposer.h" +#include "../common/GrammarNormalize.h" namespace grammar { @@ -47,6 +48,8 @@ public: bool addRule ( std::vector < SymbolType > lContext, SymbolType leftHandSide, std::vector < SymbolType > rContext, std::vector < SymbolType > rightHandSide ); 
+ void addRules ( std::vector < SymbolType > lContext, SymbolType leftHandSide, std::vector < SymbolType > rContext, std::set < std::vector < SymbolType > > rightHandSide ); + const std::map < std::tuple < std::vector < SymbolType >, SymbolType, std::vector < SymbolType > >, std::set < std::vector < SymbolType > > > & getRules ( ) const; bool removeRule ( const std::vector < SymbolType > & lContext, const SymbolType & leftHandSide, const std::vector < SymbolType > & rContext, const std::vector < SymbolType > & rightHandSide ); @@ -108,6 +111,29 @@ public: void composeRules ( std::deque < sax::Token > & out ) const; virtual alib::ObjectBase * inc ( ) &&; + + virtual GrammarBase * normalize ( ) && { + std::set < DefaultSymbolType > nonterminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < NonterminalAlphabet > ( ).get ( ) ) ); + std::set < DefaultSymbolType > terminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < TerminalAlphabet > ( ).get ( ) ) ); + DefaultSymbolType initialSymbol = GrammarNormalize::normalizeSymbol ( std::move ( this->template accessElement < InitialSymbol > ( ).get ( ) ) ); + + ContextPreservingUnrestrictedGrammar < > * res = new ContextPreservingUnrestrictedGrammar < > ( std::move ( nonterminals ), std::move ( terminals ), std::move ( initialSymbol ) ); + + for ( std::pair < std::tuple < std::vector < SymbolType >, SymbolType, std::vector < SymbolType > >, std::set < std::vector < SymbolType > > > && rule : std::make_moveable_map ( rules ) ) { + + std::set < std::vector < DefaultSymbolType > > rhs; + for ( std::vector < SymbolType > && target : std::make_moveable_set ( rule.second ) ) + rhs.insert ( GrammarNormalize::normalizeSymbols ( std::move ( target ) ) ); + + std::vector < DefaultSymbolType > lContext = GrammarNormalize::normalizeSymbols ( std::move ( std::get < 0 > ( rule.first ) ) ); + DefaultSymbolType lhs = GrammarNormalize::normalizeSymbol ( std::move ( 
std::get < 1 > ( rule.first ) ) ); + std::vector < DefaultSymbolType > rContext = GrammarNormalize::normalizeSymbols ( std::move ( std::get < 2 > ( rule.first ) ) ); + + res->addRules ( std::move ( lContext ), std::move ( lhs ), std::move ( rContext ), std::move ( rhs ) ); + } + + return res; + } }; template < class SymbolType > @@ -148,6 +174,27 @@ bool ContextPreservingUnrestrictedGrammar < SymbolType >::addRule ( std::vector return rules[make_tuple ( std::move ( lContext ), std::move ( leftHandSide ), std::move ( rContext ) )].insert ( std::move ( rightHandSide ) ).second; } +template < class SymbolType > +void ContextPreservingUnrestrictedGrammar < SymbolType >::addRules ( std::vector < SymbolType > lContext, SymbolType leftHandSide, std::vector < SymbolType > rContext, std::set < std::vector < SymbolType > > rightHandSide ) { + for ( const SymbolType & symbol : lContext ) + if ( !getTerminalAlphabet ( ).count ( symbol ) && !getNonterminalAlphabet ( ).count ( symbol ) ) + throw GrammarException ( "Symbol \"" + std::to_string ( symbol ) + "\" is not neither terminal nor nonterminal symbol" ); + + if ( !getNonterminalAlphabet ( ).count ( leftHandSide ) ) + throw GrammarException ( "Rule must rewrite nonterminal symbol" ); + + for ( const SymbolType & symbol : rContext ) + if ( !getTerminalAlphabet ( ).count ( symbol ) && !getNonterminalAlphabet ( ).count ( symbol ) ) + throw GrammarException ( "Symbol \"" + std::to_string ( symbol ) + "\" is not neither terminal nor nonterminal symbol" ); + + for ( const std::vector < SymbolType > & rhs : rightHandSide ) + for ( const SymbolType & symbol : rhs ) + if ( !getTerminalAlphabet ( ).count ( symbol ) && !getNonterminalAlphabet ( ).count ( symbol ) ) + throw GrammarException ( "Symbol \"" + std::to_string ( symbol ) + "\" is not neither terminal nor nonterminal symbol" ); + + rules [ make_tuple ( std::move ( lContext ), std::move ( leftHandSide ), std::move ( rContext ) ) ].insert ( std::make_moveable_set ( rightHandSide 
).begin ( ), std::make_moveable_set ( rightHandSide ).end ( ) ); +} + template < class SymbolType > const std::map < std::tuple < std::vector < SymbolType >, SymbolType, std::vector < SymbolType > >, std::set < std::vector < SymbolType > > > & ContextPreservingUnrestrictedGrammar < SymbolType >::getRules ( ) const { return rules; diff --git a/alib2data/src/grammar/Unrestricted/UnrestrictedGrammar.h b/alib2data/src/grammar/Unrestricted/UnrestrictedGrammar.h index 0b5b287d17..afa13eddd6 100644 --- a/alib2data/src/grammar/Unrestricted/UnrestrictedGrammar.h +++ b/alib2data/src/grammar/Unrestricted/UnrestrictedGrammar.h @@ -22,6 +22,7 @@ #include "../GrammarException.h" #include "../common/GrammarFromXMLParser.h" #include "../common/GrammarToXMLComposer.h" +#include "../common/GrammarNormalize.h" namespace grammar { @@ -47,6 +48,8 @@ public: bool addRule ( std::vector < SymbolType > leftHandSide, std::vector < SymbolType > rightHandSide ); + void addRules ( std::vector < SymbolType > leftHandSide, std::set < std::vector < SymbolType > > rightHandSide ); + const std::map < std::vector < SymbolType >, std::set < std::vector < SymbolType > > > & getRules ( ) const; bool removeRule ( const std::vector < SymbolType > & leftHandSide, const std::vector < SymbolType > & rightHandSide ); @@ -108,6 +111,27 @@ public: void composeRules ( std::deque < sax::Token > & out ) const; virtual alib::ObjectBase * inc ( ) &&; + + virtual GrammarBase * normalize ( ) && { + std::set < DefaultSymbolType > nonterminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < NonterminalAlphabet > ( ).get ( ) ) ); + std::set < DefaultSymbolType > terminals = GrammarNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < TerminalAlphabet > ( ).get ( ) ) ); + DefaultSymbolType initialSymbol = GrammarNormalize::normalizeSymbol ( std::move ( this->template accessElement < InitialSymbol > ( ).get ( ) ) ); + + UnrestrictedGrammar < > * res = new 
UnrestrictedGrammar < > ( std::move ( nonterminals ), std::move ( terminals ), std::move ( initialSymbol ) ); + + for ( std::pair < std::vector < SymbolType >, std::set < std::vector < SymbolType > > > && rule : std::make_moveable_map ( rules ) ) { + + std::set < std::vector < DefaultSymbolType > > rhs; + for ( std::vector < SymbolType > && target : std::make_moveable_set ( rule.second ) ) + rhs.insert ( GrammarNormalize::normalizeSymbols ( std::move ( target ) ) ); + + std::vector < DefaultSymbolType > lhs = GrammarNormalize::normalizeSymbols ( std::move ( rule.first ) ); + + res->addRules ( std::move ( lhs ), std::move ( rhs ) ); + } + + return res; + } }; template < class SymbolType > @@ -146,6 +170,25 @@ bool UnrestrictedGrammar < SymbolType >::addRule ( std::vector < SymbolType > le return rules[std::move ( leftHandSide )].insert ( std::move ( rightHandSide ) ).second; } +template < class SymbolType > +void UnrestrictedGrammar < SymbolType >::addRules ( std::vector < SymbolType > leftHandSide, std::set < std::vector < SymbolType > > rightHandSide ) { + if ( std::all_of ( leftHandSide.begin ( ), leftHandSide.end ( ), [this] ( const SymbolType & symbol ) { /* taken by reference so the check does not copy every symbol */ + return !getNonterminalAlphabet ( ).count ( symbol ); + } ) ) + throw GrammarException ( "Rule must rewrite nonterminal symbol" ); + + for ( const SymbolType & symbol : leftHandSide ) + if ( !getTerminalAlphabet ( ).count ( symbol ) && !getNonterminalAlphabet ( ).count ( symbol ) ) + throw GrammarException ( "Symbol \"" + std::to_string ( symbol ) + "\" is not neither terminal nor nonterminal symbol" ); + + for ( const std::vector < SymbolType > & rhs : rightHandSide ) + for ( const SymbolType & symbol : rhs ) + if ( !getTerminalAlphabet ( ).count ( symbol ) && !getNonterminalAlphabet ( ).count ( symbol ) ) + throw GrammarException ( "Symbol \"" + std::to_string ( symbol ) + "\" is not neither terminal nor nonterminal symbol" ); + + rules [ std::move ( leftHandSide ) ].insert ( std::make_moveable_set (
rightHandSide ).begin ( ), std::make_moveable_set ( rightHandSide ).end ( ) ); +} + template < class SymbolType > const std::map < std::vector < SymbolType >, std::set < std::vector < SymbolType > > > & UnrestrictedGrammar < SymbolType >::getRules ( ) const { return rules; diff --git a/alib2data/src/grammar/common/GrammarNormalize.h b/alib2data/src/grammar/common/GrammarNormalize.h new file mode 100644 index 0000000000..88950000ce --- /dev/null +++ b/alib2data/src/grammar/common/GrammarNormalize.h @@ -0,0 +1,126 @@ +/* + * GrammarNormalize.h + * + * Created on: Apr 7, 2017 + * Author: Jan Travnicek + */ + +#ifndef GRAMMAR_NORMALIZE_H_ +#define GRAMMAR_NORMALIZE_H_ + +#include <vector> +#include <tuple> +#include <set> +#include <variant> + +#include <object/AnyObject.h> + +namespace grammar { + +/** + * This class contains static helpers that convert alphabets, symbols, symbol sequences and rule right-hand sides of any SymbolType to their DefaultSymbolType form. + */ +class GrammarNormalize { +public: + template < class SymbolType > + static std::set < DefaultSymbolType > normalizeAlphabet ( std::set < SymbolType > && symbols ); + + template < class SymbolType > + static DefaultSymbolType normalizeSymbol ( SymbolType && symbol ); + + template < class SymbolType > + static std::vector < DefaultSymbolType > normalizeSymbols ( std::vector < SymbolType > && symbols ); + + template < class SymbolType > + static std::pair < DefaultSymbolType, std::vector < DefaultSymbolType > > normalizeRHS ( std::pair < SymbolType, std::vector < SymbolType > > && symbol ); + + template < class SymbolType > + static std::variant < DefaultSymbolType, std::pair < DefaultSymbolType, DefaultSymbolType > > normalizeRHS ( std::variant < SymbolType, std::pair < SymbolType, SymbolType > > && symbol ); + + template < class SymbolType > + static std::variant < std::vector < DefaultSymbolType >, std::pair < DefaultSymbolType, std::vector < DefaultSymbolType > > > normalizeRHS ( std::variant < std::vector < SymbolType >, std::pair < SymbolType, std::vector < SymbolType
> > > && symbol ); + + template < class SymbolType > + static std::variant < std::vector < DefaultSymbolType >, std::pair < std::vector < DefaultSymbolType >, DefaultSymbolType > > normalizeRHS ( std::variant < std::vector < SymbolType >, std::pair < std::vector < SymbolType >, SymbolType > > && symbol ); + + template < class SymbolType > + static std::variant < std::vector < DefaultSymbolType >, std::tuple < std::vector < DefaultSymbolType >, DefaultSymbolType, std::vector < DefaultSymbolType > > > normalizeRHS ( std::variant < std::vector < SymbolType >, std::tuple < std::vector < SymbolType >, SymbolType, std::vector < SymbolType > > > && symbols ); + +}; + +template < class SymbolType > +std::set < DefaultSymbolType > GrammarNormalize::normalizeAlphabet ( std::set < SymbolType > && symbols ) { + std::set < DefaultSymbolType > res; + for ( SymbolType && symbol : std::make_moveable_set ( symbols ) ) { + res.insert ( normalizeSymbol ( std::move ( symbol ) ) ); + } + return res; +} + +template < class SymbolType > +DefaultSymbolType GrammarNormalize::normalizeSymbol ( SymbolType && symbol ) { + return DefaultSymbolType ( alib::AnyObject < SymbolType > ( std::move ( symbol ) ) ); +} + +template < class SymbolType > +std::vector < DefaultSymbolType > GrammarNormalize::normalizeSymbols ( std::vector < SymbolType > && symbols ) { + std::vector < DefaultSymbolType > res; res.reserve ( symbols.size ( ) ); /* result has exactly one element per input symbol, so allocate once; kept on this patch line so the hunk line count is unchanged */ + for ( SymbolType & symbol : symbols ) { + res.push_back ( normalizeSymbol ( std::move ( symbol ) ) ); + } + return res; +} + +template < class SymbolType > +std::pair < DefaultSymbolType, std::vector < DefaultSymbolType > > GrammarNormalize::normalizeRHS ( std::pair < SymbolType, std::vector < SymbolType > > && symbol ) { + return std::make_pair ( normalizeSymbol ( std::move ( symbol.first ) ), normalizeSymbols ( std::move ( symbol.second ) ) ); +} + +template < class SymbolType > +std::variant < DefaultSymbolType, std::pair < DefaultSymbolType, DefaultSymbolType > > GrammarNormalize::normalizeRHS (
std::variant < SymbolType, std::pair < SymbolType, SymbolType > > && symbol ) { + if ( symbol.template is < SymbolType > ( ) ) { + return std::variant < DefaultSymbolType, std::pair < DefaultSymbolType, DefaultSymbolType > > ( normalizeSymbol ( std::move ( symbol.template get < SymbolType > ( ) ) ) ); + } else { + std::pair < SymbolType, SymbolType > & inner = symbol.template get < std::pair < SymbolType, SymbolType > > ( ); + return std::variant < DefaultSymbolType, std::pair < DefaultSymbolType, DefaultSymbolType > > ( std::make_pair ( normalizeSymbol ( std::move ( inner.first ) ), normalizeSymbol ( std::move ( inner.second ) ) ) ); + } +} + +template < class SymbolType > +std::variant < std::vector < DefaultSymbolType >, std::pair < DefaultSymbolType, std::vector < DefaultSymbolType > > > GrammarNormalize::normalizeRHS ( std::variant < std::vector < SymbolType >, std::pair < SymbolType, std::vector < SymbolType > > > && symbol ) { + if ( symbol.template is < std::vector < SymbolType > > ( ) ) { + return std::variant < std::vector < DefaultSymbolType >, std::pair < DefaultSymbolType, std::vector < DefaultSymbolType > > > ( normalizeSymbols ( std::move ( symbol.template get < std::vector < SymbolType > > ( ) ) ) ); + } else { + std::pair < SymbolType, std::vector < SymbolType > > & inner = symbol.template get < std::pair < SymbolType, std::vector < SymbolType > > > ( ); + return std::variant < std::vector < DefaultSymbolType >, std::pair < DefaultSymbolType, std::vector < DefaultSymbolType > > > ( std::make_pair ( normalizeSymbol ( std::move ( inner.first ) ), normalizeSymbols ( std::move ( inner.second ) ) ) ); + } +} + +template < class SymbolType > +std::variant < std::vector < DefaultSymbolType >, std::pair < std::vector < DefaultSymbolType >, DefaultSymbolType > > GrammarNormalize::normalizeRHS ( std::variant < std::vector < SymbolType >, std::pair < std::vector < SymbolType >, SymbolType > > && symbol ) { + if ( symbol.template is < std::vector < SymbolType 
> > ( ) ) { + return std::variant < std::vector < DefaultSymbolType >, std::pair < std::vector < DefaultSymbolType >, DefaultSymbolType > > ( normalizeSymbols ( std::move ( symbol.template get < std::vector < SymbolType > > ( ) ) ) ); + } else { + std::pair < std::vector < SymbolType >, SymbolType > & inner = symbol.template get < std::pair < std::vector < SymbolType >, SymbolType > > ( ); + return std::variant < std::vector < DefaultSymbolType >, std::pair < std::vector < DefaultSymbolType >, DefaultSymbolType > > ( std::make_pair ( normalizeSymbols ( std::move ( inner.first ) ), normalizeSymbol ( std::move ( inner.second ) ) ) ); + } +} + +template < class SymbolType > +std::variant < std::vector < DefaultSymbolType >, std::tuple < std::vector < DefaultSymbolType >, DefaultSymbolType, std::vector < DefaultSymbolType > > > GrammarNormalize::normalizeRHS ( std::variant < std::vector < SymbolType >, std::tuple < std::vector < SymbolType >, SymbolType, std::vector < SymbolType > > > && symbols ) { + if ( symbols.template is < std::vector < SymbolType > > ( ) ) { + return std::variant < std::vector < DefaultSymbolType >, std::tuple < std::vector < DefaultSymbolType >, DefaultSymbolType, std::vector < DefaultSymbolType > > > ( normalizeSymbols ( std::move ( symbols.template get < std::vector < SymbolType > > ( ) ) ) ); + } else { + std::tuple < std::vector < SymbolType >, SymbolType, std::vector < SymbolType > > & inner = symbols.template get < std::tuple < std::vector < SymbolType >, SymbolType, std::vector < SymbolType > > > ( ); + + std::vector < DefaultSymbolType > first = normalizeSymbols ( std::move ( std::get < 0 > ( inner ) ) ); + DefaultSymbolType second = normalizeSymbol ( std::move ( std::get < 1 > ( inner ) ) ); + std::vector < DefaultSymbolType > third = normalizeSymbols ( std::move ( std::get < 2 > ( inner ) ) ); + + return std::variant < std::vector < DefaultSymbolType >, std::tuple < std::vector < DefaultSymbolType >, DefaultSymbolType, std::vector < 
DefaultSymbolType > > > ( std::make_tuple ( std::move ( first ), std::move ( second ), std::move ( third ) ) ); + } +} + +} /* namespace grammar */ + +#endif /* GRAMMAR_NORMALIZE_H_ */ -- GitLab