From 35fb1d0963fb9f75841f586d5dffd7a58853d6ae Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Sat, 2 Dec 2017 20:52:23 +0100 Subject: [PATCH] document Context sensitive grammar --- alib2data/src/grammar/ContextSensitive/CSG.h | 334 ++++++++++++++++++- 1 file changed, 319 insertions(+), 15 deletions(-) diff --git a/alib2data/src/grammar/ContextSensitive/CSG.h b/alib2data/src/grammar/ContextSensitive/CSG.h index 0fb3e28c13..ccd1e52c6d 100644 --- a/alib2data/src/grammar/ContextSensitive/CSG.h +++ b/alib2data/src/grammar/ContextSensitive/CSG.h @@ -1,6 +1,22 @@ /* * CSG.h * + * This file is part of Algorithms library toolkit. + * Copyright (C) 2017 Jan Travnicek (jan.travnicek@fit.cvut.cz) + + * Algorithms library toolkit is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + + * Algorithms library toolkit is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License + * along with Algorithms library toolkit. If not, see <http://www.gnu.org/licenses/>. + * * Created on: Nov 17, 2013 * Author: Jan Travnicek */ @@ -30,110 +46,316 @@ namespace grammar { -/** - * Epsilon free context free grammar. Type 2 in Chomsky hierarchy. Produces context free languages. - */ class TerminalAlphabet; class NonterminalAlphabet; class InitialSymbol; +/** + * \brief + * Context sensitive grammar in Chomsky hierarchy or type 1 in Chomsky hierarchy. Generates context sensitive languages. + + * \details + * Definition is similar to all common definitions of context sensitive grammars. 
+ * G = (N, T, P, S), + * N (NonterminalAlphabet) = nonempty finite set of nonterminal symbols, + * T (TerminalAlphabet) = finite set of terminal symbols - having this empty won't let grammar do much though, + * P = set of production rules of the form \alpha A \beta -> \alpha B \beta, where A \in N, B \in ( N \cup T )+ and \alpha, \beta \in ( N \cup T )*, + * S (InitialSymbol) = initial nonterminal symbol, + * + * \tparam SymbolType used for the terminal alphabet, the nonterminal alphabet, and the initial symbol of the grammar. + */ template < class SymbolType > class CSG final : public GrammarBase, public alib::Components < CSG < SymbolType >, ext::set < SymbolType >, component::Set, std::tuple < TerminalAlphabet, NonterminalAlphabet >, SymbolType, component::Value, InitialSymbol > { + /** + * Rules function as a mapping from a triple ( left context, nonterminal symbol on the left hand side, right context ) to a set of sequences of terminal and nonterminal symbols. + */ ext::map < ext::tuple < ext::vector < SymbolType >, SymbolType, ext::vector < SymbolType > >, ext::set < ext::vector < SymbolType > > > rules; + + /** + * Boolean signaling whether the grammar generates the empty string or not. + */ bool generatesEpsilon; public: + /** + * \brief Creates a new instance of the grammar with a concrete initial symbol. + * + * \param initialSymbol the initial symbol of the grammar + */ explicit CSG ( SymbolType initialSymbol ); + /** + * \brief Creates a new instance of the grammar with a concrete nonterminal alphabet, terminal alphabet and initial symbol. 
+ * + * \param nonTerminalSymbols the initial nonterminal alphabet + * \param terminalSymbols the initial terminal alphabet + * \param initialSymbol the initial symbol of the grammar + */ explicit CSG ( ext::set < SymbolType > nonTerminalSymbols, ext::set < SymbolType > terminalSymbols, SymbolType initialSymbol ); - virtual GrammarBase * clone ( ) const; - - virtual GrammarBase * plunder ( ) &&; - + /** + * @copydoc grammar::GrammarBase::clone() + */ + virtual GrammarBase * clone ( ) const override; + + /** + * @copydoc grammar::GrammarBase::plunder() + */ + virtual GrammarBase * plunder ( ) && override; + + /** + * \brief Add a new rule of a grammar. + * + * \details The rule is in a form of \alpha A \beta -> \alpha B \beta, where A \in N, B \in ( N \cup T )+, and \alpha, \beta \in ( N \cup T )*. + * + * \param lContext the left context of the rule + * \param leftHandSide the left hand side of the rule + * \param rContext the right context of the rule + * \param rightHandSide the right hand side of the rule + * + * \returns true if the rule was indeed added, false otherwise + */ bool addRule ( ext::vector < SymbolType > lContext, SymbolType leftHandSide, ext::vector < SymbolType > rContext, ext::vector < SymbolType > rightHandSide ); + /** + * \brief Add new rules of a grammar. + * + * \details The rules are in form of \alpha A \beta -> \alpha B \beta | \alpha C \beta | ..., where A \in N, B, C ... \in ( N \cup T )+, and \alpha, \beta \in ( N \cup T )*. + * + * \param lContext the left context of the rule + * \param leftHandSide the left hand side of the rule + * \param rContext the right context of the rule + * \param rightHandSide a set of right hand sides of the rule + */ void addRules ( ext::vector < SymbolType > lContext, SymbolType leftHandSide, ext::vector < SymbolType > rContext, ext::set < ext::vector < SymbolType > > rightHandSide ); + /** + * Get rules of the grammar. 
+ * + * \returns rules of the grammar + */ const ext::map < ext::tuple < ext::vector < SymbolType >, SymbolType, ext::vector < SymbolType > >, ext::set < ext::vector < SymbolType > > > & getRules ( ) const &; + /** + * Get rules of the grammar. + * + * \returns rules of the grammar + */ ext::map < ext::tuple < ext::vector < SymbolType >, SymbolType, ext::vector < SymbolType > >, ext::set < ext::vector < SymbolType > > > && getRules ( ) &&; + /** + * Remove a rule of a grammar in form of \alpha A \beta -> \alpha B \beta, where A \in N, B \in (N \cup T)+, and \alpha, \beta \in ( N \cup T )*. + * + * \param lContext the left context of the rule + * \param leftHandSide the left hand side of the rule + * \param rContext the right context of the rule + * \param rightHandSide the right hand side of the rule + * + * \returns true if the rule was indeed removed, false otherwise + */ bool removeRule ( const ext::vector < SymbolType > & lContext, const SymbolType & leftHandSide, const ext::vector < SymbolType > & rContext, const ext::vector < SymbolType > & rightHandSide ); + /** + * Getter of initial symbol. + * + * \returns the initial symbol of the grammar + */ const SymbolType & getInitialSymbol ( ) const & { return this->template accessComponent < InitialSymbol > ( ).get ( ); } + /** + * Getter of initial symbol. + * + * \returns the initial symbol of the grammar + */ SymbolType && getInitialSymbol ( ) && { return std::move ( this->template accessComponent < InitialSymbol > ( ).get ( ) ); } + /** + * Setter of initial symbol. + * + * \param symbol new initial symbol of the grammar + * + * \returns true if the initial symbol was indeed changed + */ bool setInitialSymbol ( SymbolType symbol ) { return this->template accessComponent < InitialSymbol > ( ).set ( std::move ( symbol ) ); } + /** + * Getter of nonterminal alphabet. 
+ * + * \returns the nonterminal alphabet of the grammar + */ const ext::set < SymbolType > & getNonterminalAlphabet ( ) const & { return this->template accessComponent < NonterminalAlphabet > ( ).get ( ); } + /** + * Getter of nonterminal alphabet. + * + * \returns the nonterminal alphabet of the grammar + */ ext::set < SymbolType > && getNonterminalAlphabet ( ) && { return std::move ( this->template accessComponent < NonterminalAlphabet > ( ).get ( ) ); } + /** + * Adder of nonterminal symbol. + * + * \param symbol the new symbol to be added to nonterminal alphabet + * + * \returns true if the symbol was indeed added + */ bool addNonterminalSymbol ( SymbolType symbol ) { return this->template accessComponent < NonterminalAlphabet > ( ).add ( std::move ( symbol ) ); } + /** + * Setter of nonterminal alphabet. + * + * \param symbols completely new nonterminal alphabet + */ void setNonterminalAlphabet ( ext::set < SymbolType > symbols ) { this->template accessComponent < NonterminalAlphabet > ( ).set ( std::move ( symbols ) ); } + /** + * Getter of terminal alphabet. + * + * \returns the terminal alphabet of the grammar + */ const ext::set < SymbolType > & getTerminalAlphabet ( ) const & { return this->template accessComponent < TerminalAlphabet > ( ).get ( ); } + /** + * Getter of terminal alphabet. + * + * \returns the terminal alphabet of the grammar + */ ext::set < SymbolType > && getTerminalAlphabet ( ) && { return std::move ( this->template accessComponent < TerminalAlphabet > ( ).get ( ) ); } + /** + * Adder of terminal symbol. + * + * \param symbol the new symbol to be added to terminal alphabet + * + * \returns true if the symbol was indeed added + */ bool addTerminalSymbol ( SymbolType symbol ) { return this->template accessComponent < TerminalAlphabet > ( ).add ( std::move ( symbol ) ); } + /** + * Setter of terminal alphabet. 
+ * + * \param symbols completely new terminal alphabet + */ void setTerminalAlphabet ( ext::set < SymbolType > symbols ) { this->template accessComponent < TerminalAlphabet > ( ).set ( std::move ( symbols ) ); } + /** + * Sets sign representing that grammar generates or doesn't generate empty word. + * + * \param genEps sign representing the possibility of generating empty string from the grammar + */ void setGeneratesEpsilon ( bool genEps ); + + /** + * Gets sign representing that grammar generates or doesn't generate empty word. + * + * \returns sign representing the possibility of generating empty string from the grammar + */ bool getGeneratesEpsilon ( ) const; - virtual int compare ( const ObjectBase & other ) const { + /** + * @copydoc alib::CommonBase<ObjectBase>::compare ( const ObjectBase & ) + */ + virtual int compare ( const ObjectBase & other ) const override { if ( ext::type_index ( typeid ( * this ) ) == ext::type_index ( typeid ( other ) ) ) return this->compare ( ( decltype ( * this ) )other ); return ext::type_index ( typeid ( * this ) ) - ext::type_index ( typeid ( other ) ); } - virtual int compare ( const CSG & other ) const; - - virtual void operator >>( std::ostream & os ) const; - - virtual explicit operator std::string ( ) const; - + /** + * Actual compare method + * + * \param other the other instance + * + * \returns actual relation between two grammar instances of the same type + */ + int compare ( const CSG & other ) const; + + /** + * @copydoc alib::CommonBase<ObjectBase>::operator >> ( std::ostream & ) + */ + virtual void operator >>( std::ostream & os ) const override; + + /** + * @copydoc alib::CommonBase<ObjectBase>::operator std::string ( ) + */ + virtual explicit operator std::string ( ) const override; + + /** + * \brief The XML tag name of class. + * + * \details Intentionally a static member function to be safe in the initialisation before the main function starts. 
+ * + * \returns string representing the XML tag name of the class + */ static const std::string & getXmlTagName() { static std::string xmlTagName = "CSG"; return xmlTagName; } + /** + * Parsing from a sequence of xml tokens helper. + * + * \param input the iterator to sequence of xml tokens to parse from + * + * \returns the new instance of the grammar + */ static CSG parse ( ext::deque < sax::Token >::iterator & input ); + + /** + * Helper for parsing of individual rules of the grammar from a sequence of xml tokens. + * + * \param input the iterator to sequence of xml tokens to parse from + * \param grammar the grammar to add the rule to + */ static void parseRule ( ext::deque < sax::Token >::iterator & input, CSG & grammar ); + /** + * Composing to a sequence of xml tokens helper. + * + * \param out sink for new xml tokens representing the grammar + * \param grammar the grammar to compose + */ static void compose ( ext::deque < sax::Token > & out, const CSG & grammar ); + + /** + * Helper for composing rules of the grammar to a sequence of xml tokens. + * + * \param out sink for xml tokens representing the rules of the grammar + * \param grammar the grammar to compose + */ static void composeRules ( ext::deque < sax::Token > & out, const CSG & grammar ); - virtual alib::ObjectBase * inc ( ) &&; + /** + * @copydoc alib::ObjectBase::inc() + */ + virtual alib::ObjectBase * inc ( ) && override; + /** + * Type of normalized grammar. + */ typedef CSG < > normalized_type; }; @@ -331,9 +553,22 @@ alib::ObjectBase* CSG < SymbolType >::inc() && { namespace alib { +/** + * Helper class specifying constraints for the grammar's internal terminal alphabet component. + * + * \tparam SymbolType used for the terminal alphabet of the grammar. + */ template < class SymbolType > class SetConstraint< grammar::CSG < SymbolType >, SymbolType, grammar::TerminalAlphabet > { public: + /** + * Returns true if the terminal symbol is still used in some rule of the grammar. 
+ * + * \param grammar the tested grammar + * \param symbol the tested symbol + * + * \returns true if the symbol is used, false otherwise + */ static bool used ( const grammar::CSG < SymbolType > & grammar, const SymbolType & symbol ) { for ( const std::pair < const ext::tuple < ext::vector < SymbolType >, SymbolType, ext::vector < SymbolType > >, ext::set < ext::vector < SymbolType > > > & rule : grammar.getRules ( ) ) { for ( const SymbolType & lCont : std::get < 0 > ( rule.first ) ) @@ -353,19 +588,48 @@ public: return false; } + /** + * Returns true as all symbols are possibly available to be terminal symbols. + * + * \param grammar the tested grammar + * \param symbol the tested symbol + * + * \returns true + */ static bool available ( const grammar::CSG < SymbolType > &, const SymbolType & ) { return true; } + /** + * Throws runtime exception if the symbol requested to be terminal symbol is already in nonterminal alphabet. + * + * \param grammar the tested grammar + * \param symbol the tested symbol + * + * \throws grammar::GrammarException if the tested symbol is in nonterminal alphabet + */ static void valid ( const grammar::CSG < SymbolType > & grammar, const SymbolType & symbol ) { if ( grammar.template accessComponent < grammar::NonterminalAlphabet > ( ).get ( ).count ( symbol ) ) throw grammar::GrammarException ( "Symbol " + ext::to_string ( symbol ) + "cannot be in terminal alphabet since it is already nonterminal alphabet" ); } }; +/** + * Helper class specifying constraints for the grammar's internal nonterminal alphabet component. + * + * \tparam SymbolType used for the nonterminal alphabet of the grammar. + */ template < class SymbolType > class SetConstraint< grammar::CSG < SymbolType >, SymbolType, grammar::NonterminalAlphabet > { public: + /** + * Returns true if the nonterminal symbol is still used in some rule of the grammar or if it is the initial symbol of the grammar. 
+ * + * \param grammar the tested grammar + * \param symbol the tested symbol + * + * \returns true if the symbol is used, false otherwise + */ static bool used ( const grammar::CSG < SymbolType > & grammar, const SymbolType & symbol ) { for ( const std::pair < const ext::tuple < ext::vector < SymbolType >, SymbolType, ext::vector < SymbolType > >, ext::set < ext::vector < SymbolType > > > & rule : grammar.getRules ( ) ) { for ( const SymbolType & lCont : std::get < 0 > ( rule.first ) ) @@ -391,27 +655,67 @@ public: return false; } + /** + * Returns true as all symbols are possibly available to be nonterminal symbols. + * + * \param grammar the tested grammar + * \param symbol the tested symbol + * + * \returns true + */ static bool available ( const grammar::CSG < SymbolType > &, const SymbolType & ) { return true; } + /** + * Throws runtime exception if the symbol requested to be nonterminal symbol is already in terminal alphabet. + * + * \param grammar the tested grammar + * \param symbol the tested symbol + * + * \throws grammar::GrammarException if the tested symbol is in terminal alphabet + */ static void valid ( const grammar::CSG < SymbolType > & grammar, const SymbolType & symbol ) { if ( grammar.template accessComponent < grammar::TerminalAlphabet > ( ).get ( ).count ( symbol ) ) throw grammar::GrammarException ( "Symbol " + ext::to_string ( symbol ) + "cannot be in nonterminal alphabet since it is already in terminal alphabet" ); } }; +/** + * Helper class specifying constraints for the grammar's internal initial symbol element. + * + * \tparam SymbolType used for the initial symbol of the grammar. + */ template < class SymbolType > class ElementConstraint< grammar::CSG < SymbolType >, SymbolType, grammar::InitialSymbol > { public: + /** + * Returns true if the symbol requested to be initial is available in nonterminal alphabet. 
+ * + * \param grammar the tested grammar + * \param symbol the tested symbol + * + * \returns true if the tested symbol is in nonterminal alphabet + */ static bool available ( const grammar::CSG < SymbolType > & grammar, const SymbolType & symbol ) { return grammar.template accessComponent < grammar::NonterminalAlphabet > ( ).get ( ).count ( symbol ); } + /** + * All symbols are valid as initial symbols. + * + * \param grammar the tested grammar + * \param symbol the tested symbol + */ static void valid ( const grammar::CSG < SymbolType > &, const SymbolType & ) { } }; +/** + * Helper for normalisation of types specified by templates used as internal datatypes of symbols. + * + * \returns new instance of the grammar with default template parameters or unmodified instance if the template parameters were already default ones + */ template < class SymbolType > struct normalize < grammar::CSG < SymbolType >, typename std::enable_if < ! std::is_same < grammar::CSG < SymbolType >, grammar::CSG < > >::value >::type > { static grammar::CSG < > eval ( grammar::CSG < SymbolType > && value ) { -- GitLab