From f223545df2fa02a278a59e817b544398f86a9973 Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Mon, 17 Sep 2018 15:33:34 +0200 Subject: [PATCH] template RightRG in regexp to grammar algo --- alib2algo/src/regexp/convert/ToGrammar.cpp | 4 +- alib2algo/src/regexp/convert/ToGrammar.h | 23 +++-- .../convert/ToGrammarRightRGGlushkov.cpp | 78 +--------------- .../regexp/convert/ToGrammarRightRGGlushkov.h | 91 ++++++++++++++++++- 4 files changed, 107 insertions(+), 89 deletions(-) diff --git a/alib2algo/src/regexp/convert/ToGrammar.cpp b/alib2algo/src/regexp/convert/ToGrammar.cpp index c1aaa22a67..0bb63c7a90 100644 --- a/alib2algo/src/regexp/convert/ToGrammar.cpp +++ b/alib2algo/src/regexp/convert/ToGrammar.cpp @@ -12,8 +12,8 @@ namespace regexp { namespace convert { -auto ToGrammarFormalRegExp = registration::AbstractRegister < ToGrammar, grammar::RightRG < >, const regexp::FormalRegExp < > & > ( ToGrammar::convert ); -auto ToGrammarUnboundedRegExp = registration::AbstractRegister < ToGrammar, grammar::RightRG < >, const regexp::UnboundedRegExp < > & > ( ToGrammar::convert ); +auto ToGrammarFormalRegExp = registration::AbstractRegister < ToGrammar, grammar::RightRG < DefaultSymbolType, ext::pair < DefaultSymbolType, unsigned > >, const regexp::FormalRegExp < > & > ( ToGrammar::convert ); +auto ToGrammarUnboundedRegExp = registration::AbstractRegister < ToGrammar, grammar::RightRG < DefaultSymbolType, ext::pair < DefaultSymbolType, unsigned > >, const regexp::UnboundedRegExp < > & > ( ToGrammar::convert ); } /* namespace convert */ diff --git a/alib2algo/src/regexp/convert/ToGrammar.h b/alib2algo/src/regexp/convert/ToGrammar.h index 5a09b62ad0..c4f386c235 100644 --- a/alib2algo/src/regexp/convert/ToGrammar.h +++ b/alib2algo/src/regexp/convert/ToGrammar.h @@ -18,23 +18,32 @@ class ToGrammar { public: /** * Performs conversion. - * @return right regular grammar equivalent to source regexp. 
+ * + * \tparam SymbolType type of symbols in the regexp + * + * \param regexp the converted regexp + * + * \return right regular grammar equivalent to source regexp. */ template < class SymbolType > - static grammar::RightRG < > convert(const regexp::FormalRegExp < SymbolType > & regexp); + static grammar::RightRG < SymbolType, ext::pair < SymbolType, unsigned > > convert(const regexp::FormalRegExp < SymbolType > & regexp); + + /** + * \overload + */ template < class SymbolType > - static grammar::RightRG < > convert(const regexp::UnboundedRegExp < SymbolType > & regexp); + static grammar::RightRG < SymbolType, ext::pair < SymbolType, unsigned > > convert(const regexp::UnboundedRegExp < SymbolType > & regexp); }; template < class SymbolType > -grammar::RightRG < > ToGrammar::convert(const regexp::FormalRegExp < SymbolType > & regexp) { - return grammar::RightRG < >(ToGrammarRightRGGlushkov::convert(regexp)); +grammar::RightRG < SymbolType, ext::pair < SymbolType, unsigned > > ToGrammar::convert ( const regexp::FormalRegExp < SymbolType > & regexp ) { + return ToGrammarRightRGGlushkov::convert ( regexp ); } template < class SymbolType > -grammar::RightRG < > ToGrammar::convert(const regexp::UnboundedRegExp < SymbolType > & regexp) { - return grammar::RightRG < >(ToGrammarRightRGGlushkov::convert(regexp)); +grammar::RightRG < SymbolType, ext::pair < SymbolType, unsigned > > ToGrammar::convert ( const regexp::UnboundedRegExp < SymbolType > & regexp ) { + return ToGrammarRightRGGlushkov::convert ( regexp); } } /* namespace convert */ diff --git a/alib2algo/src/regexp/convert/ToGrammarRightRGGlushkov.cpp b/alib2algo/src/regexp/convert/ToGrammarRightRGGlushkov.cpp index cc47bb2c46..ee9e2abcda 100644 --- a/alib2algo/src/regexp/convert/ToGrammarRightRGGlushkov.cpp +++ b/alib2algo/src/regexp/convert/ToGrammarRightRGGlushkov.cpp @@ -6,88 +6,14 @@ */ #include "ToGrammarRightRGGlushkov.h" - -#include <alib/algorithm> - -#include <alphabet/InitialSymbol.h> - -#include 
"../glushkov/GlushkovFollow.h" -#include "../glushkov/GlushkovIndexate.h" -#include "../glushkov/GlushkovFirst.h" -#include "../glushkov/GlushkovLast.h" - -#include <regexp/properties/RegExpEpsilon.h> -#include <exception/CommonException.h> #include <registration/AlgoRegistration.hpp> -#include <container/ObjectsPair.h> - namespace regexp { namespace convert { -grammar::RightRG < > ToGrammarRightRGGlushkov::convert ( const regexp::UnboundedRegExp < > & regexp ) { - DefaultSymbolType S = alphabet::InitialSymbol::instance < DefaultSymbolType > ( ); - grammar::RightRG < > grammar ( S ); - - // step 1 - grammar.setTerminalAlphabet ( regexp.getAlphabet ( ) ); - - regexp::UnboundedRegExp < ext::pair < DefaultSymbolType, unsigned > > indexedRegExp = regexp::GlushkovIndexate::index ( regexp ); - - // steps 2, 3, 4 - const ext::set < regexp::UnboundedRegExpSymbol < ext::pair < DefaultSymbolType, unsigned > > > first = regexp::GlushkovFirst::first ( indexedRegExp ); - const ext::set < regexp::UnboundedRegExpSymbol < ext::pair < DefaultSymbolType, unsigned > > > last = regexp::GlushkovLast::last ( indexedRegExp ); - - // \e in q0 check is in step 7 - - // step 5 - for ( const ext::pair < DefaultSymbolType, unsigned > & symbol : indexedRegExp.getAlphabet ( ) ) - grammar.addNonterminalSymbol ( DefaultSymbolType ( container::ObjectsPair < DefaultSymbolType, unsigned > ( symbol ) ) ); - - // step 6 - for ( const regexp::UnboundedRegExpSymbol < ext::pair < DefaultSymbolType, unsigned > > & symbol : first ) - grammar.addRule ( S, ext::make_pair ( symbol.getSymbol ( ).first, DefaultSymbolType ( container::ObjectsPair < DefaultSymbolType, unsigned > ( symbol.getSymbol ( ) ) ) ) ); - - for ( const ext::pair < DefaultSymbolType, unsigned > & x : indexedRegExp.getAlphabet ( ) ) - for ( const regexp::UnboundedRegExpSymbol < ext::pair < DefaultSymbolType, unsigned > > & f : regexp::GlushkovFollow::follow ( indexedRegExp, UnboundedRegExpSymbol < ext::pair < DefaultSymbolType, unsigned > > 
( x ) ) ) { - const ext::pair < DefaultSymbolType, unsigned > & a = x; - const ext::pair < DefaultSymbolType, unsigned > & b = f.getSymbol ( ); - - grammar.addRule ( DefaultSymbolType ( container::ObjectsPair < DefaultSymbolType, unsigned > ( a ) ), ext::make_pair ( b.first, DefaultSymbolType ( container::ObjectsPair < DefaultSymbolType, unsigned > ( b ) ) ) ); - } - - // step 7 - - /* - * for all rules where ns.m_nonTerminal is on rightSide: - * add Rule: leftSide -> symbol.getSymbol( ) - * unless it already exists - */ - for ( const auto & rule : grammar.getRules ( ) ) - for ( const auto & rhs : rule.second ) { - if ( ! rhs.is < ext::pair < DefaultSymbolType, DefaultSymbolType > > ( ) ) - continue; - - const ext::pair < DefaultSymbolType, DefaultSymbolType > & rawRhs = rhs.get < ext::pair < DefaultSymbolType, DefaultSymbolType > > ( ); - - for ( const regexp::UnboundedRegExpSymbol < ext::pair < DefaultSymbolType, unsigned > > & symbol : last ) - if ( rawRhs.second == DefaultSymbolType ( container::ObjectsPair < DefaultSymbolType, unsigned > ( symbol.getSymbol ( ) ) ) ) - grammar.addRule ( rule.first, rawRhs.first ); - } - - if ( regexp::properties::RegExpEpsilon::languageContainsEpsilon ( regexp ) ) - grammar.setGeneratesEpsilon ( true ); - - return grammar; -} - -grammar::RightRG < > ToGrammarRightRGGlushkov::convert ( const regexp::FormalRegExp < > & regexp ) { - return ToGrammarRightRGGlushkov::convert ( regexp::UnboundedRegExp < > ( regexp ) ); -} - -auto ToGrammarRightRGGlushkovUnboundedRegExp = registration::AbstractRegister < ToGrammarRightRGGlushkov, grammar::RightRG < >, const regexp::UnboundedRegExp < > & > ( ToGrammarRightRGGlushkov::convert ); -auto ToGrammarRightRGGlushkovFormalRegExp = registration::AbstractRegister < ToGrammarRightRGGlushkov, grammar::RightRG < >, const regexp::FormalRegExp < > & > ( ToGrammarRightRGGlushkov::convert ); +auto ToGrammarRightRGGlushkovUnboundedRegExp = registration::AbstractRegister < ToGrammarRightRGGlushkov, 
grammar::RightRG < DefaultSymbolType, ext::pair < DefaultSymbolType, unsigned > >, const regexp::UnboundedRegExp < > & > ( ToGrammarRightRGGlushkov::convert ); +auto ToGrammarRightRGGlushkovFormalRegExp = registration::AbstractRegister < ToGrammarRightRGGlushkov, grammar::RightRG < DefaultSymbolType, ext::pair < DefaultSymbolType, unsigned > >, const regexp::FormalRegExp < > & > ( ToGrammarRightRGGlushkov::convert ); } /* namespace convert */ diff --git a/alib2algo/src/regexp/convert/ToGrammarRightRGGlushkov.h b/alib2algo/src/regexp/convert/ToGrammarRightRGGlushkov.h index 3fe820d47b..d6588235a1 100644 --- a/alib2algo/src/regexp/convert/ToGrammarRightRGGlushkov.h +++ b/alib2algo/src/regexp/convert/ToGrammarRightRGGlushkov.h @@ -12,6 +12,17 @@ #include <regexp/formal/FormalRegExp.h> #include <regexp/unbounded/UnboundedRegExp.h> +#include <alib/algorithm> + +#include <alphabet/InitialSymbol.h> + +#include "../glushkov/GlushkovFollow.h" +#include "../glushkov/GlushkovIndexate.h" +#include "../glushkov/GlushkovFirst.h" +#include "../glushkov/GlushkovLast.h" + +#include <regexp/properties/RegExpEpsilon.h> + namespace regexp { namespace convert { @@ -24,13 +35,85 @@ class ToGrammarRightRGGlushkov { public: /** * Performs conversion. - * @param regexp original regular expression - * @return right regular grammar equivalent to source regexp. + * + * \tparam SymbolType type of symbols in the regular expression + * + * \param regexp original regular expression + * + * \return right regular grammar equivalent to source regexp. 
+ */ + template < class SymbolType > + static grammar::RightRG < SymbolType, ext::pair < SymbolType, unsigned > > convert(const regexp::FormalRegExp < SymbolType > & regexp); + + /** + * \overload */ - static grammar::RightRG < > convert(const regexp::FormalRegExp < > & regexp); - static grammar::RightRG < > convert(const regexp::UnboundedRegExp < > & regexp); + template < class SymbolType > + static grammar::RightRG < SymbolType, ext::pair < SymbolType, unsigned > > convert(const regexp::UnboundedRegExp < SymbolType > & regexp); }; +template < class SymbolType > +grammar::RightRG < SymbolType, ext::pair < SymbolType, unsigned > > ToGrammarRightRGGlushkov::convert ( const regexp::UnboundedRegExp < SymbolType > & regexp ) { + ext::pair < SymbolType, unsigned > S = ext::make_pair ( alphabet::InitialSymbol::instance < SymbolType > ( ), 0 ); + grammar::RightRG < SymbolType, ext::pair < SymbolType, unsigned > > grammar ( S ); + + // step 1 + grammar.setTerminalAlphabet ( regexp.getAlphabet ( ) ); + + regexp::UnboundedRegExp < ext::pair < SymbolType, unsigned > > indexedRegExp = regexp::GlushkovIndexate::index ( regexp ); + + // steps 2, 3, 4 + const ext::set < regexp::UnboundedRegExpSymbol < ext::pair < SymbolType, unsigned > > > first = regexp::GlushkovFirst::first ( indexedRegExp ); + const ext::set < regexp::UnboundedRegExpSymbol < ext::pair < SymbolType, unsigned > > > last = regexp::GlushkovLast::last ( indexedRegExp ); + + // \e in q0 check is in step 7 + + // step 5 + for ( const auto & nonterminal : indexedRegExp.getAlphabet ( ) ) + grammar.addNonterminalSymbol ( nonterminal ); + + // step 6 + for ( const regexp::UnboundedRegExpSymbol < ext::pair < SymbolType, unsigned > > & symbol : first ) + grammar.addRule ( S, ext::make_pair ( symbol.getSymbol ( ).first, symbol.getSymbol ( ) ) ); + + for ( const ext::pair < SymbolType, unsigned > & x : indexedRegExp.getAlphabet ( ) ) + for ( const regexp::UnboundedRegExpSymbol < ext::pair < SymbolType, unsigned > > & f : 
regexp::GlushkovFollow::follow ( indexedRegExp, UnboundedRegExpSymbol < ext::pair < SymbolType, unsigned > > ( x ) ) ) { + const ext::pair < SymbolType, unsigned > & a = x; + const ext::pair < SymbolType, unsigned > & b = f.getSymbol ( ); + + grammar.addRule ( a, ext::make_pair ( b.first, b ) ); + } + + // step 7 + + /* + * for all rules where ns.m_nonTerminal is on rightSide: + * add Rule: leftSide -> symbol.getSymbol( ) + * unless it already exists + */ + for ( const auto & rule : grammar.getRules ( ) ) + for ( const auto & rhs : rule.second ) { + if ( ! rhs.template is < ext::pair < SymbolType, ext::pair < SymbolType, unsigned > > > ( ) ) + continue; + + const ext::pair < SymbolType, ext::pair < SymbolType, unsigned > > & rawRhs = rhs.template get < ext::pair < SymbolType, ext::pair < SymbolType, unsigned > > > ( ); + + for ( const regexp::UnboundedRegExpSymbol < ext::pair < SymbolType, unsigned > > & symbol : last ) + if ( rawRhs.second == symbol.getSymbol ( ) ) + grammar.addRule ( rule.first, rawRhs.first ); + } + + if ( regexp::properties::RegExpEpsilon::languageContainsEpsilon ( regexp ) ) + grammar.setGeneratesEpsilon ( true ); + + return grammar; +} + +template < class SymbolType > +grammar::RightRG < SymbolType, ext::pair < SymbolType, unsigned > > ToGrammarRightRGGlushkov::convert ( const regexp::FormalRegExp < SymbolType > & regexp ) { + return ToGrammarRightRGGlushkov::convert ( regexp::UnboundedRegExp < SymbolType > ( regexp ) ); +} + } /* namespace convert */ } /* namespace regexp */ -- GitLab