From b1ab92584c1a54b3a96190a43fa5726d97f08b04 Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Thu, 20 Sep 2018 14:41:22 +0200 Subject: [PATCH] redesign regexp derivation based conversions --- .../regexp/convert/ToAutomatonDerivation.h | 70 +++++---------- .../convert/ToGrammarRightRGDerivation.cpp | 87 +------------------ .../convert/ToGrammarRightRGDerivation.h | 64 +++++++++++++- 3 files changed, 87 insertions(+), 134 deletions(-) diff --git a/alib2algo/src/regexp/convert/ToAutomatonDerivation.h b/alib2algo/src/regexp/convert/ToAutomatonDerivation.h index ce28ae7fec..b0b53a9c3d 100644 --- a/alib2algo/src/regexp/convert/ToAutomatonDerivation.h +++ b/alib2algo/src/regexp/convert/ToAutomatonDerivation.h @@ -46,66 +46,42 @@ automaton::DFA < SymbolType, unsigned > ToAutomatonDerivation::convert(const T& // 1. T V = regexp::simplify::RegExpOptimize::optimize(regexp); - ext::set<T> Q = { V }; - ext::deque<ext::set<T>> Qi; + ext::deque < T > Qi; - Qi.push_back(ext::set<T>()); - Qi.at(0).insert(V); + Qi.push_back ( V ); - int i = 1; - - // 2. - while(! Qi.at(i - 1).empty()) { - Qi.push_back(ext::set<T>()); // initialize set Q_i - - for(const auto& dregexp : Qi.at(i - 1)) { - - for(const auto& a : regexp.getAlphabet()) { - T derived = regexp::RegExpDerivation::derivation(dregexp, a); - derived = regexp::simplify::RegExpOptimize::optimize(derived); - - // this will also add \emptyset as a regexp (and as FA state) - if(Q.count(derived) == 0) // if this state has already been found, do not add - Qi.at(i).insert(derived); - } - } - - Q.insert(Qi.at(i).begin(), Qi.at(i).end()); - i += 1; - } - - ext::map<T, unsigned> stateMap; + ext::map < T, unsigned> stateMap; unsigned stateId = 0; + stateMap.insert ( std::make_pair ( V, stateId ++ ) ); - for(const auto& r : Q) { - stateMap.insert ( std::make_pair ( r, stateId ++ ) ); - } - - // ------------------------------------------------------------------------ - // 3. 
- - automaton::DFA < SymbolType, unsigned > automaton ( stateMap.find ( V )->second ); - - for(const auto& r : stateMap) { - automaton.addState(r.second); - } - + automaton::DFA < SymbolType, unsigned > automaton ( stateMap.at ( V ) ); automaton.setInputAlphabet(regexp.getAlphabet()); - for(const auto& r : Q) { + // 2., 3. + while(! Qi.empty()) { + T r = std::move ( Qi.back ( ) ); // take the next unprocessed regexp off the worklist + Qi.pop_back ( ); - for(const auto& a: regexp.getAlphabet()) { + for(const auto& a : regexp.getAlphabet()) { T derived = regexp::RegExpDerivation::derivation(r, a); derived = regexp::simplify::RegExpOptimize::optimize(derived); - automaton.addTransition(stateMap.find(r)->second, a, stateMap.find(derived)->second); + // this will also add \emptyset as a regexp (and as FA state) + if(stateMap.count(derived) == 0) { // if this state has already been found, do not add + Qi.push_back(derived); + automaton.addState ( stateId ); + stateMap.insert ( std::make_pair ( derived, stateId ++ ) ); + + if(regexp::properties::RegExpEpsilon::languageContainsEpsilon(derived)) + automaton.addFinalState(stateMap.at(derived)); + } + + automaton.addTransition(stateMap.at(r), a, stateMap.at(derived)); } } - for(const auto& r : Q) { - if(regexp::properties::RegExpEpsilon::languageContainsEpsilon(r)) - automaton.addFinalState(stateMap.find(r)->second); - } + if(regexp::properties::RegExpEpsilon::languageContainsEpsilon ( V ) ) + automaton.addFinalState(stateMap.at( V )); return automaton; } diff --git a/alib2algo/src/regexp/convert/ToGrammarRightRGDerivation.cpp b/alib2algo/src/regexp/convert/ToGrammarRightRGDerivation.cpp index 3f1d2bb64a..b6f2df7a1a 100644 --- a/alib2algo/src/regexp/convert/ToGrammarRightRGDerivation.cpp +++ b/alib2algo/src/regexp/convert/ToGrammarRightRGDerivation.cpp @@ -6,97 +6,14 @@ */ #include "ToGrammarRightRGDerivation.h" - -#include <alib/set> -#include <alib/deque> -#include <alib/vector> -#include <alib/hexavigesimal> - -#include <common/createUnique.hpp> - 
-#include <regexp/simplify/RegExpOptimize.h> -#include <regexp/transform/RegExpDerivation.h> -#include <regexp/properties/RegExpEpsilon.h> #include <registration/AlgoRegistration.hpp> namespace regexp { namespace convert { -template<class T> -grammar::RightRG < > ToGrammarRightRGDerivation::convert(const T& regexp) { - // 1. - T V = regexp::simplify::RegExpOptimize::optimize(regexp); - - ext::set<T> N = { V }; - ext::deque<ext::set<T>> Ni; - - Ni.push_back(ext::set<T>()); - Ni.at(0).insert(V); - - int i = 1; - - // 2. - while(! Ni.at(i - 1).empty()) { - Ni.push_back(ext::set<T>()); // initialize set Q_i - - for(const auto & dregexp : Ni.at( i - 1 )) { - for(const auto & a : regexp.getAlphabet()) { - T derived = regexp::RegExpDerivation::derivation(dregexp, a); - derived = regexp::simplify::RegExpOptimize::optimize(derived); - - // this will also add \emptyset as a regexp (and as FA state) - if(N.count(derived) == 0) // if this state has already been found, do not add - Ni.at(i).insert(derived); - } - } - - N.insert(Ni.at(i).begin(), Ni.at(i).end()); - i += 1; - } - - // ------------------------------------------------------------------------ - // 3. 
- - int nonterminalId = 0; - ext::map<T, DefaultSymbolType> nonterminalMap; - - DefaultSymbolType ntV(nonterminalId++); - nonterminalMap.insert(std::make_pair(V, ntV)); - - grammar::RightRG < > grammar(ntV); - grammar.setTerminalAlphabet(regexp.getAlphabet()); - - for(const auto & r : N) { - if(V == r) continue; - - DefaultSymbolType nt = common::createUnique(DefaultSymbolType(ext::toBase26(nonterminalId++)), grammar.getTerminalAlphabet(), grammar.getNonterminalAlphabet()); - nonterminalMap.insert(std::make_pair(r, nt)); - grammar.addNonterminalSymbol(nt); - } - - for(const auto & r : N) { - for(const auto & a : regexp.getAlphabet()) { - T derived = regexp::RegExpDerivation::derivation(r, a); - derived = regexp::simplify::RegExpOptimize::optimize(derived); - - grammar.addRule(nonterminalMap.find(r)->second, ext::make_pair(a, nonterminalMap.find(derived)->second)); - - if(regexp::properties::RegExpEpsilon::languageContainsEpsilon(derived)) - grammar.addRule(nonterminalMap.find(r)->second, a); - } - } - - grammar.setInitialSymbol(nonterminalMap.find(V)->second); - - if(regexp::properties::RegExpEpsilon::languageContainsEpsilon(V)) - grammar.setGeneratesEpsilon(true); // okay, because of this feature we do not have to bother with extending the grammar with new rules and nonterminals. YAY! 
- - return grammar; -} - -auto ToGrammarRightRGDerivationUnboundedRegExp = registration::AbstractRegister < ToGrammarRightRGDerivation, grammar::RightRG < >, const regexp::UnboundedRegExp < > & > ( ToGrammarRightRGDerivation::convert ); -auto ToGrammarRightRGDerivationFormalRegExp = registration::AbstractRegister < ToGrammarRightRGDerivation, grammar::RightRG < >, const regexp::FormalRegExp < > & > ( ToGrammarRightRGDerivation::convert ); +auto ToGrammarRightRGDerivationUnboundedRegExp = registration::AbstractRegister < ToGrammarRightRGDerivation, grammar::RightRG < DefaultSymbolType, unsigned >, const regexp::UnboundedRegExp < > & > ( ToGrammarRightRGDerivation::convert ); +auto ToGrammarRightRGDerivationFormalRegExp = registration::AbstractRegister < ToGrammarRightRGDerivation, grammar::RightRG < DefaultSymbolType, unsigned >, const regexp::FormalRegExp < > & > ( ToGrammarRightRGDerivation::convert ); } /* namespace convert */ diff --git a/alib2algo/src/regexp/convert/ToGrammarRightRGDerivation.h b/alib2algo/src/regexp/convert/ToGrammarRightRGDerivation.h index b708b30217..2791abadf9 100644 --- a/alib2algo/src/regexp/convert/ToGrammarRightRGDerivation.h +++ b/alib2algo/src/regexp/convert/ToGrammarRightRGDerivation.h @@ -9,9 +9,21 @@ #define TO_GRAMMAR_RIGHT_RG_DERIVATION_H_ #include <grammar/Regular/RightRG.h> +#include <regexp/RegExp.h> #include <regexp/formal/FormalRegExp.h> #include <regexp/unbounded/UnboundedRegExp.h> +#include <alib/set> +#include <alib/deque> +#include <alib/vector> +#include <alib/hexavigesimal> + +#include <common/createUnique.hpp> + +#include <regexp/simplify/RegExpOptimize.h> +#include <regexp/transform/RegExpDerivation.h> +#include <regexp/properties/RegExpEpsilon.h> + namespace regexp { namespace convert { @@ -26,11 +38,59 @@ public: * Performs conversion. * @return right regular grammar equivalent to source regexp. 
*/ - template <class T> - static grammar::RightRG < > convert(const T& regexp); + template < class T, class SymbolType = typename regexp::SymbolTypeOfRegexp < T > > + static grammar::RightRG < SymbolType, unsigned > convert ( const T & regexp ); }; +template < class T, class SymbolType > +grammar::RightRG < SymbolType, unsigned > ToGrammarRightRGDerivation::convert ( const T & regexp ) { + // 1. + T V = regexp::simplify::RegExpOptimize::optimize(regexp); + + // 2., 3. + unsigned nonterminalId = 0; + ext::map < T, unsigned > nonterminalMap; + + unsigned ntV = common::createUnique ( nonterminalId ++, regexp.getAlphabet ( ) ); + nonterminalMap.insert ( std::make_pair ( V, ntV ) ); + + grammar::RightRG < SymbolType, unsigned > grammar(ntV); + grammar.setTerminalAlphabet ( regexp.getAlphabet ( ) ); + + ext::deque < T > Ni; + + Ni.push_back ( V ); + + while(! Ni.empty()) { + T r = std::move ( Ni.back ( ) ); + Ni.pop_back ( ); + + for(const auto & a : regexp.getAlphabet()) { + T derived = regexp::RegExpDerivation::derivation(r, a); + derived = regexp::simplify::RegExpOptimize::optimize(derived); + + // this will also add \emptyset as a regexp (and as a grammar nonterminal) + if ( nonterminalMap.count(derived) == 0) { // if this regexp has already been found, do not add + Ni.push_back(derived); + unsigned nt = common::createUnique ( nonterminalId ++, grammar.getTerminalAlphabet ( ), grammar.getNonterminalAlphabet ( ) ); + grammar.addNonterminalSymbol ( nt ); + nonterminalMap.insert ( derived, nt ); + } + + if(regexp::properties::RegExpEpsilon::languageContainsEpsilon(derived)) + grammar.addRule(nonterminalMap.at(r), a); + + grammar.addRule(nonterminalMap.at(r), ext::make_pair(a, nonterminalMap.at(derived))); + } + } + + if(regexp::properties::RegExpEpsilon::languageContainsEpsilon(V)) + grammar.setGeneratesEpsilon(true); // okay, because of this feature we do not have to bother with extending the grammar with new rules and nonterminals. YAY! 
+ + return grammar; +} + } /* namespace convert */ } /* namespace regexp */ -- GitLab