diff --git a/alib2algo/src/regexp/convert/ToAutomatonDerivation.cpp b/alib2algo/src/regexp/convert/ToAutomatonDerivation.cpp
index 867b1fb9fd0f5bb37bc311a398bd8f27570cb4ef..7feb5ae6dbad1f4ab24a9334d2fafb4f90822719 100644
--- a/alib2algo/src/regexp/convert/ToAutomatonDerivation.cpp
+++ b/alib2algo/src/regexp/convert/ToAutomatonDerivation.cpp
@@ -6,93 +6,12 @@
  */
 
 #include "ToAutomatonDerivation.h"
-
-#include <alib/set>
-#include <alib/deque>
-#include <alib/vector>
-
-#include <string/LinearString.h>
-
-#include <regexp/transform/RegExpDerivation.h>
-#include <regexp/simplify/RegExpOptimize.h>
-#include <regexp/properties/RegExpEpsilon.h>
 #include <registration/AlgoRegistration.hpp>
 
 namespace regexp {
 
 namespace convert {
 
-template < class T, class SymbolType >
-automaton::DFA < SymbolType, unsigned > ToAutomatonDerivation::convert(const T& regexp) {
-	// 1.
-	T V = regexp::simplify::RegExpOptimize::optimize(regexp);
-
-	ext::set<T> Q = { V };
-	ext::deque<ext::set<T>> Qi;
-
-	Qi.push_back(ext::set<T>());
-	Qi.at(0).insert(V);
-
-	int i = 1;
-
-	// 2.
-	while(! Qi.at(i - 1).empty()) {
-		Qi.push_back(ext::set<T>()); // initialize set Q_i
-
-		for(const auto& dregexp : Qi.at(i - 1)) {
-
-			for(const auto& a : regexp.getAlphabet()) {
-				string::LinearString < > string(ext::vector<DefaultSymbolType>{a});
-				T derived = regexp::RegExpDerivation::derivation(dregexp, string);
-				derived = regexp::simplify::RegExpOptimize::optimize(derived);
-
-				// this will also add \emptyset as a regexp (and as FA state)
-				if(Q.count(derived) == 0) // if this state has already been found, do not add
-					Qi.at(i).insert(derived);
-			}
-		}
-
-		Q.insert(Qi.at(i).begin(), Qi.at(i).end());
-		i += 1;
-	}
-
-	ext::map<T, unsigned> stateMap;
-	unsigned stateId = 0;
-
-	for(const auto& r : Q) {
-		stateMap.insert ( std::make_pair ( r, stateId ++ ) );
-	}
-
-	// ------------------------------------------------------------------------
-	// 3.
-
-	automaton::DFA < SymbolType, unsigned > automaton ( stateMap.find ( V )->second );
-
-	for(const auto& r : stateMap) {
-		automaton.addState(r.second);
-	}
-
-	automaton.setInputAlphabet(regexp.getAlphabet());
-
-	for(const auto& r : Q) {
-
-		for(const auto& a: regexp.getAlphabet()) {
-			string::LinearString < > string(ext::vector<DefaultSymbolType>{a});
-			T derived = regexp::RegExpDerivation::derivation(r, string);
-			derived = regexp::simplify::RegExpOptimize::optimize(derived);
-
-			automaton.addTransition(stateMap.find(r)->second, a, stateMap.find(derived)->second);
-		}
-	}
-
-	for(const auto& r : Q) {
-		if(regexp::properties::RegExpEpsilon::languageContainsEpsilon(r))
-			automaton.addFinalState(stateMap.find(r)->second);
-	}
-
-	return automaton;
-}
-
 auto ToAutomatonDerivationFormalRegExp = registration::AbstractRegister < ToAutomatonDerivation, automaton::DFA < DefaultSymbolType, unsigned >, const regexp::FormalRegExp < > & > ( ToAutomatonDerivation::convert );
 auto ToAutomatonDerivationUnboundedRegExp = registration::AbstractRegister < ToAutomatonDerivation, automaton::DFA < DefaultSymbolType, unsigned >, const regexp::UnboundedRegExp < > & > ( ToAutomatonDerivation::convert );
 
diff --git a/alib2algo/src/regexp/convert/ToAutomatonDerivation.h b/alib2algo/src/regexp/convert/ToAutomatonDerivation.h
index 64b34101c3a3cdc9de5211163a5a30e12b5cfbe8..b9b9ee12bff43a854e2bdecaae9ca4c93d91bbde 100644
--- a/alib2algo/src/regexp/convert/ToAutomatonDerivation.h
+++ b/alib2algo/src/regexp/convert/ToAutomatonDerivation.h
@@ -15,6 +15,16 @@
 
 #include <regexp/RegExp.h>
 
+#include <alib/set>
+#include <alib/deque>
+#include <alib/vector>
+
+#include <string/LinearString.h>
+
+#include <regexp/transform/RegExpDerivation.h>
+#include <regexp/simplify/RegExpOptimize.h>
+#include <regexp/properties/RegExpEpsilon.h>
+
 namespace regexp {
 
 namespace convert {
@@ -33,6 +43,86 @@ public:
 	static automaton::DFA < SymbolType, unsigned > convert ( const T & regexp );
 };
 
+/**
+ * Converts a regular expression to a DFA whose states are the expression's derivatives.
+ *
+ * Starting from the optimized input, every state is derived by every symbol of the
+ * expression's alphabet and optimized; each distinct result becomes a state. State ids
+ * follow the iteration order of the resulting state set, and a state is final when
+ * RegExpEpsilon reports that its language contains epsilon.
+ *
+ * \param regexp the regular expression to convert
+ * \return the automaton built from the derivatives of regexp
+ */
+template < class T, class SymbolType >
+automaton::DFA < SymbolType, unsigned > ToAutomatonDerivation::convert(const T& regexp) {
+	// 1. the initial state is the optimized input expression
+	T V = regexp::simplify::RegExpOptimize::optimize(regexp);
+
+	ext::set<T> Q = { V };
+	ext::set<T> frontier = { V }; // states whose derivatives are still to be computed
+
+	// Derivative of every state by every symbol, recorded while exploring so that
+	// building the transition function does not derive and optimize everything again.
+	ext::map<T, ext::map<SymbolType, T>> derivatives;
+
+	// 2. explore round by round until a round discovers nothing new
+	while(! frontier.empty()) {
+		ext::set<T> discovered; // set Q_i of this round
+
+		for(const auto& dregexp : frontier) {
+			ext::map<SymbolType, T>& row = derivatives[dregexp];
+
+			for(const auto& a : regexp.getAlphabet()) {
+				string::LinearString < > string(ext::vector<DefaultSymbolType>{a});
+				T derived = regexp::RegExpDerivation::derivation(dregexp, string);
+				derived = regexp::simplify::RegExpOptimize::optimize(derived);
+
+				// this will also add \emptyset as a regexp (and as FA state)
+				if(Q.count(derived) == 0) // if this state has already been found, do not add
+					discovered.insert(derived);
+
+				row.insert(std::make_pair(a, derived));
+			}
+		}
+
+		Q.insert(discovered.begin(), discovered.end());
+		frontier = std::move(discovered);
+	}
+
+	ext::map<T, unsigned> stateMap;
+	unsigned stateId = 0;
+
+	for(const auto& r : Q) {
+		stateMap.insert ( std::make_pair ( r, stateId ++ ) );
+	}
+
+	// ------------------------------------------------------------------------
+	// 3. assemble the automaton from the recorded derivatives
+
+	automaton::DFA < SymbolType, unsigned > automaton ( stateMap.find ( V )->second );
+
+	for(const auto& r : stateMap) {
+		automaton.addState(r.second);
+	}
+
+	automaton.setInputAlphabet(regexp.getAlphabet());
+
+	for(const auto& row : derivatives) {
+		const unsigned from = stateMap.find(row.first)->second;
+
+		for(const auto& edge : row.second)
+			automaton.addTransition(from, edge.first, stateMap.find(edge.second)->second);
+	}
+
+	for(const auto& r : Q) {
+		if(regexp::properties::RegExpEpsilon::languageContainsEpsilon(r))
+			automaton.addFinalState(stateMap.find(r)->second);
+	}
+
+	return automaton;
+}
+
 } /* namespace convert */
 
 } /* namespace regexp */