diff --git a/aconversions2/src/ConversionHandler.cpp b/aconversions2/src/ConversionHandler.cpp index 2b6e790d95c42b086d7b39d514083fb461bead2a..d523dced31a63f879d3d30bba48e806c072fdb71 100644 --- a/aconversions2/src/ConversionHandler.cpp +++ b/aconversions2/src/ConversionHandler.cpp @@ -204,8 +204,7 @@ void ConversionHandler::convertREtoFSM( void ) switch( m_algorithm ) { case BRZOZOWSKI_DERIVATION: { - conversions::re2fa::BrzozowskiDerivation conv; - alib::DataFactory::toStdout(conv.convert(regexp)); + alib::DataFactory::toStdout(conversions::re2fa::BrzozowskiDerivation::convert(regexp)); break; } case THOMPSON_NFA: { diff --git a/alib2algo/src/conversions/re2fa/BrzozowskiDerivation.cpp b/alib2algo/src/conversions/re2fa/BrzozowskiDerivation.cpp index e7937b3fd4ab6f2184232db8b38a1a76769dfea3..93032524fd1c279cecd82980a4f96d035dace9ac 100644 --- a/alib2algo/src/conversions/re2fa/BrzozowskiDerivation.cpp +++ b/alib2algo/src/conversions/re2fa/BrzozowskiDerivation.cpp @@ -2,7 +2,7 @@ * BrzozowskiDerivation.cpp * * Created on: 11. 1. 2014 - * Author: Tomas Pecka + * Author: Tomas Pecka */ #include "BrzozowskiDerivation.h" @@ -14,7 +14,6 @@ #include <string/LinearString.h> #include <std/hexavigesimal.h> -#include <label/StringLabel.h> #include "../../regexp/RegExpDerivation.h" #include "../../regexp/RegExpOptimize.h" @@ -26,110 +25,116 @@ namespace conversions namespace re2fa { -BrzozowskiDerivation::BrzozowskiDerivation(void){} -BrzozowskiDerivation::~BrzozowskiDerivation(void){} - - -void BrzozowskiDerivation::Visit(void* userData, const regexp::FormalRegExp& regexp) +automaton::Automaton BrzozowskiDerivation::convert(const regexp::RegExp& regexp) { - std::pair<std::set<alphabet::Symbol>, bool>& out = *(std::pair<std::set<alphabet::Symbol>, bool>*) userData; - out.first = regexp.getAlphabet(); - out.second = regexp::RegExpEpsilon::languageContainsEpsilon(regexp); + automaton::Automaton* out = NULL; + regexp.getData().Accept((void*) &out, BrzozowskiDerivation::BRZOZOWSKI_DERIVATION); + automaton::Automaton res = std::move(*out); + delete out; + return res; } -void BrzozowskiDerivation::Visit(void* userData, const regexp::UnboundedRegExp& regexp) + +template<class T> +automaton::NFA BrzozowskiDerivation::convert(const T& regexp) { - std::pair<std::set<alphabet::Symbol>, bool>& out = *(std::pair<std::set<alphabet::Symbol>, bool>*) userData; - out.first = regexp.getAlphabet(); - out.second = regexp::RegExpEpsilon::languageContainsEpsilon(regexp); + // 1. + regexp::RegExpOptimize opt; + regexp::RegExp V = regexp::RegExp{opt.optimize(regexp)}; + + std::set<regexp::RegExp> Q = { V }; + std::deque<std::set<regexp::RegExp>> Qi; + + Qi.push_back(std::set<regexp::RegExp>()); + Qi.at(0).insert(V); + + int i = 1; + + // 2. + while(! Qi.at(i - 1).empty()) + { + Qi.push_back(std::set<regexp::RegExp>()); // initialize set Q_i + + for(const auto& dregexp : Qi.at(i - 1)) + { + regexp::RegExpDerivation deriv; + + for(const auto& a : regexp.getAlphabet()) + { + string::LinearString string(std::vector<alphabet::Symbol>{a}); + regexp::RegExp derived = deriv.derivation(dregexp, string); + derived = opt.optimize(derived); + + // this will also add \emptyset as a regexp (and as FA state) + if(Q.count(derived) == 0) // if this state has already been found, do not add + Qi.at(i).insert(derived); + } + } + + Q.insert(Qi.at(i).begin(), Qi.at(i).end()); + i += 1; + } + + std::map<regexp::RegExp, automaton::State> stateMap; + int stateId = 0; + + for(const auto& r : Q) + { + automaton::State q(std::toBase26(stateId++)); + stateMap.insert(std::make_pair(r, q)); + } + + // ------------------------------------------------------------------------ + // 3. + + automaton::NFA automaton; + + for(const auto& r : stateMap) + { + automaton.addState(r.second); + } + + automaton.addInitialState(stateMap.find(V)->second); + + automaton.setInputSymbols(regexp.getAlphabet()); + + for(const auto& r : Q) + { + regexp::RegExpDerivation deriv; + + for(const auto& a: regexp.getAlphabet()) + { + string::LinearString string(std::vector<alphabet::Symbol>{a}); + regexp::RegExp derived = deriv.derivation(r, string); + derived = opt.optimize(derived); + + automaton.addTransition(stateMap.find(r)->second, a, stateMap.find(derived)->second); + } + } + + for(const auto& r : Q) + { + if(regexp::RegExpEpsilon::languageContainsEpsilon(r)) + automaton.addFinalState(stateMap.find(r)->second); + } + + return automaton; } -automaton::DFA BrzozowskiDerivation::convert(const regexp::RegExp& regexp) +template automaton::NFA BrzozowskiDerivation::convert(const regexp::FormalRegExp& regexp); +template automaton::NFA BrzozowskiDerivation::convert(const regexp::UnboundedRegExp& regexp); + +void BrzozowskiDerivation::Visit(void* userData, const regexp::FormalRegExp& regexp) const { - // 1. - regexp::RegExpOptimize opt; - regexp::RegExp V = opt.optimize(regexp); - - std::pair<std::set<alphabet::Symbol>, bool> out({}, false); - regexp.getData().Accept((void*) &out, *this); - const std::set<alphabet::Symbol>& alphabet = out.first; - - std::set<regexp::RegExp> Q = { V }; - std::deque<std::set<regexp::RegExp>> Qi; - - Qi.push_back(std::set<regexp::RegExp>()); - Qi.at(0).insert(V); - - int i = 1; - - // 2. - while(! Qi.at(i - 1).empty()) - { - Qi.push_back(std::set<regexp::RegExp>()); // initialize set Q_i - - for(const auto& dregexp : Qi.at(i - 1)) - { - regexp::RegExpDerivation deriv; - - for(const auto& a : alphabet) - { - string::LinearString string(std::vector<alphabet::Symbol>{a}); - regexp::RegExp derived = deriv.derivation(dregexp, string); - derived = opt.optimize(derived); - - // this will also add \emptyset as a regexp (and as FA state) - if(Q.count(derived) == 0) // if this state has already been found, do not add - Qi.at(i).insert(derived); - } - } - - Q.insert(Qi.at(i).begin(), Qi.at(i).end()); - i += 1; - } - - std::map<regexp::RegExp, automaton::State> stateMap; - int stateId = 0; - - for(const auto& r : Q) - { - automaton::State q(label::Label(label::StringLabel(std::toBase26(stateId++)))); - stateMap.insert(std::make_pair(r, q)); - } - - // ------------------------------------------------------------------------ - // 3. - - automaton::DFA automaton(stateMap.find(V)->second); - - for(const auto& r : stateMap) - { - automaton.addState(r.second); - } - - automaton.setInputSymbols(alphabet); - - for(const auto& r : Q) - { - regexp::RegExpDerivation deriv; - - for(const auto& a: alphabet) - { - string::LinearString string(std::vector<alphabet::Symbol>{a}); - regexp::RegExp derived = deriv.derivation(r, string); - derived = opt.optimize(derived); - - automaton.addTransition(stateMap.find(r)->second, a, stateMap.find(derived)->second); - } - } - - for(const auto& r : Q) - { - r.getData().Accept((void*) &out, *this); - if(out.second) // if(r.containsEmptyString()) - automaton.addFinalState(stateMap.find(r)->second); - } - - return automaton; + automaton::Automaton* &out = *((automaton::Automaton**) userData); + out = new automaton::Automaton(this->convert(regexp)); } +void BrzozowskiDerivation::Visit(void* userData, const regexp::UnboundedRegExp& regexp) const +{ + automaton::Automaton* &out = *((automaton::Automaton**) userData); + out = new automaton::Automaton(this->convert(regexp)); +} + +const BrzozowskiDerivation BrzozowskiDerivation::BRZOZOWSKI_DERIVATION; } /* namespace re2fa */ diff --git a/alib2algo/src/conversions/re2fa/BrzozowskiDerivation.h b/alib2algo/src/conversions/re2fa/BrzozowskiDerivation.h index 4344d295786b96448ea5729ef890f2a79dc17e52..a3c249dfe7ab5465b2547b4100af12c1139a50f6 100644 --- a/alib2algo/src/conversions/re2fa/BrzozowskiDerivation.h +++ b/alib2algo/src/conversions/re2fa/BrzozowskiDerivation.h @@ -2,7 +2,7 @@ * BrzozowskiDerivation.h * * Created on: 11. 1. 2014 - * Author: Tomas Pecka + * Author: Tomas Pecka */ #ifndef RE2FA_BRZOZOWSKIDERIVATION_H_ @@ -11,7 +11,9 @@ #include <regexp/RegExp.h> #include <regexp/formal/FormalRegExp.h> #include <regexp/unbounded/UnboundedRegExp.h> -#include <automaton/FSM/DFA.h> + +#include <automaton/Automaton.h> +#include <automaton/FSM/NFA.h> namespace conversions { @@ -23,21 +25,23 @@ namespace re2fa * Converts regular expression to finite automaton using BrzozowskiDerivation algorithm (derivations of regular expressions). * Source: Melichar 2.110 */ -class BrzozowskiDerivation : public regexp::VisitableRegExpBase::visitor_type +class BrzozowskiDerivation : public regexp::VisitableRegExpBase::const_visitor_type { public: - BrzozowskiDerivation(void); - ~BrzozowskiDerivation(void); + /** + * Performs conversion. + * @return FSM equivalent to original regular expression. + */ + static automaton::Automaton convert(const regexp::RegExp& regexp); - /** - * Performs conversion. - * @return FSM equivalent to original regular expression. - */ - automaton::DFA convert(const regexp::RegExp& regexp); + template<class T> + static automaton::NFA convert(const T& regexp); private: - void Visit(void* , const regexp::FormalRegExp& regexp); - void Visit(void* , const regexp::UnboundedRegExp& regexp); + void Visit(void* , const regexp::FormalRegExp& regexp) const; + void Visit(void* , const regexp::UnboundedRegExp& regexp) const; + + static const BrzozowskiDerivation BRZOZOWSKI_DERIVATION; }; } /* namespace re2fa */ diff --git a/alib2algo/test-src/conversions/re2fa/re2faTest.cpp b/alib2algo/test-src/conversions/re2fa/re2faTest.cpp index 66743585bb7c71522f85149e6aa98abbc40273f0..3e0a368f99de1bd821eb792cb5321d02529733a9 100644 --- a/alib2algo/test-src/conversions/re2fa/re2faTest.cpp +++ b/alib2algo/test-src/conversions/re2fa/re2faTest.cpp @@ -7,12 +7,14 @@ #include "conversions/fa2re/Algebraic.h" #include "determinize/nfa/NFADeterminizer.h" #include "minimize/dfa/MinimizeDFA.h" +#include "normalize/dfa/NormalizeDFA.h" #include "epsilon/fsm/FSMEpsilonRemover.h" #include "regexp/unbounded/UnboundedRegExp.h" #include "regexp/RegExpFromStringParser.h" #include "automaton/FSM/NFA.h" +#include <factory/DataFactory.hpp> CPPUNIT_TEST_SUITE_REGISTRATION( re2faTest ); @@ -80,16 +82,24 @@ void re2faTest::testBrzozowski() { regexp::RegExpFromStringParser parser(inputs); regexp::RegExp regexp1( parser.parseValue() ); - conversions::re2fa::BrzozowskiDerivation brzozowski1; - automaton::DFA dfa1 = brzozowski1.convert(regexp1); + automaton::Automaton nfa1 = conversions::re2fa::BrzozowskiDerivation::convert(regexp1); + alib::DataFactory::toStdout(nfa1); - regexp::RegExp regexp2( conversions::fa2re::Algebraic::convert(dfa1) ); + regexp::RegExp regexp2( conversions::fa2re::Algebraic::convert(static_cast<const automaton::NFA&>(nfa1.getData())) ); + alib::DataFactory::toStdout(regexp2); - conversions::re2fa::BrzozowskiDerivation brzozowski2; - automaton::DFA dfa2 = brzozowski2.convert(regexp2); + automaton::Automaton nfa2 = conversions::re2fa::BrzozowskiDerivation::convert(regexp2); - automaton::DFA mdfa1 = minimize::MinimizeDFA::minimize(dfa1); - automaton::DFA mdfa2 = minimize::MinimizeDFA::minimize(dfa2); + automaton::DFA mdfa1_1 = determinize::NFADeterminizer::determinize(static_cast<const automaton::NFA&>(nfa1.getData())); + automaton::DFA mdfa1_2 = minimize::MinimizeDFA::minimize(mdfa1_1); + automaton::DFA mdfa1_3 = normalize::NormalizeDFA::normalize(mdfa1_2); - CPPUNIT_ASSERT( mdfa1 == mdfa2); + automaton::DFA mdfa2_1 = determinize::NFADeterminizer::determinize(static_cast<const automaton::NFA&>(nfa2.getData())); + automaton::DFA mdfa2_2 = minimize::MinimizeDFA::minimize(mdfa2_1); + automaton::DFA mdfa2_3 = normalize::NormalizeDFA::normalize(mdfa2_2); + + alib::DataFactory::toStdout(mdfa1_3); + alib::DataFactory::toStdout(mdfa2_3); + + CPPUNIT_ASSERT( mdfa1_3 == mdfa2_3); }