diff --git a/alib2algo/src/rte/convert/ToPostfixPushdownAutomaton.cpp b/alib2algo/src/rte/convert/ToPostfixPushdownAutomaton.cpp index 68d7b2630ec755194ce8e905f48bff3a4ac430e3..1065b10a54deb187e445d3775cfe86f942cfd352 100644 --- a/alib2algo/src/rte/convert/ToPostfixPushdownAutomaton.cpp +++ b/alib2algo/src/rte/convert/ToPostfixPushdownAutomaton.cpp @@ -6,18 +6,13 @@ */ #include "ToPostfixPushdownAutomaton.h" -#include "ToPostfixPushdownAutomatonGlushkovNaive.h" #include <registration/AlgoRegistration.hpp> namespace rte { namespace convert { -automaton::NPDA < ext::variant < common::ranked_symbol < DefaultSymbolType, DefaultRankType >, DefaultSymbolType >, DefaultEpsilonType, ext::variant < common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType >, DefaultSymbolType >, char > ToPostfixPushdownAutomaton::convert ( const rte::FormalRTE < > & rte ) { - return ToPostfixPushdownAutomatonGlushkovNaive::convert ( rte ); -} - -auto ToAutomatonFormalRegExp = registration::AbstractRegister < ToPostfixPushdownAutomaton, automaton::NPDA < ext::variant < common::ranked_symbol < DefaultSymbolType, DefaultRankType >, DefaultSymbolType >, DefaultEpsilonType, ext::variant < common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType >, DefaultSymbolType >, char >, const rte::FormalRTE < > & > ( ToPostfixPushdownAutomaton::convert ); +auto ToAutomatonFormalRegExp = registration::AbstractRegister < ToPostfixPushdownAutomaton, automaton::NPDA < ext::variant < common::ranked_symbol < DefaultSymbolType, DefaultRankType >, alphabet::EndSymbol >, DefaultEpsilonType, ext::variant < common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType >, alphabet::BottomOfTheStackSymbol >, char >, const rte::FormalRTE < > & > ( ToPostfixPushdownAutomaton::convert ); } /* namespace convert */ diff --git a/alib2algo/src/rte/convert/ToPostfixPushdownAutomaton.h b/alib2algo/src/rte/convert/ToPostfixPushdownAutomaton.h index 59af9171a2bcd4e59fca4e989f2cef47d1b9b9de..09dbe38de621a619a4146fb1beeb40b1b512f977 100644 --- a/alib2algo/src/rte/convert/ToPostfixPushdownAutomaton.h +++ b/alib2algo/src/rte/convert/ToPostfixPushdownAutomaton.h @@ -10,6 +10,7 @@ #include <rte/formal/FormalRTE.h> #include <automaton/PDA/NPDA.h> +#include "ToPostfixPushdownAutomatonGlushkovNaive.h" namespace rte { @@ -27,14 +28,23 @@ public: /** * Implements conversion of the tree regular expressions to a pushdown automaton usign Glushkov's method of neighbours. * + * \tparam SymbolType the type of symbols in the regular expression + * \tparam RankType the type of symbol ranks in the regular expression + * * \param regexp the regexp to convert * * \return PDA equivalent to original regular rte expression reading linearized postfix tree */ - static automaton::NPDA < ext::variant < common::ranked_symbol < DefaultSymbolType, DefaultRankType >, DefaultSymbolType >, DefaultEpsilonType, ext::variant < common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType >, DefaultSymbolType >, char > convert ( const rte::FormalRTE < > & rte ); + template < class SymbolType, class RankType > + static automaton::NPDA < ext::variant < common::ranked_symbol < SymbolType, RankType >, alphabet::EndSymbol >, DefaultEpsilonType, ext::variant < common::ranked_symbol < ext::pair < SymbolType, unsigned >, RankType >, alphabet::BottomOfTheStackSymbol >, char > convert ( const rte::FormalRTE < SymbolType, RankType > & rte ); }; +template < class SymbolType, class RankType > +automaton::NPDA < ext::variant < common::ranked_symbol < SymbolType, RankType >, alphabet::EndSymbol >, DefaultEpsilonType, ext::variant < common::ranked_symbol < ext::pair < SymbolType, unsigned >, RankType >, alphabet::BottomOfTheStackSymbol >, char > ToPostfixPushdownAutomaton::convert ( const rte::FormalRTE < SymbolType, RankType > & rte ) { + return ToPostfixPushdownAutomatonGlushkovNaive::convert ( rte ); +} + } /* namespace convert */ } /* namespace rte */ diff --git a/alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkovNaive.cpp b/alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkovNaive.cpp index 0b49beb96a2c89e68730a71b65d98e6b26bbf035..5602f906db4713f7ca171605d64b799eaf76c371 100644 --- a/alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkovNaive.cpp +++ b/alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkovNaive.cpp @@ -7,134 +7,13 @@ #include "ToPostfixPushdownAutomatonGlushkovNaive.h" -#include <alphabet/BottomOfTheStackSymbol.h> -#include <alphabet/EndSymbol.h> - -#include <global/GlobalData.h> - #include <registration/AlgoRegistration.hpp> -#include "../glushkov/GlushkovFollowNaive.h" -#include "../glushkov/GlushkovIndexate.h" -#include "../glushkov/GlushkovFirst.h" - namespace rte { namespace convert { -inline common::ranked_symbol < DefaultSymbolType, DefaultRankType > phi ( const common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType > & symbol ) { - return common::ranked_symbol < DefaultSymbolType, DefaultRankType > ( symbol.getSymbol ( ).first, symbol.getRank ( ) ); -} - -/*ext::vector < ext::variant < common::ranked_symbol < DefaultSymbolType, DefaultRankType >, DefaultSymbolType > > phi ( const ext::vector < common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType > > & follow ) { - return ext::transform < ext::variant < common::ranked_symbol < DefaultSymbolType, DefaultRankType >, DefaultSymbolType > > ( follow, []( const common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType > & symbol ) { return phi ( symbol ); } ); -}*/ - -bool isSubstSymbolPresent ( const ext::set < common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType > > & container, const ext::set < common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType > > & substAlphabet ) { - ext::vector < common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType > > intersection; - std::set_intersection ( container.begin ( ), container.end ( ), substAlphabet.begin ( ), substAlphabet.end ( ), std::back_inserter ( intersection ) ); - return intersection.size ( ) > 0; -} - -automaton::NPDA < ext::variant < common::ranked_symbol < DefaultSymbolType, DefaultRankType >, DefaultSymbolType >, DefaultEpsilonType, ext::variant < common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType >, DefaultSymbolType >, char > ToPostfixPushdownAutomatonGlushkovNaive::convert ( const rte::FormalRTE < > & rte ) { - - // step 1; index RTE - rte::FormalRTE < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType > indexedRTE = rte::GlushkovIndexate::index ( rte ); - - // step 2; compute: - // - first set - const ext::set < common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType > > firstSet = rte::GlushkovFirst::first ( indexedRTE ); - - // - follow set for every element of (non-indexed) RTE alphabet element - ext::map < common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType >, ext::set < ext::vector < common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType > > > > followSet; - - for ( const common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType > & symbol : indexedRTE.getAlphabet ( ) ) - followSet.insert ( std::make_pair ( symbol, rte::GlushkovFollowNaive::follow ( indexedRTE, symbol ) ) ); - - /* check for exceptions -> there must be NO substitution symbol in first or follow sets */ - if ( isSubstSymbolPresent ( firstSet, indexedRTE.getSubstitutionAlphabet ( ) ) ) - throw exception::CommonException ( "GlushkovRTE: Substitution symbol appeared in the first set" ); - - for ( const auto & kv : followSet ) - for ( const auto & followTuple : kv.second ) - if ( isSubstSymbolPresent ( ext::set < common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType > > ( followTuple.begin ( ), followTuple.end ( ) ), indexedRTE.getSubstitutionAlphabet ( ) ) ) - throw exception::CommonException ( "GlushkovRTE: Substitution symbol appeared in a follow set" ); - - /* check end */ - - // step 3; create PDA (w/o transitions yet) and initialize input alphabet = (non-indexed) RTE alphabet and END symbol - char q = 'q'; - char f = 'f'; - automaton::NPDA < ext::variant < common::ranked_symbol < DefaultSymbolType, DefaultRankType >, DefaultSymbolType >, DefaultEpsilonType, ext::variant < common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType >, DefaultSymbolType >, char > automaton ( q, alphabet::BottomOfTheStackSymbol::instance < DefaultSymbolType > ( ) ); - - automaton.addState ( f ); - automaton.addFinalState ( f ); - - for ( const common::ranked_symbol < DefaultSymbolType, DefaultRankType > & symbol : rte.getAlphabet ( ) ) - automaton.addInputSymbol ( symbol ); - - automaton.addInputSymbol ( alphabet::EndSymbol::instance < DefaultSymbolType > ( ) ); - - // step 4; create pushdown store alphabet; it consists of elements of indexed RTE alphabet and BotS symbol - for ( const common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType > & symb : indexedRTE.getAlphabet ( ) ) - automaton.addPushdownStoreSymbol ( symb ); - - /* DEBUG */ - if ( common::GlobalData::verbose ) { - common::Streams::err << "RTE:" << std::endl; - - for ( const auto & symbol : indexedRTE.getAlphabet ( ) ) - common::Streams::err << "\t" << symbol << std::endl; - - common::Streams::err << std::endl; - - common::Streams::err << "First(RTE):" << std::endl; - - for ( const auto & symbol : firstSet ) - common::Streams::err << "\t" << symbol << std::endl; - - common::Streams::err << std::endl; - - for ( const auto & kv : followSet ) { - common::Streams::err << "Follow(RTE, " << kv.first << "):" << std::endl; - - if ( kv.second.empty ( ) ) - common::Streams::err << "\t" << "{}" << std::endl; - - for ( const auto & follow : kv.second ) { - for ( const auto & symbol : follow ) - common::Streams::err << "\t" << symbol << std::endl; - - common::Streams::err << std::endl; - } - - common::Streams::err << std::endl; - } - } - /* DEBUG END */ - - for ( const common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType > & symb : indexedRTE.getAlphabet ( ) ) { - if ( symb.getRank ( ) == 0 ) - automaton.addTransition ( q, phi ( symb ), { }, q, { symb } ); - else - for ( const ext::vector < common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType > > & follow : followSet[symb] ) { - ext::vector < ext::variant < common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType >, DefaultSymbolType > > fstring ( follow.rbegin ( ), follow.rend ( ) ); - automaton.addTransition ( q, phi ( symb ), fstring, q, { symb } ); - } - - } - - for ( const common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType > & symb : firstSet ) { - ext::vector < ext::variant < common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType >, DefaultSymbolType > > pop; - pop.push_back ( symb ); - pop.push_back ( alphabet::BottomOfTheStackSymbol::instance < DefaultSymbolType > ( ) ); - automaton.addTransition ( q, alphabet::EndSymbol::instance < DefaultSymbolType > ( ), pop, f, { } ); - } - - return automaton; -} - -auto ToPostfixPushdownAutomatonGlushkovNaiveFormalRTE = registration::AbstractRegister < ToPostfixPushdownAutomatonGlushkovNaive, automaton::NPDA < ext::variant < common::ranked_symbol < DefaultSymbolType, DefaultRankType >, DefaultSymbolType >, DefaultEpsilonType, ext::variant < common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType >, DefaultSymbolType >, char >, const rte::FormalRTE < > & > ( ToPostfixPushdownAutomatonGlushkovNaive::convert ); +auto ToPostfixPushdownAutomatonGlushkovNaiveFormalRTE = registration::AbstractRegister < ToPostfixPushdownAutomatonGlushkovNaive, automaton::NPDA < ext::variant < common::ranked_symbol < DefaultSymbolType, DefaultRankType >, alphabet::EndSymbol >, DefaultEpsilonType, ext::variant < common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType >, alphabet::BottomOfTheStackSymbol >, char >, const rte::FormalRTE < > & > ( ToPostfixPushdownAutomatonGlushkovNaive::convert ); } /* namespace convert */ diff --git a/alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkovNaive.h b/alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkovNaive.h index 340091f8b98ed288d15757150b9d56bed153f007..587df49cc9689fa8156bf2aeb3335c9be1328c67 100644 --- a/alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkovNaive.h +++ b/alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkovNaive.h @@ -8,10 +8,17 @@ #ifndef TO_POSTFIX_PUSHDOWN_AUTOMATON_GLUSHKOV_NAIVE_H_ #define TO_POSTFIX_PUSHDOWN_AUTOMATON_GLUSHKOV_NAIVE_H_ +#include <global/GlobalData.h> + #include <automaton/PDA/NPDA.h> #include <rte/formal/FormalRTE.h> - // #include <rte/unbounded/UnboundedRegExp.h> +#include <alphabet/BottomOfTheStackSymbol.h> +#include <alphabet/EndSymbol.h> + +#include "../glushkov/GlushkovFollowNaive.h" +#include "../glushkov/GlushkovIndexate.h" +#include "../glushkov/GlushkovFirst.h" namespace rte { @@ -23,17 +30,132 @@ namespace convert { * Source: Master Thesis, Pecka Tomas, CTU FIT, 2016, chapter 4.2 */ class ToPostfixPushdownAutomatonGlushkovNaive { + template < class SymbolType, class RankType > + static common::ranked_symbol < SymbolType, RankType > phi ( const common::ranked_symbol < ext::pair < SymbolType, unsigned >, RankType > & symbol ) { + return common::ranked_symbol < SymbolType, RankType > ( symbol.getSymbol ( ).first, symbol.getRank ( ) ); + } + + template < class SymbolType, class RankType > + static bool isSubstSymbolPresent ( const ext::set < common::ranked_symbol < ext::pair < SymbolType, unsigned >, RankType > > & container, const ext::set < common::ranked_symbol < ext::pair < SymbolType, unsigned >, RankType > > & substAlphabet ) { + ext::vector < common::ranked_symbol < ext::pair < SymbolType, unsigned >, RankType > > intersection; + std::set_intersection ( container.begin ( ), container.end ( ), substAlphabet.begin ( ), substAlphabet.end ( ), std::back_inserter ( intersection ) ); + return intersection.size ( ) > 0; + } + public: /** * Implements conversion of the regular tree expressions to a real-time height-deterministic pushdown automaton usign algorithm similar to Glushkov's method of neighbours. * + * \tparam SymbolType the type of symbols in the regular expression + * \tparam RankType the type of symbol ranks in the regular expression + * * \param rte the converted regexp to convert * * \return real-time height-determinitic pushdown automaton accepting the language described by the original regular tree expression */ - static automaton::NPDA < ext::variant < common::ranked_symbol < DefaultSymbolType, DefaultRankType >, DefaultSymbolType >, DefaultEpsilonType, ext::variant < common::ranked_symbol < ext::pair < DefaultSymbolType, unsigned >, DefaultRankType >, DefaultSymbolType >, char > convert ( const rte::FormalRTE < > & rte ); + template < class SymbolType, class RankType > + static automaton::NPDA < ext::variant < common::ranked_symbol < SymbolType, RankType >, alphabet::EndSymbol >, DefaultEpsilonType, ext::variant < common::ranked_symbol < ext::pair < SymbolType, unsigned >, RankType >, alphabet::BottomOfTheStackSymbol >, char > convert ( const rte::FormalRTE < SymbolType, RankType > & rte ); }; +template < class SymbolType, class RankType > +automaton::NPDA < ext::variant < common::ranked_symbol < SymbolType, RankType >, alphabet::EndSymbol >, DefaultEpsilonType, ext::variant < common::ranked_symbol < ext::pair < SymbolType, unsigned >, RankType >, alphabet::BottomOfTheStackSymbol >, char > ToPostfixPushdownAutomatonGlushkovNaive::convert ( const rte::FormalRTE < SymbolType, RankType > & rte ) { + + // step 1; index RTE + rte::FormalRTE < ext::pair < SymbolType, unsigned >, RankType > indexedRTE = rte::GlushkovIndexate::index ( rte ); + + // step 2; compute: + // - first set + const ext::set < common::ranked_symbol < ext::pair < SymbolType, unsigned >, RankType > > firstSet = rte::GlushkovFirst::first ( indexedRTE ); + + // - follow set for every element of (non-indexed) RTE alphabet element + ext::map < common::ranked_symbol < ext::pair < SymbolType, unsigned >, RankType >, ext::set < ext::vector < common::ranked_symbol < ext::pair < SymbolType, unsigned >, RankType > > > > followSet; + + for ( const common::ranked_symbol < ext::pair < SymbolType, unsigned >, RankType > & symbol : indexedRTE.getAlphabet ( ) ) + followSet.insert ( std::make_pair ( symbol, rte::GlushkovFollowNaive::follow ( indexedRTE, symbol ) ) ); + + /* check for exceptions -> there must be NO substitution symbol in first or follow sets */ + if ( isSubstSymbolPresent ( firstSet, indexedRTE.getSubstitutionAlphabet ( ) ) ) + throw exception::CommonException ( "GlushkovRTE: Substitution symbol appeared in the first set" ); + + for ( const auto & kv : followSet ) + for ( const auto & followTuple : kv.second ) + if ( isSubstSymbolPresent ( ext::set < common::ranked_symbol < ext::pair < SymbolType, unsigned >, RankType > > ( followTuple.begin ( ), followTuple.end ( ) ), indexedRTE.getSubstitutionAlphabet ( ) ) ) + throw exception::CommonException ( "GlushkovRTE: Substitution symbol appeared in a follow set" ); + + /* check end */ + + // step 3; create PDA (w/o transitions yet) and initialize input alphabet = (non-indexed) RTE alphabet and END symbol + char q = 'q'; + char f = 'f'; + automaton::NPDA < ext::variant < common::ranked_symbol < SymbolType, RankType >, alphabet::EndSymbol >, DefaultEpsilonType, ext::variant < common::ranked_symbol < ext::pair < SymbolType, unsigned >, RankType >, alphabet::BottomOfTheStackSymbol >, char > automaton ( q, alphabet::BottomOfTheStackSymbol { } ); + + automaton.addState ( f ); + automaton.addFinalState ( f ); + + for ( const common::ranked_symbol < SymbolType, RankType > & symbol : rte.getAlphabet ( ) ) + automaton.addInputSymbol ( symbol ); + + automaton.addInputSymbol ( alphabet::EndSymbol { } ); + + // step 4; create pushdown store alphabet; it consists of elements of indexed RTE alphabet and BotS symbol + for ( const common::ranked_symbol < ext::pair < SymbolType, unsigned >, RankType > & symb : indexedRTE.getAlphabet ( ) ) + automaton.addPushdownStoreSymbol ( symb ); + + /* DEBUG */ + if ( common::GlobalData::verbose ) { + common::Streams::err << "RTE:" << std::endl; + + for ( const auto & symbol : indexedRTE.getAlphabet ( ) ) + common::Streams::err << "\t" << symbol << std::endl; + + common::Streams::err << std::endl; + + common::Streams::err << "First(RTE):" << std::endl; + + for ( const auto & symbol : firstSet ) + common::Streams::err << "\t" << symbol << std::endl; + + common::Streams::err << std::endl; + + for ( const auto & kv : followSet ) { + common::Streams::err << "Follow(RTE, " << kv.first << "):" << std::endl; + + if ( kv.second.empty ( ) ) + common::Streams::err << "\t" << "{}" << std::endl; + + for ( const auto & follow : kv.second ) { + for ( const auto & symbol : follow ) + common::Streams::err << "\t" << symbol << std::endl; + + common::Streams::err << std::endl; + } + + common::Streams::err << std::endl; + } + } + /* DEBUG END */ + + for ( const common::ranked_symbol < ext::pair < SymbolType, unsigned >, RankType > & symb : indexedRTE.getAlphabet ( ) ) { + if ( symb.getRank ( ) == 0 ) + automaton.addTransition ( q, phi ( symb ), { }, q, { symb } ); + else + for ( const ext::vector < common::ranked_symbol < ext::pair < SymbolType, unsigned >, RankType > > & follow : followSet[symb] ) { + ext::vector < ext::variant < common::ranked_symbol < ext::pair < SymbolType, unsigned >, RankType >, alphabet::BottomOfTheStackSymbol > > fstring ( follow.rbegin ( ), follow.rend ( ) ); + automaton.addTransition ( q, phi ( symb ), fstring, q, { symb } ); + } + + } + + for ( const common::ranked_symbol < ext::pair < SymbolType, unsigned >, RankType > & symb : firstSet ) { + ext::vector < ext::variant < common::ranked_symbol < ext::pair < SymbolType, unsigned >, RankType >, alphabet::BottomOfTheStackSymbol > > pop; + pop.push_back ( symb ); + pop.push_back ( alphabet::BottomOfTheStackSymbol ( ) ); + automaton.addTransition ( q, alphabet::EndSymbol ( ), pop, f, { } ); + } + + return automaton; +} + } /* namespace convert */ } /* namespace rte */