From ce913f44db77f12b03631489b594aae6c036554a Mon Sep 17 00:00:00 2001 From: Tomas Pecka <peckato1@fit.cvut.cz> Date: Sat, 16 Apr 2016 22:44:52 +0200 Subject: [PATCH] GlushkovRTE: wip (missing correct follow computation) --- alib2algo/src/rte/GlushkovPair.cpp | 30 ++ alib2algo/src/rte/GlushkovPair.h | 32 ++ alib2algo/src/rte/GlushkovSymbol.cpp | 31 ++ alib2algo/src/rte/GlushkovSymbol.h | 34 ++ alib2algo/src/rte/GlushkovTraversal.cpp | 436 ++++++++++++++++++ alib2algo/src/rte/GlushkovTraversal.h | 106 +++++ .../convert/ToPostfixPushdownAutomaton.cpp | 36 ++ .../rte/convert/ToPostfixPushdownAutomaton.h | 41 ++ .../ToPostfixPushdownAutomatonGlushkov.cpp | 140 ++++++ .../ToPostfixPushdownAutomatonGlushkov.h | 44 ++ 10 files changed, 930 insertions(+) create mode 100644 alib2algo/src/rte/GlushkovPair.cpp create mode 100644 alib2algo/src/rte/GlushkovPair.h create mode 100644 alib2algo/src/rte/GlushkovSymbol.cpp create mode 100644 alib2algo/src/rte/GlushkovSymbol.h create mode 100644 alib2algo/src/rte/GlushkovTraversal.cpp create mode 100644 alib2algo/src/rte/GlushkovTraversal.h create mode 100644 alib2algo/src/rte/convert/ToPostfixPushdownAutomaton.cpp create mode 100644 alib2algo/src/rte/convert/ToPostfixPushdownAutomaton.h create mode 100644 alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkov.cpp create mode 100644 alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkov.h diff --git a/alib2algo/src/rte/GlushkovPair.cpp b/alib2algo/src/rte/GlushkovPair.cpp new file mode 100644 index 0000000000..666f2dc752 --- /dev/null +++ b/alib2algo/src/rte/GlushkovPair.cpp @@ -0,0 +1,30 @@ +/* + * GlushkovPair.cpp + * + * Created on: 14. 4. 
2016 + * Author: Tomas Pecka + */ + +#include "GlushkovPair.h" + +namespace rte { + +GlushkovPair::GlushkovPair ( GlushkovSymbol const & first, GlushkovSymbol const & second ) : m_first ( first ), m_second ( second ) { +} + +bool GlushkovPair::operator <( GlushkovPair const & x ) const { + if ( m_first.getId ( ) == x.m_first.getId ( ) ) + return m_second.getId ( ) < x.m_second.getId ( ); + else + return m_first.getId ( ) < x.m_first.getId ( ); +} + +GlushkovSymbol const & GlushkovPair::getFirst ( void ) const { + return m_first; +} + +GlushkovSymbol const & GlushkovPair::getSecond ( void ) const { + return m_second; +} + +} /* namespace rte */ diff --git a/alib2algo/src/rte/GlushkovPair.h b/alib2algo/src/rte/GlushkovPair.h new file mode 100644 index 0000000000..9812242133 --- /dev/null +++ b/alib2algo/src/rte/GlushkovPair.h @@ -0,0 +1,32 @@ +/* + * GlushkovPair.h + * + * Created on: 14. 4. 2016 + * Author: Tomas Pecka + */ + +#ifndef RTE_GLUSHKOVPAIR_H_ +#define RTE_GLUSHKOVPAIR_H_ + +#include "GlushkovSymbol.h" + +namespace rte { + +/** + * Actually, this is just std::pair. 2-tuple. + */ +class GlushkovPair { +private: + GlushkovSymbol const m_first; + GlushkovSymbol const m_second; + +public: + GlushkovPair ( GlushkovSymbol const & first, GlushkovSymbol const & second ); + bool operator <( GlushkovPair const & x ) const; + GlushkovSymbol const & getFirst ( void ) const; + GlushkovSymbol const & getSecond ( void ) const; +}; + +} /* namespace rte */ + +#endif /* GLUSHKOVPAIR_H_ */ diff --git a/alib2algo/src/rte/GlushkovSymbol.cpp b/alib2algo/src/rte/GlushkovSymbol.cpp new file mode 100644 index 0000000000..81c22f304e --- /dev/null +++ b/alib2algo/src/rte/GlushkovSymbol.cpp @@ -0,0 +1,31 @@ +/* + * GlushkovSymbol.cpp + * + * Created on: 14. 4. 
2016 + * Author: Tomas Pecka + */ + +#include "GlushkovSymbol.h" + +namespace rte { + +GlushkovSymbol::GlushkovSymbol ( rte::FormalRTESymbol const * const & node, int i ) : m_Symbol ( node ), m_i ( i ) { +} + +bool GlushkovSymbol::operator <( GlushkovSymbol const & x ) const { + return m_i < x.m_i; +} + +int GlushkovSymbol::getId ( void ) const { + return m_i; +} + +alphabet::RankedSymbol GlushkovSymbol::getInputSymbol ( void ) const { + return m_Symbol->getSymbol ( ); +} + +rte::FormalRTESymbol const * GlushkovSymbol::getSymbolPtr ( void ) const { + return m_Symbol; +} + +} /* namespace rte */ diff --git a/alib2algo/src/rte/GlushkovSymbol.h b/alib2algo/src/rte/GlushkovSymbol.h new file mode 100644 index 0000000000..eed310bf26 --- /dev/null +++ b/alib2algo/src/rte/GlushkovSymbol.h @@ -0,0 +1,34 @@ +/* + * GlushkovSymbol.h + * + * Created on: 14. 4. 2016 + * Author: Tomas Pecka + */ + +#ifndef RTE_GLUSHKOVSYMBOL_H_ +#define RTE_GLUSHKOVSYMBOL_H_ + +#include <alphabet/Symbol.h> +#include <rte/formal/FormalRTESymbol.h> + +namespace rte { + +/** + * Represents numbered symbol in glushkov algorithm. + */ +class GlushkovSymbol { +private: + rte::FormalRTESymbol const * const m_Symbol; + int m_i; + +public: + GlushkovSymbol ( rte::FormalRTESymbol const * const & node, int i ); + bool operator <( GlushkovSymbol const & x ) const; + int getId ( void ) const; + alphabet::RankedSymbol getInputSymbol ( void ) const; + rte::FormalRTESymbol const * getSymbolPtr ( void ) const; +}; + +} /* namespace rte */ + +#endif /* GLUSHKOVSYMBOL_H_ */ diff --git a/alib2algo/src/rte/GlushkovTraversal.cpp b/alib2algo/src/rte/GlushkovTraversal.cpp new file mode 100644 index 0000000000..df65c24751 --- /dev/null +++ b/alib2algo/src/rte/GlushkovTraversal.cpp @@ -0,0 +1,436 @@ +/* + * GlushkovTraversal.cpp + * + * Created on: 14. 4. 
2016 + * Author: Tomas Pecka + */ + +#include "GlushkovTraversal.h" +#include <iterator> +#include <vector> + +namespace rte { + +const GlushkovSymbol & GlushkovTraversal::findSymbol ( rte::FormalRTESymbol const * const symbol, const std::set < GlushkovSymbol > & symbolSet ) { + auto it = find_if ( symbolSet.begin ( ), symbolSet.end ( ), [symbol] ( GlushkovSymbol const & gs ) -> bool { + return gs.getSymbolPtr ( ) == symbol; + } ); + + if ( it == symbolSet.end ( ) ) + throw exception::CommonException ( "GlushkovTraversal RTE - Can not find GlushkovSymbol for regexp node. Probably symbol from constant alphabet?" ); + + return * it; +} + +bool GlushkovTraversal::pos ( GlushkovSymbol const & symbol, rte::FormalRTE const * const & node ) { + return pos ( & node->getRTE ( ), symbol.getSymbolPtr ( ) ); +} + +std::set < GlushkovSymbol > GlushkovTraversal::first ( rte::FormalRTE const & re ) { + std::set < GlushkovSymbol > firstSet, symbolSet = getSymbols ( re ); + + for ( auto const & s : first ( & re.getRTE ( ) ) ) + firstSet.insert ( findSymbol ( s, symbolSet ) ); + + return firstSet; +} + +/* + * std::set<GlushkovSymbol> GlushkovTraversal::last( rte::FormalRTE const& re ) + * { + * std::set<GlushkovSymbol> lastSet, symbolSet = getSymbols( re ); + * + * for( auto const& s : last( & re.getRTE() ) ) + * lastSet.insert( findSymbol( s, symbolSet ) ); + * + * return lastSet; + * } + */ + +std::set < std::vector < GlushkovSymbol > > GlushkovTraversal::follow ( rte::FormalRTE const & re, GlushkovSymbol const & symbol ) { + std::set < GlushkovSymbol > symbolSet = getSymbols ( re ); + std::set < std::vector < GlushkovSymbol > > followSet; + + std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > symbMap; + + for ( const auto & csymbol : re.getConstantAlphabet ( ) ) + symbMap.insert ( std::make_pair ( csymbol, std::set < rte::FormalRTESymbol const * > ( ) ) ); + + for ( const auto & f : follow ( & re.getRTE ( ), symbol.getSymbolPtr ( ), symbMap ) ) { + 
std::vector < GlushkovSymbol > curr; + + for ( const auto & s : f ) + curr.push_back ( findSymbol ( s, symbolSet ) ); + + followSet.insert ( curr ); + } + + return followSet; +} + +// ----------------------------------------------------------------------------- + +std::set < rte::FormalRTESymbol const * > GlushkovTraversal::first ( rte::FormalRTEElement const * const & node ) { + rte::FormalRTEAlternation const * const alternation = dynamic_cast < rte::FormalRTEAlternation const * const > ( node ); + rte::FormalRTESubstitution const * const concatenation = dynamic_cast < rte::FormalRTESubstitution const * const > ( node ); + rte::FormalRTEIteration const * const iteration = dynamic_cast < rte::FormalRTEIteration const * const > ( node ); + rte::FormalRTESymbol const * const symbol = dynamic_cast < rte::FormalRTESymbol const * const > ( node ); + rte::FormalRTEEmpty const * const empty = dynamic_cast < rte::FormalRTEEmpty const * const > ( node ); + + if ( alternation ) + return first ( alternation ); + else if ( concatenation ) + return first ( concatenation ); + else if ( iteration ) + return first ( iteration ); + else if ( empty ) + return first ( empty ); + else if ( symbol ) + return first ( symbol ); + + throw exception::CommonException ( "GlushkovTraversal::first - invalid RegExpElement node" ); +} + +std::set < rte::FormalRTESymbol const * > GlushkovTraversal::first ( rte::FormalRTEAlternation const * const & node ) { + std::set < rte::FormalRTESymbol const * > ret, tmp; + + tmp = first ( & node->getLeftElement ( ) ); + ret.insert ( tmp.begin ( ), tmp.end ( ) ); + + tmp = first ( & node->getRightElement ( ) ); + ret.insert ( tmp.begin ( ), tmp.end ( ) ); + + return ret; +} + +std::set < rte::FormalRTESymbol const * > GlushkovTraversal::first ( rte::FormalRTESubstitution const * const & node ) { + std::set < rte::FormalRTESymbol const * > ret, tmp; + + tmp = first ( & node->getLeftElement ( ) ); + ret.insert ( tmp.begin ( ), tmp.end ( ) ); + + if ( ret.count 
( & node->getSubstitutionSymbol ( ) ) ) { + ret.erase ( & node->getSubstitutionSymbol ( ) ); + tmp = first ( & node->getRightElement ( ) ); + ret.insert ( tmp.begin ( ), tmp.end ( ) ); + } + + return ret; +} + +std::set < rte::FormalRTESymbol const * > GlushkovTraversal::first ( rte::FormalRTEIteration const * const & node ) { + std::set < rte::FormalRTESymbol const * > ret = first ( & node->getElement ( ) ); + + ret.insert ( & node->getSubstitutionSymbol ( ) ); + return ret; +} + +std::set < rte::FormalRTESymbol const * > GlushkovTraversal::first ( rte::FormalRTESymbol const * const & node ) { + return { + node + }; +} + +std::set < rte::FormalRTESymbol const * > GlushkovTraversal::first ( rte::FormalRTEEmpty const * const & /* node */ ) { + return std::set < rte::FormalRTESymbol const * > ( ); +} + +// ---------------------------------------------------------------------------- + +/* + * std::set<rte::FormalRTESymbol const *> GlushkovTraversal::last( rte::FormalRTEElement const * const & node ) + * { + * rte::FormalRTEAlternation const * const alternation = dynamic_cast<rte::FormalRTEAlternation const * const>( node ); + * rte::FormalRTESubstitution const * const concatenation = dynamic_cast<rte::FormalRTESubstitution const * const>( node ); + * rte::FormalRTEIteration const * const iteration = dynamic_cast<rte::FormalRTEIteration const * const>( node ); + * rte::FormalRTESymbol const * const symbol = dynamic_cast<rte::FormalRTESymbol const * const>( node ); + * rte::FormalRTEEmpty const * const empty = dynamic_cast<rte::FormalRTEEmpty const * const>( node ); + * + * if( symbol ) + * return last( symbol ); + * else if( alternation ) + * return last( alternation ); + * else if( concatenation ) + * return last( concatenation ); + * else if( iteration ) + * return last( iteration ); + * else if( empty ) + * return last( empty ); + * + * throw exception::CommonException( "GlushkovTraversal::last - invalid RegExpElement node" ); + * } + * + * 
std::set<rte::FormalRTESymbol const *> GlushkovTraversal::last( rte::FormalRTEAlternation const * const & node ) + * { + * std::set<rte::FormalRTESymbol const *> ret; + * + * for( auto const& element : node->getElements( ) ) + * { + * std::set<rte::FormalRTESymbol const *> tmp = last( element ); + * ret.insert( tmp.begin( ), tmp.end( ) ); + * } + * + * return ret; + * } + * + * std::set<rte::FormalRTESymbol const *> GlushkovTraversal::last( rte::FormalRTESubstitution const * const & node ) + * { + * std::set<rte::FormalRTESymbol const *> ret, tmp; + * + * for( auto it = node->getElements( ).rbegin( ); it != node->getElements( ).rend( ) ; it ++ ) + * { + * tmp = last( *it ); + * ret.insert( tmp.begin( ), tmp.end( ) ); + * + * if( ! rte::properties::RegExpEpsilon::languageContainsEpsilon(**it) ) + * break; + * } + * + * return ret; + * } + * + * std::set<rte::FormalRTESymbol const *> GlushkovTraversal::last( rte::FormalRTEIteration const * const & node ) + * { + * return last( & node->getElement( ) ); + * } + * + * std::set<rte::FormalRTESymbol const *> GlushkovTraversal::last( rte::FormalRTESymbol const * const & node ) + * { + * return std::set<rte::FormalRTESymbol const *> { node }; + * } + * + * std::set<rte::FormalRTESymbol const *> GlushkovTraversal::last( rte::FormalRTEEmpty const * const & node ) + * { + * return std::set<rte::FormalRTESymbol const *>( ); + * } + */ + +// ---------------------------------------------------------------------------- + +std::set < std::vector < rte::FormalRTESymbol const * > > GlushkovTraversal::follow ( rte::FormalRTEElement const * const & node, rte::FormalRTESymbol const * const & symbolptr, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subMap ) { + rte::FormalRTEAlternation const * const alternation = dynamic_cast < rte::FormalRTEAlternation const * const > ( node ); + rte::FormalRTESubstitution const * const concatenation = dynamic_cast < rte::FormalRTESubstitution const * const > ( node 
); + rte::FormalRTEIteration const * const iteration = dynamic_cast < rte::FormalRTEIteration const * const > ( node ); + rte::FormalRTESymbol const * const symbol = dynamic_cast < rte::FormalRTESymbol const * const > ( node ); + rte::FormalRTEEmpty const * const empty = dynamic_cast < rte::FormalRTEEmpty const * const > ( node ); + + if ( alternation ) + return follow ( alternation, symbolptr, subMap ); + + else if ( concatenation ) + return follow ( concatenation, symbolptr, subMap ); + + else if ( iteration ) + return follow ( iteration, symbolptr, subMap ); + + else if ( symbol ) + return follow ( symbol, symbolptr, subMap ); + + else if ( empty ) + return follow ( empty, symbolptr, subMap ); + + throw exception::CommonException ( "GlushkovTraversal::follow() - unknown RegExpElement node" ); +} + +std::set < std::vector < rte::FormalRTESymbol const * > > GlushkovTraversal::follow ( rte::FormalRTEAlternation const * const & node, rte::FormalRTESymbol const * const & symbolptr, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subMap ) { + std::set < std::vector < rte::FormalRTESymbol const * > > ret, tmp; + + tmp = follow ( & node->getLeftElement ( ), symbolptr, subMap ); + ret.insert ( tmp.begin ( ), tmp.end ( ) ); + + tmp = follow ( & node->getRightElement ( ), symbolptr, subMap ); + ret.insert ( tmp.begin ( ), tmp.end ( ) ); + + std::cerr << "Follow Alt: (" << symbolptr->getSymbol ( ) << "): " << ret << std::endl; + + return ret; +} + +std::set < std::vector < rte::FormalRTESymbol const * > > GlushkovTraversal::follow ( rte::FormalRTESubstitution const * const & node, rte::FormalRTESymbol const * const & symbolptr, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subMap ) { + + for ( const auto & s : first ( & node->getRightElement ( ) ) ) + subMap[node->getSubstitutionSymbol ( ).getSymbol ( )].insert ( s ); + + /* + * E sub F + * 1. if symbolptr in F subtree, then Follow(F, symbolptr); + * 2. 
if symbolptr in E subtree, then Follow(E, symbolptr); + */ + + std::set < std::vector < rte::FormalRTESymbol const * > > ret; + + if ( pos ( & node->getLeftElement ( ), symbolptr ) ) + ret = follow ( & node->getLeftElement ( ), symbolptr, subMap ); + else + ret = follow ( & node->getRightElement ( ), symbolptr, subMap ); + + std::cerr << "Follow Sub: (" << symbolptr->getSymbol ( ) << "): " << ret << std::endl; + return ret; +} + +std::set < std::vector < rte::FormalRTESymbol const * > > GlushkovTraversal::follow ( rte::FormalRTEIteration const * const & /* node */, rte::FormalRTESymbol const * const & /* symbolptr */, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & /* subMap */ ) { + return std::set < std::vector < rte::FormalRTESymbol const * > > ( ); +} + +std::set < std::vector < rte::FormalRTESymbol const * > > replaceConstants ( const std::vector < rte::FormalRTESymbol const * > & f, const std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subMap ) { + std::cerr << " replC for " << f << std::endl; + + std::set < std::vector < rte::FormalRTESymbol const * > > ret; + + if ( f.size ( ) == 0 ) return ret; + + auto subIt = subMap.find ( f[0]->getSymbol ( ) ); + + if ( subIt == subMap.end ( ) ) { + std::vector < rte::FormalRTESymbol const * > v; + v.push_back ( f[0] ); + ret.insert ( v ); + } else { + for ( const auto & subSymbol : subIt->second ) { + std::vector < rte::FormalRTESymbol const * > v; + v.push_back ( subSymbol ); + ret.insert ( v ); + } + } + + for ( size_t i = 1; i < f.size ( ); i++ ) { + std::set < std::vector < rte::FormalRTESymbol const * > > tmp; + + subIt = subMap.find ( f[i]->getSymbol ( ) ); + + if ( subIt == subMap.end ( ) ) + for ( const auto & r : ret ) { + std::vector < rte::FormalRTESymbol const * > v = r; + v.push_back ( f[i] ); + tmp.insert ( v ); + } + + else + + for ( const auto & r : ret ) + for ( const auto & subSymbol : subIt->second ) { + std::vector < 
rte::FormalRTESymbol const * > v = r; + v.push_back ( subSymbol ); + tmp.insert ( v ); + } + + ret = tmp; + } + + return ret; +} + +std::set < std::vector < rte::FormalRTESymbol const * > > GlushkovTraversal::follow ( rte::FormalRTESymbol const * const & node, rte::FormalRTESymbol const * const & symbolptr, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subMap ) { + std::set < std::vector < rte::FormalRTESymbol const * > > ret; + std::vector < rte::FormalRTESymbol const * > children = node->getElements ( ); + + if ( symbolptr == node ) { + ret = replaceConstants ( children, subMap ); + std::cerr << "Follow Symb1: (" << symbolptr->getSymbol ( ) << "): " << ret << std::endl; + return ret; + } + + for ( const auto & c : children ) + if ( pos ( c, symbolptr ) ) + return follow ( c, symbolptr, subMap ); + + return std::set < std::vector < rte::FormalRTESymbol const * > > ( ); +} + +std::set < std::vector < rte::FormalRTESymbol const * > > GlushkovTraversal::follow ( rte::FormalRTEEmpty const * const & /* node */, rte::FormalRTESymbol const * const & /* symbolptr */, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & /* subMap */ ) { + return std::set < std::vector < rte::FormalRTESymbol const * > > ( ); +} + +// ---------------------------------------------------------------------------- + +bool GlushkovTraversal::pos ( rte::FormalRTEElement const * const & node, rte::FormalRTESymbol const * const & symbolptr ) { + rte::FormalRTEAlternation const * const alternation = dynamic_cast < rte::FormalRTEAlternation const * const > ( node ); + rte::FormalRTEIteration const * const iteration = dynamic_cast < rte::FormalRTEIteration const * const > ( node ); + rte::FormalRTESubstitution const * const concatenation = dynamic_cast < rte::FormalRTESubstitution const * const > ( node ); + rte::FormalRTESymbol const * const symbol = dynamic_cast < rte::FormalRTESymbol const * const > ( node ); + rte::FormalRTEEmpty const 
* const empty = dynamic_cast < rte::FormalRTEEmpty const * const > ( node ); + + if ( alternation ) + return pos ( alternation, symbolptr ); + + else if ( concatenation ) + return pos ( concatenation, symbolptr ); + + else if ( iteration ) + return pos ( iteration, symbolptr ); + + else if ( symbol ) + return pos ( symbol, symbolptr ); + + else if ( empty ) + return pos ( empty, symbolptr ); + + throw exception::CommonException ( "GlushkovTraversal::pos() - unknown RTE node" ); +} + +bool GlushkovTraversal::pos ( rte::FormalRTEAlternation const * const & node, rte::FormalRTESymbol const * const & symbolptr ) { + return pos ( & node->getLeftElement ( ), symbolptr ) || pos ( & node->getRightElement ( ), symbolptr ); +} + +bool GlushkovTraversal::pos ( rte::FormalRTESubstitution const * const & node, rte::FormalRTESymbol const * const & symbolptr ) { + return pos ( & node->getLeftElement ( ), symbolptr ) || pos ( & node->getRightElement ( ), symbolptr ); +} + +bool GlushkovTraversal::pos ( rte::FormalRTEIteration const * const & node, rte::FormalRTESymbol const * const & symbolptr ) { + return pos ( & node->getElement ( ), symbolptr ); +} + +bool GlushkovTraversal::pos ( rte::FormalRTESymbol const * const & node, rte::FormalRTESymbol const * const & symbolptr ) { + if ( symbolptr == node ) return true; + + for ( auto const & element : node->getElements ( ) ) + if ( pos ( element, symbolptr ) ) + return true; + + return false; +} + +bool GlushkovTraversal::pos ( rte::FormalRTEEmpty const * const & /* node */, rte::FormalRTESymbol const * const & /* symbolptr */ ) { + return false; +} + +// ---------------------------------------------------------------------------- + +std::set < GlushkovSymbol > GlushkovTraversal::getSymbols ( rte::FormalRTE const & re ) { + std::set < GlushkovSymbol > alphabet; + int i = 1; + + getSymbols ( & re.getRTE ( ), alphabet, i ); + + return alphabet; +} + +void GlushkovTraversal::getSymbols ( rte::FormalRTEElement const * const & node, 
std::set < GlushkovSymbol > & alphabet, int & i ) { + rte::FormalRTEAlternation const * const alternation = dynamic_cast < rte::FormalRTEAlternation const * const > ( node ); + rte::FormalRTEIteration const * const iteration = dynamic_cast < rte::FormalRTEIteration const * const > ( node ); + rte::FormalRTESubstitution const * const substitution = dynamic_cast < rte::FormalRTESubstitution const * const > ( node ); + rte::FormalRTESymbol const * const symbol = dynamic_cast < rte::FormalRTESymbol const * const > ( node ); + rte::FormalRTEEmpty const * const empty = dynamic_cast < rte::FormalRTEEmpty const * const > ( node ); + + if ( symbol ) { + alphabet.insert ( GlushkovSymbol ( symbol, i++ ) ); + return; + } else if ( alternation ) { + getSymbols ( & alternation->getLeftElement ( ), alphabet, i ); + getSymbols ( & alternation->getRightElement ( ), alphabet, i ); + return; + } else if ( substitution ) { + getSymbols ( & substitution->getLeftElement ( ), alphabet, i ); + getSymbols ( & substitution->getRightElement ( ), alphabet, i ); + return; + } else if ( iteration ) { + getSymbols ( & iteration->getElement ( ), alphabet, i ); + return; + } else if ( empty ) { + return; + } + + throw exception::CommonException ( "GlushkovTraversal::getSymbols() - unknown RegExpElement node" ); +} + +} /* namespace rte */ diff --git a/alib2algo/src/rte/GlushkovTraversal.h b/alib2algo/src/rte/GlushkovTraversal.h new file mode 100644 index 0000000000..738cf70533 --- /dev/null +++ b/alib2algo/src/rte/GlushkovTraversal.h @@ -0,0 +1,106 @@ +/* + * GlushkovTraversal.h + * + * Created on: 14. 4. 
2016 + * Author: Tomas Pecka + */ + +#ifndef RTE_GLUSHKOVTRAVERSAL_H_ +#define RTE_GLUSHKOVTRAVERSAL_H_ + +#include <set> +#include <map> +#include <vector> +#include <algorithm> + +#include <rte/formal/FormalRTE.h> +#include <rte/formal/FormalRTEElement.h> +#include <rte/formal/FormalRTEAlternation.h> +#include <rte/formal/FormalRTESubstitution.h> +#include <rte/formal/FormalRTEIteration.h> +#include <rte/formal/FormalRTESymbol.h> +#include <rte/formal/FormalRTEEmpty.h> + +#include <exception/CommonException.h> + +#include <alphabet/RankedSymbol.h> + +#include "GlushkovSymbol.h" +#include "GlushkovPair.h" + +namespace rte { + +class GlushkovTraversal { +public: + /** + * @param re rte to probe + * @return all rteSymbols whichcan start the word. + */ + static std::set < GlushkovSymbol > first ( rte::FormalRTE const & re ); + + /** + * @param re rte to probe + * @return all rteSymbols that can terminate the word. + */ + static std::set < GlushkovSymbol > last ( rte::FormalRTE const & re ); + + /** + * @param re rte to probe + * @param symbol GlushkovSymbol for which we need the follow() + * @return all symbols that can follow specific symbol in word + */ + static std::set < std::vector < GlushkovSymbol > > follow ( rte::FormalRTE const & re, GlushkovSymbol const & symbol ); + + /** + * @param re rte to probe + * @return symbols of rte tree in order of they occurence in rte. 
+ */ + static std::set < GlushkovSymbol > getSymbols ( rte::FormalRTE const & re ); + +private: + /** + * @param symbol ptr to symbol + * @param symbolSet set of gl.symbols + * @return GlushkovSymbol equivalent for rteSymbol pointer + */ + static GlushkovSymbol const & findSymbol ( rte::FormalRTESymbol const * const symbol, const std::set < GlushkovSymbol > & symbolSet ); + + /** + * @return bool true if symbol pointer is in this subtree + */ + static bool pos ( GlushkovSymbol const & symbol, rte::FormalRTE const * const & node ); + + static void getSymbols ( rte::FormalRTEElement const * const & node, std::set < GlushkovSymbol > & alphabet, int & i ); + + static std::set < rte::FormalRTESymbol const * > first ( rte::FormalRTEElement const * const & node ); + static std::set < rte::FormalRTESymbol const * > first ( rte::FormalRTEAlternation const * const & node ); + static std::set < rte::FormalRTESymbol const * > first ( rte::FormalRTESubstitution const * const & node ); + static std::set < rte::FormalRTESymbol const * > first ( rte::FormalRTEIteration const * const & node ); + static std::set < rte::FormalRTESymbol const * > first ( rte::FormalRTESymbol const * const & node ); + static std::set < rte::FormalRTESymbol const * > first ( rte::FormalRTEEmpty const * const & node ); + + static std::set < rte::FormalRTESymbol const * > last ( rte::FormalRTEElement const * const & node ); + static std::set < rte::FormalRTESymbol const * > last ( rte::FormalRTEAlternation const * const & node ); + static std::set < rte::FormalRTESymbol const * > last ( rte::FormalRTESubstitution const * const & node ); + static std::set < rte::FormalRTESymbol const * > last ( rte::FormalRTEIteration const * const & node ); + static std::set < rte::FormalRTESymbol const * > last ( rte::FormalRTESymbol const * const & node ); + static std::set < rte::FormalRTESymbol const * > last ( rte::FormalRTEEmpty const * const & node ); + + static bool pos ( rte::FormalRTEElement const * const & 
node, rte::FormalRTESymbol const * const & symbSearch ); + static bool pos ( rte::FormalRTEAlternation const * const & node, rte::FormalRTESymbol const * const & symbSearch ); + static bool pos ( rte::FormalRTESubstitution const * const & node, rte::FormalRTESymbol const * const & symbSearch ); + static bool pos ( rte::FormalRTEIteration const * const & node, rte::FormalRTESymbol const * const & symbSearch ); + static bool pos ( rte::FormalRTESymbol const * const & node, rte::FormalRTESymbol const * const & symbSearch ); + static bool pos ( rte::FormalRTEEmpty const * const & node, rte::FormalRTESymbol const * const & symbSearch ); + + static std::set < std::vector < rte::FormalRTESymbol const * > > follow ( rte::FormalRTEElement const * const & node, rte::FormalRTESymbol const * const & symbolptr, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subMap ); + static std::set < std::vector < rte::FormalRTESymbol const * > > follow ( rte::FormalRTEAlternation const * const & node, rte::FormalRTESymbol const * const & symbolptr, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subMap ); + static std::set < std::vector < rte::FormalRTESymbol const * > > follow ( rte::FormalRTESubstitution const * const & node, rte::FormalRTESymbol const * const & symbolptr, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subMap ); + static std::set < std::vector < rte::FormalRTESymbol const * > > follow ( rte::FormalRTEIteration const * const & node, rte::FormalRTESymbol const * const & symbolptr, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subMap ); + static std::set < std::vector < rte::FormalRTESymbol const * > > follow ( rte::FormalRTESymbol const * const & node, rte::FormalRTESymbol const * const & symbolptr, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subMap ); + static std::set < std::vector < 
rte::FormalRTESymbol const * > > follow ( rte::FormalRTEEmpty const * const & node, rte::FormalRTESymbol const * const & symbolptr, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subMap ); +}; + +} /* namespace rte */ + +#endif /* RTE_GLUSHKOVTRAVERSAL_H_ */ diff --git a/alib2algo/src/rte/convert/ToPostfixPushdownAutomaton.cpp b/alib2algo/src/rte/convert/ToPostfixPushdownAutomaton.cpp new file mode 100644 index 0000000000..6ec396ae1d --- /dev/null +++ b/alib2algo/src/rte/convert/ToPostfixPushdownAutomaton.cpp @@ -0,0 +1,36 @@ +/* + * ToPostfixPushdownAutomaton.h + * + * Created on: 11. 4. 2016 + * Author: Tomas Pecka + */ + +#include "ToPostfixPushdownAutomaton.h" +#include "ToPostfixPushdownAutomatonGlushkov.h" +#include <exception/CommonException.h> + +namespace rte { + +namespace convert { + +automaton::Automaton ToPostfixPushdownAutomaton::convert ( const rte::RTE & rte ) { + return dispatch ( rte.getData ( ) ); +} + +automaton::Automaton ToPostfixPushdownAutomaton::convert ( const rte::FormalRTE & rte ) { + return automaton::Automaton ( ToPostfixPushdownAutomatonGlushkov::convert ( rte ) ); +} + +auto ToAutomatonFormalRegExp = ToPostfixPushdownAutomaton::RegistratorWrapper < automaton::Automaton, rte::FormalRTE > ( ToPostfixPushdownAutomaton::convert ); + +/* + * automaton::Automaton ToPostfixPushdownAutomaton::convert(const regexp::UnboundedRegExp& regexp) { + * return automaton::Automaton(ToAutomatonGlushkov::convert(regexp)); + * } + * + * auto ToAutomatonUnboundedRegExp = ToPostfixPushdownAutomaton::RegistratorWrapper<automaton::Automaton, rte::UnboundedRTE>(ToPostfixPushdownAutomaton::getInstance(), ToPostfixPushdownAutomaton::convert); + */ + +} /* namespace convert */ + +} /* namespace rte */ diff --git a/alib2algo/src/rte/convert/ToPostfixPushdownAutomaton.h b/alib2algo/src/rte/convert/ToPostfixPushdownAutomaton.h new file mode 100644 index 0000000000..b71d8ea53d --- /dev/null +++ 
b/alib2algo/src/rte/convert/ToPostfixPushdownAutomaton.h @@ -0,0 +1,41 @@ +/* + * ToPostfixPushdownAutomaton.h + * + * Created on: 11. 4. 2016 + * Author: Tomas Pecka + */ + +#ifndef RTE_TO_AUTOMATON_H_ +#define RTE_TO_AUTOMATON_H_ + +#include <core/multipleDispatch.hpp> + +#include <rte/RTE.h> +#include <rte/formal/FormalRTE.h> + +// #include <rte/unbounded/UnboundedRegExp.h> + +#include <automaton/Automaton.h> + +namespace rte { + +namespace convert { + +class ToPostfixPushdownAutomaton : public std::SingleDispatch < ToPostfixPushdownAutomaton, automaton::Automaton, rte::RTEBase > { +public: + /** + * Performs conversion. + * @return PDA equivalent to original regular rte expression reading linearized postfix tree + */ + static automaton::Automaton convert ( const rte::RTE & rte ); + + static automaton::Automaton convert ( const rte::FormalRTE & rte ); + + // static automaton::Automaton convert(const rte::UnboundedRegExp& rte); +}; + +} /* namespace convert */ + +} /* namespace rte */ + +#endif /* RTE_TO_AUTOMATON_H_ */ diff --git a/alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkov.cpp b/alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkov.cpp new file mode 100644 index 0000000000..6be918c685 --- /dev/null +++ b/alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkov.cpp @@ -0,0 +1,140 @@ +/* + * ToPostfixPushdownAutomatonGlushkov.h + * + * Created on: 11. 4. 
2016
+ * Author: Tomas Pecka
+ */
+
+#include "ToPostfixPushdownAutomatonGlushkov.h"
+
+#include <algorithm>
+#include <iostream>
+#include <map>
+#include <set>
+#include <vector>
+
+#include "label/Label.h"
+#include "label/LabelPairLabel.h"
+#include "label/ObjectLabel.h"
+#include "alphabet/BottomOfTheStackSymbol.h"
+#include "alphabet/EndSymbol.h"
+
+#include <automaton/Automaton.h>
+
+#include "object/Object.h"
+
+#include "../GlushkovTraversal.h"
+
+namespace rte {
+
+namespace convert {
+
+/**
+ * Entry point for any RTE: dispatches on the concrete data held by the wrapper.
+ * @param rte regular tree expression wrapper
+ * @return automaton produced by the overload matching the wrapped type
+ */
+automaton::NPDA ToPostfixPushdownAutomatonGlushkov::convert ( const rte::RTE & rte ) {
+	return dispatch ( rte.getData ( ) );
+}
+
+/**
+ * Translates one follow tuple (a sequence of positions) into the pushdown store symbols assigned to those positions.
+ * File-local helper, hence internal linkage.
+ * @param follow positions to translate
+ * @param pos_stack_map pushdown store symbol assigned to each position
+ * @return translated symbols, in the same order as follow
+ * @throws exception::CommonException if a position has no entry in pos_stack_map
+ */
+static std::vector < alphabet::Symbol > phi ( const std::vector < GlushkovSymbol > & follow, const std::map < GlushkovSymbol, alphabet::Symbol > & pos_stack_map ) {
+	std::vector < alphabet::Symbol > ret;
+	ret.reserve ( follow.size ( ) );
+
+	for ( const GlushkovSymbol & position : follow ) {
+		auto it = pos_stack_map.find ( position );
+
+		if ( it == pos_stack_map.end ( ) )
+			throw exception::CommonException ( "GlushkovRTE: phi: symbol not in pos. Probably constant? Invalid RTE!" );
+
+		ret.push_back ( it->second );
+	}
+
+	return ret;
+}
+
+/**
+ * Debugging aid: prints the pos, first and follow sets to std::cerr.
+ * NOTE(review): called unconditionally from convert; the commit is marked WIP, so this is presumably meant to be removed or guarded later.
+ */
+static void dumpGlushkovSets ( const std::set < GlushkovSymbol > & posSet, const std::set < GlushkovSymbol > & firstSet, const std::map < GlushkovSymbol, std::set < std::vector < GlushkovSymbol > > > & followSet ) {
+	std::cerr << "Pos:" << std::endl;
+
+	for ( const auto & symbol : posSet )
+		std::cerr << "\t" << symbol.getInputSymbol ( ) << " id=" << symbol.getId ( ) << std::endl;
+
+	std::cerr << "First:" << std::endl;
+
+	for ( const auto & symbol : firstSet )
+		std::cerr << "\t" << symbol.getInputSymbol ( ) << " id=" << symbol.getId ( ) << std::endl;
+
+	std::cerr << "Follow:" << std::endl;
+
+	for ( const auto & kv : followSet ) {
+		std::cerr << "\t" << kv.first.getInputSymbol ( ) << " id=" << kv.first.getId ( ) << std::endl;
+
+		for ( const auto & followTuple : kv.second )
+			for ( const auto & symbol : followTuple )
+				std::cerr << "\t\t" << symbol.getInputSymbol ( ) << " id=" << symbol.getId ( ) << std::endl;
+
+	}
+}
+
+/**
+ * Builds the pushdown automaton for a formal RTE from its pos, first and follow sets
+ * (as computed by rte::GlushkovTraversal).
+ *
+ * The automaton has two states: q (passed to the NPDA constructor) and the final state f.
+ * Every position gets its own pushdown store symbol, labelled by the pair (input symbol, position id).
+ *
+ * @param rte formal regular tree expression
+ * @return the constructed automaton
+ * @throws exception::CommonException if a follow or first position has no pushdown store symbol
+ */
+automaton::NPDA ToPostfixPushdownAutomatonGlushkov::convert ( const rte::FormalRTE & rte ) {
+	const alphabet::Symbol bottomSymbol { alphabet::BottomOfTheStackSymbol::BOTTOM_OF_THE_STACK };
+	const alphabet::Symbol endSymbol { alphabet::EndSymbol::END };
+
+	automaton::State q ( label::labelFrom ( 'q' ) );
+	automaton::State f ( label::labelFrom ( 'f' ) );
+	automaton::NPDA automaton ( q, bottomSymbol );
+
+	// step 3
+	automaton.addState ( f );
+	automaton.addFinalState ( f );
+
+	// step 4
+	for ( const alphabet::RankedSymbol & symbol : rte.getAlphabet ( ) )
+		automaton.addInputSymbol ( symbol.getSymbol ( ) );
+
+	automaton.addInputSymbol ( endSymbol );
+
+	const std::set < GlushkovSymbol > posSet = rte::GlushkovTraversal::getSymbols ( rte );
+	const std::set < GlushkovSymbol > firstSet = rte::GlushkovTraversal::first ( rte );
+	std::map < GlushkovSymbol, std::set < std::vector < GlushkovSymbol > > > followSet;
+
+	for ( const auto & symbol : posSet )
+		followSet.insert ( std::make_pair ( symbol, rte::GlushkovTraversal::follow ( rte, symbol ) ) );
+
+	/* DEBUG */
+	dumpGlushkovSets ( posSet, firstSet, followSet );
+	/* DEBUG END */
+
+	// one pushdown store symbol per position, labelled by (input symbol, position id)
+	std::map < GlushkovSymbol, alphabet::Symbol > pos_stack_map;
+
+	for ( const GlushkovSymbol & gsymb : posSet ) {
+		auto label = label::Label ( label::LabelPairLabel ( std::make_pair ( label::Label ( label::ObjectLabel ( alib::Object ( gsymb.getInputSymbol ( ).getSymbol ( ).getData ( ) ) ) ), label::labelFrom ( gsymb.getId ( ) ) ) ) );
+		const auto stackSymbol = alphabet::Symbol ( alphabet::LabeledSymbol ( label ) );
+
+		pos_stack_map.insert ( std::make_pair ( gsymb, stackSymbol ) );
+		automaton.addPushdownStoreSymbol ( stackSymbol );
+	}
+
+	// pos_stack_map holds exactly the positions of posSet (in the same order), so iterating it needs no lookup
+	for ( const auto & kv : pos_stack_map ) {
+		const GlushkovSymbol & symb = kv.first;
+		const alphabet::Symbol & stackSymbol = kv.second;
+
+		if ( symb.getInputSymbol ( ).getRank ( ) == primitive::Unsigned ( 0 ) ) {
+			// nullary symbol: nothing to take from the pushdown store
+			automaton.addTransition ( q, symb.getInputSymbol ( ).getSymbol ( ), { }, q, { stackSymbol } );
+			continue;
+		}
+
+		// at() instead of operator[]: a missing entry must not be silently default-inserted
+		for ( const std::vector < GlushkovSymbol > & followTuple : followSet.at ( symb ) ) {
+			std::vector < alphabet::Symbol > fstring = phi ( followTuple, pos_stack_map );
+			std::reverse ( fstring.begin ( ), fstring.end ( ) );
+			automaton.addTransition ( q, symb.getInputSymbol ( ).getSymbol ( ), fstring, q, { stackSymbol } );
+		}
+	}
+
+	// on the end symbol, move to f with a first position and the bottom symbol as the third addTransition argument
+	for ( const GlushkovSymbol & symb : firstSet ) {
+		auto it = pos_stack_map.find ( symb );
+
+		if ( it == pos_stack_map.end ( ) )
+			throw exception::CommonException ( "GlushkovRTE: fail2" );
+
+		automaton.addTransition ( q, endSymbol, { it->second, bottomSymbol }, f, { } );
+	}
+
+	return automaton;
+}
+
+auto ToAutomatonGlushkovFormalRegExp = ToPostfixPushdownAutomatonGlushkov::RegistratorWrapper < automaton::NPDA, rte::FormalRTE > ( ToPostfixPushdownAutomatonGlushkov::convert );
+
+/*
+ * automaton::NPDA ToPostfixPushdownAutomatonGlushkov::convert(const rte::UnboundedRTE& rte)
+ * {
+ *	throw exception::CommonException("Glushkov: Converting Unbounded RTE to postfix rhPDA NYI"); // TODO
+ * }
+ *
+ * auto ToAutomatonGlushkovUnboundedRegExp = ToAutomatonGlushkov::RegistratorWrapper<automaton::NPDA, rte::UnboundedRTE>(ToAutomatonGlushkov::getInstance(), ToAutomatonGlushkov::convert);
+ */
+} /* namespace convert */
+
+} /* namespace rte */
diff --git a/alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkov.h b/alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkov.h
new file mode 100644
index 0000000000..1019d82552
--- /dev/null
+++ b/alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkov.h
@@ -0,0 +1,44 @@
+/*
+ * ToPostfixPushdownAutomatonGlushkov.h
+ *
+ * Created on: 11. 4. 2016
+ * Author: Tomas Pecka
+ */
+
+#ifndef TO_POSTFIX_PUSHDOWN_AUTOMATON_GLUSHKOV_H_
+#define TO_POSTFIX_PUSHDOWN_AUTOMATON_GLUSHKOV_H_
+
+#include <core/multipleDispatch.hpp>
+#include <map>
+
+#include <automaton/FSM/NFA.h>
+#include <rte/RTE.h>
+#include <rte/formal/FormalRTE.h>
+#include <automaton/PDA/NPDA.h>
+
+// #include <rte/unbounded/UnboundedRegExp.h>
+
+namespace rte {
+
+namespace convert {
+
+/**
+ * Converts a regular tree expression to a postfix pushdown automaton (automaton::NPDA) using a Glushkov-like construction.
+ * Source: Melichar 2.107
+ */
+class ToPostfixPushdownAutomatonGlushkov : public std::SingleDispatch < ToPostfixPushdownAutomatonGlushkov, automaton::NPDA, rte::RTEBase > {
+public:
+	/**
+	 * Performs conversion; one overload takes the generic RTE wrapper, the other the formal RTE.
+	 * @param rte Original regular tree expression.
+	 * @return NPDA built from the original regular tree expression (described as an rhNPDA by the original author).
+	 */
+	static automaton::NPDA convert ( const rte::RTE & rte );
+	static automaton::NPDA convert ( const rte::FormalRTE & rte );
+};
+
+} /* namespace convert */
+
+} /* namespace rte */
+
+#endif /* TO_POSTFIX_PUSHDOWN_AUTOMATON_GLUSHKOV_H_ */
-- 
GitLab