diff --git a/alib2algo/src/rte/GlushkovPair.cpp b/alib2algo/src/rte/GlushkovPair.cpp deleted file mode 100644 index 666f2dc7526674969ae5b827d909efe778c81b0d..0000000000000000000000000000000000000000 --- a/alib2algo/src/rte/GlushkovPair.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/* - * GlushkovPair.cpp - * - * Created on: 14. 4. 2016 - * Author: Tomas Pecka - */ - -#include "GlushkovPair.h" - -namespace rte { - -GlushkovPair::GlushkovPair ( GlushkovSymbol const & first, GlushkovSymbol const & second ) : m_first ( first ), m_second ( second ) { -} - -bool GlushkovPair::operator <( GlushkovPair const & x ) const { - if ( m_first.getId ( ) == x.m_first.getId ( ) ) - return m_second.getId ( ) < x.m_second.getId ( ); - else - return m_first.getId ( ) < x.m_first.getId ( ); -} - -GlushkovSymbol const & GlushkovPair::getFirst ( void ) const { - return m_first; -} - -GlushkovSymbol const & GlushkovPair::getSecond ( void ) const { - return m_second; -} - -} /* namespace rte */ diff --git a/alib2algo/src/rte/GlushkovPair.h b/alib2algo/src/rte/GlushkovPair.h deleted file mode 100644 index 9812242133d5dd1adad2424ba04e91e935dfc58a..0000000000000000000000000000000000000000 --- a/alib2algo/src/rte/GlushkovPair.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * GlushkovPair.h - * - * Created on: 14. 4. 2016 - * Author: Tomas Pecka - */ - -#ifndef RTE_GLUSHKOVPAIR_H_ -#define RTE_GLUSHKOVPAIR_H_ - -#include "GlushkovSymbol.h" - -namespace rte { - -/** - * Actually, this is just std::pair. 2-tuple. - */ -class GlushkovPair { -private: - GlushkovSymbol const m_first; - GlushkovSymbol const m_second; - -public: - GlushkovPair ( GlushkovSymbol const & first, GlushkovSymbol const & second ); - bool operator <( GlushkovPair const & x ) const; - GlushkovSymbol const & getFirst ( void ) const; - GlushkovSymbol const & getSecond ( void ) const; -}; - -} /* namespace rte */ - -#endif /* GLUSHKOVPAIR_H_ */ diff --git a/alib2algo/src/rte/GlushkovSymbol.cpp b/alib2algo/src/rte/GlushkovSymbol.cpp deleted file mode 100644 index 81c22f304e0c8abc5538238aa2efbc479bed8b56..0000000000000000000000000000000000000000 --- a/alib2algo/src/rte/GlushkovSymbol.cpp +++ /dev/null @@ -1,31 +0,0 @@ -/* - * GlushkovSymbol.cpp - * - * Created on: 14. 4. 2016 - * Author: Tomas Pecka - */ - -#include "GlushkovSymbol.h" - -namespace rte { - -GlushkovSymbol::GlushkovSymbol ( rte::FormalRTESymbol const * const & node, int i ) : m_Symbol ( node ), m_i ( i ) { -} - -bool GlushkovSymbol::operator <( GlushkovSymbol const & x ) const { - return m_i < x.m_i; -} - -int GlushkovSymbol::getId ( void ) const { - return m_i; -} - -alphabet::RankedSymbol GlushkovSymbol::getInputSymbol ( void ) const { - return m_Symbol->getSymbol ( ); -} - -rte::FormalRTESymbol const * GlushkovSymbol::getSymbolPtr ( void ) const { - return m_Symbol; -} - -} /* namespace rte */ diff --git a/alib2algo/src/rte/GlushkovSymbol.h b/alib2algo/src/rte/GlushkovSymbol.h deleted file mode 100644 index eed310bf26960224d2a7c00d8d7dfcfea79466ad..0000000000000000000000000000000000000000 --- a/alib2algo/src/rte/GlushkovSymbol.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * GlushkovSymbol.h - * - * Created on: 14. 4. 2016 - * Author: Tomas Pecka - */ - -#ifndef RTE_GLUSHKOVSYMBOL_H_ -#define RTE_GLUSHKOVSYMBOL_H_ - -#include <alphabet/Symbol.h> -#include <rte/formal/FormalRTESymbol.h> - -namespace rte { - -/** - * Represents numbered symbol in glushkov algorithm. - */ -class GlushkovSymbol { -private: - rte::FormalRTESymbol const * const m_Symbol; - int m_i; - -public: - GlushkovSymbol ( rte::FormalRTESymbol const * const & node, int i ); - bool operator <( GlushkovSymbol const & x ) const; - int getId ( void ) const; - alphabet::RankedSymbol getInputSymbol ( void ) const; - rte::FormalRTESymbol const * getSymbolPtr ( void ) const; -}; - -} /* namespace rte */ - -#endif /* GLUSHKOVSYMBOL_H_ */ diff --git a/alib2algo/src/rte/GlushkovTraversal.cpp b/alib2algo/src/rte/GlushkovTraversal.cpp deleted file mode 100644 index 048737a85746093aa63d4e7096d1b117518650c1..0000000000000000000000000000000000000000 --- a/alib2algo/src/rte/GlushkovTraversal.cpp +++ /dev/null @@ -1,524 +0,0 @@ -/* - * GlushkovTraversal.cpp - * - * Created on: 14. 4. 2016 - * Author: Tomas Pecka - */ - -#include "GlushkovTraversal.h" -#include <iterator> -#include <vector> - -namespace rte { - -const GlushkovSymbol & GlushkovTraversal::findSymbol ( rte::FormalRTESymbol const * const symbol, const std::set < GlushkovSymbol > & symbolSet ) { - auto it = find_if ( symbolSet.begin ( ), symbolSet.end ( ), [symbol] ( GlushkovSymbol const & gs ) -> bool { - return gs.getSymbolPtr ( ) == symbol; - } ); - - if ( it == symbolSet.end ( ) ) - throw exception::CommonException ( "GlushkovTraversal RTE - Can not find GlushkovSymbol for regexp node. Probably symbol from constant alphabet?" ); - - return * it; -} - -bool GlushkovTraversal::pos ( GlushkovSymbol const & symbol, rte::FormalRTE const * const & node ) { - return pos ( & node->getRTE ( ), symbol.getSymbolPtr ( ) ); -} - -std::set < GlushkovSymbol > GlushkovTraversal::first ( rte::FormalRTE const & re ) { - std::set < GlushkovSymbol > firstSet, symbolSet = getSymbols ( re ); - - for ( auto const & s : first ( & re.getRTE ( ) ) ) - // std::cerr << "In first set: " << s -> getSymbol ( ) << std::endl; - firstSet.insert ( findSymbol ( s, symbolSet ) ); - - return firstSet; -} - -/* - * std::set<GlushkovSymbol> GlushkovTraversal::last( rte::FormalRTE const& re ) - * { - * std::set<GlushkovSymbol> lastSet, symbolSet = getSymbols( re ); - * - * for( auto const& s : last( & re.getRTE() ) ) - * lastSet.insert( findSymbol( s, symbolSet ) ); - * - * return lastSet; - * } - */ - -std::set < std::vector < GlushkovSymbol > > GlushkovTraversal::follow ( rte::FormalRTE const & re, GlushkovSymbol const & symbol ) { - std::set < GlushkovSymbol > symbolSet = getSymbols ( re ); - std::set < std::vector < GlushkovSymbol > > followSet; - std::set < alphabet::RankedSymbol > alphabetK = re.getConstantAlphabet ( ); - std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > symbMap; - - for ( const auto & csymbol : alphabetK ) - symbMap.insert ( std::make_pair ( csymbol, std::set < rte::FormalRTESymbol const * > ( ) ) ); - - for ( const auto & f : follow ( & re.getRTE ( ), symbol.getSymbolPtr ( ), alphabetK, symbMap ) ) { - std::vector < GlushkovSymbol > curr; - - for ( const auto & s : f ) - curr.push_back ( findSymbol ( s, symbolSet ) ); - - followSet.insert ( curr ); - } - - return followSet; -} - -// ----------------------------------------------------------------------------- - -std::set < rte::FormalRTESymbol const * > GlushkovTraversal::first ( rte::FormalRTEElement const * const & node ) { - rte::FormalRTEAlternation const * const alternation = dynamic_cast < rte::FormalRTEAlternation const * const > ( node ); - rte::FormalRTESubstitution const * const concatenation = dynamic_cast < rte::FormalRTESubstitution const * const > ( node ); - rte::FormalRTEIteration const * const iteration = dynamic_cast < rte::FormalRTEIteration const * const > ( node ); - rte::FormalRTESymbol const * const symbol = dynamic_cast < rte::FormalRTESymbol const * const > ( node ); - rte::FormalRTEEmpty const * const empty = dynamic_cast < rte::FormalRTEEmpty const * const > ( node ); - - if ( alternation ) - return first ( alternation ); - else if ( concatenation ) - return first ( concatenation ); - else if ( iteration ) - return first ( iteration ); - else if ( empty ) - return first ( empty ); - else if ( symbol ) - return first ( symbol ); - - throw exception::CommonException ( "GlushkovTraversal::first - invalid RegExpElement node" ); -} - -std::set < rte::FormalRTESymbol const * > GlushkovTraversal::first ( rte::FormalRTEAlternation const * const & node ) { - std::set < rte::FormalRTESymbol const * > ret, tmp; - - tmp = first ( & node->getLeftElement ( ) ); - ret.insert ( tmp.begin ( ), tmp.end ( ) ); - - tmp = first ( & node->getRightElement ( ) ); - ret.insert ( tmp.begin ( ), tmp.end ( ) ); - - return ret; -} - -std::set < rte::FormalRTESymbol const * > GlushkovTraversal::first ( rte::FormalRTESubstitution const * const & node ) { - std::set < rte::FormalRTESymbol const * > ret, tmp; - - tmp = first ( & node->getLeftElement ( ) ); - ret.insert ( tmp.begin ( ), tmp.end ( ) ); - - auto it = std::find_if ( ret.begin ( ), ret.end ( ), [node] ( rte::FormalRTESymbol const * a ) { - return * a == node->getSubstitutionSymbol ( ); - } ); - - if ( it != ret.end ( ) ) { - ret.erase ( it ); - tmp = first ( & node->getRightElement ( ) ); - ret.insert ( tmp.begin ( ), tmp.end ( ) ); - } - - return ret; -} - -std::set < rte::FormalRTESymbol const * > GlushkovTraversal::first ( rte::FormalRTEIteration const * const & node ) { - std::set < rte::FormalRTESymbol const * > ret = first ( & node->getElement ( ) ); - - ret.insert ( & node->getSubstitutionSymbol ( ) ); - return ret; -} - -std::set < rte::FormalRTESymbol const * > GlushkovTraversal::first ( rte::FormalRTESymbol const * const & node ) { - return std::set < rte::FormalRTESymbol const * > { node }; -} - -std::set < rte::FormalRTESymbol const * > GlushkovTraversal::first ( rte::FormalRTEEmpty const * const & /* node */ ) { - return std::set < rte::FormalRTESymbol const * > ( ); -} - -// ---------------------------------------------------------------------------- - -/* - * std::set<rte::FormalRTESymbol const *> GlushkovTraversal::last( rte::FormalRTEElement const * const & node ) - * { - * rte::FormalRTEAlternation const * const alternation = dynamic_cast<rte::FormalRTEAlternation const * const>( node ); - * rte::FormalRTESubstitution const * const concatenation = dynamic_cast<rte::FormalRTESubstitution const * const>( node ); - * rte::FormalRTEIteration const * const iteration = dynamic_cast<rte::FormalRTEIteration const * const>( node ); - * rte::FormalRTESymbol const * const symbol = dynamic_cast<rte::FormalRTESymbol const * const>( node ); - * rte::FormalRTEEmpty const * const empty = dynamic_cast<rte::FormalRTEEmpty const * const>( node ); - * - * if( symbol ) - * return last( symbol ); - * else if( alternation ) - * return last( alternation ); - * else if( concatenation ) - * return last( concatenation ); - * else if( iteration ) - * return last( iteration ); - * else if( empty ) - * return last( empty ); - * - * throw exception::CommonException( "GlushkovTraversal::last - invalid RegExpElement node" ); - * } - * - * std::set<rte::FormalRTESymbol const *> GlushkovTraversal::last( rte::FormalRTEAlternation const * const & node ) - * { - * std::set<rte::FormalRTESymbol const *> ret; - * - * for( auto const& element : node->getElements( ) ) - * { - * std::set<rte::FormalRTESymbol const *> tmp = last( element ); - * ret.insert( tmp.begin( ), tmp.end( ) ); - * } - * - * return ret; - * } - * - * std::set<rte::FormalRTESymbol const *> GlushkovTraversal::last( rte::FormalRTESubstitution const * const & node ) - * { - * std::set<rte::FormalRTESymbol const *> ret, tmp; - * - * for( auto it = node->getElements( ).rbegin( ); it != node->getElements( ).rend( ) ; it ++ ) - * { - * tmp = last( *it ); - * ret.insert( tmp.begin( ), tmp.end( ) ); - * - * if( ! rte::properties::RegExpEpsilon::languageContainsEpsilon(**it) ) - * break; - * } - * - * return ret; - * } - * - * std::set<rte::FormalRTESymbol const *> GlushkovTraversal::last( rte::FormalRTEIteration const * const & node ) - * { - * return last( & node->getElement( ) ); - * } - * - * std::set<rte::FormalRTESymbol const *> GlushkovTraversal::last( rte::FormalRTESymbol const * const & node ) - * { - * return std::set<rte::FormalRTESymbol const *> { node }; - * } - * - * std::set<rte::FormalRTESymbol const *> GlushkovTraversal::last( rte::FormalRTEEmpty const * const & node ) - * { - * return std::set<rte::FormalRTESymbol const *>( ); - * } - */ - -// ---------------------------------------------------------------------------- - -std::set < std::vector < rte::FormalRTESymbol const * > > GlushkovTraversal::follow ( rte::FormalRTEElement const * const & node, rte::FormalRTESymbol const * const & symbolptr, const std::set < alphabet::RankedSymbol > & alphabetK, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subMap ) { - rte::FormalRTEAlternation const * const alternation = dynamic_cast < rte::FormalRTEAlternation const * const > ( node ); - rte::FormalRTESubstitution const * const concatenation = dynamic_cast < rte::FormalRTESubstitution const * const > ( node ); - rte::FormalRTEIteration const * const iteration = dynamic_cast < rte::FormalRTEIteration const * const > ( node ); - rte::FormalRTESymbol const * const symbol = dynamic_cast < rte::FormalRTESymbol const * const > ( node ); - rte::FormalRTEEmpty const * const empty = dynamic_cast < rte::FormalRTEEmpty const * const > ( node ); - - if ( alternation ) - return follow ( alternation, symbolptr, alphabetK, subMap ); - - else if ( concatenation ) - return follow ( concatenation, symbolptr, alphabetK, subMap ); - - else if ( iteration ) - return follow ( iteration, symbolptr, alphabetK, subMap ); - - else if ( symbol ) - return follow ( symbol, symbolptr, alphabetK, subMap ); - - else if ( empty ) - return follow ( empty, symbolptr, alphabetK, subMap ); - - throw exception::CommonException ( "GlushkovTraversal::follow() - unknown RegExpElement node" ); -} - -#include <iostream> -std::ostream & operator <<( std::ostream & os, const std::set < std::vector < rte::FormalRTESymbol const * > > & x ) { - os << "{"; - - for ( const auto & f : x ) { - os << "["; - - for ( const auto & ff : f ) - os << ff->getSymbol ( ).getSymbol ( ) << "(" << ff->getSymbol ( ).getRank ( ) << "), "; - - os << "], "; - } - - os << "}"; - return os; -} - -std::set < std::vector < rte::FormalRTESymbol const * > > GlushkovTraversal::follow ( rte::FormalRTEAlternation const * const & node, rte::FormalRTESymbol const * const & symbolptr, const std::set < alphabet::RankedSymbol > & alphabetK, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subMap ) { - std::set < std::vector < rte::FormalRTESymbol const * > > ret, tmp; - - tmp = follow ( & node->getLeftElement ( ), symbolptr, alphabetK, subMap ); - ret.insert ( tmp.begin ( ), tmp.end ( ) ); - - tmp = follow ( & node->getRightElement ( ), symbolptr, alphabetK, subMap ); - ret.insert ( tmp.begin ( ), tmp.end ( ) ); - - // std::cerr << "FollowAlt(" << symbolptr->getSymbol ( ) << "): " << ret << std::endl; - - return ret; -} - -std::set < std::vector < rte::FormalRTESymbol const * > > GlushkovTraversal::follow ( rte::FormalRTESubstitution const * const & node, rte::FormalRTESymbol const * const & symbolptr, const std::set < alphabet::RankedSymbol > & alphabetK, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subMap ) { - - std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > subMap2 ( subMap ); - auto itMap = subMap2.find ( node->getSubstitutionSymbol ( ).getSymbol ( ) ); - - itMap->second.clear ( ); - - for ( const auto & s : first ( & node->getRightElement ( ) ) ) - itMap->second.insert ( s ); - - /* - * E sub F - * 1. if symbolptr in F subtree, then Follow(F, symbolptr); - * 2. if symbolptr in E subtree, then Follow(E, symbolptr); - */ - - std::set < std::vector < rte::FormalRTESymbol const * > > ret; - - if ( pos ( & node->getLeftElement ( ), symbolptr ) ) - ret = follow ( & node->getLeftElement ( ), symbolptr, alphabetK, subMap2 ); - else - ret = follow ( & node->getRightElement ( ), symbolptr, alphabetK, subMap ); - - // std::cerr << "FollowSub(" << symbolptr->getSymbol ( ) << "): " << ret << std::endl; - return ret; -} - -std::set < std::vector < rte::FormalRTESymbol const * > > GlushkovTraversal::follow ( rte::FormalRTEIteration const * const & node, rte::FormalRTESymbol const * const & symbolptr, const std::set < alphabet::RankedSymbol > & alphabetK, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subMap ) { - - std::set < std::vector < rte::FormalRTESymbol const * > > ret; - - for ( const auto & s : first ( & node->getElement ( ) ) ) - subMap[node->getSubstitutionSymbol ( ).getSymbol ( )].insert ( s ); - - ret = follow ( & node->getElement ( ), symbolptr, alphabetK, subMap ); - - // std::cerr << "FollowIter(" << symbolptr->getSymbol ( ) << "): " << ret << std::endl; - return ret; -} - -void preprocessSubMap ( const std::set < alphabet::RankedSymbol > & alphabetK, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subMap ) { - bool change = true; - - while ( change ) { - change = false; - - for ( auto & kv : subMap ) { - std::set < rte::FormalRTESymbol const * > & value = kv.second; - - for ( const auto & v : value ) { - if ( alphabetK.find ( v->getSymbol ( ) ) == alphabetK.end ( ) ) continue; - - // std::cerr << "Gonna replace in submap: " << kv.first << ": " << v->getSymbol() << std::endl; - - auto it = subMap.find ( v->getSymbol ( ) ); - size_t vsize = value.size ( ); - value.insert ( it->second.begin ( ), it->second.end ( ) ); - change = ( vsize != value.size ( ) ); - value.erase ( v ); - - /* - * for(const auto& x : it->second) - * std::cerr << "\t" << x->getSymbol() << std::endl; - */ - } - } - } -} - -std::set < std::vector < rte::FormalRTESymbol const * > > replaceConstants ( const std::set < alphabet::RankedSymbol > & alphabetK, const std::vector < std::smart_ptr < const rte::FormalRTESymbol > > & f, const std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subMap2 ) { - // std::cerr << " replC for " << f << std::endl; - - std::set < std::vector < rte::FormalRTESymbol const * > > ret; - - if ( f.size ( ) == 0 ) return ret; - - std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > subMap ( subMap2 ); - preprocessSubMap ( alphabetK, subMap ); - - auto subIt = subMap.find ( f[0]->getSymbol ( ) ); - - if ( subIt == subMap.end ( ) ) { - std::vector < rte::FormalRTESymbol const * > v; - v.push_back ( f[0].get() ); - ret.insert ( v ); - } else { - for ( const auto & subSymbol : subIt->second ) { - std::vector < rte::FormalRTESymbol const * > v; - v.push_back ( subSymbol ); - ret.insert ( v ); - } - } - - for ( size_t i = 1; i < f.size ( ); i++ ) { - std::set < std::vector < rte::FormalRTESymbol const * > > tmp; - - subIt = subMap.find ( f[i]->getSymbol ( ) ); - - if ( subIt == subMap.end ( ) ) - for ( const auto & r : ret ) { - std::vector < rte::FormalRTESymbol const * > v = r; - v.push_back ( f[i].get() ); - tmp.insert ( v ); - } - - else - - for ( const auto & r : ret ) - for ( const auto & subSymbol : subIt->second ) { - std::vector < rte::FormalRTESymbol const * > v = r; - v.push_back ( subSymbol ); - tmp.insert ( v ); - } - - ret = tmp; - } - - // std::cerr << "ReplC for " << f << " rets:" << std::endl; - - /* - * for ( const auto & r : ret ) { - * std::cerr << "\t"; - * - * for ( const auto & x : r ) - * std::cerr << " " << * x; - * - * std::cerr << std::endl; - * } - */ - - return ret; -} - -std::set < std::vector < rte::FormalRTESymbol const * > > GlushkovTraversal::follow ( rte::FormalRTESymbol const * const & node, rte::FormalRTESymbol const * const & symbolptr, const std::set < alphabet::RankedSymbol > & alphabetK, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subMap ) { - std::set < std::vector < rte::FormalRTESymbol const * > > ret, tmp; - const std::vector < std::smart_ptr < const rte::FormalRTESymbol > > & children = node->getElements ( ); - - if ( symbolptr == node ) { - ret = replaceConstants ( alphabetK, children, subMap ); - - // std::cerr << "FollowSymbol1(" << symbolptr->getSymbol ( ) << "): " << ret << std::endl; - return ret; - } - - for ( const auto & c : children ) { - tmp = follow ( c.get(), symbolptr, alphabetK, subMap ); - ret.insert ( tmp.begin ( ), tmp.end ( ) ); - } - - return ret; -} - -std::set < std::vector < rte::FormalRTESymbol const * > > GlushkovTraversal::follow ( rte::FormalRTEEmpty const * const & /* node */, rte::FormalRTESymbol const * const & /* symbolptr */, const std::set < alphabet::RankedSymbol > & /* alphabetK */, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & /* subMap */ ) { - return std::set < std::vector < rte::FormalRTESymbol const * > > ( ); -} - -// ---------------------------------------------------------------------------- - -bool GlushkovTraversal::pos ( rte::FormalRTEElement const * const & node, rte::FormalRTESymbol const * const & symbolptr ) { - rte::FormalRTEAlternation const * const alternation = dynamic_cast < rte::FormalRTEAlternation const * const > ( node ); - rte::FormalRTEIteration const * const iteration = dynamic_cast < rte::FormalRTEIteration const * const > ( node ); - rte::FormalRTESubstitution const * const concatenation = dynamic_cast < rte::FormalRTESubstitution const * const > ( node ); - rte::FormalRTESymbol const * const symbol = dynamic_cast < rte::FormalRTESymbol const * const > ( node ); - rte::FormalRTEEmpty const * const empty = dynamic_cast < rte::FormalRTEEmpty const * const > ( node ); - - if ( alternation ) - return pos ( alternation, symbolptr ); - - else if ( concatenation ) - return pos ( concatenation, symbolptr ); - - else if ( iteration ) - return pos ( iteration, symbolptr ); - - else if ( symbol ) - return pos ( symbol, symbolptr ); - - else if ( empty ) - return pos ( empty, symbolptr ); - - throw exception::CommonException ( "GlushkovTraversal::pos() - unknown RTE node" ); -} - -bool GlushkovTraversal::pos ( rte::FormalRTEAlternation const * const & node, rte::FormalRTESymbol const * const & symbolptr ) { - return pos ( & node->getLeftElement ( ), symbolptr ) || pos ( & node->getRightElement ( ), symbolptr ); -} - -bool GlushkovTraversal::pos ( rte::FormalRTESubstitution const * const & node, rte::FormalRTESymbol const * const & symbolptr ) { - return pos ( & node->getLeftElement ( ), symbolptr ) || pos ( & node->getRightElement ( ), symbolptr ); -} - -bool GlushkovTraversal::pos ( rte::FormalRTEIteration const * const & node, rte::FormalRTESymbol const * const & symbolptr ) { - return pos ( & node->getElement ( ), symbolptr ); -} - -bool GlushkovTraversal::pos ( rte::FormalRTESymbol const * const & node, rte::FormalRTESymbol const * const & symbolptr ) { - if ( symbolptr == node ) return true; - - for ( auto const & element : node->getElements ( ) ) - if ( pos ( element.get(), symbolptr ) ) - return true; - - return false; -} - -bool GlushkovTraversal::pos ( rte::FormalRTEEmpty const * const & /* node */, rte::FormalRTESymbol const * const & /* symbolptr */ ) { - return false; -} - -// ---------------------------------------------------------------------------- - -std::set < GlushkovSymbol > GlushkovTraversal::getSymbols ( rte::FormalRTE const & re ) { - std::set < GlushkovSymbol > posSet; - int i = 1; - - std::set < alphabet::RankedSymbol > alphabetF = re.getAlphabet ( ); - - getSymbols ( & re.getRTE ( ), alphabetF, posSet, i ); - - return posSet; -} - -void GlushkovTraversal::getSymbols ( rte::FormalRTEElement const * const & node, const std::set < alphabet::RankedSymbol > & alphabetF, std::set < GlushkovSymbol > & posSet, int & i ) { - rte::FormalRTEAlternation const * const alternation = dynamic_cast < rte::FormalRTEAlternation const * const > ( node ); - rte::FormalRTEIteration const * const iteration = dynamic_cast < rte::FormalRTEIteration const * const > ( node ); - rte::FormalRTESubstitution const * const substitution = dynamic_cast < rte::FormalRTESubstitution const * const > ( node ); - rte::FormalRTESymbol const * const symbol = dynamic_cast < rte::FormalRTESymbol const * const > ( node ); - rte::FormalRTEEmpty const * const empty = dynamic_cast < rte::FormalRTEEmpty const * const > ( node ); - - if ( symbol ) { - if ( alphabetF.count ( symbol->getSymbol ( ) ) ) - posSet.insert ( GlushkovSymbol ( symbol, i++ ) ); - - for ( const auto & c : symbol->getElements ( ) ) - getSymbols ( c.get(), alphabetF, posSet, i ); - - return; - } else if ( alternation ) { - getSymbols ( & alternation->getLeftElement ( ), alphabetF, posSet, i ); - getSymbols ( & alternation->getRightElement ( ), alphabetF, posSet, i ); - return; - } else if ( substitution ) { - getSymbols ( & substitution->getLeftElement ( ), alphabetF, posSet, i ); - getSymbols ( & substitution->getRightElement ( ), alphabetF, posSet, i ); - return; - } else if ( iteration ) { - getSymbols ( & iteration->getElement ( ), alphabetF, posSet, i ); - return; - } else if ( empty ) { - return; - } - - throw exception::CommonException ( "GlushkovTraversal::getSymbols() - unknown RegExpElement node" ); -} - -} /* namespace rte */ diff --git a/alib2algo/src/rte/GlushkovTraversal.h b/alib2algo/src/rte/GlushkovTraversal.h deleted file mode 100644 index 0b728051ac157275b1af0e07663f6cd3fe953da3..0000000000000000000000000000000000000000 --- a/alib2algo/src/rte/GlushkovTraversal.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * GlushkovTraversal.h - * - * Created on: 14. 4. 2016 - * Author: Tomas Pecka - */ - -#ifndef RTE_GLUSHKOVTRAVERSAL_H_ -#define RTE_GLUSHKOVTRAVERSAL_H_ - -#include <set> -#include <map> -#include <vector> -#include <algorithm> - -#include <rte/formal/FormalRTE.h> -#include <rte/formal/FormalRTEElement.h> -#include <rte/formal/FormalRTEAlternation.h> -#include <rte/formal/FormalRTESubstitution.h> -#include <rte/formal/FormalRTEIteration.h> -#include <rte/formal/FormalRTESymbol.h> -#include <rte/formal/FormalRTEEmpty.h> - -#include <exception/CommonException.h> - -#include <alphabet/RankedSymbol.h> - -#include "GlushkovSymbol.h" -#include "GlushkovPair.h" - -namespace rte { - -class GlushkovTraversal { -public: - /** - * @param re rte to probe - * @return all rteSymbols whichcan start the word. - */ - static std::set < GlushkovSymbol > first ( rte::FormalRTE const & re ); - - /** - * @param re rte to probe - * @return all rteSymbols that can terminate the word. - */ - static std::set < GlushkovSymbol > last ( rte::FormalRTE const & re ); - - /** - * @param re rte to probe - * @param symbol GlushkovSymbol for which we need the follow() - * @return all symbols that can follow specific symbol in word - */ - static std::set < std::vector < GlushkovSymbol > > follow ( rte::FormalRTE const & re, GlushkovSymbol const & symbol ); - - /** - * @param re rte to probe - * @return symbols of rte tree in order of they occurence in rte. - */ - static std::set < GlushkovSymbol > getSymbols ( rte::FormalRTE const & re ); - -private: - /** - * @param symbol ptr to symbol - * @param symbolSet set of gl.symbols - * @return GlushkovSymbol equivalent for rteSymbol pointer - */ - static GlushkovSymbol const & findSymbol ( rte::FormalRTESymbol const * const symbol, const std::set < GlushkovSymbol > & symbolSet ); - - /** - * @return bool true if symbol pointer is in this subtree - */ - static bool pos ( GlushkovSymbol const & symbol, rte::FormalRTE const * const & node ); - - static void getSymbols ( rte::FormalRTEElement const * const & node, const std::set < alphabet::RankedSymbol > & alphabetF, std::set < GlushkovSymbol > & posSet, int & i ); - - static std::set < rte::FormalRTESymbol const * > first ( rte::FormalRTEElement const * const & node ); - static std::set < rte::FormalRTESymbol const * > first ( rte::FormalRTEAlternation const * const & node ); - static std::set < rte::FormalRTESymbol const * > first ( rte::FormalRTESubstitution const * const & node ); - static std::set < rte::FormalRTESymbol const * > first ( rte::FormalRTEIteration const * const & node ); - static std::set < rte::FormalRTESymbol const * > first ( rte::FormalRTESymbol const * const & node ); - static std::set < rte::FormalRTESymbol const * > first ( rte::FormalRTEEmpty const * const & node ); - - static bool pos ( rte::FormalRTEElement const * const & node, rte::FormalRTESymbol const * const & symbSearch ); - static bool pos ( rte::FormalRTEAlternation const * const & node, rte::FormalRTESymbol const * const & symbSearch ); - static bool pos ( rte::FormalRTESubstitution const * const & node, rte::FormalRTESymbol const * const & symbSearch ); - static bool pos ( rte::FormalRTEIteration const * const & node, rte::FormalRTESymbol const * const & symbSearch ); - static bool pos ( rte::FormalRTESymbol const * const & node, rte::FormalRTESymbol const * const & symbSearch ); - static bool pos ( rte::FormalRTEEmpty const * const & node, rte::FormalRTESymbol const * const & symbSearch ); - - static std::set < std::vector < rte::FormalRTESymbol const * > > follow ( rte::FormalRTEElement const * const & node, rte::FormalRTESymbol const * const & symbptr, const std::set<alphabet::RankedSymbol>& alphabetK, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subM ); - static std::set < std::vector < rte::FormalRTESymbol const * > > follow ( rte::FormalRTEAlternation const * const & node, rte::FormalRTESymbol const * const & symbptr, const std::set<alphabet::RankedSymbol>& alphabetK, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subM ); - static std::set < std::vector < rte::FormalRTESymbol const * > > follow ( rte::FormalRTESubstitution const * const & node, rte::FormalRTESymbol const * const & symbptr, const std::set<alphabet::RankedSymbol>& alphabetK, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subM ); - static std::set < std::vector < rte::FormalRTESymbol const * > > follow ( rte::FormalRTEIteration const * const & node, rte::FormalRTESymbol const * const & symbptr, const std::set<alphabet::RankedSymbol>& alphabetK, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subM ); - static std::set < std::vector < rte::FormalRTESymbol const * > > follow ( rte::FormalRTESymbol const * const & node, rte::FormalRTESymbol const * const & symbptr, const std::set<alphabet::RankedSymbol>& alphabetK, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subM ); - static std::set < std::vector < rte::FormalRTESymbol const * > > follow ( rte::FormalRTEEmpty const * const & node, rte::FormalRTESymbol const * const & symbptr, const std::set<alphabet::RankedSymbol>& alphabetK, std::map < alphabet::RankedSymbol, std::set < rte::FormalRTESymbol const * > > & subM ); -}; - -} /* namespace rte */ - -#endif /* RTE_GLUSHKOVTRAVERSAL_H_ */ diff --git a/alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkov.cpp b/alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkov.cpp index 49f821f1e50f6afe463e99599bf7138b196d2cd6..d3829002019aeae4ec03e3eba027bb3b0ddb0ab0 100644 --- a/alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkov.cpp +++ b/alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkov.cpp @@ -7,18 +7,19 @@ #include "ToPostfixPushdownAutomatonGlushkov.h" -#include "label/Label.h" -#include "label/LabelPairLabel.h" -#include "label/ObjectLabel.h" #include "alphabet/BottomOfTheStackSymbol.h" #include "alphabet/EndSymbol.h" +#include "label/Label.h" +#include "label/LabelPairLabel.h" #include <automaton/Automaton.h> -#include "object/Object.h" #include "global/GlobalData.h" +#include "object/Object.h" + +#include "../glushkov/GlushkovTraversal.h" -#include "../GlushkovTraversal.h" +#include <iterator> namespace rte { @@ -28,103 +29,110 @@ automaton::NPDA ToPostfixPushdownAutomatonGlushkov::convert ( const rte::RTE & r return dispatch ( rte.getData ( ) ); } -std::vector < alphabet::Symbol > phi ( const std::vector < GlushkovSymbol > & follow, const std::map < GlushkovSymbol, alphabet::Symbol > & pos_stack_map ) { - std::vector < alphabet::Symbol > ret; +std::vector < alphabet::Symbol > phi ( const std::vector < alphabet::RankedSymbol > & follow ) { + return std::vector < alphabet::Symbol > ( follow.begin ( ), follow.end ( ) ); +} - for ( const GlushkovSymbol & f : follow ) { - auto it = pos_stack_map.find ( f ); +bool isSubstSymbolPresent ( const std::set < alphabet::RankedSymbol > & container, const std::set < alphabet::RankedSymbol > & substAlphabet ) { + std::vector < alphabet::RankedSymbol > intersection; + std::set_intersection ( container.begin ( ), container.end ( ), substAlphabet.begin ( ), substAlphabet.end ( ), std::back_inserter ( intersection ) ); + return intersection.size ( ) > 0; +} - if ( it == pos_stack_map.end ( ) ) - throw exception::CommonException ( "GlushkovRTE: phi: symbol not in pos. Probably constant? Invalid RTE!" ); +automaton::NPDA ToPostfixPushdownAutomatonGlushkov::convert ( const rte::FormalRTE & rte ) { - ret.push_back ( it->second ); - } + // step 1; index RTE + rte::FormalRTE indexedRTE = rte::GlushkovTraversal::index ( rte ); - return ret; -} + // step 2; compute: + // - first set + const std::set < alphabet::RankedSymbol > firstSet = rte::GlushkovTraversal::first ( indexedRTE ); -automaton::NPDA ToPostfixPushdownAutomatonGlushkov::convert ( const rte::FormalRTE & rte ) { + // - follow set for every element of (non-indexed) RTE alphabet element + std::map < alphabet::RankedSymbol, std::set < std::vector < alphabet::RankedSymbol > > > followSet; + + for ( const alphabet::RankedSymbol & symbol : indexedRTE.getAlphabet ( ) ) + followSet.insert ( std::make_pair ( symbol, rte::GlushkovTraversal::follow ( indexedRTE, symbol ) ) ); + + /* check for exceptions -> there must be NO substitution symbol in first or follow sets */ + if ( isSubstSymbolPresent ( firstSet, rte.getConstantAlphabet ( ) ) ) + throw exception::CommonException ( "GlushkovRTE: Substitution symbol appeared in the first set" ); + + for ( const auto & kv : followSet ) + for ( const auto & followTuple : kv.second ) + if ( isSubstSymbolPresent ( std::set < alphabet::RankedSymbol > ( followTuple.begin ( ), followTuple.end ( ) ), rte.getConstantAlphabet ( ) ) ) + throw exception::CommonException ( "GlushkovRTE: Substitution symbol appeared in a follow set" ); + + /* check end */ + + // step 3; create PDA (w/o transitions yet) and initialize input alphabet = (non-indexed) RTE alphabet and END symbol automaton::State q ( label::labelFrom ( 'q' ) ); automaton::State f ( label::labelFrom ( 'f' ) ); automaton::NPDA automaton ( q, alphabet::Symbol { alphabet::BottomOfTheStackSymbol::BOTTOM_OF_THE_STACK } ); - // step 3 automaton.addState ( f ); automaton.addFinalState ( f ); - // step 4 for ( const alphabet::RankedSymbol & symbol : rte.getAlphabet ( ) ) automaton.addInputSymbol ( symbol.getSymbol ( ) ); automaton.addInputSymbol ( alphabet::Symbol { alphabet::EndSymbol::END } ); - std::set < GlushkovSymbol > posSet = rte::GlushkovTraversal::getSymbols ( rte ); - std::set < GlushkovSymbol > firstSet = rte::GlushkovTraversal::first ( rte ); - std::map < GlushkovSymbol, std::set < std::vector < GlushkovSymbol > > > followSet; - - for ( const auto & symbol : posSet ) - followSet.insert ( std::make_pair ( symbol, rte::GlushkovTraversal::follow ( rte, symbol ) ) ); + // step 4; create pushdown store alphabet; it consists of elements of indexed RTE alphabet and BotS symbol + for ( const alphabet::RankedSymbol & symb : indexedRTE.getAlphabet ( ) ) + automaton.addPushdownStoreSymbol ( alphabet::Symbol ( symb ) ); /* DEBUG */ if ( common::GlobalData::verbose ) { - std::cerr << "Pos:" << std::endl; + std::cerr << "RTE:" << std::endl; + + for ( const auto & symbol : indexedRTE.getAlphabet ( ) ) + std::cerr << "\t" << symbol << std::endl; - for ( const auto & symbol : posSet ) std::cerr << "\t" << symbol.getInputSymbol ( ) << " id=" << symbol.getId ( ) << " ptr=" << symbol.getSymbolPtr ( ) << std::endl; + std::cerr << std::endl; - std::cerr << "First:" << std::endl; + std::cerr << "First(RTE):" << std::endl; - for ( const auto & symbol : firstSet ) std::cerr << "\t" << symbol.getInputSymbol ( ) << " id=" << symbol.getId ( ) << " ptr=" << symbol.getSymbolPtr ( ) << std::endl; + for ( const auto & symbol : firstSet ) + std::cerr << "\t" << symbol << std::endl; - std::cerr << "Follow:" << std::endl; + std::cerr << std::endl; for ( const auto & kv : followSet ) { - std::cerr << "\t" << kv.first.getInputSymbol ( ) << " id=" << kv.first.getId ( ) << " ptr=0x" << kv.first.getSymbolPtr ( ) << std::endl; + std::cerr << "Follow(RTE, " << kv.first << "):" << std::endl; + + if ( kv.second.empty ( ) ) + std::cerr << "\t" << "{}" << std::endl; for ( const auto & f : kv.second ) { for ( const auto & symbol : f ) - std::cerr << "\t\t" << symbol.getInputSymbol ( ) << " id=" << symbol.getId ( ) << " ptr=" << symbol.getSymbolPtr ( ) << std::endl; + std::cerr << "\t" << symbol << std::endl; std::cerr << std::endl; } + + std::cerr << std::endl; } } - /* DEBUG END */ - std::map < GlushkovSymbol, alphabet::Symbol > pos_stack_map; - - for ( const GlushkovSymbol & gsymb : posSet ) { - auto label = label::Label ( label::LabelPairLabel ( std::make_pair ( label::Label ( label::ObjectLabel ( alib::Object ( gsymb.getInputSymbol ( ).getSymbol ( ).getData ( ) ) ) ), label::labelFrom ( gsymb.getId ( ) ) ) ) ); - - pos_stack_map.insert ( std::make_pair ( gsymb, alphabet::Symbol ( alphabet::LabeledSymbol ( label ) ) ) ); - automaton.addPushdownStoreSymbol ( alphabet::Symbol ( alphabet::LabeledSymbol ( label ) ) ); - } - - for ( const GlushkovSymbol & symb : posSet ) { - auto it = pos_stack_map.find ( symb ); - - if ( it == pos_stack_map.end ( ) ) - throw exception::CommonException ( "GlushkovRTE: fail" ); - - if ( symb.getInputSymbol ( ).getRank ( ) == primitive::Unsigned ( 0 ) ) - automaton.addTransition ( q, symb.getInputSymbol ( ).getSymbol ( ), { }, q, { it->second } ); + for ( const alphabet::RankedSymbol & symb : indexedRTE.getAlphabet ( ) ) { + if ( symb.getRank ( ) == primitive::Unsigned ( 0 ) ) + automaton.addTransition ( q, rte::GlushkovTraversal::getSymbolFromGlushkovPair ( symb ).getSymbol ( ), { }, q, { alphabet::Symbol ( symb ) } ); else - for ( const std::vector < GlushkovSymbol > & f : followSet[symb] ) { - std::vector < alphabet::Symbol > fstring = phi ( f, pos_stack_map ); + for ( const std::vector < alphabet::RankedSymbol > & f : followSet[symb] ) { + std::vector < alphabet::Symbol > fstring = phi ( f ); std::reverse ( fstring.begin ( ), fstring.end ( ) ); - automaton.addTransition ( q, symb.getInputSymbol ( ).getSymbol ( ), fstring, q, { it->second } ); + automaton.addTransition ( q, rte::GlushkovTraversal::getSymbolFromGlushkovPair ( symb ).getSymbol ( ), fstring, q, { alphabet::Symbol ( symb ) } ); } } - for ( const GlushkovSymbol & symb : firstSet ) { - auto it = pos_stack_map.find ( symb ); - - if ( it == pos_stack_map.end ( ) ) - throw exception::CommonException ( "GlushkovRTE: fail2" ); - - automaton.addTransition ( q, alphabet::Symbol { alphabet::EndSymbol::END }, { it->second, alphabet::Symbol { alphabet::BottomOfTheStackSymbol::BOTTOM_OF_THE_STACK } - }, f, { } ); + for ( const alphabet::RankedSymbol & symb : firstSet ) { + std::vector < alphabet::Symbol > pop; + pop.push_back ( alphabet::Symbol ( symb ) ); + pop.push_back ( alphabet::Symbol ( alphabet::BottomOfTheStackSymbol::BOTTOM_OF_THE_STACK ) ); + automaton.addTransition ( q, alphabet::Symbol { alphabet::EndSymbol::END }, pop, f, { } ); } return automaton; @@ -132,14 +140,6 @@ automaton::NPDA ToPostfixPushdownAutomatonGlushkov::convert ( const rte::FormalR auto ToAutomatonGlushkovFormalRegExp = ToPostfixPushdownAutomatonGlushkov::RegistratorWrapper < automaton::NPDA, rte::FormalRTE > ( ToPostfixPushdownAutomatonGlushkov::convert ); -/* - * automaton::NPDA ToPostfixPushdownAutomatonGlushkov::convert(const rte::UnboundedRTE& rte) - * { - * throw exception::CommonException("Glushkov: Converting Unbounded RTE to postfix rhPDA NYI"); // TODO - * } - * - * auto ToAutomatonGlushkovUnboundedRegExp = ToAutomatonGlushkov::RegistratorWrapper<automaton::NPDA, rte::UnboundedRTE>(ToAutomatonGlushkov::getInstance(), ToAutomatonGlushkov::convert); - */ } /* namespace convert */ } /* namespace rte */ diff --git a/alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkov.h b/alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkov.h index 1019d8255206ef62cc3171787e333755097d3fa2..e874e0f04264305cdbd4f7df9da31ef2e8dbdecc 100644 --- a/alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkov.h +++ b/alib2algo/src/rte/convert/ToPostfixPushdownAutomatonGlushkov.h @@ -12,9 +12,9 @@ #include <map> #include <automaton/FSM/NFA.h> +#include <automaton/PDA/NPDA.h> #include <rte/RTE.h> #include <rte/formal/FormalRTE.h> -#include <automaton/PDA/NPDA.h> // #include <rte/unbounded/UnboundedRegExp.h> @@ -23,8 +23,8 @@ namespace rte { namespace convert { /** - * Converts regular expression to finite automaton using Glushkov's NFA construction algorithm. - * Source: Melichar 2.107 + * Converts regular tree expression to real-time height-deterministic pda + * Source: Master Thesis, Pecka Tomas, CTU FIT, 2016, chapter 4.2 */ class ToPostfixPushdownAutomatonGlushkov : public std::SingleDispatch < ToPostfixPushdownAutomatonGlushkov, automaton::NPDA, rte::RTEBase > { public: diff --git a/alib2algo/src/rte/glushkov/GlushkovTraversal.cpp b/alib2algo/src/rte/glushkov/GlushkovTraversal.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c29726e2894de9a54beaae51bff337bcd87cd1be --- /dev/null +++ b/alib2algo/src/rte/glushkov/GlushkovTraversal.cpp @@ -0,0 +1,411 @@ +/* + * GlushkovTraversal.cpp + * + * Created on: 14. 4. 2016 + * Author: Tomas Pecka + */ + +#include "GlushkovTraversal.h" +#include <alphabet/SymbolPairSymbol.h> +#include <iterator> +#include <vector> + +namespace rte { + +alphabet::RankedSymbol GlushkovTraversal::getSymbolFromGlushkovPair ( const alphabet::RankedSymbol & symbol ) { + const std::pair < alphabet::Symbol, alphabet::Symbol > sps = ( ( const alphabet::SymbolPairSymbol & ) symbol.getSymbol ( ).getData ( ) ).getData ( ); + + return alphabet::RankedSymbol ( sps.first, symbol.getRank ( ) ); +} + +// ----------------------------------------------------------------------------- + +bool GlushkovTraversal::pos ( const alphabet::RankedSymbol & symbol, const rte::FormalRTE & rte ) { + return pos ( rte.getRTE ( ), symbol ); +} + +std::set < alphabet::RankedSymbol > GlushkovTraversal::first ( const rte::FormalRTE & rte ) { + return first ( rte.getRTE ( ) ); +} + +std::set < std::vector < alphabet::RankedSymbol > > GlushkovTraversal::follow ( const rte::FormalRTE & rte, const alphabet::RankedSymbol & symbol ) { + std::map < alphabet::RankedSymbol, std::set < alphabet::RankedSymbol > > subMap; + + /* Init substitution map, ie \forall a \in K: sub[a] = \emptyset */ + for ( const alphabet::RankedSymbol & ssymb : rte.getConstantAlphabet ( ) ) + subMap.insert ( std::make_pair ( ssymb, std::set < alphabet::RankedSymbol > { } ) ); + + /* recursively compute follow */ + return follow ( rte.getRTE ( ), symbol, rte.getConstantAlphabet ( ), subMap ); +} + +// ----------------------------------------------------------------------------- + +std::set < alphabet::RankedSymbol > GlushkovTraversal::first ( const rte::FormalRTEElement & node ) { + const rte::FormalRTEAlternation * alternation = dynamic_cast < const rte::FormalRTEAlternation * > ( & node ); + const rte::FormalRTESubstitution * concatenation = dynamic_cast < const rte::FormalRTESubstitution * > ( & node ); + const rte::FormalRTEIteration * iteration = dynamic_cast < const rte::FormalRTEIteration * > ( & node ); + const rte::FormalRTESymbolAlphabet * symbol = dynamic_cast < const rte::FormalRTESymbolAlphabet * > ( & node ); + const rte::FormalRTESymbolSubst * substSymbol = dynamic_cast < const rte::FormalRTESymbolSubst * > ( & node ); + const rte::FormalRTEEmpty * empty = dynamic_cast < const rte::FormalRTEEmpty * > ( & node ); + + if ( alternation ) + return first ( * alternation ); + else if ( concatenation ) + return first ( * concatenation ); + else if ( iteration ) + return first ( * iteration ); + else if ( empty ) + return first ( * empty ); + else if ( symbol ) + return first ( * symbol ); + else if ( substSymbol ) + return first ( * substSymbol ); + + throw exception::CommonException ( "GlushkovTraversal::first - invalid FormalRTEElement node" ); +} + +std::set < alphabet::RankedSymbol > GlushkovTraversal::first ( const rte::FormalRTEAlternation & node ) { + std::set < alphabet::RankedSymbol > ret, tmp; + + tmp = first ( node.getLeftElement ( ) ); + ret.insert ( tmp.begin ( ), tmp.end ( ) ); + + tmp = first ( node.getRightElement ( ) ); + ret.insert ( tmp.begin ( ), tmp.end ( ) ); + + return ret; +} + +std::set < alphabet::RankedSymbol > GlushkovTraversal::first ( const rte::FormalRTESubstitution & node ) { + std::set < alphabet::RankedSymbol > ret, tmp; + + tmp = first ( node.getLeftElement ( ) ); + ret.insert ( tmp.begin ( ), tmp.end ( ) ); + + // First() returns a set. hence only one occurrence. + auto it = std::find_if ( ret.begin ( ), ret.end ( ), [node] ( const alphabet::RankedSymbol & a ) { + return a == node.getSubstitutionSymbol ( ).getSymbol ( ); + } ); + + if ( it != ret.end ( ) ) { + ret.erase ( it ); + tmp = first ( node.getRightElement ( ) ); + ret.insert ( tmp.begin ( ), tmp.end ( ) ); + } + + return ret; +} + +std::set < alphabet::RankedSymbol > GlushkovTraversal::first ( const rte::FormalRTEIteration & node ) { + std::set < alphabet::RankedSymbol > ret; + + ret = first ( node.getElement ( ) ); + ret.insert ( node.getSubstitutionSymbol ( ).getSymbol ( ) ); + return ret; +} + +std::set < alphabet::RankedSymbol > GlushkovTraversal::first ( const rte::FormalRTESymbolAlphabet & node ) { + return std::set < alphabet::RankedSymbol > { node.getSymbol ( ) }; +} + +std::set < alphabet::RankedSymbol > GlushkovTraversal::first ( const rte::FormalRTESymbolSubst & node ) { + return std::set < alphabet::RankedSymbol > { node.getSymbol ( ) }; +} + +std::set < alphabet::RankedSymbol > GlushkovTraversal::first ( const rte::FormalRTEEmpty & /* node */ ) { + return std::set < alphabet::RankedSymbol > ( ); +} + +// ---------------------------------------------------------------------------- + +std::set < std::vector < alphabet::RankedSymbol > > GlushkovTraversal::follow ( const rte::FormalRTEElement & node, const alphabet::RankedSymbol & symbolF, const std::set < alphabet::RankedSymbol > & alphabetK, std::map < alphabet::RankedSymbol, std::set < alphabet::RankedSymbol > > & subMap ) { + const rte::FormalRTEAlternation * alternation = dynamic_cast < const rte::FormalRTEAlternation * > ( & node ); + const rte::FormalRTESubstitution * concatenation = dynamic_cast < const rte::FormalRTESubstitution * > ( & node ); + const rte::FormalRTEIteration * iteration = dynamic_cast < const rte::FormalRTEIteration * > ( & node ); + const rte::FormalRTESymbolAlphabet * symbol = dynamic_cast < const rte::FormalRTESymbolAlphabet * > ( & node ); + const rte::FormalRTESymbolSubst * substSymbol = dynamic_cast < const rte::FormalRTESymbolSubst * > ( & node ); + const rte::FormalRTEEmpty * empty = dynamic_cast < const rte::FormalRTEEmpty * > ( & node ); + + if ( alternation ) + return follow ( * alternation, symbolF, alphabetK, subMap ); + + else if ( concatenation ) + return follow ( * concatenation, symbolF, alphabetK, subMap ); + + else if ( iteration ) + return follow ( * iteration, symbolF, alphabetK, subMap ); + + else if ( symbol ) + return follow ( * symbol, symbolF, alphabetK, subMap ); + + else if ( substSymbol ) + return follow ( * substSymbol, symbolF, alphabetK, subMap ); + + else if ( empty ) + return follow ( * empty, symbolF, alphabetK, subMap ); + + throw exception::CommonException ( "GlushkovTraversal::follow() - unknown FormalRTEElement node" ); +} + +std::set < std::vector < alphabet::RankedSymbol > > GlushkovTraversal::follow ( const rte::FormalRTEAlternation & node, const alphabet::RankedSymbol & symbolF, const std::set < alphabet::RankedSymbol > & alphabetK, std::map < alphabet::RankedSymbol, std::set < alphabet::RankedSymbol > > & subMap ) { + std::set < std::vector < alphabet::RankedSymbol > > ret, tmp; + + tmp = follow ( node.getLeftElement ( ), symbolF, alphabetK, subMap ); + ret.insert ( tmp.begin ( ), tmp.end ( ) ); + + tmp = follow ( node.getRightElement ( ), symbolF, alphabetK, subMap ); + ret.insert ( tmp.begin ( ), tmp.end ( ) ); + + return ret; +} + +std::set < std::vector < alphabet::RankedSymbol > > GlushkovTraversal::follow ( const rte::FormalRTESubstitution & node, const alphabet::RankedSymbol & symbolF, const std::set < alphabet::RankedSymbol > & alphabetK, std::map < alphabet::RankedSymbol, std::set < alphabet::RankedSymbol > > & subMap ) { + + std::map < alphabet::RankedSymbol, std::set < alphabet::RankedSymbol > > subMap2 ( subMap ); + auto itMap = subMap2.find ( node.getSubstitutionSymbol ( ).getSymbol ( ) ); + + itMap->second.clear ( ); + + for ( const auto & s : first ( node.getRightElement ( ) ) ) + itMap->second.insert ( s ); + + /* + * E sub F + * 1. if symbolF in F subtree, then Follow(F, symbolF); + * 2. if symbolF in E subtree, then Follow(E, symbolF); + */ + + std::set < std::vector < alphabet::RankedSymbol > > ret; + + if ( pos ( node.getLeftElement ( ), symbolF ) ) + ret = follow ( node.getLeftElement ( ), symbolF, alphabetK, subMap2 ); + else + ret = follow ( node.getRightElement ( ), symbolF, alphabetK, subMap ); + + return ret; +} + +std::set < std::vector < alphabet::RankedSymbol > > GlushkovTraversal::follow ( const rte::FormalRTEIteration & node, const alphabet::RankedSymbol & symbolF, const std::set < alphabet::RankedSymbol > & alphabetK, std::map < alphabet::RankedSymbol, std::set < alphabet::RankedSymbol > > & subMap ) { + + std::set < std::vector < alphabet::RankedSymbol > > ret; + + for ( const auto & s : first ( node.getElement ( ) ) ) + subMap[node.getSubstitutionSymbol ( ).getSymbol ( )].insert ( s ); + + ret = follow ( node.getElement ( ), symbolF, alphabetK, subMap ); + + return ret; +} + +void preprocessSubMap ( const std::set < alphabet::RankedSymbol > & alphabetK, std::map < alphabet::RankedSymbol, std::set < alphabet::RankedSymbol > > & subMap ) { + bool change = true; + + while ( change ) { + change = false; + + for ( auto & kv : subMap ) { + std::set < alphabet::RankedSymbol > & value = kv.second; + + for ( const auto & v : value ) { + if ( alphabetK.find ( v ) == alphabetK.end ( ) ) continue; + + // std::cerr << "Gonna replace in submap: " << kv.first << ": " << v->getSymbol() << std::endl; + + auto it = subMap.find ( v ); + size_t vsize = value.size ( ); + value.insert ( it->second.begin ( ), it->second.end ( ) ); + change = ( vsize != value.size ( ) ); + value.erase ( v ); + + /* + * for(const auto& x : it->second) + * std::cerr << "\t" << x->getSymbol() << std::endl; + */ + } + } + } +} + +std::set < std::vector < alphabet::RankedSymbol > > replaceConstants ( const std::set < alphabet::RankedSymbol > & alphabetK, const std::vector < alphabet::RankedSymbol > & f, const std::map < alphabet::RankedSymbol, std::set < alphabet::RankedSymbol > > & subMap2 ) { + std::set < std::vector < alphabet::RankedSymbol > > ret; + + if ( f.size ( ) == 0 ) return ret; + + std::map < alphabet::RankedSymbol, std::set < alphabet::RankedSymbol > > subMap ( subMap2 ); + preprocessSubMap ( alphabetK, subMap ); + + auto subIt = subMap.find ( f[0] ); + + if ( subIt == subMap.end ( ) ) { + std::vector < alphabet::RankedSymbol > v; + v.push_back ( f[0] ); + ret.insert ( v ); + } else { + for ( const auto & subSymbol : subIt->second ) { + std::vector < alphabet::RankedSymbol > v; + v.push_back ( subSymbol ); + ret.insert ( v ); + } + } + + for ( size_t i = 1; i < f.size ( ); i++ ) { + std::set < std::vector < alphabet::RankedSymbol > > tmp; + + subIt = subMap.find ( f[i] ); + + if ( subIt == subMap.end ( ) ) + for ( const auto & r : ret ) { + std::vector < alphabet::RankedSymbol > v = r; + v.push_back ( f[i] ); + tmp.insert ( v ); + } + + else + + for ( const auto & r : ret ) + for ( const auto & subSymbol : subIt->second ) { + std::vector < alphabet::RankedSymbol > v = r; + v.push_back ( subSymbol ); + tmp.insert ( v ); + } + + ret = tmp; + } + + return ret; +} + +std::set < std::vector < alphabet::RankedSymbol > > GlushkovTraversal::follow ( const rte::FormalRTESymbolAlphabet & node, const alphabet::RankedSymbol & symbolF, const std::set < alphabet::RankedSymbol > & alphabetK, std::map < alphabet::RankedSymbol, std::set < alphabet::RankedSymbol > > & subMap ) { + + std::set < std::vector < alphabet::RankedSymbol > > ret, tmp; + + if ( symbolF == node.getSymbol ( ) ) { + std::vector < alphabet::RankedSymbol > children; + + for ( const std::smart_ptr < const rte::FormalRTESymbol > & c : node.getElements ( ) ) + children.push_back ( c->getSymbol ( ) ); + + ret = replaceConstants ( alphabetK, children, subMap ); + return ret; + } + + for ( const auto & c : node.getElements ( ) ) { + tmp = follow ( * c, symbolF, alphabetK, subMap ); + ret.insert ( tmp.begin ( ), tmp.end ( ) ); + } + + return ret; +} + +std::set < std::vector < alphabet::RankedSymbol > > GlushkovTraversal::follow ( const rte::FormalRTESymbolSubst & /* node */, const alphabet::RankedSymbol & /* symbolF */, const std::set < alphabet::RankedSymbol > & /* alphabetK */, std::map < alphabet::RankedSymbol, std::set < alphabet::RankedSymbol > > & /* subMap */ ) { + return std::set < std::vector < alphabet::RankedSymbol > > ( ); +} + +std::set < std::vector < alphabet::RankedSymbol > > GlushkovTraversal::follow ( const rte::FormalRTEEmpty & /* node */, const alphabet::RankedSymbol & /* symbolF */, const std::set < alphabet::RankedSymbol > & /* alphabetK */, std::map < alphabet::RankedSymbol, std::set < alphabet::RankedSymbol > > & /* subMap */ ) { + return std::set < std::vector < alphabet::RankedSymbol > > ( ); +} + +// ---------------------------------------------------------------------------- + +bool GlushkovTraversal::pos ( const rte::FormalRTEElement & node, const alphabet::RankedSymbol & symbolF ) { + const rte::FormalRTEAlternation * alternation = dynamic_cast < const rte::FormalRTEAlternation * > ( & node ); + const rte::FormalRTEIteration * iteration = dynamic_cast < const rte::FormalRTEIteration * > ( & node ); + const rte::FormalRTESubstitution * concatenation = dynamic_cast < const rte::FormalRTESubstitution * > ( & node ); + const rte::FormalRTESymbolAlphabet * symbol = dynamic_cast < const rte::FormalRTESymbolAlphabet * > ( & node ); + const rte::FormalRTESymbolSubst * substSymbol = dynamic_cast < const rte::FormalRTESymbolSubst * > ( & node ); + const rte::FormalRTEEmpty * empty = dynamic_cast < const rte::FormalRTEEmpty * > ( & node ); + + if ( alternation ) + return pos ( * alternation, symbolF ); + + else if ( concatenation ) + return pos ( * concatenation, symbolF ); + + else if ( iteration ) + return pos ( * iteration, symbolF ); + + else if ( symbol ) + return pos ( * symbol, symbolF ); + + else if ( substSymbol ) + return pos ( * substSymbol, symbolF ); + + else if ( empty ) + return pos ( * empty, symbolF ); + + throw exception::CommonException ( "GlushkovTraversal::pos() - unknown FormalRTEElement node" ); +} + +bool GlushkovTraversal::pos ( const rte::FormalRTEAlternation & node, const alphabet::RankedSymbol & symbolF ) { + return pos ( node.getLeftElement ( ), symbolF ) || pos ( node.getRightElement ( ), symbolF ); +} + +bool GlushkovTraversal::pos ( const rte::FormalRTESubstitution & node, const alphabet::RankedSymbol & symbolF ) { + return pos ( node.getLeftElement ( ), symbolF ) || pos ( node.getRightElement ( ), symbolF ); +} + +bool GlushkovTraversal::pos ( const rte::FormalRTEIteration & node, const alphabet::RankedSymbol & symbolF ) { + return pos ( node.getElement ( ), symbolF ); +} + +bool GlushkovTraversal::pos ( const rte::FormalRTESymbolAlphabet & node, const alphabet::RankedSymbol & symbolF ) { + if ( symbolF == node.getSymbol ( ) ) return true; + + for ( const std::smart_ptr < const rte::FormalRTESymbol > & element : node.getElements ( ) ) + if ( pos ( * element.get ( ), symbolF ) ) + return true; + + return false; +} + +bool GlushkovTraversal::pos ( const rte::FormalRTESymbolSubst & node, const alphabet::RankedSymbol & symbolF ) { + return symbolF == node.getSymbol ( ); +} + +bool GlushkovTraversal::pos ( const rte::FormalRTEEmpty & /* node */, const alphabet::RankedSymbol & /* symbolF */ ) { + return false; +} + +// ---------------------------------------------------------------------------- + +FormalRTE GlushkovTraversal::index ( const rte::FormalRTE & rte ) { + int i = 1; + + return FormalRTE ( index ( rte, rte.getRTE ( ), i ) ); +} + +std::rvalue_ref < FormalRTEElement > GlushkovTraversal::index ( const rte::FormalRTE & rte, const rte::FormalRTEElement & node, int & i ) { + const rte::FormalRTEAlternation * alternation = dynamic_cast < const rte::FormalRTEAlternation * > ( & node ); + const rte::FormalRTESubstitution * substitution = dynamic_cast < const rte::FormalRTESubstitution * > ( & node ); + const rte::FormalRTEIteration * iteration = dynamic_cast < const rte::FormalRTEIteration * > ( & node ); + const rte::FormalRTESymbolAlphabet * symbol = dynamic_cast < const rte::FormalRTESymbolAlphabet * > ( & node ); + const rte::FormalRTESymbolSubst * substSymbol = dynamic_cast < const rte::FormalRTESymbolSubst * > ( & node ); + const rte::FormalRTEEmpty * empty = dynamic_cast < const rte::FormalRTEEmpty * > ( & node ); + + if ( symbol ) { + alphabet::SymbolPairSymbol sps = alphabet::SymbolPairSymbol ( std::make_pair ( alphabet::Symbol ( symbol->getSymbol ( ).getSymbol ( ) ), alphabet::symbolFrom ( i++ ) ) ); + FormalRTESymbolAlphabet * ns = new FormalRTESymbolAlphabet ( alphabet::RankedSymbol ( alphabet::Symbol ( sps ), symbol->getSymbol ( ).getRank ( ) ) ); + + for ( const std::smart_ptr < const rte::FormalRTESymbol > & e : symbol->getElements ( ) ) { + std::rvalue_ref < FormalRTEElement > child = index ( rte, * e.get ( ), i ); + ns->appendElement ( * static_cast < FormalRTESymbol * > ( child->clone ( ) ) ); // FIXME typecast + } + + return std::rvalue_ref < FormalRTESymbolAlphabet > ( ns ); + } else if ( substSymbol ) { + return std::rvalue_ref < FormalRTEElement > ( new FormalRTESymbolSubst ( * substSymbol ) ); + } else if ( alternation ) { + return std::rvalue_ref < FormalRTEElement > ( new FormalRTEAlternation ( index ( rte, alternation->getLeftElement ( ), i ), index ( rte, alternation->getRightElement ( ), i ) ) ); + } else if ( substitution ) { + return std::rvalue_ref < FormalRTEElement > ( new FormalRTESubstitution ( index ( rte, substitution->getLeftElement ( ), i ), index ( rte, substitution->getRightElement ( ), i ), substitution->getSubstitutionSymbol ( ) ) ); + } else if ( iteration ) { + return std::rvalue_ref < FormalRTEElement > ( new FormalRTEIteration ( index ( rte, iteration->getElement ( ), i ), iteration->getSubstitutionSymbol ( ) ) ); + } else if ( empty ) { + return std::rvalue_ref < FormalRTEElement > ( new FormalRTEEmpty ( ) ); + } else { + throw exception::CommonException ( "GlushkovTraversal::index() - unknown rteElement node" ); + } +} + +} /* namespace rte */ diff --git a/alib2algo/src/rte/glushkov/GlushkovTraversal.h b/alib2algo/src/rte/glushkov/GlushkovTraversal.h new file mode 100644 index 0000000000000000000000000000000000000000..7327fd0dd08423356c88fe5fc3e7bfe6b3c29218 --- /dev/null +++ b/alib2algo/src/rte/glushkov/GlushkovTraversal.h @@ -0,0 +1,87 @@ +/* + * GlushkovTraversal.h + * + * Created on: 14. 4. 2016 + * Author: Tomas Pecka + */ + +#ifndef RTE_GLUSHKOVTRAVERSAL_H_ +#define RTE_GLUSHKOVTRAVERSAL_H_ + +#include <algorithm> +#include <map> +#include <set> +#include <vector> + +#include <rte/formal/FormalRTE.h> +#include <rte/formal/FormalRTEElements.h> + +#include <exception/CommonException.h> + +#include <alphabet/RankedSymbol.h> + +namespace rte { + +class GlushkovTraversal { +public: + /** + * @param re rte to probe + * @return all rteSymbols which can be root of the tree. + */ + static std::set < alphabet::RankedSymbol > first ( const rte::FormalRTE & re ); + + /** + * @param re rte to probe + * @param symbol FormalRTESymbol for which we need the follow(), i.e., Follow(RTE, symbol) + * @return all symbols that can follow specific symbol in word + */ + static std::set < std::vector < alphabet::RankedSymbol > > follow ( const rte::FormalRTE & re, const alphabet::RankedSymbol & symbol ); + + /** + * @param re rte to index + * @return FormalRTE with indexed elements + */ + static FormalRTE index ( const rte::FormalRTE & re ); + + /** + * @param symbol Glushkov Pair symbol, i.e., SymbolPair of RankedSymbol and integer index + * @return RankedSymbol from the pair on input + */ + static alphabet::RankedSymbol getSymbolFromGlushkovPair ( const alphabet::RankedSymbol & symbol ); + +private: + /** + * @return bool true if symbol pointer is in this subtree + */ + static bool pos ( const alphabet::RankedSymbol & symbol, const rte::FormalRTE & node ); + + static std::rvalue_ref < FormalRTEElement > index ( const rte::FormalRTE & rte, const rte::FormalRTEElement & node, int & i ); + + static std::set < alphabet::RankedSymbol > first ( const rte::FormalRTEElement & node ); + static std::set < alphabet::RankedSymbol > first ( const rte::FormalRTEAlternation & node ); + static std::set < alphabet::RankedSymbol > first ( const rte::FormalRTESubstitution & node ); + static std::set < alphabet::RankedSymbol > first ( const rte::FormalRTEIteration & node ); + static std::set < alphabet::RankedSymbol > first ( const rte::FormalRTESymbolAlphabet & node ); + static std::set < alphabet::RankedSymbol > first ( const rte::FormalRTESymbolSubst & node ); + static std::set < alphabet::RankedSymbol > first ( const rte::FormalRTEEmpty & node ); + + static bool pos ( const rte::FormalRTEElement & node, const alphabet::RankedSymbol & symbSearch ); + static bool pos ( const rte::FormalRTEAlternation & node, const alphabet::RankedSymbol & symbSearch ); + static bool pos ( const rte::FormalRTESubstitution & node, const alphabet::RankedSymbol & symbSearch ); + static bool pos ( const rte::FormalRTEIteration & node, const alphabet::RankedSymbol & symbSearch ); + static bool pos ( const rte::FormalRTESymbolAlphabet & node, const alphabet::RankedSymbol & symbSearch ); + static bool pos ( const rte::FormalRTESymbolSubst & node, const alphabet::RankedSymbol & symbSearch ); + static bool pos ( const rte::FormalRTEEmpty & node, const alphabet::RankedSymbol & symbSearch ); + + static std::set < std::vector < alphabet::RankedSymbol > > follow ( const rte::FormalRTEElement & node, const alphabet::RankedSymbol & symbol, const std::set < alphabet::RankedSymbol > & alphabetK, std::map < alphabet::RankedSymbol, std::set < alphabet::RankedSymbol > > & subM ); + static std::set < std::vector < alphabet::RankedSymbol > > follow ( const rte::FormalRTEAlternation & node, const alphabet::RankedSymbol & symbol, const std::set < alphabet::RankedSymbol > & alphabetK, std::map < alphabet::RankedSymbol, std::set < alphabet::RankedSymbol > > & subM ); + static std::set < std::vector < alphabet::RankedSymbol > > follow ( const rte::FormalRTESubstitution & node, const alphabet::RankedSymbol & symbol, const std::set < alphabet::RankedSymbol > & alphabetK, std::map < alphabet::RankedSymbol, std::set < alphabet::RankedSymbol > > & subM ); + static std::set < std::vector < alphabet::RankedSymbol > > follow ( const rte::FormalRTEIteration & node, const alphabet::RankedSymbol & symbol, const std::set < alphabet::RankedSymbol > & alphabetK, std::map < alphabet::RankedSymbol, std::set < alphabet::RankedSymbol > > & subM ); + static std::set < std::vector < alphabet::RankedSymbol > > follow ( const rte::FormalRTESymbolAlphabet & node, const alphabet::RankedSymbol & symbol, const std::set < alphabet::RankedSymbol > & alphabetK, std::map < alphabet::RankedSymbol, std::set < alphabet::RankedSymbol > > & subM ); + static std::set < std::vector < alphabet::RankedSymbol > > follow ( const rte::FormalRTESymbolSubst & node, const alphabet::RankedSymbol & symbol, const std::set < alphabet::RankedSymbol > & alphabetK, std::map < alphabet::RankedSymbol, std::set < alphabet::RankedSymbol > > & subM ); + static std::set < std::vector < alphabet::RankedSymbol > > follow ( const rte::FormalRTEEmpty & node, const alphabet::RankedSymbol & symbol, const std::set < alphabet::RankedSymbol > & alphabetK, std::map < alphabet::RankedSymbol, std::set < alphabet::RankedSymbol > > & subM ); +}; + +} /* namespace rte */ + +#endif /* RTE_GLUSHKOVTRAVERSAL_H_ */