Skip to content
Snippets Groups Projects
CompactSuffixAutomatonTerminatingSymbol.h 8.01 KiB
Newer Older
  • Learn to ignore specific revisions
  • #include <ext/iostream>
    
    
    #include <alib/string>
    #include <alib/map>
    
    #include <common/DefaultSymbolType.h>
    
    #include <sax/FromXMLParserHelper.h>
    #include <core/xmlApi.hpp>
    #include <primitive/xml/Unsigned.h>
    #include <primitive/xml/UnsignedLong.h>
    #include <container/xml/ObjectsVector.h>
    #include <container/xml/ObjectsMap.h>
    
    #include <alphabet/common/SymbolNormalize.h>
    
    
    #include <automaton/FSM/CompactDFA.h>
    
    
    namespace indexes {
    
    namespace stringology {
    
    template < class SymbolType = DefaultSymbolType >
    
    class CompactSuffixAutomatonTerminatingSymbol {
    
    	ext::vector < SymbolType > m_string;
    	ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > m_delta;
    
    public:
    	void setString(const ext::vector < SymbolType > & str) {
    		m_string = str;
    	}
    
    	const ext::vector < SymbolType > & getString ( ) const & {
    		return m_string;
    	}
    
    	ext::vector < SymbolType > && getString ( ) && {
    		return std::move ( m_string );
    	}
    
    	void setNumberOfVertices(int n) {
    		m_delta.resize(n);
    	}
    
    	void insertVertex(unsigned vertexNumber, const ext::map<ext::pair< int,int>,int> & edges) {
    
    		for(const auto & edge : edges ) {
    			m_delta[vertexNumber].insert({{edge.first.first - 1, edge.first.second - 1}, edge.second}); // to match indexing the string from 0
    
    		}
    	}
    
    	const ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > & getTransitions ( ) const & {
    		return m_delta;
    	}
    
    	ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > && getTransitions ( ) && {
    		return std::move ( m_delta );
    	}
    
    	bool addTransition ( unsigned from, size_t startIndex, size_t endIndex, unsigned to ) {
    		return m_delta [ from ].insert ( ext::make_pair ( ext::make_pair ( startIndex, endIndex ), to ) ).second;
    	}
    
    	void setTransitions ( ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > delta ) {
    		m_delta = std::move ( delta );
    	}
    
    /*	void print() {
    		cout << "PRINT" << endl;
    		cout << m_delta.size() << endl;
    		for(int i = 0;i<m_delta.size();i++) {
    			cout << "Vrchol " << i << endl;
    			for(auto it = m_delta[i].begin(); it != m_delta[i].end(); ++it) {
    				cout << it->first.first << "  " << it->first.second << endl;
    				cout << "	   -- ";
    				for(int j = it->first.first;j<=it->first.second;j++) {
    					cout << m_string[j];
    				}
    				cout << " --> " << it->second << endl;
    			}
    		}
    	}*/
    
    	void GetAllPathsLen(unsigned state, unsigned curLen, ext::set < unsigned > & res) const {
    
    		if(m_delta[state].empty ( ))
    
    			res.insert(m_string.size ( ) - curLen);
    
    
    		for(const auto & it : m_delta[state]) {
    			GetAllPathsLen(it.second,curLen+it.first.second-it.first.first+1,res);
    
    		}
    	}
    
    	unsigned GetNextState(unsigned state,const ext::vector <SymbolType > & pattern, unsigned & index) const {
    
    		for(const auto & it : m_delta[state]) {
    			if( m_string[it.first.first] != pattern[index])//hledám hranu, která má první znak stejný
    
    			for(unsigned i = it.first.first;i<=it.first.second;i++) { //chci projít všechny znaky, jestli se shodují se vzorem
    
    				if(index == pattern.size()) {
    
    					index += (it.first.second-i+1);
    					return it.second;
    
    				}
    
    				if(m_string[i] != pattern[index++]) { //pokud se neshodují - konec
    					throw "notfound";
    				}
    			}
    
    	friend ext::ostream & operator << ( ext::ostream & out, const CompactSuffixAutomatonTerminatingSymbol & instance ) {
    
    		return out << "(CompactSuffixAutomatonTerminatingSymbol " << instance.m_string << ", " << instance.m_delta << ")";
    	}
    
    	auto operator <=> ( const CompactSuffixAutomatonTerminatingSymbol & other ) const {
    		return std::tie ( getString ( ), getTransitions ( ) ) <=> std::tie ( other.getString ( ), other.getTransitions ( ) );
    	}
    
    	bool operator == ( const CompactSuffixAutomatonTerminatingSymbol & other ) const {
    		return std::tie ( getString ( ), getTransitions ( ) ) == std::tie ( other.getString ( ), other.getTransitions ( ) );
    	}
    
    	explicit operator automaton::CompactDFA < SymbolType, unsigned > ( ) const;
    
    };
    
    template < class SymbolType >
    
    CompactSuffixAutomatonTerminatingSymbol < SymbolType >::operator automaton::CompactDFA < SymbolType, unsigned > ( ) const {
    	automaton::CompactDFA < SymbolType, unsigned > res ( 0 );
    
    	res.setInputAlphabet ( ext::set < SymbolType > ( getString ( ).begin ( ), getString ( ).end ( ) ) );
    
    	ext::set < unsigned > states;
    
    Jan Trávníček's avatar
    Jan Trávníček committed
    	for ( size_t state = 0; state < getTransitions ( ).size ( ); ++ state )
    
    		states.insert ( state );
    	res.setStates ( std::move ( states ) );
    
    	res.addFinalState ( 1 );
    
    
    Jan Trávníček's avatar
    Jan Trávníček committed
    	for ( size_t state = 0; state < getTransitions ( ).size ( ); ++ state )
    
    		for ( const std::pair < const ext::pair < size_t, size_t >, unsigned > & transition : getTransitions ( ) [ state ] )
    			res.addTransition ( state, ext::vector < SymbolType > ( getString ( ).begin ( ) + transition.first.first, getString ( ).begin ( ) + transition.first.second + 1 ), transition.second );
    
    	return res;
    }
    
    } /* namespace stringology */
    
    } /* namespace indexes */
    
    namespace core {
    
    template < class SymbolType >
    
    struct normalize < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > > {
    
    	static indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < > eval ( indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > && value ) {
    		indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < DefaultStateType > res;
    		res.setString ( alphabet::SymbolNormalize::normalizeSymbols ( std::move ( value ).getString ( ) ) );
    		res.setTransitions ( std::move ( value ).getTransitions ( ) );
    
    		return res;
    	}
    };
    
    template < class SymbolType >
    struct xmlApi < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > > {
    	static indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > parse ( ext::deque < sax::Token >::iterator & input );
    	static bool first ( const ext::deque < sax::Token >::const_iterator & input );
    
    	static std::string xmlTagName ( );
    
    	static void compose ( ext::deque < sax::Token > & output, const indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > & index );
    
    };
    
    template < class SymbolType >
    indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > xmlApi < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > >::parse ( ext::deque < sax::Token >::iterator & input ) {
    	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, xmlTagName ( ) );
    	ext::vector < SymbolType > string = core::xmlApi < ext::vector < SymbolType > >::parse ( input );
    	ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > delta = core::xmlApi < ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > >::parse ( input );
    	indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > res;
    	res.setString ( std::move ( string ) );
    	res.setTransitions ( std::move ( delta ) );
    
    	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, xmlTagName ( ) );
    	return res;
    }
    
    template < class SymbolType >
    bool xmlApi < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > >::first ( const ext::deque < sax::Token >::const_iterator & input ) {
    	return sax::FromXMLParserHelper::isToken ( input, sax::Token::TokenType::START_ELEMENT, xmlTagName ( ) );
    }
    
    template < class SymbolType >
    
    std::string xmlApi < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > >::xmlTagName ( ) {
    	return "CompactSuffixAutomatonTerminatingSymbol";
    
    }
    
    template < class SymbolType >
    void xmlApi < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > >::compose ( ext::deque < sax::Token > & output, const indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > & index ) {
    	output.emplace_back ( xmlTagName ( ), sax::Token::TokenType::START_ELEMENT );
    	core::xmlApi < ext::vector < SymbolType > >::compose ( output, index.getString ( ) );
    	core::xmlApi < ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > >::compose ( output, index.getTransitions ( ) );
    	output.emplace_back ( xmlTagName ( ), sax::Token::TokenType::END_ELEMENT );
    }
    
    } /* namespace core */