Skip to content
Snippets Groups Projects
CompactSuffixAutomatonTerminatingSymbol.h 10.4 KiB
Newer Older
  • Learn to ignore specific revisions
  • /*
     * CompactSuffixAutomatonTerminatingSymbol.h
     *
     *  Created on: Jan 8, 2017
     *      Author: Jan Travnicek
     */
    
    #ifndef COMPACT_SUFFIX_AUTOMATON_TERMINATING_SYMBOL_H_
    #define COMPACT_SUFFIX_AUTOMATON_TERMINATING_SYMBOL_H_
    
    #include <alib/string>
    #include <alib/map>
    #include <alib/iostream>
    #include <sstream>
    
    #include <common/DefaultSymbolType.h>
    
    #include <object/UniqueObject.h>
    #include <object/ObjectBase.h>
    
    #include <sax/FromXMLParserHelper.h>
    #include <core/xmlApi.hpp>
    #include <primitive/xml/Unsigned.h>
    #include <primitive/xml/UnsignedLong.h>
    #include <container/xml/ObjectsVector.h>
    #include <container/xml/ObjectsMap.h>
    
    #include <alphabet/common/SymbolNormalize.h>
    
    #include <automaton/FSM/CompactNFA.h>
    
    namespace indexes {
    
    namespace stringology {
    
    class GeneralAlphabet;
    
    template < class SymbolType = DefaultSymbolType >
    class CompactSuffixAutomatonTerminatingSymbol : public object::ObjectBase {
    	ext::vector < SymbolType > m_string;
    	ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > m_delta;
    
    public:
    	/**
    
    	 * @copydoc SuffixTrieNode::clone ( ) const &
    
    	virtual ObjectBase * clone ( ) const &;
    
    	 * @copydoc SuffixTrieNode::clone ( ) const &
    
    	virtual ObjectBase * clone ( ) &&;
    
    
    	void setString(const ext::vector < SymbolType > & str) {
    		m_string = str;
    	}
    
    	const ext::vector < SymbolType > & getString ( ) const & {
    		return m_string;
    	}
    
    	ext::vector < SymbolType > && getString ( ) && {
    		return std::move ( m_string );
    	}
    
    	void setNumberOfVertices(int n) {
    		m_delta.resize(n);
    	}
    
    	void insertVertex(unsigned vertexNumber, const ext::map<ext::pair< int,int>,int> & edges) {
    		for(auto it = edges.begin();it!=edges.end();++it) {
    			m_delta[vertexNumber].insert({{it->first.first - 1,it->first.second - 1},it->second}); // to match indexing the string from 0
    		}
    	}
    
    	const ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > & getTransitions ( ) const & {
    		return m_delta;
    	}
    
    	ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > && getTransitions ( ) && {
    		return std::move ( m_delta );
    	}
    
    	bool addTransition ( unsigned from, size_t startIndex, size_t endIndex, unsigned to ) {
    		return m_delta [ from ].insert ( ext::make_pair ( ext::make_pair ( startIndex, endIndex ), to ) ).second;
    	}
    
    	void setTransitions ( ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > delta ) {
    		m_delta = std::move ( delta );
    	}
    
    /*	void print() {
    		cout << "PRINT" << endl;
    		cout << m_delta.size() << endl;
    		for(int i = 0;i<m_delta.size();i++) {
    			cout << "Vrchol " << i << endl;
    			for(auto it = m_delta[i].begin(); it != m_delta[i].end(); ++it) {
    				cout << it->first.first << "  " << it->first.second << endl;
    				cout << "	   -- ";
    				for(int j = it->first.first;j<=it->first.second;j++) {
    					cout << m_string[j];
    				}
    				cout << " --> " << it->second << endl;
    			}
    		}
    	}*/
    
    	void GetAllPathsLen(unsigned state, unsigned curLen, ext::set < unsigned > & res) const {
    		if(m_delta[state].size() == 0)
    			res.insert(m_string.size ( ) - curLen);
    
    		for(auto it = m_delta[state].begin();it!=m_delta[state].end();++it) {
    			GetAllPathsLen(it->second,curLen+it->first.second-it->first.first+1,res);
    		}
    	}
    
    	unsigned GetNextState(unsigned state,const ext::vector <SymbolType > & pattern, unsigned & index) const {
    		for(auto it = m_delta[state].begin();it!=m_delta[state].end();++it) {
    			if( m_string[it->first.first] != pattern[index])//hledám hranu, která má první znak stejný
    				continue;
    
    			for(unsigned i = it->first.first;i<=it->first.second;i++) { //chci projít všechny znaky, jestli se shodují se vzorem
    				if(index == pattern.size()) {
    					index += (it->first.second-i+1);
    					return it->second;
    				}
    
    				if(m_string[i] != pattern[index++]) { //pokud se neshodují - konec
    					throw "notfound";
    				}
    			}
    			return it->second;
    		}
    
    		throw "notfound";
    	}
    
    	/**
    	 * Prints XML representation of the tree to the output stream.
    	 * @param out output stream to which print the tree
    	 * @param tree tree to print
    	 */
    	virtual void operator >>( std::ostream & out ) const;
    
    	virtual int compare ( const ObjectBase & other ) const {
    		if ( ext::type_index ( typeid ( * this ) ) == ext::type_index ( typeid ( other ) ) ) return this->compare ( ( decltype ( * this ) )other );
    
    		return ext::type_index ( typeid ( * this ) ) - ext::type_index ( typeid ( other ) );
    	}
    
    	virtual int compare ( const CompactSuffixAutomatonTerminatingSymbol & other ) const;
    
    	virtual explicit operator std::string ( ) const;
    
    	explicit operator automaton::CompactNFA < SymbolType, unsigned > ( ) const;
    
    	virtual object::ObjectBase * inc ( ) &&;
    
    	typedef CompactSuffixAutomatonTerminatingSymbol < > normalized_type;
    
    };
    
    template < class SymbolType >
    
    object::ObjectBase * CompactSuffixAutomatonTerminatingSymbol < SymbolType >::clone ( ) const & {
    
    	return new CompactSuffixAutomatonTerminatingSymbol ( * this );
    }
    
    template < class SymbolType >
    
    object::ObjectBase * CompactSuffixAutomatonTerminatingSymbol < SymbolType >::clone ( ) && {
    
    	return new CompactSuffixAutomatonTerminatingSymbol ( std::move ( * this ) );
    }
    
    template < class SymbolType >
    void CompactSuffixAutomatonTerminatingSymbol < SymbolType >::operator >>( std::ostream & out ) const {
    	out << "(CompactSuffixAutomatonTerminatingSymbol " << this->m_string << ", " << this->m_delta << ")";
    }
    
    template < class SymbolType >
    int CompactSuffixAutomatonTerminatingSymbol < SymbolType >::compare ( const CompactSuffixAutomatonTerminatingSymbol & other ) const {
    	auto first = ext::tie ( getString ( ), getTransitions ( ) );
    	auto second = ext::tie ( other.getString ( ), other.getTransitions ( ) );
    
    	static ext::compare < decltype ( first ) > comp;
    
    	return comp ( first, second );
    }
    
    template < class SymbolType >
    CompactSuffixAutomatonTerminatingSymbol < SymbolType >::operator std::string ( ) const {
    	std::stringstream ss;
    	ss << * this;
    	return ss.str ( );
    }
    
    template < class SymbolType >
    CompactSuffixAutomatonTerminatingSymbol < SymbolType >::operator automaton::CompactNFA < SymbolType, unsigned > ( ) const {
    	automaton::CompactNFA < SymbolType, unsigned > res ( 0 );
    	res.setInputAlphabet ( ext::set < SymbolType > ( getString ( ).begin ( ), getString ( ).end ( ) ) );
    
    	ext::set < unsigned > states;
    	for ( unsigned state = 0; state < getTransitions ( ).size ( ); ++ state )
    		states.insert ( state );
    	res.setStates ( std::move ( states ) );
    
    	res.addFinalState ( 1 );
    
    	for ( unsigned state = 0; state < getTransitions ( ).size ( ); ++ state )
    		for ( const std::pair < const ext::pair < size_t, size_t >, unsigned > & transition : getTransitions ( ) [ state ] )
    			res.addTransition ( state, ext::vector < SymbolType > ( getString ( ).begin ( ) + transition.first.first, getString ( ).begin ( ) + transition.first.second + 1 ), transition.second );
    
    	return res;
    }
    
    template < class SymbolType >
    object::ObjectBase* CompactSuffixAutomatonTerminatingSymbol < SymbolType >::inc() && {
    	return new object::UniqueObject(object::Object(std::move(*this)), primitive::Integer(0));
    }
    
    } /* namespace stringology */
    
    } /* namespace indexes */
    
    namespace core {
    
    template < class SymbolType >
    struct normalize < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType >, typename std::enable_if < ! std::is_same < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType >, indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < > >::value >::type > {
    	static indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < > eval ( indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > && value ) {
    		indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < DefaultStateType > res;
    		res.setString ( alphabet::SymbolNormalize::normalizeSymbols ( std::move ( value ).getString ( ) ) );
    		res.setTransitions ( std::move ( value ).getTransitions ( ) );
    
    		return res;
    	}
    };
    
    template < class SymbolType >
    struct xmlApi < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > > {
    	static indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > parse ( ext::deque < sax::Token >::iterator & input );
    	static bool first ( const ext::deque < sax::Token >::const_iterator & input );
    	static const std::string & xmlTagName ( );
    	static void compose ( ext::deque < sax::Token > & output, const indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > & data );
    };
    
    template < class SymbolType >
    indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > xmlApi < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > >::parse ( ext::deque < sax::Token >::iterator & input ) {
    	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, xmlTagName ( ) );
    	ext::vector < SymbolType > string = core::xmlApi < ext::vector < SymbolType > >::parse ( input );
    	ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > delta = core::xmlApi < ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > >::parse ( input );
    	indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > res;
    	res.setString ( std::move ( string ) );
    	res.setTransitions ( std::move ( delta ) );
    
    	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, xmlTagName ( ) );
    	return res;
    }
    
    template < class SymbolType >
    bool xmlApi < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > >::first ( const ext::deque < sax::Token >::const_iterator & input ) {
    	return sax::FromXMLParserHelper::isToken ( input, sax::Token::TokenType::START_ELEMENT, xmlTagName ( ) );
    }
    
    template < class SymbolType >
    const std::string & xmlApi < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > >::xmlTagName ( ) {
    	static std::string xmlTagName = "CompactSuffixAutomatonTerminatingSymbol";
    
    	return xmlTagName;
    }
    
    template < class SymbolType >
    void xmlApi < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > >::compose ( ext::deque < sax::Token > & output, const indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > & index ) {
    	output.emplace_back ( xmlTagName ( ), sax::Token::TokenType::START_ELEMENT );
    	core::xmlApi < ext::vector < SymbolType > >::compose ( output, index.getString ( ) );
    	core::xmlApi < ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > >::compose ( output, index.getTransitions ( ) );
    	output.emplace_back ( xmlTagName ( ), sax::Token::TokenType::END_ELEMENT );
    }
    
    } /* namespace core */
    
    #endif /* COMPACT_SUFFIX_AUTOMATON_H_ */