Skip to content
Snippets Groups Projects
CompactSuffixAutomatonTerminatingSymbol.h 8.01 KiB
Newer Older
#include <ext/iostream>

#include <alib/string>
#include <alib/map>

#include <common/DefaultSymbolType.h>

#include <sax/FromXMLParserHelper.h>
#include <core/xmlApi.hpp>
#include <primitive/xml/Unsigned.h>
#include <primitive/xml/UnsignedLong.h>
#include <container/xml/ObjectsVector.h>
#include <container/xml/ObjectsMap.h>

#include <alphabet/common/SymbolNormalize.h>

#include <automaton/FSM/CompactDFA.h>

namespace indexes {

namespace stringology {

template < class SymbolType = DefaultSymbolType >
class CompactSuffixAutomatonTerminatingSymbol {
	ext::vector < SymbolType > m_string;
	ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > m_delta;

public:
	void setString(const ext::vector < SymbolType > & str) {
		m_string = str;
	}

	const ext::vector < SymbolType > & getString ( ) const & {
		return m_string;
	}

	ext::vector < SymbolType > && getString ( ) && {
		return std::move ( m_string );
	}

	void setNumberOfVertices(size_t n) {
		m_delta.resize(n);
	}

	void insertVertex(unsigned vertexNumber, const ext::map<ext::pair< int,int>,int> & edges) {
		for(const auto & edge : edges ) {
			m_delta[vertexNumber].insert({{edge.first.first - 1, edge.first.second - 1}, edge.second}); // to match indexing the string from 0
		}
	}

	const ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > & getTransitions ( ) const & {
		return m_delta;
	}

	ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > && getTransitions ( ) && {
		return std::move ( m_delta );
	}

	bool addTransition ( unsigned from, size_t startIndex, size_t endIndex, unsigned to ) {
		return m_delta [ from ].insert ( ext::make_pair ( ext::make_pair ( startIndex, endIndex ), to ) ).second;
	}

	void setTransitions ( ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > delta ) {
		m_delta = std::move ( delta );
	}

/*	void print() {
		cout << "PRINT" << endl;
		cout << m_delta.size() << endl;
		for(int i = 0;i<m_delta.size();i++) {
			cout << "Vrchol " << i << endl;
			for(auto it = m_delta[i].begin(); it != m_delta[i].end(); ++it) {
				cout << it->first.first << "  " << it->first.second << endl;
				cout << "	   -- ";
				for(int j = it->first.first;j<=it->first.second;j++) {
					cout << m_string[j];
				}
				cout << " --> " << it->second << endl;
			}
		}
	}*/

	void GetAllPathsLen(unsigned state, unsigned curLen, ext::set < unsigned > & res) const {
		if(m_delta[state].empty ( ))
			res.insert(m_string.size ( ) - curLen);

		for(const auto & it : m_delta[state]) {
			GetAllPathsLen(it.second,curLen+it.first.second-it.first.first+1,res);
		}
	}

	unsigned GetNextState(unsigned state,const ext::vector <SymbolType > & pattern, unsigned & index) const {
		for(const auto & it : m_delta[state]) {
			if( m_string[it.first.first] != pattern[index])//hledám hranu, která má první znak stejný
			for(unsigned i = it.first.first;i<=it.first.second;i++) { //chci projít všechny znaky, jestli se shodují se vzorem
				if(index == pattern.size()) {
					index += (it.first.second-i+1);
					return it.second;
				}

				if(m_string[i] != pattern[index++]) { //pokud se neshodují - konec
					throw "notfound";
				}
			}
	friend ext::ostream & operator << ( ext::ostream & out, const CompactSuffixAutomatonTerminatingSymbol & instance ) {
		return out << "(CompactSuffixAutomatonTerminatingSymbol " << instance.m_string << ", " << instance.m_delta << ")";
	}
	auto operator <=> ( const CompactSuffixAutomatonTerminatingSymbol & other ) const {
		return std::tie ( getString ( ), getTransitions ( ) ) <=> std::tie ( other.getString ( ), other.getTransitions ( ) );
	}

	bool operator == ( const CompactSuffixAutomatonTerminatingSymbol & other ) const {
		return std::tie ( getString ( ), getTransitions ( ) ) == std::tie ( other.getString ( ), other.getTransitions ( ) );
	}
	explicit operator automaton::CompactDFA < SymbolType, unsigned > ( ) const;
};

template < class SymbolType >
CompactSuffixAutomatonTerminatingSymbol < SymbolType >::operator automaton::CompactDFA < SymbolType, unsigned > ( ) const {
	automaton::CompactDFA < SymbolType, unsigned > res ( 0 );
	res.setInputAlphabet ( ext::set < SymbolType > ( getString ( ).begin ( ), getString ( ).end ( ) ) );

	ext::set < unsigned > states;
Jan Trávníček's avatar
Jan Trávníček committed
	for ( size_t state = 0; state < getTransitions ( ).size ( ); ++ state )
		states.insert ( state );
	res.setStates ( std::move ( states ) );

	res.addFinalState ( 1 );

Jan Trávníček's avatar
Jan Trávníček committed
	for ( size_t state = 0; state < getTransitions ( ).size ( ); ++ state )
		for ( const std::pair < const ext::pair < size_t, size_t >, unsigned > & transition : getTransitions ( ) [ state ] )
			res.addTransition ( state, ext::vector < SymbolType > ( getString ( ).begin ( ) + transition.first.first, getString ( ).begin ( ) + transition.first.second + 1 ), transition.second );

	return res;
}

} /* namespace stringology */

} /* namespace indexes */

namespace core {

template < class SymbolType >
struct normalize < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > > {
	static indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < > eval ( indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > && value ) {
		indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < DefaultStateType > res;
		res.setString ( alphabet::SymbolNormalize::normalizeSymbols ( std::move ( value ).getString ( ) ) );
		res.setTransitions ( std::move ( value ).getTransitions ( ) );

		return res;
	}
};

template < class SymbolType >
struct xmlApi < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > > {
	static indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > parse ( ext::deque < sax::Token >::iterator & input );
	static bool first ( const ext::deque < sax::Token >::const_iterator & input );
	static std::string xmlTagName ( );
	static void compose ( ext::deque < sax::Token > & output, const indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > & index );
};

template < class SymbolType >
indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > xmlApi < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > >::parse ( ext::deque < sax::Token >::iterator & input ) {
	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, xmlTagName ( ) );
	ext::vector < SymbolType > string = core::xmlApi < ext::vector < SymbolType > >::parse ( input );
	ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > delta = core::xmlApi < ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > >::parse ( input );
	indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > res;
	res.setString ( std::move ( string ) );
	res.setTransitions ( std::move ( delta ) );

	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, xmlTagName ( ) );
	return res;
}

template < class SymbolType >
bool xmlApi < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > >::first ( const ext::deque < sax::Token >::const_iterator & input ) {
	return sax::FromXMLParserHelper::isToken ( input, sax::Token::TokenType::START_ELEMENT, xmlTagName ( ) );
}

template < class SymbolType >
std::string xmlApi < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > >::xmlTagName ( ) {
	return "CompactSuffixAutomatonTerminatingSymbol";
}

template < class SymbolType >
void xmlApi < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > >::compose ( ext::deque < sax::Token > & output, const indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > & index ) {
	output.emplace_back ( xmlTagName ( ), sax::Token::TokenType::START_ELEMENT );
	core::xmlApi < ext::vector < SymbolType > >::compose ( output, index.getString ( ) );
	core::xmlApi < ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > >::compose ( output, index.getTransitions ( ) );
	output.emplace_back ( xmlTagName ( ), sax::Token::TokenType::END_ELEMENT );
}

} /* namespace core */