diff --git a/alib2algo_experimental/src/stringology/indexing/ExperimentalCompactSuffixAutomatonConstruct.cpp b/alib2algo_experimental/src/stringology/indexing/ExperimentalCompactSuffixAutomatonConstruct.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..53fd08eff6bd153d955c7747fe67ac22d840253d
--- /dev/null
+++ b/alib2algo_experimental/src/stringology/indexing/ExperimentalCompactSuffixAutomatonConstruct.cpp
@@ -0,0 +1,9 @@
+#include "ExperimentalCompactSuffixAutomatonConstruct.h"
+#include <registration/AlgoRegistration.hpp>
+
+// Registers the construct overload taking a string::LinearStringTerminatingSymbol.
+auto CompactSuffixAutomatonTerminatingSymbolLinearStringTerminatingSymbol = registration::AbstractRegister < stringology::indexing::ExperimentalCompactSuffixAutomatonConstruct, indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < >, const string::LinearStringTerminatingSymbol & > ( stringology::indexing::ExperimentalCompactSuffixAutomatonConstruct::construct );
+
+// Registers the construct overload taking a string::LinearString < >.
+auto CompactSuffixAutomatonTerminatingSymbolLinearString = registration::AbstractRegister < stringology::indexing::ExperimentalCompactSuffixAutomatonConstruct, indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < >, const string::LinearString < > & > ( stringology::indexing::ExperimentalCompactSuffixAutomatonConstruct::construct );
+
diff --git a/alib2algo_experimental/src/stringology/indexing/ExperimentalCompactSuffixAutomatonConstruct.h b/alib2algo_experimental/src/stringology/indexing/ExperimentalCompactSuffixAutomatonConstruct.h
new file mode 100644
index 0000000000000000000000000000000000000000..9a14360d81dbdbd10b9ac4f74e5e5efad70fb6e2
--- /dev/null
+++ b/alib2algo_experimental/src/stringology/indexing/ExperimentalCompactSuffixAutomatonConstruct.h
@@ -0,0 +1,376 @@
+#ifndef EXPERIMENTAL_COMPACT_SUFFIX_AUTOMATON_CONSTRUCT_H_
+#define EXPERIMENTAL_COMPACT_SUFFIX_AUTOMATON_CONSTRUCT_H_
+
+#include <climits>
+#include <iostream>
+#include <tuple>
+
+#include <indexes/stringology/CompactSuffixAutomatonTerminatingSymbol.h>
+#include <string/LinearStringTerminatingSymbol.h>
+#include <alphabet/EndSymbol.h>
+#include <string/LinearString.h>
+
+namespace stringology {
+
+namespace indexing {
+
+/**
+ * Builds an indexes::stringology::CompactSuffixAutomatonTerminatingSymbol index
+ * from a linear string.
+ */
+class ExperimentalCompactSuffixAutomatonConstruct {
+
+	/**
+	 * The construction itself, working on integer vertex ids. Edge labels are pairs of
+	 * indices into w (1-based for the word, negative for the alphabet edges leaving T).
+	 * The end of an edge created towards the sink is stored as INT_MAX until changeIntMaxToE().
+	 */
+	template <class SymbolType>
+	class CompactSuffixAutomatonConstructInt {
+		int nil; // marker for "no vertex" (INT_MIN)
+		int newVertexNumber; // id given to the next created vertex
+		int e; // index of the symbol currently processed; stands in for INT_MAX label ends
+		int T; // the "bottom" vertex; gets an edge to source for every alphabet symbol
+		int source;
+		int sink;
+		int m; // alphabet size
+		ext::map<int,SymbolType> w; // the word at indices 1..wLen; indices -m..-1 hold the alphabet
+		int wLen; // length of the word w
+		SymbolType endChar; // symbol ending the string, unique in the whole string ("$" in the paper)
+
+		// vertex -> ( edge label ( first, last index into w ) -> target vertex )
+		ext::map<int,ext::map<ext::pair<int,int>,int>> edges;
+		ext::map<int,int> suf;
+		ext::map<int,int> length;
+
+		// Creates a vertex without outgoing edges and returns its id.
+		int createNode() {
+			edges.insert({newVertexNumber,ext::map<ext::pair<int,int>,int>()});
+			return newVertexNumber++;
+		}
+
+		// Adds an edge labelled ( first, second ) from fromVertex to toVertex.
+		void createEdge(int fromVertex, int first, int second, int toVertex) {
+			edges.at(fromVertex).insert({{first, second},toVertex});
+		}
+
+		// Finds the w[k]-edge of s, i.e. the edge leaving s whose label starts with c
+		// (callers pass c = w[k]); used in several places of the algorithm. The target is
+		// stored to sc, the label to kc and pc. Throws the literal "neniHrana" if there is none.
+		void wkEdge(int s, SymbolType c, int & sc, int & kc, int & pc) {
+			for ( const std::pair < const ext::pair < int, int >, int > & edge : edges.at ( s ) ) {
+				if ( w.at ( edge.first.first ) == c) {
+					sc = edge.second;
+					kc = edge.first.first;
+					pc = edge.first.second;
+					return;
+				}
+			}
+			throw "neniHrana";
+		}
+
+		// Helper for line 3 of split_edge() in the pseudocode: replaces the edge ( kc, pc )
+		// from s to sc by an edge from s to r followed by an edge from r to sc.
+		void replaceTheEdgeBy(int s, int k, int p, int sc, int kc, int pc, int r) {
+			edges.at(s).erase({kc,pc});
+
+			createEdge(s,kc,kc+p-k,r);
+			createEdge(r,kc+p-k+1,pc,sc);
+		}
+
+		// Helper for line 2 of redirect_edge in the pseudocode: replaces the edge ( kc, pc )
+		// leaving s by the edge ( kc, kc+p-k ) leading to r.
+		void replaceTheEdgeByEdge(int s, int k, int p, int kc, int pc, int r) {
+			edges.at(s).erase({kc,pc});
+
+			createEdge(s,kc,kc+p-k,r);
+		}
+
+		// Helper for line 9 of separate_node in the pseudocode: replaces the w[k]-edge
+		// of s by the edge ( k, p ) leading to rc.
+		void replaceThewkEdge(int s, int k, int p, int rc) {
+			int kcc,scc,pcc;
+			wkEdge(s,w.at(k),scc,kcc,pcc);
+
+			edges.at(s).erase({kcc,pcc});
+			createEdge(s,k,p,rc);
+		}
+
+		// Helper for line 4 of check_end_point in the pseudocode: tells whether some edge
+		// leaving s has a label whose first symbol equals c.
+		bool thereIsACedgeFromS(int s, SymbolType c) {
+			for ( const std::pair < const ext::pair < int, int >, int > & edge : edges.at ( s ) )
+				if ( w.at ( edge.first.first ) == c )
+					return true;
+
+			return false;
+		}
+
+		// Creates a new vertex with copies of all edges leaving sc and returns its id.
+		int duplicationOf(int sc) {
+			int rc = createNode();
+
+			for ( const std::pair < const ext::pair < int, int >, int > & edge : edges.at ( sc ) )
+				createEdge ( rc, edge.first.first, edge.first.second, edge.second );
+
+			return rc;
+		}
+
+		// End of the helpers. What follows transcribes the pseudocode as closely as possible.
+
+		bool check_end_point(int s,int k,int p,SymbolType c) {
+			if(k<= p) {
+				int sc,kc,pc; // s' k' p'
+				wkEdge(s,w.at(k),sc,kc,pc);
+				return (c == w.at(kc+p-k+1));
+			}
+			return thereIsACedgeFromS(s,c);
+		}
+
+		int split_edge(int s,int k,int p) {
+			int sc,kc,pc; // s' k' p'
+			wkEdge(s,w.at(k),sc,kc,pc);
+
+			int r = createNode();
+			replaceTheEdgeBy(s,k,p,sc,kc,pc,r);
+
+			int len = length.at(s);
+			if(len == INT_MAX)
+				len = e;
+
+			length[r] = len + (p-k+1);
+			return r;
+		}
+
+		ext::pair<int,int> canonize(int s,int k,int p) { // p may equal e
+			if(k>p)
+				return ext::make_pair ( s, k );
+
+			int sc,kc,pc;
+			wkEdge(s,w.at(k),sc,kc,pc);
+
+			if(pc == INT_MAX) // INT_MAX stands for the current value of e
+				pc = e;
+
+			while(pc-kc <= p-k) {
+				k = k+pc-kc+1;
+				s = sc;
+				if(k<=p) {
+					wkEdge(s,w.at(k),sc,kc,pc);
+					if(pc == INT_MAX)
+						pc = e;
+				}
+			}
+			return ext::make_pair ( s, k );
+		}
+
+		void redirect_edge(int s, int k, int p, int r) {
+			int sc,kc,pc;
+			wkEdge(s,w.at(k),sc,kc,pc);
+
+			replaceTheEdgeByEdge(s,k,p,kc,pc,r);
+		}
+
+		int extension(int s, int k, int p) {
+			if(k > p)
+				return s;
+
+			int kc,pc,sc;
+			wkEdge(s,w.at(k),sc,kc,pc);
+			return sc;
+		}
+
+		ext::pair<int,int> separate_node(int s,int k,int p) { // p == e
+			int sc, kc;
+			std::tie ( sc, kc ) = canonize(s,k,p);
+			if ( kc <= p)
+				return ext::make_pair ( sc, kc );
+
+			int len = length.at(s);
+			if(len == INT_MAX)
+				len = e;
+
+			int lensc = length.at(sc);
+			if(lensc == INT_MAX)
+				lensc = e;
+
+			if(lensc == len+(p-k+1))
+				return ext::make_pair ( sc, kc );
+
+			int rc = duplicationOf(sc);
+			suf[rc] = suf.at(sc); suf[sc] = rc;
+			length[rc] = len +(p-k+1);
+			do {
+				replaceThewkEdge(s,k,p,rc);
+
+				std::tie ( s, k ) = canonize(suf.at(s),k,p-1);
+			} while ( ext::make_pair ( sc, kc ) == canonize(s,k,p));
+
+			return ext::make_pair ( rc, p+1 );
+		}
+
+		ext::pair<int,int> update(int s,int k,int p) {
+			SymbolType c = w.at(p);
+			int oldr = nil;
+
+			int sc = nil; // the paper leaves s' uninitialized, but it is compared below, so it needs some value
+			int r = nil; // always assigned before use: sc == nil sends the first pass through split_edge or r = s
+			while(!check_end_point(s,k,p-1,c)) {
+				if(k<= p-1) {
+					if(sc == extension(s,k,p)) {
+						redirect_edge(s,k,p-1,r);
+						std::tie ( s, k ) = canonize(suf.at(s),k,p-1);
+						continue;
+					} else {
+						sc = extension(s,k,p);
+						r = split_edge(s,k,p-1);
+					}
+				} else
+					r = s;
+
+				createEdge(r,p,INT_MAX,sink); // p == e always; p is replaced by INT_MAX
+
+				if(oldr != nil)
+					suf[oldr] = r;
+				oldr = r;
+
+				std::tie ( s, k ) = canonize(suf.at(s),k,p-1);
+			}
+
+			if(oldr != nil)
+				suf[oldr] = s;
+			return separate_node(s,k,p);
+		}
+
+	public:
+		// Stores subject as w[1..wLen] and its distinct symbols as w[-1..-m];
+		// the last symbol of subject is remembered as the terminating symbol.
+		CompactSuffixAutomatonConstructInt ( const /*string::LinearString*/ext::vector < SymbolType > & subject ) : wLen ( subject.size ( ) ), endChar ( subject.back ( ) ) {
+			nil = INT_MIN;
+			newVertexNumber = -1;
+			ext::set < SymbolType > alphabet ( subject.begin ( ), subject.end ( ) );
+
+			for ( int i = 0; i < wLen; ++ i )
+				w.insert ( std::make_pair ( i + 1, subject [ i ] ) );
+
+			m = alphabet.size();
+
+			int i = -1;
+			for ( const SymbolType & symbol : alphabet ) // the alphabet has to be laid out on the negative indices -1 to -m
+				w.insert ( std::make_pair ( i --, symbol ) );
+		}
+
+		// Creates T, source and sink and then calls update for w[1], w[2], ... until the terminating symbol was processed.
+		void startConstruction() {
+			T = createNode();
+			source = createNode();
+			sink = createNode();
+			for(int j = 1;j<=m;j++)
+				createEdge(T,-j,-j,source);
+			suf[source] = T;
+			length[source] = 0; length[T] = -1;
+			e = 0; length[sink] = INT_MAX;
+
+			int s = source;
+			int k = 1;
+			int i = 0;
+			do {
+				i = i+1; e = i;
+				std::tie ( s, k ) = update ( s, k, i );
+				//print ( i );
+			} while (w.at(i) != endChar);
+		}
+
+		// Debug dump to std::cout of vertices 0 and up after step i; only referenced from commented-out calls.
+		void print ( int i ) const {
+			std::cout << "Krok " << i << std::endl;
+			for(int pi = 0;pi<newVertexNumber;pi++) {
+				std::cout << "Vrchol " << pi << std::endl;
+				for(ext::map<ext::pair<int,int>,int>::const_iterator it = edges.at(pi).begin(); it != edges.at(pi).end(); ++it) {
+					std::cout << it->first.first << " " << it->first.second;
+					std::cout << " -- ";
+					for(int j = it->first.first;j<=it->first.second;j++) {
+						if(j>i) break;
+						std::cout << w.find(j)->second;
+					}
+					std::cout << " --> " << it->second << std::endl;
+				}
+			}
+
+			std::cout << "-------------" << std::endl;
+
+			std::cout << "Delky" << std::endl;
+			for(int pi = 0;pi<newVertexNumber;pi++)
+				std::cout << "Vrchol " << pi << " " << length.find(pi)->second << std::endl;
+
+			std::cout << "Suffix links" << std::endl;
+			for(int pi = 0;pi<newVertexNumber;pi++) {
+				auto link = suf.find ( pi );
+				if ( link == suf.end ( ) ) // not every vertex has a suffix link recorded; do not dereference end ( )
+					continue;
+				std::cout << "Vrchol " << pi << " " << link->second << std::endl;
+			}
+		}
+
+		// Replaces the INT_MAX label end of every edge by the value of e.
+		void changeIntMaxToE() {
+			for(auto & vertex : edges) {
+				ext::map<ext::pair<int,int>,int> & outgoing = vertex.second;
+				ext::vector<ext::pair<int,int>> open; // collected first so the map is not modified while it is iterated
+				for(const auto & edge : outgoing)
+					if(edge.first.second == INT_MAX)
+						open.push_back(edge.first);
+
+				for(const ext::pair<int,int> & label : open) {
+					int target = outgoing.at(label);
+					outgoing.erase(label);
+					outgoing.insert({{label.first,e},target});
+				}
+			}
+		}
+
+		const ext::map<int,ext::map<ext::pair<int,int>,int>> & getEdges ( ) const {
+			return edges;
+		}
+	};
+
+public:
+	/**
+	 * Runs the construction on the content of subject and copies the resulting edges of all vertices but T into the index.
+	 * NOTE(review): the construction reads subject.getContent ( ).back ( ), so the content is assumed to be non-empty - confirm.
+	 */
+	static indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < DefaultSymbolType > construct ( const string::LinearStringTerminatingSymbol & subject ) {
+		CompactSuffixAutomatonConstructInt < DefaultSymbolType > algo (subject.getContent ( ) );
+		algo.startConstruction();
+		algo.changeIntMaxToE();
+		//algo.print ( subject.getContent().size ( ) );
+
+		indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < DefaultSymbolType > res;
+		res.setString(subject.getContent ( ) );
+		res.setNumberOfVertices(algo.getEdges().size()-1);
+
+		for(auto it = algo.getEdges().begin();it!=algo.getEdges().end();++it)
+			if(it->first != -1) // -1 is T, the first vertex created; it is left out of the index
+				res.insertVertex(it->first,it->second);
+
+		return res;
+	}
+
+	/**
+	 * Appends an end symbol obtained from common::createUnique to the content and alphabet of subject and delegates to the overload above.
+	 */
+	static indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < DefaultSymbolType > construct ( const string::LinearString < DefaultSymbolType > & subject ) {
+		DefaultSymbolType endSymbol = common::createUnique ( alphabet::EndSymbol::instance < DefaultSymbolType > ( ), subject.getAlphabet ( ) );
+		ext::vector < DefaultSymbolType > content = subject.getContent ( );
+		content.push_back ( endSymbol );
+		ext::set < DefaultSymbolType > alphabet = subject.getAlphabet ( );
+		alphabet.insert ( endSymbol );
+		return construct ( string::LinearStringTerminatingSymbol ( alphabet, endSymbol, content ) );
+	}
+};
+
+} /* namespace indexing */
+
+} /* namespace stringology */
+
+#endif /* EXPERIMENTAL_COMPACT_SUFFIX_AUTOMATON_CONSTRUCT_H_ */
diff --git
a/alib2algo_experimental/src/stringology/query/CompactSuffixAutomatonFactors.cpp b/alib2algo_experimental/src/stringology/query/CompactSuffixAutomatonFactors.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..87b6d366fd4f335907f6b64ef751ca5dceff07a1
--- /dev/null
+++ b/alib2algo_experimental/src/stringology/query/CompactSuffixAutomatonFactors.cpp
@@ -0,0 +1,5 @@
+#include "CompactSuffixAutomatonFactors.h"
+#include <registration/AlgoRegistration.hpp>
+
+// Registers query for the default symbol type.
+auto CompactSuffixAutomatonQueryLinearString = registration::AbstractRegister < stringology::query::CompactSuffixAutomatonFactors, ext::set < unsigned >, const indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < > &, const string::LinearString < > & > ( stringology::query::CompactSuffixAutomatonFactors::query );
diff --git a/alib2algo_experimental/src/stringology/query/CompactSuffixAutomatonFactors.h b/alib2algo_experimental/src/stringology/query/CompactSuffixAutomatonFactors.h
new file mode 100644
index 0000000000000000000000000000000000000000..110950baf198cafcdff49a0cf9a9cd35faf6ec8e
--- /dev/null
+++ b/alib2algo_experimental/src/stringology/query/CompactSuffixAutomatonFactors.h
@@ -0,0 +1,48 @@
+#ifndef COMPACT_SUFFIX_AUTOMATON_FACTORS_H_
+#define COMPACT_SUFFIX_AUTOMATON_FACTORS_H_
+
+#include <indexes/stringology/CompactSuffixAutomatonTerminatingSymbol.h>
+#include <string/LinearString.h>
+
+namespace stringology {
+
+namespace query {
+
+/**
+ * Query of a pattern in a CompactSuffixAutomatonTerminatingSymbol index.
+ */
+class CompactSuffixAutomatonFactors {
+public:
+
+	/**
+	 * Walks the index from state 0 along the pattern using GetNextState and returns
+	 * what GetAllPathsLen collects for the state reached.
+	 *
+	 * @param a the index to walk
+	 * @param pattern the string to look for
+	 * @return the collected set; an empty set when GetNextState reports a mismatch
+	 */
+	template < class SymbolType >
+	static ext::set < unsigned > query ( const indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > & a, const string::LinearString < SymbolType > & pattern ) {
+		try {
+			unsigned curState = 0;
+			unsigned index = 0;
+
+			while(index < pattern.getContent ().size()) {
+				curState = a.GetNextState(curState,pattern.getContent(),index);
+			}
+			ext::set < unsigned > res;
+			a.GetAllPathsLen(curState,index,res);
+
+			return res;
+		} catch ( const char * ) { // GetNextState throws a string literal on a mismatch; anything else propagates
+			return { };
+		}
+	}
+};
+
+} /* namespace query */
+
+} /* namespace stringology */
+
+#endif /* COMPACT_SUFFIX_AUTOMATON_FACTORS_H_ */
diff --git a/alib2algo_experimental/test-src/stringology/query/CompactSuffixAutomatonQueryTest.cpp b/alib2algo_experimental/test-src/stringology/query/CompactSuffixAutomatonQueryTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ccf61d48df4f7291c0155f8a2ba6f10d5b49ad4a
--- /dev/null
+++ b/alib2algo_experimental/test-src/stringology/query/CompactSuffixAutomatonQueryTest.cpp
@@ -0,0 +1,61 @@
+#include "CompactSuffixAutomatonQueryTest.h"
+
+#include <string/String.h>
+#include <stringology/indexing/ExperimentalCompactSuffixAutomatonConstruct.h>
+#include <stringology/query/CompactSuffixAutomatonFactors.h>
+#include <stringology/exact/ExactFactorMatch.h>
+
+#include <string/generate/RandomStringFactory.h>
+#include <string/generate/RandomSubstringFactory.h>
+
+#include <primitive/Character.h>
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( CompactSuffixAutomatonQueryTest, "stringology" );
+CPPUNIT_TEST_SUITE_REGISTRATION ( CompactSuffixAutomatonQueryTest );
+
+void CompactSuffixAutomatonQueryTest::setUp ( ) {
+}
+
+void CompactSuffixAutomatonQueryTest::tearDown ( ) {
+}
+
+// Compares the result of the index query with ExactFactorMatch::match on fixed subject/pattern pairs and on one generated string.
+void CompactSuffixAutomatonQueryTest::testCDAWG ( ) {
+
+	ext::vector<std::string> subjects;
+	ext::vector<std::string> patterns;
+
+	subjects.push_back("a"); patterns.push_back("a");
+	subjects.push_back("a"); patterns.push_back("b");
+	subjects.push_back("alfalfalfa"); patterns.push_back("alfalfalfa");
+	subjects.push_back("alfalfalfa"); patterns.push_back("blfalfalfa");
+	subjects.push_back("alfalfalfa"); patterns.push_back("alfalfalfb");
+	subjects.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa"); patterns.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa");
+	subjects.push_back("alfalfalfaalfalfalfaabfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa"); patterns.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa");
+	subjects.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfaa"); patterns.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfaa");
+	subjects.push_back("atggccttgcc"); patterns.push_back("gcc");
+	subjects.push_back("aaaaaaaaaa"); patterns.push_back("a");
+
+	for(size_t i = 0; i < subjects.size(); ++i) {
+		string::LinearString < > subject = core::normalize < string::LinearString < char > >::eval ( string::LinearString < char > ( subjects[i] ) );
+		string::LinearString < > pattern = core::normalize < string::LinearString < char > >::eval ( string::LinearString < char > ( patterns[i] ) );
+
+		indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < > index = stringology::indexing::ExperimentalCompactSuffixAutomatonConstruct::construct ( subject );
+
+		ext::set < unsigned > res = stringology::query::CompactSuffixAutomatonFactors::query ( index, pattern );
+		ext::set < unsigned > ref = stringology::exact::ExactFactorMatch::match ( subject, pattern );
+		CPPUNIT_ASSERT ( res == ref );
+
+		std::cout << subjects[i] << ' ' << patterns[i] << ' ' << res << std::endl;
+	}
+
+	auto longSubject = string::generate::RandomStringFactory::generateLinearString (4000, 26, false, true);
+	auto longPattern = string::generate::RandomSubstringFactory::generateSubstring(2, longSubject );
+
+	indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < > index = stringology::indexing::ExperimentalCompactSuffixAutomatonConstruct::construct ( longSubject );
+
+	ext::set < unsigned > res = stringology::query::CompactSuffixAutomatonFactors::query ( index, longPattern );
+	ext::set < unsigned > ref = stringology::exact::ExactFactorMatch::match ( longSubject, longPattern );
+	std::cout << "long: " << res << std::endl;
+	CPPUNIT_ASSERT ( res == ref);
+}
diff --git a/alib2algo_experimental/test-src/stringology/query/CompactSuffixAutomatonQueryTest.h b/alib2algo_experimental/test-src/stringology/query/CompactSuffixAutomatonQueryTest.h
new
file mode 100644
index 0000000000000000000000000000000000000000..67286b0117a825cd725fe02796622ef8164c0616
--- /dev/null
+++ b/alib2algo_experimental/test-src/stringology/query/CompactSuffixAutomatonQueryTest.h
@@ -0,0 +1,18 @@
+#ifndef COMPACT_SUFFIX_AUTOMATON_QUERY_TEST_H_
+#define COMPACT_SUFFIX_AUTOMATON_QUERY_TEST_H_
+
+#include <cppunit/extensions/HelperMacros.h>
+
+class CompactSuffixAutomatonQueryTest : public CppUnit::TestFixture {
+	CPPUNIT_TEST_SUITE ( CompactSuffixAutomatonQueryTest );
+	CPPUNIT_TEST ( testCDAWG );
+	CPPUNIT_TEST_SUITE_END ( );
+
+public:
+	void setUp ( );
+	void tearDown ( );
+
+	void testCDAWG ( );
+};
+
+#endif // COMPACT_SUFFIX_AUTOMATON_QUERY_TEST_H_
diff --git a/alib2data_experimental/src/indexes/stringology/CompactSuffixAutomatonTerminatingSymbol.cpp b/alib2data_experimental/src/indexes/stringology/CompactSuffixAutomatonTerminatingSymbol.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a9b9ab4a9170763e4815727d63a5a31fbcb7b4a6
--- /dev/null
+++ b/alib2data_experimental/src/indexes/stringology/CompactSuffixAutomatonTerminatingSymbol.cpp
@@ -0,0 +1,25 @@
+/*
+ * CompactSuffixAutomatonTerminatingSymbol.cpp
+ *
+ * Created on: Jan 8, 2017
+ * Author: Jan Travnicek
+ */
+
+#include "CompactSuffixAutomatonTerminatingSymbol.h"
+
+#include <registration/CastRegistration.hpp>
+#include <registration/ValuePrinterRegistration.hpp>
+#include <registration/XmlRegistration.hpp>
+
+namespace {
+
+static auto DFAFromSuffixAutomaton = registration::CastRegister < automaton::CompactNFA < DefaultSymbolType, unsigned >, indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < > > ( );
+
+static auto valuePrinter = registration::ValuePrinterRegister < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < > > ( );
+
+static auto xmlWrite = registration::XmlWriterRegister < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < > > ( );
+static auto xmlRead = registration::XmlReaderRegister < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < > > ( );
+
+static auto xmlGroup = registration::XmlRegisterTypeInGroup < object::Object, indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < > > ( );
+
+} /* namespace */
diff --git a/alib2data_experimental/src/indexes/stringology/CompactSuffixAutomatonTerminatingSymbol.h b/alib2data_experimental/src/indexes/stringology/CompactSuffixAutomatonTerminatingSymbol.h
new file mode 100644
index 0000000000000000000000000000000000000000..5e65edd491f5aec81731bda57272cf746c50a19d
--- /dev/null
+++ b/alib2data_experimental/src/indexes/stringology/CompactSuffixAutomatonTerminatingSymbol.h
@@ -0,0 +1,300 @@
+/*
+ * CompactSuffixAutomatonTerminatingSymbol.h
+ *
+ * Created on: Jan 8, 2017
+ * Author: Jan Travnicek
+ */
+
+#ifndef COMPACT_SUFFIX_AUTOMATON_TERMINATING_SYMBOL_H_
+#define COMPACT_SUFFIX_AUTOMATON_TERMINATING_SYMBOL_H_
+
+#include <alib/string>
+#include <alib/map>
+#include <alib/iostream>
+#include <sstream>
+
+#include <common/DefaultSymbolType.h>
+
+#include <object/UniqueObject.h>
+#include <object/ObjectBase.h>
+
+#include <sax/FromXMLParserHelper.h>
+#include <core/xmlApi.hpp>
+#include <primitive/xml/Unsigned.h>
+#include <primitive/xml/UnsignedLong.h>
+#include <container/xml/ObjectsVector.h>
+#include <container/xml/ObjectsMap.h>
+
+#include <alphabet/common/SymbolNormalize.h>
+
+#include <automaton/FSM/CompactNFA.h>
+
+namespace indexes {
+
+namespace stringology {
+
+class GeneralAlphabet;
+
+/**
+ * Compact suffix automaton index of a string. A transition is labelled by a pair of
+ * inclusive 0-based indices ( first, last ) into the stored string rather than by a
+ * copy of the corresponding symbols.
+ */
+template < class SymbolType = DefaultSymbolType >
+class CompactSuffixAutomatonTerminatingSymbol : public object::ObjectBase {
+	ext::vector < SymbolType > m_string;
+	ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > m_delta; // m_delta [ state ] maps a label to the target state
+
+public:
+	/**
+	 * @return a heap-allocated copy of this object
+	 */
+	virtual ObjectBase * clone ( ) const;
+
+	/**
+	 * @return a heap-allocated object move-constructed from this one
+	 */
+	virtual ObjectBase * plunder ( ) &&;
+
+	void setString(ext::vector < SymbolType > str) {
+		m_string = std::move ( str );
+	}
+
+	const ext::vector < SymbolType > & getString ( ) const & {
+		return m_string;
+	}
+
+	ext::vector < SymbolType > && getString ( ) && {
+		return std::move ( m_string );
+	}
+
+	void setNumberOfVertices(int n) {
+		m_delta.resize(n);
+	}
+
+	/**
+	 * Adds the given edges as transitions of state vertexNumber. Both label indices
+	 * are decreased by one on the way in.
+	 */
+	void insertVertex(unsigned vertexNumber, const ext::map<ext::pair< int,int>,int> & edges) {
+		for(auto it = edges.begin();it!=edges.end();++it) {
+			m_delta[vertexNumber].insert({{it->first.first - 1,it->first.second - 1},it->second}); // to match indexing the string from 0
+		}
+	}
+
+	const ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > & getTransitions ( ) const & {
+		return m_delta;
+	}
+
+	ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > && getTransitions ( ) && {
+		return std::move ( m_delta );
+	}
+
+	bool addTransition ( unsigned from, size_t startIndex, size_t endIndex, unsigned to ) {
+		return m_delta [ from ].insert ( ext::make_pair ( ext::make_pair ( startIndex, endIndex ), to ) ).second;
+	}
+
+	void setTransitions ( ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > delta ) {
+		m_delta = std::move ( delta );
+	}
+
+/*	void print() {
+		cout << "PRINT" << endl;
+		cout << m_delta.size() << endl;
+		for(int i = 0;i<m_delta.size();i++) {
+			cout << "Vrchol " << i << endl;
+			for(auto it = m_delta[i].begin(); it != m_delta[i].end(); ++it) {
+				cout << it->first.first << " " << it->first.second << endl;
+				cout << " -- ";
+				for(int j = it->first.first;j<=it->first.second;j++) {
+					cout << m_string[j];
+				}
+				cout << " --> " << it->second << endl;
+			}
+		}
+	}*/
+
+	/**
+	 * Follows every path from state to a state without outgoing transitions and inserts
+	 * ( length of the stored string - length of the path ) into res. The path length starts
+	 * at curLen and grows by the label length of every transition taken.
+	 */
+	void GetAllPathsLen(unsigned state, unsigned curLen, ext::set < unsigned > & res) const {
+		if(m_delta[state].size() == 0)
+			res.insert(m_string.size ( ) - curLen);
+
+		for(auto it = m_delta[state].begin();it!=m_delta[state].end();++it) {
+			GetAllPathsLen(it->second,curLen+it->first.second-it->first.first+1,res);
+		}
+	}
+
+	/**
+	 * Takes the transition of state whose label starts with pattern [ index ] and compares
+	 * the label with the pattern symbol by symbol while advancing index. If the pattern ends
+	 * inside the label, index is advanced by the number of label symbols left unread.
+	 *
+	 * @return the target state of the transition taken
+	 * @throws the string literal "notfound" if no transition starts with pattern [ index ] or a later symbol differs
+	 */
+	unsigned GetNextState(unsigned state,const ext::vector <SymbolType > & pattern, unsigned & index) const {
+		for(auto it = m_delta[state].begin();it!=m_delta[state].end();++it) {
+			if( m_string[it->first.first] != pattern[index]) // looking for the edge whose first symbol is the same
+				continue;
+
+			for(unsigned i = it->first.first;i<=it->first.second;i++) { // go through all symbols of the label and check that they agree with the pattern
+				if(index == pattern.size()) {
+					index += (it->first.second-i+1);
+					return it->second;
+				}
+
+				if(m_string[i] != pattern[index++]) { // if they do not agree - end
+					throw "notfound";
+				}
+			}
+			return it->second;
+		}
+
+		throw "notfound";
+	}
+
+	/**
+	 * Prints a textual representation of the index (its string and its transitions) to the output stream.
+	 * @param out output stream to print to
+	 */
+	virtual void operator >>( std::ostream & out ) const;
+
+	virtual int compare ( const ObjectBase & other ) const {
+		if ( ext::type_index ( typeid ( * this ) ) == ext::type_index ( typeid ( other ) ) ) return this->compare ( static_cast < decltype ( * this ) > ( other ) );
+
+		return ext::type_index ( typeid ( * this ) ) - ext::type_index ( typeid ( other ) );
+	}
+
+	virtual int compare ( const CompactSuffixAutomatonTerminatingSymbol & other ) const;
+
+	virtual explicit operator std::string ( ) const;
+
+	explicit operator automaton::CompactNFA < SymbolType, unsigned > ( ) const;
+
+	virtual object::ObjectBase * inc ( ) &&;
+
+	typedef CompactSuffixAutomatonTerminatingSymbol < > normalized_type;
+
+};
+
+template < class SymbolType >
+object::ObjectBase * CompactSuffixAutomatonTerminatingSymbol < SymbolType >::clone ( ) const {
+	return new CompactSuffixAutomatonTerminatingSymbol ( * this );
+}
+
+template < class SymbolType >
+object::ObjectBase * CompactSuffixAutomatonTerminatingSymbol < SymbolType >::plunder ( ) && {
+	return new CompactSuffixAutomatonTerminatingSymbol ( std::move ( * this ) );
+}
+
+template < class SymbolType >
+void CompactSuffixAutomatonTerminatingSymbol < SymbolType >::operator >>( std::ostream & out ) const {
+	out << "(CompactSuffixAutomatonTerminatingSymbol " << this->m_string << ", " << this->m_delta << ")";
+}
+
+template < class SymbolType >
+int CompactSuffixAutomatonTerminatingSymbol < SymbolType >::compare ( const CompactSuffixAutomatonTerminatingSymbol & other ) const {
+	auto first = ext::tie ( getString ( ), getTransitions ( ) );
+	auto second = ext::tie ( other.getString ( ), other.getTransitions ( ) );
+
+	static ext::compare < decltype ( first ) > comp;
+
+	return comp ( first, second );
+}
+
+template < class SymbolType >
+CompactSuffixAutomatonTerminatingSymbol < SymbolType >::operator std::string ( ) const {
+	std::stringstream ss;
+	ss << * this;
+	return ss.str ( );
+}
+
+template < class SymbolType >
+CompactSuffixAutomatonTerminatingSymbol < SymbolType >::operator automaton::CompactNFA < SymbolType, unsigned > ( ) const {
+	automaton::CompactNFA < SymbolType, unsigned > res ( 0 );
+	res.setInputAlphabet ( ext::set < SymbolType > ( getString ( ).begin ( ), getString ( ).end ( ) ) );
+
+	ext::set < unsigned > states;
+	for ( unsigned state = 0; state < getTransitions ( ).size ( ); ++ state )
+		states.insert ( state );
+	res.setStates ( std::move ( states ) );
+
+	res.addFinalState ( 1 );
+
+	for ( unsigned state = 0; state < getTransitions ( ).size ( ); ++ state )
+		for ( const std::pair < const ext::pair < size_t, size_t >, unsigned > & transition : getTransitions ( ) [ state ] )
+			res.addTransition ( state, ext::vector < SymbolType > ( getString ( ).begin ( ) + transition.first.first, getString ( ).begin ( ) + transition.first.second + 1 ), transition.second );
+
+	return res;
+}
+
+template < class SymbolType >
+object::ObjectBase* CompactSuffixAutomatonTerminatingSymbol < SymbolType >::inc() && {
+	return new object::UniqueObject(object::Object(std::move(*this)), primitive::Integer(0));
+}
+
+} /* namespace stringology */
+
+} /* namespace indexes */
+
+namespace core {
+
+template < class SymbolType >
+struct normalize < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType >, typename std::enable_if < ! std::is_same < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType >, indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < > >::value >::type > {
+	static indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < > eval ( indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > && value ) {
+		indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < > res;
+		res.setString ( alphabet::SymbolNormalize::normalizeSymbols ( std::move ( value ).getString ( ) ) );
+		res.setTransitions ( std::move ( value ).getTransitions ( ) );
+
+		return res;
+	}
+};
+
+template < class SymbolType >
+struct xmlApi < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > > {
+	static indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > parse ( ext::deque < sax::Token >::iterator & input );
+	static bool first ( const ext::deque < sax::Token >::const_iterator & input );
+	static const std::string & xmlTagName ( );
+	static void compose ( ext::deque < sax::Token > & output, const indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > & data );
+};
+
+template < class SymbolType >
+indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > xmlApi < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > >::parse ( ext::deque < sax::Token >::iterator & input ) {
+	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, xmlTagName ( ) );
+	ext::vector < SymbolType > string = core::xmlApi < ext::vector < SymbolType > >::parse ( input );
+	ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > delta = core::xmlApi < ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > >::parse ( input );
+	indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > res;
+	res.setString ( std::move ( string ) );
+	res.setTransitions ( std::move ( delta ) );
+
+	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, xmlTagName ( ) );
+	return res;
+}
+
+template < class SymbolType >
+bool xmlApi < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > >::first ( const ext::deque < sax::Token >::const_iterator & input ) {
+	return sax::FromXMLParserHelper::isToken ( input, sax::Token::TokenType::START_ELEMENT, xmlTagName ( ) );
+}
+
+template < class SymbolType >
+const std::string & xmlApi < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > >::xmlTagName ( ) {
+	static std::string xmlTagName = "CompactSuffixAutomatonTerminatingSymbol";
+
+	return xmlTagName;
+}
+
+template < class SymbolType >
+void xmlApi < indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > >::compose ( ext::deque < sax::Token > & output, const indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < SymbolType > & index ) {
+	output.emplace_back ( xmlTagName ( ), sax::Token::TokenType::START_ELEMENT );
+	core::xmlApi < ext::vector < SymbolType > >::compose ( output, index.getString ( ) );
+	core::xmlApi < ext::vector < ext::map < ext::pair < size_t, size_t >, unsigned > > >::compose ( output, index.getTransitions ( ) );
+	output.emplace_back ( xmlTagName ( ), sax::Token::TokenType::END_ELEMENT );
+}
+
+} /* namespace core */
+
+#endif /* COMPACT_SUFFIX_AUTOMATON_TERMINATING_SYMBOL_H_ */