From ee759f5dd519f15f340645c19ecb3cff6bad7bde Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Fri, 27 Apr 2018 13:29:43 +0200
Subject: [PATCH] document suffix array

---
 .../stringology/indexing/SuffixArrayNaive.h   |  2 +-
 .../stringology/query/SuffixArrayFactors.h    |  4 +-
 .../src/indexes/stringology/SuffixArray.h     | 85 +++++++++----------
 .../src/indexes/stringology/SuffixTrie.h      |  3 +-
 4 files changed, 44 insertions(+), 50 deletions(-)

diff --git a/alib2algo/src/stringology/indexing/SuffixArrayNaive.h b/alib2algo/src/stringology/indexing/SuffixArrayNaive.h
index 977d008183..df004a59eb 100644
--- a/alib2algo/src/stringology/indexing/SuffixArrayNaive.h
+++ b/alib2algo/src/stringology/indexing/SuffixArrayNaive.h
@@ -53,7 +53,7 @@ indexes::stringology::SuffixArray < SymbolType > SuffixArrayNaive::construct ( c
 			return first > second;
 	} );
 
-	return indexes::stringology::SuffixArray < SymbolType > ( w.getAlphabet ( ), std::move ( data ), w.getContent ( ) );
+	return indexes::stringology::SuffixArray < SymbolType > ( std::move ( data ), string::LinearString < SymbolType > ( w ) );
 }
 
 } /* namespace indexing */
diff --git a/alib2algo/src/stringology/query/SuffixArrayFactors.h b/alib2algo/src/stringology/query/SuffixArrayFactors.h
index 3dd245a346..a9ab191165 100644
--- a/alib2algo/src/stringology/query/SuffixArrayFactors.h
+++ b/alib2algo/src/stringology/query/SuffixArrayFactors.h
@@ -56,12 +56,12 @@ ext::set < unsigned > SuffixArrayFactors::query ( const indexes::stringology::Su
 
 	// The value returned by comparator indicates whether the first argument is considered to go before the second.
 	ext::vector < unsigned >::const_iterator low = std::lower_bound ( suffixArray.getData ( ).begin ( ), suffixArray.getData ( ).end ( ), string, [ & ] ( unsigned first, const string::LinearString < SymbolType > & str ) {
-			return comparator ( suffixArray.getString ( ), first, str.getContent ( ), 0, str.getContent ( ).size ( ) ) < 0;
+			return comparator ( suffixArray.getString ( ).getContent ( ), first, str.getContent ( ), 0, str.getContent ( ).size ( ) ) < 0;
 	} );
 
 	// The value returned by comparator indicates whether the first argument is considered to go before the second.
 	ext::vector < unsigned >::const_iterator high = std::upper_bound ( suffixArray.getData ( ).begin ( ), suffixArray.getData ( ).end ( ), string, [ & ] ( const string::LinearString < SymbolType > & str, unsigned second ) {
-			return comparator ( str.getContent ( ), 0, suffixArray.getString ( ), second, str.getContent ( ).size ( ) ) < 0;
+			return comparator ( str.getContent ( ), 0, suffixArray.getString ( ).getContent ( ), second, str.getContent ( ).size ( ) ) < 0;
 	} );
 
 	return ext::set < unsigned > ( low, high );
diff --git a/alib2data/src/indexes/stringology/SuffixArray.h b/alib2data/src/indexes/stringology/SuffixArray.h
index cfd546653e..1b02f70471 100644
--- a/alib2data/src/indexes/stringology/SuffixArray.h
+++ b/alib2data/src/indexes/stringology/SuffixArray.h
@@ -1,6 +1,22 @@
 /*
  * SuffixArray.h
  *
+ * This file is part of Algorithms library toolkit.
+ * Copyright (C) 2017 Jan Travnicek (jan.travnicek@fit.cvut.cz)
+
+ * Algorithms library toolkit is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+
+ * Algorithms library toolkit is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with Algorithms library toolkit.  If not, see <http://www.gnu.org/licenses/>.
+ *
  *  Created on: Jan 8, 2017
  *      Author: Jan Travnicek
  */
@@ -18,7 +34,6 @@
 
 #include <common/DefaultSymbolType.h>
 
-#include <core/components.hpp>
 #include <exception/CommonException.h>
 
 #include <object/UniqueObject.h>
@@ -29,29 +44,28 @@
 #include <primitive/Unsigned.h>
 #include <primitive/xml/Unsigned.h>
 
-#include <container/ObjectsSet.h>
 #include <container/ObjectsVector.h>
 
-#include <container/xml/ObjectsSet.h>
 #include <container/xml/ObjectsVector.h>
 
+#include <string/LinearString.h>
+#include <string/xml/LinearString.h>
+
 #include <alphabet/common/SymbolNormalize.h>
 
 namespace indexes {
 
 namespace stringology {
 
-class GeneralAlphabet;
-
 /**
- * Represents regular expression parsed from the XML. Regular expression is stored
- * as a tree of RegExpElement.
+ * \brief Suffix array string index. Linear representation of all suffixes ordered lexicographically. Suffixes are represented as indexes into the indexed string, and the alphabet is stored within the string as well. Therefore the string is stored along with the linear representation of all suffixes. The class does not check whether the order of the suffixes is correct.
+ *
+ * \tparam SymbolType type of symbols of indexed string
  */
 template < class SymbolType = DefaultSymbolType >
-class SuffixArray final : public object::ObjectBase, public core::Components < SuffixArray < SymbolType >, ext::set < SymbolType >, component::Set, GeneralAlphabet > {
-protected:
+class SuffixArray final : public object::ObjectBase {
 	ext::vector < unsigned > m_data;
-	ext::vector < SymbolType > m_string;
+	string::LinearString < SymbolType > m_string;
 
 public:
 	/**
@@ -64,7 +78,7 @@ public:
 	 */
 	virtual ObjectBase * clone ( ) &&;
 
-	explicit SuffixArray ( ext::set < SymbolType > alphabet, ext::vector < unsigned > data, ext::vector < SymbolType > string );
+	explicit SuffixArray ( ext::vector < unsigned > data, string::LinearString < SymbolType > string );
 
 	/**
 	 * @return Root node of the trie
@@ -73,16 +87,16 @@ public:
 
 	ext::vector < unsigned > && getData ( ) &&;
 
-	const ext::vector < SymbolType > & getString ( ) const &;
+	const string::LinearString < SymbolType > & getString ( ) const &;
 
-	ext::vector < SymbolType > && getString ( ) &&;
+	string::LinearString < SymbolType > && getString ( ) &&;
 
 	const ext::set < SymbolType > & getAlphabet ( ) const & {
-		return this->template accessComponent < GeneralAlphabet > ( ).get ( );
+		return m_string.getAlphabet ( );
 	}
 
 	ext::set < SymbolType > && getAlphabet ( ) && {
-		return std::move ( this->template accessComponent < GeneralAlphabet > ( ).get ( ) );
+		return std::move ( m_string ).getAlphabet ( );
 	}
 
 	/**
@@ -96,7 +110,7 @@ public:
 	 * @param symbol removed symbol from the alphabet
 	 */
 	bool removeSymbolFromAlphabet ( const SymbolType & symbol ) {
-		return this->template accessComponent < GeneralAlphabet > ( ).remove ( symbol );
+		return m_string.removeSymbol ( symbol );
 	}
 
 	/**
@@ -130,7 +144,7 @@ namespace indexes {
 namespace stringology {
 
 template < class SymbolType >
-SuffixArray < SymbolType >::SuffixArray ( ext::set < SymbolType > alphabet, ext::vector < unsigned > data, ext::vector < SymbolType > string ) : core::Components < SuffixArray, ext::set < SymbolType >, component::Set, GeneralAlphabet > ( std::move ( alphabet ) ), m_data ( std::move ( data ) ), m_string ( std::move ( string ) ) {
+SuffixArray < SymbolType >::SuffixArray ( ext::vector < unsigned > data, string::LinearString < SymbolType > string ) : m_data ( std::move ( data ) ), m_string ( std::move ( string ) ) {
 	// TODO check validity of the string like in LinearString
 }
 
@@ -155,12 +169,12 @@ ext::vector < unsigned > && SuffixArray < SymbolType >::getData ( ) && {
 }
 
 template < class SymbolType >
-const ext::vector < SymbolType > & SuffixArray < SymbolType >::getString ( ) const & {
+const string::LinearString < SymbolType > & SuffixArray < SymbolType >::getString ( ) const & {
 	return m_string;
 }
 
 template < class SymbolType >
-ext::vector < SymbolType > && SuffixArray < SymbolType >::getString ( ) && {
+string::LinearString < SymbolType > && SuffixArray < SymbolType >::getString ( ) && {
 	return std::move ( m_string );
 }
 
@@ -176,8 +190,8 @@ void SuffixArray < SymbolType >::operator >>( std::ostream & out ) const {
 
 template < class SymbolType >
 int SuffixArray < SymbolType >::compare ( const SuffixArray & other ) const {
-	auto first = ext::tie ( getData ( ), getString ( ), getAlphabet ( ) );
-	auto second = ext::tie ( other.getData ( ), other.getString ( ), other.getAlphabet ( ) );
+	auto first = ext::tie ( getData ( ), getString ( ) );
+	auto second = ext::tie ( other.getData ( ), other.getString ( ) );
 
 	static ext::compare < decltype ( first ) > comp;
 
@@ -202,29 +216,12 @@ object::ObjectBase* SuffixArray < SymbolType >::inc() && {
 
 namespace core {
 
-template < class SymbolType >
-class SetConstraint < indexes::stringology::SuffixArray < SymbolType >, SymbolType, indexes::stringology::GeneralAlphabet > {
-public:
-	static bool used ( const indexes::stringology::SuffixArray < SymbolType > & index, const SymbolType & symbol ) {
-		const ext::vector < SymbolType > & content = index.getString ( );
-		return std::find ( content.begin(), content.end(), symbol ) != content.end();
-	}
-
-	static bool available ( const indexes::stringology::SuffixArray < SymbolType > &, const SymbolType & ) {
-		return true;
-	}
-
-	static void valid ( const indexes::stringology::SuffixArray < SymbolType > &, const SymbolType & ) {
-	}
-};
-
 template < class SymbolType >
 struct normalize < indexes::stringology::SuffixArray < SymbolType >, typename std::enable_if < ! std::is_same < indexes::stringology::SuffixArray < SymbolType >, indexes::stringology::SuffixArray < > >::value >::type > {
 	static indexes::stringology::SuffixArray < > eval ( indexes::stringology::SuffixArray < SymbolType > && value ) {
-		ext::set < DefaultSymbolType > alphabet = alphabet::SymbolNormalize::normalizeAlphabet ( std::move ( value ).getAlphabet ( ) );
-		ext::vector < DefaultSymbolType > string = alphabet::SymbolNormalize::normalizeSymbols ( std::move ( value ).getString ( ) );
+		string::LinearString < DefaultSymbolType > string = normalize < string::LinearString < SymbolType > >::eval ( std::move ( value ).getString ( ) );
 
-		return indexes::stringology::SuffixArray < > ( std::move ( alphabet ), std::move ( value ).getData ( ), std::move ( string ) );
+		return indexes::stringology::SuffixArray < > ( std::move ( value ).getData ( ), std::move ( string ) );
 	}
 };
 
@@ -239,10 +236,9 @@ struct xmlApi < indexes::stringology::SuffixArray < SymbolType > > {
 template < class SymbolType >
 indexes::stringology::SuffixArray < SymbolType > xmlApi < indexes::stringology::SuffixArray < SymbolType > >::parse ( ext::deque < sax::Token >::iterator & input ) {
 	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, xmlTagName ( ) );
-	ext::set < SymbolType > alphabet = core::xmlApi < ext::set < SymbolType > >::parse ( input );
 	ext::vector < unsigned > data = core::xmlApi < ext::vector < unsigned > >::parse ( input );
-	ext::vector < SymbolType > string = core::xmlApi < ext::vector < SymbolType > >::parse ( input );
-	indexes::stringology::SuffixArray < SymbolType > res ( std::move ( alphabet ), std::move ( data ), std::move ( string ) );
+	string::LinearString < SymbolType > string = core::xmlApi < string::LinearString < SymbolType > >::parse ( input );
+	indexes::stringology::SuffixArray < SymbolType > res ( std::move ( data ), std::move ( string ) );
 
 	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, xmlTagName ( ) );
 	return res;
@@ -263,9 +259,8 @@ const std::string & xmlApi < indexes::stringology::SuffixArray < SymbolType > >:
 template < class SymbolType >
 void xmlApi < indexes::stringology::SuffixArray < SymbolType > >::compose ( ext::deque < sax::Token > & output, const indexes::stringology::SuffixArray < SymbolType > & index ) {
 	output.emplace_back ( xmlTagName ( ), sax::Token::TokenType::START_ELEMENT );
-	core::xmlApi < ext::set < SymbolType > >::compose ( output, index.getAlphabet ( ) );
 	core::xmlApi < ext::vector < unsigned > >::compose ( output, index.getData ( ) );
-	core::xmlApi < ext::vector < SymbolType > >::compose ( output, index.getString ( ) );
+	core::xmlApi < string::LinearString < SymbolType > >::compose ( output, index.getString ( ) );
 	output.emplace_back ( xmlTagName ( ), sax::Token::TokenType::END_ELEMENT );
 }
 
diff --git a/alib2data/src/indexes/stringology/SuffixTrie.h b/alib2data/src/indexes/stringology/SuffixTrie.h
index 430e445057..97115116c7 100644
--- a/alib2data/src/indexes/stringology/SuffixTrie.h
+++ b/alib2data/src/indexes/stringology/SuffixTrie.h
@@ -66,8 +66,7 @@ namespace stringology {
 class GeneralAlphabet;
 
 /**
- * \brief
- * Suffix trie string index. Tree like representation of all suffixes. Nodes of the trie are either containing index of the suffix or void. The parent child relationship of nodes is represented by single symbol. The class does not checks whether the trie actually is suffix trie.
+ * \brief Suffix trie string index. Tree-like representation of all suffixes. Nodes of the trie either contain the index of a suffix or are void. The parent-child relationship of nodes is represented by a single symbol. The class does not check whether the trie actually is a suffix trie.
  *
  * \tparam SymbolType type of symbols of indexed string
  */
-- 
GitLab