From 8072b2464d659fc4932fd590150c5377f081f03f Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Mon, 10 Apr 2017 09:43:41 +0200
Subject: [PATCH] normalize indexes

---
 .../arbology/CompressedBitParallelTreeIndex.h | 15 ++++-
 .../src/indexes/arbology/FullAndLinearIndex.h |  8 +++
 .../src/indexes/common/IndexesNormalize.h     | 58 +++++++++++++++++++
 .../indexes/stringology/BitParallelIndex.h    | 15 ++++-
 .../stringology/CompressedBitParallelIndex.h  | 15 ++++-
 .../src/indexes/stringology/PositionHeap.h    | 11 ++++
 .../src/indexes/stringology/SuffixArray.h     |  9 +++
 .../src/indexes/stringology/SuffixTrie.h      | 10 ++++
 8 files changed, 135 insertions(+), 6 deletions(-)
 create mode 100644 alib2data/src/indexes/common/IndexesNormalize.h

diff --git a/alib2data/src/indexes/arbology/CompressedBitParallelTreeIndex.h b/alib2data/src/indexes/arbology/CompressedBitParallelTreeIndex.h
index 8005354788..214f5aa312 100644
--- a/alib2data/src/indexes/arbology/CompressedBitParallelTreeIndex.h
+++ b/alib2data/src/indexes/arbology/CompressedBitParallelTreeIndex.h
@@ -29,6 +29,8 @@
 #include <common/SparseBoolVector.hpp>
 #include <primitive/Bool.h>
 
+#include <alphabet/common/SymbolNormalize.h>
+
 namespace indexes {
 
 namespace arbology {
@@ -68,7 +70,7 @@ public:
 	 */
 	const std::vector < int > & getJumps ( ) const;
 
-	const std::vector < SymbolType > & getString ( ) const;
+	std::vector < SymbolType > getString ( ) const;
 
 	const std::set < SymbolType > & getAlphabet ( ) const {
 		return this->template accessComponent < GeneralAlphabet > ( ).get ( );
@@ -116,6 +118,15 @@ public:
 	void compose ( std::deque < sax::Token > & out ) const;
 
 	virtual alib::ObjectBase * inc ( ) &&;
+
+	virtual ObjectBase * normalize ( ) && { // rvalue-only: builds the DefaultSymbolType variant of this index out of its own members
+		std::set < DefaultSymbolType > normalizedAlphabet = alphabet::SymbolNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < GeneralAlphabet > ( ).get ( ) ) );
+		std::map < DefaultSymbolType, common::SparseBoolVector > normalizedVectors;
+		for ( std::pair < SymbolType, common::SparseBoolVector > && entry : std::make_moveable_map ( m_vectors ) ) // re-key every bit vector by its normalized symbol
+			normalizedVectors.insert ( std::make_pair ( alphabet::SymbolNormalize::normalizeSymbol ( std::move ( entry.first ) ), std::move ( entry.second ) ) );
+
+		return new CompressedBitParallelTreeIndex < > ( std::move ( normalizedAlphabet ), std::move ( normalizedVectors ), std::move ( m_jumpTable ) ); // allocated with new; the jump table is moved over unchanged
+	}
 };
 
 } /* namespace arbology */
@@ -151,7 +162,7 @@ const std::vector < int > & CompressedBitParallelTreeIndex < SymbolType >::getJu
 }
 
 template < class SymbolType >
-const std::vector < SymbolType > & CompressedBitParallelTreeIndex < SymbolType >::getString ( ) const {
+std::vector < SymbolType > CompressedBitParallelTreeIndex < SymbolType >::getString ( ) const {
 	std::vector < SymbolType > res;
 
 	unsigned index = 0;
diff --git a/alib2data/src/indexes/arbology/FullAndLinearIndex.h b/alib2data/src/indexes/arbology/FullAndLinearIndex.h
index ef6e121bf3..d1baf0bf26 100644
--- a/alib2data/src/indexes/arbology/FullAndLinearIndex.h
+++ b/alib2data/src/indexes/arbology/FullAndLinearIndex.h
@@ -24,6 +24,8 @@
 
 #include <indexes/stringology/PositionHeap.h>
 
+#include <alphabet/common/SymbolNormalize.h>
+
 namespace indexes {
 
 namespace arbology {
@@ -111,6 +113,12 @@ public:
 	void compose ( std::deque < sax::Token > & out ) const;
 
 	virtual alib::ObjectBase * inc ( ) &&;
+
+	virtual ObjectBase * normalize ( ) && { // rvalue-only: normalizes the embedded position heap and rewraps it with the existing jump table
+		indexes::stringology::PositionHeap < DefaultSymbolType > normalizedHeap = manage_move ( std::move ( m_StringIndex ).normalize ( ) ); // NOTE(review): manage_move presumably takes over the pointer returned by normalize - confirm
+
+		return new FullAndLinearIndex < > ( std::move ( normalizedHeap ), std::move ( m_JumpTable ) );
+	}
 };
 
 } /* namespace arbology */
diff --git a/alib2data/src/indexes/common/IndexesNormalize.h b/alib2data/src/indexes/common/IndexesNormalize.h
new file mode 100644
index 0000000000..0b3aaaa381
--- /dev/null
+++ b/alib2data/src/indexes/common/IndexesNormalize.h
@@ -0,0 +1,58 @@
+/*
+ * IndexesNormalize.h
+ *
+ *  Created on: Apr 7, 2017
+ *      Author: Jan Travnicek
+ */
+
+#ifndef INDEXES_NORMALIZE_H_
+#define INDEXES_NORMALIZE_H_
+
+#include <vector>
+#include <tuple>
+#include <set>
+#include <variant>
+#include <trie>
+
+#include <object/AnyObject.h>
+
+#include <alphabet/common/SymbolNormalize.h>
+
+namespace indexes {
+
+/**
+ * Helpers that rebuild a std::trie keyed by an arbitrary SymbolType as a std::trie keyed by DefaultSymbolType.
+ */
+class IndexesNormalize {
+	template < class SymbolType, class ValueType > // recursive worker; the public normalizeTrie overloads forward to it
+	static std::trie < DefaultSymbolType, ValueType > normalizeTrieInner ( std::trie < SymbolType, ValueType > && node ) {
+		std::map < DefaultSymbolType, std::trie < DefaultSymbolType, ValueType > > normalizedChildren;
+
+		for ( std::pair < SymbolType, std::trie < SymbolType, ValueType > > && entry : std::make_moveable_map ( node.getChildren ( ) ) ) { // normalize the edge symbol, then recurse into the subtree below it
+			normalizedChildren.insert ( std::make_pair ( alphabet::SymbolNormalize::normalizeSymbol ( std::move ( entry.first ) ), normalizeTrieInner ( std::move ( entry.second ) ) ) );
+		}
+
+		return std::trie < DefaultSymbolType, ValueType > ( std::move ( node.getData ( ) ), std::move ( normalizedChildren ) ); // the node's data is moved over as is
+	}
+
+public:
+	template < class SymbolType > // overload for tries whose nodes store unsigned
+	static std::trie < DefaultSymbolType, unsigned > normalizeTrie ( std::trie < SymbolType, unsigned > && trie );
+
+	template < class SymbolType > // overload for tries whose nodes store std::variant < void, unsigned >
+	static std::trie < DefaultSymbolType, std::variant < void, unsigned > > normalizeTrie ( std::trie < SymbolType, std::variant < void, unsigned > > && trie );
+};
+
+template < class SymbolType > // definition of the overload for tries storing unsigned
+std::trie < DefaultSymbolType, unsigned > IndexesNormalize::normalizeTrie ( std::trie < SymbolType, unsigned > && trie ) {
+	return IndexesNormalize::normalizeTrieInner ( std::move ( trie ) ); // the recursive worker does the actual conversion
+}
+
+template < class SymbolType > // definition of the overload for tries storing std::variant < void, unsigned >
+std::trie < DefaultSymbolType, std::variant < void, unsigned > > IndexesNormalize::normalizeTrie ( std::trie < SymbolType, std::variant < void, unsigned > > && trie ) {
+	return IndexesNormalize::normalizeTrieInner ( std::move ( trie ) ); // the recursive worker does the actual conversion
+}
+
+} /* namespace indexes */
+
+#endif /* INDEXES_NORMALIZE_H_ */
diff --git a/alib2data/src/indexes/stringology/BitParallelIndex.h b/alib2data/src/indexes/stringology/BitParallelIndex.h
index 100a2c9962..8b984af866 100644
--- a/alib2data/src/indexes/stringology/BitParallelIndex.h
+++ b/alib2data/src/indexes/stringology/BitParallelIndex.h
@@ -28,6 +28,8 @@
 #include <container/ObjectsVector.h>
 #include <primitive/Bool.h>
 
+#include <alphabet/common/SymbolNormalize.h>
+
 namespace indexes {
 
 namespace stringology {
@@ -61,7 +63,7 @@ public:
 	 */
 	const std::map < SymbolType, std::vector < bool > > & getData ( ) const;
 
-	const std::vector < SymbolType > & getString ( ) const;
+	std::vector < SymbolType > getString ( ) const;
 
 	const std::set < SymbolType > & getAlphabet ( ) const {
 		return this->template accessComponent < GeneralAlphabet > ( ).get ( );
@@ -109,6 +111,15 @@ public:
 	void compose ( std::deque < sax::Token > & out ) const;
 
 	virtual alib::ObjectBase * inc ( ) &&;
+
+	virtual ObjectBase * normalize ( ) && { // rvalue-only: builds the DefaultSymbolType variant of this index out of its own members
+		std::set < DefaultSymbolType > normalizedAlphabet = alphabet::SymbolNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < GeneralAlphabet > ( ).get ( ) ) );
+		std::map < DefaultSymbolType, std::vector < bool > > normalizedVectors;
+		for ( std::pair < SymbolType, std::vector < bool > > && entry : std::make_moveable_map ( m_vectors ) ) // re-key every bit vector by its normalized symbol
+			normalizedVectors.insert ( std::make_pair ( alphabet::SymbolNormalize::normalizeSymbol ( std::move ( entry.first ) ), std::move ( entry.second ) ) );
+
+		return new BitParallelIndex < > ( std::move ( normalizedAlphabet ), std::move ( normalizedVectors ) ); // allocated with new
+	}
 };
 
 } /* namespace stringology */
@@ -139,7 +150,7 @@ const std::map < SymbolType, std::vector < bool > > & BitParallelIndex < SymbolT
 }
 
 template < class SymbolType >
-const std::vector < SymbolType > & BitParallelIndex < SymbolType >::getString ( ) const {
+std::vector < SymbolType > BitParallelIndex < SymbolType >::getString ( ) const {
 	std::vector < SymbolType > res;
 
 	unsigned index = 0;
diff --git a/alib2data/src/indexes/stringology/CompressedBitParallelIndex.h b/alib2data/src/indexes/stringology/CompressedBitParallelIndex.h
index 54ceee7133..f2e75d5ef5 100644
--- a/alib2data/src/indexes/stringology/CompressedBitParallelIndex.h
+++ b/alib2data/src/indexes/stringology/CompressedBitParallelIndex.h
@@ -28,6 +28,8 @@
 #include <common/SparseBoolVector.hpp>
 #include <primitive/Bool.h>
 
+#include <alphabet/common/SymbolNormalize.h>
+
 namespace indexes {
 
 namespace stringology {
@@ -61,7 +63,7 @@ public:
 	 */
 	const std::map < SymbolType, common::SparseBoolVector > & getData ( ) const;
 
-	const std::vector < SymbolType > & getString ( ) const;
+	std::vector < SymbolType > getString ( ) const;
 
 	const std::set < SymbolType > & getAlphabet ( ) const {
 		return this->template accessComponent < GeneralAlphabet > ( ).get ( );
@@ -109,6 +111,15 @@ public:
 	void compose ( std::deque < sax::Token > & out ) const;
 
 	virtual alib::ObjectBase * inc ( ) &&;
+
+	virtual ObjectBase * normalize ( ) && { // rvalue-only: builds the DefaultSymbolType variant of this index out of its own members
+		std::set < DefaultSymbolType > normalizedAlphabet = alphabet::SymbolNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < GeneralAlphabet > ( ).get ( ) ) );
+		std::map < DefaultSymbolType, common::SparseBoolVector > normalizedVectors;
+		for ( std::pair < SymbolType, common::SparseBoolVector > && entry : std::make_moveable_map ( m_vectors ) ) // re-key every bit vector by its normalized symbol
+			normalizedVectors.insert ( std::make_pair ( alphabet::SymbolNormalize::normalizeSymbol ( std::move ( entry.first ) ), std::move ( entry.second ) ) );
+
+		return new CompressedBitParallelIndex < > ( std::move ( normalizedAlphabet ), std::move ( normalizedVectors ) ); // allocated with new
+	}
 };
 
 } /* namespace stringology */
@@ -139,7 +150,7 @@ const std::map < SymbolType, common::SparseBoolVector > & CompressedBitParallelI
 }
 
 template < class SymbolType >
-const std::vector < SymbolType > & CompressedBitParallelIndex < SymbolType >::getString ( ) const {
+std::vector < SymbolType > CompressedBitParallelIndex < SymbolType >::getString ( ) const {
 	std::vector < SymbolType > res;
 
 	unsigned index = 0;
diff --git a/alib2data/src/indexes/stringology/PositionHeap.h b/alib2data/src/indexes/stringology/PositionHeap.h
index 058710b7b4..b72c23fb1d 100644
--- a/alib2data/src/indexes/stringology/PositionHeap.h
+++ b/alib2data/src/indexes/stringology/PositionHeap.h
@@ -33,6 +33,9 @@
 
 #include <primitive/Unsigned.h>
 
+#include <alphabet/common/SymbolNormalize.h>
+#include <indexes/common/IndexesNormalize.h>
+
 namespace indexes {
 
 namespace stringology {
@@ -118,6 +121,14 @@ public:
 	void compose ( std::deque < sax::Token > & out ) const;
 
 	virtual alib::ObjectBase * inc ( ) &&;
+
+	virtual PositionHeap < > * normalize ( ) && { // rvalue-only: alphabet, trie and string are each normalized and moved into a new heap
+		std::set < DefaultSymbolType > normalizedAlphabet = alphabet::SymbolNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < GeneralAlphabet > ( ).get ( ) ) );
+		std::trie < DefaultSymbolType, unsigned > normalizedTrie = IndexesNormalize::normalizeTrie ( std::move ( m_trie ) );
+		std::vector < DefaultSymbolType > normalizedString = alphabet::SymbolNormalize::normalizeSymbols ( std::move ( m_string ) );
+
+		return new PositionHeap < > ( std::move ( normalizedAlphabet ), std::move ( normalizedTrie ), std::move ( normalizedString ) ); // allocated with new
+	}
 };
 
 } /* namespace stringology */
diff --git a/alib2data/src/indexes/stringology/SuffixArray.h b/alib2data/src/indexes/stringology/SuffixArray.h
index 58d74706c8..e9aefeadc5 100644
--- a/alib2data/src/indexes/stringology/SuffixArray.h
+++ b/alib2data/src/indexes/stringology/SuffixArray.h
@@ -31,6 +31,8 @@
 #include <container/ObjectsSet.h>
 #include <container/ObjectsVector.h>
 
+#include <alphabet/common/SymbolNormalize.h>
+
 namespace indexes {
 
 namespace stringology {
@@ -113,6 +115,13 @@ public:
 	void compose ( std::deque < sax::Token > & out ) const;
 
 	virtual alib::ObjectBase * inc ( ) &&;
+
+	virtual ObjectBase * normalize ( ) && { // rvalue-only: alphabet and string are normalized, m_data is moved over unchanged
+		std::set < DefaultSymbolType > normalizedAlphabet = alphabet::SymbolNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < GeneralAlphabet > ( ).get ( ) ) );
+		std::vector < DefaultSymbolType > normalizedString = alphabet::SymbolNormalize::normalizeSymbols ( std::move ( m_string ) );
+		// Name the target specialization explicitly: a bare SuffixArray here is the injected class name, i.e. SuffixArray < SymbolType >, which does not match the DefaultSymbolType containers built above.
+		return new SuffixArray < DefaultSymbolType > ( std::move ( normalizedAlphabet ), std::move ( m_data ), std::move ( normalizedString ) );
+	}
 };
 
 } /* namespace stringology */
diff --git a/alib2data/src/indexes/stringology/SuffixTrie.h b/alib2data/src/indexes/stringology/SuffixTrie.h
index ed2123bc0a..a029a10281 100644
--- a/alib2data/src/indexes/stringology/SuffixTrie.h
+++ b/alib2data/src/indexes/stringology/SuffixTrie.h
@@ -35,6 +35,9 @@
 
 #include <primitive/Unsigned.h>
 
+#include <alphabet/common/SymbolNormalize.h>
+#include <indexes/common/IndexesNormalize.h>
+
 namespace indexes {
 
 namespace stringology {
@@ -118,6 +121,13 @@ public:
 	void compose ( std::deque < sax::Token > & out ) const;
 
 	virtual alib::ObjectBase * inc ( ) &&;
+
+	virtual SuffixTrie < > * normalize ( ) && { // rvalue-only: alphabet and trie are each normalized and moved into a new suffix trie
+		std::set < DefaultSymbolType > normalizedAlphabet = alphabet::SymbolNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < GeneralAlphabet > ( ).get ( ) ) );
+		std::trie < DefaultSymbolType, std::variant < void, unsigned > > normalizedTrie = IndexesNormalize::normalizeTrie ( std::move ( m_trie ) );
+
+		return new SuffixTrie < > ( std::move ( normalizedAlphabet ), std::move ( normalizedTrie ) ); // allocated with new
+	}
 };
 
 } /* namespace stringology */
-- 
GitLab