From af0a2a5735d7f224d0e734572079635ec9ef2dcc Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Tue, 7 Feb 2017 16:23:27 +0100
Subject: [PATCH] store the original string in the position heap data structure

---
 .../stringology/indexing/PositionHeapNaive.h  |  5 +++-
 alib2data/src/indexes/PositionHeap.h          | 25 ++++++++++++++-----
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/alib2algo/src/stringology/indexing/PositionHeapNaive.h b/alib2algo/src/stringology/indexing/PositionHeapNaive.h
index e523eedb27..08b636be33 100644
--- a/alib2algo/src/stringology/indexing/PositionHeapNaive.h
+++ b/alib2algo/src/stringology/indexing/PositionHeapNaive.h
@@ -20,6 +20,9 @@ namespace indexing {
 
 /**
  * Constructs a position heap for given string.
+ *
+ * Source: Position heaps: A simple and dynamic text indexing data structure
+ * Andrzej Ehrenfeucht, Ross M. McConnell, Nissa Osheim, Sung-Whan Woo
  */
 
 class PositionHeapNaive : public std::SingleDispatch < PositionHeapNaive, indexes::PositionHeap < DefaultSymbolType >, const string::StringBase & > {
@@ -54,7 +57,7 @@ indexes::PositionHeap < SymbolType > PositionHeapNaive::construct ( const string
 		n = & n->getChildren ( ).insert ( std::make_pair ( w.getContent ( )[k], std::trie < SymbolType, unsigned > ( node ) ) ).first->second;
 	}
 
-	return indexes::PositionHeap < SymbolType > ( w.getAlphabet ( ), trie );
+	return indexes::PositionHeap < SymbolType > ( w.getAlphabet ( ), trie, w.getContent ( ) );
 }
 
 } /* namespace indexing */
diff --git a/alib2data/src/indexes/PositionHeap.h b/alib2data/src/indexes/PositionHeap.h
index b1468051c7..950fc95d93 100644
--- a/alib2data/src/indexes/PositionHeap.h
+++ b/alib2data/src/indexes/PositionHeap.h
@@ -29,6 +29,7 @@
 
 #include <container/ObjectsSet.h>
 #include <container/ObjectsTrie.h>
+#include <container/ObjectsVector.h>
 
 #include <primitive/Unsigned.h>
 
@@ -44,6 +45,7 @@ template < class SymbolType = DefaultSymbolType >
 class PositionHeap : public alib::ObjectBase, public std::Components < PositionHeap < SymbolType >, SymbolType, std::tuple < GeneralAlphabet >, std::tuple < > > {
 protected:
 	std::trie < SymbolType, unsigned > m_trie;
+	std::vector < SymbolType > m_string;
 
 public:
 	/**
@@ -56,8 +58,8 @@ public:
 	 */
 	virtual ObjectBase * plunder ( ) &&;
 
-	explicit PositionHeap ( std::set < SymbolType > edgeAlphabet, std::trie < SymbolType, unsigned > trie );
-	explicit PositionHeap ( std::trie < SymbolType, unsigned > trie );
+	explicit PositionHeap ( std::set < SymbolType > edgeAlphabet, std::trie < SymbolType, unsigned > trie, std::vector < SymbolType > string );
+	explicit PositionHeap ( std::trie < SymbolType, unsigned > trie, std::vector < SymbolType > string );
 
 	void checkTrie ( const std::trie < SymbolType, unsigned > & trie );
 
@@ -66,6 +68,8 @@ public:
 	 */
 	const std::trie < SymbolType, unsigned > & getRoot ( ) const;
 
+	const std::vector < SymbolType > & getString ( ) const;
+
 	const std::set < SymbolType > & getAlphabet ( ) const {
 		return this->template accessComponent < GeneralAlphabet > ( ).get ( );
 	}
@@ -119,12 +123,13 @@ public:
 namespace indexes {
 
 template < class SymbolType >
-PositionHeap < SymbolType >::PositionHeap ( std::set < SymbolType > edgeAlphabet, std::trie < SymbolType, unsigned > trie ) : std::Components < PositionHeap, SymbolType, std::tuple < GeneralAlphabet >, std::tuple < > > ( std::make_tuple ( std::move ( edgeAlphabet ) ), std::tuple < > ( ) ), m_trie ( std::move ( trie ) ) {
+PositionHeap < SymbolType >::PositionHeap ( std::set < SymbolType > edgeAlphabet, std::trie < SymbolType, unsigned > trie, std::vector < SymbolType > string ) : std::Components < PositionHeap, SymbolType, std::tuple < GeneralAlphabet >, std::tuple < > > ( std::make_tuple ( std::move ( edgeAlphabet ) ), std::tuple < > ( ) ), m_trie ( std::move ( trie ) ), m_string ( std::move ( string ) ) {
 	checkTrie ( this->m_trie );
+	// TODO: m_string is stored unchecked here, whereas m_trie goes through checkTrie; validate the string the way LinearString does
 }
 
 template < class SymbolType >
-PositionHeap < SymbolType >::PositionHeap ( std::trie < SymbolType, unsigned > trie ) : PositionHeap ( computeMinimalEdgeAlphabet ( trie ), trie ) {
+PositionHeap < SymbolType >::PositionHeap ( std::trie < SymbolType, unsigned > trie, std::vector < SymbolType > string ) : PositionHeap ( computeMinimalEdgeAlphabet ( trie ), trie, std::move ( string ) ) { // NOTE(review): trie is copied, not moved; a std::move here could be evaluated before computeMinimalEdgeAlphabet ( trie ) reads it, since argument evaluation order is unspecified
 }
 
 template < class SymbolType >
@@ -151,6 +156,11 @@ const std::trie < SymbolType, unsigned > & PositionHeap < SymbolType >::getRoot
 	return m_trie;
 }
 
+template < class SymbolType >
+const std::vector < SymbolType > & PositionHeap < SymbolType >::getString ( ) const {
+	return m_string; // the symbol sequence given to the constructor, handed out by const reference (no copy)
+}
+
 template < class SymbolType >
 void PositionHeap < SymbolType >::setTree ( std::trie < SymbolType, unsigned > trie ) {
 	checkTrie ( trie );
@@ -184,7 +194,8 @@ PositionHeap < SymbolType > PositionHeap < SymbolType >::parse ( std::deque < sa
 	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, PositionHeap::getXmlTagName() );
 	std::set < SymbolType > edgeAlphabet = alib::xmlApi < std::set < SymbolType > >::parse ( input );
 	std::trie < SymbolType, unsigned > root = alib::xmlApi < std::trie < SymbolType, unsigned > >::parse ( input );
-	PositionHeap < SymbolType > trie ( std::move ( edgeAlphabet ), std::move ( root ) );
+	std::vector < SymbolType > string = alib::xmlApi < std::vector < SymbolType > >::parse ( input );
+	PositionHeap < SymbolType > trie ( std::move ( edgeAlphabet ), std::move ( root ), std::move ( string ) );
 
 	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, PositionHeap::getXmlTagName() );
 	return trie;
@@ -195,6 +206,7 @@ void PositionHeap < SymbolType >::compose ( std::deque < sax::Token > & out ) co
 	out.emplace_back ( PositionHeap::getXmlTagName(), sax::Token::TokenType::START_ELEMENT );
 	alib::xmlApi < std::set < SymbolType > >::compose ( out, getAlphabet ( ) );
 	alib::xmlApi < std::trie < SymbolType, unsigned > >::compose ( out, getRoot ( ) );
+	alib::xmlApi < std::vector < SymbolType > >::compose ( out, getString ( ) );
 	out.emplace_back ( PositionHeap::getXmlTagName(), sax::Token::TokenType::END_ELEMENT );
 }
 
@@ -220,7 +232,8 @@ class ComponentConstraint < indexes::PositionHeap < SymbolType >, SymbolType, in
 
 public:
 	static bool used ( const indexes::PositionHeap < SymbolType > & index, const SymbolType & symbol ) {
-		return used ( index.getRoot ( ), symbol );
+		const std::vector < SymbolType > & content = index.getString ( ); // a symbol is in use when it occurs in the stored string, or when the trie overload of used reports it
+		return std::find ( content.begin(), content.end(), symbol ) != content.end() || used ( index.getRoot ( ), symbol ); // NOTE(review): std::find needs <algorithm>; this patch adds no such include, confirm it is already reachable
 	}
 
 	static bool available ( const indexes::PositionHeap < SymbolType > &, const SymbolType & ) {
-- 
GitLab