From b3695643e1336644d1d89b2d5ff07e79df267aad Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Mon, 6 Feb 2017 10:20:43 +0100
Subject: [PATCH] basic position heap data struct and construct algo

---
 .../indexing/PositionHeapNaive.cpp            |  24 ++
 .../stringology/indexing/PositionHeapNaive.h  |  64 +++++
 alib2data/src/indexes/PositionHeap.cpp        |  14 ++
 alib2data/src/indexes/PositionHeap.h          | 236 ++++++++++++++++++
 astringology2/src/astringology.cpp            |  14 ++
 5 files changed, 352 insertions(+)
 create mode 100644 alib2algo/src/stringology/indexing/PositionHeapNaive.cpp
 create mode 100644 alib2algo/src/stringology/indexing/PositionHeapNaive.h
 create mode 100644 alib2data/src/indexes/PositionHeap.cpp
 create mode 100644 alib2data/src/indexes/PositionHeap.h

diff --git a/alib2algo/src/stringology/indexing/PositionHeapNaive.cpp b/alib2algo/src/stringology/indexing/PositionHeapNaive.cpp
new file mode 100644
index 0000000000..2a714f6ef9
--- /dev/null
+++ b/alib2algo/src/stringology/indexing/PositionHeapNaive.cpp
@@ -0,0 +1,24 @@
+/*
+ * PositionHeapNaive.cpp
+ *
+ *  Created on: 6. 2. 2017
+ *      Author: Jan Travnicek
+ */
+
+#include "PositionHeapNaive.h"
+
+#include <string/LinearString.h>
+
+namespace stringology {
+
+namespace indexing {
+
+indexes::PositionHeap < DefaultSymbolType > PositionHeapNaive::construct ( const string::String & string ) { // entry point taking the string wrapper type
+	return dispatch ( string.getData ( ) ); // hands the wrapped data to dispatch; presumably this selects the overload registered for the concrete string type — confirm
+}
+
+auto PositionHeapNaiveLinearString = PositionHeapNaive::RegistratorWrapper < indexes::PositionHeap < DefaultSymbolType >, string::LinearString < > > ( PositionHeapNaive::construct ); // NOTE(review): presumably registers the LinearString < > overload of construct as a dispatch target — confirm against core/multipleDispatch.hpp
+
+} /* namespace indexing */
+
+} /* namespace stringology */
diff --git a/alib2algo/src/stringology/indexing/PositionHeapNaive.h b/alib2algo/src/stringology/indexing/PositionHeapNaive.h
new file mode 100644
index 0000000000..71d40b0e53
--- /dev/null
+++ b/alib2algo/src/stringology/indexing/PositionHeapNaive.h
@@ -0,0 +1,64 @@
+/*
+ * PositionHeapNaive.h
+ *
+ *  Created on: 6. 2. 2017
+ *      Author: Jan Travnicek
+ */
+
+#ifndef POSITION_HEAP_NAIVE_H_
+#define POSITION_HEAP_NAIVE_H_
+
+#include <indexes/PositionHeap.h>
+#include <string/String.h>
+#include <string/LinearString.h>
+#include <core/multipleDispatch.hpp>
+#include <exception/CommonException.h>
+
+namespace stringology {
+
+namespace indexing {
+
+/**
+ * Constructs a position heap for given string.
+ */
+
+class PositionHeapNaive : public std::SingleDispatch < PositionHeapNaive, indexes::PositionHeap < DefaultSymbolType >, const string::StringBase & > {
+public:
+	/**
+	 * Creates position heap
+	 * @param string string to construct position heap for
+	 * @return position heap of the string
+	 */
+	static indexes::PositionHeap < DefaultSymbolType > construct ( const string::String & string );
+
+	template < class SymbolType >
+	static indexes::PositionHeap < SymbolType > construct ( const string::LinearString < SymbolType > & string );
+
+};
+
+/** Builds the position heap of w: the root is labelled 1 and each start index size-2 ... 0 adds one node labelled 2 ... size; throws exception::CommonException when w is empty. */
+template < class SymbolType >
+indexes::PositionHeap < SymbolType > PositionHeapNaive::construct ( const string::LinearString < SymbolType > & w ) {
+	const auto & content = w.getContent ( ); // fetched once instead of on every access
+	if ( content.size ( ) == 0 ) throw exception::CommonException ( "Position heap can't index empty string" );
+	std::trie < SymbolType, unsigned > trie ( 1 );
+	for ( unsigned i = content.size ( ) - 1; i > 0; i-- ) {
+		unsigned k = i - 1; // index of the first symbol of the part of w inserted in this round
+		std::trie < SymbolType, unsigned > * n = & trie;
+		while ( k < content.size ( ) ) { // descend while the next symbol already labels a child of n
+			auto child = n->getChildren ( ).find ( content[k] ); // one lookup per step replaces the former count + find pair
+			if ( child == n->getChildren ( ).end ( ) ) break;
+			n = & child->second;
+			++k;
+		}
+		unsigned node = content.size ( ) - i + 1; // label of the new node
+		n->getChildren ( ).insert ( std::make_pair ( content[k], std::trie < SymbolType, unsigned > ( node ) ) ); // the insert result is no longer stored: it was never read
+	}
+	return indexes::PositionHeap < SymbolType > ( w.getAlphabet ( ), trie );
+}
+
+} /* namespace indexing */
+
+} /* namespace stringology */
+
+#endif /* POSITION_HEAP_NAIVE_H_ */
diff --git a/alib2data/src/indexes/PositionHeap.cpp b/alib2data/src/indexes/PositionHeap.cpp
new file mode 100644
index 0000000000..9b47d21c39
--- /dev/null
+++ b/alib2data/src/indexes/PositionHeap.cpp
@@ -0,0 +1,14 @@
+/*
+ * PositionHeap.cpp
+ *
+ *  Created on: Nov 23, 2013
+ *      Author: Jan Travnicek
+ */
+
+#include "PositionHeap.h"
+
+namespace alib {
+
+auto positionHeapParserRegister = xmlApi < alib::Object >::ParserRegister < indexes::PositionHeap < > > ( ); // NOTE(review): presumably makes PositionHeap < > known to the alib::Object XML parser during static initialization — confirm against core/xmlApi.hpp
+
+} /* namespace alib */
diff --git a/alib2data/src/indexes/PositionHeap.h b/alib2data/src/indexes/PositionHeap.h
new file mode 100644
index 0000000000..b1468051c7
--- /dev/null
+++ b/alib2data/src/indexes/PositionHeap.h
@@ -0,0 +1,236 @@
+/*
+ * PositionHeap.h
+ *
+ *  Created on: Nov 23, 2013
+ *      Author: Jan Travnicek
+ */
+
+#ifndef POSITION_HEAP_H_
+#define POSITION_HEAP_H_
+
+#include <string>
+#include <set>
+#include <trie>
+#include <iostream>
+#include <algorithm>
+#include <sstream>
+
+#include <common/DefaultSymbolType.h>
+
+#include <core/components.hpp>
+#include <exception/CommonException.h>
+
+#include <object/Object.h>
+#include <object/UniqueObject.h>
+#include <object/ObjectBase.h>
+
+#include <sax/FromXMLParserHelper.h>
+#include <core/xmlApi.hpp>
+
+#include <container/ObjectsSet.h>
+#include <container/ObjectsTrie.h>
+
+#include <primitive/Unsigned.h>
+
+namespace indexes {
+
+class GeneralAlphabet;
+
+/**
+ * Represents a position heap index. The heap is stored as a trie whose edges are
+ * labelled by symbols of the alphabet and whose nodes carry unsigned values.
+ */
+template < class SymbolType = DefaultSymbolType >
+class PositionHeap : public alib::ObjectBase, public std::Components < PositionHeap < SymbolType >, SymbolType, std::tuple < GeneralAlphabet >, std::tuple < > > {
+protected:
+	std::trie < SymbolType, unsigned > m_trie; // the trie holding the heap; returned by getRoot ( )
+
+public:
+	/**
+	 * Creates a heap-allocated copy of this object.
+	 */
+	virtual ObjectBase * clone ( ) const;
+
+	/**
+	 * Creates a heap-allocated object move-constructed from this one.
+	 */
+	virtual ObjectBase * plunder ( ) &&;
+
+	explicit PositionHeap ( std::set < SymbolType > edgeAlphabet, std::trie < SymbolType, unsigned > trie );
+	explicit PositionHeap ( std::trie < SymbolType, unsigned > trie );
+	// Throws exception::CommonException when an edge symbol of trie is not in the alphabet.
+	void checkTrie ( const std::trie < SymbolType, unsigned > & trie );
+
+	/**
+	 * @return Root node of the trie
+	 */
+	const std::trie < SymbolType, unsigned > & getRoot ( ) const;
+
+	const std::set < SymbolType > & getAlphabet ( ) const {
+		return this->template accessComponent < GeneralAlphabet > ( ).get ( );
+	}
+
+	/**
+	 * Replaces the stored trie after checking it with checkTrie
+	 * @param tree trie to store
+	 */
+	void setTree ( std::trie < SymbolType, unsigned > tree );
+
+	/**
+	 * Removes symbol from the alphabet component of the position heap
+	 * @param symbol removed symbol from the alphabet
+	 */
+	bool removeSymbolFromEdgeAlphabet ( const SymbolType & symbol ) {
+		return this->template accessComponent < GeneralAlphabet > ( ).remove ( symbol );
+	}
+
+	/**
+	 * Prints a textual representation of the heap to the output stream.
+	 * @param out output stream to which print the heap
+	 * The output has the form "(PositionHeap <trie>)".
+	 */
+	virtual void operator >>( std::ostream & out ) const;
+
+	virtual int compare ( const ObjectBase & other ) const {
+		if ( std::type_index ( typeid ( * this ) ) == std::type_index ( typeid ( other ) ) ) return this->compare ( ( decltype ( * this ) )other ); // same dynamic type: compare as PositionHeap
+
+		return std::type_index ( typeid ( * this ) ) - std::type_index ( typeid ( other ) ); // different types: result comes from the type_index difference
+	}
+
+	virtual int compare ( const PositionHeap & other ) const;
+
+	virtual explicit operator std::string ( ) const;
+
+	static const std::string & getXmlTagName() {
+		static std::string xmlTagName = "PositionHeap";
+
+		return xmlTagName;
+	}
+	// Reads the opening tag, the alphabet, the trie and the closing tag from input.
+	static PositionHeap parse ( std::deque < sax::Token >::iterator & input );
+	// Appends the opening tag, the alphabet, the trie and the closing tag to out.
+	void compose ( std::deque < sax::Token > & out ) const;
+
+	virtual alib::ObjectBase * inc ( ) &&;
+};
+
+} /* namespace indexes */
+
+namespace indexes {
+
+template < class SymbolType >
+PositionHeap < SymbolType >::PositionHeap ( std::set < SymbolType > edgeAlphabet, std::trie < SymbolType, unsigned > trie ) : std::Components < PositionHeap, SymbolType, std::tuple < GeneralAlphabet >, std::tuple < > > ( std::make_tuple ( std::move ( edgeAlphabet ) ), std::tuple < > ( ) ), m_trie ( std::move ( trie ) ) { // edgeAlphabet is moved into the Components base, trie into m_trie
+	checkTrie ( this->m_trie ); // throws exception::CommonException when the stored trie uses a symbol outside the alphabet
+}
+
+template < class SymbolType >
+PositionHeap < SymbolType >::PositionHeap ( std::trie < SymbolType, unsigned > trie ) : PositionHeap ( computeMinimalEdgeAlphabet ( trie ), trie ) { // delegates to the two-argument constructor; NOTE(review): computeMinimalEdgeAlphabet is not declared in this header — confirm it is provided elsewhere, otherwise instantiating this constructor will not compile
+}
+
+template < class SymbolType >
+alib::ObjectBase * PositionHeap < SymbolType >::clone ( ) const { // returns a new-allocated copy
+	return new PositionHeap ( * this ); // copy-constructs from this object
+}
+
+template < class SymbolType >
+alib::ObjectBase * PositionHeap < SymbolType >::plunder ( ) && { // rvalue-only: returns a new-allocated object
+	return new PositionHeap ( std::move ( * this ) ); // move-constructs from this object
+}
+
+template < class SymbolType >
+void PositionHeap < SymbolType >::checkTrie ( const std::trie < SymbolType, unsigned > & trie ) { // validates every edge symbol below trie against the alphabet; throws exception::CommonException on the first miss
+	for ( const std::pair < const SymbolType, std::trie < SymbolType, unsigned > > & child : trie.getChildren ( ) ) {
+		if ( ! getAlphabet ( ).count ( child.first ) ) // edge symbol missing from the alphabet
+			throw exception::CommonException ( "Symbol " + std::to_string ( child.first ) + " not in the alphabet." ); // separating space added: the message used to read "Symbol <x>not in the alphabet."
+		checkTrie ( child.second ); // recurse into the child's subtree
+	}
+}
+
+template < class SymbolType >
+const std::trie < SymbolType, unsigned > & PositionHeap < SymbolType >::getRoot ( ) const { // read-only access to the stored trie
+	return m_trie;
+}
+
+template < class SymbolType >
+void PositionHeap < SymbolType >::setTree ( std::trie < SymbolType, unsigned > trie ) { // replaces the stored trie with the given one
+	checkTrie ( trie ); // throws before m_trie is touched when trie uses a symbol outside the alphabet
+	this->m_trie = std::move ( trie ).plunder ( ); // NOTE(review): relies on std::trie providing an rvalue plunder ( ) whose result is assignable to m_trie — confirm against the trie extension header
+}
+
+template < class SymbolType >
+void PositionHeap < SymbolType >::operator >>( std::ostream & out ) const { // writes the textual form of the heap to out
+	out << "(PositionHeap " << this->m_trie << ")"; // the trie is printed with its own operator <<
+}
+
+// Compares two heaps by applying std::compare to their ( trie, alphabet ) tuples of references.
+template < class SymbolType >
+int PositionHeap < SymbolType >::compare ( const PositionHeap & other ) const {
+	auto lhs = std::tie ( this->getRoot ( ), this->getAlphabet ( ) );
+	auto rhs = std::tie ( other.getRoot ( ), other.getAlphabet ( ) );
+
+	static std::compare < decltype ( lhs ) > comparator;
+	return comparator ( lhs, rhs );
+}
+
+template < class SymbolType >
+PositionHeap < SymbolType >::operator std::string ( ) const { // string form of the heap, produced through stream output
+	std::stringstream ss;
+	ss << * this; // streams this object into the buffer
+	return ss.str ( );
+}
+
+template < class SymbolType >
+PositionHeap < SymbolType > PositionHeap < SymbolType >::parse ( std::deque < sax::Token >::iterator & input ) {
+	const std::string & tag = PositionHeap::getXmlTagName ( ); // same tag name delimits the element on both ends
+	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, tag );
+	std::set < SymbolType > alphabet = alib::xmlApi < std::set < SymbolType > >::parse ( input ); // the alphabet comes first,
+	std::trie < SymbolType, unsigned > heapTrie = alib::xmlApi < std::trie < SymbolType, unsigned > >::parse ( input ); // followed by the trie
+	PositionHeap < SymbolType > result ( std::move ( alphabet ), std::move ( heapTrie ) ); // the constructor checks the trie against the alphabet
+	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, tag );
+	return result;
+}
+
+template < class SymbolType >
+void PositionHeap < SymbolType >::compose ( std::deque < sax::Token > & out ) const { // appends the XML tokens of this heap to out
+	out.emplace_back ( PositionHeap::getXmlTagName(), sax::Token::TokenType::START_ELEMENT );
+	alib::xmlApi < std::set < SymbolType > >::compose ( out, getAlphabet ( ) ); // alphabet first, the same order parse reads it in
+	alib::xmlApi < std::trie < SymbolType, unsigned > >::compose ( out, getRoot ( ) ); // then the trie
+	out.emplace_back ( PositionHeap::getXmlTagName(), sax::Token::TokenType::END_ELEMENT );
+}
+
+template < class SymbolType >
+alib::ObjectBase* PositionHeap < SymbolType >::inc() && { // rvalue-only: wraps this object into a new-allocated alib::UniqueObject
+	return new alib::UniqueObject(alib::Object(std::move(*this)), primitive::Integer(0)); // NOTE(review): primitive::Integer is used while this header directly includes only primitive/Unsigned.h — confirm it is reachable through another include
+}
+
+} /* namespace indexes */
+
+namespace std {
+
+template < class SymbolType >
+class ComponentConstraint < indexes::PositionHeap < SymbolType >, SymbolType, indexes::GeneralAlphabet > {
+	// Constraints applied to the GeneralAlphabet component of indexes::PositionHeap.
+	static bool used ( const std::trie < SymbolType, unsigned > & trie, const SymbolType & symbol ) { // true iff symbol labels an edge anywhere below trie
+		for ( const std::pair < const SymbolType, std::trie < SymbolType, unsigned > > & child : trie.getChildren ( ) ) {
+			if ( symbol == child.first || used ( child.second, symbol ) ) // recurse into the child's subtree; the former call named an undeclared checkTrie
+				return true;
+		}
+		return false;
+	}
+
+public:
+	static bool used ( const indexes::PositionHeap < SymbolType > & index, const SymbolType & symbol ) { // symbol is in use when some edge of the heap's trie carries it
+		return used ( index.getRoot ( ), symbol );
+	}
+
+	static bool available ( const indexes::PositionHeap < SymbolType > &, const SymbolType & ) { // always true, regardless of heap and symbol
+		return true;
+	}
+
+	static void valid ( const indexes::PositionHeap < SymbolType > &, const SymbolType & ) { // no additional validity condition is checked
+	}
+};
+
+} /* namespace std */
+
+#endif /* POSITION_HEAP_H_ */
diff --git a/astringology2/src/astringology.cpp b/astringology2/src/astringology.cpp
index 923f019a55..777f125f79 100644
--- a/astringology2/src/astringology.cpp
+++ b/astringology2/src/astringology.cpp
@@ -35,6 +35,7 @@
 #include <stringology/exact/SuffixAutomaton.h>
 #include <string/properties/BorderArray.h>
 #include <stringology/indexing/SuffixTrieNaive.h>
+#include <stringology/indexing/PositionHeapNaive.h>
 #include <stringology/indexing/SuffixArrayNaive.h>
 
 int main ( int argc, char * argv[] ) {
@@ -61,6 +62,7 @@ int main ( int argc, char * argv[] ) {
 		allowed.push_back ( "backwardDAWGMatching" );
 		allowed.push_back ( "borderArray" );
 		allowed.push_back ( "suffixTrie" );
+		allowed.push_back ( "positionHeap" );
 		allowed.push_back ( "suffixArray" );
 		TCLAP::ValuesConstraint < std::string > allowedVals ( allowed );
 
@@ -291,6 +293,18 @@ int main ( int argc, char * argv[] ) {
 			measurements::start ( "Output write", measurements::Type::AUXILIARY );
 
 			alib::XmlDataFactory::toStdout ( suffixTrie );
+		} else if ( algorithm.getValue ( ) == "positionHeap" ) {
+			string::String subject = alib::XmlDataFactory::fromTokens < string::String > ( std::move ( sax::FromXMLParserHelper::parseInput(true, subjectInput).front ( ) ) );
+
+			measurements::end ( );
+			measurements::start ( "Algorithm", measurements::Type::MAIN );
+
+			indexes::PositionHeap < DefaultSymbolType > positionHeap = stringology::indexing::PositionHeapNaive::construct ( subject );
+
+			measurements::end ( );
+			measurements::start ( "Output write", measurements::Type::AUXILIARY );
+
+			alib::XmlDataFactory::toStdout ( positionHeap );
 		} else if ( algorithm.getValue ( ) == "suffixArray" ) {
 			string::String subject = alib::XmlDataFactory::fromTokens < string::String > ( std::move ( sax::FromXMLParserHelper::parseInput(true, subjectInput).front ( ) ) );
 
-- 
GitLab