diff --git a/alib2algo/src/stringology/indexing/PositionHeapNaive.h b/alib2algo/src/stringology/indexing/PositionHeapNaive.h
index e523eedb2701987834ce64141dcfb8a770049f23..08b636be337725a3b52ae91c5f32e56091ba06ed 100644
--- a/alib2algo/src/stringology/indexing/PositionHeapNaive.h
+++ b/alib2algo/src/stringology/indexing/PositionHeapNaive.h
@@ -20,6 +20,9 @@ namespace indexing {
 
 /**
  * Constructs a position heap for given string.
+ *
+ * Source: Position heaps: A simple and dynamic text indexing data structure
+ * Andrzej Ehrenfeucht, Ross M. McConnell, Nissa Osheim, Sung-Whan Woo
  */
 class PositionHeapNaive : public std::SingleDispatch < PositionHeapNaive, indexes::PositionHeap < DefaultSymbolType >, const string::StringBase & > {
 
@@ -54,7 +57,7 @@ indexes::PositionHeap < SymbolType > PositionHeapNaive::construct ( const string
 		n = & n->getChildren ( ).insert ( std::make_pair ( w.getContent ( )[k], std::trie < SymbolType, unsigned > ( node ) ) ).first->second;
 	}
 
-	return indexes::PositionHeap < SymbolType > ( w.getAlphabet ( ), trie );
+	return indexes::PositionHeap < SymbolType > ( w.getAlphabet ( ), trie, w.getContent ( ) );
 }
 
 } /* namespace indexing */
diff --git a/alib2data/src/indexes/PositionHeap.h b/alib2data/src/indexes/PositionHeap.h
index b1468051c79f07e71ca084ac99c775b40206ef50..950fc95d935598e9e6bb6ac1084a376e12499be2 100644
--- a/alib2data/src/indexes/PositionHeap.h
+++ b/alib2data/src/indexes/PositionHeap.h
@@ -29,6 +29,7 @@
 
 #include <container/ObjectsSet.h>
 #include <container/ObjectsTrie.h>
+#include <container/ObjectsVector.h>
 
 #include <primitive/Unsigned.h>
 
@@ -44,6 +45,7 @@ template < class SymbolType = DefaultSymbolType >
 class PositionHeap : public alib::ObjectBase, public std::Components < PositionHeap < SymbolType >, SymbolType, std::tuple < GeneralAlphabet >, std::tuple < > > {
 protected:
 	std::trie < SymbolType, unsigned > m_trie;
+	std::vector < SymbolType > m_string;
 
 public:
 	/**
@@ -56,8 +58,8 @@ public:
 	 */
 	virtual ObjectBase * plunder ( ) &&;
 
-	explicit PositionHeap ( std::set < SymbolType > edgeAlphabet, std::trie < SymbolType, unsigned > trie );
-	explicit PositionHeap ( std::trie < SymbolType, unsigned > trie );
+	explicit PositionHeap ( std::set < SymbolType > edgeAlphabet, std::trie < SymbolType, unsigned > trie, std::vector < SymbolType > string );
+	explicit PositionHeap ( std::trie < SymbolType, unsigned > trie, std::vector < SymbolType > string );
 
 	void checkTrie ( const std::trie < SymbolType, unsigned > & trie );
 
@@ -66,6 +68,8 @@ public:
 	 */
 	const std::trie < SymbolType, unsigned > & getRoot ( ) const;
 
+	const std::vector < SymbolType > & getString ( ) const;
+
 	const std::set < SymbolType > & getAlphabet ( ) const {
 		return this->template accessComponent < GeneralAlphabet > ( ).get ( );
 	}
@@ -119,12 +123,13 @@ public:
 namespace indexes {
 
 template < class SymbolType >
-PositionHeap < SymbolType >::PositionHeap ( std::set < SymbolType > edgeAlphabet, std::trie < SymbolType, unsigned > trie ) : std::Components < PositionHeap, SymbolType, std::tuple < GeneralAlphabet >, std::tuple < > > ( std::make_tuple ( std::move ( edgeAlphabet ) ), std::tuple < > ( ) ), m_trie ( std::move ( trie ) ) {
+PositionHeap < SymbolType >::PositionHeap ( std::set < SymbolType > edgeAlphabet, std::trie < SymbolType, unsigned > trie, std::vector < SymbolType > string ) : std::Components < PositionHeap, SymbolType, std::tuple < GeneralAlphabet >, std::tuple < > > ( std::make_tuple ( std::move ( edgeAlphabet ) ), std::tuple < > ( ) ), m_trie ( std::move ( trie ) ), m_string ( std::move ( string ) ) {
 	checkTrie ( this->m_trie );
+	// TODO check validity of the string like in LinearString
 }
 
 template < class SymbolType >
-PositionHeap < SymbolType >::PositionHeap ( std::trie < SymbolType, unsigned > trie ) : PositionHeap ( computeMinimalEdgeAlphabet ( trie ), trie ) {
+PositionHeap < SymbolType >::PositionHeap ( std::trie < SymbolType, unsigned > trie, std::vector < SymbolType > string ) : PositionHeap ( computeMinimalEdgeAlphabet ( trie ), trie, std::move ( string ) ) {
 }
 
 template < class SymbolType >
@@ -151,6 +156,11 @@ const std::trie < SymbolType, unsigned > & PositionHeap < SymbolType >::getRoot
 	return m_trie;
 }
 
+template < class SymbolType >
+const std::vector < SymbolType > & PositionHeap < SymbolType >::getString ( ) const {
+	return m_string;
+}
+
 template < class SymbolType >
 void PositionHeap < SymbolType >::setTree ( std::trie < SymbolType, unsigned > trie ) {
 	checkTrie ( trie );
@@ -184,7 +194,8 @@ PositionHeap < SymbolType > PositionHeap < SymbolType >::parse ( std::deque < sa
 	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, PositionHeap::getXmlTagName() );
 	std::set < SymbolType > edgeAlphabet = alib::xmlApi < std::set < SymbolType > >::parse ( input );
 	std::trie < SymbolType, unsigned > root = alib::xmlApi < std::trie < SymbolType, unsigned > >::parse ( input );
-	PositionHeap < SymbolType > trie ( std::move ( edgeAlphabet ), std::move ( root ) );
+	std::vector < SymbolType > string = alib::xmlApi < std::vector < SymbolType > >::parse ( input );
+	PositionHeap < SymbolType > trie ( std::move ( edgeAlphabet ), std::move ( root ), std::move ( string ) );
 	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, PositionHeap::getXmlTagName() );
 
 	return trie;
@@ -195,6 +206,7 @@ void PositionHeap < SymbolType >::compose ( std::deque < sax::Token > & out ) co
 	out.emplace_back ( PositionHeap::getXmlTagName(), sax::Token::TokenType::START_ELEMENT );
 	alib::xmlApi < std::set < SymbolType > >::compose ( out, getAlphabet ( ) );
 	alib::xmlApi < std::trie < SymbolType, unsigned > >::compose ( out, getRoot ( ) );
+	alib::xmlApi < std::vector < SymbolType > >::compose ( out, getString ( ) );
 	out.emplace_back ( PositionHeap::getXmlTagName(), sax::Token::TokenType::END_ELEMENT );
 }
 
@@ -220,7 +232,8 @@ class ComponentConstraint < indexes::PositionHeap < SymbolType >, SymbolType, in
 
 public:
 	static bool used ( const indexes::PositionHeap < SymbolType > & index, const SymbolType & symbol ) {
-		return used ( index.getRoot ( ), symbol );
+		const std::vector < SymbolType > & content = index.getString ( );
+		return std::find ( content.begin(), content.end(), symbol ) != content.end() || used ( index.getRoot ( ), symbol );
 	}
 
 	static bool available ( const indexes::PositionHeap < SymbolType > &, const SymbolType & ) {