From ad2507037dc474d1c37f3666049720d6fa2bf611 Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Thu, 27 Aug 2015 17:52:38 +0200 Subject: [PATCH] introduction of suffixTrie --- alib2data/src/alphabet/Alphabet.cpp | 31 ++- alib2data/src/alphabet/Alphabet.h | 27 ++- .../src/indexes/common/IndexFromXMLParser.cpp | 42 ++++ .../src/indexes/common/IndexFromXMLParser.h | 31 +++ .../src/indexes/common/IndexToXMLComposer.cpp | 35 ++++ .../src/indexes/common/IndexToXMLComposer.h | 31 +++ .../src/indexes/suffixTrie/SuffixTrie.cpp | 151 +++++++++++++++ alib2data/src/indexes/suffixTrie/SuffixTrie.h | 115 +++++++++++ .../src/indexes/suffixTrie/SuffixTrieNode.cpp | 179 ++++++++++++++++++ .../src/indexes/suffixTrie/SuffixTrieNode.h | 99 ++++++++++ 10 files changed, 708 insertions(+), 33 deletions(-) create mode 100644 alib2data/src/indexes/common/IndexFromXMLParser.cpp create mode 100644 alib2data/src/indexes/common/IndexFromXMLParser.h create mode 100644 alib2data/src/indexes/common/IndexToXMLComposer.cpp create mode 100644 alib2data/src/indexes/common/IndexToXMLComposer.h create mode 100644 alib2data/src/indexes/suffixTrie/SuffixTrie.cpp create mode 100644 alib2data/src/indexes/suffixTrie/SuffixTrie.h create mode 100644 alib2data/src/indexes/suffixTrie/SuffixTrieNode.cpp create mode 100644 alib2data/src/indexes/suffixTrie/SuffixTrieNode.h diff --git a/alib2data/src/alphabet/Alphabet.cpp b/alib2data/src/alphabet/Alphabet.cpp index 41111b9360..212252f757 100644 --- a/alib2data/src/alphabet/Alphabet.cpp +++ b/alib2data/src/alphabet/Alphabet.cpp @@ -9,35 +9,32 @@ namespace alphabet { -const std::set<alphabet::Symbol>& Alphabet::getContent() const { - return this->m_Data; +const std::set < alphabet::Symbol > & Alphabet::getAlphabet ( ) const { + return this->alphabet; } -void Alphabet::insertSymbol(alphabet::Symbol&& symbol) { - this->m_Data.insert(std::move(symbol)); +void Alphabet::addSymbol ( alphabet::Symbol symbol ) { + this->alphabet.insert ( 
std::move ( symbol ) ); } -void Alphabet::insertSymbol(const alphabet::Symbol& symbol) { - this->m_Data.insert(symbol); +bool Alphabet::isEmpty ( ) const { + return this->alphabet.size ( ) == 0; } -bool Alphabet::isEmpty() const { - return this->m_Data.size() == 0; +bool Alphabet::operator ==( const Alphabet & other ) const { + return this->alphabet == other.alphabet; } -bool Alphabet::operator==(const Alphabet& other) const { - return this->m_Data == other.m_Data; -} - -bool Alphabet::operator<(const Alphabet& other) const { - return this->m_Data < other.m_Data; +bool Alphabet::operator <( const Alphabet & other ) const { + return this->alphabet < other.alphabet; } -std::ostream& operator <<(std::ostream& out, const Alphabet& alphabet) { +std::ostream & operator <<( std::ostream & out, const Alphabet & alphabet ) { out << "(Alphabet "; - for(const alphabet::Symbol& symbol : alphabet.m_Data) { + + for ( const alphabet::Symbol & symbol : alphabet.alphabet ) out << symbol; - } + out << ")"; return out; } diff --git a/alib2data/src/alphabet/Alphabet.h b/alib2data/src/alphabet/Alphabet.h index 806fc6bae2..80b83eeac5 100644 --- a/alib2data/src/alphabet/Alphabet.h +++ b/alib2data/src/alphabet/Alphabet.h @@ -18,46 +18,41 @@ namespace alphabet { * Represents alphabet of symbols. */ class Alphabet { - std::set<alphabet::Symbol> m_Data; +protected: + std::set < alphabet::Symbol > alphabet; public: /** * @return List of symbols in the alphabet. 
*/ - const std::set<alphabet::Symbol>& getContent() const; - - /** - * @param symbol to insert - */ - void insertSymbol(alphabet::Symbol&& symbol); - + const std::set < alphabet::Symbol > & getAlphabet ( ) const; + /** * @param symbol to insert */ - void insertSymbol(const alphabet::Symbol& symbol); + void addSymbol ( alphabet::Symbol symbol ); /** * @return true if alphabet is an empty */ - bool isEmpty() const; - + bool isEmpty ( ) const; + /** * @param other other alphabet */ - bool operator==(const Alphabet& other) const; - + bool operator ==( const Alphabet & other ) const; + /** * @param other other alphabet */ - bool operator<(const Alphabet& other) const; + bool operator <( const Alphabet & other ) const; /** * Prints XML representation of the Alphabet to the output stream. * @param out output stream to which print the Alphabet * @param string Alphabet to print */ - friend std::ostream& operator<<(std::ostream& out, const Alphabet& string); - + friend std::ostream & operator <<( std::ostream & out, const Alphabet & string ); }; } /* namespace alphabet */ diff --git a/alib2data/src/indexes/common/IndexFromXMLParser.cpp b/alib2data/src/indexes/common/IndexFromXMLParser.cpp new file mode 100644 index 0000000000..fee9a012a1 --- /dev/null +++ b/alib2data/src/indexes/common/IndexFromXMLParser.cpp @@ -0,0 +1,42 @@ +/* + * IndexFromXMLParser.cpp + * + * Created on: Nov 16, 2014 + * Author: Stepan Plachy + */ + +#include "../../sax/FromXMLParserHelper.h" +#include "IndexFromXMLParser.h" +#include "../../sax/ParserException.h" +#include "../../XmlApi.hpp" + +namespace indexes { + +std::set < alphabet::Symbol > IndexFromXMLParser::parseAlphabet ( std::deque < sax::Token >::iterator & input ) { + std::set < alphabet::Symbol > symbols; + sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, "alphabet" ); + + while ( sax::FromXMLParserHelper::isTokenType ( input, sax::Token::TokenType::START_ELEMENT ) ) + symbols.insert ( alib::xmlApi < 
alphabet::Symbol >::parse ( input ) ); + + sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, "alphabet" ); + return symbols; +} + +SuffixTrieNode * IndexFromXMLParser::parseSuffixTrieNode ( std::deque < sax::Token >::iterator & input ) { + sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, "node" ); + std::map < alphabet::Symbol, SuffixTrieNode * > children; + + while ( sax::FromXMLParserHelper::isTokenType ( input, sax::Token::TokenType::START_ELEMENT ) ) { + sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, "child" ); + alphabet::Symbol symbol = alib::xmlApi < alphabet::Symbol >::parse ( input ); + children.insert ( std::make_pair ( std::move ( symbol ), parseSuffixTrieNode ( input ) ) ); + sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, "child" ); + } + + sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, "node" ); + + return new SuffixTrieNode ( children ); +} + +} /* namespace indexes */ diff --git a/alib2data/src/indexes/common/IndexFromXMLParser.h b/alib2data/src/indexes/common/IndexFromXMLParser.h new file mode 100644 index 0000000000..b0b7060307 --- /dev/null +++ b/alib2data/src/indexes/common/IndexFromXMLParser.h @@ -0,0 +1,31 @@ +/* + * IndexFromXMLParser.h + * + * Created on: Nov 16, 2014 + * Author: Stepan Plachy + */ + +#ifndef TREE_FROM_XML_PARSER_H_ +#define TREE_FROM_XML_PARSER_H_ + +#include <set> +#include <deque> + +#include "../../sax/Token.h" +#include "../../alphabet/SymbolFeatures.h" +#include "../suffixTrie/SuffixTrieNode.h" + +namespace indexes { + +/** + * Parser used to get indexes from XML parsed into list of Tokens. 
+ */ +class IndexFromXMLParser { +public: + static SuffixTrieNode * parseSuffixTrieNode ( std::deque < sax::Token >::iterator & input ); + static std::set < alphabet::Symbol > parseAlphabet ( std::deque < sax::Token >::iterator & input ); +}; + +} /* namespace indexes */ + +#endif /* TREE_FROM_XML_PARSER_H_ */ diff --git a/alib2data/src/indexes/common/IndexToXMLComposer.cpp b/alib2data/src/indexes/common/IndexToXMLComposer.cpp new file mode 100644 index 0000000000..27cb1d04a5 --- /dev/null +++ b/alib2data/src/indexes/common/IndexToXMLComposer.cpp @@ -0,0 +1,35 @@ +/* + * IndexToXMLComposer.cpp + * + * Created on: Nov 16, 2014 + * Author: Stepan Plachy + */ + +#include "IndexToXMLComposer.h" +#include "../../XmlApi.hpp" + +namespace indexes { + +void IndexToXMLComposer::composeAlphabet ( std::deque < sax::Token > & out, const std::set < alphabet::Symbol > & symbols ) { + out.emplace_back ( sax::Token ( "alphabet", sax::Token::TokenType::START_ELEMENT ) ); + + for ( const auto & symbol : symbols ) + alib::xmlApi < alphabet::Symbol >::compose ( out, symbol ); + + out.emplace_back ( sax::Token ( "alphabet", sax::Token::TokenType::END_ELEMENT ) ); +} + +void IndexToXMLComposer::composeNode ( std::deque < sax::Token > & out, const SuffixTrieNode & node ) { + out.emplace_back ( sax::Token ( "node", sax::Token::TokenType::START_ELEMENT ) ); + + for ( const auto & child : node.getChildren ( ) ) { + out.emplace_back ( sax::Token ( "child", sax::Token::TokenType::START_ELEMENT ) ); + alib::xmlApi < alphabet::Symbol >::compose ( out, child.first ); + composeNode ( out, * child.second ); + out.emplace_back ( sax::Token ( "child", sax::Token::TokenType::END_ELEMENT ) ); + } + + out.emplace_back ( sax::Token ( "node", sax::Token::TokenType::END_ELEMENT ) ); +} + +} /* namespace indexes */ diff --git a/alib2data/src/indexes/common/IndexToXMLComposer.h b/alib2data/src/indexes/common/IndexToXMLComposer.h new file mode 100644 index 0000000000..958b4171bf --- /dev/null +++ 
b/alib2data/src/indexes/common/IndexToXMLComposer.h @@ -0,0 +1,31 @@ +/* + * IndexToXMLComposer.h + * + * Created on: Nov 16, 2014 + * Author: Stepan Plachy + */ + +#ifndef TREE_TO_XML_COMPOSER_H_ +#define TREE_TO_XML_COMPOSER_H_ + +#include <deque> +#include <set> +#include "../../alphabet/SymbolFeatures.h" +#include "../../sax/Token.h" +#include "../suffixTrie/SuffixTrieNode.h" + +namespace indexes { + +/** + * This class contains methods to print XML representation of indexes to the output stream. + */ +class IndexToXMLComposer { +public: + static void composeAlphabet ( std::deque < sax::Token > & out, const std::set < alphabet::Symbol > & symbols ); + + static void composeNode ( std::deque < sax::Token > & out, const SuffixTrieNode & node ); +}; + +} /* namespace indexes */ + +#endif /* TREE_TO_XML_COMPOSER_H_ */ diff --git a/alib2data/src/indexes/suffixTrie/SuffixTrie.cpp b/alib2data/src/indexes/suffixTrie/SuffixTrie.cpp new file mode 100644 index 0000000000..ac32c50d05 --- /dev/null +++ b/alib2data/src/indexes/suffixTrie/SuffixTrie.cpp @@ -0,0 +1,151 @@ +/* + * SuffixTrie.cpp + * + * Created on: Nov 23, 2013 + * Author: Jan Travnicek + */ + +#include "SuffixTrie.h" +#include "../../exception/AlibException.h" + +#include <iostream> +#include <algorithm> +#include <sstream> + +#include "../../sax/FromXMLParserHelper.h" +#include "../common/IndexFromXMLParser.h" +#include "../common/IndexToXMLComposer.h" +#include "../../object/Object.h" +#include "../../XmlApi.hpp" + +namespace indexes { + +SuffixTrie::SuffixTrie ( std::set < alphabet::Symbol > alphabet, SuffixTrieNode tree ) { + this->alphabet = std::move ( alphabet ); + this->tree = NULL; + setTree ( std::move ( tree ) ); +} + +SuffixTrie::SuffixTrie ( SuffixTrieNode tree ) { + tree.computeMinimalAlphabet ( alphabet ); + this->tree = NULL; + setTree ( std::move ( tree ) ); +} + +SuffixTrie::SuffixTrie ( const SuffixTrie & other ) : tree ( other.tree->clone ( ) ) { + alphabet = other.alphabet; + 
this->tree->attachTree ( this ); +} + +SuffixTrie::SuffixTrie ( SuffixTrie && other ) noexcept : tree ( other.tree ) { + alphabet = std::move ( other.alphabet ); + this->tree->attachTree ( this ); + other.tree = NULL; +} + +alib::ObjectBase * SuffixTrie::clone ( ) const { + return new SuffixTrie ( * this ); +} + +alib::ObjectBase * SuffixTrie::plunder ( ) && { + return new SuffixTrie ( std::move ( * this ) ); +} + +SuffixTrie & SuffixTrie::operator =( const SuffixTrie & other ) { + if ( this == & other ) + return * this; + + * this = SuffixTrie ( other ); + + return * this; +} + +SuffixTrie & SuffixTrie::operator =( SuffixTrie && other ) noexcept { + std::swap ( this->tree, other.tree ); + std::swap ( this->alphabet, other.alphabet ); + + return * this; +} + +SuffixTrie::~SuffixTrie ( ) noexcept { + delete tree; +} + +const SuffixTrieNode & SuffixTrie::getRoot ( ) const { + return * tree; +} + +SuffixTrieNode & SuffixTrie::getRoot ( ) { + return * tree; +} + +void SuffixTrie::setTree ( SuffixTrieNode tree ) { + delete this->tree; + this->tree = std::move ( tree ).plunder ( ); + + if ( !this->tree->attachTree ( this ) ) { + delete this->tree; + throw exception::AlibException ( "Input symbols not in the alphabet." ); + } +} + +bool SuffixTrie::removeSymbolFromAlphabet ( const alphabet::Symbol & symbol ) { + if ( this->tree->testSymbol ( symbol ) ) + throw exception::AlibException ( "Input symbol \"" + ( std::string ) symbol + "\" is used." 
); + + return alphabet.erase ( symbol ); +} + +void SuffixTrie::operator >>( std::ostream & out ) const { + out << "(SuffixTrie " << * ( this->tree ) << ")"; +} + +std::ostream & operator <<( std::ostream & out, const SuffixTrie & instance ) { + instance >> out; + return out; +} + +int SuffixTrie::compare ( const SuffixTrie & other ) const { + int res = tree->compare ( * other.tree ); + + if ( res == 0 ) { + std::compare < std::set < alphabet::Symbol > > comp; + res = comp ( alphabet, other.alphabet ); + } + + return res; +} + +SuffixTrie::operator std::string ( ) const { + std::stringstream ss; + ss << * this; + return ss.str ( ); +} + +const std::string SuffixTrie::XML_TAG_NAME = "SuffixTrie"; + +SuffixTrie SuffixTrie::parse ( std::deque < sax::Token >::iterator & input ) { + sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, SuffixTrie::XML_TAG_NAME ); + std::set < alphabet::Symbol > rankedAlphabet = IndexFromXMLParser::parseAlphabet ( input ); + SuffixTrieNode * root = IndexFromXMLParser::parseSuffixTrieNode ( input ); + SuffixTrie tree ( std::move ( rankedAlphabet ), std::move ( * root ) ); + + delete root; + sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, SuffixTrie::XML_TAG_NAME ); + return tree; +} + +void SuffixTrie::compose ( std::deque < sax::Token > & out ) const { + out.emplace_back ( SuffixTrie::XML_TAG_NAME, sax::Token::TokenType::START_ELEMENT ); + IndexToXMLComposer::composeAlphabet ( out, alphabet ); + IndexToXMLComposer::composeNode ( out, * tree ); + out.emplace_back ( SuffixTrie::XML_TAG_NAME, sax::Token::TokenType::END_ELEMENT ); +} + +} /* namespace indexes */ + +namespace alib { + +xmlApi < alib::Object >::ParserRegister < indexes::SuffixTrie > suffixTreeParserRegister = xmlApi < alib::Object >::ParserRegister < indexes::SuffixTrie > ( indexes::SuffixTrie::XML_TAG_NAME, indexes::SuffixTrie::parse ); + +} /* namespace alib */ diff --git 
a/alib2data/src/indexes/suffixTrie/SuffixTrie.h b/alib2data/src/indexes/suffixTrie/SuffixTrie.h new file mode 100644 index 0000000000..dd9392dd1b --- /dev/null +++ b/alib2data/src/indexes/suffixTrie/SuffixTrie.h @@ -0,0 +1,115 @@ +/* + * SuffixTrie.h + * + * Created on: Nov 23, 2013 + * Author: Jan Travnicek + */ + +#ifndef RANKED_TREE_H_ +#define RANKED_TREE_H_ + +#include <vector> +#include <list> +#include <string> +#include <set> +#include "SuffixTrieNode.h" +#include "../../object/ObjectBase.h" +#include "../../alphabet/Alphabet.h" + +namespace indexes { + +/** + * Represents a suffix trie index over an alphabet of symbols. The trie is stored + * as a tree of SuffixTrieNode. + */ +class SuffixTrie : public alib::ObjectBase, public alphabet::Alphabet { +protected: + SuffixTrieNode * tree; + +public: + /** + * @return newly allocated copy of this trie + */ + virtual ObjectBase * clone ( ) const; + + /** + * @return newly allocated trie move-constructed from this one + */ + virtual ObjectBase * plunder ( ) &&; + + explicit SuffixTrie ( std::set < alphabet::Symbol > alphabet, SuffixTrieNode tree ); + explicit SuffixTrie ( SuffixTrieNode tree ); + + /** + * Copy constructor.
+ * @param other trie to copy + */ + SuffixTrie ( const SuffixTrie & other ); + SuffixTrie ( SuffixTrie && other ) noexcept; + SuffixTrie & operator =( const SuffixTrie & other ); + SuffixTrie & operator =( SuffixTrie && other ) noexcept; + ~SuffixTrie ( ) noexcept; + + /** + * @return Root node of the suffix trie + */ + const SuffixTrieNode & getRoot ( ) const; + + /** + * @return Root node of the suffix trie + */ + SuffixTrieNode & getRoot ( ); + + /** + * Sets the root node of the suffix trie + * @param tree root node to set + */ + void setTree ( SuffixTrieNode tree ); + + /** + * Removes a symbol from the alphabet of symbols available in the suffix trie + * @param symbol symbol to remove from the alphabet + */ + bool removeSymbolFromAlphabet ( const alphabet::Symbol & symbol ); + + /** + * Prints textual representation of the trie to the output stream. + * @param out output stream to which to print the trie + * @note the printed trie is the instance the operator is called on + */ + virtual void operator >>( std::ostream & out ) const; + + friend std::ostream & operator <<( std::ostream & out, const SuffixTrie & instance ); + + virtual int compare ( const ObjectBase & other ) const { + if ( std::type_index ( typeid ( * this ) ) == std::type_index ( typeid ( other ) ) ) return this->compare ( ( decltype ( * this ) )other ); + + return std::type_index ( typeid ( * this ) ) - std::type_index ( typeid ( other ) ); + } + + virtual int compare ( const SuffixTrie & other ) const; + + virtual explicit operator std::string ( ) const; + + const static std::string XML_TAG_NAME; + + static SuffixTrie parse ( std::deque < sax::Token >::iterator & input ); + + void compose ( std::deque < sax::Token > & out ) const; +}; + +} /* namespace indexes */ + +namespace std { + +template < > +struct compare < indexes::SuffixTrie > { + int operator ()( const indexes::SuffixTrie & first, const indexes::SuffixTrie & second ) const { + return first.compare ( second ); + } + +}; + +} /* namespace std 
*/ + +#endif /* RANKED_TREE_H_ */ diff --git a/alib2data/src/indexes/suffixTrie/SuffixTrieNode.cpp b/alib2data/src/indexes/suffixTrie/SuffixTrieNode.cpp new file mode 100644 index 0000000000..8002bbfcef --- /dev/null +++ b/alib2data/src/indexes/suffixTrie/SuffixTrieNode.cpp @@ -0,0 +1,179 @@ +/* + * SuffixTrieNode.cpp + * + * Created on: Nov 23, 2013 + * Author: Stepan Plachy + */ + +#include "SuffixTrieNode.h" +#include "SuffixTrie.h" +#include "../../exception/AlibException.h" +#include <sstream> + +namespace indexes { + +SuffixTrieNode::SuffixTrieNode ( std::map < alphabet::Symbol, SuffixTrieNode * > children ) : children ( std::move ( children ) ), parentTree ( NULL ) { + for ( auto & element : this->children ) + element.second->parent = this; + + this->attachTree ( NULL ); + this->parent = NULL; +} + +SuffixTrieNode::SuffixTrieNode ( const SuffixTrieNode & other ) : parentTree ( NULL ) { + for ( const auto & element : other.children ) + children.insert ( std::make_pair ( element.first, element.second->clone ( ) ) ); + + for ( auto & element : this->children ) + element.second->parent = this; + + this->attachTree ( NULL ); + this->parent = NULL; +} + +SuffixTrieNode::SuffixTrieNode ( SuffixTrieNode && other ) noexcept : children ( std::move ( other.children ) ), parentTree ( NULL ) { + other.children.clear ( ); + + for ( auto & element : this->children ) + element.second->parent = this; + + this->attachTree ( NULL ); + this->parent = NULL; +} + +SuffixTrieNode & SuffixTrieNode::operator =( const SuffixTrieNode & other ) { + if ( this == & other ) + return * this; + + * this = SuffixTrieNode ( other ); + + return * this; +} + +SuffixTrieNode & SuffixTrieNode::operator =( SuffixTrieNode && other ) noexcept { + std::swap ( this->children, other.children ); + std::swap ( this->parentTree, other.parentTree ); // this->parentTree is stored within other.parentTree and it is reattached on the next line + + for ( auto & element : this->children ) + 
element.second->parent = this; + + this->attachTree ( other.parentTree ); + + return * this; +} + +SuffixTrieNode::~SuffixTrieNode ( ) noexcept { + for ( auto element : children ) + delete element.second; + + children.clear ( ); +} + +const std::map < const alphabet::Symbol, const SuffixTrieNode * > & SuffixTrieNode::getChildren ( ) const { + return * reinterpret_cast < const std::map < const alphabet::Symbol, const SuffixTrieNode * > * > ( & children ); +} + +const std::map < alphabet::Symbol, SuffixTrieNode * > & SuffixTrieNode::getChildren ( ) { + return children; +} + +SuffixTrieNode * SuffixTrieNode::getParent ( ) { + return parent; +} + +const SuffixTrieNode * SuffixTrieNode::getParent ( ) const { + return parent; +} + +void SuffixTrieNode::swap ( SuffixTrieNode & other ) { + const alphabet::Alphabet * thisParentTree = this->parentTree; + const alphabet::Alphabet * otherParentTree = other.parentTree; + + SuffixTrieNode tmp = std::move ( other ); + + other = std::move ( * this ); + * this = std::move ( tmp ); + + this->attachTree ( thisParentTree ); + other.attachTree ( otherParentTree ); +} + +SuffixTrieNode * SuffixTrieNode::clone ( ) const { + return new SuffixTrieNode ( * this ); +} + +SuffixTrieNode * SuffixTrieNode::plunder ( ) && { + return new SuffixTrieNode ( std::move ( * this ) ); +} + +int SuffixTrieNode::compare ( const SuffixTrieNode & other ) const { + int thisSize = this->children.size ( ); + int otherSize = other.children.size ( ); + + if ( thisSize < otherSize ) return -1; + + if ( thisSize > otherSize ) return 1; + + auto thisIter = this->children.begin ( ); + auto otherIter = other.children.begin ( ); + + for ( ; thisIter != this->children.end ( ); ++thisIter, ++otherIter ) { + int res = thisIter->first.compare ( otherIter->first ); + + if ( res != 0 ) return res; + + res = thisIter->second->compare ( * otherIter->second ); + + if ( res != 0 ) return res; + } + + return 0; +} + +void SuffixTrieNode::operator >>( std::ostream & out ) const { 
+ out << "(SuffixTrieNode " << " children = " << this->children << ")"; +} + +std::ostream & operator <<( std::ostream & out, const SuffixTrieNode & node ) { + node >> out; + return out; +} + +bool SuffixTrieNode::testSymbol ( const alphabet::Symbol & symbol ) const { + for ( const auto & child : this->children ) { + if ( symbol == child.first ) return true; + + if ( child.second->testSymbol ( symbol ) ) return true; + } + + return false; +} + +bool SuffixTrieNode::attachTree ( const alphabet::Alphabet * tree ) { + if ( this->parentTree == tree ) return true; + + this->parentTree = tree; + + for ( const auto & child : this->children ) { + if ( ( this->parentTree != NULL ) && ( this->parentTree->getAlphabet ( ).find ( child.first ) == this->parentTree->getAlphabet ( ).end ( ) ) ) return false; + + if ( !child.second->attachTree ( tree ) ) return false; + } + + return true; +} + +void SuffixTrieNode::computeMinimalAlphabet ( std::set < alphabet::Symbol > & alphabet ) const { + for ( const auto & child : this->children ) { + alphabet.insert ( child.first ); + child.second->computeMinimalAlphabet ( alphabet ); + } +} + +SuffixTrieNode::operator std::string ( ) const { + std::stringstream ss; + ss << * this; + return ss.str ( ); +} + +} /* namespace indexes */ diff --git a/alib2data/src/indexes/suffixTrie/SuffixTrieNode.h b/alib2data/src/indexes/suffixTrie/SuffixTrieNode.h new file mode 100644 index 0000000000..68206812d9 --- /dev/null +++ b/alib2data/src/indexes/suffixTrie/SuffixTrieNode.h @@ -0,0 +1,99 @@ +/* + * SuffixTrieNode.h + * + * Created on: Nov 23, 2013 + * Author: Stepan Plachy + */ + +#ifndef SUFFIX_TRIE_NODE_H +#define SUFFIX_TRIE_NODE_H + +#include "../../alphabet/Symbol.h" +#include "../../primitive/Unsigned.h" +#include <map> +#include <set> +#include "../../alphabet/Alphabet.h" + +namespace indexes { + +/** + * Represents a node in the ranked tree. Contains name of the symbol. 
+ */ +class SuffixTrieNode { +protected: + std::map < alphabet::Symbol, SuffixTrieNode * > children; + + SuffixTrieNode * parent; + + /** + * Parent tree containing this node; NULL when the node is not attached to a tree + */ + const alphabet::Alphabet * parentTree; + + /** + * @return true if the symbol labels an edge anywhere in this subtree + */ + bool testSymbol ( const alphabet::Symbol & symbol ) const; + + /** + * Attaches this subtree to the tree; @return false if an edge symbol is not in the alphabet of the tree + */ + bool attachTree ( const alphabet::Alphabet * tree ); + + /** + * Inserts every edge symbol of this subtree into the given set + */ + void computeMinimalAlphabet ( std::set < alphabet::Symbol > & alphabet ) const; + +public: + explicit SuffixTrieNode ( std::map < alphabet::Symbol, SuffixTrieNode * > children ); + + SuffixTrieNode ( const SuffixTrieNode & other ); + SuffixTrieNode ( SuffixTrieNode && other ) noexcept; + SuffixTrieNode & operator =( const SuffixTrieNode & other ); + SuffixTrieNode & operator =( SuffixTrieNode && other ) noexcept; + ~SuffixTrieNode ( ) noexcept; + + /** + * @return newly allocated copy of this subtree + */ + SuffixTrieNode * clone ( ) const; + + /** + * @return newly allocated node that takes over the children of this one + */ + SuffixTrieNode * plunder ( ) &&; + + /** + * @return children + */ + const std::map < const alphabet::Symbol, const SuffixTrieNode * > & getChildren ( ) const; + + /** + * @return children + */ + const std::map < alphabet::Symbol, SuffixTrieNode * > & getChildren ( ); + + SuffixTrieNode * getParent ( ); + + const SuffixTrieNode * getParent ( ) const; + + void swap ( SuffixTrieNode & other ); + + int compare ( const SuffixTrieNode & ) const; + + /** + * Prints textual representation of this node to the output stream + */ + void operator >>( std::ostream & out ) const; + + friend std::ostream & operator <<( std::ostream &, const SuffixTrieNode & node ); + + explicit operator std::string ( ) const; + + friend class SuffixTrie; +}; + +} /* namespace indexes */ + +#endif /* SUFFIX_TRIE_NODE_H */ -- GitLab