diff --git a/alib2algo/src/stringology/indexing/SuffixTrie.cpp b/alib2algo/src/stringology/indexing/SuffixTrie.cpp index 700ab45602a3b57d64fa3ce54cbe6cd2a829538a..a4f550b70a6569dddbe68b7638b0017039ad6361 100644 --- a/alib2algo/src/stringology/indexing/SuffixTrie.cpp +++ b/alib2algo/src/stringology/indexing/SuffixTrie.cpp @@ -7,8 +7,6 @@ #include "SuffixTrie.h" -#include <indexes/suffixTrie/SuffixTrie.h> - #include <exception/AlibException.h> #include <string/LinearString.h> #include <string/Epsilon.h> @@ -18,35 +16,51 @@ namespace stringology { namespace indexing { -indexes::SuffixTrie SuffixTrie::construct ( const string::String & string ) { +indexes::SuffixTrieFinalMark SuffixTrie::construct ( const string::String & string ) { return getInstance ( ).dispatch ( string.getData ( ) ); } -indexes::SuffixTrie SuffixTrie::construct ( const string::LinearString & w ) { - indexes::SuffixTrie res ( w.getAlphabet ( ) ); - - if ( w.getAlphabet ( ).find ( alphabet::Symbol ( alphabet::EndSymbol::END ) ) != w.getAlphabet ( ).end ( ) ) throw exception::AlibException ( "Implicit termination symbol is already in the string alphabet" ); - - res.addSymbol ( alphabet::Symbol ( alphabet::EndSymbol::END ) ); +indexes::SuffixTrieFinalMark SuffixTrie::construct ( const string::LinearString & w ) { + indexes::SuffixTrieFinalMark res ( w.getAlphabet ( ) ); for ( unsigned int i = 0; i < w.getContent ( ).size ( ); i++ ) { unsigned int k = i; - indexes::SuffixTrieNode * n = & res.getRoot ( ); + indexes::SuffixTrieNodeFinalMark * n = & res.getRoot ( ); + // inlined slow_find_one from MI-EVY lectures while ( k < w.getContent ( ).size ( ) && n->hasChild ( w.getContent ( )[k] ) ) n = & n->getChild ( w.getContent ( )[k++] ); for ( ; k < w.getContent ( ).size ( ); k++ ) { - n = & n->addChild ( w.getContent ( )[k], indexes::SuffixTrieNode ( { } ) ); + n = & n->addChild ( w.getContent ( )[k], indexes::SuffixTrieNodeFinalMark ( { }, false ) ); } - n->addChild ( alphabet::Symbol ( 
alphabet::EndSymbol::END ), indexes::SuffixTrieNode ( { } ) ); + n->setFinalMark ( true ); } return res; } -auto SuffixTrieLinearString = SuffixTrie::RegistratorWrapper < indexes::SuffixTrie, string::LinearString > ( SuffixTrie::getInstance ( ), SuffixTrie::construct ); +/*indexes::SuffixTrieTerminatingSymbol SuffixTrie::construct ( const string::TerminatedLinearString & w ) { + * indexes::SuffixTrieTerminatingSymbol res ( w.getAlphabet ( ), w.getTerminatingSymbol() ); + * + * for ( unsigned int i = 0; i < w.getContent ( ).size ( ); i++ ) { + * unsigned int k = i; + * indexes::SuffixTrieNodeTerminatingSymbol * n = & res.getRoot ( ); + * + * // inlined slow_find_one from MI-EVY lectures + * while ( k < w.getContent ( ).size ( ) && n->hasChild ( w.getContent ( )[k] ) ) + * n = & n->getChild ( w.getContent ( )[k++] ); + * + * for ( ; k < w.getContent ( ).size ( ); k++ ) { + * n = & n->addChild ( w.getContent ( )[k], indexes::SuffixTrieNodeTerminatingSymbol ( { } ) ); + * } + * } + * + * return res; + * }*/ + +auto SuffixTrieLinearString = SuffixTrie::RegistratorWrapper < indexes::SuffixTrieFinalMark, string::LinearString > ( SuffixTrie::getInstance ( ), SuffixTrie::construct ); } /* namespace indexing */ diff --git a/alib2algo/src/stringology/indexing/SuffixTrie.h b/alib2algo/src/stringology/indexing/SuffixTrie.h index 0f8f9fb2b1feaf3bde66efd28868e529e0582cc7..f208afdb686bae185967878881202c8513ff284d 100644 --- a/alib2algo/src/stringology/indexing/SuffixTrie.h +++ b/alib2algo/src/stringology/indexing/SuffixTrie.h @@ -8,7 +8,7 @@ #ifndef SUFFIX_TRIE_H_ #define SUFFIX_TRIE_H_ -#include <indexes/suffixTrie/SuffixTrie.h> +#include <indexes/suffixTrie/SuffixTrieFinalMark.h> #include <string/String.h> #include <string/LinearString.h> #include <common/multipleDispatch.hpp> @@ -23,16 +23,16 @@ namespace indexing { * Source: Lectures MI-EVY (CTU in Prague), Year 2014, Lecture 3, slide 4 */ -class SuffixTrie : public std::SingleDispatch < indexes::SuffixTrie, string::StringBase 
> { +class SuffixTrie : public std::SingleDispatch < indexes::SuffixTrieFinalMark, string::StringBase > { public: /** * Creates suffix trie * @param string string to construct suffix trie for * @return automaton */ - static indexes::SuffixTrie construct ( const string::String & string ); + static indexes::SuffixTrieFinalMark construct ( const string::String & string ); - static indexes::SuffixTrie construct ( const string::LinearString & string ); + static indexes::SuffixTrieFinalMark construct ( const string::LinearString & string ); public: static SuffixTrie & getInstance ( ) { diff --git a/alib2data/src/indexes/common/IndexFromXMLParser.cpp b/alib2data/src/indexes/common/IndexFromXMLParser.cpp index fee9a012a1aae0d7e54554b6aa8bbf505827641d..4f9dd8bd8cdb71aa7fd50b6e4c1175518c7e71f0 100644 --- a/alib2data/src/indexes/common/IndexFromXMLParser.cpp +++ b/alib2data/src/indexes/common/IndexFromXMLParser.cpp @@ -2,13 +2,14 @@ * IndexFromXMLParser.cpp * * Created on: Nov 16, 2014 - * Author: Stepan Plachy + * Author: Jan Travnicek */ #include "../../sax/FromXMLParserHelper.h" #include "IndexFromXMLParser.h" #include "../../sax/ParserException.h" #include "../../XmlApi.hpp" +#include "../../primitive/Bool.h" namespace indexes { @@ -23,20 +24,37 @@ std::set < alphabet::Symbol > IndexFromXMLParser::parseAlphabet ( std::deque < s return symbols; } -SuffixTrieNode * IndexFromXMLParser::parseSuffixTrieNode ( std::deque < sax::Token >::iterator & input ) { +SuffixTrieNodeFinalMark * IndexFromXMLParser::parseSuffixTrieNodeFinalMark ( std::deque < sax::Token >::iterator & input ) { sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, "node" ); - std::map < alphabet::Symbol, SuffixTrieNode * > children; + std::map < alphabet::Symbol, SuffixTrieNodeFinalMark * > children; + bool finalMark = alib::xmlApi < bool >::parse ( input ); while ( sax::FromXMLParserHelper::isTokenType ( input, sax::Token::TokenType::START_ELEMENT ) ) { 
sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, "child" ); alphabet::Symbol symbol = alib::xmlApi < alphabet::Symbol >::parse ( input ); - children.insert ( std::make_pair ( std::move ( symbol ), parseSuffixTrieNode ( input ) ) ); + children.insert ( std::make_pair ( std::move ( symbol ), parseSuffixTrieNodeFinalMark ( input ) ) ); sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, "child" ); } sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, "node" ); - return new SuffixTrieNode ( children ); + return new SuffixTrieNodeFinalMark ( children, finalMark ); +} + +SuffixTrieNodeTerminatingSymbol * IndexFromXMLParser::parseSuffixTrieNodeTerminatingSymbol ( std::deque < sax::Token >::iterator & input ) { + sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, "node" ); + std::map < alphabet::Symbol, SuffixTrieNodeTerminatingSymbol * > children; + + while ( sax::FromXMLParserHelper::isTokenType ( input, sax::Token::TokenType::START_ELEMENT ) ) { + sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, "child" ); + alphabet::Symbol symbol = alib::xmlApi < alphabet::Symbol >::parse ( input ); + children.insert ( std::make_pair ( std::move ( symbol ), parseSuffixTrieNodeTerminatingSymbol ( input ) ) ); + sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, "child" ); + } + + sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, "node" ); + + return new SuffixTrieNodeTerminatingSymbol ( children ); } } /* namespace indexes */ diff --git a/alib2data/src/indexes/common/IndexFromXMLParser.h b/alib2data/src/indexes/common/IndexFromXMLParser.h index b0b7060307863a486e15ee106a29fd43845d23d5..10b5a85d8079959275158d37b193c0c32ee39068 100644 --- a/alib2data/src/indexes/common/IndexFromXMLParser.h +++ b/alib2data/src/indexes/common/IndexFromXMLParser.h @@ -2,18 +2,19 @@ * 
IndexFromXMLParser.h * * Created on: Nov 16, 2014 - * Author: Stepan Plachy + * Author: Jan Travnicek */ -#ifndef TREE_FROM_XML_PARSER_H_ -#define TREE_FROM_XML_PARSER_H_ +#ifndef INDEX_FROM_XML_PARSER_H_ +#define INDEX_FROM_XML_PARSER_H_ #include <set> #include <deque> #include "../../sax/Token.h" #include "../../alphabet/SymbolFeatures.h" -#include "../suffixTrie/SuffixTrieNode.h" +#include "../suffixTrie/SuffixTrieNodeFinalMark.h" +#include "../suffixTrie/SuffixTrieNodeTerminatingSymbol.h" namespace indexes { @@ -22,10 +23,11 @@ namespace indexes { */ class IndexFromXMLParser { public: - static SuffixTrieNode * parseSuffixTrieNode ( std::deque < sax::Token >::iterator & input ); + static SuffixTrieNodeFinalMark * parseSuffixTrieNodeFinalMark ( std::deque < sax::Token >::iterator & input ); + static SuffixTrieNodeTerminatingSymbol * parseSuffixTrieNodeTerminatingSymbol ( std::deque < sax::Token >::iterator & input ); static std::set < alphabet::Symbol > parseAlphabet ( std::deque < sax::Token >::iterator & input ); }; } /* namespace indexes */ -#endif /* TREE_FROM_XML_PARSER_H_ */ +#endif /* INDEX_FROM_XML_PARSER_H_ */ diff --git a/alib2data/src/indexes/common/IndexToXMLComposer.cpp b/alib2data/src/indexes/common/IndexToXMLComposer.cpp index 27cb1d04a5d321a174da34e11b0507e54eb2bd04..88976599928a3134487b24ee90ee2e4a4084067d 100644 --- a/alib2data/src/indexes/common/IndexToXMLComposer.cpp +++ b/alib2data/src/indexes/common/IndexToXMLComposer.cpp @@ -2,11 +2,12 @@ * IndexToXMLComposer.cpp * * Created on: Nov 16, 2014 - * Author: Stepan Plachy + * Author: Jan Travnicek */ #include "IndexToXMLComposer.h" #include "../../XmlApi.hpp" +#include "../../primitive/Bool.h" namespace indexes { @@ -19,7 +20,21 @@ void IndexToXMLComposer::composeAlphabet ( std::deque < sax::Token > & out, cons out.emplace_back ( sax::Token ( "alphabet", sax::Token::TokenType::END_ELEMENT ) ); } -void IndexToXMLComposer::composeNode ( std::deque < sax::Token > & out, const SuffixTrieNode & node 
) { +void IndexToXMLComposer::composeNode ( std::deque < sax::Token > & out, const SuffixTrieNodeFinalMark & node ) { + out.emplace_back ( sax::Token ( "node", sax::Token::TokenType::START_ELEMENT ) ); + alib::xmlApi < bool >::compose ( out, node.getFinalMark ( ) ); + + for ( const auto & child : node.getChildren ( ) ) { + out.emplace_back ( sax::Token ( "child", sax::Token::TokenType::START_ELEMENT ) ); + alib::xmlApi < alphabet::Symbol >::compose ( out, child.first ); + composeNode ( out, * child.second ); + out.emplace_back ( sax::Token ( "child", sax::Token::TokenType::END_ELEMENT ) ); + } + + out.emplace_back ( sax::Token ( "node", sax::Token::TokenType::END_ELEMENT ) ); +} + +void IndexToXMLComposer::composeNode ( std::deque < sax::Token > & out, const SuffixTrieNodeTerminatingSymbol & node ) { out.emplace_back ( sax::Token ( "node", sax::Token::TokenType::START_ELEMENT ) ); for ( const auto & child : node.getChildren ( ) ) { diff --git a/alib2data/src/indexes/common/IndexToXMLComposer.h b/alib2data/src/indexes/common/IndexToXMLComposer.h index 958b4171bfe5ef422751e61d418737a326b2766d..59c69fda3d3cb94820aad5562733312b170fdc52 100644 --- a/alib2data/src/indexes/common/IndexToXMLComposer.h +++ b/alib2data/src/indexes/common/IndexToXMLComposer.h @@ -2,17 +2,18 @@ * IndexToXMLComposer.h * * Created on: Nov 16, 2014 - * Author: Stepan Plachy + * Author: Jan Travnicek */ -#ifndef TREE_TO_XML_COMPOSER_H_ -#define TREE_TO_XML_COMPOSER_H_ +#ifndef INDEX_TO_XML_COMPOSER_H_ +#define INDEX_TO_XML_COMPOSER_H_ #include <deque> #include <set> #include "../../alphabet/SymbolFeatures.h" #include "../../sax/Token.h" -#include "../suffixTrie/SuffixTrieNode.h" +#include "../suffixTrie/SuffixTrieNodeFinalMark.h" +#include "../suffixTrie/SuffixTrieNodeTerminatingSymbol.h" namespace indexes { @@ -23,9 +24,10 @@ class IndexToXMLComposer { public: static void composeAlphabet ( std::deque < sax::Token > & out, const std::set < alphabet::Symbol > & symbols ); - static void composeNode 
( std::deque < sax::Token > & out, const SuffixTrieNode & node ); + static void composeNode ( std::deque < sax::Token > & out, const SuffixTrieNodeFinalMark & node ); + static void composeNode ( std::deque < sax::Token > & out, const SuffixTrieNodeTerminatingSymbol & node ); }; } /* namespace indexes */ -#endif /* TREE_TO_XML_COMPOSER_H_ */ +#endif /* INDEX_TO_XML_COMPOSER_H_ */ diff --git a/alib2data/src/indexes/suffixTrie/SuffixTrie.cpp b/alib2data/src/indexes/suffixTrie/SuffixTrie.cpp deleted file mode 100644 index 9c54039524fd99ced29caf462949921033706fad..0000000000000000000000000000000000000000 --- a/alib2data/src/indexes/suffixTrie/SuffixTrie.cpp +++ /dev/null @@ -1,156 +0,0 @@ -/* - * SuffixTrie.cpp - * - * Created on: Nov 23, 2013 - * Author: Jan Travnicek - */ - -#include "SuffixTrie.h" -#include "../../exception/AlibException.h" - -#include <iostream> -#include <algorithm> -#include <sstream> - -#include "../../sax/FromXMLParserHelper.h" -#include "../common/IndexFromXMLParser.h" -#include "../common/IndexToXMLComposer.h" -#include "../../object/Object.h" -#include "../../XmlApi.hpp" - -namespace indexes { - -SuffixTrie::SuffixTrie ( std::set < alphabet::Symbol > alphabet ) { - this->alphabet = std::move ( alphabet ); - this->tree = new SuffixTrieNode ( { } ); -} - -SuffixTrie::SuffixTrie ( std::set < alphabet::Symbol > alphabet, SuffixTrieNode tree ) { - this->alphabet = std::move ( alphabet ); - this->tree = NULL; - setTree ( std::move ( tree ) ); -} - -SuffixTrie::SuffixTrie ( SuffixTrieNode tree ) { - tree.computeMinimalAlphabet ( alphabet ); - this->tree = NULL; - setTree ( std::move ( tree ) ); -} - -SuffixTrie::SuffixTrie ( const SuffixTrie & other ) : tree ( other.tree->clone ( ) ) { - alphabet = other.alphabet; - this->tree->attachTree ( this ); -} - -SuffixTrie::SuffixTrie ( SuffixTrie && other ) noexcept : tree ( other.tree ) { - alphabet = std::move ( other.alphabet ); - this->tree->attachTree ( this ); - other.tree = NULL; -} - 
-alib::ObjectBase * SuffixTrie::clone ( ) const { - return new SuffixTrie ( * this ); -} - -alib::ObjectBase * SuffixTrie::plunder ( ) && { - return new SuffixTrie ( std::move ( * this ) ); -} - -SuffixTrie & SuffixTrie::operator =( const SuffixTrie & other ) { - if ( this == & other ) - return * this; - - * this = SuffixTrie ( other ); - - return * this; -} - -SuffixTrie & SuffixTrie::operator =( SuffixTrie && other ) noexcept { - std::swap ( this->tree, other.tree ); - std::swap ( this->alphabet, other.alphabet ); - - return * this; -} - -SuffixTrie::~SuffixTrie ( ) noexcept { - delete tree; -} - -const SuffixTrieNode & SuffixTrie::getRoot ( ) const { - return * tree; -} - -SuffixTrieNode & SuffixTrie::getRoot ( ) { - return * tree; -} - -void SuffixTrie::setTree ( SuffixTrieNode tree ) { - delete this->tree; - this->tree = std::move ( tree ).plunder ( ); - - if ( !this->tree->attachTree ( this ) ) { - delete this->tree; - throw exception::AlibException ( "Input symbols not in the alphabet." ); - } -} - -bool SuffixTrie::removeSymbolFromAlphabet ( const alphabet::Symbol & symbol ) { - if ( this->tree->testSymbol ( symbol ) ) - throw exception::AlibException ( "Input symbol \"" + ( std::string ) symbol + "\" is used." 
); - - return alphabet.erase ( symbol ); -} - -void SuffixTrie::operator >>( std::ostream & out ) const { - out << "(SuffixTrie " << * ( this->tree ) << ")"; -} - -std::ostream & operator <<( std::ostream & out, const SuffixTrie & instance ) { - instance >> out; - return out; -} - -int SuffixTrie::compare ( const SuffixTrie & other ) const { - int res = tree->compare ( * other.tree ); - - if ( res == 0 ) { - std::compare < std::set < alphabet::Symbol > > comp; - res = comp ( alphabet, other.alphabet ); - } - - return res; -} - -SuffixTrie::operator std::string ( ) const { - std::stringstream ss; - ss << * this; - return ss.str ( ); -} - -const std::string SuffixTrie::XML_TAG_NAME = "SuffixTrie"; - -SuffixTrie SuffixTrie::parse ( std::deque < sax::Token >::iterator & input ) { - sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, SuffixTrie::XML_TAG_NAME ); - std::set < alphabet::Symbol > rankedAlphabet = IndexFromXMLParser::parseAlphabet ( input ); - SuffixTrieNode * root = IndexFromXMLParser::parseSuffixTrieNode ( input ); - SuffixTrie tree ( std::move ( rankedAlphabet ), std::move ( * root ) ); - - delete root; - sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, SuffixTrie::XML_TAG_NAME ); - return tree; -} - -void SuffixTrie::compose ( std::deque < sax::Token > & out ) const { - out.emplace_back ( SuffixTrie::XML_TAG_NAME, sax::Token::TokenType::START_ELEMENT ); - IndexToXMLComposer::composeAlphabet ( out, alphabet ); - IndexToXMLComposer::composeNode ( out, * tree ); - out.emplace_back ( SuffixTrie::XML_TAG_NAME, sax::Token::TokenType::END_ELEMENT ); -} - -} /* namespace indexes */ - -namespace alib { - -xmlApi < alib::Object >::ParserRegister < indexes::SuffixTrie > suffixTreeParserRegister = xmlApi < alib::Object >::ParserRegister < indexes::SuffixTrie > ( indexes::SuffixTrie::XML_TAG_NAME, indexes::SuffixTrie::parse ); - -} /* namespace alib */ diff --git 
a/alib2data/src/indexes/suffixTrie/SuffixTrieFinalMark.cpp b/alib2data/src/indexes/suffixTrie/SuffixTrieFinalMark.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c93ba8fd13ee7c1a0dcd101002003afda715c1ed --- /dev/null +++ b/alib2data/src/indexes/suffixTrie/SuffixTrieFinalMark.cpp @@ -0,0 +1,156 @@ +/* + * SuffixTrieFinalMark.cpp + * + * Created on: Nov 23, 2013 + * Author: Jan Travnicek + */ + +#include "SuffixTrieFinalMark.h" +#include "../../exception/AlibException.h" + +#include <iostream> +#include <algorithm> +#include <sstream> + +#include "../../sax/FromXMLParserHelper.h" +#include "../common/IndexFromXMLParser.h" +#include "../common/IndexToXMLComposer.h" +#include "../../object/Object.h" +#include "../../XmlApi.hpp" + +namespace indexes { + +SuffixTrieFinalMark::SuffixTrieFinalMark ( std::set < alphabet::Symbol > alphabet ) { + this->alphabet = std::move ( alphabet ); + this->tree = new SuffixTrieNodeFinalMark ( { }, false ); +} + +SuffixTrieFinalMark::SuffixTrieFinalMark ( std::set < alphabet::Symbol > alphabet, SuffixTrieNodeFinalMark tree ) { + this->alphabet = std::move ( alphabet ); + this->tree = NULL; + setTree ( std::move ( tree ) ); +} + +SuffixTrieFinalMark::SuffixTrieFinalMark ( SuffixTrieNodeFinalMark tree ) { + tree.computeMinimalAlphabet ( alphabet ); + this->tree = NULL; + setTree ( std::move ( tree ) ); +} + +SuffixTrieFinalMark::SuffixTrieFinalMark ( const SuffixTrieFinalMark & other ) : tree ( other.tree->clone ( ) ) { + alphabet = other.alphabet; + this->tree->attachTree ( this ); +} + +SuffixTrieFinalMark::SuffixTrieFinalMark ( SuffixTrieFinalMark && other ) noexcept : tree ( other.tree ) { + alphabet = std::move ( other.alphabet ); + this->tree->attachTree ( this ); + other.tree = NULL; +} + +alib::ObjectBase * SuffixTrieFinalMark::clone ( ) const { + return new SuffixTrieFinalMark ( * this ); +} + +alib::ObjectBase * SuffixTrieFinalMark::plunder ( ) && { + return new SuffixTrieFinalMark ( std::move ( * this ) 
); +} + +SuffixTrieFinalMark & SuffixTrieFinalMark::operator =( const SuffixTrieFinalMark & other ) { + if ( this == & other ) + return * this; + + * this = SuffixTrieFinalMark ( other ); + + return * this; +} + +SuffixTrieFinalMark & SuffixTrieFinalMark::operator =( SuffixTrieFinalMark && other ) noexcept { + std::swap ( this->tree, other.tree ); + std::swap ( this->alphabet, other.alphabet ); + + return * this; +} + +SuffixTrieFinalMark::~SuffixTrieFinalMark ( ) noexcept { + delete tree; +} + +const SuffixTrieNodeFinalMark & SuffixTrieFinalMark::getRoot ( ) const { + return * tree; +} + +SuffixTrieNodeFinalMark & SuffixTrieFinalMark::getRoot ( ) { + return * tree; +} + +void SuffixTrieFinalMark::setTree ( SuffixTrieNodeFinalMark tree ) { + delete this->tree; + this->tree = std::move ( tree ).plunder ( ); + + if ( !this->tree->attachTree ( this ) ) { + delete this->tree; + throw exception::AlibException ( "Input symbols not in the alphabet." ); + } +} + +bool SuffixTrieFinalMark::removeSymbolFromAlphabet ( const alphabet::Symbol & symbol ) { + if ( this->tree->testSymbol ( symbol ) ) + throw exception::AlibException ( "Input symbol \"" + ( std::string ) symbol + "\" is used." 
); + + return alphabet.erase ( symbol ); +} + +void SuffixTrieFinalMark::operator >>( std::ostream & out ) const { + out << "(SuffixTrieFinalMark " << * ( this->tree ) << ")"; +} + +std::ostream & operator <<( std::ostream & out, const SuffixTrieFinalMark & instance ) { + instance >> out; + return out; +} + +int SuffixTrieFinalMark::compare ( const SuffixTrieFinalMark & other ) const { + int res = tree->compare ( * other.tree ); + + if ( res == 0 ) { + std::compare < std::set < alphabet::Symbol > > comp; + res = comp ( alphabet, other.alphabet ); + } + + return res; +} + +SuffixTrieFinalMark::operator std::string ( ) const { + std::stringstream ss; + ss << * this; + return ss.str ( ); +} + +const std::string SuffixTrieFinalMark::XML_TAG_NAME = "SuffixTrieFinalMark"; + +SuffixTrieFinalMark SuffixTrieFinalMark::parse ( std::deque < sax::Token >::iterator & input ) { + sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, SuffixTrieFinalMark::XML_TAG_NAME ); + std::set < alphabet::Symbol > rankedAlphabet = IndexFromXMLParser::parseAlphabet ( input ); + SuffixTrieNodeFinalMark * root = IndexFromXMLParser::parseSuffixTrieNodeFinalMark ( input ); + SuffixTrieFinalMark tree ( std::move ( rankedAlphabet ), std::move ( * root ) ); + + delete root; + sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, SuffixTrieFinalMark::XML_TAG_NAME ); + return tree; +} + +void SuffixTrieFinalMark::compose ( std::deque < sax::Token > & out ) const { + out.emplace_back ( SuffixTrieFinalMark::XML_TAG_NAME, sax::Token::TokenType::START_ELEMENT ); + IndexToXMLComposer::composeAlphabet ( out, alphabet ); + IndexToXMLComposer::composeNode ( out, * tree ); + out.emplace_back ( SuffixTrieFinalMark::XML_TAG_NAME, sax::Token::TokenType::END_ELEMENT ); +} + +} /* namespace indexes */ + +namespace alib { + +xmlApi < alib::Object >::ParserRegister < indexes::SuffixTrieFinalMark > suffixTreeFinalMarkParserRegister = xmlApi < alib::Object 
>::ParserRegister < indexes::SuffixTrieFinalMark > ( indexes::SuffixTrieFinalMark::XML_TAG_NAME, indexes::SuffixTrieFinalMark::parse ); + +} /* namespace alib */ diff --git a/alib2data/src/indexes/suffixTrie/SuffixTrie.h b/alib2data/src/indexes/suffixTrie/SuffixTrieFinalMark.h similarity index 56% rename from alib2data/src/indexes/suffixTrie/SuffixTrie.h rename to alib2data/src/indexes/suffixTrie/SuffixTrieFinalMark.h index f2d7139e786cc3674b6be251ab75a48558c76f59..74f676f7bd5baba87bead2f5255cee58c3929c1e 100644 --- a/alib2data/src/indexes/suffixTrie/SuffixTrie.h +++ b/alib2data/src/indexes/suffixTrie/SuffixTrieFinalMark.h @@ -1,18 +1,18 @@ /* - * SuffixTrie.h + * SuffixTrieFinalMark.h * * Created on: Nov 23, 2013 * Author: Jan Travnicek */ -#ifndef RANKED_TREE_H_ -#define RANKED_TREE_H_ +#ifndef SUFFIX_TRIE_FINAL_MARK_H_ +#define SUFFIX_TRIE_FINAL_MARK_H_ #include <vector> #include <list> #include <string> #include <set> -#include "SuffixTrieNode.h" +#include "SuffixTrieNodeFinalMark.h" #include "../../object/ObjectBase.h" #include "../../alphabet/Alphabet.h" @@ -22,50 +22,50 @@ namespace indexes { * Represents regular expression parsed from the XML. Regular expression is stored * as a tree of RegExpElement. 
*/ -class SuffixTrie : public alib::ObjectBase, public alphabet::Alphabet { +class SuffixTrieFinalMark : public alib::ObjectBase, public alphabet::Alphabet { protected: - SuffixTrieNode * tree; + SuffixTrieNodeFinalMark * tree; public: /** - * @copydoc SuffixTrieNode::clone() const + * @copydoc SuffixTrieNodeFinalMark::clone() const */ virtual ObjectBase * clone ( ) const; /** - * @copydoc SuffixTrieNode::plunder() const + * @copydoc SuffixTrieNodeFinalMark::plunder() const */ virtual ObjectBase * plunder ( ) &&; - explicit SuffixTrie ( std::set < alphabet::Symbol > alphabet ); - explicit SuffixTrie ( std::set < alphabet::Symbol > alphabet, SuffixTrieNode tree ); - explicit SuffixTrie ( SuffixTrieNode tree ); + explicit SuffixTrieFinalMark ( std::set < alphabet::Symbol > alphabet ); + explicit SuffixTrieFinalMark ( std::set < alphabet::Symbol > alphabet, SuffixTrieNodeFinalMark tree ); + explicit SuffixTrieFinalMark ( SuffixTrieNodeFinalMark tree ); /** * Copy constructor. * @param other tree to copy */ - SuffixTrie ( const SuffixTrie & other ); - SuffixTrie ( SuffixTrie && other ) noexcept; - SuffixTrie & operator =( const SuffixTrie & other ); - SuffixTrie & operator =( SuffixTrie && other ) noexcept; - ~SuffixTrie ( ) noexcept; + SuffixTrieFinalMark ( const SuffixTrieFinalMark & other ); + SuffixTrieFinalMark ( SuffixTrieFinalMark && other ) noexcept; + SuffixTrieFinalMark & operator =( const SuffixTrieFinalMark & other ); + SuffixTrieFinalMark & operator =( SuffixTrieFinalMark && other ) noexcept; + ~SuffixTrieFinalMark ( ) noexcept; /** * @return Root node of the regular expression tree */ - const SuffixTrieNode & getRoot ( ) const; + const SuffixTrieNodeFinalMark & getRoot ( ) const; /** * @return Root node of the regular expression tree */ - SuffixTrieNode & getRoot ( ); + SuffixTrieNodeFinalMark & getRoot ( ); /** * Sets the root node of the regular expression tree * @param tree root node to set */ - void setTree ( SuffixTrieNode tree ); + void setTree ( 
SuffixTrieNodeFinalMark tree ); /** * Removes symbol from the alphabet of symbol available in the regular expression @@ -80,7 +80,7 @@ public: */ virtual void operator >>( std::ostream & out ) const; - friend std::ostream & operator <<( std::ostream & out, const SuffixTrie & instance ); + friend std::ostream & operator <<( std::ostream & out, const SuffixTrieFinalMark & instance ); virtual int compare ( const ObjectBase & other ) const { if ( std::type_index ( typeid ( * this ) ) == std::type_index ( typeid ( other ) ) ) return this->compare ( ( decltype ( * this ) )other ); @@ -88,13 +88,13 @@ public: return std::type_index ( typeid ( * this ) ) - std::type_index ( typeid ( other ) ); } - virtual int compare ( const SuffixTrie & other ) const; + virtual int compare ( const SuffixTrieFinalMark & other ) const; virtual explicit operator std::string ( ) const; const static std::string XML_TAG_NAME; - static SuffixTrie parse ( std::deque < sax::Token >::iterator & input ); + static SuffixTrieFinalMark parse ( std::deque < sax::Token >::iterator & input ); void compose ( std::deque < sax::Token > & out ) const; }; @@ -104,8 +104,8 @@ public: namespace std { template < > -struct compare < indexes::SuffixTrie > { - int operator ()( const indexes::SuffixTrie & first, const indexes::SuffixTrie & second ) const { +struct compare < indexes::SuffixTrieFinalMark > { + int operator ()( const indexes::SuffixTrieFinalMark & first, const indexes::SuffixTrieFinalMark & second ) const { return first.compare ( second ); } @@ -113,4 +113,4 @@ struct compare < indexes::SuffixTrie > { } /* namespace std */ -#endif /* RANKED_TREE_H_ */ +#endif /* SUFFIX_TRIE_FINAL_MARK_H_ */ diff --git a/alib2data/src/indexes/suffixTrie/SuffixTrieNode.h b/alib2data/src/indexes/suffixTrie/SuffixTrieNode.h deleted file mode 100644 index 202185473188917b535ca3e6032522a0af3a067d..0000000000000000000000000000000000000000 --- a/alib2data/src/indexes/suffixTrie/SuffixTrieNode.h +++ /dev/null @@ -1,107 +0,0 @@ 
-/* - * SuffixTrieNode.h - * - * Created on: Nov 23, 2013 - * Author: Stepan Plachy - */ - -#ifndef SUFFIX_TRIE_NODE_H -#define SUFFIX_TRIE_NODE_H - -#include "../../alphabet/Symbol.h" -#include "../../primitive/Unsigned.h" -#include <map> -#include <set> -#include "../../alphabet/Alphabet.h" - -namespace indexes { - -/** - * Represents a node in the ranked tree. Contains name of the symbol. - */ -class SuffixTrieNode { -protected: - std::map < alphabet::Symbol, SuffixTrieNode * > children; - - SuffixTrieNode * parent; - - /** - * Parent tree contanining this instance of RankedTree - */ - const alphabet::Alphabet * parentTree; - - /** - * @copydoc SuffixTrieNode::testSymbol() const - */ - bool testSymbol ( const alphabet::Symbol & symbol ) const; - - /** - * @copydoc SuffixTrieNode::attachTree() - */ - bool attachTree ( const alphabet::Alphabet * tree ); - - /** - * @copydoc SuffixTrieNode::computeMinimalAlphabet() - */ - void computeMinimalAlphabet ( std::set < alphabet::Symbol > & alphabet ) const; - -public: - explicit SuffixTrieNode ( std::map < alphabet::Symbol, SuffixTrieNode * > children ); - - SuffixTrieNode ( const SuffixTrieNode & other ); - SuffixTrieNode ( SuffixTrieNode && other ) noexcept; - SuffixTrieNode & operator =( const SuffixTrieNode & other ); - SuffixTrieNode & operator =( SuffixTrieNode && other ) noexcept; - ~SuffixTrieNode ( ) noexcept; - - /** - * @copydoc SuffixTrieNode::clone() const - */ - SuffixTrieNode * clone ( ) const; - - /** - * @copydoc SuffixTrieNode::plunder() const - */ - SuffixTrieNode * plunder ( ) &&; - - /** - * @return children - */ - const std::map < const alphabet::Symbol, const SuffixTrieNode * > & getChildren ( ) const; - - /** - * @return children - */ - const std::map < alphabet::Symbol, SuffixTrieNode * > & getChildren ( ); - - SuffixTrieNode & getChild ( const alphabet::Symbol & symbol ); - - const SuffixTrieNode & getChild ( const alphabet::Symbol & symbol ) const; - - bool hasChild ( const alphabet::Symbol & 
symbol ) const; - - SuffixTrieNode & addChild ( alphabet::Symbol symbol, SuffixTrieNode node ); - - SuffixTrieNode * getParent ( ); - - const SuffixTrieNode * getParent ( ) const; - - void swap ( SuffixTrieNode & other ); - - int compare ( const SuffixTrieNode & ) const; - - /** - * @copydoc SuffixTrieNode::operator>>() const - */ - void operator >>( std::ostream & out ) const; - - friend std::ostream & operator <<( std::ostream &, const SuffixTrieNode & node ); - - explicit operator std::string ( ) const; - - friend class SuffixTrie; -}; - -} /* namespace indexes */ - -#endif /* SUFFIX_TRIE_NODE_H */ diff --git a/alib2data/src/indexes/suffixTrie/SuffixTrieNode.cpp b/alib2data/src/indexes/suffixTrie/SuffixTrieNodeFinalMark.cpp similarity index 51% rename from alib2data/src/indexes/suffixTrie/SuffixTrieNode.cpp rename to alib2data/src/indexes/suffixTrie/SuffixTrieNodeFinalMark.cpp index 41168898010187f1d32ff1ae04f6f389b8deef1f..445ca0eb5b51b5ec7ed1451db13e1f2453fec45b 100644 --- a/alib2data/src/indexes/suffixTrie/SuffixTrieNode.cpp +++ b/alib2data/src/indexes/suffixTrie/SuffixTrieNodeFinalMark.cpp @@ -1,18 +1,18 @@ /* - * SuffixTrieNode.cpp + * SuffixTrieNodeFinalMark.cpp * * Created on: Nov 23, 2013 * Author: Stepan Plachy */ -#include "SuffixTrieNode.h" -#include "SuffixTrie.h" +#include "SuffixTrieNodeFinalMark.h" +#include "SuffixTrieFinalMark.h" #include "../../exception/AlibException.h" #include <sstream> namespace indexes { -SuffixTrieNode::SuffixTrieNode ( std::map < alphabet::Symbol, SuffixTrieNode * > children ) : children ( std::move ( children ) ), parentTree ( NULL ) { +SuffixTrieNodeFinalMark::SuffixTrieNodeFinalMark ( std::map < alphabet::Symbol, SuffixTrieNodeFinalMark * > children, bool finalMark ) : children ( std::move ( children ) ), finalMark ( finalMark ), parentTree ( NULL ) { for ( auto & element : this->children ) element.second->parent = this; @@ -20,7 +20,7 @@ SuffixTrieNode::SuffixTrieNode ( std::map < alphabet::Symbol, SuffixTrieNode * > 
this->parent = NULL; } -SuffixTrieNode::SuffixTrieNode ( const SuffixTrieNode & other ) : parentTree ( NULL ) { +SuffixTrieNodeFinalMark::SuffixTrieNodeFinalMark ( const SuffixTrieNodeFinalMark & other ) : finalMark ( other.finalMark ), parentTree ( NULL ) { for ( const auto & element : other.children ) children.insert ( std::make_pair ( element.first, element.second->clone ( ) ) ); @@ -31,7 +31,7 @@ SuffixTrieNode::SuffixTrieNode ( const SuffixTrieNode & other ) : parentTree ( N this->parent = NULL; } -SuffixTrieNode::SuffixTrieNode ( SuffixTrieNode && other ) noexcept : children ( std::move ( other.children ) ), parentTree ( NULL ) { +SuffixTrieNodeFinalMark::SuffixTrieNodeFinalMark ( SuffixTrieNodeFinalMark && other ) noexcept : children ( std::move ( other.children ) ), finalMark ( other.finalMark ), parentTree ( NULL ) { other.children.clear ( ); for ( auto & element : this->children ) @@ -41,17 +41,18 @@ SuffixTrieNode::SuffixTrieNode ( SuffixTrieNode && other ) noexcept : children ( this->parent = NULL; } -SuffixTrieNode & SuffixTrieNode::operator =( const SuffixTrieNode & other ) { +SuffixTrieNodeFinalMark & SuffixTrieNodeFinalMark::operator =( const SuffixTrieNodeFinalMark & other ) { if ( this == & other ) return * this; - * this = SuffixTrieNode ( other ); + * this = SuffixTrieNodeFinalMark ( other ); return * this; } -SuffixTrieNode & SuffixTrieNode::operator =( SuffixTrieNode && other ) noexcept { +SuffixTrieNodeFinalMark & SuffixTrieNodeFinalMark::operator =( SuffixTrieNodeFinalMark && other ) noexcept { std::swap ( this->children, other.children ); + std::swap ( this->finalMark, other.finalMark ); std::swap ( this->parentTree, other.parentTree ); // this->parentTree is stored within other.parentTree and it is reattached on the next line for ( auto & element : this->children ) @@ -62,66 +63,74 @@ SuffixTrieNode & SuffixTrieNode::operator =( SuffixTrieNode && other ) noexcept return * this; } -SuffixTrieNode::~SuffixTrieNode ( ) noexcept { 
+SuffixTrieNodeFinalMark::~SuffixTrieNodeFinalMark ( ) noexcept { for ( auto element : children ) delete element.second; children.clear ( ); } -const std::map < const alphabet::Symbol, const SuffixTrieNode * > & SuffixTrieNode::getChildren ( ) const { - return * reinterpret_cast < const std::map < const alphabet::Symbol, const SuffixTrieNode * > * > ( & children ); +const std::map < const alphabet::Symbol, const SuffixTrieNodeFinalMark * > & SuffixTrieNodeFinalMark::getChildren ( ) const { + return * reinterpret_cast < const std::map < const alphabet::Symbol, const SuffixTrieNodeFinalMark * > * > ( & children ); } -const std::map < alphabet::Symbol, SuffixTrieNode * > & SuffixTrieNode::getChildren ( ) { +bool SuffixTrieNodeFinalMark::getFinalMark ( ) const { + return finalMark; +} + +void SuffixTrieNodeFinalMark::setFinalMark ( bool newFinalMark ) { + finalMark = newFinalMark; +} + +const std::map < alphabet::Symbol, SuffixTrieNodeFinalMark * > & SuffixTrieNodeFinalMark::getChildren ( ) { return children; } -SuffixTrieNode & SuffixTrieNode::getChild ( const alphabet::Symbol & symbol ) { - std::map < alphabet::Symbol, SuffixTrieNode * >::const_iterator iter = children.find ( symbol ); +SuffixTrieNodeFinalMark & SuffixTrieNodeFinalMark::getChild ( const alphabet::Symbol & symbol ) { + std::map < alphabet::Symbol, SuffixTrieNodeFinalMark * >::const_iterator iter = children.find ( symbol ); if ( iter == children.end ( ) ) throw exception::AlibException ( "child does not exist" ); return * iter->second; } -const SuffixTrieNode & SuffixTrieNode::getChild ( const alphabet::Symbol & symbol ) const { - std::map < alphabet::Symbol, SuffixTrieNode * >::const_iterator iter = children.find ( symbol ); +const SuffixTrieNodeFinalMark & SuffixTrieNodeFinalMark::getChild ( const alphabet::Symbol & symbol ) const { + std::map < alphabet::Symbol, SuffixTrieNodeFinalMark * >::const_iterator iter = children.find ( symbol ); if ( iter == children.end ( ) ) throw exception::AlibException 
( "child does not exist" ); return * iter->second; } -bool SuffixTrieNode::hasChild ( const alphabet::Symbol & symbol ) const { +bool SuffixTrieNodeFinalMark::hasChild ( const alphabet::Symbol & symbol ) const { if ( children.find ( symbol ) == children.end ( ) ) return false; return true; } -SuffixTrieNode & SuffixTrieNode::addChild ( alphabet::Symbol symbol, SuffixTrieNode node ) { - std::map < alphabet::Symbol, SuffixTrieNode * >::iterator iter = children.find ( symbol ); +SuffixTrieNodeFinalMark & SuffixTrieNodeFinalMark::addChild ( alphabet::Symbol symbol, SuffixTrieNodeFinalMark node ) { + std::map < alphabet::Symbol, SuffixTrieNodeFinalMark * >::iterator iter = children.find ( symbol ); if ( iter != children.end ( ) ) throw exception::AlibException ( "child already exist" ); if ( ( this->parentTree != NULL ) && ( this->parentTree->getAlphabet ( ).find ( symbol ) == this->parentTree->getAlphabet ( ).end ( ) ) ) throw exception::AlibException ( "Symbol is not in the alphabet" ); - return * children.insert ( std::make_pair ( std::move ( symbol ), new SuffixTrieNode ( std::move ( node ) ) ) ).first->second; + return * children.insert ( std::make_pair ( std::move ( symbol ), new SuffixTrieNodeFinalMark ( std::move ( node ) ) ) ).first->second; } -SuffixTrieNode * SuffixTrieNode::getParent ( ) { +SuffixTrieNodeFinalMark * SuffixTrieNodeFinalMark::getParent ( ) { return parent; } -const SuffixTrieNode * SuffixTrieNode::getParent ( ) const { +const SuffixTrieNodeFinalMark * SuffixTrieNodeFinalMark::getParent ( ) const { return parent; } -void SuffixTrieNode::swap ( SuffixTrieNode & other ) { +void SuffixTrieNodeFinalMark::swap ( SuffixTrieNodeFinalMark & other ) { const alphabet::Alphabet * thisParentTree = this->parentTree; const alphabet::Alphabet * otherParentTree = other.parentTree; - SuffixTrieNode tmp = std::move ( other ); + SuffixTrieNodeFinalMark tmp = std::move ( other ); other = std::move ( * this ); * this = std::move ( tmp ); @@ -130,15 +139,15 @@ void 
SuffixTrieNode::swap ( SuffixTrieNode & other ) { other.attachTree ( otherParentTree ); } -SuffixTrieNode * SuffixTrieNode::clone ( ) const { - return new SuffixTrieNode ( * this ); +SuffixTrieNodeFinalMark * SuffixTrieNodeFinalMark::clone ( ) const { + return new SuffixTrieNodeFinalMark ( * this ); } -SuffixTrieNode * SuffixTrieNode::plunder ( ) && { - return new SuffixTrieNode ( std::move ( * this ) ); +SuffixTrieNodeFinalMark * SuffixTrieNodeFinalMark::plunder ( ) && { + return new SuffixTrieNodeFinalMark ( std::move ( * this ) ); } -int SuffixTrieNode::compare ( const SuffixTrieNode & other ) const { +int SuffixTrieNodeFinalMark::compare ( const SuffixTrieNodeFinalMark & other ) const { int thisSize = this->children.size ( ); int otherSize = other.children.size ( ); @@ -162,16 +171,16 @@ int SuffixTrieNode::compare ( const SuffixTrieNode & other ) const { return 0; } -void SuffixTrieNode::operator >>( std::ostream & out ) const { - out << "(SuffixTrieNode " << " children = " << this->children << ")"; +void SuffixTrieNodeFinalMark::operator >>( std::ostream & out ) const { + out << "(SuffixTrieNodeFinalMark " << " children = " << this->children << " finalMark = " << this->finalMark << ")"; } -std::ostream & operator <<( std::ostream & out, const SuffixTrieNode & node ) { +std::ostream & operator <<( std::ostream & out, const SuffixTrieNodeFinalMark & node ) { node >> out; return out; } -bool SuffixTrieNode::testSymbol ( const alphabet::Symbol & symbol ) const { +bool SuffixTrieNodeFinalMark::testSymbol ( const alphabet::Symbol & symbol ) const { for ( const auto & child : this->children ) { if ( symbol == child.first ) return true; @@ -181,7 +190,7 @@ bool SuffixTrieNode::testSymbol ( const alphabet::Symbol & symbol ) const { return false; } -bool SuffixTrieNode::attachTree ( const alphabet::Alphabet * tree ) { +bool SuffixTrieNodeFinalMark::attachTree ( const alphabet::Alphabet * tree ) { if ( this->parentTree == tree ) return true; this->parentTree = tree; @@ 
-195,14 +204,14 @@ bool SuffixTrieNode::attachTree ( const alphabet::Alphabet * tree ) { return true; } -void SuffixTrieNode::computeMinimalAlphabet ( std::set < alphabet::Symbol > & alphabet ) const { +void SuffixTrieNodeFinalMark::computeMinimalAlphabet ( std::set < alphabet::Symbol > & alphabet ) const { for ( const auto & child : this->children ) { alphabet.insert ( child.first ); child.second->computeMinimalAlphabet ( alphabet ); } } -SuffixTrieNode::operator std::string ( ) const { +SuffixTrieNodeFinalMark::operator std::string ( ) const { std::stringstream ss; ss << * this; return ss.str ( ); diff --git a/alib2data/src/indexes/suffixTrie/SuffixTrieNodeFinalMark.h b/alib2data/src/indexes/suffixTrie/SuffixTrieNodeFinalMark.h new file mode 100644 index 0000000000000000000000000000000000000000..22cea8bad1c55079d1374321a3caf046dbb02350 --- /dev/null +++ b/alib2data/src/indexes/suffixTrie/SuffixTrieNodeFinalMark.h @@ -0,0 +1,113 @@ +/* + * SuffixTrieNodeFinalMark.h + * + * Created on: Nov 23, 2013 + * Author: Stepan Plachy + */ + +#ifndef SUFFIX_TRIE_NODE_FINAL_MARK_H_ +#define SUFFIX_TRIE_NODE_FINAL_MARK_H_ + +#include "../../alphabet/Symbol.h" +#include "../../primitive/Unsigned.h" +#include <map> +#include <set> +#include "../../alphabet/Alphabet.h" + +namespace indexes { + +/** + * Represents a node in the ranked tree. Contains name of the symbol. 
+ */ +class SuffixTrieNodeFinalMark { +protected: + std::map < alphabet::Symbol, SuffixTrieNodeFinalMark * > children; + + bool finalMark; + + SuffixTrieNodeFinalMark * parent; + + /** + * Parent tree contanining this instance of RankedTree + */ + const alphabet::Alphabet * parentTree; + + /** + * @copydoc SuffixTrieNodeFinalMark::testSymbol() const + */ + bool testSymbol ( const alphabet::Symbol & symbol ) const; + + /** + * @copydoc SuffixTrieNodeFinalMark::attachTree() + */ + bool attachTree ( const alphabet::Alphabet * tree ); + + /** + * @copydoc SuffixTrieNodeFinalMark::computeMinimalAlphabet() + */ + void computeMinimalAlphabet ( std::set < alphabet::Symbol > & alphabet ) const; + +public: + explicit SuffixTrieNodeFinalMark ( std::map < alphabet::Symbol, SuffixTrieNodeFinalMark * > children, bool finalMark ); + + SuffixTrieNodeFinalMark ( const SuffixTrieNodeFinalMark & other ); + SuffixTrieNodeFinalMark ( SuffixTrieNodeFinalMark && other ) noexcept; + SuffixTrieNodeFinalMark & operator =( const SuffixTrieNodeFinalMark & other ); + SuffixTrieNodeFinalMark & operator =( SuffixTrieNodeFinalMark && other ) noexcept; + ~SuffixTrieNodeFinalMark ( ) noexcept; + + /** + * @copydoc SuffixTrieNodeFinalMark::clone() const + */ + SuffixTrieNodeFinalMark * clone ( ) const; + + /** + * @copydoc SuffixTrieNodeFinalMark::plunder() const + */ + SuffixTrieNodeFinalMark * plunder ( ) &&; + + /** + * @return children + */ + const std::map < const alphabet::Symbol, const SuffixTrieNodeFinalMark * > & getChildren ( ) const; + + bool getFinalMark ( ) const; + + void setFinalMark ( bool newFinalMark ); + + /** + * @return children + */ + const std::map < alphabet::Symbol, SuffixTrieNodeFinalMark * > & getChildren ( ); + + SuffixTrieNodeFinalMark & getChild ( const alphabet::Symbol & symbol ); + + const SuffixTrieNodeFinalMark & getChild ( const alphabet::Symbol & symbol ) const; + + bool hasChild ( const alphabet::Symbol & symbol ) const; + + SuffixTrieNodeFinalMark & addChild ( 
alphabet::Symbol symbol, SuffixTrieNodeFinalMark node ); + + SuffixTrieNodeFinalMark * getParent ( ); + + const SuffixTrieNodeFinalMark * getParent ( ) const; + + void swap ( SuffixTrieNodeFinalMark & other ); + + int compare ( const SuffixTrieNodeFinalMark & ) const; + + /** + * @copydoc SuffixTrieNodeFinalMark::operator>>() const + */ + void operator >>( std::ostream & out ) const; + + friend std::ostream & operator <<( std::ostream &, const SuffixTrieNodeFinalMark & node ); + + explicit operator std::string ( ) const; + + friend class SuffixTrieFinalMark; +}; + +} /* namespace indexes */ + +#endif /* SUFFIX_TRIE_NODE_FINAL_MARK_H_ */ diff --git a/alib2data/src/indexes/suffixTrie/SuffixTrieNodeTerminatingSymbol.cpp b/alib2data/src/indexes/suffixTrie/SuffixTrieNodeTerminatingSymbol.cpp new file mode 100644 index 0000000000000000000000000000000000000000..88a543c1e11340123c74425f3f6fe75b47becfae --- /dev/null +++ b/alib2data/src/indexes/suffixTrie/SuffixTrieNodeTerminatingSymbol.cpp @@ -0,0 +1,211 @@ +/* + * SuffixTrieNodeTerminatingSymbol.cpp + * + * Created on: Nov 23, 2013 + * Author: Stepan Plachy + */ + +#include "SuffixTrieNodeTerminatingSymbol.h" +#include "SuffixTrieTerminatingSymbol.h" +#include "../../exception/AlibException.h" +#include <sstream> + +namespace indexes { + +SuffixTrieNodeTerminatingSymbol::SuffixTrieNodeTerminatingSymbol ( std::map < alphabet::Symbol, SuffixTrieNodeTerminatingSymbol * > children ) : children ( std::move ( children ) ), parentTree ( NULL ) { + for ( auto & element : this->children ) + element.second->parent = this; + + this->attachTree ( NULL ); + this->parent = NULL; +} + +SuffixTrieNodeTerminatingSymbol::SuffixTrieNodeTerminatingSymbol ( const SuffixTrieNodeTerminatingSymbol & other ) : parentTree ( NULL ) { + for ( const auto & element : other.children ) + children.insert ( std::make_pair ( element.first, element.second->clone ( ) ) ); + + for ( auto & element : this->children ) + element.second->parent = this; + + 
this->attachTree ( NULL ); + this->parent = NULL; +} + +SuffixTrieNodeTerminatingSymbol::SuffixTrieNodeTerminatingSymbol ( SuffixTrieNodeTerminatingSymbol && other ) noexcept : children ( std::move ( other.children ) ), parentTree ( NULL ) { + other.children.clear ( ); + + for ( auto & element : this->children ) + element.second->parent = this; + + this->attachTree ( NULL ); + this->parent = NULL; +} + +SuffixTrieNodeTerminatingSymbol & SuffixTrieNodeTerminatingSymbol::operator =( const SuffixTrieNodeTerminatingSymbol & other ) { + if ( this == & other ) + return * this; + + * this = SuffixTrieNodeTerminatingSymbol ( other ); + + return * this; +} + +SuffixTrieNodeTerminatingSymbol & SuffixTrieNodeTerminatingSymbol::operator =( SuffixTrieNodeTerminatingSymbol && other ) noexcept { + std::swap ( this->children, other.children ); + std::swap ( this->parentTree, other.parentTree ); // this->parentTree is stored within other.parentTree and it is reattached on the next line + + for ( auto & element : this->children ) + element.second->parent = this; + + this->attachTree ( other.parentTree ); + + return * this; +} + +SuffixTrieNodeTerminatingSymbol::~SuffixTrieNodeTerminatingSymbol ( ) noexcept { + for ( auto element : children ) + delete element.second; + + children.clear ( ); +} + +const std::map < const alphabet::Symbol, const SuffixTrieNodeTerminatingSymbol * > & SuffixTrieNodeTerminatingSymbol::getChildren ( ) const { + return * reinterpret_cast < const std::map < const alphabet::Symbol, const SuffixTrieNodeTerminatingSymbol * > * > ( & children ); +} + +const std::map < alphabet::Symbol, SuffixTrieNodeTerminatingSymbol * > & SuffixTrieNodeTerminatingSymbol::getChildren ( ) { + return children; +} + +SuffixTrieNodeTerminatingSymbol & SuffixTrieNodeTerminatingSymbol::getChild ( const alphabet::Symbol & symbol ) { + std::map < alphabet::Symbol, SuffixTrieNodeTerminatingSymbol * >::const_iterator iter = children.find ( symbol ); + + if ( iter == children.end ( ) ) 
throw exception::AlibException ( "child does not exist" ); + + return * iter->second; +} + +const SuffixTrieNodeTerminatingSymbol & SuffixTrieNodeTerminatingSymbol::getChild ( const alphabet::Symbol & symbol ) const { + std::map < alphabet::Symbol, SuffixTrieNodeTerminatingSymbol * >::const_iterator iter = children.find ( symbol ); + + if ( iter == children.end ( ) ) throw exception::AlibException ( "child does not exist" ); + + return * iter->second; +} + +bool SuffixTrieNodeTerminatingSymbol::hasChild ( const alphabet::Symbol & symbol ) const { + if ( children.find ( symbol ) == children.end ( ) ) return false; + + return true; +} + +SuffixTrieNodeTerminatingSymbol & SuffixTrieNodeTerminatingSymbol::addChild ( alphabet::Symbol symbol, SuffixTrieNodeTerminatingSymbol node ) { + std::map < alphabet::Symbol, SuffixTrieNodeTerminatingSymbol * >::iterator iter = children.find ( symbol ); + + if ( iter != children.end ( ) ) throw exception::AlibException ( "child already exist" ); + + if ( ( this->parentTree != NULL ) && ( this->parentTree->getAlphabet ( ).find ( symbol ) == this->parentTree->getAlphabet ( ).end ( ) ) ) throw exception::AlibException ( "Symbol is not in the alphabet" ); + + return * children.insert ( std::make_pair ( std::move ( symbol ), new SuffixTrieNodeTerminatingSymbol ( std::move ( node ) ) ) ).first->second; +} + +SuffixTrieNodeTerminatingSymbol * SuffixTrieNodeTerminatingSymbol::getParent ( ) { + return parent; +} + +const SuffixTrieNodeTerminatingSymbol * SuffixTrieNodeTerminatingSymbol::getParent ( ) const { + return parent; +} + +void SuffixTrieNodeTerminatingSymbol::swap ( SuffixTrieNodeTerminatingSymbol & other ) { + const TerminatingSymbolAlphabet * thisParentTree = this->parentTree; + const TerminatingSymbolAlphabet * otherParentTree = other.parentTree; + + SuffixTrieNodeTerminatingSymbol tmp = std::move ( other ); + + other = std::move ( * this ); + * this = std::move ( tmp ); + + this->attachTree ( thisParentTree ); + other.attachTree 
( otherParentTree ); +} + +SuffixTrieNodeTerminatingSymbol * SuffixTrieNodeTerminatingSymbol::clone ( ) const { + return new SuffixTrieNodeTerminatingSymbol ( * this ); +} + +SuffixTrieNodeTerminatingSymbol * SuffixTrieNodeTerminatingSymbol::plunder ( ) && { + return new SuffixTrieNodeTerminatingSymbol ( std::move ( * this ) ); +} + +int SuffixTrieNodeTerminatingSymbol::compare ( const SuffixTrieNodeTerminatingSymbol & other ) const { + int thisSize = this->children.size ( ); + int otherSize = other.children.size ( ); + + if ( thisSize < otherSize ) return -1; + + if ( thisSize > otherSize ) return 1; + + auto thisIter = this->children.begin ( ); + auto otherIter = other.children.begin ( ); + + for ( ; thisIter != this->children.end ( ); ++thisIter, ++otherIter ) { + int res = thisIter->first.compare ( otherIter->first ); + + if ( res != 0 ) return res; + + res = thisIter->second->compare ( * otherIter->second ); + + if ( res != 0 ) return res; + } + + return 0; +} + +void SuffixTrieNodeTerminatingSymbol::operator >>( std::ostream & out ) const { + out << "(SuffixTrieNodeTerminatingSymbol " << " children = " << this->children << ")"; +} + +std::ostream & operator <<( std::ostream & out, const SuffixTrieNodeTerminatingSymbol & node ) { + node >> out; + return out; +} + +bool SuffixTrieNodeTerminatingSymbol::testSymbol ( const alphabet::Symbol & symbol ) const { + for ( const auto & child : this->children ) { + if ( symbol == child.first ) return true; + + if ( child.second->testSymbol ( symbol ) ) return true; + } + + return false; +} + +bool SuffixTrieNodeTerminatingSymbol::attachTree ( const TerminatingSymbolAlphabet * tree ) { + if ( this->parentTree == tree ) return true; + + this->parentTree = tree; + + for ( const auto & child : this->children ) { + if ( ( this->parentTree != NULL ) && ( this->parentTree->getAlphabet ( ).find ( child.first ) == this->parentTree->getAlphabet ( ).end ( ) ) ) return false; + + if ( !child.second->attachTree ( tree ) ) return 
false; + } + + return true; +} + +void SuffixTrieNodeTerminatingSymbol::computeMinimalAlphabet ( std::set < alphabet::Symbol > & alphabet ) const { + for ( const auto & child : this->children ) { + alphabet.insert ( child.first ); + child.second->computeMinimalAlphabet ( alphabet ); + } +} + +SuffixTrieNodeTerminatingSymbol::operator std::string ( ) const { + std::stringstream ss; + ss << * this; + return ss.str ( ); +} + +} /* namespace indexes */ diff --git a/alib2data/src/indexes/suffixTrie/SuffixTrieNodeTerminatingSymbol.h b/alib2data/src/indexes/suffixTrie/SuffixTrieNodeTerminatingSymbol.h new file mode 100644 index 0000000000000000000000000000000000000000..b49e167921de26875a9cc6d0cc32674b37c25d36 --- /dev/null +++ b/alib2data/src/indexes/suffixTrie/SuffixTrieNodeTerminatingSymbol.h @@ -0,0 +1,107 @@ +/* + * SuffixTrieNodeTerminatingSymbol.h + * + * Created on: Nov 23, 2013 + * Author: Stepan Plachy + */ + +#ifndef SUFFIX_TRIE_NODE_TERMINATING_SYMBOL_H_ +#define SUFFIX_TRIE_NODE_TERMINATING_SYMBOL_H_ + +#include "../../alphabet/Symbol.h" +#include "../../primitive/Unsigned.h" +#include <map> +#include <set> +#include "common/TerminatingSymbolAlphabet.h" + +namespace indexes { + +/** + * Represents a node in the ranked tree. Contains name of the symbol. 
+ */ +class SuffixTrieNodeTerminatingSymbol { +protected: + std::map < alphabet::Symbol, SuffixTrieNodeTerminatingSymbol * > children; + + SuffixTrieNodeTerminatingSymbol * parent; + + /** + * Parent tree contanining this instance of RankedTree + */ + const TerminatingSymbolAlphabet * parentTree; + + /** + * @copydoc SuffixTrieNodeTerminatingSymbol::testSymbol() const + */ + bool testSymbol ( const alphabet::Symbol & symbol ) const; + + /** + * @copydoc SuffixTrieNodeTerminatingSymbol::attachTree() + */ + bool attachTree ( const TerminatingSymbolAlphabet * tree ); + + /** + * @copydoc SuffixTrieNodeTerminatingSymbol::computeMinimalAlphabet() + */ + void computeMinimalAlphabet ( std::set < alphabet::Symbol > & alphabet ) const; + +public: + explicit SuffixTrieNodeTerminatingSymbol ( std::map < alphabet::Symbol, SuffixTrieNodeTerminatingSymbol * > children ); + + SuffixTrieNodeTerminatingSymbol ( const SuffixTrieNodeTerminatingSymbol & other ); + SuffixTrieNodeTerminatingSymbol ( SuffixTrieNodeTerminatingSymbol && other ) noexcept; + SuffixTrieNodeTerminatingSymbol & operator =( const SuffixTrieNodeTerminatingSymbol & other ); + SuffixTrieNodeTerminatingSymbol & operator =( SuffixTrieNodeTerminatingSymbol && other ) noexcept; + ~SuffixTrieNodeTerminatingSymbol ( ) noexcept; + + /** + * @copydoc SuffixTrieNodeTerminatingSymbol::clone() const + */ + SuffixTrieNodeTerminatingSymbol * clone ( ) const; + + /** + * @copydoc SuffixTrieNodeTerminatingSymbol::plunder() const + */ + SuffixTrieNodeTerminatingSymbol * plunder ( ) &&; + + /** + * @return children + */ + const std::map < const alphabet::Symbol, const SuffixTrieNodeTerminatingSymbol * > & getChildren ( ) const; + + /** + * @return children + */ + const std::map < alphabet::Symbol, SuffixTrieNodeTerminatingSymbol * > & getChildren ( ); + + SuffixTrieNodeTerminatingSymbol & getChild ( const alphabet::Symbol & symbol ); + + const SuffixTrieNodeTerminatingSymbol & getChild ( const alphabet::Symbol & symbol ) const; + + 
bool hasChild ( const alphabet::Symbol & symbol ) const; + + SuffixTrieNodeTerminatingSymbol & addChild ( alphabet::Symbol symbol, SuffixTrieNodeTerminatingSymbol node ); + + SuffixTrieNodeTerminatingSymbol * getParent ( ); + + const SuffixTrieNodeTerminatingSymbol * getParent ( ) const; + + void swap ( SuffixTrieNodeTerminatingSymbol & other ); + + int compare ( const SuffixTrieNodeTerminatingSymbol & ) const; + + /** + * @copydoc SuffixTrieNodeTerminatingSymbol::operator>>() const + */ + void operator >>( std::ostream & out ) const; + + friend std::ostream & operator <<( std::ostream &, const SuffixTrieNodeTerminatingSymbol & node ); + + explicit operator std::string ( ) const; + + friend class SuffixTrieTerminatingSymbol; +}; + +} /* namespace indexes */ + +#endif /* SUFFIX_TRIE_NODE_TERMINATING_SYMBOL_H_ */ diff --git a/alib2data/src/indexes/suffixTrie/SuffixTrieTerminatingSymbol.cpp b/alib2data/src/indexes/suffixTrie/SuffixTrieTerminatingSymbol.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8799c20acf7aa0b82aa11bec6794bd44097477a7 --- /dev/null +++ b/alib2data/src/indexes/suffixTrie/SuffixTrieTerminatingSymbol.cpp @@ -0,0 +1,158 @@ +/* + * SuffixTrieTerminatingSymbol.cpp + * + * Created on: Nov 23, 2013 + * Author: Jan Travnicek + */ + +#include "SuffixTrieTerminatingSymbol.h" +#include "../../exception/AlibException.h" + +#include <iostream> +#include <algorithm> +#include <sstream> + +#include "../../sax/FromXMLParserHelper.h" +#include "../common/IndexFromXMLParser.h" +#include "../common/IndexToXMLComposer.h" +#include "../../object/Object.h" +#include "../../XmlApi.hpp" + +namespace indexes { + +SuffixTrieTerminatingSymbol::SuffixTrieTerminatingSymbol ( std::set < alphabet::Symbol > alphabet, alphabet::Symbol terminatingSymbol ) : TerminatingSymbolAlphabet ( std::move ( terminatingSymbol ) ) { + this->alphabet = std::move ( alphabet ); + this->tree = new SuffixTrieNodeTerminatingSymbol ( { } ); +} + 
+/**
+ * Creates a trie over the given alphabet and terminating symbol; the root is taken from tree.
+ * NOTE(review): the assignment below replaces the alphabet that the base-class constructor has
+ * just seeded with the terminating symbol, so the alphabet argument presumably has to contain
+ * that symbol already - confirm with callers.
+ * @throws AlibException (from setTree) when the tree uses a symbol outside the alphabet
+ */
+SuffixTrieTerminatingSymbol::SuffixTrieTerminatingSymbol ( std::set < alphabet::Symbol > alphabet, alphabet::Symbol terminatingSymbol, SuffixTrieNodeTerminatingSymbol tree ) : TerminatingSymbolAlphabet ( std::move ( terminatingSymbol ) ) {
+	this->alphabet = std::move ( alphabet );
+	this->tree = NULL;
+	setTree ( std::move ( tree ) );
+}
+
+/**
+ * Creates a trie whose alphabet is the terminating symbol plus every symbol used as a child key in tree.
+ */
+SuffixTrieTerminatingSymbol::SuffixTrieTerminatingSymbol ( alphabet::Symbol terminatingSymbol, SuffixTrieNodeTerminatingSymbol tree ) : TerminatingSymbolAlphabet ( std::move ( terminatingSymbol ) ) {
+	tree.computeMinimalAlphabet ( alphabet );
+	this->tree = NULL;
+	setTree ( std::move ( tree ) );
+}
+
+/**
+ * Deep copy: clones the root of other and attaches the clone to this instance.
+ */
+SuffixTrieTerminatingSymbol::SuffixTrieTerminatingSymbol ( const SuffixTrieTerminatingSymbol & other ) : TerminatingSymbolAlphabet ( other.terminatingSymbol ), tree ( other.tree->clone ( ) ) {
+	alphabet = other.alphabet;
+	this->tree->attachTree ( this );
+}
+
+/**
+ * Takes over the root of other and re-attaches it to this instance; other is left without a root.
+ */
+SuffixTrieTerminatingSymbol::SuffixTrieTerminatingSymbol ( SuffixTrieTerminatingSymbol && other ) noexcept : TerminatingSymbolAlphabet ( std::move ( other.terminatingSymbol ) ), tree ( other.tree ) {
+	alphabet = std::move ( other.alphabet );
+	this->tree->attachTree ( this );
+	other.tree = NULL;
+}
+
+/**
+ * @return heap-allocated copy of this trie
+ */
+alib::ObjectBase * SuffixTrieTerminatingSymbol::clone ( ) const {
+	return new SuffixTrieTerminatingSymbol ( * this );
+}
+
+/**
+ * @return heap-allocated trie move-constructed from this instance
+ */
+alib::ObjectBase * SuffixTrieTerminatingSymbol::plunder ( ) && {
+	return new SuffixTrieTerminatingSymbol ( std::move ( * this ) );
+}
+
+/**
+ * Copy assignment implemented as "copy into a temporary, then move-assign the temporary".
+ */
+SuffixTrieTerminatingSymbol & SuffixTrieTerminatingSymbol::operator =( const SuffixTrieTerminatingSymbol & other ) {
+	if ( this == & other )
+		return * this;
+
+	* this = SuffixTrieTerminatingSymbol ( other );
+
+	return * this;
+}
+
+/**
+ * Move assignment: exchanges the complete state (root, alphabet, terminating symbol) with other.
+ */
+SuffixTrieTerminatingSymbol & SuffixTrieTerminatingSymbol::operator =( SuffixTrieTerminatingSymbol && other ) noexcept {
+	std::swap ( this->tree, other.tree );
+	std::swap ( this->alphabet, other.alphabet );
+	std::swap ( this->terminatingSymbol, other.terminatingSymbol ); // the terminating symbol belongs to the exchanged state as well
+
+	// Every node stores a pointer to the trie owning it. After the swap both roots still point
+	// at their previous owner; without re-attaching, this->tree would keep referring to other,
+	// which is usually a temporary about to be destroyed.
+	if ( this->tree != NULL ) this->tree->attachTree ( this );
+	if ( other.tree != NULL ) other.tree->attachTree ( & other );
+
+	return * this;
+}
+
+/**
+ * Releases the root node; the node destructor releases its children in turn.
+ */
+SuffixTrieTerminatingSymbol::~SuffixTrieTerminatingSymbol ( ) noexcept {
+	delete tree;
+}
+
+/**
+ * @return root node of the trie (read-only)
+ */
+const SuffixTrieNodeTerminatingSymbol & SuffixTrieTerminatingSymbol::getRoot ( ) const {
+	return * tree;
+}
+
+/**
+ * @return root node of the trie
+ */
+SuffixTrieNodeTerminatingSymbol & SuffixTrieTerminatingSymbol::getRoot ( ) {
+	return * tree;
+}
+
+/**
+ * Replaces the root node of the trie.
+ * The new root is attached first and only then swapped in, so a failure leaves the trie
+ * with its previous root instead of a pointer to an already deleted node.
+ * @param tree new root node
+ * @throws AlibException when the node uses a symbol that is not in the alphabet
+ */
+void SuffixTrieTerminatingSymbol::setTree ( SuffixTrieNodeTerminatingSymbol tree ) {
+	SuffixTrieNodeTerminatingSymbol * newTree = std::move ( tree ).plunder ( );
+
+	if ( !newTree->attachTree ( this ) ) {
+		delete newTree;
+		throw exception::AlibException ( "Input symbols not in the alphabet." );
+	}
+
+	delete this->tree;
+	this->tree = newTree;
+}
+
+/**
+ * Removes a symbol from the alphabet.
+ * NOTE(review): nothing here rejects the terminating symbol itself - confirm whether removing it should be allowed.
+ * @param symbol symbol to remove
+ * @return true when the symbol was present and has been erased
+ * @throws AlibException when the symbol is used in the trie
+ */
+bool SuffixTrieTerminatingSymbol::removeSymbolFromAlphabet ( const alphabet::Symbol & symbol ) {
+	if ( this->tree->testSymbol ( symbol ) )
+		throw exception::AlibException ( "Input symbol \"" + ( std::string ) symbol + "\" is used." );
+
+	return alphabet.erase ( symbol );
+}
+
+/**
+ * Prints a textual representation of the trie to the output stream.
+ */
+void SuffixTrieTerminatingSymbol::operator >>( std::ostream & out ) const {
+	out << "(SuffixTrieTerminatingSymbol " << * ( this->tree ) << ")";
+}
+
+std::ostream & operator <<( std::ostream & out, const SuffixTrieTerminatingSymbol & instance ) {
+	instance >> out;
+	return out;
+}
+
+/**
+ * Three-way comparison: by tree first, then by alphabet, then by terminating symbol.
+ * @return negative, zero or positive value
+ */
+int SuffixTrieTerminatingSymbol::compare ( const SuffixTrieTerminatingSymbol & other ) const {
+	int res = tree->compare ( * other.tree );
+
+	if ( res == 0 ) {
+		std::compare < std::set < alphabet::Symbol > > comp;
+		res = comp ( alphabet, other.alphabet );
+	}
+
+	// tries that differ only in their terminating symbol must not compare equal
+	if ( res == 0 )
+		res = terminatingSymbol.compare ( other.terminatingSymbol );
+
+	return res;
+}
+
+SuffixTrieTerminatingSymbol::operator std::string ( ) const {
+	std::stringstream ss;
+	ss << * this;
+	return ss.str ( );
+}
+
+const std::string SuffixTrieTerminatingSymbol::XML_TAG_NAME = "SuffixTrieTerminatingSymbol";
+
+/**
+ * Parses a trie from the token stream: start tag, alphabet, terminating symbol, root node, end tag.
+ */
+SuffixTrieTerminatingSymbol SuffixTrieTerminatingSymbol::parse ( std::deque < sax::Token >::iterator & input ) {
+	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, SuffixTrieTerminatingSymbol::XML_TAG_NAME );
+	std::set < alphabet::Symbol > rankedAlphabet = IndexFromXMLParser::parseAlphabet ( input );
+	alphabet::Symbol terminatingSymbol = alib::xmlApi < alphabet::Symbol >::parse ( input );
+	SuffixTrieNodeTerminatingSymbol * parsedRoot = IndexFromXMLParser::parseSuffixTrieNodeTerminatingSymbol ( input );
+
+	// Move the parsed node into a local and free the heap object right away (the node move
+	// constructor is noexcept), so nothing leaks when the constructor below throws.
+	SuffixTrieNodeTerminatingSymbol root ( std::move ( * parsedRoot ) );
+	delete parsedRoot;
+
+	SuffixTrieTerminatingSymbol tree ( std::move ( rankedAlphabet ), terminatingSymbol, std::move ( root ) );
+
+	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, SuffixTrieTerminatingSymbol::XML_TAG_NAME );
+	return tree;
+}
+
+/**
+ * Appends the tokens of this trie to out, in the order parse() reads them.
+ */
+void SuffixTrieTerminatingSymbol::compose ( std::deque < sax::Token > & out ) const {
+	out.emplace_back ( SuffixTrieTerminatingSymbol::XML_TAG_NAME, sax::Token::TokenType::START_ELEMENT );
+	IndexToXMLComposer::composeAlphabet ( out, alphabet );
+	alib::xmlApi < alphabet::Symbol >::compose ( out, terminatingSymbol );
+	IndexToXMLComposer::composeNode ( out, * tree );
+	out.emplace_back ( SuffixTrieTerminatingSymbol::XML_TAG_NAME, sax::Token::TokenType::END_ELEMENT );
+}
+
+} /* namespace indexes */
+
+namespace alib {
+
+xmlApi < alib::Object >::ParserRegister < indexes::SuffixTrieTerminatingSymbol > suffixTreeTerminatingSymbolParserRegister = xmlApi < alib::Object >::ParserRegister < indexes::SuffixTrieTerminatingSymbol > ( indexes::SuffixTrieTerminatingSymbol::XML_TAG_NAME, indexes::SuffixTrieTerminatingSymbol::parse );
+
+} /* namespace alib */
diff --git a/alib2data/src/indexes/suffixTrie/SuffixTrieTerminatingSymbol.h b/alib2data/src/indexes/suffixTrie/SuffixTrieTerminatingSymbol.h
new file mode 100644
index 0000000000000000000000000000000000000000..d31cfb385d4a2a6b8bad30b82300453b89c12961
--- /dev/null
+++ b/alib2data/src/indexes/suffixTrie/SuffixTrieTerminatingSymbol.h
@@ -0,0 +1,116 @@
+/*
+ * SuffixTrieTerminatingSymbol.h
+ *
+ * Created on: Nov 23, 2013
+ * Author: Jan Travnicek
+ */
+
+#ifndef SUFFIX_TRIE_TERMINATING_SYMBOL_H_
+#define SUFFIX_TRIE_TERMINATING_SYMBOL_H_
+
+#include <vector>
+#include <list>
+#include <string>
+#include <set>
+#include "SuffixTrieNodeTerminatingSymbol.h"
+#include "../../object/ObjectBase.h"
+#include "common/TerminatingSymbolAlphabet.h"
+
+namespace indexes {
+
+/**
+ * Suffix trie built on TerminatingSymbolAlphabet: besides the alphabet it carries a
+ * terminating symbol, and it holds the root SuffixTrieNodeTerminatingSymbol of the trie.
+ */
+class SuffixTrieTerminatingSymbol : public alib::ObjectBase, public TerminatingSymbolAlphabet {
+protected:
+	SuffixTrieNodeTerminatingSymbol * tree;
+
+public:
+	/**
+	 * @return heap-allocated copy of this trie
+	 */
+	virtual ObjectBase * clone ( ) const;
+
+	/**
+	 * @return heap-allocated trie move-constructed from this instance
+	 */
+	virtual ObjectBase * plunder ( ) &&;
+
+	explicit SuffixTrieTerminatingSymbol ( std::set < alphabet::Symbol > alphabet, alphabet::Symbol terminatingSymbol );
+	explicit SuffixTrieTerminatingSymbol ( std::set < alphabet::Symbol > alphabet, alphabet::Symbol terminatingSymbol, SuffixTrieNodeTerminatingSymbol tree );
+	explicit SuffixTrieTerminatingSymbol ( alphabet::Symbol terminatingSymbol, SuffixTrieNodeTerminatingSymbol tree );
+
+	/**
+	 * Copy constructor.
+	 * @param other trie to copy
+	 */
+	SuffixTrieTerminatingSymbol ( const SuffixTrieTerminatingSymbol & other );
+	SuffixTrieTerminatingSymbol ( SuffixTrieTerminatingSymbol && other ) noexcept;
+	SuffixTrieTerminatingSymbol & operator =( const SuffixTrieTerminatingSymbol & other );
+	SuffixTrieTerminatingSymbol & operator =( SuffixTrieTerminatingSymbol && other ) noexcept;
+	~SuffixTrieTerminatingSymbol ( ) noexcept;
+
+	/**
+	 * @return Root node of the suffix trie
+	 */
+	const SuffixTrieNodeTerminatingSymbol & getRoot ( ) const;
+
+	/**
+	 * @return Root node of the suffix trie
+	 */
+	SuffixTrieNodeTerminatingSymbol & getRoot ( );
+
+	/**
+	 * Sets the root node of the suffix trie
+	 * @param tree root node to set
+	 * @throws AlibException when the node uses a symbol that is not in the alphabet
+	 */
+	void setTree ( SuffixTrieNodeTerminatingSymbol tree );
+
+	/**
+	 * Removes a symbol from the alphabet of the suffix trie
+	 * @param symbol symbol to remove from the alphabet
+	 * @return true when the symbol was present and has been erased
+	 * @throws AlibException when the symbol is used in the trie
+	 */
+	bool removeSymbolFromAlphabet ( const alphabet::Symbol & symbol );
+
+	/**
+	 * Prints a textual representation of the trie to the output stream.
+	 * @param out output stream to print the trie to
+	 */
+	virtual void operator >>( std::ostream & out ) const;
+
+	friend std::ostream & operator <<( std::ostream & out, const SuffixTrieTerminatingSymbol & instance );
+
+	virtual int compare ( const ObjectBase & other ) const {
+		if ( std::type_index ( typeid ( * this ) ) == std::type_index ( typeid ( other ) ) ) return this->compare ( ( decltype ( * this ) )other );
+
+		return std::type_index ( typeid ( * this ) ) - std::type_index ( typeid ( other ) );
+	}
+
+	virtual int compare ( const SuffixTrieTerminatingSymbol & other ) const;
+
+	virtual explicit operator std::string ( ) const;
+
+	const static std::string XML_TAG_NAME;
+
+	static SuffixTrieTerminatingSymbol parse ( std::deque < sax::Token >::iterator & input );
+
+	void compose ( std::deque < sax::Token > & out ) const;
+};
+
+} /* namespace indexes */
+
+namespace std {
+
+template < >
+struct compare < indexes::SuffixTrieTerminatingSymbol > {
+	int operator ()( const indexes::SuffixTrieTerminatingSymbol & first, const indexes::SuffixTrieTerminatingSymbol & second ) const {
+		return first.compare ( second );
+	}
+
+};
+
+} /* namespace std */
+
+#endif /* SUFFIX_TRIE_TERMINATING_SYMBOL_H_ */
diff --git a/alib2data/src/indexes/suffixTrie/common/TerminatingSymbolAlphabet.cpp b/alib2data/src/indexes/suffixTrie/common/TerminatingSymbolAlphabet.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2a74b2ae86a006f8e84f23ea6834b66206ff0a41
--- /dev/null
+++ b/alib2data/src/indexes/suffixTrie/common/TerminatingSymbolAlphabet.cpp
@@ -0,0 +1,36 @@
+/*
+ * TerminatingSymbolAlphabet.cpp
+ *
+ * Created on: Nov 23, 2013
+ * Author: Jan Travnicek
+ */
+
+#include "TerminatingSymbolAlphabet.h"
+#include "../../../exception/AlibException.h"
+
+namespace indexes {
+
+TerminatingSymbolAlphabet::TerminatingSymbolAlphabet ( alphabet::Symbol terminatingSymbol ) : terminatingSymbol ( terminatingSymbol ) {
+	addSymbol ( std::move
( terminatingSymbol ) ); +} + +const std::set < alphabet::Symbol > & TerminatingSymbolAlphabet::getAlphabet ( ) const { + return this->alphabet; +} + +void TerminatingSymbolAlphabet::addSymbol ( alphabet::Symbol symbol ) { + this->alphabet.insert ( std::move ( symbol ) ); +} + +void TerminatingSymbolAlphabet::setTerminatingSymbol ( alphabet::Symbol terminatingSymbol ) { + if ( !alphabet.count ( terminatingSymbol ) ) + throw exception::AlibException ( "Symbol " + ( std::string ) terminatingSymbol + " cannot be set as terminating symbol. It is not present in the alphabet." ); + + this->terminatingSymbol = std::move ( terminatingSymbol ); /* this-> is required: the parameter shadows the member of the same name */ +} + +const alphabet::Symbol & TerminatingSymbolAlphabet::getTerminatingSymbol ( ) const { + return terminatingSymbol; +} + +} /* namespace indexes */ diff --git a/alib2data/src/indexes/suffixTrie/common/TerminatingSymbolAlphabet.h b/alib2data/src/indexes/suffixTrie/common/TerminatingSymbolAlphabet.h new file mode 100644 index 0000000000000000000000000000000000000000..c4da72e79d8d9a1eb49bf9c68651323581dc7ad1 --- /dev/null +++ b/alib2data/src/indexes/suffixTrie/common/TerminatingSymbolAlphabet.h @@ -0,0 +1,54 @@ +/* + * TerminatingSymbolAlphabet.h + * + * Created on: Nov 23, 2013 + * Author: Jan Travnicek + */ + +#ifndef TERMINATING_SYMBOL_ALPHABET_H_ +#define TERMINATING_SYMBOL_ALPHABET_H_ + +#include <set> + +#include "../../../alphabet/Symbol.h" + +namespace indexes { + +/** + * Alphabet of an index together with one designated terminating symbol. + */ +class TerminatingSymbolAlphabet { +protected: + std::set < alphabet::Symbol > alphabet; + + alphabet::Symbol terminatingSymbol; + +public: + TerminatingSymbolAlphabet ( alphabet::Symbol terminatingSymbol ); + + /** + * @return Set of symbols in the alphabet. + */ + const std::set < alphabet::Symbol > & getAlphabet ( ) const; + + /** + * @param symbol Symbol to insert into the alphabet + */ + void addSymbol ( alphabet::Symbol symbol ); + + /** + * Set terminating symbol.
+ * @param terminatingSymbol alphabet::Symbol to set + * @throws AlibException when the symbol is not present in the alphabet + */ + void setTerminatingSymbol ( alphabet::Symbol terminatingSymbol ); + + /** + * @return terminating symbol + */ + const alphabet::Symbol & getTerminatingSymbol ( ) const; +}; + +} /* namespace indexes */ + +#endif /* TERMINATING_SYMBOL_ALPHABET_H_ */ diff --git a/astringology2/src/astringology.cpp b/astringology2/src/astringology.cpp index ae38f2e5580cc39ec121054235bc8148a235d9aa..139f240b7e71908451504a6ab4e0b7e8e3c9313d 100644 --- a/astringology2/src/astringology.cpp +++ b/astringology2/src/astringology.cpp @@ -229,7 +229,7 @@ int main ( int argc, char * argv[] ) { std::chrono::measurements::end ( ); std::chrono::measurements::start ( "Algorithm", std::chrono::measurements::Type::MAIN ); - indexes::SuffixTrie suffixTrie = stringology::indexing::SuffixTrie::construct ( subject ); + indexes::SuffixTrieFinalMark suffixTrie = stringology::indexing::SuffixTrie::construct ( subject ); std::chrono::measurements::end ( ); std::chrono::measurements::start ( "Output write", std::chrono::measurements::Type::AUXILARY );