diff --git a/alib2algo/src/stringology/indexing/SuffixTrieNaive.cpp b/alib2algo/src/stringology/indexing/SuffixTrieNaive.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..de125822814dafc6d53c9a99bc53cb3ac9f4c017
--- /dev/null
+++ b/alib2algo/src/stringology/indexing/SuffixTrieNaive.cpp
@@ -0,0 +1,24 @@
+/*
+ * SuffixTrieNaive.cpp
+ *
+ * Created on: 1. 11. 2014
+ * Author: Tomas Pecka
+ */
+
+#include "SuffixTrieNaive.h"
+
+#include <string/LinearString.h>
+
+namespace stringology {
+
+namespace indexing {
+
+indexes::SuffixTrie < DefaultSymbolType, unsigned > SuffixTrieNaive::construct ( const string::String & string ) {
+	return dispatch ( string.getData ( ) );
+}
+
+auto SuffixTrieNaiveLinearString = SuffixTrieNaive::RegistratorWrapper < indexes::SuffixTrie < DefaultSymbolType, unsigned >, string::LinearString < > > ( SuffixTrieNaive::construct );
+
+} /* namespace indexing */
+
+} /* namespace stringology */
diff --git a/alib2algo/src/stringology/indexing/SuffixTrieNaive.h b/alib2algo/src/stringology/indexing/SuffixTrieNaive.h
new file mode 100644
index 0000000000000000000000000000000000000000..557cf3df44aaf3e37dc569c47c6bec14b6440f09
--- /dev/null
+++ b/alib2algo/src/stringology/indexing/SuffixTrieNaive.h
@@ -0,0 +1,68 @@
+/*
+ * SuffixTrieNaive.h
+ *
+ * Created on: 1. 11. 2014
+ * Author: Tomas Pecka
+ */
+
+#ifndef SUFFIX_TRIE_NAIVE_H_
+#define SUFFIX_TRIE_NAIVE_H_
+
+#include <indexes/SuffixTrie.h>
+#include <string/String.h>
+#include <string/LinearString.h>
+#include <core/multipleDispatch.hpp>
+
+namespace stringology {
+
+namespace indexing {
+
+/**
+ * Constructs suffix trie for given string by inserting its suffixes one by one.
+ *
+ * Source: Lectures MI-EVY (CTU in Prague), Year 2014, Lecture 3, slide 4
+ */
+
+class SuffixTrieNaive : public std::SingleDispatch < SuffixTrieNaive, indexes::SuffixTrie < DefaultSymbolType, unsigned >, const string::StringBase & > {
+public:
+	/**
+	 * Creates suffix trie
+	 * @param string string to construct suffix trie for
+	 * @return suffix trie in which the node ending the suffix starting at index i is labelled with i
+	 */
+	static indexes::SuffixTrie < DefaultSymbolType, unsigned > construct ( const string::String & string );
+
+	template < class SymbolType >
+	static indexes::SuffixTrie < SymbolType, unsigned > construct ( const string::LinearString < SymbolType > & string );
+
+};
+
+template < class SymbolType >
+indexes::SuffixTrie < SymbolType, unsigned > SuffixTrieNaive::construct ( const string::LinearString < SymbolType > & w ) {
+	std::trie < SymbolType, std::variant < void, unsigned > > trie ( std::variant < void, unsigned >::from < void > ( ) );
+	std::set < unsigned > nodeAlphabet;
+
+	for ( unsigned i = w.getContent ( ).size ( ); i-- > 0; ) { // shortest suffix first: every suffix is longer than all inserted before it, so it always ends in a node created (and labelled) for it
+		unsigned k = i;
+		std::trie < SymbolType, std::variant < void, unsigned > > * n = & trie;
+
+		// inlined slow_find_one from MI-EVY lectures: follow the part of the suffix that is already present in the trie
+		while ( k < w.getContent ( ).size ( ) && n->getChildren ( ).count ( w.getContent ( )[k] ) )
+			n = & n->getChildren ( ).find ( w.getContent ( )[k++] )->second;
+
+		for ( ; k < w.getContent ( ).size ( ); k++ ) {
+			std::variant < void, unsigned > node = k + 1 < w.getContent ( ).size ( ) ? std::variant < void, unsigned >::from < void > ( ) : std::variant < void, unsigned > ( i ); // only the node reached by the last symbol carries the suffix start index
+			n = & n->getChildren ( ).insert ( std::make_pair ( w.getContent ( )[k], std::trie < SymbolType, std::variant < void, unsigned > > ( node ) ) ).first->second;
+		}
+
+		nodeAlphabet.insert ( i );
+	}
+
+	return indexes::SuffixTrie < SymbolType, unsigned > ( w.getAlphabet ( ), nodeAlphabet, trie );
+}
+
+} /* namespace indexing */
+
+} /* namespace stringology */
+
+#endif /* SUFFIX_TRIE_NAIVE_H_ */
diff --git a/alib2algo_experimental/src/stringology/indexing/ExperimentalSuffixTrie.cpp b/alib2algo_experimental/src/stringology/indexing/ExperimentalSuffixTrie.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6f796848d549966e0465ba64ab869b74b3ac36bd
--- /dev/null
+++ b/alib2algo_experimental/src/stringology/indexing/ExperimentalSuffixTrie.cpp
@@ -0,0 +1,42 @@
+/*
+ * ExperimentalSuffixTrie.cpp
+ *
+ * Created on: 1. 11. 2014
+ * Author: Tomas Pecka
+ */
+
+#include "ExperimentalSuffixTrie.h"
+
+#include <string/LinearStringTerminatingSymbol.h>
+
+namespace stringology {
+
+namespace indexing {
+
+indexes::SuffixTrieTerminatingSymbol ExperimentalSuffixTrie::construct ( const string::String & string ) {
+	return dispatch ( string.getData ( ) );
+}
+
+indexes::SuffixTrieTerminatingSymbol ExperimentalSuffixTrie::construct ( const string::LinearStringTerminatingSymbol & w ) {
+	indexes::SuffixTrieTerminatingSymbol res ( w.getAlphabet ( ), w.getTerminatingSymbol ( ) );
+
+	for ( unsigned int i = 0; i < w.getContent ( ).size ( ); i++ ) {
+		unsigned int k = i;
+		indexes::SuffixTrieNodeTerminatingSymbol * n = & res.getRoot ( );
+
+		// inlined slow_find_one from MI-EVY lectures
+		while ( k < w.getContent ( ).size ( ) && n->hasChild ( w.getContent ( )[k] ) )
+			n = & n->getChild ( w.getContent ( )[k++] );
+
+		for ( ; k < w.getContent ( ).size ( ); k++ )
+			n = & n->addChild ( w.getContent ( )[k], indexes::SuffixTrieNodeTerminatingSymbol ( { } ) );
+	}
+
+	return res;
+}
+ +auto SuffixTrieTerminatingSymbolLinearStringTerminatingSymbol = ExperimentalSuffixTrie::RegistratorWrapper < indexes::SuffixTrieTerminatingSymbol, string::LinearStringTerminatingSymbol > ( ExperimentalSuffixTrie::construct ); + +} /* namespace indexing */ + +} /* namespace stringology */ diff --git a/alib2algo_experimental/src/stringology/indexing/SuffixTrie.h b/alib2algo_experimental/src/stringology/indexing/ExperimentalSuffixTrie.h similarity index 60% rename from alib2algo_experimental/src/stringology/indexing/SuffixTrie.h rename to alib2algo_experimental/src/stringology/indexing/ExperimentalSuffixTrie.h index 66c669edd20478c47a67c4ebdb1ab72b819a5843..ffaf062f819baf5b6c5c9d77854e39968da6913e 100644 --- a/alib2algo_experimental/src/stringology/indexing/SuffixTrie.h +++ b/alib2algo_experimental/src/stringology/indexing/ExperimentalSuffixTrie.h @@ -1,17 +1,15 @@ /* - * SuffixTrie.h + * ExperimentalSuffixTrie.h * * Created on: 1. 11. 2014 * Author: Tomas Pecka */ -#ifndef SUFFIX_TRIE_H_ -#define SUFFIX_TRIE_H_ +#ifndef EXPERIMENTAL_SUFFIX_TRIE_H_ +#define EXPERIMENTAL_SUFFIX_TRIE_H_ -#include <indexes/suffixTrie/SuffixTrieFinalMark.h> #include <indexes/suffixTrie/SuffixTrieTerminatingSymbol.h> #include <string/String.h> -#include <string/LinearString.h> #include <string/LinearStringTerminatingSymbol.h> #include <core/multipleDispatch.hpp> @@ -25,16 +23,15 @@ namespace indexing { * Source: Lectures MI-EVY (CTU in Prague), Year 2014, Lecture 3, slide 4 */ -class SuffixTrie : public std::SingleDispatch < SuffixTrie, indexes::SuffixTrieFinalMark, const string::StringBase & > { +class ExperimentalSuffixTrie : public std::SingleDispatch < ExperimentalSuffixTrie, indexes::SuffixTrieTerminatingSymbol, const string::StringBase & > { public: /** * Creates suffix trie * @param string string to construct suffix trie for * @return automaton */ - static indexes::SuffixTrieFinalMark construct ( const string::String & string ); + static indexes::SuffixTrieTerminatingSymbol 
construct ( const string::String & string ); - static indexes::SuffixTrieFinalMark construct ( const string::LinearString < > & string ); static indexes::SuffixTrieTerminatingSymbol construct ( const string::LinearStringTerminatingSymbol & string ); }; @@ -43,4 +40,4 @@ public: } /* namespace stringology */ -#endif /* SUFFIX_TRIE_H_ */ +#endif /* EXPERIMENTAL_SUFFIX_TRIE_H_ */ diff --git a/alib2algo_experimental/src/stringology/indexing/SuffixTrie.cpp b/alib2algo_experimental/src/stringology/indexing/SuffixTrie.cpp deleted file mode 100644 index e2d021b853e71559932ec7d92430929540a88fad..0000000000000000000000000000000000000000 --- a/alib2algo_experimental/src/stringology/indexing/SuffixTrie.cpp +++ /dev/null @@ -1,67 +0,0 @@ -/* - * SuffixTrie.cpp - * - * Created on: 1. 11. 2014 - * Author: Tomas Pecka - */ - -#include "SuffixTrie.h" - -#include <string/LinearString.h> -#include <string/LinearStringTerminatingSymbol.h> -#include <string/Epsilon.h> -#include <alphabet/EndSymbol.h> - -namespace stringology { - -namespace indexing { - -indexes::SuffixTrieFinalMark SuffixTrie::construct ( const string::String & string ) { - return dispatch ( string.getData ( ) ); -} - -indexes::SuffixTrieFinalMark SuffixTrie::construct ( const string::LinearString < > & w ) { - indexes::SuffixTrieFinalMark res ( w.getAlphabet ( ) ); - - for ( unsigned int i = 0; i < w.getContent ( ).size ( ); i++ ) { - unsigned int k = i; - indexes::SuffixTrieNodeFinalMark * n = & res.getRoot ( ); - - // inlined slow_find_one from MI-EVY lectures - while ( k < w.getContent ( ).size ( ) && n->hasChild ( w.getContent ( )[k] ) ) - n = & n->getChild ( w.getContent ( )[k++] ); - - for ( ; k < w.getContent ( ).size ( ); k++ ) - n = & n->addChild ( w.getContent ( )[k], indexes::SuffixTrieNodeFinalMark ( { }, false ) ); - - n->setFinalMark ( true ); - } - - return res; -} - -auto SuffixTrieLinearString = SuffixTrie::RegistratorWrapper < indexes::SuffixTrieFinalMark, string::LinearString < > > ( 
SuffixTrie::construct ); - -indexes::SuffixTrieTerminatingSymbol SuffixTrie::construct ( const string::LinearStringTerminatingSymbol & w ) { - indexes::SuffixTrieTerminatingSymbol res ( w.getAlphabet ( ), w.getTerminatingSymbol ( ) ); - - for ( unsigned int i = 0; i < w.getContent ( ).size ( ); i++ ) { - unsigned int k = i; - indexes::SuffixTrieNodeTerminatingSymbol * n = & res.getRoot ( ); - - // inlined slow_find_one from MI-EVY lectures - while ( k < w.getContent ( ).size ( ) && n->hasChild ( w.getContent ( )[k] ) ) - n = & n->getChild ( w.getContent ( )[k++] ); - - for ( ; k < w.getContent ( ).size ( ); k++ ) - n = & n->addChild ( w.getContent ( )[k], indexes::SuffixTrieNodeTerminatingSymbol ( { } ) ); - } - - return res; -} - -auto SuffixTrieTerminatingSymbolLinearStringTerminatingSymbol = SuffixTrie::RegistratorWrapper < indexes::SuffixTrieTerminatingSymbol, string::LinearStringTerminatingSymbol > ( SuffixTrie::construct ); - -} /* namespace indexing */ - -} /* namespace stringology */ diff --git a/alib2data/src/indexes/SuffixTrie.cpp b/alib2data/src/indexes/SuffixTrie.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6c84c9ea62b386d65573908a8aac75e6b2a75d60 --- /dev/null +++ b/alib2data/src/indexes/SuffixTrie.cpp @@ -0,0 +1,14 @@ +/* + * SuffixTrie.cpp + * + * Created on: Nov 23, 2013 + * Author: Jan Travnicek + */ + +#include "SuffixTrie.h" + +namespace alib { + +auto suffixTreeParserRegister = xmlApi < alib::Object >::ParserRegister < indexes::SuffixTrie < > > ( ); + +} /* namespace alib */ diff --git a/alib2data/src/indexes/SuffixTrie.h b/alib2data/src/indexes/SuffixTrie.h new file mode 100644 index 0000000000000000000000000000000000000000..c7adc5791593628bc4e5ebf34297899a0a37baec --- /dev/null +++ b/alib2data/src/indexes/SuffixTrie.h @@ -0,0 +1,282 @@ +/* + * SuffixTrie.h + * + * Created on: Nov 23, 2013 + * Author: Jan Travnicek + */ + +#ifndef SUFFIX_TRIE_H_ +#define SUFFIX_TRIE_H_ + +#include <string> +#include <set> +#include 
<trie>
+#include <variant>
+#include <iostream>
+#include <algorithm>
+#include <sstream>
+
+#include <core/components.hpp>
+#include <exception/CommonException.h>
+
+#include <object/Object.h>
+#include <object/UniqueObject.h>
+#include <object/ObjectBase.h>
+
+#include <sax/FromXMLParserHelper.h>
+#include <core/xmlApi.hpp>
+#include <primitive/Bool.h>
+
+#include <container/ObjectsSet.h>
+#include <container/ObjectsTrie.h>
+#include <container/ObjectsVariant.h>
+#include <object/Void.h>
+
+namespace indexes {
+
+class EdgeAlphabet;
+class NodeAlphabet;
+
+/**
+ * Suffix trie index. Edges of the trie are labelled by EdgeType symbols, every node holds either
+ * nothing (void) or a NodeType label; used labels must come from the edge and node alphabets.
+ */
+template < class EdgeType = alib::Object, class NodeType = alib::Object >
+class SuffixTrie : public alib::ObjectBase, public std::Components < SuffixTrie < EdgeType, NodeType >, EdgeType, std::tuple < EdgeAlphabet >, std::tuple < >, NodeType, std::tuple < NodeAlphabet >, std::tuple < > > {
+protected:
+	std::trie < EdgeType, std::variant < void, NodeType > > m_trie;
+
+public:
+	/**
+	 * @return heap-allocated copy of this index
+	 */
+	virtual ObjectBase * clone ( ) const;
+
+	/**
+	 * @return heap-allocated index move-constructed from this one
+	 */
+	virtual ObjectBase * plunder ( ) &&;
+
+	explicit SuffixTrie ( std::set < EdgeType > edgeAlphabet, std::set < NodeType > nodeAlphabet );
+	explicit SuffixTrie ( std::set < EdgeType > edgeAlphabet, std::set < NodeType > nodeAlphabet, std::trie < EdgeType, std::variant < void, NodeType > > trie );
+	explicit SuffixTrie ( std::trie < EdgeType, std::variant < void, NodeType > > trie );
+
+	void checkTrie ( const std::trie < EdgeType, std::variant < void, NodeType > > & trie );
+
+	/**
+	 * @return Root node of the trie
+	 */
+	const std::trie < EdgeType, std::variant < void, NodeType > > & getRoot ( ) const;
+
+	const std::set < EdgeType > & getEdgeAlphabet ( ) const {
+		return this->template accessComponent < EdgeAlphabet > ( ).get ( );
+	}
+
+	const std::set < NodeType > & getNodeAlphabet ( ) const {
+		return this->template accessComponent < NodeAlphabet > ( ).get ( );
+	}
+
+	/**
+	 * Replaces the stored trie; the new trie is first checked against both alphabets (throws exception::CommonException on mismatch)
+	 * @param tree root node of the new trie
+	 */
+	void setTree ( std::trie < EdgeType, std::variant < void, NodeType > > tree );
+
+	/**
+	 * Removes symbol from the edge alphabet (NOTE(review): no definition is visible in this header - confirm one exists)
+	 * @param symbol removed symbol from the alphabet
+	 */
+	bool removeSymbolFromEdgeAlphabet ( const EdgeType & symbol );
+
+	/**
+	 * Removes symbol from the node alphabet (NOTE(review): no definition is visible in this header - confirm one exists)
+	 * @param symbol removed symbol from the alphabet
+	 */
+	bool removeSymbolFromNodeAlphabet ( const NodeType & symbol );
+
+	/**
+	 * Prints the index to the output stream in the textual form "(SuffixTrie <trie>)".
+	 * The XML form is produced by compose ( ), not by this operator.
+	 * @param out output stream to print to
+	 */
+	virtual void operator >>( std::ostream & out ) const;
+
+	virtual int compare ( const ObjectBase & other ) const {
+		if ( std::type_index ( typeid ( * this ) ) == std::type_index ( typeid ( other ) ) ) return this->compare ( ( decltype ( * this ) )other );
+
+		return std::type_index ( typeid ( * this ) ) - std::type_index ( typeid ( other ) );
+	}
+
+	virtual int compare ( const SuffixTrie & other ) const;
+
+	virtual explicit operator std::string ( ) const;
+
+	static const std::string & getXmlTagName() {
+		static std::string xmlTagName = "SuffixTrie";
+
+		return xmlTagName;
+	}
+
+	static SuffixTrie parse ( std::deque < sax::Token >::iterator & input );
+
+	void compose ( std::deque < sax::Token > & out ) const;
+
+	virtual alib::ObjectBase * inc ( ) &&;
+};
+
+} /* namespace indexes */
+
+namespace indexes {
+
+template < class EdgeType, class NodeType >
+SuffixTrie < EdgeType, NodeType >::SuffixTrie ( std::set < EdgeType > edgeAlphabet, std::set < NodeType > nodeAlphabet ) : SuffixTrie ( std::move ( edgeAlphabet ), std::move ( nodeAlphabet ), std::trie < EdgeType, std::variant < void, NodeType > > ( std::variant < void, NodeType >::template from < void > ( ) ) ) {
+}
+
+template < class EdgeType, class NodeType >
+SuffixTrie < EdgeType, NodeType >::SuffixTrie ( std::set < EdgeType > edgeAlphabet, std::set < NodeType > nodeAlphabet, std::trie < EdgeType, std::variant < void, NodeType > > trie ) : std::Components < SuffixTrie, EdgeType, std::tuple < EdgeAlphabet >, std::tuple < >, NodeType, std::tuple < NodeAlphabet >, std::tuple < > > ( std::make_tuple ( std::move ( edgeAlphabet ) ), std::tuple < > ( ), std::make_tuple ( std::move ( nodeAlphabet ) ), std::tuple < > ( ) ), m_trie ( std::move ( trie ) ) {
+	checkTrie ( this->m_trie );
+}
+
+template < class EdgeType, class NodeType >
+SuffixTrie < EdgeType, NodeType >::SuffixTrie ( std::trie < EdgeType, std::variant < void, NodeType > > trie ) : SuffixTrie ( computeMinimalEdgeAlphabet ( trie ), computeMinimalNodeAlphabet ( trie ), trie ) { // NOTE(review): computeMinimalEdgeAlphabet / computeMinimalNodeAlphabet are not declared in this header - confirm they exist before this constructor is instantiated
+}
+
+template < class EdgeType, class NodeType >
+alib::ObjectBase * SuffixTrie < EdgeType, NodeType >::clone ( ) const {
+	return new SuffixTrie ( * this );
+}
+
+template < class EdgeType, class NodeType >
+alib::ObjectBase * SuffixTrie < EdgeType, NodeType >::plunder ( ) && {
+	return new SuffixTrie ( std::move ( * this ) );
+}
+
+template < class EdgeType, class NodeType >
+void SuffixTrie < EdgeType, NodeType >::checkTrie ( const std::trie < EdgeType, std::variant < void, NodeType > > & trie ) {
+	if ( trie.getData ( ).template is < NodeType > ( ) && ! getNodeAlphabet ( ).count ( trie.getData ( ).template get < NodeType > ( ) ) )
+		throw exception::CommonException ( "Node symbols not in the node alphabet." );
+
+	for ( const std::pair < const EdgeType, std::trie < EdgeType, std::variant < void, NodeType > > > & child : trie.getChildren ( ) ) {
+		if ( ! getEdgeAlphabet ( ).count ( child.first ) )
+			throw exception::CommonException ( "Edge symbols not in the edge alphabet." );
+		checkTrie ( child.second ); // validate the whole subtrie recursively
+	}
+}
+
+template < class EdgeType, class NodeType >
+const std::trie < EdgeType, std::variant < void, NodeType > > & SuffixTrie < EdgeType, NodeType >::getRoot ( ) const {
+	return m_trie;
+}
+
+template < class EdgeType, class NodeType >
+void SuffixTrie < EdgeType, NodeType >::setTree ( std::trie < EdgeType, std::variant < void, NodeType > > trie ) {
+	checkTrie ( trie );
+	this->m_trie = std::move ( trie ); // m_trie is held by value, same as in the constructor
+}
+
+template < class EdgeType, class NodeType >
+void SuffixTrie < EdgeType, NodeType >::operator >>( std::ostream & out ) const {
+	out << "(SuffixTrie " << this->m_trie << ")";
+}
+
+template < class EdgeType, class NodeType >
+int SuffixTrie < EdgeType, NodeType >::compare ( const SuffixTrie & other ) const {
+	auto first = std::tie ( getRoot ( ), getEdgeAlphabet ( ), getNodeAlphabet ( ) );
+	auto second = std::tie ( other.getRoot ( ), other.getEdgeAlphabet ( ), other.getNodeAlphabet ( ) );
+
+	std::compare < decltype ( first ) > comp;
+
+	return comp ( first, second );
+}
+
+template < class EdgeType, class NodeType >
+SuffixTrie < EdgeType, NodeType >::operator std::string ( ) const {
+	std::stringstream ss;
+	ss << * this;
+	return ss.str ( );
+}
+
+template < class EdgeType, class NodeType >
+SuffixTrie < EdgeType, NodeType > SuffixTrie < EdgeType, NodeType >::parse ( std::deque < sax::Token >::iterator & input ) {
+	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, SuffixTrie::getXmlTagName() );
+	std::set < EdgeType > edgeAlphabet = alib::xmlApi < std::set < EdgeType > >::parse ( input );
+	std::set < NodeType > nodeAlphabet = alib::xmlApi < std::set < NodeType > >::parse ( input );
+	std::trie < EdgeType, std::variant < void, NodeType > > root = alib::xmlApi < std::trie < EdgeType, std::variant < void, NodeType > > >::parse ( input );
+	SuffixTrie < EdgeType, NodeType > trie ( std::move ( edgeAlphabet ), std::move ( nodeAlphabet ), std::move ( root ) );
+
+	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, SuffixTrie::getXmlTagName() );
+	return trie;
+}
+
+template < class EdgeType, class NodeType >
+void SuffixTrie < EdgeType, NodeType >::compose ( std::deque < sax::Token > & out ) const {
+	out.emplace_back ( SuffixTrie::getXmlTagName(), sax::Token::TokenType::START_ELEMENT );
+	alib::xmlApi < std::set < EdgeType > >::compose ( out, getEdgeAlphabet ( ) );
+	alib::xmlApi < std::set < NodeType > >::compose ( out, getNodeAlphabet ( ) );
+	alib::xmlApi < std::trie < EdgeType, std::variant < void, NodeType > > >::compose ( out, getRoot ( ) );
+	out.emplace_back ( SuffixTrie::getXmlTagName(), sax::Token::TokenType::END_ELEMENT );
+}
+
+template < class EdgeType, class NodeType >
+alib::ObjectBase* SuffixTrie < EdgeType, NodeType >::inc() && {
+	return new alib::UniqueObject(alib::Object(std::move(*this)), primitive::Integer(0));
+}
+
+} /* namespace indexes */
+
+namespace std {
+
+template < class EdgeType, class NodeType >
+class ComponentConstraint < indexes::SuffixTrie < EdgeType, NodeType >, EdgeType, indexes::EdgeAlphabet > {
+
+	static bool used ( const std::trie < EdgeType, std::variant < void, NodeType > > & trie, const EdgeType & symbol ) {
+		for ( const std::pair < const EdgeType, std::trie < EdgeType, std::variant < void, NodeType > > > & child : trie.getChildren ( ) ) {
+			if ( symbol == child.first || used ( child.second, symbol ) ) // edge label matches, or the symbol labels an edge deeper in the subtrie
+				return true;
+		}
+		return false;
+	}
+
+public:
+	static bool used ( const indexes::SuffixTrie < EdgeType, NodeType > & index, const EdgeType & symbol ) {
+		return used ( index.getRoot ( ), symbol );
+	}
+
+	static bool available ( const indexes::SuffixTrie < EdgeType, NodeType > &, const EdgeType & ) {
+		return true;
+	}
+
+	static void valid ( const indexes::SuffixTrie < EdgeType, NodeType > &, const EdgeType & ) {
+	}
+};
+
+template < class EdgeType, class NodeType >
+class ComponentConstraint < indexes::SuffixTrie < EdgeType, NodeType >, NodeType, indexes::NodeAlphabet > {
+
+	static bool used ( const std::trie < EdgeType, std::variant < void, NodeType > > & trie, const NodeType & symbol ) {
+		if ( trie.getData ( ).template is < NodeType > ( ) && symbol == trie.getData ( ).template get < NodeType > ( ) )
+			return true;
+
+		for ( const std::pair < const EdgeType, std::trie < EdgeType, std::variant < void, NodeType > > > & child : trie.getChildren ( ) ) {
+			if ( used ( child.second, symbol ) ) // the symbol labels a node somewhere in the subtrie
+				return true;
+		}
+		return false;
+	}
+
+public:
+	static bool used ( const indexes::SuffixTrie < EdgeType, NodeType > & index, const NodeType & symbol ) {
+		return used ( index.getRoot ( ), symbol );
+	}
+
+	static bool available ( const indexes::SuffixTrie < EdgeType, NodeType > &, const NodeType & ) {
+		return true;
+	}
+
+	static void valid ( const indexes::SuffixTrie < EdgeType, NodeType > &, const NodeType & ) {
+	}
+};
+
+} /* namespace std */
+
+#endif /* SUFFIX_TRIE_H_ */
diff --git a/alib2data_experimental/src/indexes/common/IndexFromXMLParser.cpp b/alib2data_experimental/src/indexes/common/IndexFromXMLParser.cpp
index 3055bd57f6e839c8e2059d9ffab7a8d22f967b57..ee54c4cf77203e8b519047ea2b3a266a0a1f44ba 100644
--- a/alib2data_experimental/src/indexes/common/IndexFromXMLParser.cpp
+++ b/alib2data_experimental/src/indexes/common/IndexFromXMLParser.cpp
@@ -24,23 +24,6 @@ std::set < DefaultSymbolType > IndexFromXMLParser::parseAlphabet ( std::deque <
 	return symbols;
 }
 
-SuffixTrieNodeFinalMark * IndexFromXMLParser::parseSuffixTrieNodeFinalMark ( std::deque < sax::Token >::iterator & input ) {
-	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, "node" );
-	std::map < DefaultSymbolType, SuffixTrieNodeFinalMark * > children;
-	bool finalMark = alib::xmlApi < bool >::parse ( input );
-
-	while ( sax::FromXMLParserHelper::isTokenType ( input, sax::Token::TokenType::START_ELEMENT ) ) {
-		sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, "child" );
-		
DefaultSymbolType symbol = alib::xmlApi < DefaultSymbolType >::parse ( input ); - children.insert ( std::make_pair ( std::move ( symbol ), parseSuffixTrieNodeFinalMark ( input ) ) ); - sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, "child" ); - } - - sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, "node" ); - - return new SuffixTrieNodeFinalMark ( children, finalMark ); -} - SuffixTrieNodeTerminatingSymbol * IndexFromXMLParser::parseSuffixTrieNodeTerminatingSymbol ( std::deque < sax::Token >::iterator & input ) { sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, "node" ); std::map < DefaultSymbolType, SuffixTrieNodeTerminatingSymbol * > children; diff --git a/alib2data_experimental/src/indexes/common/IndexFromXMLParser.h b/alib2data_experimental/src/indexes/common/IndexFromXMLParser.h index 8f1de94b397871240f5421e88720af1aabb23e9a..19b4f5e5162acab1bdb06482f3873ff38d6cc6cd 100644 --- a/alib2data_experimental/src/indexes/common/IndexFromXMLParser.h +++ b/alib2data_experimental/src/indexes/common/IndexFromXMLParser.h @@ -13,7 +13,6 @@ #include <sax/Token.h> #include <alphabet/SymbolFeatures.h> -#include "../suffixTrie/SuffixTrieNodeFinalMark.h" #include "../suffixTrie/SuffixTrieNodeTerminatingSymbol.h" namespace indexes { @@ -23,7 +22,6 @@ namespace indexes { */ class IndexFromXMLParser { public: - static SuffixTrieNodeFinalMark * parseSuffixTrieNodeFinalMark ( std::deque < sax::Token >::iterator & input ); static SuffixTrieNodeTerminatingSymbol * parseSuffixTrieNodeTerminatingSymbol ( std::deque < sax::Token >::iterator & input ); static std::set < DefaultSymbolType > parseAlphabet ( std::deque < sax::Token >::iterator & input ); }; diff --git a/alib2data_experimental/src/indexes/common/IndexToXMLComposer.cpp b/alib2data_experimental/src/indexes/common/IndexToXMLComposer.cpp index 52b9e5d97021038e2af81adb1f36acce7cbd5468..d4e06a1c8992ac294cacff422ac1d93325760a7b 100644 
--- a/alib2data_experimental/src/indexes/common/IndexToXMLComposer.cpp +++ b/alib2data_experimental/src/indexes/common/IndexToXMLComposer.cpp @@ -20,20 +20,6 @@ void IndexToXMLComposer::composeAlphabet ( std::deque < sax::Token > & out, cons out.emplace_back ( sax::Token ( "alphabet", sax::Token::TokenType::END_ELEMENT ) ); } -void IndexToXMLComposer::composeNode ( std::deque < sax::Token > & out, const SuffixTrieNodeFinalMark & node ) { - out.emplace_back ( sax::Token ( "node", sax::Token::TokenType::START_ELEMENT ) ); - alib::xmlApi < bool >::compose ( out, node.getFinalMark ( ) ); - - for ( const auto & child : node.getChildren ( ) ) { - out.emplace_back ( sax::Token ( "child", sax::Token::TokenType::START_ELEMENT ) ); - alib::xmlApi < DefaultSymbolType >::compose ( out, child.first ); - composeNode ( out, * child.second ); - out.emplace_back ( sax::Token ( "child", sax::Token::TokenType::END_ELEMENT ) ); - } - - out.emplace_back ( sax::Token ( "node", sax::Token::TokenType::END_ELEMENT ) ); -} - void IndexToXMLComposer::composeNode ( std::deque < sax::Token > & out, const SuffixTrieNodeTerminatingSymbol & node ) { out.emplace_back ( sax::Token ( "node", sax::Token::TokenType::START_ELEMENT ) ); diff --git a/alib2data_experimental/src/indexes/common/IndexToXMLComposer.h b/alib2data_experimental/src/indexes/common/IndexToXMLComposer.h index 7cfd6cf55c13e8dcac099a1f0e0737c3108f0609..0bbc3c1a5bcc9a507062ac10fd21b265f1521f36 100644 --- a/alib2data_experimental/src/indexes/common/IndexToXMLComposer.h +++ b/alib2data_experimental/src/indexes/common/IndexToXMLComposer.h @@ -12,7 +12,6 @@ #include <set> #include <alphabet/SymbolFeatures.h> #include <sax/Token.h> -#include "../suffixTrie/SuffixTrieNodeFinalMark.h" #include "../suffixTrie/SuffixTrieNodeTerminatingSymbol.h" namespace indexes { @@ -24,7 +23,6 @@ class IndexToXMLComposer { public: static void composeAlphabet ( std::deque < sax::Token > & out, const std::set < DefaultSymbolType > & symbols ); - static void 
composeNode ( std::deque < sax::Token > & out, const SuffixTrieNodeFinalMark & node ); static void composeNode ( std::deque < sax::Token > & out, const SuffixTrieNodeTerminatingSymbol & node ); }; diff --git a/alib2data_experimental/src/indexes/suffixTrie/SuffixTrieFinalMark.cpp b/alib2data_experimental/src/indexes/suffixTrie/SuffixTrieFinalMark.cpp deleted file mode 100644 index 1f291a2d00a15978951c5a7c133fcbf801e24bad..0000000000000000000000000000000000000000 --- a/alib2data_experimental/src/indexes/suffixTrie/SuffixTrieFinalMark.cpp +++ /dev/null @@ -1,147 +0,0 @@ -/* - * SuffixTrieFinalMark.cpp - * - * Created on: Nov 23, 2013 - * Author: Jan Travnicek - */ - -#include "SuffixTrieFinalMark.h" -#include "SuffixTrieTerminatingSymbol.h" -#include <exception/CommonException.h> - -#include <iostream> -#include <algorithm> -#include <sstream> - -#include <sax/FromXMLParserHelper.h> -#include "../common/IndexFromXMLParser.h" -#include "../common/IndexToXMLComposer.h" -#include <object/Object.h> -#include <core/xmlApi.hpp> -#include <object/UniqueObject.h> - -namespace indexes { - -SuffixTrieFinalMark::SuffixTrieFinalMark ( std::set < DefaultSymbolType > alphabet ) : SuffixTrieFinalMark ( std::move ( alphabet ), SuffixTrieNodeFinalMark ( { }, true ) ) { -} - -SuffixTrieFinalMark::SuffixTrieFinalMark ( std::set < DefaultSymbolType > alphabet, SuffixTrieNodeFinalMark tree ) : std::Components < SuffixTrieFinalMark, DefaultSymbolType, std::tuple < GeneralAlphabet >, std::tuple < > > ( std::make_tuple ( std::move ( alphabet ) ), std::tuple < > ( ) ), m_tree ( NULL ) { - setTree ( std::move ( tree ) ); -} - -SuffixTrieFinalMark::SuffixTrieFinalMark ( SuffixTrieNodeFinalMark tree ) : SuffixTrieFinalMark ( tree.computeMinimalAlphabet ( ), tree ) { -} - -SuffixTrieFinalMark::SuffixTrieFinalMark ( const SuffixTrieTerminatingSymbol & other ) : std::Components < SuffixTrieFinalMark, DefaultSymbolType, std::tuple < GeneralAlphabet >, std::tuple < > > ( std::make_tuple ( 
other.getAlphabet ( ) ), std::tuple < > ( ) ), m_tree ( NULL ) { - this->accessComponent < GeneralAlphabet > ( ).remove ( other.accessElement < TerminatingSymbol > ( ).get ( ) ); - setTree ( SuffixTrieNodeFinalMark ( other.getRoot ( ), other.accessElement < TerminatingSymbol > ( ).get ( ) ) ); -} - -SuffixTrieFinalMark::SuffixTrieFinalMark ( const SuffixTrieFinalMark & other ) : std::Components < SuffixTrieFinalMark, DefaultSymbolType, std::tuple < GeneralAlphabet >, std::tuple < > > ( std::make_tuple ( other.getAlphabet ( ) ), std::tuple < > ( ) ), m_tree ( other.m_tree->clone ( ) ) { - this->m_tree->attachTree ( this ); -} - -SuffixTrieFinalMark::SuffixTrieFinalMark ( SuffixTrieFinalMark && other ) noexcept : std::Components < SuffixTrieFinalMark, DefaultSymbolType, std::tuple < GeneralAlphabet >, std::tuple < > > ( std::make_tuple ( std::move ( other.accessComponent < GeneralAlphabet > ( ).get ( ) ) ), std::tuple < > ( ) ), m_tree ( other.m_tree ) { - this->m_tree->attachTree ( this ); - other.m_tree = NULL; -} - -alib::ObjectBase * SuffixTrieFinalMark::clone ( ) const { - return new SuffixTrieFinalMark ( * this ); -} - -alib::ObjectBase * SuffixTrieFinalMark::plunder ( ) && { - return new SuffixTrieFinalMark ( std::move ( * this ) ); -} - -SuffixTrieFinalMark & SuffixTrieFinalMark::operator =( const SuffixTrieFinalMark & other ) { - if ( this == & other ) - return * this; - - * this = SuffixTrieFinalMark ( other ); - - return * this; -} - -SuffixTrieFinalMark & SuffixTrieFinalMark::operator =( SuffixTrieFinalMark && other ) noexcept { - std::swap ( this->m_tree, other.m_tree ); - std::swap ( accessComponent < GeneralAlphabet > ( ).get ( ), other.accessComponent < GeneralAlphabet > ( ).get ( ) ); - - return * this; -} - -SuffixTrieFinalMark::~SuffixTrieFinalMark ( ) noexcept { - delete m_tree; -} - -const SuffixTrieNodeFinalMark & SuffixTrieFinalMark::getRoot ( ) const { - return * m_tree; -} - -SuffixTrieNodeFinalMark & SuffixTrieFinalMark::getRoot ( ) { - 
return * m_tree; -} - -void SuffixTrieFinalMark::setTree ( SuffixTrieNodeFinalMark tree ) { - delete this->m_tree; - this->m_tree = std::move ( tree ).plunder ( ); - - if ( !this->m_tree->attachTree ( this ) ) { - delete this->m_tree; - throw exception::CommonException ( "Input symbols not in the alphabet." ); - } -} - -void SuffixTrieFinalMark::operator >>( std::ostream & out ) const { - out << "(SuffixTrieFinalMark " << * ( this->m_tree ) << ")"; -} - -std::ostream & operator <<( std::ostream & out, const SuffixTrieFinalMark & instance ) { - instance >> out; - return out; -} - -int SuffixTrieFinalMark::compare ( const SuffixTrieFinalMark & other ) const { - auto first = std::tie ( * m_tree, getAlphabet() ); - auto second = std::tie ( * other.m_tree, other.getAlphabet() ); - - std::compare < decltype ( first ) > comp; - - return comp ( first, second ); -} - -SuffixTrieFinalMark::operator std::string ( ) const { - std::stringstream ss; - ss << * this; - return ss.str ( ); -} - -SuffixTrieFinalMark SuffixTrieFinalMark::parse ( std::deque < sax::Token >::iterator & input ) { - sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, SuffixTrieFinalMark::getXmlTagName() ); - std::set < DefaultSymbolType > rankedAlphabet = IndexFromXMLParser::parseAlphabet ( input ); - SuffixTrieNodeFinalMark * root = IndexFromXMLParser::parseSuffixTrieNodeFinalMark ( input ); - SuffixTrieFinalMark tree ( std::move ( rankedAlphabet ), std::move ( * root ) ); - - delete root; - sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, SuffixTrieFinalMark::getXmlTagName() ); - return tree; -} - -void SuffixTrieFinalMark::compose ( std::deque < sax::Token > & out ) const { - out.emplace_back ( SuffixTrieFinalMark::getXmlTagName(), sax::Token::TokenType::START_ELEMENT ); - IndexToXMLComposer::composeAlphabet ( out, getAlphabet ( ) ); - IndexToXMLComposer::composeNode ( out, * m_tree ); - out.emplace_back ( SuffixTrieFinalMark::getXmlTagName(), 
sax::Token::TokenType::END_ELEMENT ); -} - -alib::ObjectBase* SuffixTrieFinalMark::inc() && { - return new alib::UniqueObject(alib::Object(std::move(*this)), primitive::Integer(0)); -} - -} /* namespace indexes */ - -namespace alib { - -auto suffixTreeFinalMarkParserRegister = xmlApi < alib::Object >::ParserRegister < indexes::SuffixTrieFinalMark > ( ); - -} /* namespace alib */ diff --git a/alib2data_experimental/src/indexes/suffixTrie/SuffixTrieFinalMark.h b/alib2data_experimental/src/indexes/suffixTrie/SuffixTrieFinalMark.h deleted file mode 100644 index 0efc4903b9ffd55f79d79f264700b6b482c12c29..0000000000000000000000000000000000000000 --- a/alib2data_experimental/src/indexes/suffixTrie/SuffixTrieFinalMark.h +++ /dev/null @@ -1,138 +0,0 @@ -/* - * SuffixTrieFinalMark.h - * - * Created on: Nov 23, 2013 - * Author: Jan Travnicek - */ - -#ifndef SUFFIX_TRIE_FINAL_MARK_H_ -#define SUFFIX_TRIE_FINAL_MARK_H_ - -#include <vector> -#include <list> -#include <string> -#include <set> -#include <core/components.hpp> -#include "SuffixTrieNodeFinalMark.h" -#include <object/ObjectBase.h> - -namespace indexes { - -class SuffixTrieTerminatingSymbol; -class GeneralAlphabet; - -/** - * Represents regular expression parsed from the XML. Regular expression is stored - * as a tree of RegExpElement. 
- */ -class SuffixTrieFinalMark : public alib::ObjectBase, public std::Components < SuffixTrieFinalMark, DefaultSymbolType, std::tuple < GeneralAlphabet >, std::tuple < > > { -protected: - SuffixTrieNodeFinalMark * m_tree; - -public: - /** - * @copydoc SuffixTrieNodeFinalMark::clone() const - */ - virtual ObjectBase * clone ( ) const; - - /** - * @copydoc SuffixTrieNodeFinalMark::plunder() const - */ - virtual ObjectBase * plunder ( ) &&; - - explicit SuffixTrieFinalMark ( std::set < DefaultSymbolType > alphabet ); - explicit SuffixTrieFinalMark ( std::set < DefaultSymbolType > alphabet, SuffixTrieNodeFinalMark tree ); - explicit SuffixTrieFinalMark ( SuffixTrieNodeFinalMark tree ); - - explicit SuffixTrieFinalMark ( const SuffixTrieTerminatingSymbol & tree ); - - /** - * Copy constructor. - * @param other tree to copy - */ - SuffixTrieFinalMark ( const SuffixTrieFinalMark & other ); - SuffixTrieFinalMark ( SuffixTrieFinalMark && other ) noexcept; - SuffixTrieFinalMark & operator =( const SuffixTrieFinalMark & other ); - SuffixTrieFinalMark & operator =( SuffixTrieFinalMark && other ) noexcept; - ~SuffixTrieFinalMark ( ) noexcept; - - /** - * @return Root node of the regular expression tree - */ - const SuffixTrieNodeFinalMark & getRoot ( ) const; - - /** - * @return Root node of the regular expression tree - */ - SuffixTrieNodeFinalMark & getRoot ( ); - - const std::set < DefaultSymbolType > & getAlphabet ( ) const { - return accessComponent < GeneralAlphabet > ( ).get ( ); - } - - /** - * Sets the root node of the regular expression tree - * @param tree root node to set - */ - void setTree ( SuffixTrieNodeFinalMark tree ); - - /** - * Removes symbol from the alphabet of symbol available in the regular expression - * @param symbol removed symbol from the alphabet - */ - bool removeSymbolFromAlphabet ( const DefaultSymbolType & symbol ); - - /** - * Prints XML representation of the tree to the output stream. 
- * @param out output stream to which print the tree - * @param tree tree to print - */ - virtual void operator >>( std::ostream & out ) const; - - friend std::ostream & operator <<( std::ostream & out, const SuffixTrieFinalMark & instance ); - - virtual int compare ( const ObjectBase & other ) const { - if ( std::type_index ( typeid ( * this ) ) == std::type_index ( typeid ( other ) ) ) return this->compare ( ( decltype ( * this ) )other ); - - return std::type_index ( typeid ( * this ) ) - std::type_index ( typeid ( other ) ); - } - - virtual int compare ( const SuffixTrieFinalMark & other ) const; - - virtual explicit operator std::string ( ) const; - - static const std::string & getXmlTagName() { - static std::string xmlTagName = "SuffixTrieFinalMark"; - - return xmlTagName; - } - - static SuffixTrieFinalMark parse ( std::deque < sax::Token >::iterator & input ); - - void compose ( std::deque < sax::Token > & out ) const; - - virtual alib::ObjectBase * inc ( ) &&; -}; - -} /* namespace tree */ - -namespace std { - -template < > -class ComponentConstraint< indexes::SuffixTrieFinalMark, DefaultSymbolType, indexes::GeneralAlphabet > { -public: - static bool used ( const indexes::SuffixTrieFinalMark & index, const DefaultSymbolType & symbol ) { - return index.getRoot ( ).testSymbol ( symbol ); - } - - static bool available ( const indexes::SuffixTrieFinalMark &, const DefaultSymbolType & ) { - return true; - } - - static void valid ( const indexes::SuffixTrieFinalMark &, const DefaultSymbolType & ) { - } -}; - -} /* namespace std */ - -#endif /* SUFFIX_TRIE_FINAL_MARK_H_ */ diff --git a/alib2data_experimental/src/indexes/suffixTrie/SuffixTrieNodeFinalMark.cpp b/alib2data_experimental/src/indexes/suffixTrie/SuffixTrieNodeFinalMark.cpp deleted file mode 100644 index cde5382fdca1af2180e0e7b3a21e2663b7ab69f3..0000000000000000000000000000000000000000 --- a/alib2data_experimental/src/indexes/suffixTrie/SuffixTrieNodeFinalMark.cpp +++ /dev/null @@ -1,226 +0,0 @@ -/* - * 
SuffixTrieNodeFinalMark.cpp - * - * Created on: Nov 23, 2013 - * Author: Stepan Plachy - */ - -#include "SuffixTrieNodeFinalMark.h" -#include "SuffixTrieNodeTerminatingSymbol.h" -#include "SuffixTrieFinalMark.h" -#include <exception/CommonException.h> -#include <sstream> - -namespace indexes { - -SuffixTrieNodeFinalMark::SuffixTrieNodeFinalMark ( std::map < DefaultSymbolType, SuffixTrieNodeFinalMark * > children, bool finalMark ) : m_children ( std::move ( children ) ), m_finalMark ( finalMark ), parentTree ( NULL ) { - for ( auto & element : this->m_children ) - element.second->parent = this; - - this->attachTree ( NULL ); - this->parent = NULL; -} - -SuffixTrieNodeFinalMark::SuffixTrieNodeFinalMark ( const SuffixTrieNodeTerminatingSymbol & other, const DefaultSymbolType & terminatingSymbol ) : m_finalMark ( false ), parentTree ( NULL ) { - for ( auto & element : other.getChildren ( ) ) - if ( element.first == terminatingSymbol ) - this->m_finalMark = true; - else - this->m_children.insert ( std::make_pair ( element.first, new SuffixTrieNodeFinalMark ( * element.second, terminatingSymbol ) ) ); - - for ( auto & element : this->m_children ) - element.second->parent = this; - - this->attachTree ( NULL ); - this->parent = NULL; -} - -SuffixTrieNodeFinalMark::SuffixTrieNodeFinalMark ( const SuffixTrieNodeFinalMark & other ) : m_finalMark ( other.m_finalMark ), parentTree ( NULL ) { - for ( const auto & element : other.m_children ) - m_children.insert ( std::make_pair ( element.first, element.second->clone ( ) ) ); - - for ( auto & element : this->m_children ) - element.second->parent = this; - - this->attachTree ( NULL ); - this->parent = NULL; -} - -SuffixTrieNodeFinalMark::SuffixTrieNodeFinalMark ( SuffixTrieNodeFinalMark && other ) noexcept : m_children ( std::move ( other.m_children ) ), m_finalMark ( other.m_finalMark ), parentTree ( NULL ) { - other.m_children.clear ( ); - - for ( auto & element : this->m_children ) - element.second->parent = this; - - 
this->attachTree ( NULL ); - this->parent = NULL; -} - -SuffixTrieNodeFinalMark & SuffixTrieNodeFinalMark::operator =( const SuffixTrieNodeFinalMark & other ) { - if ( this == & other ) - return * this; - - * this = SuffixTrieNodeFinalMark ( other ); - - return * this; -} - -SuffixTrieNodeFinalMark & SuffixTrieNodeFinalMark::operator =( SuffixTrieNodeFinalMark && other ) noexcept { - std::swap ( this->m_children, other.m_children ); - std::swap ( this->m_finalMark, other.m_finalMark ); - std::swap ( this->parentTree, other.parentTree ); // this->parentTree is stored within other.parentTree and it is reattached on the next line - - for ( auto & element : this->m_children ) - element.second->parent = this; - - this->attachTree ( other.parentTree ); - - return * this; -} - -SuffixTrieNodeFinalMark::~SuffixTrieNodeFinalMark ( ) noexcept { - for ( auto element : m_children ) - delete element.second; - - m_children.clear ( ); -} - -const std::map < const DefaultSymbolType, const SuffixTrieNodeFinalMark * > & SuffixTrieNodeFinalMark::getChildren ( ) const { - return * reinterpret_cast < const std::map < const DefaultSymbolType, const SuffixTrieNodeFinalMark * > * > ( & m_children ); -} - -bool SuffixTrieNodeFinalMark::getFinalMark ( ) const { - return m_finalMark; -} - -void SuffixTrieNodeFinalMark::setFinalMark ( bool newFinalMark ) { - m_finalMark = newFinalMark; -} - -const std::map < DefaultSymbolType, SuffixTrieNodeFinalMark * > & SuffixTrieNodeFinalMark::getChildren ( ) { - return m_children; -} - -SuffixTrieNodeFinalMark & SuffixTrieNodeFinalMark::getChild ( const DefaultSymbolType & symbol ) { - std::map < DefaultSymbolType, SuffixTrieNodeFinalMark * >::const_iterator iter = m_children.find ( symbol ); - - if ( iter == m_children.end ( ) ) throw exception::CommonException ( "child does not exist" ); - - return * iter->second; -} - -const SuffixTrieNodeFinalMark & SuffixTrieNodeFinalMark::getChild ( const DefaultSymbolType & symbol ) const { - std::map < 
DefaultSymbolType, SuffixTrieNodeFinalMark * >::const_iterator iter = m_children.find ( symbol ); - - if ( iter == m_children.end ( ) ) throw exception::CommonException ( "child does not exist" ); - - return * iter->second; -} - -bool SuffixTrieNodeFinalMark::hasChild ( const DefaultSymbolType & symbol ) const { - if ( m_children.find ( symbol ) == m_children.end ( ) ) return false; - - return true; -} - -SuffixTrieNodeFinalMark & SuffixTrieNodeFinalMark::addChild ( DefaultSymbolType symbol, SuffixTrieNodeFinalMark node ) { - std::map < DefaultSymbolType, SuffixTrieNodeFinalMark * >::iterator iter = m_children.find ( symbol ); - - if ( iter != m_children.end ( ) ) throw exception::CommonException ( "child already exist" ); - - if ( ( this->parentTree != NULL ) && ! ( this->parentTree->getAlphabet ( ).count ( symbol ) ) ) throw exception::CommonException ( "Symbol is not in the alphabet" ); - - return * m_children.insert ( std::make_pair ( std::move ( symbol ), new SuffixTrieNodeFinalMark ( std::move ( node ) ) ) ).first->second; -} - -SuffixTrieNodeFinalMark * SuffixTrieNodeFinalMark::getParent ( ) { - return parent; -} - -const SuffixTrieNodeFinalMark * SuffixTrieNodeFinalMark::getParent ( ) const { - return parent; -} - -void SuffixTrieNodeFinalMark::swap ( SuffixTrieNodeFinalMark & other ) { - const SuffixTrieFinalMark * thisParentTree = this->parentTree; - const SuffixTrieFinalMark * otherParentTree = other.parentTree; - - SuffixTrieNodeFinalMark tmp = std::move ( other ); - - other = std::move ( * this ); - * this = std::move ( tmp ); - - this->attachTree ( thisParentTree ); - other.attachTree ( otherParentTree ); -} - -SuffixTrieNodeFinalMark * SuffixTrieNodeFinalMark::clone ( ) const { - return new SuffixTrieNodeFinalMark ( * this ); -} - -SuffixTrieNodeFinalMark * SuffixTrieNodeFinalMark::plunder ( ) && { - return new SuffixTrieNodeFinalMark ( std::move ( * this ) ); -} - -int SuffixTrieNodeFinalMark::compare ( const SuffixTrieNodeFinalMark & other ) const 
{ - auto first = std::tie ( getChildren() ); - auto second = std::tie ( other.getChildren() ); - - std::compare < decltype ( first ) > comp; - - return comp ( first, second ); -} - -void SuffixTrieNodeFinalMark::operator >>( std::ostream & out ) const { - out << "(SuffixTrieNodeFinalMark " << " children = " << this->m_children << " finalMark = " << this->m_finalMark << ")"; -} - -std::ostream & operator <<( std::ostream & out, const SuffixTrieNodeFinalMark & node ) { - node >> out; - return out; -} - -bool SuffixTrieNodeFinalMark::testSymbol ( const DefaultSymbolType & symbol ) const { - for ( const auto & child : this->m_children ) { - if ( symbol == child.first ) return true; - - if ( child.second->testSymbol ( symbol ) ) return true; - } - - return false; -} - -bool SuffixTrieNodeFinalMark::attachTree ( const SuffixTrieFinalMark * tree ) { - if ( this->parentTree == tree ) return true; - - this->parentTree = tree; - - for ( const auto & child : this->m_children ) { - if ( ( this->parentTree != NULL ) && ! 
( this->parentTree->getAlphabet ( ).count ( child.first ) ) ) return false; - - if ( !child.second->attachTree ( tree ) ) return false; - } - - return true; -} - -std::set < DefaultSymbolType > SuffixTrieNodeFinalMark::computeMinimalAlphabet ( ) const { - std::set < DefaultSymbolType > res; - computeMinimalAlphabet ( res ); - return res; -} - -void SuffixTrieNodeFinalMark::computeMinimalAlphabet ( std::set < DefaultSymbolType > & alphabet ) const { - for ( const auto & child : this->m_children ) { - alphabet.insert ( child.first ); - child.second->computeMinimalAlphabet ( alphabet ); - } -} - -SuffixTrieNodeFinalMark::operator std::string ( ) const { - std::stringstream ss; - ss << * this; - return ss.str ( ); -} - -} /* namespace indexes */ diff --git a/alib2data_experimental/src/indexes/suffixTrie/SuffixTrieNodeFinalMark.h b/alib2data_experimental/src/indexes/suffixTrie/SuffixTrieNodeFinalMark.h deleted file mode 100644 index e8dac03c69ccf5657d4ebca7665ba6a27c204461..0000000000000000000000000000000000000000 --- a/alib2data_experimental/src/indexes/suffixTrie/SuffixTrieNodeFinalMark.h +++ /dev/null @@ -1,133 +0,0 @@ -/* - * SuffixTrieNodeFinalMark.h - * - * Created on: Nov 23, 2013 - * Author: Stepan Plachy - */ - -#ifndef SUFFIX_TRIE_NODE_FINAL_MARK_H_ -#define SUFFIX_TRIE_NODE_FINAL_MARK_H_ - -#include <common/DefaultSymbolType.h> -#include <map> -#include <set> - -namespace indexes { - -class SuffixTrieNodeTerminatingSymbol; -class SuffixTrieFinalMark; - -/** - * Represents a node in the ranked indexes. Contains name of the symbol. 
- */ -class SuffixTrieNodeFinalMark { -protected: - std::map < DefaultSymbolType, SuffixTrieNodeFinalMark * > m_children; - - bool m_finalMark; - - SuffixTrieNodeFinalMark * parent; - - /** - * Parent indexes contanining this instance of RankedTree - */ - const SuffixTrieFinalMark * parentTree; - - /** - * @copydoc SuffixTrieNodeFinalMark::attachTree() - */ - bool attachTree ( const SuffixTrieFinalMark * indexes ); - -public: - explicit SuffixTrieNodeFinalMark ( std::map < DefaultSymbolType, SuffixTrieNodeFinalMark * > children, bool finalMark ); - - explicit SuffixTrieNodeFinalMark ( const SuffixTrieNodeTerminatingSymbol & other, const DefaultSymbolType & terminatingSymbol ); - - SuffixTrieNodeFinalMark ( const SuffixTrieNodeFinalMark & other ); - SuffixTrieNodeFinalMark ( SuffixTrieNodeFinalMark && other ) noexcept; - SuffixTrieNodeFinalMark & operator =( const SuffixTrieNodeFinalMark & other ); - SuffixTrieNodeFinalMark & operator =( SuffixTrieNodeFinalMark && other ) noexcept; - ~SuffixTrieNodeFinalMark ( ) noexcept; - - /** - * @copydoc SuffixTrieNodeFinalMark::clone() const - */ - SuffixTrieNodeFinalMark * clone ( ) const; - - /** - * @copydoc SuffixTrieNodeFinalMark::plunder() const - */ - SuffixTrieNodeFinalMark * plunder ( ) &&; - - /** - * @copydoc SuffixTrieNodeFinalMark::computeMinimalAlphabet() - */ - std::set < DefaultSymbolType > computeMinimalAlphabet ( ) const; - - /** - * @copydoc SuffixTrieNodeFinalMark::computeMinimalAlphabet() - */ - void computeMinimalAlphabet ( std::set < DefaultSymbolType > & alphabet ) const; - - /** - * @return children - */ - const std::map < const DefaultSymbolType, const SuffixTrieNodeFinalMark * > & getChildren ( ) const; - - bool getFinalMark ( ) const; - - void setFinalMark ( bool newFinalMark ); - - /** - * @return children - */ - const std::map < DefaultSymbolType, SuffixTrieNodeFinalMark * > & getChildren ( ); - - SuffixTrieNodeFinalMark & getChild ( const DefaultSymbolType & symbol ); - - const 
SuffixTrieNodeFinalMark & getChild ( const DefaultSymbolType & symbol ) const; - - bool hasChild ( const DefaultSymbolType & symbol ) const; - - SuffixTrieNodeFinalMark & addChild ( DefaultSymbolType symbol, SuffixTrieNodeFinalMark node ); - - SuffixTrieNodeFinalMark * getParent ( ); - - const SuffixTrieNodeFinalMark * getParent ( ) const; - - void swap ( SuffixTrieNodeFinalMark & other ); - - int compare ( const SuffixTrieNodeFinalMark & ) const; - - /** - * @copydoc SuffixTrieNodeFinalMark::testSymbol() const - */ - bool testSymbol ( const DefaultSymbolType & symbol ) const; - - /** - * @copydoc SuffixTrieNodeFinalMark::operator>>() const - */ - void operator >>( std::ostream & out ) const; - - friend std::ostream & operator <<( std::ostream &, const SuffixTrieNodeFinalMark & node ); - - explicit operator std::string ( ) const; - - friend class SuffixTrieFinalMark; -}; - -} /* namespace indexes */ - -namespace std { - -template < > -struct compare < indexes::SuffixTrieNodeFinalMark > { - int operator ()( const indexes::SuffixTrieNodeFinalMark & first, const indexes::SuffixTrieNodeFinalMark & second ) const { - return first.compare ( second ); - } - -}; - -} /* namespace std */ - -#endif /* SUFFIX_TRIE_NODE_FINAL_MARK_H_ */ diff --git a/astringology2/src/astringology.cpp b/astringology2/src/astringology.cpp index 16c9e749e2266a06a03eddf64d3da21c32a74a67..65ac45517db6774f39c012970256f7a5f639798c 100644 --- a/astringology2/src/astringology.cpp +++ b/astringology2/src/astringology.cpp @@ -34,7 +34,7 @@ #include <stringology/exact/FactorOracleAutomaton.h> #include <stringology/exact/SuffixAutomaton.h> #include <string/properties/BorderArray.h> -#include <stringology/indexing/SuffixTrie.h> +#include <stringology/indexing/SuffixTrieNaive.h> int main ( int argc, char * argv[] ) { try { @@ -283,7 +283,7 @@ int main ( int argc, char * argv[] ) { measurements::end ( ); measurements::start ( "Algorithm", measurements::Type::MAIN ); - indexes::SuffixTrieFinalMark suffixTrie = 
stringology::indexing::SuffixTrie::construct ( subject ); + indexes::SuffixTrie < DefaultSymbolType, unsigned > suffixTrie = stringology::indexing::SuffixTrieNaive::construct ( subject ); measurements::end ( ); measurements::start ( "Output write", measurements::Type::AUXILIARY );