diff --git a/alib2data/src/indexes/stringology/SuffixAutomaton.h b/alib2data/src/indexes/stringology/SuffixAutomaton.h index db17f6019873d293437daca1cc292a539f3974c7..3b326a8f45bacb32f716e354c3ef3c02eab4845e 100644 --- a/alib2data/src/indexes/stringology/SuffixAutomaton.h +++ b/alib2data/src/indexes/stringology/SuffixAutomaton.h @@ -1,6 +1,22 @@ /* * SuffixAutomaton.h * + * This file is part of Algorithms library toolkit. + * Copyright (C) 2017 Jan Travnicek (jan.travnicek@fit.cvut.cz) + + * Algorithms library toolkit is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + + * Algorithms library toolkit is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License + * along with Algorithms library toolkit. If not, see <http://www.gnu.org/licenses/>. + * * Created on: Jan 8, 2017 * Author: Jan Travnicek */ @@ -34,77 +50,135 @@ namespace stringology { class GeneralAlphabet; /** - * Represents regular expression parsed from the XML. Regular expression is stored - * as a tree of RegExpElement. + * \brief + * Suffix automaton string index. Automaton representation of all suffixes. The automaton is a general deterministic automaton. The class does not check whether the automaton actually is a suffix automaton. The index alphabet is stored within the automaton. + * + * \tparam SymbolType type of symbols of indexed string */ template < class SymbolType = DefaultSymbolType > class SuffixAutomaton final : public object::ObjectBase { -protected: + /** + * Representation of the suffix automaton. 
+ */ automaton::DFA < SymbolType, unsigned > m_automaton; + /** + * Additional information about the backbone length, i.e. the length of the indexed string. + */ unsigned m_backboneLength; public: /** - * @copydoc SuffixTrieNode::clone ( ) const & + * @copydoc ObjectBase::clone ( ) const & */ - virtual ObjectBase * clone ( ) const &; + virtual ObjectBase * clone ( ) const & override; /** - * @copydoc SuffixTrieNode::clone ( ) const & + * @copydoc ObjectBase::clone() && */ - virtual ObjectBase * clone ( ) &&; + virtual ObjectBase * clone ( ) && override; + /** + * Creates a new instance of the index with concrete suffix automaton and backbone length. + * + * \param automaton the suffix automaton representing the index + * \param backboneLength the length of the indexed string or the longest path in the automaton. + */ explicit SuffixAutomaton ( automaton::DFA < SymbolType, unsigned > automaton, unsigned backboneLength ); /** - * @return Root node of the trie + * Getter of the underlying suffix automaton. + * + * \return raw automaton representing the index. */ const automaton::DFA < SymbolType, unsigned > & getAutomaton ( ) const &; + /** + * Getter of the underlying suffix automaton. + * + * \return raw automaton representing the index. + */ automaton::DFA < SymbolType, unsigned > && getAutomaton ( ) &&; + /** + * Getter of the alphabet of the indexed string. + * + * \returns the alphabet of the indexed string + */ const ext::set < SymbolType > & getAlphabet ( ) const & { return m_automaton.getInputAlphabet ( ); } + /** + * Getter of the alphabet of the indexed string. + * + * \returns the alphabet of the indexed string + */ ext::set < SymbolType > && getAlphabet ( ) && { return std::move ( m_automaton ).getInputAlphabet ( ); } /** - * Removes symbol from the alphabet of symbol available in the regular expression - * @param symbol removed symbol from the alphabet + * Remover of a symbol from the alphabet. + * + * \param symbol a symbol to remove. 
*/ bool removeSymbolFromAlphabet ( const SymbolType & symbol ) { return m_automaton.removeInputSymbol ( symbol ); } + /** + * Getter of the backbone length. + * + * \return the backbone length of the automaton + */ unsigned getBackboneLength ( ) const { return m_backboneLength; } /** - * Prints XML representation of the tree to the output stream. - * @param out output stream to which print the tree - * @param tree tree to print + * @copydoc alib::CommonBase<ObjectBase>::compare ( const ObjectBase & ) */ - virtual void operator >>( std::ostream & out ) const; - - virtual int compare ( const ObjectBase & other ) const { + virtual int compare ( const ObjectBase & other ) const override { if ( ext::type_index ( typeid ( * this ) ) == ext::type_index ( typeid ( other ) ) ) return this->compare ( ( decltype ( * this ) )other ); return ext::type_index ( typeid ( * this ) ) - ext::type_index ( typeid ( other ) ); } - virtual int compare ( const SuffixAutomaton & other ) const; + /** + * The actual compare method + * + * \param other the other instance + * + * \returns the actual relation between two automata instances of the same type + */ + int compare ( const SuffixAutomaton & other ) const; - virtual explicit operator std::string ( ) const; + /** + * @copydoc alib::CommonBase<ObjectBase>::operator >> ( std::ostream & ) + */ + virtual void operator >>( std::ostream & out ) const override; + + /** + * @copydoc alib::CommonBase<ObjectBase>::operator std::string ( ) + */ + virtual explicit operator std::string ( ) const override; + /** + * Cast operator to the raw automaton. + * + * \return the underlying raw deterministic automaton. + */ explicit operator automaton::DFA < SymbolType, unsigned > ( ) const; - virtual object::ObjectBase * inc ( ) &&; + /** + * @copydoc alib::ObjectBase::inc() + */ + virtual object::ObjectBase * inc ( ) && override; + /** + * Type of normalized index. 
+ */ typedef SuffixAutomaton < > normalized_type; }; @@ -178,6 +252,11 @@ object::ObjectBase* SuffixAutomaton < SymbolType >::inc() && { namespace core { +/** + * Helper for normalisation of types specified by templates used as internal datatypes of symbols. + * + * \returns new instance of the automaton with default template parameters or unmodified instance if the template parameters were already the default ones + */ template < class SymbolType > struct normalize < indexes::stringology::SuffixAutomaton < SymbolType >, typename std::enable_if < ! std::is_same < indexes::stringology::SuffixAutomaton < SymbolType >, indexes::stringology::SuffixAutomaton < > >::value >::type > { static indexes::stringology::SuffixAutomaton < > eval ( indexes::stringology::SuffixAutomaton < SymbolType > && value ) { diff --git a/alib2data/src/indexes/stringology/SuffixTrie.h b/alib2data/src/indexes/stringology/SuffixTrie.h index ea32300e8c8a834c5fc672a981149fff02c27f12..430e44505768a05b719f10db7deca43517f1e40e 100644 --- a/alib2data/src/indexes/stringology/SuffixTrie.h +++ b/alib2data/src/indexes/stringology/SuffixTrie.h @@ -67,7 +67,7 @@ class GeneralAlphabet; /** * \brief - * Suffix trie string index. Tree like representation of all suffixes. Nodes of the trie are either containing index of the suffix or void. The parent child relationship of nodes is represented by single symbol. + * Suffix trie string index. Tree like representation of all suffixes. Nodes of the trie are either containing index of the suffix or void. The parent child relationship of nodes is represented by single symbol. The class does not check whether the trie actually is a suffix trie. * * \tparam SymbolType type of symbols of indexed string */ @@ -158,9 +158,9 @@ public: void setTree ( ext::trie < SymbolType, ext::variant < void, unsigned > > tree ); /** - * Sets the root node of the suffix trie + * Remover of a symbol from the alphabet. * - * \param tree root node to set + * \param symbol a symbol to remove. 
*/ bool removeSymbolFromEdgeAlphabet ( const SymbolType & symbol ) { return this->template accessComponent < GeneralAlphabet > ( ).remove ( symbol ); @@ -354,7 +354,7 @@ public: }; /** - * Helper for normalisation of types specified by templates used as internal datatypes of symbols and states. + * Helper for normalisation of types specified by templates used as internal datatypes of symbols. * * \returns new instance of the automaton with default template parameters or unmodified instance if the template parameters were already the default ones */