Store the backbone length inside indexes::stringology::SuffixAutomaton.

Summary of what the hunks below do:
  * SuffixAutomaton gains an `unsigned m_backboneLength` member, a second
    constructor parameter that initialises it, and a getBackboneLength ( )
    accessor; the XML parse/compose routines read/write the value right
    after the wrapped DFA.
  * ExactSuffixAutomaton passes pattern.getContent ( ).size ( ) as the
    backbone length; the unit test passes the literal 6.
  * BackwardDAWGMatching and SuffixAutomatonFactors call
    getBackboneLength ( ) instead of deriving the value themselves.
  * SuffixAutomaton gets an explicit conversion operator to
    automaton::DFA < SymbolType, unsigned > and a matching CastRegister.

NOTE(review): in SuffixAutomatonFactors::query the expression
`backboneLength - dist - 1` is evaluated in unsigned arithmetic; confirm
that dist < backboneLength holds for every final state that can be reached
(e.g. for an empty query string), otherwise the result wraps around.
NOTE(review): the `FIXME` added to core::normalize marks code that calls
getAlphabet ( ) / getString ( ) on the index and still needs a follow-up.

diff --git a/alib2algo/src/stringology/indexing/ExactSuffixAutomaton.h b/alib2algo/src/stringology/indexing/ExactSuffixAutomaton.h
index 64c524cfb96f8a3438c772d74ba9884b2bbd7333..3b4681c5a2e114fcfae1c11cbb81b692f9872082 100644
--- a/alib2algo/src/stringology/indexing/ExactSuffixAutomaton.h
+++ b/alib2algo/src/stringology/indexing/ExactSuffixAutomaton.h
@@ -49,7 +49,7 @@ indexes::stringology::SuffixAutomaton < SymbolType > ExactSuffixAutomaton::const
 		lastState = suffixLinks [ lastState ].first;
 	}
 
-	return indexes::stringology::SuffixAutomaton < SymbolType > ( std::move ( suffixAutomaton ) );
+	return indexes::stringology::SuffixAutomaton < SymbolType > ( std::move ( suffixAutomaton ), pattern.getContent ( ).size ( ) );
 }
 
 template < class SymbolType >
diff --git a/alib2algo/src/stringology/query/BackwardDAWGMatching.h b/alib2algo/src/stringology/query/BackwardDAWGMatching.h
index 9094f72e2b7c18bd80c30439d77e71d675056a87..b8588a29b3f8973c397a9b9b9dc21e7f9e487072 100644
--- a/alib2algo/src/stringology/query/BackwardDAWGMatching.h
+++ b/alib2algo/src/stringology/query/BackwardDAWGMatching.h
@@ -10,8 +10,6 @@
 #include <indexes/stringology/SuffixAutomaton.h>
 #include <string/LinearString.h>
 
-#include <stringology/properties/BackboneLength.h>
-
 namespace stringology {
 
 namespace query {
@@ -34,7 +32,7 @@ template < class SymbolType >
 ext::set < unsigned > BackwardDAWGMatching::match ( const string::LinearString < SymbolType > & subject, const indexes::stringology::SuffixAutomaton < SymbolType > & suffixAutomaton ) {
 	ext::set < unsigned > occ;
 
-	size_t patternSize = stringology::properties::BackboneLength::length ( suffixAutomaton.getAutomaton ( ) );
+	size_t patternSize = suffixAutomaton.getBackboneLength ( );
 	size_t subjectSize = subject.getContent ( ).size ( );
 
 	bool fail;
diff --git a/alib2algo/src/stringology/query/SuffixAutomatonFactors.h b/alib2algo/src/stringology/query/SuffixAutomatonFactors.h
index 978a86b65fc994433223c16c8db0fab391fcd895..62a8f2a271d80da35a1c95ba752acf4ec9641870 100644
--- a/alib2algo/src/stringology/query/SuffixAutomatonFactors.h
+++ b/alib2algo/src/stringology/query/SuffixAutomatonFactors.h
@@ -38,13 +38,14 @@ public:
 
 template < class SymbolType >
 ext::set < unsigned > SuffixAutomatonFactors::query ( const indexes::stringology::SuffixAutomaton < SymbolType > & suffixAutomaton, const string::LinearString < SymbolType > & string ) {
+	unsigned backboneLength = suffixAutomaton.getBackboneLength ( );
+
 	ext::tuple < bool, unsigned, ext::set < unsigned > > run = automaton::run::Run::calculateState ( suffixAutomaton.getAutomaton ( ), string );
 	if ( ! std::get < 0 > ( run ) )
 		return { };
 
 	std::deque < std::pair < unsigned, unsigned > > open = { { std::get < 1 > ( run ), 0u } };
 	ext::vector < unsigned > tmp;
-	unsigned max = 0;
 	while ( ! open.empty ( ) ) {
 		std::pair < unsigned, unsigned > cur = std::move ( open.back ( ) );
 		open.pop_back ( );
@@ -52,18 +53,13 @@ ext::set < unsigned > SuffixAutomatonFactors::query ( const indexes::stringology
 		if ( suffixAutomaton.getAutomaton ( ).getFinalStates ( ).count ( cur.first ) )
 			tmp.push_back ( cur.second );
 
-		if ( cur.second > max )
-			max = cur.second;
-
 		for ( const auto & transition : suffixAutomaton.getAutomaton ( ).getTransitionsFromState ( cur.first ) )
 			open.emplace_back ( transition.second, cur.second + 1 );
 	}
 
-	unsigned subjectSize = max + string.getContent ( ).size ( );
-
 	ext::set < unsigned > res;
 	for ( unsigned dist : tmp )
-		res.insert ( subjectSize - dist );
+		res.insert ( backboneLength - dist - 1 );
 
 	return res;
 }
diff --git a/alib2algo/test-src/stringology/indexing/ExactSuffixAutomatonTest.cpp b/alib2algo/test-src/stringology/indexing/ExactSuffixAutomatonTest.cpp
index d5f5c279f072de4843fb0ef67a8d9312fddeb682..e18f732f133de5c6306a327690c99d39343b36c5 100644
--- a/alib2algo/test-src/stringology/indexing/ExactSuffixAutomatonTest.cpp
+++ b/alib2algo/test-src/stringology/indexing/ExactSuffixAutomatonTest.cpp
@@ -40,7 +40,7 @@ void ExactSuffixAutomatonTest::testSuffixAutomatonConstruction ( ) {
 	tmp.addTransition ( 4, DefaultSymbolType ( 'a' ), 5 );
 	tmp.addTransition ( 5, DefaultSymbolType ( 'c' ), 6 );
 
-	indexes::stringology::SuffixAutomaton < > refSuffixAutomaton ( std::move ( tmp ) );
+	indexes::stringology::SuffixAutomaton < > refSuffixAutomaton ( std::move ( tmp ), 6 );
 
 	std::cout << suffixAutomaton << std::endl;
 	std::cout << refSuffixAutomaton << std::endl;
diff --git a/alib2data/src/indexes/stringology/SuffixAutomaton.cpp b/alib2data/src/indexes/stringology/SuffixAutomaton.cpp
index 691a72c7997b49b5ad06038407481da42ac71b27..3a6c9094ef0fba2658417db64d625e23d5e2c456 100644
--- a/alib2data/src/indexes/stringology/SuffixAutomaton.cpp
+++ b/alib2data/src/indexes/stringology/SuffixAutomaton.cpp
@@ -7,11 +7,14 @@
 
 #include "SuffixAutomaton.h"
 
+#include <registration/CastRegistration.hpp>
 #include <registration/ValuePrinterRegistration.hpp>
 #include <registration/XmlRegistration.hpp>
 
 namespace {
 
+static auto DFAFromSuffixAutomaton = registration::CastRegister < automaton::DFA < DefaultSymbolType, unsigned >, indexes::stringology::SuffixAutomaton < > > ( );
+
 static auto valuePrinter = registration::ValuePrinterRegister < indexes::stringology::SuffixAutomaton < > > ( );
 
 static auto xmlWrite = registration::XmlWriterRegister < indexes::stringology::SuffixAutomaton < > > ( );
diff --git a/alib2data/src/indexes/stringology/SuffixAutomaton.h b/alib2data/src/indexes/stringology/SuffixAutomaton.h
index 40a9b597aab5f7c41f71df8f8c03fb4d65b16cbb..5a32a1369abe4b0e9502e72f9e620e769634eb5e 100644
--- a/alib2data/src/indexes/stringology/SuffixAutomaton.h
+++ b/alib2data/src/indexes/stringology/SuffixAutomaton.h
@@ -42,6 +42,8 @@ class SuffixAutomaton final : public object::ObjectBase {
 protected:
 	automaton::DFA < SymbolType, unsigned > m_automaton;
 
+	unsigned m_backboneLength;
+
 public:
 	/**
 	 * @copydoc SuffixTrieNode::clone() const
@@ -53,7 +55,7 @@ public:
 	 */
 	virtual ObjectBase * plunder ( ) &&;
 
-	explicit SuffixAutomaton ( automaton::DFA < SymbolType, unsigned > automaton );
+	explicit SuffixAutomaton ( automaton::DFA < SymbolType, unsigned > automaton, unsigned backboneLength );
 
 	/**
 	 * @return Root node of the trie
@@ -78,6 +80,10 @@ public:
 		return m_automaton.removeInputSymbol ( symbol );
 	}
 
+	unsigned getBackboneLength ( ) const {
+		return m_backboneLength;
+	}
+
 	/**
 	 * Prints XML representation of the tree to the output stream.
 	 * @param out output stream to which print the tree
@@ -95,6 +101,8 @@ public:
 
 	virtual explicit operator std::string ( ) const;
 
+	explicit operator automaton::DFA < SymbolType, unsigned > ( ) const;
+
 	virtual object::ObjectBase * inc ( ) &&;
 
 	typedef SuffixAutomaton < > normalized_type;
@@ -109,7 +117,7 @@ namespace indexes {
 namespace stringology {
 
 template < class SymbolType >
-SuffixAutomaton < SymbolType >::SuffixAutomaton ( automaton::DFA < SymbolType, unsigned > automaton ) : m_automaton ( std::move ( automaton ) ) {
+SuffixAutomaton < SymbolType >::SuffixAutomaton ( automaton::DFA < SymbolType, unsigned > automaton, unsigned backboneLength ) : m_automaton ( std::move ( automaton ) ), m_backboneLength ( backboneLength ) {
 }
 
 template < class SymbolType >
@@ -154,6 +162,11 @@ SuffixAutomaton < SymbolType >::operator std::string ( ) const {
 	return ss.str ( );
 }
 
+template < class SymbolType >
+SuffixAutomaton < SymbolType >::operator automaton::DFA < SymbolType, unsigned > ( ) const {
+	return getAutomaton ( );
+}
+
 template < class SymbolType >
 object::ObjectBase* SuffixAutomaton < SymbolType >::inc() && {
 	return new object::UniqueObject(object::Object(std::move(*this)), primitive::Integer(0));
@@ -168,6 +181,7 @@ namespace core {
 template < class SymbolType >
 struct normalize < indexes::stringology::SuffixAutomaton < SymbolType >, typename std::enable_if < ! std::is_same < indexes::stringology::SuffixAutomaton < SymbolType >, indexes::stringology::SuffixAutomaton < > >::value >::type > {
 	static indexes::stringology::SuffixAutomaton < > eval ( indexes::stringology::SuffixAutomaton < SymbolType > && value ) {
+		// FIXME this does not look right
 		ext::set < DefaultSymbolType > alphabet = alphabet::SymbolNormalize::normalizeAlphabet ( std::move ( value ).getAlphabet ( ) );
 		ext::vector < DefaultSymbolType > string = alphabet::SymbolNormalize::normalizeSymbols ( std::move ( value ).getString ( ) );
 
@@ -187,7 +201,8 @@ template < class SymbolType >
 indexes::stringology::SuffixAutomaton < SymbolType > xmlApi < indexes::stringology::SuffixAutomaton < SymbolType > >::parse ( ext::deque < sax::Token >::iterator & input ) {
 	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, xmlTagName ( ) );
 	automaton::DFA < SymbolType, unsigned > automaton = core::xmlApi < automaton::DFA < SymbolType, unsigned > >::parse ( input );
-	indexes::stringology::SuffixAutomaton < SymbolType > res ( std::move ( automaton ) );
+	unsigned backboneLength = core::xmlApi < unsigned >::parse ( input );
+	indexes::stringology::SuffixAutomaton < SymbolType > res ( std::move ( automaton ), backboneLength );
 
 	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, xmlTagName ( ) );
 	return res;
@@ -209,6 +224,7 @@ template < class SymbolType >
 void xmlApi < indexes::stringology::SuffixAutomaton < SymbolType > >::compose ( ext::deque < sax::Token > & output, const indexes::stringology::SuffixAutomaton < SymbolType > & index ) {
 	output.emplace_back ( xmlTagName ( ), sax::Token::TokenType::START_ELEMENT );
 	core::xmlApi < automaton::DFA < SymbolType, unsigned > >::compose ( output, index.getAutomaton ( ) );
+	core::xmlApi < unsigned >::compose ( output, index.getBackboneLength ( ) );
 	output.emplace_back ( xmlTagName ( ), sax::Token::TokenType::END_ELEMENT );
 }
 