From 997692cb458db2bae48758f746f5f698bf3a15f2 Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Tue, 13 Feb 2018 12:31:14 +0100 Subject: [PATCH] update suffix automaton and factor oracle indexes --- .../indexing/ExactFactorOracleAutomaton.h | 2 +- .../indexing/ExactSuffixAutomatonTest.cpp | 30 +++++++++--------- .../indexing/FactorOracleAutomatonTest.cpp | 30 +++++++++--------- .../stringology/FactorOracleAutomaton.h | 31 ++++++++++++------- .../src/indexes/stringology/SuffixAutomaton.h | 19 +++++++++--- 5 files changed, 67 insertions(+), 45 deletions(-) diff --git a/alib2algo/src/stringology/indexing/ExactFactorOracleAutomaton.h b/alib2algo/src/stringology/indexing/ExactFactorOracleAutomaton.h index fb2e2cb695..c2092b9834 100644 --- a/alib2algo/src/stringology/indexing/ExactFactorOracleAutomaton.h +++ b/alib2algo/src/stringology/indexing/ExactFactorOracleAutomaton.h @@ -40,7 +40,7 @@ indexes::stringology::FactorOracleAutomaton < SymbolType > ExactFactorOracleAuto for ( const SymbolType & symbol : pattern.getContent ( ) ) oracleAddLetter ( oracleAutomaton, symbol, supplyFunction ); - return indexes::stringology::FactorOracleAutomaton < SymbolType > ( std::move ( oracleAutomaton ), pattern.getContent ( ).size ( ) ); + return indexes::stringology::FactorOracleAutomaton < SymbolType > ( std::move ( oracleAutomaton ) ); } template < class SymbolType > diff --git a/alib2algo/test-src/stringology/indexing/ExactSuffixAutomatonTest.cpp b/alib2algo/test-src/stringology/indexing/ExactSuffixAutomatonTest.cpp index e18f732f13..1a76d9579e 100644 --- a/alib2algo/test-src/stringology/indexing/ExactSuffixAutomatonTest.cpp +++ b/alib2algo/test-src/stringology/indexing/ExactSuffixAutomatonTest.cpp @@ -14,11 +14,11 @@ void ExactSuffixAutomatonTest::tearDown ( ) { void ExactSuffixAutomatonTest::testSuffixAutomatonConstruction ( ) { - string::LinearString < > pattern ( "atatac" ); + string::LinearString < char > pattern ( "atatac" ); - indexes::stringology::SuffixAutomaton < > suffixAutomaton = stringology::indexing::ExactSuffixAutomaton::construct ( pattern ); + indexes::stringology::SuffixAutomaton < char > suffixAutomaton = stringology::indexing::ExactSuffixAutomaton::construct ( pattern ); - automaton::DFA < DefaultSymbolType, unsigned > tmp ( 0 ); + automaton::DFA < char, unsigned > tmp ( 0 ); tmp.setInputAlphabet ( pattern.getAlphabet ( ) ); @@ -29,21 +29,23 @@ void ExactSuffixAutomatonTest::testSuffixAutomatonConstruction ( ) { tmp.addFinalState ( 0 ); tmp.addFinalState ( 6 ); - tmp.addTransition ( 0, DefaultSymbolType ( 'a' ), 1 ); - tmp.addTransition ( 0, DefaultSymbolType ( 't' ), 2 ); - tmp.addTransition ( 0, DefaultSymbolType ( 'c' ), 6 ); - tmp.addTransition ( 1, DefaultSymbolType ( 't' ), 2 ); - tmp.addTransition ( 1, DefaultSymbolType ( 'c' ), 6 ); - tmp.addTransition ( 2, DefaultSymbolType ( 'a' ), 3 ); - tmp.addTransition ( 3, DefaultSymbolType ( 't' ), 4 ); - tmp.addTransition ( 3, DefaultSymbolType ( 'c' ), 6 ); - tmp.addTransition ( 4, DefaultSymbolType ( 'a' ), 5 ); - tmp.addTransition ( 5, DefaultSymbolType ( 'c' ), 6 ); + tmp.addTransition ( 0, 'a', 1 ); + tmp.addTransition ( 0, 't', 2 ); + tmp.addTransition ( 0, 'c', 6 ); + tmp.addTransition ( 1, 't', 2 ); + tmp.addTransition ( 1, 'c', 6 ); + tmp.addTransition ( 2, 'a', 3 ); + tmp.addTransition ( 3, 't', 4 ); + tmp.addTransition ( 3, 'c', 6 ); + tmp.addTransition ( 4, 'a', 5 ); + tmp.addTransition ( 5, 'c', 6 ); - indexes::stringology::SuffixAutomaton < > refSuffixAutomaton ( std::move ( tmp ), 6 ); + indexes::stringology::SuffixAutomaton < char > refSuffixAutomaton ( std::move ( tmp ), 6 ); std::cout << suffixAutomaton << std::endl; std::cout << refSuffixAutomaton << std::endl; CPPUNIT_ASSERT ( suffixAutomaton == refSuffixAutomaton ); + + core::normalize < indexes::stringology::SuffixAutomaton < char > >::eval ( std::move ( refSuffixAutomaton ) ); } diff --git a/alib2algo/test-src/stringology/indexing/FactorOracleAutomatonTest.cpp b/alib2algo/test-src/stringology/indexing/FactorOracleAutomatonTest.cpp index 495dce3da0..8a7c184ef3 100644 --- a/alib2algo/test-src/stringology/indexing/FactorOracleAutomatonTest.cpp +++ b/alib2algo/test-src/stringology/indexing/FactorOracleAutomatonTest.cpp @@ -13,11 +13,11 @@ void FactorOracleAutomatonTest::tearDown ( ) { } void FactorOracleAutomatonTest::testFactorOracleConstruction ( ) { - string::LinearString < > pattern ( "atatac" ); + string::LinearString < char > pattern ( "atatac" ); - indexes::stringology::FactorOracleAutomaton < > oracle = stringology::indexing::ExactFactorOracleAutomaton::construct ( pattern ); + indexes::stringology::FactorOracleAutomaton < char > oracle = stringology::indexing::ExactFactorOracleAutomaton::construct ( pattern ); - automaton::DFA < DefaultSymbolType, unsigned > tmp ( 0 ); + automaton::DFA < char, unsigned > tmp ( 0 ); tmp.addFinalState ( 0 ); @@ -28,18 +28,20 @@ void FactorOracleAutomatonTest::testFactorOracleConstruction ( ) { tmp.addFinalState ( i ); } - tmp.addTransition ( 0, DefaultSymbolType ( 'a' ), 1 ); - tmp.addTransition ( 0, DefaultSymbolType ( 't' ), 2 ); - tmp.addTransition ( 0, DefaultSymbolType ( 'c' ), 6 ); - tmp.addTransition ( 1, DefaultSymbolType ( 't' ), 2 ); - tmp.addTransition ( 1, DefaultSymbolType ( 'c' ), 6 ); - tmp.addTransition ( 2, DefaultSymbolType ( 'a' ), 3 ); - tmp.addTransition ( 3, DefaultSymbolType ( 't' ), 4 ); - tmp.addTransition ( 3, DefaultSymbolType ( 'c' ), 6 ); - tmp.addTransition ( 4, DefaultSymbolType ( 'a' ), 5 ); - tmp.addTransition ( 5, DefaultSymbolType ( 'c' ), 6 ); + tmp.addTransition ( 0, 'a', 1 ); + tmp.addTransition ( 0, 't', 2 ); + tmp.addTransition ( 0, 'c', 6 ); + tmp.addTransition ( 1, 't', 2 ); + tmp.addTransition ( 1, 'c', 6 ); + tmp.addTransition ( 2, 'a', 3 ); + tmp.addTransition ( 3, 't', 4 ); + tmp.addTransition ( 3, 'c', 6 ); + tmp.addTransition ( 4, 'a', 5 ); + tmp.addTransition ( 5, 'c', 6 ); - indexes::stringology::FactorOracleAutomaton < > refOracle ( std::move ( tmp ), 6 ); + indexes::stringology::FactorOracleAutomaton < char > refOracle ( std::move ( tmp ) ); CPPUNIT_ASSERT ( oracle == refOracle ); + + core::normalize < indexes::stringology::FactorOracleAutomaton < char > >::eval ( std::move ( refOracle ) ); } diff --git a/alib2data/src/indexes/stringology/FactorOracleAutomaton.h b/alib2data/src/indexes/stringology/FactorOracleAutomaton.h index d73668ef81..0058f23701 100644 --- a/alib2data/src/indexes/stringology/FactorOracleAutomaton.h +++ b/alib2data/src/indexes/stringology/FactorOracleAutomaton.h @@ -42,8 +42,6 @@ class FactorOracleAutomaton final : public object::ObjectBase { protected: automaton::DFA < SymbolType, unsigned > m_automaton; - unsigned m_backboneLength; - public: /** * @copydoc OracleTrieNode::clone() const @@ -55,7 +53,7 @@ public: */ virtual ObjectBase * plunder ( ) &&; - explicit FactorOracleAutomaton ( automaton::DFA < SymbolType, unsigned > automaton, unsigned backboneLength ); + explicit FactorOracleAutomaton ( automaton::DFA < SymbolType, unsigned > automaton ); /** * @return Root node of the trie @@ -81,7 +79,7 @@ public: } unsigned getBackboneLength ( ) const { - return m_backboneLength; + return m_automaton.getStates ( ).size ( ) - 1; } /** @@ -117,7 +115,7 @@ namespace indexes { namespace stringology { template < class SymbolType > -FactorOracleAutomaton < SymbolType >::FactorOracleAutomaton ( automaton::DFA < SymbolType, unsigned > automaton, unsigned backboneLength ) : m_automaton ( std::move ( automaton ) ), m_backboneLength ( backboneLength ) { +FactorOracleAutomaton < SymbolType >::FactorOracleAutomaton ( automaton::DFA < SymbolType, unsigned > automaton ) : m_automaton ( std::move ( automaton ) ) { } template < class SymbolType > @@ -181,11 +179,22 @@ namespace core { template < class SymbolType > struct normalize < indexes::stringology::FactorOracleAutomaton < SymbolType >, typename std::enable_if < ! std::is_same < indexes::stringology::FactorOracleAutomaton < SymbolType >, indexes::stringology::FactorOracleAutomaton < > >::value >::type > { static indexes::stringology::FactorOracleAutomaton < > eval ( indexes::stringology::FactorOracleAutomaton < SymbolType > && value ) { - // FIXME this does not look right - ext::set < DefaultSymbolType > alphabet = alphabet::SymbolNormalize::normalizeAlphabet ( std::move ( value ).getAlphabet ( ) ); - ext::vector < DefaultSymbolType > string = alphabet::SymbolNormalize::normalizeSymbols ( std::move ( value ).getString ( ) ); + ext::set < DefaultSymbolType > alphabet = alphabet::SymbolNormalize::normalizeAlphabet ( std::move ( std::move ( value ).getAutomaton ( ) ).getInputAlphabet ( ) ); + ext::set < unsigned > states = std::move ( std::move ( value ).getAutomaton ( ) ).getStates ( ); + unsigned initialState = std::move ( std::move ( value ).getAutomaton ( ) ).getInitialState ( ); + ext::set < unsigned > finalStates = std::move ( std::move ( value ).getAutomaton ( ) ).getFinalStates ( ); + + automaton::DFA < DefaultStateType, unsigned > res ( std::move ( states ), std::move ( alphabet ), std::move ( initialState ), std::move ( finalStates ) ); + + for ( std::pair < ext::pair < unsigned, SymbolType >, unsigned > && transition : ext::make_moveable_map ( std::move ( std::move ( value ).getAutomaton ( ) ).getTransitions ( ) ) ) { + unsigned from = transition.first.first; + DefaultSymbolType input = alphabet::SymbolNormalize::normalizeSymbol ( std::move ( transition.first.second ) ); + unsigned to = transition.second; + + res.addTransition ( std::move ( from ), std::move ( input ), std::move ( to ) ); + } - return indexes::stringology::FactorOracleAutomaton < > ( std::move ( alphabet ), std::move ( value ).getData ( ), std::move ( string ) ); + return indexes::stringology::FactorOracleAutomaton < > ( std::move ( res ) ); } }; @@ -201,8 +210,7 @@ template < class SymbolType > indexes::stringology::FactorOracleAutomaton < SymbolType > xmlApi < indexes::stringology::FactorOracleAutomaton < SymbolType > >::parse ( ext::deque < sax::Token >::iterator & input ) { sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, xmlTagName ( ) ); automaton::DFA < SymbolType, unsigned > automaton = core::xmlApi < automaton::DFA < SymbolType, unsigned > >::parse ( input ); - unsigned backboneLength = core::xmlApi < unsigned >::parse ( input ); - indexes::stringology::FactorOracleAutomaton < SymbolType > res ( std::move ( automaton ), backboneLength ); + indexes::stringology::FactorOracleAutomaton < SymbolType > res ( std::move ( automaton ) ); sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, xmlTagName ( ) ); return res; @@ -224,7 +232,6 @@ template < class SymbolType > void xmlApi < indexes::stringology::FactorOracleAutomaton < SymbolType > >::compose ( ext::deque < sax::Token > & output, const indexes::stringology::FactorOracleAutomaton < SymbolType > & index ) { output.emplace_back ( xmlTagName ( ), sax::Token::TokenType::START_ELEMENT ); core::xmlApi < automaton::DFA < SymbolType, unsigned > >::compose ( output, index.getAutomaton ( ) ); - core::xmlApi < unsigned >::compose ( output, index.getBackboneLength ( ) ); output.emplace_back ( xmlTagName ( ), sax::Token::TokenType::END_ELEMENT ); } diff --git a/alib2data/src/indexes/stringology/SuffixAutomaton.h b/alib2data/src/indexes/stringology/SuffixAutomaton.h index 5a32a1369a..18b5056771 100644 --- a/alib2data/src/indexes/stringology/SuffixAutomaton.h +++ b/alib2data/src/indexes/stringology/SuffixAutomaton.h @@ -181,11 +181,22 @@ namespace core { template < class SymbolType > struct normalize < indexes::stringology::SuffixAutomaton < SymbolType >, typename std::enable_if < ! std::is_same < indexes::stringology::SuffixAutomaton < SymbolType >, indexes::stringology::SuffixAutomaton < > >::value >::type > { static indexes::stringology::SuffixAutomaton < > eval ( indexes::stringology::SuffixAutomaton < SymbolType > && value ) { - // FIXME this does not look right - ext::set < DefaultSymbolType > alphabet = alphabet::SymbolNormalize::normalizeAlphabet ( std::move ( value ).getAlphabet ( ) ); - ext::vector < DefaultSymbolType > string = alphabet::SymbolNormalize::normalizeSymbols ( std::move ( value ).getString ( ) ); + ext::set < DefaultSymbolType > alphabet = alphabet::SymbolNormalize::normalizeAlphabet ( std::move ( std::move ( value ).getAutomaton ( ) ).getInputAlphabet ( ) ); + ext::set < unsigned > states = std::move ( std::move ( value ).getAutomaton ( ) ).getStates ( ); + unsigned initialState = std::move ( std::move ( value ).getAutomaton ( ) ).getInitialState ( ); + ext::set < unsigned > finalStates = std::move ( std::move ( value ).getAutomaton ( ) ).getFinalStates ( ); - return indexes::stringology::SuffixAutomaton < > ( std::move ( alphabet ), std::move ( value ).getData ( ), std::move ( string ) ); + automaton::DFA < DefaultStateType, unsigned > res ( std::move ( states ), std::move ( alphabet ), std::move ( initialState ), std::move ( finalStates ) ); + + for ( std::pair < ext::pair < unsigned, SymbolType >, unsigned > && transition : ext::make_moveable_map ( std::move ( std::move ( value ).getAutomaton ( ) ).getTransitions ( ) ) ) { + unsigned from = transition.first.first; + DefaultSymbolType input = alphabet::SymbolNormalize::normalizeSymbol ( std::move ( transition.first.second ) ); + unsigned to = transition.second; + + res.addTransition ( std::move ( from ), std::move ( input ), std::move ( to ) ); + } + + return indexes::stringology::SuffixAutomaton < > ( std::move ( res ), std::move ( value ).getBackboneLength ( ) ); } }; -- GitLab