From a551906263e842738f0a4dd14aaa00822d70d7cd Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Tue, 24 Oct 2017 19:33:07 +0200 Subject: [PATCH] templates in suffix automaton direct construction --- .../indexing/ExactSuffixAutomaton.cpp | 79 +---------------- .../indexing/ExactSuffixAutomaton.h | 87 +++++++++++++++++-- .../matching/DAWGMatcherConstruction.cpp | 4 +- .../matching/DAWGMatcherConstruction.h | 4 +- .../matching/OracleMatcherConstruction.cpp | 4 +- .../matching/OracleMatcherConstruction.h | 2 +- .../query/BackwardDAWGMatching.cpp | 46 ---------- .../stringology/query/BackwardDAWGMatching.h | 53 ++++++++++- .../query/BackwardOracleMatching.cpp | 41 --------- .../query/BackwardOracleMatching.h | 45 +++++++++- .../indexing/ExactSuffixAutomatonTest.cpp | 34 ++++---- .../stringology/query/DAWGMatcherTest.cpp | 4 +- .../stringology/query/OracleMatcherTest.cpp | 4 +- 13 files changed, 200 insertions(+), 207 deletions(-) diff --git a/alib2algo/src/stringology/indexing/ExactSuffixAutomaton.cpp b/alib2algo/src/stringology/indexing/ExactSuffixAutomaton.cpp index e8a6d63657..e0081a874e 100644 --- a/alib2algo/src/stringology/indexing/ExactSuffixAutomaton.cpp +++ b/alib2algo/src/stringology/indexing/ExactSuffixAutomaton.cpp @@ -5,81 +5,8 @@ #include "ExactSuffixAutomaton.h" #include <registration/AlgoRegistration.hpp> -namespace stringology { +namespace { -namespace indexing { +auto SuffixAutomatonLinearString = registration::AbstractRegister < stringology::indexing::ExactSuffixAutomaton, automaton::DFA < DefaultSymbolType, unsigned >, const string::LinearString < > & > ( stringology::indexing::ExactSuffixAutomaton::construct ); -automaton::DFA < > ExactSuffixAutomaton::construct ( const string::LinearString < > & pattern ) { - automaton::DFA < > suffixAutomaton ( DefaultStateType ( 0 ) ); - - suffixAutomaton.setInputAlphabet ( pattern.getAlphabet ( ) ); - - ext::map < DefaultStateType, std::pair < DefaultStateType, int > > suffixLinks = 
{ { DefaultStateType ( 0 ), { DefaultStateType ( -1 ), 0 } } }; - DefaultStateType lastState = DefaultStateType ( 0 ); - - for ( const DefaultSymbolType & symbol : pattern.getContent ( ) ) - suffixAutomatonAddSymbol ( suffixAutomaton, symbol, suffixLinks, lastState ); - - while ( lastState != DefaultStateType ( -1 ) ) { - suffixAutomaton.addFinalState ( lastState ); - lastState = suffixLinks.find ( lastState )->second.first; - } - - return suffixAutomaton; -} - -void ExactSuffixAutomaton::suffixAutomatonAddSymbol ( automaton::DFA < > & suffixAutomaton, const DefaultSymbolType & symbol, ext::map < DefaultStateType, std::pair < DefaultStateType, int > > & suffixLinks, DefaultStateType & lastState ) { - DefaultStateType newState = DefaultStateType ( ( int ) suffixAutomaton.getStates ( ).size ( ) ); - - suffixAutomaton.addState ( newState ); - - int lastSuffixLength = suffixLinks.find ( lastState )->second.second; - - suffixLinks.insert ( { newState, { DefaultStateType ( -1 ), lastSuffixLength + 1 } } ); - - DefaultStateType kState = lastState; - - while ( kState != DefaultStateType ( -1 ) && suffixAutomaton.getTransitions ( ).find ( { kState, symbol } ) == suffixAutomaton.getTransitions ( ).end ( ) ) { - suffixAutomaton.addTransition ( kState, symbol, newState ); - kState = suffixLinks.find ( kState )->second.first; - } - - if ( kState == DefaultStateType ( -1 ) ) - suffixLinks.find ( newState )->second.first = DefaultStateType ( 0 ); - else { - DefaultStateType qState = suffixAutomaton.getTransitions ( ).find ( { kState, symbol } )->second; - - int kSuffixLength = suffixLinks.find ( kState )->second.second; - int qSuffixLength = suffixLinks.find ( qState )->second.second; - - if ( kSuffixLength + 1 == qSuffixLength ) { - suffixLinks.find ( newState )->second.first = qState; - } else { - - DefaultStateType cloneState = DefaultStateType ( ( int ) suffixAutomaton.getStates ( ).size ( ) ); - suffixAutomaton.addState ( cloneState ); - - suffixLinks.insert ( { cloneState, 
{ suffixLinks.find ( qState )->second.first, kSuffixLength + 1 } } ); - - for ( const auto & transition : suffixAutomaton.getTransitionsFromState ( qState ) ) - suffixAutomaton.addTransition ( cloneState, transition.first.second, transition.second ); - - while ( kState != DefaultStateType ( -1 ) && suffixAutomaton.getTransitions ( ).find ( { kState, symbol } ) != suffixAutomaton.getTransitions ( ).end ( ) && suffixAutomaton.getTransitions ( ).find ( { kState, symbol } )->second == qState ) { - suffixAutomaton.removeTransition ( kState, symbol, qState ); - suffixAutomaton.addTransition ( kState, symbol, cloneState ); - kState = suffixLinks.find ( kState )->second.first; - } - - suffixLinks.find ( qState )->second.first = cloneState; - suffixLinks.find ( newState )->second.first = cloneState; - } - } - - lastState = newState; -} - -auto SuffixAutomatonLinearString = registration::AbstractRegister < ExactSuffixAutomaton, automaton::DFA < >, const string::LinearString < > & > ( ExactSuffixAutomaton::construct ); - -} /* namespace indexing */ - -} /* namespace stringology */ +} /* namespace */ diff --git a/alib2algo/src/stringology/indexing/ExactSuffixAutomaton.h b/alib2algo/src/stringology/indexing/ExactSuffixAutomaton.h index 7287859c5f..528d580077 100644 --- a/alib2algo/src/stringology/indexing/ExactSuffixAutomaton.h +++ b/alib2algo/src/stringology/indexing/ExactSuffixAutomaton.h @@ -2,8 +2,8 @@ * Author: Radovan Cerveny */ -#ifndef EXACT_SUFFIX_AUTOMATON_H_ -#define EXACT_SUFFIX_AUTOMATON_H_ +#ifndef _EXACT_SUFFIX_AUTOMATON_H_ +#define _EXACT_SUFFIX_AUTOMATON_H_ #include <automaton/FSM/DFA.h> #include <string/LinearString.h> @@ -14,19 +14,88 @@ namespace indexing { class ExactSuffixAutomaton { private: - static void suffixAutomatonAddSymbol ( automaton::DFA < > & suffixAutomaton, const DefaultSymbolType & symbol, ext::map < DefaultStateType, std::pair<DefaultStateType, int > > & suffixLinks, DefaultStateType & lastState ); + template < class SymbolType > + static 
void suffixAutomatonAddSymbol ( automaton::DFA < SymbolType, unsigned > & suffixAutomaton, const SymbolType & symbol, std::map < unsigned, std::pair < unsigned, int > > & suffixLinks, unsigned & lastState ); public: - /** - * Linear time on-line construction of minimal suffix automaton for given pattern. - * @return minimal suffix automaton for given pattern. - */ - static automaton::DFA < > construct ( const string::LinearString < > & pattern ); + template < class SymbolType > /* Linear time on-line construction of minimal suffix automaton for given pattern; returns that automaton. */ + static automaton::DFA < SymbolType, unsigned > construct ( const string::LinearString < SymbolType > & pattern ); }; +template < class SymbolType > +automaton::DFA < SymbolType, unsigned > ExactSuffixAutomaton::construct ( const string::LinearString < SymbolType > & pattern ) { + automaton::DFA < SymbolType, unsigned > suffixAutomaton ( 0 ); + + suffixAutomaton.setInputAlphabet ( pattern.getAlphabet ( ) ); + + std::map < unsigned, std::pair < unsigned, int > > suffixLinks = { { 0u, { ( unsigned ) -1, 0 } } }; /* state -> ( suffix link, suffix length ); ( unsigned ) -1 stands for no link */ + unsigned lastState = 0; + + for ( const SymbolType & symbol : pattern.getContent ( ) ) /* use the template symbol type, DefaultSymbolType would break deduction for any other SymbolType */ + suffixAutomatonAddSymbol ( suffixAutomaton, symbol, suffixLinks, lastState ); + + while ( lastState != ( unsigned ) -1 ) { /* mark every state on the suffix link chain of the last state as final */ + suffixAutomaton.addFinalState ( lastState ); + lastState = suffixLinks.find ( lastState )->second.first; + } + + return suffixAutomaton; +} + +template < class SymbolType > +void ExactSuffixAutomaton::suffixAutomatonAddSymbol ( automaton::DFA < SymbolType, unsigned > & suffixAutomaton, const SymbolType & symbol, std::map < unsigned, std::pair < unsigned, int > > & suffixLinks, unsigned & lastState ) { + unsigned newState = suffixAutomaton.getStates ( ).size ( ); + + suffixAutomaton.addState ( newState ); + + int lastSuffixLength = suffixLinks.find ( lastState )->second.second; + + suffixLinks.insert ( { newState, { ( unsigned ) -1, lastSuffixLength + 1 } } ); + + unsigned kState = lastState; + + while ( kState != ( unsigned ) -1 && suffixAutomaton.getTransitions ( 
).find ( { kState, symbol } ) == suffixAutomaton.getTransitions ( ).end ( ) ) { + suffixAutomaton.addTransition ( kState, symbol, newState ); + kState = suffixLinks.find ( kState )->second.first; + } + + if ( kState == ( unsigned ) -1 ) { + suffixLinks.find ( newState )->second.first = 0; + } else { + unsigned qState = suffixAutomaton.getTransitions ( ).find ( { kState, symbol } )->second; + + int kSuffixLength = suffixLinks.find ( kState )->second.second; + int qSuffixLength = suffixLinks.find ( qState )->second.second; + + if ( kSuffixLength + 1 == qSuffixLength ) { + suffixLinks.find ( newState )->second.first = qState; + } else { + unsigned cloneState = suffixAutomaton.getStates ( ).size ( ); + suffixAutomaton.addState ( cloneState ); + + suffixLinks.insert ( { cloneState, { suffixLinks.find ( qState )->second.first, kSuffixLength + 1 } } ); + + for ( const auto & transition : suffixAutomaton.getTransitionsFromState ( qState ) ) + suffixAutomaton.addTransition ( cloneState, transition.first.second, transition.second ); + + while ( kState != ( unsigned ) -1 + && suffixAutomaton.getTransitions ( ).find ( { kState, symbol } ) != suffixAutomaton.getTransitions ( ).end ( ) + && suffixAutomaton.getTransitions ( ).find ( { kState, symbol } )->second == qState ) { + suffixAutomaton.removeTransition ( kState, symbol, qState ); + suffixAutomaton.addTransition ( kState, symbol, cloneState ); + kState = suffixLinks.find ( kState )->second.first; + } + + suffixLinks.find ( qState )->second.first = cloneState; + suffixLinks.find ( newState )->second.first = cloneState; + } + } + lastState = newState; +} + } /* namespace indexing */ } /* namespace stringology */ -#endif /* EXACT_SUFFIX_AUTOMATON_H_ */ +#endif /* _EXACT_SUFFIX_AUTOMATON_H_ */ diff --git a/alib2algo/src/stringology/matching/DAWGMatcherConstruction.cpp b/alib2algo/src/stringology/matching/DAWGMatcherConstruction.cpp index c962d9ee01..90231dfb04 100644 --- 
a/alib2algo/src/stringology/matching/DAWGMatcherConstruction.cpp +++ b/alib2algo/src/stringology/matching/DAWGMatcherConstruction.cpp @@ -10,7 +10,7 @@ namespace stringology { namespace matching { -automaton::DFA < > DAWGMatcherConstruction::construct ( const string::LinearString < > & pattern ) { +automaton::DFA < DefaultSymbolType, unsigned > DAWGMatcherConstruction::construct ( const string::LinearString < > & pattern ) { auto patternData = pattern.getContent ( ); reverse ( patternData.begin ( ), patternData.end ( ) ); string::LinearString < > reversedPattern ( pattern.getAlphabet ( ), std::move ( patternData ) ); @@ -18,7 +18,7 @@ automaton::DFA < > DAWGMatcherConstruction::construct ( const string::LinearStri return stringology::indexing::ExactSuffixAutomaton::construct ( reversedPattern ); } -auto DAWGMatcherConstructionLinearString = registration::AbstractRegister < DAWGMatcherConstruction, automaton::DFA < >, const string::LinearString < > & > ( DAWGMatcherConstruction::construct ); +auto DAWGMatcherConstructionLinearString = registration::AbstractRegister < DAWGMatcherConstruction, automaton::DFA < DefaultSymbolType, unsigned >, const string::LinearString < > & > ( DAWGMatcherConstruction::construct ); } /* namespace matching */ diff --git a/alib2algo/src/stringology/matching/DAWGMatcherConstruction.h b/alib2algo/src/stringology/matching/DAWGMatcherConstruction.h index 7ffb7b274e..b3f6392ee5 100644 --- a/alib2algo/src/stringology/matching/DAWGMatcherConstruction.h +++ b/alib2algo/src/stringology/matching/DAWGMatcherConstruction.h @@ -18,9 +18,7 @@ public: * Linear time on-line construction of minimal suffix automaton for given pattern. * @return minimal suffix automaton for given pattern. 
*/ - static automaton::Automaton construct ( const string::String & pattern ); - - static automaton::DFA < > construct ( const string::LinearString < > & pattern ); + static automaton::DFA < DefaultSymbolType, unsigned > construct ( const string::LinearString < > & pattern ); }; diff --git a/alib2algo/src/stringology/matching/OracleMatcherConstruction.cpp b/alib2algo/src/stringology/matching/OracleMatcherConstruction.cpp index 943f9a8344..8f27209ffa 100644 --- a/alib2algo/src/stringology/matching/OracleMatcherConstruction.cpp +++ b/alib2algo/src/stringology/matching/OracleMatcherConstruction.cpp @@ -10,7 +10,7 @@ namespace stringology { namespace matching { -automaton::DFA < > OracleMatcherConstruction::construct ( const string::LinearString < > & pattern ) { +automaton::DFA < DefaultSymbolType, unsigned > OracleMatcherConstruction::construct ( const string::LinearString < > & pattern ) { auto patternData = pattern.getContent ( ); reverse ( patternData.begin ( ), patternData.end ( ) ); string::LinearString < > reversedPattern ( pattern.getAlphabet ( ), std::move ( patternData ) ); @@ -18,7 +18,7 @@ automaton::DFA < > OracleMatcherConstruction::construct ( const string::LinearSt return stringology::indexing::ExactSuffixAutomaton::construct ( reversedPattern ); } -auto OracleMatcherConstructionLinearString = registration::AbstractRegister < OracleMatcherConstruction, automaton::DFA < >, const string::LinearString < > & > ( OracleMatcherConstruction::construct ); +auto OracleMatcherConstructionLinearString = registration::AbstractRegister < OracleMatcherConstruction, automaton::DFA < DefaultSymbolType, unsigned >, const string::LinearString < > & > ( OracleMatcherConstruction::construct ); } /* namespace matching */ diff --git a/alib2algo/src/stringology/matching/OracleMatcherConstruction.h b/alib2algo/src/stringology/matching/OracleMatcherConstruction.h index 4f35f4a85e..55b3bd3740 100644 --- a/alib2algo/src/stringology/matching/OracleMatcherConstruction.h +++ 
b/alib2algo/src/stringology/matching/OracleMatcherConstruction.h @@ -18,7 +18,7 @@ public: * Linear time on-line construction of minimal suffix automaton for given pattern. * @return minimal suffix automaton for given pattern. */ - static automaton::DFA < > construct ( const string::LinearString < > & pattern ); + static automaton::DFA < DefaultSymbolType, unsigned > construct ( const string::LinearString < > & pattern ); }; diff --git a/alib2algo/src/stringology/query/BackwardDAWGMatching.cpp b/alib2algo/src/stringology/query/BackwardDAWGMatching.cpp index f72c0101a3..a1ea591b82 100644 --- a/alib2algo/src/stringology/query/BackwardDAWGMatching.cpp +++ b/alib2algo/src/stringology/query/BackwardDAWGMatching.cpp @@ -3,58 +3,12 @@ */ #include "BackwardDAWGMatching.h" -#include <stringology/matching/DAWGMatcherConstruction.h> -#include <stringology/properties/BackboneLength.h> - -#include <string/LinearString.h> - #include <registration/AlgoRegistration.hpp> namespace stringology { namespace query { -ext::set < unsigned > BackwardDAWGMatching::match ( const string::LinearString < > & subject, const automaton::DFA < > & suffixAutomaton ) { - ext::set < unsigned > occ; - - size_t patternSize = stringology::properties::BackboneLength::length ( suffixAutomaton ); - size_t subjectSize = subject.getContent ( ).size ( ); - - const DefaultStateType failState = DefaultStateType ( -1 ); - - size_t posInSubject = 0; - - while ( posInSubject <= subjectSize - patternSize ) { - DefaultStateType currentState = suffixAutomaton.getInitialState ( ); - - size_t posInPattern = patternSize; - size_t lastPrefixPos = posInPattern; - - while ( posInPattern > 0 && currentState != failState ) { - auto transition = suffixAutomaton.getTransitions ( ).find ( { currentState, subject.getContent ( ).at ( posInSubject + posInPattern - 1 ) } ); - - if ( transition == suffixAutomaton.getTransitions ( ).end ( ) ) - currentState = failState; - else - currentState = transition->second; - - posInPattern--; 
- - // found a prefix of nonreversed pattern that does not correspond to the entire pattern - if ( ( posInPattern != 0 ) && ( suffixAutomaton.getFinalStates ( ).find ( currentState ) != suffixAutomaton.getFinalStates ( ).end ( ) ) ) - lastPrefixPos = posInPattern; - } - - if ( currentState != failState ) - // Yay, there is match!!! - occ.insert ( posInSubject ); - - posInSubject += lastPrefixPos; - } - - return occ; -} - auto BackwardDAWGMatchingLinearStringLinearString = registration::AbstractRegister < BackwardDAWGMatching, ext::set < unsigned >, const string::LinearString < > &, const automaton::DFA < > & > ( BackwardDAWGMatching::match ); } /* namespace query */ diff --git a/alib2algo/src/stringology/query/BackwardDAWGMatching.h b/alib2algo/src/stringology/query/BackwardDAWGMatching.h index 670d8fd9ac..05dfb02308 100644 --- a/alib2algo/src/stringology/query/BackwardDAWGMatching.h +++ b/alib2algo/src/stringology/query/BackwardDAWGMatching.h @@ -5,11 +5,13 @@ #ifndef STRINGOLOGY_BACKWARD_DAWG_MATCHING_H__ #define STRINGOLOGY_BACKWARD_DAWG_MATCHING_H__ -#include <string/StringFeatures.h> -#include <automaton/AutomatonFeatures.h> - #include <set> +#include <automaton/FSM/DFA.h> +#include <string/LinearString.h> + +#include <stringology/properties/BackboneLength.h> + namespace stringology { namespace query { @@ -23,10 +25,53 @@ public: * Search for pattern in linear string. 
* @return set set of occurences */ - static ext::set < unsigned > match ( const string::LinearString < > & subject, const automaton::DFA < > & suffixAutomaton ); + template < class SymbolType, class StateType > + static ext::set < unsigned > match ( const string::LinearString < SymbolType > & subject, const automaton::DFA < SymbolType, StateType > & suffixAutomaton ); }; +template < class SymbolType, class StateType > +ext::set < unsigned > BackwardDAWGMatching::match ( const string::LinearString < SymbolType > & subject, const automaton::DFA < SymbolType, StateType > & suffixAutomaton ) { + ext::set < unsigned > occ; + + size_t patternSize = stringology::properties::BackboneLength::length ( suffixAutomaton ); + size_t subjectSize = subject.getContent ( ).size ( ); + + bool fail; + size_t posInSubject = 0; + + while ( posInSubject + patternSize <= subjectSize ) { /* additive form: subjectSize - patternSize would wrap around in size_t when the pattern is longer than the subject */ + StateType currentState = suffixAutomaton.getInitialState ( ); + + size_t posInPattern = patternSize; + size_t lastPrefixPos = posInPattern; + + fail = false; + while ( posInPattern > 0 && ! fail ) { + auto transition = suffixAutomaton.getTransitions ( ).find ( { currentState, subject.getContent ( ).at ( posInSubject + posInPattern - 1 ) } ); + + if ( transition == suffixAutomaton.getTransitions ( ).end ( ) ) + fail = true; + else + currentState = transition->second; + + posInPattern--; + + // found a prefix of nonreversed pattern that does not correspond to the entire pattern ( tested only after a successful step, currentState is stale once fail is set ) + if ( ! fail && ( posInPattern != 0 ) && ( suffixAutomaton.getFinalStates ( ).find ( currentState ) != suffixAutomaton.getFinalStates ( ).end ( ) ) ) + lastPrefixPos = posInPattern; + } + + if ( ! fail ) + // Yay, there is match!!! 
+ occ.insert ( posInSubject ); + + posInSubject += lastPrefixPos; + } + + return occ; +} + } /* namespace query */ } /* namespace stringology */ diff --git a/alib2algo/src/stringology/query/BackwardOracleMatching.cpp b/alib2algo/src/stringology/query/BackwardOracleMatching.cpp index 60f9abe778..abe024f72a 100644 --- a/alib2algo/src/stringology/query/BackwardOracleMatching.cpp +++ b/alib2algo/src/stringology/query/BackwardOracleMatching.cpp @@ -3,53 +3,12 @@ */ #include "BackwardOracleMatching.h" -#include <stringology/matching/OracleMatcherConstruction.h> -#include <stringology/properties/BackboneLength.h> - -#include <string/LinearString.h> #include <registration/AlgoRegistration.hpp> namespace stringology { namespace query { -ext::set < unsigned > BackwardOracleMatching::match ( const string::LinearString < > & subject, const automaton::DFA < > & factorOracle ) { - ext::set < unsigned > occ; - - size_t patternSize = stringology::properties::BackboneLength::length ( factorOracle ); - size_t subjectSize = subject.getContent ( ).size ( ); - - const DefaultStateType failState = DefaultStateType ( -1 ); - - size_t posInSubject = 0; - - while ( posInSubject <= subjectSize - patternSize ) { - - DefaultStateType currentState = factorOracle.getInitialState ( ); - - size_t posInPattern = patternSize; - - while ( posInPattern > 0 && currentState != failState ) { - auto transition = factorOracle.getTransitions ( ).find ( { currentState, subject.getContent ( ).at ( posInSubject + posInPattern - 1 ) } ); - - if ( transition == factorOracle.getTransitions ( ).end ( ) ) - currentState = failState; - else - currentState = transition->second; - - posInPattern--; - } - - if ( currentState != failState ) - // Yay, there is match!!! 
- occ.insert ( posInSubject ); - - posInSubject += posInPattern + 1; - } - - return occ; -} - auto BackwardOracleMatchingLinearStringLinearString = registration::AbstractRegister < BackwardOracleMatching, ext::set < unsigned >, const string::LinearString < > &, const automaton::DFA < > & > ( BackwardOracleMatching::match ); } /* namespace query */ diff --git a/alib2algo/src/stringology/query/BackwardOracleMatching.h b/alib2algo/src/stringology/query/BackwardOracleMatching.h index 911cd93ff1..8ed9a47e24 100644 --- a/alib2algo/src/stringology/query/BackwardOracleMatching.h +++ b/alib2algo/src/stringology/query/BackwardOracleMatching.h @@ -5,10 +5,12 @@ #ifndef STRINGOLOGY_BACKWARD_ORACLE_MATCHING_H__ #define STRINGOLOGY_BACKWARD_ORACLE_MATCHING_H__ +#include <set> + #include <string/LinearString.h> #include <automaton/FSM/DFA.h> -#include <set> +#include <stringology/properties/BackboneLength.h> namespace stringology { @@ -23,10 +25,49 @@ public: * Search for pattern in linear string. * @return set set of occurences */ - static ext::set < unsigned > match ( const string::LinearString < > & subject, const automaton::DFA < > & factorOracle ); + template < class SymbolType, class StateType > + static ext::set < unsigned > match ( const string::LinearString < SymbolType > & subject, const automaton::DFA < SymbolType, StateType > & factorOracle ); }; +template < class SymbolType, class StateType > +ext::set < unsigned > BackwardOracleMatching::match ( const string::LinearString < SymbolType > & subject, const automaton::DFA < SymbolType, StateType > & factorOracle ) { + ext::set < unsigned > occ; + + size_t patternSize = stringology::properties::BackboneLength::length ( factorOracle ); + size_t subjectSize = subject.getContent ( ).size ( ); + + bool fail; + size_t posInSubject = 0; + + while ( posInSubject + patternSize <= subjectSize ) { /* additive form: subjectSize - patternSize would wrap around in size_t when the pattern is longer than the subject */ + + StateType currentState = factorOracle.getInitialState ( ); + + size_t posInPattern = patternSize; + + fail = false; + while ( 
posInPattern > 0 && ! fail ) { + auto transition = factorOracle.getTransitions ( ).find ( { currentState, subject.getContent ( ).at ( posInSubject + posInPattern - 1 ) } ); + + if ( transition == factorOracle.getTransitions ( ).end ( ) ) + fail = true; + else + currentState = transition->second; + + posInPattern--; + } + + if ( ! fail ) + // Yay, there is match!!! + occ.insert ( posInSubject ); + + posInSubject += posInPattern + 1; + } + + return occ; +} + } /* namespace query */ } /* namespace stringology */ diff --git a/alib2algo/test-src/stringology/indexing/ExactSuffixAutomatonTest.cpp b/alib2algo/test-src/stringology/indexing/ExactSuffixAutomatonTest.cpp index 3427482ec2..61587da32c 100644 --- a/alib2algo/test-src/stringology/indexing/ExactSuffixAutomatonTest.cpp +++ b/alib2algo/test-src/stringology/indexing/ExactSuffixAutomatonTest.cpp @@ -16,29 +16,29 @@ void ExactSuffixAutomatonTest::testSuffixAutomatonConstruction ( ) { string::LinearString < > pattern ( "atatac" ); - automaton::DFA<> suffixAutomaton = stringology::indexing::ExactSuffixAutomaton::construct ( pattern ); + automaton::DFA < DefaultSymbolType, unsigned > suffixAutomaton = stringology::indexing::ExactSuffixAutomaton::construct ( pattern ); - automaton::DFA<> refSuffixAutomaton ( DefaultStateType ( 0 ) ); + automaton::DFA < DefaultSymbolType, unsigned > refSuffixAutomaton ( 0 ); refSuffixAutomaton.setInputAlphabet ( pattern.getAlphabet ( ) ); - for ( int i = 1; i <= 6; ++i ) { - refSuffixAutomaton.addState ( DefaultStateType ( i ) ); + for ( unsigned i = 1; i <= 6; ++i ) { + refSuffixAutomaton.addState ( i ); } - refSuffixAutomaton.addFinalState ( DefaultStateType ( 0 ) ); - refSuffixAutomaton.addFinalState ( DefaultStateType ( 6 ) ); - - refSuffixAutomaton.addTransition ( DefaultStateType ( 0 ), DefaultSymbolType ( 'a' ), DefaultStateType ( 1 ) ); - refSuffixAutomaton.addTransition ( DefaultStateType ( 0 ), DefaultSymbolType ( 't' ), DefaultStateType ( 2 ) ); - refSuffixAutomaton.addTransition 
( DefaultStateType ( 0 ), DefaultSymbolType ( 'c' ), DefaultStateType ( 6 ) ); - refSuffixAutomaton.addTransition ( DefaultStateType ( 1 ), DefaultSymbolType ( 't' ), DefaultStateType ( 2 ) ); - refSuffixAutomaton.addTransition ( DefaultStateType ( 1 ), DefaultSymbolType ( 'c' ), DefaultStateType ( 6 ) ); - refSuffixAutomaton.addTransition ( DefaultStateType ( 2 ), DefaultSymbolType ( 'a' ), DefaultStateType ( 3 ) ); - refSuffixAutomaton.addTransition ( DefaultStateType ( 3 ), DefaultSymbolType ( 't' ), DefaultStateType ( 4 ) ); - refSuffixAutomaton.addTransition ( DefaultStateType ( 3 ), DefaultSymbolType ( 'c' ), DefaultStateType ( 6 ) ); - refSuffixAutomaton.addTransition ( DefaultStateType ( 4 ), DefaultSymbolType ( 'a' ), DefaultStateType ( 5 ) ); - refSuffixAutomaton.addTransition ( DefaultStateType ( 5 ), DefaultSymbolType ( 'c' ), DefaultStateType ( 6 ) ); + refSuffixAutomaton.addFinalState ( 0 ); + refSuffixAutomaton.addFinalState ( 6 ); + + refSuffixAutomaton.addTransition ( 0, DefaultSymbolType ( 'a' ), 1 ); + refSuffixAutomaton.addTransition ( 0, DefaultSymbolType ( 't' ), 2 ); + refSuffixAutomaton.addTransition ( 0, DefaultSymbolType ( 'c' ), 6 ); + refSuffixAutomaton.addTransition ( 1, DefaultSymbolType ( 't' ), 2 ); + refSuffixAutomaton.addTransition ( 1, DefaultSymbolType ( 'c' ), 6 ); + refSuffixAutomaton.addTransition ( 2, DefaultSymbolType ( 'a' ), 3 ); + refSuffixAutomaton.addTransition ( 3, DefaultSymbolType ( 't' ), 4 ); + refSuffixAutomaton.addTransition ( 3, DefaultSymbolType ( 'c' ), 6 ); + refSuffixAutomaton.addTransition ( 4, DefaultSymbolType ( 'a' ), 5 ); + refSuffixAutomaton.addTransition ( 5, DefaultSymbolType ( 'c' ), 6 ); std::cout << suffixAutomaton << std::endl; std::cout << refSuffixAutomaton << std::endl; diff --git a/alib2algo/test-src/stringology/query/DAWGMatcherTest.cpp b/alib2algo/test-src/stringology/query/DAWGMatcherTest.cpp index 6d933dc80e..254c39a8ca 100644 --- a/alib2algo/test-src/stringology/query/DAWGMatcherTest.cpp 
+++ b/alib2algo/test-src/stringology/query/DAWGMatcherTest.cpp @@ -38,7 +38,7 @@ void DAWGMatcherTest::testBackwardDAWGMatching ( ) { for(size_t i = 0; i < subjects.size(); ++i) { string::LinearString < > subject ( subjects[i] ); string::LinearString < > pattern ( patterns[i] ); - automaton::DFA < > suffixAutomaton = stringology::matching::DAWGMatcherConstruction::construct ( pattern ); + automaton::DFA < DefaultSymbolType, unsigned > suffixAutomaton = stringology::matching::DAWGMatcherConstruction::construct ( pattern ); ext::set < unsigned > res = stringology::query::BackwardDAWGMatching::match ( subject, suffixAutomaton ); std::cout << subjects[i] << ' ' << patterns[i] << ' ' << res << std::endl; CPPUNIT_ASSERT ( res == expectedOccs[i] ); @@ -46,7 +46,7 @@ void DAWGMatcherTest::testBackwardDAWGMatching ( ) { auto longSubject = string::generate::RandomStringFactory::generateLinearString (64 * 64, 512, false, true); auto longPattern = string::generate::RandomSubstringFactory::generateSubstring(64 * 5, longSubject ); - automaton::DFA < > suffixAutomaton = stringology::matching::DAWGMatcherConstruction::construct ( longPattern ); + automaton::DFA < DefaultSymbolType, unsigned > suffixAutomaton = stringology::matching::DAWGMatcherConstruction::construct ( longPattern ); ext::set < unsigned > res = stringology::query::BackwardDAWGMatching::match ( longSubject, suffixAutomaton ); ext::set < unsigned > resRef = stringology::exact::ExactFactorMatch::match ( longSubject, longPattern ); std::cout << "long: " << res << std::endl; diff --git a/alib2algo/test-src/stringology/query/OracleMatcherTest.cpp b/alib2algo/test-src/stringology/query/OracleMatcherTest.cpp index 0a9a9d18a1..39bd8d76aa 100644 --- a/alib2algo/test-src/stringology/query/OracleMatcherTest.cpp +++ b/alib2algo/test-src/stringology/query/OracleMatcherTest.cpp @@ -37,7 +37,7 @@ void OracleMatcherTest::testBackwardOracleMatching ( ) { for(size_t i = 0; i < subjects.size(); ++i) { string::LinearString < > subject 
( subjects[i] ); string::LinearString < > pattern ( patterns[i] ); - automaton::DFA < > oracleAutomaton = stringology::matching::OracleMatcherConstruction::construct ( pattern ); + automaton::DFA < DefaultSymbolType, unsigned > oracleAutomaton = stringology::matching::OracleMatcherConstruction::construct ( pattern ); ext::set < unsigned > res = stringology::query::BackwardOracleMatching::match ( subject, oracleAutomaton ); std::cout << subjects[i] << ' ' << patterns[i] << ' ' << res << std::endl; CPPUNIT_ASSERT ( res == expectedOccs[i] ); @@ -45,7 +45,7 @@ void OracleMatcherTest::testBackwardOracleMatching ( ) { auto longSubject = string::generate::RandomStringFactory::generateLinearString (64 * 64, 512, false, true); auto longPattern = string::generate::RandomSubstringFactory::generateSubstring(64 * 5, longSubject ); - automaton::DFA < > oracleAutomaton = stringology::matching::OracleMatcherConstruction::construct ( longPattern ); + automaton::DFA < DefaultSymbolType, unsigned > oracleAutomaton = stringology::matching::OracleMatcherConstruction::construct ( longPattern ); ext::set < unsigned > res = stringology::query::BackwardOracleMatching::match ( longSubject, oracleAutomaton ); ext::set < unsigned > resRef = stringology::exact::ExactFactorMatch::match ( longSubject, longPattern ); std::cout << "long: " << res << std::endl; -- GitLab