From 90e029aec88a7f76809745ba861c46dfdea4d0a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radovan=20=C4=8Cerven=C3=BD?= <radovan.cerveny@gmail.com> Date: Thu, 28 Apr 2016 15:53:13 +0200 Subject: [PATCH] fixed implementation of BDM search --- alib2algo/src/stringology/exact/BackwardDAWGMatching.cpp | 8 ++++++-- .../test-src/stringology/exact/SuffixAutomatonTest.cpp | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/alib2algo/src/stringology/exact/BackwardDAWGMatching.cpp b/alib2algo/src/stringology/exact/BackwardDAWGMatching.cpp index 84175a5c30..0a098b53c1 100644 --- a/alib2algo/src/stringology/exact/BackwardDAWGMatching.cpp +++ b/alib2algo/src/stringology/exact/BackwardDAWGMatching.cpp @@ -43,10 +43,10 @@ std::set < unsigned > BackwardDAWGMatching::match ( const string::LinearString & size_t posInSubject = 0; while ( posInSubject <= subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) ) { - automaton::State currentState = suffixAutomaton.getInitialState ( ); size_t posInPattern = reversedPattern.getContent ( ).size ( ); + size_t lastPrefixPos = posInPattern; while ( posInPattern > 0 && currentState != failState ) { auto transition = suffixAutomaton.getTransitions ( ).find ( { currentState, subject.getContent ( ).at ( posInSubject + posInPattern - 1 ) } ); @@ -57,13 +57,17 @@ std::set < unsigned > BackwardDAWGMatching::match ( const string::LinearString & currentState = transition->second; posInPattern--; + + // found a prefix of nonreversed pattern that does not correspond to the entire pattern + if ( ( posInPattern != 0 ) && ( suffixAutomaton.getFinalStates ( ).find ( currentState ) != suffixAutomaton.getFinalStates ( ).end ( ) ) ) + lastPrefixPos = posInPattern; } if ( currentState != failState ) // Yay, there is match!!! occ.insert ( posInSubject ); - posInSubject += posInPattern + 1; + posInSubject += lastPrefixPos; } measurements::end ( ); diff --git a/alib2algo/test-src/stringology/exact/SuffixAutomatonTest.cpp b/alib2algo/test-src/stringology/exact/SuffixAutomatonTest.cpp index c7485c8751..e73b48cc40 100644 --- a/alib2algo/test-src/stringology/exact/SuffixAutomatonTest.cpp +++ b/alib2algo/test-src/stringology/exact/SuffixAutomatonTest.cpp @@ -64,6 +64,7 @@ void SuffixAutomatonTest::testBackwardDAWGMatching ( ) { subjects.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfaa"); patterns.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfaa"); expectedOccs.push_back({0}); subjects.push_back("atggccttgcc"); patterns.push_back("gcc"); expectedOccs.push_back({3,8}); subjects.push_back("aaaaaaaaaa"); patterns.push_back("a"); expectedOccs.push_back({0,1,2,3,4,5,6,7,8,9}); + subjects.push_back("aaaaaaaaaa"); patterns.push_back("aa"); expectedOccs.push_back({0,1,2,3,4,5,6,7,8}); for(size_t i = 0; i < subjects.size(); ++i) { -- GitLab