diff --git a/alib2algo/src/stringology/exact/BackwardDAWGMatching.cpp b/alib2algo/src/stringology/exact/BackwardDAWGMatching.cpp index 84175a5c30c8c6bb3f5cefe2b5385cfa411638f7..0a098b53c19543a2e8ff7cd5ddd618f53ee7cae5 100644 --- a/alib2algo/src/stringology/exact/BackwardDAWGMatching.cpp +++ b/alib2algo/src/stringology/exact/BackwardDAWGMatching.cpp @@ -43,10 +43,10 @@ std::set < unsigned > BackwardDAWGMatching::match ( const string::LinearString & size_t posInSubject = 0; while ( posInSubject <= subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) ) { - automaton::State currentState = suffixAutomaton.getInitialState ( ); size_t posInPattern = reversedPattern.getContent ( ).size ( ); + size_t lastPrefixPos = posInPattern; while ( posInPattern > 0 && currentState != failState ) { auto transition = suffixAutomaton.getTransitions ( ).find ( { currentState, subject.getContent ( ).at ( posInSubject + posInPattern - 1 ) } ); @@ -57,13 +57,17 @@ std::set < unsigned > BackwardDAWGMatching::match ( const string::LinearString & currentState = transition->second; posInPattern--; + + // found a prefix of nonreversed pattern that does not correspond to the entire pattern + if ( ( posInPattern != 0 ) && ( suffixAutomaton.getFinalStates ( ).find ( currentState ) != suffixAutomaton.getFinalStates ( ).end ( ) ) ) + lastPrefixPos = posInPattern; } if ( currentState != failState ) // Yay, there is match!!! occ.insert ( posInSubject ); - posInSubject += posInPattern + 1; + posInSubject += lastPrefixPos; } measurements::end ( ); diff --git a/alib2algo/test-src/stringology/exact/SuffixAutomatonTest.cpp b/alib2algo/test-src/stringology/exact/SuffixAutomatonTest.cpp index c7485c875187c505bf71c7f890df9b9c97cb9d6e..e73b48cc40ac63dff1344dcb6025308119f647bf 100644 --- a/alib2algo/test-src/stringology/exact/SuffixAutomatonTest.cpp +++ b/alib2algo/test-src/stringology/exact/SuffixAutomatonTest.cpp @@ -64,6 +64,7 @@ void SuffixAutomatonTest::testBackwardDAWGMatching ( ) { subjects.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfaa"); patterns.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfaa"); expectedOccs.push_back({0}); subjects.push_back("atggccttgcc"); patterns.push_back("gcc"); expectedOccs.push_back({3,8}); subjects.push_back("aaaaaaaaaa"); patterns.push_back("a"); expectedOccs.push_back({0,1,2,3,4,5,6,7,8,9}); + subjects.push_back("aaaaaaaaaa"); patterns.push_back("aa"); expectedOccs.push_back({0,1,2,3,4,5,6,7,8}); for(size_t i = 0; i < subjects.size(); ++i) {