From d32f5c4808a76223035ea37e0718466b455e3299 Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Wed, 4 Apr 2018 16:12:16 +0200 Subject: [PATCH] tune tree backward pattern matching --- .../src/arbology/exact/BoyerMooreHorspool.h | 12 ++++++------ alib2algo/src/arbology/exact/QuickSearch.h | 16 ++++++++-------- .../src/tree/exact/BackwardOccurrenceTest.h | 4 ++-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/alib2algo/src/arbology/exact/BoyerMooreHorspool.h b/alib2algo/src/arbology/exact/BoyerMooreHorspool.h index 7b5a58af94..05f76f513d 100644 --- a/alib2algo/src/arbology/exact/BoyerMooreHorspool.h +++ b/alib2algo/src/arbology/exact/BoyerMooreHorspool.h @@ -57,10 +57,10 @@ ext::set < unsigned > BoyerMooreHorspool::match ( const tree::PrefixRankedBarTre ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); // index to the subject - unsigned i = 0; + unsigned i = pattern.getContent ( ).size ( ) - 1; // main loop of the algorithm over all possible indexes where the pattern can start - while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) { + while ( i < subject.getContent ( ).size ( ) ) { // pair j and offset ext::pair < int, int > jOffset = tree::exact::BackwardOccurrenceTest::occurrence ( subject, subjectSubtreeJumpTable, pattern, i ); @@ -68,7 +68,7 @@ ext::set < unsigned > BoyerMooreHorspool::match ( const tree::PrefixRankedBarTre if ( jOffset.first == -1 ) occ.insert ( jOffset.second + 1); // shift heuristics - i += bcs[subject.getContent ( )[i + pattern.getContent ( ).size ( ) - 1]]; + i += bcs[subject.getContent ( )[i]]; } return occ; @@ -84,10 +84,10 @@ ext::set < unsigned > BoyerMooreHorspool::match ( const tree::PrefixRankedBarTre tree::PrefixRankedBarTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); // index to the subject - unsigned i = 0; + unsigned i = pattern.getContent ( ).size ( ) - 1; // main loop of the algorithm over all possible indexes where the pattern can start - while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) { + while ( i < subject.getContent ( ).size ( ) ) { // pair j and offset ext::pair < int, int > jOffset = tree::exact::BackwardOccurrenceTest::occurrence ( subject, subjectSubtreeJumpTable, repeats, pattern, i ); @@ -95,7 +95,7 @@ ext::set < unsigned > BoyerMooreHorspool::match ( const tree::PrefixRankedBarTre if ( jOffset.first == -1 ) occ.insert ( jOffset.second + 1); // shift heuristics - i += bcs[subject.getContent ( )[i + pattern.getContent ( ).size ( ) - 1]]; + i += bcs[subject.getContent ( )[i]]; } return occ; diff --git a/alib2algo/src/arbology/exact/QuickSearch.h b/alib2algo/src/arbology/exact/QuickSearch.h index 4d7470110e..654ca0427e 100644 --- a/alib2algo/src/arbology/exact/QuickSearch.h +++ b/alib2algo/src/arbology/exact/QuickSearch.h @@ -58,22 +58,22 @@ ext::set < unsigned > QuickSearch::match ( const tree::PrefixRankedBarTree < Sym ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); // index to the subject - int i = 0; + unsigned i = pattern.getContent ( ).size ( ) - 1; // main loop of the algorithm over all possible indexes where the pattern can start - while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) { + while ( i < subject.getContent ( ).size ( ) ) { // pair j and offset ext::pair < int, int > jOffset = tree::exact::BackwardOccurrenceTest::occurrence ( subject, subjectSubtreeJumpTable, pattern, i ); // match was found if ( jOffset.first == -1 ) occ.insert ( jOffset.second + 1); - if ( i + pattern.getContent ( ).size ( ) == subject.getContent ( ).size ( ) ) { + if ( i + 1 >= subject.getContent ( ).size ( ) ) { break; } // shift heuristics - i += bcs[subject.getContent ( )[i + pattern.getContent ( ).size ( )]]; + i += bcs[subject.getContent ( )[i + 1]]; } return occ; @@ -88,22 +88,22 @@ ext::set < unsigned > QuickSearch::match ( const tree::PrefixRankedBarTree < Sym tree::PrefixRankedBarTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); // index to the subject - unsigned i = 0; + unsigned i = pattern.getContent ( ).size ( ) - 1; // main loop of the algorithm over all possible indexes where the pattern can start - while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) { + while ( i < subject.getContent ( ).size ( ) ) { // pair j and offset ext::pair < int, int > jOffset = tree::exact::BackwardOccurrenceTest::occurrence ( subject, subjectSubtreeJumpTable, repeats, pattern, i ); // match was found if ( jOffset.first == -1 ) occ.insert ( jOffset.second + 1); - if ( i + pattern.getContent ( ).size ( ) == subject.getContent ( ).size ( ) ) { + if ( i + 1 >= subject.getContent ( ).size ( ) ) { break; } // shift heuristics - i += bcs[subject.getContent ( )[i + pattern.getContent ( ).size ( )]]; + i += bcs[subject.getContent ( )[i + 1]]; } return occ; diff --git a/alib2algo/src/tree/exact/BackwardOccurrenceTest.h b/alib2algo/src/tree/exact/BackwardOccurrenceTest.h index 329eb6dd09..3a0e46afa5 100644 --- a/alib2algo/src/tree/exact/BackwardOccurrenceTest.h +++ b/alib2algo/src/tree/exact/BackwardOccurrenceTest.h @@ -43,7 +43,7 @@ ext::pair < int, int > BackwardOccurrenceTest::occurrence ( const PrefixRankedBa int j = pattern.getContent ( ).size ( ) - 1; // offset to the subject - int offset = subjectPosition + j; + int offset = subjectPosition; while ( ( j >= 0 ) && ( offset >= 0 ) ) { if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { @@ -72,7 +72,7 @@ ext::pair < int, int > BackwardOccurrenceTest::occurrence ( const PrefixRankedBa int j = pattern.getContent ( ).size ( ) - 1; // offset to the subject - int offset = subjectPosition + j; + int offset = subjectPosition; while ( ( j >= 0 ) && ( offset >= 0 ) ) { if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { -- GitLab