From f0c491da6b98d1981519965cf518f9ea452642dc Mon Sep 17 00:00:00 2001 From: Tomas Capek <tomas@capek.io> Date: Tue, 1 May 2018 16:51:28 +0200 Subject: [PATCH] Implement simulation for approximate string matching using dynamic programming and Levenshtein distance. --- .../LevenshteinDynamicProgramming.h | 8 ++++--- .../LevenshteinDynamicProgrammingTest.cpp | 24 ++++++------------- 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/alib2algo/src/stringology/simulations/LevenshteinDynamicProgramming.h b/alib2algo/src/stringology/simulations/LevenshteinDynamicProgramming.h index 3c02137536..54a881e4c1 100644 --- a/alib2algo/src/stringology/simulations/LevenshteinDynamicProgramming.h +++ b/alib2algo/src/stringology/simulations/LevenshteinDynamicProgramming.h @@ -9,6 +9,7 @@ #define _LEVENSHTEIN_DYNAMIC_PROGRAMMING_H__ #include <algorithm> +#include <limits.h> #include <string/LinearString.h> @@ -46,13 +47,14 @@ ext::vector<ext::vector<unsigned int>> LevenshteinDynamicProgramming::compute_ta value_a = table[j-1][i-1] + 1; } - unsigned int value_b; + unsigned int value_b = UINT_MAX; if(j < pattern.getContent().size()) { value_b = table[j][i-1] + 1; - } else { - value_b = table[j-1][i] + 1; } + value_b = std::min(table[j-1][i] + 1, value_b); + + table[j][i] = std::min({value_a, value_b}); } } diff --git a/alib2algo/test-src/stringology/simulations/LevenshteinDynamicProgrammingTest.cpp b/alib2algo/test-src/stringology/simulations/LevenshteinDynamicProgrammingTest.cpp index 804fa8f365..a87f8f14cc 100644 --- a/alib2algo/test-src/stringology/simulations/LevenshteinDynamicProgrammingTest.cpp +++ b/alib2algo/test-src/stringology/simulations/LevenshteinDynamicProgrammingTest.cpp @@ -11,23 +11,13 @@ void LevenshteinDynamicProgrammingTest::testTableConstruction() { auto pattern = string::LinearString<>("adbbca"); ext::vector<ext::vector<unsigned int>> expected_result = { - ext::vector<unsigned int>({0, 1, 2, 3, 4, 5, 6}), - ext::vector<unsigned int>({0, 0, 1, 2, 3, 4, 
5}), - ext::vector<unsigned int>({0, 1, 0, 1, 2, 3, 4}), - ext::vector<unsigned int>({0, 1, 1, 1, 2, 2, 3}), - ext::vector<unsigned int>({0, 0, 1, 2, 2, 3, 2}), - ext::vector<unsigned int>({0, 1, 1, 1, 2, 3, 4}), - ext::vector<unsigned int>({0, 1, 2, 2, 2, 2, 3}), - ext::vector<unsigned int>({0, 0, 1, 2, 3, 3, 2}), - ext::vector<unsigned int>({0, 0, 1, 2, 3, 4, 3}), - ext::vector<unsigned int>({0, 1, 1, 1, 2, 3, 4}), - ext::vector<unsigned int>({0, 0, 1, 2, 2, 3, 3}), - ext::vector<unsigned int>({0, 1, 0, 1, 2, 3, 4}), - ext::vector<unsigned int>({0, 1, 1, 0, 1, 2, 3}), - ext::vector<unsigned int>({0, 1, 2, 1, 0, 1, 2}), - ext::vector<unsigned int>({0, 1, 2, 2, 1, 0, 1}), - ext::vector<unsigned int>({0, 0, 1, 2, 2, 1, 0}), - + ext::vector<unsigned int>({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), + ext::vector<unsigned int>({1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0}), + ext::vector<unsigned int>({2, 1, 0, 1, 1, 1, 2, 1, 1, 1, 1, 0, 1, 2, 2, 1}), + ext::vector<unsigned int>({3, 2, 1, 1, 2, 1, 2, 2, 2, 1, 2, 1, 0, 1, 2, 2}), + ext::vector<unsigned int>({4, 3, 2, 2, 2, 2, 2, 3, 3, 2, 2, 2, 1, 0, 1, 2}), + ext::vector<unsigned int>({5, 4, 3, 2, 3, 3, 2, 3, 4, 3, 3, 3, 2, 1, 0, 1}), + ext::vector<unsigned int>({6, 5, 4, 3, 2, 4, 3, 2, 3, 4, 3, 4, 3, 2, 1, 0}), }; CPPUNIT_ASSERT(expected_result == stringology::simulations::LevenshteinDynamicProgramming::compute_table(text, pattern)); -- GitLab