From b13ebefe3dd80dd9ff0b44a1bad9b657ca809a82 Mon Sep 17 00:00:00 2001
From: Tomas Capek <tomas@capek.io>
Date: Thu, 3 May 2018 16:50:25 +0200
Subject: [PATCH] Implement simulation for approximate string matching using
 dynamic programming and generalized Levenshtein distance.

---
 ...GeneralizedLevenshteinDynamicProgramming.h | 89 +++++++++++++++++++
 ...lizedLevenshteinDynamicProgrammingTest.cpp | 19 +++-
 ...ralizedLevenshteinDynamicProgrammingTest.h |  2 +
 3 files changed, 108 insertions(+), 2 deletions(-)
 create mode 100644 alib2algo/src/stringology/simulations/GeneralizedLevenshteinDynamicProgramming.h

diff --git a/alib2algo/src/stringology/simulations/GeneralizedLevenshteinDynamicProgramming.h b/alib2algo/src/stringology/simulations/GeneralizedLevenshteinDynamicProgramming.h
new file mode 100644
index 0000000000..e4867a2ee0
--- /dev/null
+++ b/alib2algo/src/stringology/simulations/GeneralizedLevenshteinDynamicProgramming.h
@@ -0,0 +1,89 @@
+/*
+ *  GeneralizedLevenshteinDynamicProgramming.h
+ *
+ *  Created on: 1.5.2018
+ *      Author: Tomas Capek
+ */
+
+#ifndef _GENERALIZED_LEVENSHTEIN_DYNAMIC_PROGRAMMING_H__
+#define _GENERALIZED_LEVENSHTEIN_DYNAMIC_PROGRAMMING_H__
+
+#include <algorithm>
+#include <limits.h>
+
+#include <string/LinearString.h>
+
+namespace stringology {
+
+namespace simulations {
+
+class GeneralizedLevenshteinDynamicProgramming { // holds no state: only static member templates
+public:
+    template <class SymbolType> // builds the (|pattern|+1) x (|text|+1) table of unsigned costs
+    static ext::vector<ext::vector<unsigned int>> compute_table(const string::LinearString<SymbolType> & text, const string::LinearString<SymbolType> & pattern);
+    // search: builds the table via compute_table and collects positions whose last-row value is <= errors
+    template <class SymbolType>
+    static ext::set<unsigned int> search(const string::LinearString<SymbolType> & text, const string::LinearString<SymbolType> & pattern, unsigned int errors);
+};
+
+// Fills the dynamic-programming table for pattern against text. Cell [row][col] is the minimum of:
+// diagonal neighbour (+1 on symbol mismatch), upper neighbour +1, left neighbour +1 (not in the last
+// row) and, when the last two symbols match crosswise, the cell two rows and two columns back +1.
+template <class SymbolType>
+ext::vector<ext::vector<unsigned int>> GeneralizedLevenshteinDynamicProgramming::compute_table(const string::LinearString<SymbolType> & text, const string::LinearString<SymbolType> & pattern) {
+  // Rows are indexed by pattern prefix length, columns by text prefix length.
+  const auto & patternSymbols = pattern.getContent();
+  const auto & textSymbols = text.getContent();
+  const auto patternLength = patternSymbols.size();
+  const auto textLength = textSymbols.size();
+
+  // Every cell starts at 0; only column 0 is overwritten below, so row 0 stays all zeros.
+  ext::vector<ext::vector<unsigned int>> distances(patternLength + 1, ext::vector<unsigned int>(textLength + 1, 0));
+
+  // Column 0: the pattern prefix of length row gets the value row.
+  for(unsigned int row = 0; row <= patternLength; ++row) {
+    distances[row][0] = row;
+  }
+
+  for(unsigned int col = 1; col <= textLength; ++col) {
+    for(unsigned int row = 1; row <= patternLength; ++row) {
+      // Diagonal step: free when the current symbols are equal, otherwise costs 1.
+      const bool sameSymbol = patternSymbols[row-1] == textSymbols[col-1];
+      const unsigned int diagonalCost = distances[row-1][col-1] + (sameSymbol ? 0 : 1);
+
+      // The step from the left neighbour is not offered in the last row; the step from above always is.
+      const unsigned int leftCost = row < patternLength ? distances[row][col-1] + 1 : UINT_MAX;
+      const unsigned int gapCost = std::min(distances[row-1][col] + 1, leftCost);
+
+      // Swap of adjacent symbols: needs two symbols on each side that match crosswise.
+      const bool swapped = row > 1 && col > 1 && patternSymbols[row-2] == textSymbols[col-1] && patternSymbols[row-1] == textSymbols[col-2];
+      const unsigned int swapCost = swapped ? distances[row-2][col-2] + 1 : UINT_MAX;
+
+      distances[row][col] = std::min({diagonalCost, gapCost, swapCost});
+    }
+  }
+
+  return distances;
+}
+
+// Collects i-1 for every text prefix length i >= 1 whose last-row table value is <= errors.
+// Column 0 (empty text prefix) is skipped: i-1 would wrap around to UINT_MAX there.
+template <class SymbolType>
+ext::set<unsigned int> GeneralizedLevenshteinDynamicProgramming::search(const string::LinearString<SymbolType> & text, const string::LinearString<SymbolType> & pattern, unsigned int errors) {
+  const auto table = GeneralizedLevenshteinDynamicProgramming::compute_table(text, pattern);
+  const auto & lastRow = table[pattern.getContent().size()];
+  ext::set<unsigned int> result;
+  for(unsigned int i = 1; i <= text.getContent().size(); i++) {
+    if(lastRow[i] <= errors) {
+      result.insert(i-1);
+    }
+  }
+  return result;
+}
+
+
+} // namespace simulations
+
+} // namespace stringology
+
+#endif /* _GENERALIZED_LEVENSHTEIN_DYNAMIC_PROGRAMMING_H__ */
diff --git a/alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.cpp b/alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.cpp
index 229f053d3c..142e32686f 100644
--- a/alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.cpp
+++ b/alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.cpp
@@ -11,12 +11,27 @@ void GeneralizedLevenshteinDynamicProgrammingTest::testTableConstruction() {
   auto pattern = string::LinearString<>("adbbca");
 
   ext::vector<ext::vector<unsigned int>> expected_result = {
-    ext::vector<unsigned int>({}),
+    ext::vector<unsigned int>({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
+    ext::vector<unsigned int>({1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0}),
+    ext::vector<unsigned int>({2, 1, 0, 1, 2, 2, 1, 1, 1, 1, 0, 1, 2, 2, 1}),
+    ext::vector<unsigned int>({3, 2, 1, 0, 1, 2, 2, 2, 1, 2, 1, 0, 1, 2, 2}),
+    ext::vector<unsigned int>({4, 3, 2, 1, 1, 1, 2, 3, 2, 2, 2, 1, 0, 1, 2}),
+    ext::vector<unsigned int>({5, 4, 3, 2, 1, 1, 2, 3, 3, 3, 3, 2, 1, 0, 1}),
+    ext::vector<unsigned int>({6, 5, 4, 3, 2, 2, 1, 2, 4, 3, 4, 3, 2, 1, 0}),
   };
 
-  CPPUNIT_ASSERT(expected_result == stringology::simulations::GeneralizedLevenshteinDynamicProgramming::compute_table(text, pattern, 3));
+  CPPUNIT_ASSERT(expected_result == stringology::simulations::GeneralizedLevenshteinDynamicProgramming::compute_table(text, pattern));
 }
 
+void GeneralizedLevenshteinDynamicProgrammingTest::testSearch() {
+  // Look for "adbbca" in the sample text while tolerating up to 3 errors.
+  const string::LinearString<> haystack("adbcbaabadbbca");
+  const string::LinearString<> needle("adbbca");
+  const ext::set<unsigned int> expected_result = {2, 3, 4, 5, 6, 8, 10, 11, 12, 13};
+
+  const auto result = stringology::simulations::GeneralizedLevenshteinDynamicProgramming::search(haystack, needle, 3);
+  CPPUNIT_ASSERT(expected_result == result);
+}
 
 void GeneralizedLevenshteinDynamicProgrammingTest::setUp() { }
 
diff --git a/alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.h b/alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.h
index 04d54222db..3c044661d4 100644
--- a/alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.h
+++ b/alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.h
@@ -6,6 +6,7 @@
 class GeneralizedLevenshteinDynamicProgrammingTest : public CppUnit::TestFixture {
 	CPPUNIT_TEST_SUITE(GeneralizedLevenshteinDynamicProgrammingTest);
     CPPUNIT_TEST(testTableConstruction);
+		CPPUNIT_TEST(testSearch);
   CPPUNIT_TEST_SUITE_END();
 
 public:
@@ -13,5 +14,6 @@ public:
     void tearDown ( );
 
     void testTableConstruction();
+		void testSearch();
 };
 #endif // LEVENSHTEIN_DYNAMIC_PROGRAMMING_TEST_H_
-- 
GitLab