From 891846d944d0e550185a4b6b20ec5366888ce448 Mon Sep 17 00:00:00 2001
From: Tomas Capek <tomas@capek.io>
Date: Tue, 1 May 2018 16:51:28 +0200
Subject: [PATCH] Implement simulation for approximate string matching using
 dynamic programming and Levenshtein distance.

---
 .../LevenshteinDynamicProgramming.h           | 73 +++++++++++++++++++
 ...lizedLevenshteinDynamicProgrammingTest.cpp | 23 ++++++
 ...ralizedLevenshteinDynamicProgrammingTest.h | 17 +++++
 .../LevenshteinDynamicProgrammingTest.cpp     | 39 ++++++++++
 .../LevenshteinDynamicProgrammingTest.h       | 17 +++++
 5 files changed, 169 insertions(+)
 create mode 100644 alib2algo/src/stringology/simulations/LevenshteinDynamicProgramming.h
 create mode 100644 alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.cpp
 create mode 100644 alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.h
 create mode 100644 alib2algo/test-src/stringology/simulations/LevenshteinDynamicProgrammingTest.cpp
 create mode 100644 alib2algo/test-src/stringology/simulations/LevenshteinDynamicProgrammingTest.h

diff --git a/alib2algo/src/stringology/simulations/LevenshteinDynamicProgramming.h b/alib2algo/src/stringology/simulations/LevenshteinDynamicProgramming.h
new file mode 100644
index 0000000000..3c02137536
--- /dev/null
+++ b/alib2algo/src/stringology/simulations/LevenshteinDynamicProgramming.h
@@ -0,0 +1,73 @@
+/*
+ *  LevenshteinDynamicProgramming.h
+ *
+ *  Created on: 1.5.2018
+ *      Author: Tomas Capek
+ */
+
+#ifndef _LEVENSHTEIN_DYNAMIC_PROGRAMMING_H__
+#define _LEVENSHTEIN_DYNAMIC_PROGRAMMING_H__
+
+#include <algorithm>
+
+#include <string/LinearString.h>
+
+namespace stringology {
+
+namespace simulations {
+
+/** Simulation of approximate string matching with Levenshtein distance by dynamic programming. */
+class LevenshteinDynamicProgramming {
+public:
+    /**
+     * Fills the dynamic programming table used to search for pattern in text.
+     *
+     * \param text the string that is searched
+     * \param pattern the string that is searched for
+     *
+     * \return table with (pattern length + 1) rows and (text length + 1) columns; table[j][i] is the
+     *         cost computed for the first j symbols of the pattern and the first i symbols of the text
+     */
+    template <class SymbolType>
+    static ext::vector<ext::vector<unsigned int>> compute_table(const string::LinearString<SymbolType> & text, const string::LinearString<SymbolType> & pattern);
+
+};
+
+template <class SymbolType>
+ext::vector<ext::vector<unsigned int>> LevenshteinDynamicProgramming::compute_table(const string::LinearString<SymbolType> & text, const string::LinearString<SymbolType> & pattern) {
+  const auto & textContent = text.getContent();
+  const auto & patternContent = pattern.getContent();
+
+  // Row 0 (no pattern symbol consumed) is left at zero for every text position.
+  ext::vector<ext::vector<unsigned int>> table(patternContent.size() + 1, ext::vector<unsigned int>(textContent.size() + 1, 0));
+
+  // Column 0 (no text symbol consumed): the first j pattern symbols cost j.
+  for(unsigned int j = 0; j <= patternContent.size(); j++) {
+    table[j][0] = j;
+  }
+
+  for(unsigned int i = 1; i <= textContent.size(); i++) {
+    for(unsigned int j = 1; j <= patternContent.size(); j++) {
+      // Diagonal step: free when the symbols are equal, one replacement otherwise.
+      unsigned int best = table[j-1][i-1] + (patternContent[j-1] == textContent[i-1] ? 0u : 1u);
+      // Step from the row above (pattern symbol j has no counterpart in the text); a candidate in every row.
+      best = std::min(best, table[j-1][i] + 1u);
+
+      // Step from the column to the left (extra text symbol); not taken once the whole pattern is consumed.
+      if(j < patternContent.size()) {
+        best = std::min(best, table[j][i-1] + 1u);
+      }
+
+      table[j][i] = best;
+    }
+  }
+
+  return table;
+}
+
+
+
+} // namespace simulations
+
+} // namespace stringology
+
+#endif /* _LEVENSHTEIN_DYNAMIC_PROGRAMMING_H__ */
diff --git a/alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.cpp b/alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.cpp
new file mode 100644
index 0000000000..229f053d3c
--- /dev/null
+++ b/alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.cpp
@@ -0,0 +1,23 @@
+#include "GeneralizedLevenshteinDynamicProgrammingTest.h"
+
+#include <string/LinearString.h>
+#include <stringology/simulations/GeneralizedLevenshteinDynamicProgramming.h>
+// Registers the suite in the registry named "dynamic programming" and in the default registry.
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( GeneralizedLevenshteinDynamicProgrammingTest, "dynamic programming" );
+CPPUNIT_TEST_SUITE_REGISTRATION ( GeneralizedLevenshteinDynamicProgrammingTest );
+
+void GeneralizedLevenshteinDynamicProgrammingTest::testTableConstruction() {
+  auto text = string::LinearString<>("adbcbaabadbbca");
+  auto pattern = string::LinearString<>("adbbca");
+  // NOTE(review): expected_result below is a single empty row - looks like a placeholder, fill in the real table.
+  ext::vector<ext::vector<unsigned int>> expected_result = {
+    ext::vector<unsigned int>({}),
+  };
+  // NOTE(review): GeneralizedLevenshteinDynamicProgramming and the meaning of the third argument (3) are not visible here - confirm.
+  CPPUNIT_ASSERT(expected_result == stringology::simulations::GeneralizedLevenshteinDynamicProgramming::compute_table(text, pattern, 3));
+}
+
+// Fixture hooks; both bodies are empty, the test builds all of its data locally.
+void GeneralizedLevenshteinDynamicProgrammingTest::setUp() { }
+
+void GeneralizedLevenshteinDynamicProgrammingTest::tearDown() { }
diff --git a/alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.h b/alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.h
new file mode 100644
index 0000000000..04d54222db
--- /dev/null
+++ b/alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.h
@@ -0,0 +1,17 @@
+#ifndef GENERALIZED_LEVENSHTEIN_DYNAMIC_PROGRAMMING_TEST_H_
+#define GENERALIZED_LEVENSHTEIN_DYNAMIC_PROGRAMMING_TEST_H_
+
+#include <cppunit/extensions/HelperMacros.h>
+
+// CppUnit fixture with a single test case, testTableConstruction.
+class GeneralizedLevenshteinDynamicProgrammingTest : public CppUnit::TestFixture {
+    CPPUNIT_TEST_SUITE(GeneralizedLevenshteinDynamicProgrammingTest);
+    CPPUNIT_TEST(testTableConstruction);
+    CPPUNIT_TEST_SUITE_END();
+
+public:
+    void testTableConstruction();
+    void setUp();
+    void tearDown();
+};
+#endif // GENERALIZED_LEVENSHTEIN_DYNAMIC_PROGRAMMING_TEST_H_
diff --git a/alib2algo/test-src/stringology/simulations/LevenshteinDynamicProgrammingTest.cpp b/alib2algo/test-src/stringology/simulations/LevenshteinDynamicProgrammingTest.cpp
new file mode 100644
index 0000000000..804fa8f365
--- /dev/null
+++ b/alib2algo/test-src/stringology/simulations/LevenshteinDynamicProgrammingTest.cpp
@@ -0,0 +1,39 @@
+#include "LevenshteinDynamicProgrammingTest.h"
+
+#include <string/LinearString.h>
+#include <stringology/simulations/LevenshteinDynamicProgramming.h>
+// Registers the suite in the registry named "dynamic programming" and in the default registry.
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( LevenshteinDynamicProgrammingTest, "dynamic programming" );
+CPPUNIT_TEST_SUITE_REGISTRATION ( LevenshteinDynamicProgrammingTest );
+
+void LevenshteinDynamicProgrammingTest::testTableConstruction() {
+  auto text = string::LinearString<>("adcabcaabadbbca");
+  auto pattern = string::LinearString<>("adbbca");
+
+  // compute_table returns one row per pattern prefix and one column per text
+  // prefix (pattern length + 1 rows, text length + 1 columns), so the expected
+  // table is written in that orientation.
+  ext::vector<ext::vector<unsigned int>> expected_result = {
+    ext::vector<unsigned int>({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
+    ext::vector<unsigned int>({1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0}),
+    ext::vector<unsigned int>({2, 1, 0, 1, 1, 1, 2, 1, 1, 1, 1, 0, 1, 2, 2, 1}),
+    ext::vector<unsigned int>({3, 2, 1, 1, 2, 1, 2, 2, 2, 1, 2, 1, 0, 1, 2, 2}),
+    ext::vector<unsigned int>({4, 3, 2, 2, 2, 2, 2, 3, 3, 2, 2, 2, 1, 0, 1, 2}),
+    ext::vector<unsigned int>({5, 4, 3, 2, 3, 3, 2, 3, 4, 3, 3, 3, 2, 1, 0, 1}),
+    ext::vector<unsigned int>({6, 5, 4, 3, 2, 4, 3, 2, 3, 4, 3, 4, 3, 2, 1, 0}),
+  };
+
+  auto table = stringology::simulations::LevenshteinDynamicProgramming::compute_table(text, pattern);
+
+  // Check the shape first so a layout mismatch is reported separately from wrong values.
+  CPPUNIT_ASSERT(table.size() == pattern.getContent().size() + 1);
+  for(const auto & row : table) {
+    CPPUNIT_ASSERT(row.size() == text.getContent().size() + 1);
+  }
+  CPPUNIT_ASSERT(expected_result == table);
+}
+
+// Fixture hooks; both bodies are empty, the test builds all of its data locally.
+void LevenshteinDynamicProgrammingTest::setUp() { }
+
+void LevenshteinDynamicProgrammingTest::tearDown() { }
diff --git a/alib2algo/test-src/stringology/simulations/LevenshteinDynamicProgrammingTest.h b/alib2algo/test-src/stringology/simulations/LevenshteinDynamicProgrammingTest.h
new file mode 100644
index 0000000000..6dade7594b
--- /dev/null
+++ b/alib2algo/test-src/stringology/simulations/LevenshteinDynamicProgrammingTest.h
@@ -0,0 +1,17 @@
+#ifndef LEVENSHTEIN_DYNAMIC_PROGRAMMING_TEST_H_
+#define LEVENSHTEIN_DYNAMIC_PROGRAMMING_TEST_H_
+
+#include <cppunit/extensions/HelperMacros.h>
+
+// CppUnit fixture with a single test case, testTableConstruction.
+class LevenshteinDynamicProgrammingTest : public CppUnit::TestFixture {
+    CPPUNIT_TEST_SUITE(LevenshteinDynamicProgrammingTest);
+    CPPUNIT_TEST(testTableConstruction);
+    CPPUNIT_TEST_SUITE_END();
+
+public:
+    void testTableConstruction();
+    void setUp();
+    void tearDown();
+};
+#endif // LEVENSHTEIN_DYNAMIC_PROGRAMMING_TEST_H_
-- 
GitLab