From 171e15b9c1bd1c67a474a5551c78a95555ada3eb Mon Sep 17 00:00:00 2001
From: Tomas Capek <tomas@capek.io>
Date: Tue, 1 May 2018 13:49:34 +0200
Subject: [PATCH] Implement simulation for approximate string matching using
 dynamic programming and Hamming distance.

---
 .../simulations/HammingDynamicProgramming.h   | 70 +++++++++++++++++++
 .../HammingDynamicProgrammingTest.cpp         | 48 +++++++++++++
 .../HammingDynamicProgrammingTest.h           | 19 +++++
 3 files changed, 137 insertions(+)
 create mode 100644 alib2algo/src/stringology/simulations/HammingDynamicProgramming.h
 create mode 100644 alib2algo/test-src/stringology/simulations/HammingDynamicProgrammingTest.cpp
 create mode 100644 alib2algo/test-src/stringology/simulations/HammingDynamicProgrammingTest.h

diff --git a/alib2algo/src/stringology/simulations/HammingDynamicProgramming.h b/alib2algo/src/stringology/simulations/HammingDynamicProgramming.h
new file mode 100644
index 0000000000..c4de7edb15
--- /dev/null
+++ b/alib2algo/src/stringology/simulations/HammingDynamicProgramming.h
@@ -0,0 +1,70 @@
+/*
+ *  HammingDynamicProgramming.h
+ *
+ *  Created on: 1.5.2018
+ *      Author: Tomas Capek
+ */
+
+#ifndef _HAMMING_DYNAMIC_PROGRAMMING_H__
+#define _HAMMING_DYNAMIC_PROGRAMMING_H__
+
+#include <string/LinearString.h>
+
+namespace stringology {
+
+namespace simulations {
+
+class HammingDynamicProgramming { // static-only helpers: string matching with mismatches via a dynamic-programming table
+public:
+    // Builds the (|text| + 1) x (|pattern| + 1) mismatch table; row 0, columns >= 1, is seeded with errors + 1.
+    template <class SymbolType>
+    static ext::vector<ext::vector<unsigned int>> compute_table(const string::LinearString<SymbolType> & text, const string::LinearString<SymbolType> & pattern, unsigned int errors);
+    // Returns the start index (end row minus |pattern|) of each match, i.e. rows whose last-column entry is <= errors.
+    template <class SymbolType>
+    static ext::vector<unsigned int> search(const string::LinearString<SymbolType> & text, const string::LinearString<SymbolType> & pattern, unsigned int errors);
+};
+
+// Fills the dynamic-programming table for matching with mismatches only: cell [i + 1][j + 1] takes the
+// value of its diagonal neighbour [i][j], plus one when pattern symbol j differs from text symbol i.
+template <class SymbolType>
+ext::vector<ext::vector<unsigned int>> HammingDynamicProgramming::compute_table(const string::LinearString<SymbolType> & text, const string::LinearString<SymbolType> & pattern, unsigned int errors) {
+  const auto & textSymbols = text.getContent();
+  const auto & patternSymbols = pattern.getContent();
+
+  // Zero-filled, so column 0 (the empty pattern prefix) stays 0 in every row.
+  ext::vector<ext::vector<unsigned int>> table(textSymbols.size() + 1, ext::vector<unsigned int>(patternSymbols.size() + 1, 0));
+
+  // Row 0 is seeded above the threshold, so diagonals that start before the text begins end up above errors.
+  for (unsigned int col = 1; col <= patternSymbols.size(); ++col)
+    table[0][col] = errors + 1;
+
+  // Each remaining cell depends only on its upper-left neighbour.
+  for (unsigned int row = 0; row < textSymbols.size(); ++row)
+    for (unsigned int col = 0; col < patternSymbols.size(); ++col)
+      table[row + 1][col + 1] = table[row][col] + (patternSymbols[col] == textSymbols[row] ? 0u : 1u);
+
+  return table;
+}
+
+// Reports the start index of every occurrence of pattern in text that has at most `errors` mismatches.
+template <class SymbolType>
+ext::vector<unsigned int> HammingDynamicProgramming::search(const string::LinearString<SymbolType> & text, const string::LinearString<SymbolType> & pattern, unsigned int errors) {
+  const auto table = HammingDynamicProgramming::compute_table(text, pattern, errors);
+  const auto patternLength = pattern.getContent().size();
+  ext::vector<unsigned int> result;
+  for(unsigned int i=1; i<=text.getContent().size(); i++) {
+    // An occurrence ending at row i needs i >= patternLength. Checking it explicitly keeps the unsigned
+    // i - patternLength from wrapping when the errors + 1 seed of row 0 overflowed to 0 (errors == UINT_MAX).
+    if (i >= patternLength && table[i][patternLength] <= errors)
+      result.push_back(i - patternLength);
+  }
+  return result;
+}
+
+
+
+} // namespace simulations
+
+} // namespace stringology
+
+#endif /* _HAMMING_DYNAMIC_PROGRAMMING_H__ */
diff --git a/alib2algo/test-src/stringology/simulations/HammingDynamicProgrammingTest.cpp b/alib2algo/test-src/stringology/simulations/HammingDynamicProgrammingTest.cpp
new file mode 100644
index 0000000000..2039f8ec2f
--- /dev/null
+++ b/alib2algo/test-src/stringology/simulations/HammingDynamicProgrammingTest.cpp
@@ -0,0 +1,48 @@
+#include "HammingDynamicProgrammingTest.h"
+
+#include <string/LinearString.h>
+#include <stringology/simulations/HammingDynamicProgramming.h>
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( HammingDynamicProgrammingTest, "dynamic programming" );
+CPPUNIT_TEST_SUITE_REGISTRATION ( HammingDynamicProgrammingTest );
+
+void HammingDynamicProgrammingTest::testTableConstruction() {
+  auto text = string::LinearString<>("adcabcaabadbbca");
+  auto pattern = string::LinearString<>("adbbca");
+  // expected_result[i][j]: row i = text prefix length, column j = pattern prefix length; row 0 holds errors + 1 = 4.
+  ext::vector<ext::vector<unsigned int>> expected_result = {
+    ext::vector<unsigned int>({0, 4, 4, 4, 4, 4, 4}),
+    ext::vector<unsigned int>({0, 0, 5, 5, 5, 5, 4}),
+    ext::vector<unsigned int>({0, 1, 0, 6, 6, 6, 6}),
+    ext::vector<unsigned int>({0, 1, 2, 1, 7, 6, 7}),
+    ext::vector<unsigned int>({0, 0, 2, 3, 2, 8, 6}),
+    ext::vector<unsigned int>({0, 1, 1, 2, 3, 3, 9}),
+    ext::vector<unsigned int>({0, 1, 2, 2, 3, 3, 4}),
+    ext::vector<unsigned int>({0, 0, 2, 3, 3, 4, 3}),
+    ext::vector<unsigned int>({0, 0, 1, 3, 4, 4, 4}),
+    ext::vector<unsigned int>({0, 1, 1, 1, 3, 5, 5}),
+    ext::vector<unsigned int>({0, 0, 2, 2, 2, 4, 5}),
+    ext::vector<unsigned int>({0, 1, 0, 3, 3, 3, 5}),
+    ext::vector<unsigned int>({0, 1, 2, 0, 3, 4, 4}),
+    ext::vector<unsigned int>({0, 1, 2, 2, 0, 4, 5}),
+    ext::vector<unsigned int>({0, 1, 2, 3, 3, 0, 5}),
+    ext::vector<unsigned int>({0, 0, 2, 3, 4, 4, 0}),
+  };
+
+  CPPUNIT_ASSERT(expected_result == stringology::simulations::HammingDynamicProgramming::compute_table(text, pattern, 3));
+}
+
+void HammingDynamicProgrammingTest::testSearching() {
+  // With at most 3 mismatches, "pattern" is found at offsets 0 ("pattern"), 7 ("pettann") and 20 ("pettern").
+  const string::LinearString<> haystack("patternpettannbalastpettern");
+  const string::LinearString<> needle("pattern");
+  const ext::vector<unsigned int> expectedOffsets({0, 7, 20});
+
+  const auto foundOffsets = stringology::simulations::HammingDynamicProgramming::search(haystack, needle, 3);
+  CPPUNIT_ASSERT(expectedOffsets == foundOffsets);
+}
+
+
+void HammingDynamicProgrammingTest::setUp() { } // intentionally empty: the fixture class declares no data members
+
+void HammingDynamicProgrammingTest::tearDown() { } // intentionally empty: nothing was set up, nothing to release
diff --git a/alib2algo/test-src/stringology/simulations/HammingDynamicProgrammingTest.h b/alib2algo/test-src/stringology/simulations/HammingDynamicProgrammingTest.h
new file mode 100644
index 0000000000..87ce0f0e4b
--- /dev/null
+++ b/alib2algo/test-src/stringology/simulations/HammingDynamicProgrammingTest.h
@@ -0,0 +1,19 @@
+#ifndef HAMMING_DYNAMIC_PROGRAMMING_TEST_H_
+#define HAMMING_DYNAMIC_PROGRAMMING_TEST_H_
+
+#include <cppunit/extensions/HelperMacros.h>
+
+class HammingDynamicProgrammingTest : public CppUnit::TestFixture {
+	CPPUNIT_TEST_SUITE ( HammingDynamicProgrammingTest );
+	CPPUNIT_TEST ( testTableConstruction );
+	CPPUNIT_TEST ( testSearching );
+	CPPUNIT_TEST_SUITE_END ( );
+	// The macros above register the two test methods declared at the bottom of this class.
+public:
+	void setUp ( );
+	void tearDown ( );
+	// Test cases for HammingDynamicProgramming::compute_table and ::search respectively.
+	void testTableConstruction ( );
+	void testSearching ( );
+};
+#endif // HAMMING_DYNAMIC_PROGRAMMING_TEST_H_
-- 
GitLab