From 6efe67335d5e6434d5f3cdfbbd619da8db2b5139 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20=C4=8Capek?= <tomas@capek.io>
Date: Mon, 30 Apr 2018 18:21:21 +0200
Subject: [PATCH] Implement simulation for approximate string matching using bit
 parallelism and Levenshtein distance.

---
 .../simulations/LevenshteinBitParalelism.h    | 84 +++++++++++++++++++
 .../LevenshteinBitParalelismTest.cpp          | 20 +++++
 .../LevenshteinBitParalelismTest.h            | 17 ++++
 3 files changed, 121 insertions(+)
 create mode 100644 alib2algo/src/stringology/simulations/LevenshteinBitParalelism.h
 create mode 100644 alib2algo/test-src/stringology/simulations/LevenshteinBitParalelismTest.cpp
 create mode 100644 alib2algo/test-src/stringology/simulations/LevenshteinBitParalelismTest.h

diff --git a/alib2algo/src/stringology/simulations/LevenshteinBitParalelism.h b/alib2algo/src/stringology/simulations/LevenshteinBitParalelism.h
new file mode 100644
index 0000000000..58e7f3b600
--- /dev/null
+++ b/alib2algo/src/stringology/simulations/LevenshteinBitParalelism.h
@@ -0,0 +1,84 @@
+/*
+ *  LevenshteinBitParalelism.h
+ *
+ *  Created on: 30.4.2018
+ *      Author: Tomas Capek
+ */
+
+#ifndef _LEVENSHTEIN_BIT_PARALELISM_H__
+#define _LEVENSHTEIN_BIT_PARALELISM_H__
+
+#include <exception>
+#include <string/LinearString.h>
+
+#include "BitParalelism.h"
+
+namespace stringology {
+
+namespace simulations {
+
+class LevenshteinBitParalelism { // stateless holder: its only member is the static search() below
+public:
+    template <class SymbolType> // SymbolType: symbol type shared by the text and the pattern strings
+    static ext::vector<unsigned int> search(const string::LinearString<SymbolType> & text, const string::LinearString<SymbolType> & pattern, unsigned int errors);
+};
+
+
+/**
+ * Simulates approximate matching of pattern in text with bit vectors: B_vectors[l] is kept per error
+ * count l in [0, errors], and a false last bit in any of them marks an occurrence ending at text[i].
+ * Returns 0 (once) for occurrences ending within the first |pattern| symbols, else i - |pattern| + 1.
+ */
+template <class SymbolType>
+ext::vector<unsigned int> LevenshteinBitParalelism::search(const string::LinearString<SymbolType> & text, const string::LinearString<SymbolType> & pattern, unsigned int errors) {
+  ext::vector<unsigned int> result;
+  const auto pattern_size = pattern.getContent().size();
+  if (pattern_size == 0) return result; // no last bit to test; avoids indexing at size() - 1 below
+
+  // preparation stage
+  ext::set<SymbolType> common_alphabet = text.getAlphabet();
+  const auto & pattern_alphabet = pattern.getAlphabet(); // one object backs both iterators
+  common_alphabet.insert(pattern_alphabet.begin(), pattern_alphabet.end());
+
+  ext::map<SymbolType, ext::vector<bool> > D_vectors = BitParalelism::constructDVectors(common_alphabet, pattern);
+
+  auto V_vector = ext::vector<bool>(pattern_size, 0);
+  V_vector[pattern_size - 1] = 1;
+
+  // B_vectors[l] starts with bits [l, pattern_size) set; the exclusive bound keeps writes in range
+  ext::vector<ext::vector<bool> > B_vectors;
+  for(unsigned int l = 0; l <= errors; l++) {
+    B_vectors.push_back(ext::vector<bool>(pattern_size, 0));
+    for(unsigned int j = l; j < pattern_size; j++) B_vectors[l][j] = 1;
+  }
+
+  // computation part
+  for(unsigned int i=0; i<text.getContent().size(); i++) {
+    ext::vector< ext::vector<bool> > previous_B_vectors = B_vectors;
+
+    B_vectors[0] = (B_vectors[0] << 1) | D_vectors[text.getContent()[i]];
+
+    for(unsigned int j=1; j<=errors; j++) {
+      B_vectors[j] = ((previous_B_vectors[j] << 1) | D_vectors[text.getContent()[i]]) &
+                     ( (previous_B_vectors[j-1] & B_vectors[j-1]) << 1) &
+                     ( previous_B_vectors[j-1] | V_vector );
+    }
+
+    // stop at the first error level whose last bit is false: at most one position per text symbol
+    for (const auto & data : B_vectors) {
+      if(data[pattern_size - 1] == false) {
+        if (i >= pattern_size) result.push_back(i - pattern_size + 1);
+        else if (result.size() == 0) result.push_back(0);
+        break;
+      }
+    }
+  }
+
+  return result;
+}
+
+} // namespace simulations
+
+} // namespace stringology
+
+#endif /* _LEVENSHTEIN_BIT_PARALELISM_H__ */
diff --git a/alib2algo/test-src/stringology/simulations/LevenshteinBitParalelismTest.cpp b/alib2algo/test-src/stringology/simulations/LevenshteinBitParalelismTest.cpp
new file mode 100644
index 0000000000..c8618a3b83
--- /dev/null
+++ b/alib2algo/test-src/stringology/simulations/LevenshteinBitParalelismTest.cpp
@@ -0,0 +1,20 @@
+#include "LevenshteinBitParalelismTest.h"
+
+#include <string/LinearString.h>
+#include <stringology/simulations/LevenshteinBitParalelism.h>
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( LevenshteinBitParalelismTest, "bit paralelism" );
+CPPUNIT_TEST_SUITE_REGISTRATION ( LevenshteinBitParalelismTest );
+
+void LevenshteinBitParalelismTest::testSimple() {
+  // Search "adbbca" in the text with the errors argument set to 3 and compare the reported positions.
+  const string::LinearString<> haystack("adcabcaabadbbca");
+  const string::LinearString<> needle("adbbca");
+  const ext::vector<unsigned int> expected_positions = {0, 1, 2, 4, 6, 7, 8, 9};
+  const auto found = stringology::simulations::LevenshteinBitParalelism::search(haystack, needle, 3);
+  CPPUNIT_ASSERT(expected_positions == found);
+}
+
+void LevenshteinBitParalelismTest::setUp() { } // intentionally empty: testSimple builds its own inputs
+
+void LevenshteinBitParalelismTest::tearDown() { } // intentionally empty: nothing was set up
diff --git a/alib2algo/test-src/stringology/simulations/LevenshteinBitParalelismTest.h b/alib2algo/test-src/stringology/simulations/LevenshteinBitParalelismTest.h
new file mode 100644
index 0000000000..89ad3fde39
--- /dev/null
+++ b/alib2algo/test-src/stringology/simulations/LevenshteinBitParalelismTest.h
@@ -0,0 +1,17 @@
+#ifndef LEVENSHTEIN_BIT_PARALELISM_TEST_H_
+#define LEVENSHTEIN_BIT_PARALELISM_TEST_H_
+
+#include <cppunit/extensions/HelperMacros.h>
+
+/** CppUnit fixture with a single case, testSimple, registered in the suite declared below. */
+class LevenshteinBitParalelismTest : public CppUnit::TestFixture {
+    CPPUNIT_TEST_SUITE(LevenshteinBitParalelismTest);
+    CPPUNIT_TEST(testSimple);
+    CPPUNIT_TEST_SUITE_END();
+
+public:
+    void setUp ( ) override;    // override: a signature mismatch with TestFixture now fails to compile
+    void tearDown ( ) override;
+    void testSimple();
+};
+#endif // LEVENSHTEIN_BIT_PARALELISM_TEST_H_
-- 
GitLab