From 171e15b9c1bd1c67a474a5551c78a95555ada3eb Mon Sep 17 00:00:00 2001
From: Tomas Capek <tomas@capek.io>
Date: Tue, 1 May 2018 13:49:34 +0200
Subject: [PATCH] Implement simulation for approximate string matching using
 dynamic programming and Hamming distance.

---
 .../simulations/HammingDynamicProgramming.h   | 103 ++++++++++++++++++
 .../HammingDynamicProgrammingTest.cpp         |  60 ++++++++++
 .../HammingDynamicProgrammingTest.h           |  20 ++++
 3 files changed, 183 insertions(+)
 create mode 100644 alib2algo/src/stringology/simulations/HammingDynamicProgramming.h
 create mode 100644 alib2algo/test-src/stringology/simulations/HammingDynamicProgrammingTest.cpp
 create mode 100644 alib2algo/test-src/stringology/simulations/HammingDynamicProgrammingTest.h

diff --git a/alib2algo/src/stringology/simulations/HammingDynamicProgramming.h b/alib2algo/src/stringology/simulations/HammingDynamicProgramming.h
new file mode 100644
index 0000000000..c4de7edb15
--- /dev/null
+++ b/alib2algo/src/stringology/simulations/HammingDynamicProgramming.h
@@ -0,0 +1,103 @@
+/*
+ * HammingDynamicProgramming.h
+ *
+ *  Created on: 1.5.2018
+ *      Author: Tomas Capek
+ */
+
+#ifndef _HAMMING_DYNAMIC_PROGRAMMING_H__
+#define _HAMMING_DYNAMIC_PROGRAMMING_H__
+
+#include <string/LinearString.h>
+
+namespace stringology {
+
+namespace simulations {
+
+/**
+ * Simulation of approximate string matching under the Hamming distance
+ * (symbol replacements only) by means of a dynamic programming table.
+ */
+class HammingDynamicProgramming {
+public:
+	/**
+	 * Builds the dynamic programming table for the given text and pattern.
+	 *
+	 * For j <= i the cell [i][j] is the number of mismatches between the first j
+	 * symbols of the pattern and the j text symbols preceding text position i.
+	 * Column 0 is all zeros; cells with j > i continue the errors + 1 seed of row 0.
+	 *
+	 * \param text the text to search in
+	 * \param pattern the pattern to search for
+	 * \param errors the allowed number of mismatches, used only to seed row 0
+	 *
+	 * \return table of (text length + 1) rows by (pattern length + 1) columns
+	 */
+	template <class SymbolType>
+	static ext::vector<ext::vector<unsigned int>> compute_table(const string::LinearString<SymbolType> & text, const string::LinearString<SymbolType> & pattern, unsigned int errors);
+
+	/**
+	 * Finds the occurrences of the pattern in the text with at most the given
+	 * number of mismatches.
+	 *
+	 * \param text the text to search in
+	 * \param pattern the pattern to search for
+	 * \param errors the maximal allowed number of mismatches
+	 *
+	 * \return 0-based start positions of the occurrences in increasing order
+	 */
+	template <class SymbolType>
+	static ext::vector<unsigned int> search(const string::LinearString<SymbolType> & text, const string::LinearString<SymbolType> & pattern, unsigned int errors);
+
+};
+
+template <class SymbolType>
+ext::vector<ext::vector<unsigned int>> HammingDynamicProgramming::compute_table(const string::LinearString<SymbolType> & text, const string::LinearString<SymbolType> & pattern, unsigned int errors) {
+	const auto & textContent = text.getContent();
+	const auto & patternContent = pattern.getContent();
+
+	// every cell starts at 0, which is what column 0 needs: an occurrence may start anywhere in the text
+	ext::vector<ext::vector<unsigned int>> table(textContent.size() + 1, ext::vector<unsigned int>(patternContent.size() + 1, 0));
+
+	// a non-empty pattern prefix cannot end before the text starts (errors + 1 wraps to 0 for the maximal errors)
+	for(size_t j = 1; j <= patternContent.size(); j++) {
+		table[0][j] = errors + 1;
+	}
+
+	// only replacements are allowed, so a cell depends on its diagonal predecessor alone
+	for(size_t i = 0; i < textContent.size(); i++) {
+		for(size_t j = 0; j < patternContent.size(); j++) {
+			const unsigned int mismatch = patternContent[j] == textContent[i] ? 0u : 1u;
+			table[i+1][j+1] = table[i][j] + mismatch;
+		}
+	}
+
+	return table;
+}
+
+template <class SymbolType>
+ext::vector<unsigned int> HammingDynamicProgramming::search(const string::LinearString<SymbolType> & text, const string::LinearString<SymbolType> & pattern, unsigned int errors) {
+	const auto table = HammingDynamicProgramming::compute_table(text, pattern, errors);
+	const size_t textLength = text.getContent().size();
+	const size_t patternLength = pattern.getContent().size();
+
+	ext::vector<unsigned int> result;
+
+	// An occurrence ending at row i starts at i - patternLength, so rows above
+	// patternLength cannot hold one. Skipping them keeps the unsigned subtraction
+	// from wrapping when errors + 1 overflowed to 0 while row 0 was seeded.
+	// Row 0 is never inspected, not even for an empty pattern.
+	for(size_t i = patternLength == 0 ? 1 : patternLength; i <= textLength; i++) {
+		if (table[i][patternLength] <= errors) {
+			result.push_back(static_cast<unsigned int>(i - patternLength));
+		}
+	}
+
+	return result;
+}
+
+} // namespace simulations
+
+} // namespace stringology
+
+#endif /* _HAMMING_DYNAMIC_PROGRAMMING_H__ */
diff --git a/alib2algo/test-src/stringology/simulations/HammingDynamicProgrammingTest.cpp b/alib2algo/test-src/stringology/simulations/HammingDynamicProgrammingTest.cpp
new file mode 100644
index 0000000000..2039f8ec2f
--- /dev/null
+++ b/alib2algo/test-src/stringology/simulations/HammingDynamicProgrammingTest.cpp
@@ -0,0 +1,60 @@
+#include "HammingDynamicProgrammingTest.h"
+
+#include <limits>
+
+#include <string/LinearString.h>
+#include <stringology/simulations/HammingDynamicProgramming.h>
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( HammingDynamicProgrammingTest, "dynamic programming" );
+CPPUNIT_TEST_SUITE_REGISTRATION ( HammingDynamicProgrammingTest );
+
+// Compares the complete table computed with errors = 3 against a literal expected table.
+void HammingDynamicProgrammingTest::testTableConstruction() {
+	auto text = string::LinearString<>("adcabcaabadbbca");
+	auto pattern = string::LinearString<>("adbbca");
+
+	ext::vector<ext::vector<unsigned int>> expected_result = {
+		ext::vector<unsigned int>({0, 4, 4, 4, 4, 4, 4}),
+		ext::vector<unsigned int>({0, 0, 5, 5, 5, 5, 4}),
+		ext::vector<unsigned int>({0, 1, 0, 6, 6, 6, 6}),
+		ext::vector<unsigned int>({0, 1, 2, 1, 7, 6, 7}),
+		ext::vector<unsigned int>({0, 0, 2, 3, 2, 8, 6}),
+		ext::vector<unsigned int>({0, 1, 1, 2, 3, 3, 9}),
+		ext::vector<unsigned int>({0, 1, 2, 2, 3, 3, 4}),
+		ext::vector<unsigned int>({0, 0, 2, 3, 3, 4, 3}),
+		ext::vector<unsigned int>({0, 0, 1, 3, 4, 4, 4}),
+		ext::vector<unsigned int>({0, 1, 1, 1, 3, 5, 5}),
+		ext::vector<unsigned int>({0, 0, 2, 2, 2, 4, 5}),
+		ext::vector<unsigned int>({0, 1, 0, 3, 3, 3, 5}),
+		ext::vector<unsigned int>({0, 1, 2, 0, 3, 4, 4}),
+		ext::vector<unsigned int>({0, 1, 2, 2, 0, 4, 5}),
+		ext::vector<unsigned int>({0, 1, 2, 3, 3, 0, 5}),
+		ext::vector<unsigned int>({0, 0, 2, 3, 4, 4, 0}),
+	};
+
+	CPPUNIT_ASSERT(expected_result == stringology::simulations::HammingDynamicProgramming::compute_table(text, pattern, 3));
+}
+
+// Checks the reported occurrence positions, first for 3 allowed mismatches, then for the maximal value.
+void HammingDynamicProgrammingTest::testSearching() {
+	auto text = string::LinearString<>("patternpettannbalastpettern");
+	auto pattern = string::LinearString<>("pattern");
+
+	auto expected_result = ext::vector<unsigned int>({0, 7, 20});
+	auto result = stringology::simulations::HammingDynamicProgramming::search(text, pattern, 3);
+
+	CPPUNIT_ASSERT(expected_result == result);
+
+	// With the maximal number of errors every full alignment of the pattern matches,
+	// and no position before the start of the text may be reported.
+	auto short_text = string::LinearString<>("abc");
+	auto short_pattern = string::LinearString<>("xy");
+	auto all_alignments = ext::vector<unsigned int>({0, 1});
+	auto unbounded_result = stringology::simulations::HammingDynamicProgramming::search(short_text, short_pattern, std::numeric_limits<unsigned int>::max());
+
+	CPPUNIT_ASSERT(all_alignments == unbounded_result);
+}
+
+void HammingDynamicProgrammingTest::setUp() { }
+
+void HammingDynamicProgrammingTest::tearDown() { }
diff --git a/alib2algo/test-src/stringology/simulations/HammingDynamicProgrammingTest.h b/alib2algo/test-src/stringology/simulations/HammingDynamicProgrammingTest.h
new file mode 100644
index 0000000000..87ce0f0e4b
--- /dev/null
+++ b/alib2algo/test-src/stringology/simulations/HammingDynamicProgrammingTest.h
@@ -0,0 +1,20 @@
+#ifndef HAMMING_DYNAMIC_PROGRAMMING_TEST_H_
+#define HAMMING_DYNAMIC_PROGRAMMING_TEST_H_
+
+#include <cppunit/extensions/HelperMacros.h>
+
+/** Unit tests of stringology::simulations::HammingDynamicProgramming. */
+class HammingDynamicProgrammingTest : public CppUnit::TestFixture {
+	CPPUNIT_TEST_SUITE(HammingDynamicProgrammingTest);
+	CPPUNIT_TEST(testTableConstruction);
+	CPPUNIT_TEST(testSearching);
+	CPPUNIT_TEST_SUITE_END();
+
+public:
+	void setUp ( );
+	void tearDown ( );
+
+	void testTableConstruction();
+	void testSearching();
+};
+#endif // HAMMING_DYNAMIC_PROGRAMMING_TEST_H_
-- 
GitLab