From 891846d944d0e550185a4b6b20ec5366888ce448 Mon Sep 17 00:00:00 2001
From: Tomas Capek <tomas@capek.io>
Date: Tue, 1 May 2018 16:51:28 +0200
Subject: [PATCH] Implement simulation for approximate string matching using dynamic programming and Levenshtein distance.

---
Review notes (text in this section is ignored by git am):
 * compute_table: the table[j-1][i] + 1 candidate was skipped for every row
   except the last one; all candidates are now considered, the table[j][i-1] + 1
   candidate still only for j < pattern size.
 * compute_table: removed the debug dump to std::cout together with the
   <iostream> include that was placed inside the namespace.
 * LevenshteinDynamicProgrammingTest: expected table transposed so that it is
   indexed [pattern prefix][text prefix], the layout compute_table returns.
 * Hunk lengths and the diffstat were updated by hand; the blob ids on the
   "index" lines were NOT regenerated and no longer match the edited files.

 .../LevenshteinDynamicProgramming.h           | 84 +++++++++++++++++++
 ...lizedLevenshteinDynamicProgrammingTest.cpp | 26 ++++++
 ...ralizedLevenshteinDynamicProgrammingTest.h | 17 ++++
 .../LevenshteinDynamicProgrammingTest.cpp     | 30 +++++++
 .../LevenshteinDynamicProgrammingTest.h       | 17 ++++
 5 files changed, 174 insertions(+)
 create mode 100644 alib2algo/src/stringology/simulations/LevenshteinDynamicProgramming.h
 create mode 100644 alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.cpp
 create mode 100644 alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.h
 create mode 100644 alib2algo/test-src/stringology/simulations/LevenshteinDynamicProgrammingTest.cpp
 create mode 100644 alib2algo/test-src/stringology/simulations/LevenshteinDynamicProgrammingTest.h

diff --git a/alib2algo/src/stringology/simulations/LevenshteinDynamicProgramming.h b/alib2algo/src/stringology/simulations/LevenshteinDynamicProgramming.h
new file mode 100644
index 0000000000..3c02137536
--- /dev/null
+++ b/alib2algo/src/stringology/simulations/LevenshteinDynamicProgramming.h
@@ -0,0 +1,84 @@
+/*
+ * LevenshteinDynamicProgramming.h
+ *
+ *  Created on: 1.5.2018
+ *      Author: Tomas Capek
+ */
+
+#ifndef _LEVENSHTEIN_DYNAMIC_PROGRAMMING_H__
+#define _LEVENSHTEIN_DYNAMIC_PROGRAMMING_H__
+
+#include <algorithm>
+
+#include <string/LinearString.h>
+
+namespace stringology {
+
+namespace simulations {
+
+/**
+ * Simulation of approximate string matching under the Levenshtein distance
+ * by dynamic programming.
+ */
+class LevenshteinDynamicProgramming {
+public:
+  /**
+   * Builds the dynamic programming table for the given text and pattern.
+   *
+   * The result has pattern size + 1 rows and text size + 1 columns and is
+   * indexed as table[j][i], where j is a pattern prefix length and i is a
+   * text prefix length. Row 0 is all zeros, column 0 holds table[j][0] = j,
+   * and every other cell is the minimum of
+   *  - table[j-1][i-1], plus 1 when pattern[j-1] and text[i-1] differ,
+   *  - table[j-1][i] + 1,
+   *  - table[j][i-1] + 1, considered only for j < pattern size.
+   *
+   * @param text the string that is searched
+   * @param pattern the string that is searched for
+   * @return the filled table
+   */
+  template <class SymbolType>
+  static ext::vector<ext::vector<unsigned int>> compute_table(const string::LinearString<SymbolType> & text, const string::LinearString<SymbolType> & pattern);
+
+};
+
+template <class SymbolType>
+ext::vector<ext::vector<unsigned int>> LevenshteinDynamicProgramming::compute_table(const string::LinearString<SymbolType> & text, const string::LinearString<SymbolType> & pattern) {
+  const auto & text_symbols = text.getContent();
+  const auto & pattern_symbols = pattern.getContent();
+  const auto text_size = text_symbols.size();
+  const auto pattern_size = pattern_symbols.size();
+
+  ext::vector<ext::vector<unsigned int>> table(pattern_size + 1, ext::vector<unsigned int>(text_size + 1, 0));
+
+  // column 0: no text symbol read yet, the value is the pattern prefix length
+  for(unsigned int j = 0; j <= pattern_size; j++) {
+    table[j][0] = j;
+  }
+
+  for(unsigned int i = 1; i <= text_size; i++) {
+    for(unsigned int j = 1; j <= pattern_size; j++) {
+      // diagonal step: free when the two symbols agree, one edit otherwise
+      const unsigned int mismatch = pattern_symbols[j-1] == text_symbols[i-1] ? 0 : 1;
+      unsigned int best = table[j-1][i-1] + mismatch;
+
+      // vertical step: pattern symbol j is skipped
+      best = std::min(best, table[j-1][i] + 1);
+
+      // horizontal step: text symbol i is skipped; not taken in the last row
+      if(j < pattern_size) {
+        best = std::min(best, table[j][i-1] + 1);
+      }
+
+      table[j][i] = best;
+    }
+  }
+
+  return table;
+}
+
+} // namespace simulations
+
+} // namespace stringology
+
+#endif /* _LEVENSHTEIN_DYNAMIC_PROGRAMMING_H__ */
diff --git a/alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.cpp b/alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.cpp
new file mode 100644
index 0000000000..229f053d3c
--- /dev/null
+++ b/alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.cpp
@@ -0,0 +1,26 @@
+#include "GeneralizedLevenshteinDynamicProgrammingTest.h"
+
+#include <string/LinearString.h>
+#include <stringology/simulations/GeneralizedLevenshteinDynamicProgramming.h>
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( GeneralizedLevenshteinDynamicProgrammingTest, "dynamic programming" );
+CPPUNIT_TEST_SUITE_REGISTRATION ( GeneralizedLevenshteinDynamicProgrammingTest );
+
+void GeneralizedLevenshteinDynamicProgrammingTest::testTableConstruction() {
+  auto text = string::LinearString<>("adbcbaabadbbca");
+  auto pattern = string::LinearString<>("adbbca");
+
+  // NOTE(review): the expectation below is still a placeholder, and the
+  // GeneralizedLevenshteinDynamicProgramming.h header included above is not
+  // among the files this patch creates - confirm both before merging.
+  ext::vector<ext::vector<unsigned int>> expected_result = {
+    ext::vector<unsigned int>({}),
+  };
+
+  CPPUNIT_ASSERT(expected_result == stringology::simulations::GeneralizedLevenshteinDynamicProgramming::compute_table(text, pattern, 3));
+}
+
+
+void GeneralizedLevenshteinDynamicProgrammingTest::setUp() { }
+
+void GeneralizedLevenshteinDynamicProgrammingTest::tearDown() { }
diff --git a/alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.h b/alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.h
new file mode 100644
index 0000000000..04d54222db
--- /dev/null
+++ b/alib2algo/test-src/stringology/simulations/GeneralizedLevenshteinDynamicProgrammingTest.h
@@ -0,0 +1,17 @@
+#ifndef GENERALIZED_LEVENSHTEIN_DYNAMIC_PROGRAMMING_TEST_H_
+#define GENERALIZED_LEVENSHTEIN_DYNAMIC_PROGRAMMING_TEST_H_
+
+#include <cppunit/extensions/HelperMacros.h>
+
+class GeneralizedLevenshteinDynamicProgrammingTest : public CppUnit::TestFixture {
+  CPPUNIT_TEST_SUITE(GeneralizedLevenshteinDynamicProgrammingTest);
+  CPPUNIT_TEST(testTableConstruction);
+  CPPUNIT_TEST_SUITE_END();
+
+public:
+  void setUp ( );
+  void tearDown ( );
+
+  void testTableConstruction();
+};
+#endif // GENERALIZED_LEVENSHTEIN_DYNAMIC_PROGRAMMING_TEST_H_
diff --git a/alib2algo/test-src/stringology/simulations/LevenshteinDynamicProgrammingTest.cpp b/alib2algo/test-src/stringology/simulations/LevenshteinDynamicProgrammingTest.cpp
new file mode 100644
index 0000000000..804fa8f365
--- /dev/null
+++ b/alib2algo/test-src/stringology/simulations/LevenshteinDynamicProgrammingTest.cpp
@@ -0,0 +1,30 @@
+#include "LevenshteinDynamicProgrammingTest.h"
+
+#include <string/LinearString.h>
+#include <stringology/simulations/LevenshteinDynamicProgramming.h>
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( LevenshteinDynamicProgrammingTest, "dynamic programming" );
+CPPUNIT_TEST_SUITE_REGISTRATION ( LevenshteinDynamicProgrammingTest );
+
+void LevenshteinDynamicProgrammingTest::testTableConstruction() {
+  auto text = string::LinearString<>("adcabcaabadbbca");
+  auto pattern = string::LinearString<>("adbbca");
+
+  // indexed as expected_result[j][i]: one row per pattern prefix length j, one column per text prefix length i
+  ext::vector<ext::vector<unsigned int>> expected_result = {
+    ext::vector<unsigned int>({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
+    ext::vector<unsigned int>({1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0}),
+    ext::vector<unsigned int>({2, 1, 0, 1, 1, 1, 2, 1, 1, 1, 1, 0, 1, 2, 2, 1}),
+    ext::vector<unsigned int>({3, 2, 1, 1, 2, 1, 2, 2, 2, 1, 2, 1, 0, 1, 2, 2}),
+    ext::vector<unsigned int>({4, 3, 2, 2, 2, 2, 2, 3, 3, 2, 2, 2, 1, 0, 1, 2}),
+    ext::vector<unsigned int>({5, 4, 3, 2, 3, 3, 2, 3, 4, 3, 3, 3, 2, 1, 0, 1}),
+    ext::vector<unsigned int>({6, 5, 4, 3, 2, 4, 3, 2, 3, 4, 3, 4, 3, 2, 1, 0}),
+  };
+
+  CPPUNIT_ASSERT(expected_result == stringology::simulations::LevenshteinDynamicProgramming::compute_table(text, pattern));
+}
+
+
+void LevenshteinDynamicProgrammingTest::setUp() { }
+
+void LevenshteinDynamicProgrammingTest::tearDown() { }
diff --git a/alib2algo/test-src/stringology/simulations/LevenshteinDynamicProgrammingTest.h b/alib2algo/test-src/stringology/simulations/LevenshteinDynamicProgrammingTest.h
new file mode 100644
index 0000000000..6dade7594b
--- /dev/null
+++ b/alib2algo/test-src/stringology/simulations/LevenshteinDynamicProgrammingTest.h
@@ -0,0 +1,17 @@
+#ifndef LEVENSHTEIN_DYNAMIC_PROGRAMMING_TEST_H_
+#define LEVENSHTEIN_DYNAMIC_PROGRAMMING_TEST_H_
+
+#include <cppunit/extensions/HelperMacros.h>
+
+class LevenshteinDynamicProgrammingTest : public CppUnit::TestFixture {
+  CPPUNIT_TEST_SUITE(LevenshteinDynamicProgrammingTest);
+  CPPUNIT_TEST(testTableConstruction);
+  CPPUNIT_TEST_SUITE_END();
+
+public:
+  void setUp ( );
+  void tearDown ( );
+
+  void testTableConstruction();
+};
+#endif // LEVENSHTEIN_DYNAMIC_PROGRAMMING_TEST_H_
-- 
GitLab