Skip to content
Snippets Groups Projects
Commit b13ebefe authored by Tomas Capek's avatar Tomas Capek
Browse files

Implement simulation for approximate string matching using dynamic programming...

Implement simulation for approximate string matching using dynamic programming and General Levenshtein distance.
parent f841f3e3
No related branches found
No related tags found
No related merge requests found
/*
* GeneralizedLevenshteinDynamicProgramming.h
*
* Created on: 1.5.2018
* Author: Tomas Capek
*/
#ifndef _GENERALIZED_LEVENSHTEIN_DYNAMIC_PROGRAMMING_H__
#define _GENERALIZED_LEVENSHTEIN_DYNAMIC_PROGRAMMING_H__
#include <algorithm>
#include <limits.h>
#include <string/LinearString.h>
namespace stringology {
namespace simulations {
/**
 * Approximate string matching by dynamic programming, where the allowed edit
 * operations are replace, insert, delete and transposition of two adjacent
 * symbols, each with cost 1.
 *
 * All members are static; the class only groups the two entry points.
 */
class GeneralizedLevenshteinDynamicProgramming {
public:
/**
 * Builds the dynamic-programming table for the given text and pattern.
 *
 * @param text the text that is searched
 * @param pattern the pattern that is looked for
 * @return table with (|pattern| + 1) rows and (|text| + 1) columns, indexed as
 *         table[j][i] where j is a pattern prefix length and i is a text
 *         prefix length; row 0 is all zeros and column 0 holds j
 */
template <class SymbolType>
static ext::vector<ext::vector<unsigned int>> compute_table(const string::LinearString<SymbolType> & text, const string::LinearString<SymbolType> & pattern);
/**
 * Searches the text using the last row of the table from compute_table().
 *
 * @param text the text that is searched
 * @param pattern the pattern that is looked for
 * @param errors the largest table value still accepted as a match
 * @return set of values i - 1 for every column i whose last-row entry is at
 *         most errors
 */
template <class SymbolType>
static ext::set<unsigned int> search(const string::LinearString<SymbolType> & text, const string::LinearString<SymbolType> & pattern, unsigned int errors);
};
/**
 * Fills the dynamic-programming table used for approximate matching with
 * replace, insert, delete and adjacent-transposition edits (each of cost 1).
 *
 * Layout: table[j][i], where j is the length of the pattern prefix and i the
 * length of the text prefix. Row 0 stays all zeros, column 0 holds j.
 *
 * @param text the text that is searched
 * @param pattern the pattern that is looked for
 * @return table with (|pattern| + 1) rows and (|text| + 1) columns
 */
template <class SymbolType>
ext::vector<ext::vector<unsigned int>> GeneralizedLevenshteinDynamicProgramming::compute_table(const string::LinearString<SymbolType> & text, const string::LinearString<SymbolType> & pattern) {
	const auto & textSymbols = text.getContent();
	const auto & patternSymbols = pattern.getContent();
	const auto textLength = textSymbols.size();
	const auto patternLength = patternSymbols.size();

	// Everything starts at zero, which already gives the required row 0.
	ext::vector<ext::vector<unsigned int>> table(patternLength + 1, ext::vector<unsigned int>(textLength + 1, 0));

	// Column 0: a pattern prefix of length `row` against the empty text prefix.
	for (unsigned int row = 0; row <= patternLength; ++row)
		table[row][0] = row;

	for (unsigned int col = 1; col <= textLength; ++col) {
		for (unsigned int row = 1; row <= patternLength; ++row) {
			// Diagonal step: free when the symbols agree, otherwise a replace.
			const bool sameSymbol = patternSymbols[row - 1] == textSymbols[col - 1];
			const unsigned int diagonal = table[row - 1][col - 1] + (sameSymbol ? 0u : 1u);

			// Delete step is always available; the insert step is considered
			// only while the pattern prefix is shorter than the whole pattern.
			unsigned int gap = table[row - 1][col] + 1;
			if (row < patternLength)
				gap = std::min(gap, table[row][col - 1] + 1);

			// Transposition: the last two symbols of both prefixes are swapped.
			unsigned int transposed = UINT_MAX;
			if (row > 1 && col > 1
					&& patternSymbols[row - 2] == textSymbols[col - 1]
					&& patternSymbols[row - 1] == textSymbols[col - 2])
				transposed = table[row - 2][col - 2] + 1;

			table[row][col] = std::min(std::min(diagonal, gap), transposed);
		}
	}

	return table;
}
/**
 * Reports where the pattern occurs in the text with at most `errors` edits
 * (replace, insert, delete, adjacent transposition).
 *
 * The last row of the table from compute_table() is scanned; column i stands
 * for the text prefix of length i, so a hit in column i is reported as the
 * zero-based index i - 1 of the last symbol of that prefix.
 *
 * @param text the text that is searched
 * @param pattern the pattern that is looked for
 * @param errors the largest table value still accepted as a match
 * @return set of zero-based text indices i - 1 whose last-row entry is at most
 *         errors
 */
template <class SymbolType>
ext::set<unsigned int> GeneralizedLevenshteinDynamicProgramming::search(const string::LinearString<SymbolType> & text, const string::LinearString<SymbolType> & pattern, unsigned int errors) {
	auto table = GeneralizedLevenshteinDynamicProgramming::compute_table(text, pattern);
	const auto & lastRow = table[pattern.getContent().size()];

	ext::set<unsigned int> result;

	// Column 0 is the empty text prefix and has no text index: starting there
	// made `i - 1` wrap around to UINT_MAX whenever |pattern| <= errors, so the
	// scan begins at column 1.
	for (unsigned int i = 1; i <= text.getContent().size(); i++) {
		if (lastRow[i] <= errors) {
			result.insert(i - 1);
		}
	}

	return result;
}
} // namespace simulations
} // namespace stringology
#endif /* _GENERALIZED_LEVENSHTEIN_DYNAMIC_PROGRAMMING_H__ */
......@@ -11,12 +11,27 @@ void GeneralizedLevenshteinDynamicProgrammingTest::testTableConstruction() {
auto pattern = string::LinearString<>("adbbca");
 
ext::vector<ext::vector<unsigned int>> expected_result = {
ext::vector<unsigned int>({}),
ext::vector<unsigned int>({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
ext::vector<unsigned int>({1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0}),
ext::vector<unsigned int>({2, 1, 0, 1, 2, 2, 1, 1, 1, 1, 0, 1, 2, 2, 1}),
ext::vector<unsigned int>({3, 2, 1, 0, 1, 2, 2, 2, 1, 2, 1, 0, 1, 2, 2}),
ext::vector<unsigned int>({4, 3, 2, 1, 1, 1, 2, 3, 2, 2, 2, 1, 0, 1, 2}),
ext::vector<unsigned int>({5, 4, 3, 2, 1, 1, 2, 3, 3, 3, 3, 2, 1, 0, 1}),
ext::vector<unsigned int>({6, 5, 4, 3, 2, 2, 1, 2, 4, 3, 4, 3, 2, 1, 0}),
};
 
CPPUNIT_ASSERT(expected_result == stringology::simulations::GeneralizedLevenshteinDynamicProgramming::compute_table(text, pattern, 3));
CPPUNIT_ASSERT(expected_result == stringology::simulations::GeneralizedLevenshteinDynamicProgramming::compute_table(text, pattern));
}
 
void GeneralizedLevenshteinDynamicProgrammingTest::testSearch() {
auto text = string::LinearString<>("adbcbaabadbbca");
auto pattern = string::LinearString<>("adbbca");
ext::set<unsigned int> expected_result = {2, 3, 4, 5, 6, 8, 10, 11, 12, 13};
auto result = stringology::simulations::GeneralizedLevenshteinDynamicProgramming::search(text, pattern, 3);
CPPUNIT_ASSERT(expected_result == result);
}
 
// Intentionally empty: this fixture hook does no preparation.
void GeneralizedLevenshteinDynamicProgrammingTest::setUp() { }
 
......
......@@ -6,6 +6,7 @@
class GeneralizedLevenshteinDynamicProgrammingTest : public CppUnit::TestFixture {
CPPUNIT_TEST_SUITE(GeneralizedLevenshteinDynamicProgrammingTest);
CPPUNIT_TEST(testTableConstruction);
CPPUNIT_TEST(testSearch);
CPPUNIT_TEST_SUITE_END();
 
public:
......@@ -13,5 +14,6 @@ public:
void tearDown ( );
 
void testTableConstruction();
void testSearch();
};
#endif // LEVENSHTEIN_DYNAMIC_PROGRAMMING_TEST_H_
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment