Skip to content
Snippets Groups Projects
Commit 6d88134c authored by Tomas Capek's avatar Tomas Capek Committed by Jan Trávníček
Browse files

Implement GeneralizedLevenshteinSequenceMatchingAutomaton.

parent 15c8e069
No related branches found
No related tags found
No related merge requests found
/*
* GeneralizedLevenshteinSequenceMatchingAutomaton.cpp
*
* Created on: 29. 3. 2018
* Author: Tomas Capek
*/
#include "GeneralizedLevenshteinSequenceMatchingAutomaton.h"
#include <registration/AlgoRegistration.hpp>
namespace stringology {
namespace matching {
// Registration object for GeneralizedLevenshteinSequenceMatchingAutomaton::construct.
// Template arguments, in order: the algorithm class, the declared result type
// (EpsilonNFA over DefaultSymbolType with pair<unsigned, unsigned> states), and the
// two parameter types (a LinearString with default template arguments by const
// reference, and an unsigned error bound).
// NOTE(review): presumably this makes the algorithm reachable through the library's
// algorithm registry -- confirm against registration/AlgoRegistration.hpp.
auto GeneralizedLevenshteinSequenceMatchingAutomatonLinearString = registration::AbstractRegister <GeneralizedLevenshteinSequenceMatchingAutomaton, automaton::EpsilonNFA < DefaultSymbolType, void, ext::pair<unsigned int, unsigned int> >, const string::LinearString < > &, unsigned > ( GeneralizedLevenshteinSequenceMatchingAutomaton::construct );
} /* namespace matching */
} /* namespace stringology */
/*
* GeneralizedLevenshteinSequenceMatchingAutomaton.h
*
* Created on: 29. 3. 2018
* Author: Tomas Capek
*/
#ifndef _GENERALIZED_LEVENSHTEIN_SEQUENCE_MATCHING_AUTOMATON_H__
#define _GENERALIZED_LEVENSHTEIN_SEQUENCE_MATCHING_AUTOMATON_H__
#include <automaton/FSM/EpsilonNFA.h>
#include <stringology/matching/GeneralizedLevenshteinMatchingAutomaton.h>
#include <string/LinearString.h>
namespace stringology {
namespace matching {
/**
* Stateless holder of the static construction routine for the generalized
* Levenshtein sequence matching automaton.
*/
class GeneralizedLevenshteinSequenceMatchingAutomaton {
public:
/**
* Creates the Generalized Levenshtein matching automaton for sequence matching.
*
* The definition in this header starts from the automaton returned by
* GeneralizedLevenshteinMatchingAutomaton::construct for the same arguments and
* adds self-loops to its states.
*
* @param pattern linear string whose content and alphabet drive the construction
* @param allowed_errors error bound; forwarded unchanged to
*        GeneralizedLevenshteinMatchingAutomaton::construct
* @return automaton for approximate sequence matching using the Levenshtein method;
*         its states are pairs of unsigned integers.
*/
template < class SymbolType >
static automaton::EpsilonNFA < SymbolType, void, ext::pair<unsigned int, unsigned int> > construct(const string::LinearString < SymbolType > & pattern, unsigned int allowed_errors);
};
/**
 * Builds the sequence matching variant of the generalized Levenshtein automaton.
 *
 * The automaton produced by GeneralizedLevenshteinMatchingAutomaton::construct is
 * taken as the starting point and extended with self-loops:
 *  - on every state (i, j) with j <= allowed_errors and j <= i < |pattern|,
 *  - on every state (|pattern| + 1 + i, j) with j < allowed_errors, j <= i and
 *    i + 1 < |pattern| (the unit test refers to these as transposition states).
 * In both cases a loop is added for each alphabet symbol different from pattern[i].
 *
 * @param pattern linear string providing the content and the alphabet
 * @param allowed_errors error bound forwarded to the base construction
 * @return the extended automaton
 */
template < class SymbolType >
automaton::EpsilonNFA < SymbolType, void, ext::pair<unsigned int, unsigned int> > GeneralizedLevenshteinSequenceMatchingAutomaton::construct(const string::LinearString < SymbolType > & pattern, unsigned int allowed_errors) {
	auto nfa = stringology::matching::GeneralizedLevenshteinMatchingAutomaton::construct(pattern, allowed_errors);

	// The pattern is never modified here, so its accessors are queried once.
	const auto & content = pattern.getContent();
	const auto & alphabet = pattern.getAlphabet();
	const auto length = content.size();

	// Self-loops on the states (position, errors).
	for (unsigned int errors = 0; errors < allowed_errors + 1; ++errors) {
		for (unsigned int position = errors; position < length; ++position) {
			auto looped_state = ext::make_pair(position, errors);
			const SymbolType & expected = content[position];

			for (const SymbolType & symbol : alphabet) {
				if (symbol != expected)
					nfa.addTransition(looped_state, symbol, looped_state);
			}
		}
	}

	// Self-loops on the states (length + 1 + position, errors); the last pattern
	// position is excluded by the loop bound.
	for (unsigned int errors = 0; errors < allowed_errors; ++errors) {
		for (unsigned int position = errors; position + 1 < length; ++position) {
			auto looped_state = ext::make_pair(length + 1 + position, errors);
			const SymbolType & expected = content[position];

			for (const SymbolType & symbol : alphabet) {
				if (symbol != expected)
					nfa.addTransition(looped_state, symbol, looped_state);
			}
		}
	}

	return nfa;
}
} /* namespace matching */
} /* namespace stringology */
#endif /* _GENERALIZED_LEVENSHTEIN_SEQUENCE_MATCHING_AUTOMATON_H__ */
#include <stringology/matching/GeneralizedLevenshteinSequenceMatchingAutomaton.h>
#include <automaton/FSM/NFA.h>
#include <string/LinearString.h>
#include "GeneralizedLevenshteinSequenceMatchingAutomatonTest.h"
// Register the fixture twice: once in the registry named "stringology" and once
// through the plain (unnamed) suite registration macro.
CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( GeneralizedLevenshteinSequenceMatchingAutomatonTest, "stringology" );
CPPUNIT_TEST_SUITE_REGISTRATION ( GeneralizedLevenshteinSequenceMatchingAutomatonTest );
void GeneralizedLevenshteinSequenceMatchingAutomatonTest::testSimpleConstruction() {
ext::set<char> alphabet{'a', 'b', 'c', 'd'};
string::LinearString <char> input_string(alphabet, ext::vector<char>{'a', 'b', 'c'});
auto resulting_automata = stringology::matching::GeneralizedLevenshteinSequenceMatchingAutomaton::construct(input_string, 2);
typedef ext::pair<unsigned int, unsigned int> State;
automaton::EpsilonNFA < char, void, State > test(ext::make_pair(0,0));
test.setInputAlphabet(ext::set<char>{'a', 'b', 'c', 'd'});
State q0 = ext::make_pair(0,0);
State q1 = ext::make_pair(1,0);
State q2 = ext::make_pair(2,0);
State q3 = ext::make_pair(3,0);
State q4 = ext::make_pair(1,1);
State q5 = ext::make_pair(2,1);
State q6 = ext::make_pair(3,1);
State q7 = ext::make_pair(2,2);
State q8 = ext::make_pair(3,2);
State r3 = ext::make_pair(5,1);
State r2 = ext::make_pair(5,0);
State r1 = ext::make_pair(4,0);
test.setStates(ext::set<State> {q0, q1, q2, q3, q4, q5, q6, q7, q8, r1, r2, r3});
test.setFinalStates(ext::set<State> {q3, q6, q8});
test.addTransition(q0, 'a', q1); // vertical transitions (exact matching automata)
test.addTransition(q1, 'b', q2);
test.addTransition(q4, 'b', q5);
test.addTransition(q2, 'c', q3);
test.addTransition(q5, 'c', q6);
test.addTransition(q7, 'c', q8);
test.addTransition(q0, 'a', q0); // loops in initial state
test.addTransition(q0, 'b', q0);
test.addTransition(q0, 'c', q0);
test.addTransition(q0, 'd', q0);
test.addTransition(q0, 'b', q4); // diagonal transitions reptestenting replace
test.addTransition(q0, 'c', q4);
test.addTransition(q0, 'd', q4);
test.addTransition(q0, q4); // deletion
test.addTransition(q1, 'a', q5);
test.addTransition(q1, 'c', q5);
test.addTransition(q1, 'd', q5);
test.addTransition(q4, 'a', q7);
test.addTransition(q4, 'c', q7);
test.addTransition(q4, 'd', q7);
test.addTransition(q1, q5); // deletion
test.addTransition(q4, q7);
test.addTransition(q2, 'a', q6);
test.addTransition(q2, 'b', q6);
test.addTransition(q2, 'd', q6);
test.addTransition(q5, 'a', q8);
test.addTransition(q5, 'b', q8);
test.addTransition(q5, 'd', q8);
test.addTransition(q2, q6); // deletion
test.addTransition(q5, q8);
test.addTransition(q1, 'a', q4); // insertions
test.addTransition(q1, 'c', q4);
test.addTransition(q1, 'd', q4);
test.addTransition(q2, 'a', q5);
test.addTransition(q2, 'b', q5);
test.addTransition(q2, 'd', q5);
test.addTransition(q5, 'a', q7);
test.addTransition(q5, 'b', q7);
test.addTransition(q5, 'd', q7);
test.addTransition(q1, 'a', q1); // loops for sequence matching
test.addTransition(q1, 'c', q1);
test.addTransition(q1, 'd', q1);
test.addTransition(q2, 'a', q2);
test.addTransition(q2, 'b', q2);
test.addTransition(q2, 'd', q2);
test.addTransition(q4, 'a', q4);
test.addTransition(q4, 'c', q4);
test.addTransition(q4, 'd', q4);
test.addTransition(q5, 'a', q5);
test.addTransition(q5, 'b', q5);
test.addTransition(q5, 'd', q5);
test.addTransition(q7, 'a', q7);
test.addTransition(q7, 'b', q7);
test.addTransition(q7, 'd', q7);
test.addTransition(q0, 'b', r1); // transposition
test.addTransition(r1, 'a', q5);
test.addTransition(q1, 'c', r2);
test.addTransition(r2, 'b', q6);
test.addTransition(q4, 'c', r3);
test.addTransition(r3, 'b', q8);
test.addTransition(r1, 'b', r1); // loops in tranposition states
test.addTransition(r1, 'c', r1);
test.addTransition(r1, 'd', r1);
test.addTransition(r2, 'a', r2);
test.addTransition(r2, 'c', r2);
test.addTransition(r2, 'd', r2);
test.addTransition(r3, 'a', r3);
test.addTransition(r3, 'c', r3);
test.addTransition(r3, 'd', r3);
CPPUNIT_ASSERT(resulting_automata == test);
}
// Intentionally empty: the test builds everything it needs locally.
void GeneralizedLevenshteinSequenceMatchingAutomatonTest::setUp() { }
// Intentionally empty: there is no fixture state to release.
void GeneralizedLevenshteinSequenceMatchingAutomatonTest::tearDown() { }
#ifndef GENERALIZED_LEVENSHTEIN_SEQUENCE_MATCHING_AUTOMATA_TEST_H_
#define GENERALIZED_LEVENSHTEIN_SEQUENCE_MATCHING_AUTOMATA_TEST_H_
#include <cppunit/extensions/HelperMacros.h>
/**
 * CppUnit fixture for GeneralizedLevenshteinSequenceMatchingAutomaton.
 * The suite macros below declare a suite containing testSimpleConstruction only.
 */
class GeneralizedLevenshteinSequenceMatchingAutomatonTest : public CppUnit::TestFixture {
CPPUNIT_TEST_SUITE(GeneralizedLevenshteinSequenceMatchingAutomatonTest);
CPPUNIT_TEST(testSimpleConstruction);
CPPUNIT_TEST_SUITE_END();
public:
// Fixture hooks; both are defined with empty bodies in the accompanying .cpp.
void setUp ( );
void tearDown ( );
// Compares the constructed automaton for pattern "abc" with a hand-built one.
void testSimpleConstruction();
};
#endif //GENERALIZED_LEVENSHTEIN_SEQUENCE_MATCHING_AUTOMATA_TEST_H_
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment