Skip to content
Snippets Groups Projects
Commit 700f8416 authored by Tomáš Čapek's avatar Tomáš Čapek Committed by Jan Trávníček
Browse files

Implement WildcardLinearString into LevenshteinMatchingAutomaton

parent a3972834
No related branches found
No related tags found
No related merge requests found
......@@ -9,7 +9,9 @@
#define _LEVENSHTEIN_MATCHING_AUTOMATON_H__
 
#include <automaton/FSM/EpsilonNFA.h>
#include <automaton/simplify/UnreachableStatesRemover.h>
#include <string/LinearString.h>
#include <string/WildcardLinearString.h>
#include <stringology/matching/HammingMatchingAutomaton.h>
 
 
......@@ -20,12 +22,20 @@ namespace matching {
class LevenshteinMatchingAutomaton {
public:
/**
* Creates a Levenshtein matching automaton from a LinearString.
*
* @param pattern the pattern to build the automaton for
* @param allowed_errors error budget for the match (presumably the maximum edit distance; the implementation is not shown here - confirm)
* @return epsilon NFA for approximate string matching under Levenshtein distance; its states are pairs of unsigned integers
*/
template < class SymbolType >
static automaton::EpsilonNFA < SymbolType, void, ext::pair<unsigned int, unsigned int> > construct(const string::LinearString < SymbolType > & pattern, unsigned int allowed_errors);
/**
* Creates a Levenshtein matching automaton from a WildcardLinearString.
*
* The implementation starts from HammingMatchingAutomaton::construct_unclean,
* adds epsilon (deletion) and symbol (insertion) transitions between
* neighbouring error levels, and removes unreachable states.
*
* @param pattern the pattern to build the automaton for; it exposes a wildcard symbol via getWildcardSymbol()
* @param allowed_errors number of error levels that receive deletion and insertion transitions
* @return epsilon NFA for approximate string matching under Levenshtein distance; its states are pairs of unsigned integers
*/
template < class SymbolType >
static automaton::EpsilonNFA < SymbolType, void, ext::pair<unsigned int, unsigned int> > construct(const string::WildcardLinearString < SymbolType > & pattern, unsigned int allowed_errors);
};
 
 
......@@ -62,6 +72,41 @@ automaton::EpsilonNFA < SymbolType, void, ext::pair<unsigned int, unsigned int>
}
 
 
/**
 * Builds the Levenshtein matching automaton for a pattern with wildcards.
 *
 * The unclean Hamming matching automaton for the same pattern and error
 * budget is used as the base. For every state (position, errors) with
 * errors < allowed_errors and position < pattern length this adds:
 *  - an epsilon transition to (position + 1, errors + 1), i.e. a deletion;
 *  - unless position == errors, a transition to (position, errors + 1) on
 *    every alphabet symbol except the wildcard, i.e. an insertion.
 * Unreachable states are removed from the result before it is returned.
 *
 * @param pattern wildcard pattern the automaton is built for
 * @param allowed_errors number of error levels that receive deletion and insertion transitions
 * @return the resulting epsilon NFA with unreachable states removed
 */
template < class SymbolType >
automaton::EpsilonNFA < SymbolType, void, ext::pair<unsigned int, unsigned int> > LevenshteinMatchingAutomaton::construct(const string::WildcardLinearString < SymbolType > & pattern, unsigned int allowed_errors) {
	auto hamming_base = stringology::matching::HammingMatchingAutomaton::construct_unclean(pattern, allowed_errors);
	automaton::EpsilonNFA < SymbolType, void, ext::pair<unsigned int, unsigned int> > levenshtein (hamming_base);

	// Insertions may consume any real symbol, but never the wildcard itself.
	ext::set<SymbolType> insertable_symbols = pattern.getAlphabet();
	insertable_symbols.erase(pattern.getWildcardSymbol());

	const auto pattern_length = pattern.getContent().size();

	for (unsigned int errors = 0; errors < allowed_errors; ++errors) {
		for (unsigned int position = errors; position < pattern_length; ++position) {
			auto source = ext::make_pair(position, errors);

			// Deletion: skip one pattern symbol without reading input.
			auto deletion_target = ext::make_pair(position + 1, errors + 1);
			levenshtein.addTransition(source, deletion_target);

			// On the position == errors diagonal no insertion transition is added.
			if (position != errors) {
				// Insertion: read one extra input symbol, stay at the same position.
				auto insertion_target = ext::make_pair(position, errors + 1);
				for (const SymbolType& symbol : insertable_symbols) {
					levenshtein.addTransition(source, symbol, insertion_target);
				}
			}
		}
	}

	return automaton::simplify::UnreachableStatesRemover::remove(levenshtein);
}
} /* namespace matching */
 
} /* namespace stringology */
......
......@@ -87,6 +87,67 @@ void LevenshteinMatchingAutomatonTest::testSimpleConstruction() {
CPPUNIT_ASSERT(resulting_automata == res);
}
 
void LevenshteinMatchingAutomatonTest::testSimpleWildcardConstruction() {
ext::set<char> alphabet{'a', 'b', '@'};
string::WildcardLinearString <char> input_string(alphabet, ext::vector<char>{'a', '@', 'b'}, '@');
auto resulting_automata = stringology::matching::LevenshteinMatchingAutomaton::construct(input_string, 2);
typedef ext::pair<unsigned int, unsigned int> State;
automaton::EpsilonNFA < char, void, State > res(ext::make_pair(0,0));
res.setInputAlphabet(alphabet);
State q0 = ext::make_pair(0,0);
State q1 = ext::make_pair(1,0);
State q2 = ext::make_pair(2,0);
State q3 = ext::make_pair(3,0);
State q4 = ext::make_pair(1,1);
State q5 = ext::make_pair(2,1);
State q6 = ext::make_pair(3,1);
State q7 = ext::make_pair(2,2);
State q8 = ext::make_pair(3,2);
res.setStates(ext::set<State> {q0, q1, q2, q3, q4, q5, q6, q7, q8});
res.setFinalStates(ext::set<State> {q3, q6, q8});
res.addTransition(q0, 'a', q0); // initial loops
res.addTransition(q0, 'b', q0);
res.addTransition(q0, 'a', q1); // 3 simple matching automatas
res.addTransition(q1, 'a', q2);
res.addTransition(q1, 'b', q2);
res.addTransition(q4, 'a', q5);
res.addTransition(q4, 'b', q5);
res.addTransition(q2, 'b', q3);
res.addTransition(q5, 'b', q6);
res.addTransition(q7, 'b', q8);
res.addTransition(q0, 'b', q4); // error transitions for replace
res.addTransition(q2, 'a', q6);
res.addTransition(q5, 'a', q8);
res.addTransition(q0, q4); // delete transition
res.addTransition(q1, q5);
res.addTransition(q2, q6);
res.addTransition(q4, q7);
res.addTransition(q5, q8);
res.addTransition(q1, 'a', q4);
res.addTransition(q1, 'b', q4);
res.addTransition(q2, 'a', q5);
res.addTransition(q2, 'b', q5);
res.addTransition(q5, 'a', q7);
res.addTransition(q5, 'b', q7);
CPPUNIT_ASSERT(resulting_automata == res);
}
// Per-test setup hook; intentionally empty.
void LevenshteinMatchingAutomatonTest::setUp() {
}
 
// Per-test teardown hook; intentionally empty.
void LevenshteinMatchingAutomatonTest::tearDown() {
}
......@@ -5,13 +5,15 @@
 
class LevenshteinMatchingAutomatonTest : public CppUnit::TestFixture {
CPPUNIT_TEST_SUITE(LevenshteinMatchingAutomatonTest);
CPPUNIT_TEST(testSimpleConstruction);
CPPUNIT_TEST_SUITE_END();
CPPUNIT_TEST(testSimpleConstruction);
CPPUNIT_TEST(testSimpleWildcardConstruction);
CPPUNIT_TEST_SUITE_END();
 
public:
void setUp ( );
void tearDown ( );
 
void testSimpleConstruction();
void testSimpleWildcardConstruction();
};
#endif //HAMMING_MATCHING_AUTOMATA_TEST_H_
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment