Skip to content
Snippets Groups Projects
Commit 03443ad1 authored by Tomáš Čapek's avatar Tomáš Čapek Committed by Jan Trávníček
Browse files

Implement WildcardLinearString into LevenshteinSequenceMatchingAutomaton

parent 50534f65
No related branches found
No related tags found
No related merge requests found
......@@ -11,6 +11,7 @@
#include <automaton/FSM/EpsilonNFA.h>
#include <stringology/matching/LevenshteinMatchingAutomaton.h>
#include <string/LinearString.h>
#include <string/WildcardLinearString.h>
 
 
namespace stringology {
......@@ -20,12 +21,21 @@ namespace matching {
/**
* Static factory of Levenshtein automata for approximate sequence matching.
* Both overloads return an epsilon NFA whose states are pairs of unsigned integers.
*/
class LevenshteinSequenceMatchingAutomaton {
public:
/**
* Creates Levenshtein matching automata for sequence matching from LinearString.
*
* @param pattern the pattern the automaton is built for
* @param allowed_errors error bound of the construction (presumably the maximal number of edit operations tolerated - confirm against the definition)
* @return automata for approximate sequence matching using Levenshtein method.
*/
template < class SymbolType >
static automaton::EpsilonNFA < SymbolType, void, ext::pair<unsigned int, unsigned int> > construct(const string::LinearString < SymbolType > & pattern, unsigned int allowed_errors);
/**
* Creates Levenshtein matching automata for sequence matching from WildcardLinearString.
*
* @param pattern the pattern, possibly containing its wildcard symbol, the automaton is built for
* @param allowed_errors error bound of the construction (presumably the maximal number of edit operations tolerated - confirm against the definition)
* @return automata for approximate sequence matching using Levenshtein method.
*/
template < class SymbolType >
static automaton::EpsilonNFA < SymbolType, void, ext::pair<unsigned int, unsigned int> > construct(const string::WildcardLinearString < SymbolType > & pattern, unsigned int allowed_errors);
};
 
template < class SymbolType >
......@@ -47,6 +57,31 @@ automaton::EpsilonNFA < SymbolType, void, ext::pair<unsigned int, unsigned int>
return result;
}
 
/**
 * Builds the Levenshtein sequence matching automaton for a pattern with wildcards.
 *
 * The result of LevenshteinMatchingAutomaton::construct is taken as the base. Every
 * state (position, errors) with errors in 0..allowed_errors and position in
 * errors..(pattern length - 1) whose pattern symbol is not the wildcard then receives
 * a self-loop on each non-wildcard alphabet symbol that differs from that pattern
 * symbol. States sitting on a wildcard position get no additional loops.
 *
 * @param pattern the pattern, possibly containing its wildcard symbol
 * @param allowed_errors highest error level whose states receive the loops
 * @return the base automaton extended by the self-loops described above
 */
template < class SymbolType >
automaton::EpsilonNFA < SymbolType, void, ext::pair<unsigned int, unsigned int> > LevenshteinSequenceMatchingAutomaton::construct(const string::WildcardLinearString < SymbolType > & pattern, unsigned int allowed_errors) {
	auto nfa = stringology::matching::LevenshteinMatchingAutomaton::construct(pattern, allowed_errors);

	const SymbolType wildcard_symbol = pattern.getWildcardSymbol();
	const auto & content = pattern.getContent();

	// Symbols eligible for a self-loop: the whole alphabet except the wildcard itself.
	ext::set<SymbolType> loop_candidates = pattern.getAlphabet();
	loop_candidates.erase(wildcard_symbol);

	for (unsigned int errors = 0; errors < allowed_errors + 1; ++errors) {
		// Only positions >= errors are visited on a given error level.
		for (unsigned int position = errors; position < content.size(); ++position) {
			const SymbolType & expected = content[position];
			auto state = ext::make_pair(position, errors);

			if (expected != wildcard_symbol) {
				for (const SymbolType & candidate : loop_candidates) {
					if (candidate != expected) {
						nfa.addTransition(state, candidate, state);
					}
				}
			}
		}
	}

	return nfa;
}
 
} /* namespace matching */
 
......
......@@ -106,6 +106,72 @@ void LevenshteinSequenceMatchingAutomatonTest::testSimpleConstruction() {
CPPUNIT_ASSERT(resulting_automata == test);
}
 
void LevenshteinSequenceMatchingAutomatonTest::testSimpleWildcardConstruction() {
ext::set<char> alphabet{'a', 'b', '@'};
string::WildcardLinearString <char> input_string(alphabet, ext::vector<char>{'a', '@', 'b'}, '@');
auto resulting_automata = stringology::matching::LevenshteinSequenceMatchingAutomaton::construct(input_string, 2);
typedef ext::pair<unsigned int, unsigned int> State;
automaton::EpsilonNFA < char, void, State > res(ext::make_pair(0,0));
res.setInputAlphabet(alphabet);
State q0 = ext::make_pair(0,0);
State q1 = ext::make_pair(1,0);
State q2 = ext::make_pair(2,0);
State q3 = ext::make_pair(3,0);
State q4 = ext::make_pair(1,1);
State q5 = ext::make_pair(2,1);
State q6 = ext::make_pair(3,1);
State q7 = ext::make_pair(2,2);
State q8 = ext::make_pair(3,2);
res.setStates(ext::set<State> {q0, q1, q2, q3, q4, q5, q6, q7, q8});
res.setFinalStates(ext::set<State> {q3, q6, q8});
res.addTransition(q0, 'a', q0); // initial loops
res.addTransition(q0, 'b', q0);
res.addTransition(q0, 'a', q1); // 3 simple matching automatas
res.addTransition(q1, 'a', q2);
res.addTransition(q1, 'b', q2);
res.addTransition(q4, 'a', q5);
res.addTransition(q4, 'b', q5);
res.addTransition(q2, 'b', q3);
res.addTransition(q5, 'b', q6);
res.addTransition(q7, 'b', q8);
res.addTransition(q0, 'b', q4); // error transitions for replace
res.addTransition(q2, 'a', q6);
res.addTransition(q5, 'a', q8);
res.addTransition(q0, q4); // delete transition
res.addTransition(q1, q5);
res.addTransition(q2, q6);
res.addTransition(q4, q7);
res.addTransition(q5, q8);
res.addTransition(q1, 'a', q4);
res.addTransition(q1, 'b', q4);
res.addTransition(q2, 'a', q5);
res.addTransition(q2, 'b', q5);
res.addTransition(q5, 'a', q7);
res.addTransition(q5, 'b', q7);
res.addTransition(q2, 'a', q2); // sequence matching loops
res.addTransition(q5, 'a', q5);
res.addTransition(q7, 'a', q7);
CPPUNIT_ASSERT(resulting_automata == res);
}
// Intentionally empty: this hook prepares no fixture state.
void LevenshteinSequenceMatchingAutomatonTest::setUp() { }
 
// Intentionally empty: this hook has nothing to release.
void LevenshteinSequenceMatchingAutomatonTest::tearDown() { }
......@@ -5,13 +5,15 @@
 
class LevenshteinSequenceMatchingAutomatonTest : public CppUnit::TestFixture {
CPPUNIT_TEST_SUITE(LevenshteinSequenceMatchingAutomatonTest);
CPPUNIT_TEST(testSimpleConstruction);
CPPUNIT_TEST_SUITE_END();
CPPUNIT_TEST(testSimpleConstruction);
CPPUNIT_TEST(testSimpleWildcardConstruction);
CPPUNIT_TEST_SUITE_END();
 
public:
void setUp ( );
void tearDown ( );
 
void testSimpleConstruction();
void testSimpleWildcardConstruction();
};
#endif //LEVENSHTEIN_SEQUENCE_MATCHING_AUTOMATA_TEST_H_
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment