Skip to content
Snippets Groups Projects
Commit d11e9fb5 authored by Tomáš Čapek's avatar Tomáš Čapek Committed by Jan Trávníček
Browse files

Implement algorithm for construction of matching automata using Levenshtein method.

parent f35cb472
No related branches found
No related tags found
No related merge requests found
/*
* LevenshteinMatchingAutomaton.cpp
*
* Created on: 12. 3. 2018
* Author: Tomas Capek
*/
#include "LevenshteinMatchingAutomaton.h"
#include <registration/AlgoRegistration.hpp>
namespace stringology {
namespace matching {
// Registers LevenshteinMatchingAutomaton::construct with the algorithm registration framework
// for the overload taking (const string::LinearString < > &, unsigned) and returning
// automaton::EpsilonNFA < DefaultSymbolType, unsigned, unsigned >.
auto LevenshteinMatchingAutomatonLinearString = registration::AbstractRegister <LevenshteinMatchingAutomaton, automaton::EpsilonNFA < DefaultSymbolType, unsigned, unsigned>, const string::LinearString < > &, unsigned > ( LevenshteinMatchingAutomaton::construct );
} /* namespace matching */
} /* namespace stringology */
/*
* LevenshteinMatchingAutomaton.h
*
* Created on: 19. 3. 2018
* Author: Tomas Capek
*/
// Include guard specific to this header. The previous macro was named after a different
// header (exact matching automaton), which risks one of the two headers being silently
// skipped if both define the same guard; it also used a reserved identifier form
// (leading underscore followed by an upper-case letter).
#ifndef LEVENSHTEIN_MATCHING_AUTOMATON_H_
#define LEVENSHTEIN_MATCHING_AUTOMATON_H_
#include <automaton/FSM/EpsilonNFA.h>
#include <automaton/simplify/UnreachableStatesRemover.h>
#include <string/LinearString.h>
namespace stringology {
namespace matching {
class LevenshteinMatchingAutomaton {
public:
/**
* Constructs the Levenshtein matching automaton for a pattern.
*
* The result is an epsilon-NFA with unsigned states. Its initial state 0 loops on every
* symbol of the alphabet; symbol transitions between error levels model replacement and
* insertion, epsilon transitions model deletion. States that are not reachable from the
* initial state are removed before the automaton is returned.
*
* @param pattern linear string to search for; its alphabet becomes the input alphabet of the automaton
* @param allowed_errors maximal number of edit operations (replace, insert, delete) tolerated in a match
* @return automaton for approximate string matching using the Levenshtein distance
*/
template < class SymbolType >
static automaton::EpsilonNFA < SymbolType, unsigned, unsigned > construct(const string::LinearString < SymbolType > & pattern, unsigned int allowed_errors);
};
/**
 * Builds the Levenshtein matching automaton for the given pattern.
 *
 * States are numbered row by row: with m = pattern length, the state reached after
 * matching a prefix of length p with e errors is e * (m + 1) + p. Row e is a copy of the
 * exact matching chain of the pattern and its last state is final. Neighbouring rows are
 * connected by
 *  - replacement: (e, p) --x--> (e + 1, p + 1) for every x different from pattern[p],
 *  - insertion:   (e, p) --x--> (e + 1, p)     for every x different from pattern[p], only when p > e,
 *  - deletion:    (e, p) --epsilon--> (e + 1, p + 1).
 * Finally all states unreachable from the initial state 0 are removed.
 *
 * @param pattern linear string to search for; its alphabet becomes the input alphabet
 * @param allowed_errors maximal number of edit operations tolerated in a match
 * @return epsilon-NFA accepting every text ending with an occurrence of the pattern with at most allowed_errors errors
 */
template < class SymbolType >
automaton::EpsilonNFA < SymbolType, unsigned, unsigned > LevenshteinMatchingAutomaton::construct(const string::LinearString < SymbolType > & pattern, unsigned int allowed_errors) {
	const auto & alphabet = pattern.getAlphabet();
	const auto & content = pattern.getContent();

	// States are of type unsigned, so the pattern length is used as unsigned throughout.
	const unsigned pattern_length = static_cast < unsigned > ( content.size ( ) );
	const unsigned states_per_row = pattern_length + 1;

	automaton::EpsilonNFA < SymbolType, unsigned, unsigned > res( 0 );
	res.setInputAlphabet(alphabet);

	// Add allowed_errors + 1 parallel exact matching chains, one per error level.
	unsigned current_state = 0;
	for (unsigned row = 0; row <= allowed_errors; ++row) {
		// Every row after the first starts with a fresh state. For an empty pattern
		// current_state never leaves 0, so all rows share the initial state.
		if ( current_state > 0 ) {
			++current_state;
			res.addState(current_state);
		}

		// Only the initial state needs the self-loops that allow a match to start anywhere in
		// the text. The first state of any later row is never the target of a transition from
		// another state, so it is removed as unreachable below and loops there would be wasted.
		if ( row == 0 ) {
			for (const SymbolType & symbol : alphabet) {
				res.addTransition( current_state, symbol, current_state );
			}
		}

		for (const SymbolType & symbol : content) {
			++current_state;
			res.addState( current_state );
			res.addTransition( current_state - 1, symbol, current_state );
		}

		res.addFinalState( current_state );
	}

	// Connect each error level with the next one.
	for (unsigned column = 0; column < pattern_length; ++column) {
		const SymbolType & expected = content[column];

		for (unsigned errors = 0; errors < allowed_errors; ++errors) {
			const unsigned from_state = errors * states_per_row + column;
			const unsigned insert_state = ( errors + 1 ) * states_per_row + column;
			const unsigned to_state = insert_state + 1;

			// Insertions are generated only for states strictly above the diagonal.
			const bool allow_insertion = column > errors;

			for (const SymbolType & symbol : alphabet) {
				if (symbol != expected) {
					// diagonal transition: the expected symbol was replaced by another one
					res.addTransition(from_state, symbol, to_state);

					if ( allow_insertion ) {
						// vertical transition: an extra symbol was inserted into the text
						res.addTransition(from_state, symbol, insert_state);
					}
				}
			}

			// epsilon transition: the expected symbol was deleted from the text
			res.addTransition(from_state, to_state);
		}
	}

	// Drop every state that cannot be reached from the initial state.
	return automaton::simplify::UnreachableStatesRemover::remove(res);
}
} /* namespace matching */
} /* namespace stringology */
#endif /* LevenshteinMatchingAutomaton.h include guard */
#include <stringology/matching/LevenshteinMatchingAutomaton.h>
#include <automaton/FSM/EpsilonNFA.h>
#include <string/LinearString.h>
#include "LevenshteinMatchingAutomatonTest.h"
// Register the suite both in the registry named "stringology" and in the default registry.
CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( LevenshteinMatchingAutomatonTest, "stringology" );
CPPUNIT_TEST_SUITE_REGISTRATION ( LevenshteinMatchingAutomatonTest );
void LevenshteinMatchingAutomatonTest::testSimpleConstruction() {
ext::set<char> alphabet{'a', 'b', 'c', 'd'};
string::LinearString <char> input_string(alphabet, ext::vector<char>{'a', 'b', 'c'});
auto resulting_automata = stringology::matching::LevenshteinMatchingAutomaton::construct(input_string, 2);
automaton::EpsilonNFA < char, unsigned, unsigned > res(0);
res.setInputAlphabet(ext::set<char>{'a', 'b', 'c', 'd'});
res.setStates(ext::set<unsigned> {0, 1, 2, 3, 5, 6, 7, 10, 11});
res.setFinalStates(ext::set<unsigned> {3, 7, 11});
res.addTransition(0, 'a', 1); // paralel exact matching automatas
res.addTransition(1, 'b', 2);
res.addTransition(5, 'b', 6);
res.addTransition(2, 'c', 3);
res.addTransition(6, 'c', 7);
res.addTransition(10, 'c', 11);
res.addTransition(0, 'a', 0); // initial state's loops
res.addTransition(0, 'b', 0);
res.addTransition(0, 'c', 0);
res.addTransition(0, 'd', 0);
res.addTransition(0, 'b', 5); // first mistake
res.addTransition(0, 'c', 5);
res.addTransition(0, 'd', 5);
res.addTransition(0, 5); // deletion
res.addTransition(1, 'a', 5); // insertion
res.addTransition(1, 'c', 5);
res.addTransition(1, 'd', 5);
res.addTransition(1, 'a', 6);
res.addTransition(1, 'c', 6);
res.addTransition(1, 'd', 6);
res.addTransition(1, 6); // deletion
res.addTransition(2, 'a', 6); // insertion
res.addTransition(2, 'b', 6);
res.addTransition(2, 'd', 6);
res.addTransition(6, 'a', 10); // insertion
res.addTransition(6, 'b', 10);
res.addTransition(6, 'd', 10);
res.addTransition(5, 'a', 10);
res.addTransition(5, 'c', 10);
res.addTransition(5, 'd', 10);
res.addTransition(5, 10); // deletion
res.addTransition(2, 'a', 7);
res.addTransition(2, 'b', 7);
res.addTransition(2, 'd', 7);
res.addTransition(2, 7); // deletion
res.addTransition(6, 'a', 11);
res.addTransition(6, 'b', 11);
res.addTransition(6, 'd', 11);
res.addTransition(6, 11);
CPPUNIT_ASSERT(resulting_automata == res);
}
// Fixture hook with an empty body: nothing is prepared before a test.
void LevenshteinMatchingAutomatonTest::setUp() { }
// Fixture hook with an empty body: nothing is cleaned up after a test.
void LevenshteinMatchingAutomatonTest::tearDown() { }
// Include guard named after this header; the previous macro was named after the Hamming
// test header, which risks a collision if that header defines the same guard.
#ifndef LEVENSHTEIN_MATCHING_AUTOMATON_TEST_H_
#define LEVENSHTEIN_MATCHING_AUTOMATON_TEST_H_

#include <cppunit/extensions/HelperMacros.h>

/**
 * CppUnit fixture for stringology::matching::LevenshteinMatchingAutomaton.
 */
class LevenshteinMatchingAutomatonTest : public CppUnit::TestFixture {
	CPPUNIT_TEST_SUITE(LevenshteinMatchingAutomatonTest);
	CPPUNIT_TEST(testSimpleConstruction);
	CPPUNIT_TEST_SUITE_END();

public:
	// Fixture hooks inherited from CppUnit::TestFixture.
	void setUp ( ) override;
	void tearDown ( ) override;

	// Compares the automaton constructed for a small pattern with a hand-written expectation.
	void testSimpleConstruction();
};

#endif // LEVENSHTEIN_MATCHING_AUTOMATON_TEST_H_
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment