diff --git a/alib2algo/src/stringology/matching/HammingMatchingAutomaton.cpp b/alib2algo/src/stringology/matching/HammingMatchingAutomaton.cpp index 2e712d87d7328bf0b87a0c72ab1ce52cfe785403..758aaed89a7c5abcbcfe4ce28d552b846b0fae80 100644 --- a/alib2algo/src/stringology/matching/HammingMatchingAutomaton.cpp +++ b/alib2algo/src/stringology/matching/HammingMatchingAutomaton.cpp @@ -12,7 +12,7 @@ namespace stringology { namespace matching { -auto HammingMatchingAutomatonLinearString = registration::AbstractRegister <HammingMatchingAutomaton, automaton::NFA < DefaultSymbolType, unsigned >, const string::LinearString < > &, unsigned > ( HammingMatchingAutomaton::construct ); +auto HammingMatchingAutomatonLinearString = registration::AbstractRegister <HammingMatchingAutomaton, automaton::NFA < DefaultSymbolType, ext::pair<unsigned int, unsigned int> >, const string::LinearString < > &, unsigned > ( HammingMatchingAutomaton::construct ); } /* namespace matching */ diff --git a/alib2algo/src/stringology/matching/HammingMatchingAutomaton.h b/alib2algo/src/stringology/matching/HammingMatchingAutomaton.h index 1e7fbab3ab1ef812c135290e719975e4938ee27c..495856e8e16ec7675169fb6dd68da1fd97ec063e 100644 --- a/alib2algo/src/stringology/matching/HammingMatchingAutomaton.h +++ b/alib2algo/src/stringology/matching/HammingMatchingAutomaton.h @@ -5,8 +5,8 @@ * Author: Tomas Capek */ -#ifndef _EXACT_MATCHING_AUTOMATON_H__ -#define _EXACT_MATCHING_AUTOMATON_H__ +#ifndef _HAMMING_MATCHING_AUTOMATON_H__ +#define _HAMMING_MATCHING_AUTOMATON_H__ #include <automaton/FSM/NFA.h> #include <automaton/simplify/UnreachableStatesRemover.h> @@ -24,53 +24,55 @@ public: * @return automata for aproximate string matching using Hamming algorithm */ template < class SymbolType > - static automaton::NFA < SymbolType, unsigned > construct(const string::LinearString < SymbolType > & pattern, unsigned int allowed_errors); + static automaton::NFA < SymbolType, ext::pair<unsigned int, unsigned int> > construct(const string::LinearString < SymbolType > & pattern, unsigned int allowed_errors); }; - template < class SymbolType > -automaton::NFA < SymbolType, unsigned > HammingMatchingAutomaton::construct(const string::LinearString < SymbolType > & pattern, unsigned int allowed_errors) { - automaton::NFA < SymbolType, unsigned > res( 0 ); - res.setInputAlphabet(pattern.getAlphabet()); - +automaton::NFA < SymbolType, ext::pair<unsigned int, unsigned int> > HammingMatchingAutomaton::construct(const string::LinearString < SymbolType > & pattern, unsigned int allowed_errors) { + automaton::NFA < SymbolType, ext::pair<unsigned int, unsigned int > > result( ext::make_pair(0, 0) ); + result.setInputAlphabet(pattern.getAlphabet()); + // add k+1 paralel automatas (sfoeco type = exact matching) (where k is allowed_errors) - unsigned current_state = 0; - for (unsigned i = 0; i <= allowed_errors; i++) { - if ( current_state > 0 ) { - ++current_state; - res.addState(current_state); + for (unsigned int i = 0; i<pattern.getContent().size() + 1; i++) { + for (unsigned int j = 0; j<allowed_errors + 1; j++) { + result.addState(ext::make_pair(i, j)); + if (i == pattern.getContent().size()) { + result.addFinalState(ext::make_pair(i, j)); + } } + } - for(const SymbolType& symbol : pattern.getAlphabet()) { - res.addTransition( current_state, symbol, current_state); + for (unsigned int i = 0; i<allowed_errors + 1; i++) { + for (const SymbolType& symbol : pattern.getAlphabet()) { + auto initial_state = ext::make_pair(0, i); + result.addTransition(initial_state, symbol, initial_state); } + } - for(const SymbolType& symbol : pattern.getContent()) { - ++current_state; - res.addState( current_state ); - res.addTransition( current_state - 1, symbol, current_state ); + for (unsigned int i = 0; i<pattern.getContent().size(); i++) { + for (unsigned int j = 0; j < allowed_errors + 1; j++) { + auto from = ext::make_pair(i, j); + auto to = ext::make_pair(i+1, j); + result.addTransition(from, pattern.getContent()[i], to); } - res.addFinalState( current_state ); } - for (unsigned int i = 0; i < pattern.getContent().size(); i++) { - for (unsigned int j = 0; j < allowed_errors; j++) { - unsigned int states_count = pattern.getContent().size() + 1; - - unsigned int from_state = j*states_count + i; - unsigned int to_state = (j+1)*states_count + i + 1; + // add diagonal addTransition + for (unsigned int i = 0; i<pattern.getContent().size(); i++) { + for (unsigned int j = 0; j<allowed_errors; j++) { + auto from = ext::make_pair(i, j); + auto to = ext::make_pair(i + 1, j + 1); - for ( const SymbolType& symbol : pattern.getAlphabet()) { + for ( const SymbolType & symbol : pattern.getAlphabet()) { if (symbol != pattern.getContent()[i]) { - // add diagonal addTransition - res.addTransition(from_state, symbol, to_state); + result.addTransition(from, symbol, to); } } } } - + // remove all inaccessible states from state - return automaton::simplify::UnreachableStatesRemover::remove(res); + return automaton::simplify::UnreachableStatesRemover::remove(result); } diff --git a/alib2algo/test-src/stringology/matching/HammingMatchingAutomatonTest.cpp b/alib2algo/test-src/stringology/matching/HammingMatchingAutomatonTest.cpp index 0a0059cc7813f3828ba2f41b8374873a59894b39..fa005f18463caf674be65bf6be10d7ce91b260f9 100644 --- a/alib2algo/test-src/stringology/matching/HammingMatchingAutomatonTest.cpp +++ b/alib2algo/test-src/stringology/matching/HammingMatchingAutomatonTest.cpp @@ -1,5 +1,3 @@ -#include <iostream> - #include <stringology/matching/HammingMatchingAutomaton.h> #include <automaton/FSM/NFA.h> #include <string/LinearString.h> @@ -7,57 +5,68 @@ #include "HammingMatchingAutomatonTest.h" + CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( HammingMatchingAutomatonTest, "stringology" ); CPPUNIT_TEST_SUITE_REGISTRATION ( HammingMatchingAutomatonTest ); - void HammingMatchingAutomatonTest::testSimpleConstruction() { - automaton::NFA < char, unsigned > test(0); - ext::set<char> alphabet{'a', 'b', 'c', 'd'}; string::LinearString <char> input_string(alphabet, ext::vector<char>{'a', 'b', 'c'}); auto resulting_automata = stringology::matching::HammingMatchingAutomaton::construct(input_string, 2); - automaton::NFA < char, unsigned > res(0); + typedef ext::pair<unsigned int, unsigned int> State; + + automaton::NFA < char, State > res(ext::make_pair(0,0)); res.setInputAlphabet(ext::set<char>{'a', 'b', 'c', 'd'}); - res.setStates(ext::set<unsigned> {0, 1, 2, 3, 5, 6, 7, 10, 11}); - res.setFinalStates(ext::set<unsigned> {3, 7, 11}); - - res.addTransition(0, 'a', 1); - - res.addTransition(1, 'b', 2); - res.addTransition(5, 'b', 6); - - res.addTransition(2, 'c', 3); - res.addTransition(6, 'c', 7); - res.addTransition(10, 'c', 11); - - res.addTransition(0, 'a', 0); - res.addTransition(0, 'b', 0); - res.addTransition(0, 'c', 0); - res.addTransition(0, 'd', 0); - - res.addTransition(0, 'b', 5); - res.addTransition(0, 'c', 5); - res.addTransition(0, 'd', 5); - - res.addTransition(1, 'a', 6); - res.addTransition(1, 'c', 6); - res.addTransition(1, 'd', 6); - res.addTransition(5, 'a', 10); - res.addTransition(5, 'c', 10); - res.addTransition(5, 'd', 10); - - res.addTransition(2, 'a', 7); - res.addTransition(2, 'b', 7); - res.addTransition(2, 'd', 7); - res.addTransition(6, 'a', 11); - res.addTransition(6, 'b', 11); - res.addTransition(6, 'd', 11); - + + State q0 = ext::make_pair(0,0); + State q1 = ext::make_pair(1,0); + State q2 = ext::make_pair(2,0); + State q3 = ext::make_pair(3,0); + State q4 = ext::make_pair(1,1); + State q5 = ext::make_pair(2,1); + State q6 = ext::make_pair(3,1); + State q7 = ext::make_pair(2,2); + State q8 = ext::make_pair(3,2); + + res.setStates(ext::set<State> {q0, q1, q2, q3, q4, q5, q6, q7, q8}); + res.setFinalStates(ext::set<State> {q3, q6, q8}); + + res.addTransition(q0, 'a', q1); // vertical transitions (exact matching automata) + + res.addTransition(q1, 'b', q2); + res.addTransition(q4, 'b', q5); + + res.addTransition(q2, 'c', q3); + res.addTransition(q5, 'c', q6); + res.addTransition(q7, 'c', q8); + + res.addTransition(q0, 'a', q0); // loops in initial state + res.addTransition(q0, 'b', q0); + res.addTransition(q0, 'c', q0); + res.addTransition(q0, 'd', q0); + + res.addTransition(q0, 'b', q4); // diagonal transitions representing replace + res.addTransition(q0, 'c', q4); + res.addTransition(q0, 'd', q4); + + res.addTransition(q1, 'a', q5); + res.addTransition(q1, 'c', q5); + res.addTransition(q1, 'd', q5); + res.addTransition(q4, 'a', q7); + res.addTransition(q4, 'c', q7); + res.addTransition(q4, 'd', q7); + + res.addTransition(q2, 'a', q6); + res.addTransition(q2, 'b', q6); + res.addTransition(q2, 'd', q6); + res.addTransition(q5, 'a', q8); + res.addTransition(q5, 'b', q8); + res.addTransition(q5, 'd', q8); + CPPUNIT_ASSERT(resulting_automata == res); } void HammingMatchingAutomatonTest::setUp() { } -void HammingMatchingAutomatonTest::tearDown() { } \ No newline at end of file +void HammingMatchingAutomatonTest::tearDown() { }