diff --git a/alib2algo/src/stringology/matching/HammingSequenceMatchingAutomaton.cpp b/alib2algo/src/stringology/matching/HammingSequenceMatchingAutomaton.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..90eaf0a6cfb5cc7e9b0b92847884ea992f98b090
--- /dev/null
+++ b/alib2algo/src/stringology/matching/HammingSequenceMatchingAutomaton.cpp
@@ -0,0 +1,20 @@
+/*
+ * HammingSequenceMatchingAutomaton.cpp
+ *
+ * Created on: 29. 3. 2018
+ * Author: Tomas Capek
+ */
+
+#include "HammingSequenceMatchingAutomaton.h"
+#include <registration/AlgoRegistration.hpp>
+
+namespace stringology {
+
+namespace matching {
+
+// Registers construct for a default-symbol LinearString pattern and an unsigned error bound.
+auto HammingSequenceMatchingAutomatonLinearString = registration::AbstractRegister <HammingSequenceMatchingAutomaton, automaton::NFA < DefaultSymbolType, ext::pair<unsigned int, unsigned int> >, const string::LinearString < > &, unsigned > ( HammingSequenceMatchingAutomaton::construct );
+
+} /* namespace matching */
+
+} /* namespace stringology */
diff --git a/alib2algo/src/stringology/matching/HammingSequenceMatchingAutomaton.h b/alib2algo/src/stringology/matching/HammingSequenceMatchingAutomaton.h
new file mode 100644
index 0000000000000000000000000000000000000000..70a9ae2126ea28071938708e562f4b66161b087a
--- /dev/null
+++ b/alib2algo/src/stringology/matching/HammingSequenceMatchingAutomaton.h
@@ -0,0 +1,70 @@
+/*
+ * HammingSequenceMatchingAutomaton.h
+ *
+ * Created on: 29. 3. 2018
+ * Author: Tomas Capek
+ */
+
+#ifndef HAMMING_SEQUENCE_MATCHING_AUTOMATON_H_
+#define HAMMING_SEQUENCE_MATCHING_AUTOMATON_H_
+
+#include <automaton/FSM/NFA.h>
+#include <stringology/matching/HammingMatchingAutomaton.h>
+#include <string/LinearString.h>
+
+
+namespace stringology {
+
+namespace matching {
+
+class HammingSequenceMatchingAutomaton {
+public:
+	/**
+	 * Creates a Hamming matching automaton for approximate sequence matching.
+	 *
+	 * Takes the automaton built by HammingMatchingAutomaton::construct and adds,
+	 * for every state (i, j) with j <= i < pattern length and j <= allowed_errors,
+	 * a self-loop on each alphabet symbol different from the i-th pattern symbol.
+	 *
+	 * @param pattern pattern string; its content and alphabet are read
+	 * @param allowed_errors error bound passed on to HammingMatchingAutomaton::construct
+	 * @return automaton for approximate sequence matching using Hamming method.
+	 */
+	template < class SymbolType >
+	static automaton::NFA < SymbolType, ext::pair<unsigned int, unsigned int> > construct(const string::LinearString < SymbolType > & pattern, unsigned int allowed_errors);
+};
+
+template < class SymbolType >
+automaton::NFA < SymbolType, ext::pair<unsigned int, unsigned int> > HammingSequenceMatchingAutomaton::construct(const string::LinearString < SymbolType > & pattern, unsigned int allowed_errors) {
+	automaton::NFA < SymbolType, ext::pair<unsigned int, unsigned int > > result = stringology::matching::HammingMatchingAutomaton::construct(pattern, allowed_errors);
+
+	// Read the pattern content and alphabet once instead of on every iteration.
+	const auto & content = pattern.getContent();
+	const auto & alphabet = pattern.getAlphabet();
+
+	for (unsigned int i = 0; i < content.size(); i++) {
+		// Loops are added only on states (i, j) with j <= i and j <= allowed_errors.
+		// Bounding j this way avoids evaluating allowed_errors + 1, which wraps to 0
+		// for the maximal unsigned value.
+		const unsigned int max_errors = i < allowed_errors ? i : allowed_errors;
+
+		for (unsigned int j = 0; j <= max_errors; j++) {
+			auto current_state = ext::make_pair(i, j);
+
+			for (const SymbolType & symbol : alphabet) {
+				if (symbol != content[i]) {
+					result.addTransition(current_state, symbol, current_state);
+				}
+			}
+		}
+	}
+
+	return result;
+}
+
+
+} /* namespace matching */
+
+} /* namespace stringology */
+
+#endif /* HAMMING_SEQUENCE_MATCHING_AUTOMATON_H_ */
diff --git a/alib2algo/test-src/stringology/matching/HammingSequenceMatchingAutomatonTest.cpp b/alib2algo/test-src/stringology/matching/HammingSequenceMatchingAutomatonTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..880ffc46a6faaedf95f98aadfefe1f6662289d22
--- /dev/null
+++ b/alib2algo/test-src/stringology/matching/HammingSequenceMatchingAutomatonTest.cpp
@@ -0,0 +1,91 @@
+#include <stringology/matching/HammingSequenceMatchingAutomaton.h>
+#include <automaton/FSM/NFA.h>
+#include <string/LinearString.h>
+
+#include "HammingSequenceMatchingAutomatonTest.h"
+
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( HammingSequenceMatchingAutomatonTest, "stringology" );
+CPPUNIT_TEST_SUITE_REGISTRATION ( HammingSequenceMatchingAutomatonTest );
+
+void HammingSequenceMatchingAutomatonTest::testSimpleConstruction() {
+	ext::set<char> alphabet{'a', 'b', 'c', 'd'};
+	string::LinearString <char> input_string(alphabet, ext::vector<char>{'a', 'b', 'c'});
+	auto resulting_automata = stringology::matching::HammingSequenceMatchingAutomaton::construct(input_string, 2);
+
+	using State = ext::pair<unsigned int, unsigned int>;
+
+	automaton::NFA < char, State > test(ext::make_pair(0,0));
+	test.setInputAlphabet(alphabet);
+
+	State q0 = ext::make_pair(0,0);
+	State q1 = ext::make_pair(1,0);
+	State q2 = ext::make_pair(2,0);
+	State q3 = ext::make_pair(3,0);
+	State q4 = ext::make_pair(1,1);
+	State q5 = ext::make_pair(2,1);
+	State q6 = ext::make_pair(3,1);
+	State q7 = ext::make_pair(2,2);
+	State q8 = ext::make_pair(3,2);
+
+	test.setStates(ext::set<State> {q0, q1, q2, q3, q4, q5, q6, q7, q8});
+	test.setFinalStates(ext::set<State> {q3, q6, q8});
+
+	test.addTransition(q0, 'a', q1); // vertical transitions (exact matching automata)
+
+	test.addTransition(q1, 'b', q2);
+	test.addTransition(q4, 'b', q5);
+
+	test.addTransition(q2, 'c', q3);
+	test.addTransition(q5, 'c', q6);
+	test.addTransition(q7, 'c', q8);
+
+	test.addTransition(q0, 'a', q0); // loops in initial state
+	test.addTransition(q0, 'b', q0);
+	test.addTransition(q0, 'c', q0);
+	test.addTransition(q0, 'd', q0);
+
+	test.addTransition(q0, 'b', q4); // diagonal transitions representing replace
+	test.addTransition(q0, 'c', q4);
+	test.addTransition(q0, 'd', q4);
+
+	test.addTransition(q1, 'a', q5);
+	test.addTransition(q1, 'c', q5);
+	test.addTransition(q1, 'd', q5);
+	test.addTransition(q4, 'a', q7);
+	test.addTransition(q4, 'c', q7);
+	test.addTransition(q4, 'd', q7);
+
+	test.addTransition(q2, 'a', q6);
+	test.addTransition(q2, 'b', q6);
+	test.addTransition(q2, 'd', q6);
+	test.addTransition(q5, 'd', q8);
+	test.addTransition(q5, 'a', q8);
+	test.addTransition(q5, 'b', q8);
+
+	test.addTransition(q1, 'a', q1); // loops for sequence matching
+	test.addTransition(q1, 'c', q1);
+	test.addTransition(q1, 'd', q1);
+
+	test.addTransition(q2, 'a', q2);
+	test.addTransition(q2, 'b', q2);
+	test.addTransition(q2, 'd', q2);
+
+	test.addTransition(q4, 'a', q4);
+	test.addTransition(q4, 'c', q4);
+	test.addTransition(q4, 'd', q4);
+
+	test.addTransition(q5, 'a', q5);
+	test.addTransition(q5, 'b', q5);
+	test.addTransition(q5, 'd', q5);
+
+	test.addTransition(q7, 'a', q7);
+	test.addTransition(q7, 'b', q7);
+	test.addTransition(q7, 'd', q7);
+
+	CPPUNIT_ASSERT(resulting_automata == test);
+}
+
+void HammingSequenceMatchingAutomatonTest::setUp() { }
+
+void HammingSequenceMatchingAutomatonTest::tearDown() { }
diff --git a/alib2algo/test-src/stringology/matching/HammingSequenceMatchingAutomatonTest.h b/alib2algo/test-src/stringology/matching/HammingSequenceMatchingAutomatonTest.h
new file mode 100644
index 0000000000000000000000000000000000000000..33e9424737985090a88b76ae37a23765370891cd
--- /dev/null
+++ b/alib2algo/test-src/stringology/matching/HammingSequenceMatchingAutomatonTest.h
@@ -0,0 +1,17 @@
+#ifndef HAMMING_SEQUENCE_MATCHING_AUTOMATA_TEST_H_
+#define HAMMING_SEQUENCE_MATCHING_AUTOMATA_TEST_H_
+
+#include <cppunit/extensions/HelperMacros.h>
+
+class HammingSequenceMatchingAutomatonTest : public CppUnit::TestFixture {
+	CPPUNIT_TEST_SUITE(HammingSequenceMatchingAutomatonTest);
+	CPPUNIT_TEST(testSimpleConstruction);
+	CPPUNIT_TEST_SUITE_END();
+
+public:
+	void setUp ( );
+	void tearDown ( );
+
+	void testSimpleConstruction();
+};
+#endif //HAMMING_SEQUENCE_MATCHING_AUTOMATA_TEST_H_