diff --git a/alib2algo/src/stringology/matching/HammingMatchingAutomaton.cpp b/alib2algo/src/stringology/matching/HammingMatchingAutomaton.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2e712d87d7328bf0b87a0c72ab1ce52cfe785403
--- /dev/null
+++ b/alib2algo/src/stringology/matching/HammingMatchingAutomaton.cpp
@@ -0,0 +1,20 @@
+/*
+ * HammingMatchingAutomaton.cpp
+ *
+ * Created on: 12. 3. 2018
+ * Author: Tomas Capek
+ */
+
+#include "HammingMatchingAutomaton.h"
+#include <registration/AlgoRegistration.hpp>
+
+namespace stringology {
+
+namespace matching {
+
+// passes HammingMatchingAutomaton::construct for string::LinearString < > patterns to registration::AbstractRegister
+auto HammingMatchingAutomatonLinearString = registration::AbstractRegister <HammingMatchingAutomaton, automaton::NFA < DefaultSymbolType, unsigned >, const string::LinearString < > &, unsigned > ( HammingMatchingAutomaton::construct );
+
+} /* namespace matching */
+
+} /* namespace stringology */
diff --git a/alib2algo/src/stringology/matching/HammingMatchingAutomaton.h b/alib2algo/src/stringology/matching/HammingMatchingAutomaton.h
new file mode 100644
index 0000000000000000000000000000000000000000..1e7fbab3ab1ef812c135290e719975e4938ee27c
--- /dev/null
+++ b/alib2algo/src/stringology/matching/HammingMatchingAutomaton.h
@@ -0,0 +1,100 @@
+/*
+ * HammingMatchingAutomaton.h
+ *
+ * Created on: 12. 3. 2018
+ * Author: Tomas Capek
+ */
+
+#ifndef _HAMMING_MATCHING_AUTOMATON_H__
+#define _HAMMING_MATCHING_AUTOMATON_H__
+
+#include <automaton/FSM/NFA.h>
+#include <automaton/simplify/UnreachableStatesRemover.h>
+#include <string/LinearString.h>
+
+namespace stringology {
+
+namespace matching {
+
+/**
+ * Builds the nondeterministic finite automaton for approximate string matching under Hamming distance.
+ */
+class HammingMatchingAutomaton {
+public:
+	/**
+	 * Creates the Hamming matching automaton for the given pattern.
+	 *
+	 * State 0 is the initial state. Error level j occupies the state ids j * ( |pattern| + 1 )
+	 * up to j * ( |pattern| + 1 ) + |pattern| and the last id of every level is a final state.
+	 * States that cannot be reached from the initial state are removed before the automaton
+	 * is returned, so the ids present in the result are not contiguous in general.
+	 *
+	 * @param pattern the pattern to search for; its alphabet becomes the input alphabet of the automaton
+	 * @param allowed_errors maximal number of mismatched symbols ( k )
+	 * @return automaton for approximate string matching using Hamming distance
+	 */
+	template < class SymbolType >
+	static automaton::NFA < SymbolType, unsigned > construct(const string::LinearString < SymbolType > & pattern, unsigned int allowed_errors);
+};
+
+
+template < class SymbolType >
+automaton::NFA < SymbolType, unsigned > HammingMatchingAutomaton::construct(const string::LinearString < SymbolType > & pattern, unsigned int allowed_errors) {
+	const auto & alphabet = pattern.getAlphabet();
+	const auto & content = pattern.getContent();
+
+	// every error level is a chain of |pattern| + 1 consecutive state ids,
+	// hence the state of level j at depth i has id j * states_per_level + i
+	const unsigned pattern_length = static_cast < unsigned > ( content.size() );
+	const unsigned states_per_level = pattern_length + 1;
+
+	const unsigned initial_state = 0;
+	automaton::NFA < SymbolType, unsigned > res( initial_state );
+	res.setInputAlphabet(alphabet);
+
+	// self loop on the initial state only; the first state of every higher level has no incoming
+	// transition from another state, so it is unreachable and removed below together with any loop on it
+	for (const SymbolType& symbol : alphabet) {
+		res.addTransition(initial_state, symbol, initial_state);
+	}
+
+	// add k+1 parallel exact matching chains, one per error level (where k is allowed_errors)
+	for (unsigned level = 0; level <= allowed_errors; ++level) {
+		const unsigned first_state = level * states_per_level;
+
+		// the first state of level 0 is the initial state, which res already contains
+		if (level > 0) {
+			res.addState(first_state);
+		}
+
+		for (unsigned depth = 0; depth < pattern_length; ++depth) {
+			res.addState(first_state + depth + 1);
+			res.addTransition(first_state + depth, content[depth], first_state + depth + 1);
+		}
+		res.addFinalState(first_state + pattern_length);
+	}
+
+	// add diagonal transitions: reading a mismatching symbol at depth i moves from level j to level j+1
+	for (unsigned level = 0; level < allowed_errors; ++level) {
+		for (unsigned depth = 0; depth < pattern_length; ++depth) {
+			const unsigned from_state = level * states_per_level + depth;
+			const unsigned to_state = (level + 1) * states_per_level + depth + 1;
+
+			for (const SymbolType& symbol : alphabet) {
+				if (symbol != content[depth]) {
+					res.addTransition(from_state, symbol, to_state);
+				}
+			}
+		}
+	}
+
+	// remove all states that are unreachable from the initial state
+	return automaton::simplify::UnreachableStatesRemover::remove(res);
+}
+
+
+} /* namespace matching */
+
+} /* namespace stringology */
+
+#endif /* _HAMMING_MATCHING_AUTOMATON_H__ */
diff --git a/alib2algo/test-src/stringology/matching/HammingMatchingAutomatonTest.cpp
b/alib2algo/test-src/stringology/matching/HammingMatchingAutomatonTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0a0059cc7813f3828ba2f41b8374873a59894b39
--- /dev/null
+++ b/alib2algo/test-src/stringology/matching/HammingMatchingAutomatonTest.cpp
@@ -0,0 +1,73 @@
+#include <iostream>
+
+#include <stringology/matching/HammingMatchingAutomaton.h>
+#include <automaton/FSM/NFA.h>
+#include <string/LinearString.h>
+
+#include "HammingMatchingAutomatonTest.h"
+
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( HammingMatchingAutomatonTest, "stringology" );
+CPPUNIT_TEST_SUITE_REGISTRATION ( HammingMatchingAutomatonTest );
+
+
+/**
+ * Constructs the automaton for pattern "abc" over the alphabet { a, b, c, d } with 2 allowed
+ * errors and compares it with a hand-written expected automaton.
+ *
+ * Error level j uses the state ids 4 * j up to 4 * j + 3. States 4, 8 and 9 are absent from
+ * the expected automaton because construct removes states unreachable from the initial state.
+ */
+void HammingMatchingAutomatonTest::testSimpleConstruction() {
+	ext::set<char> alphabet{'a', 'b', 'c', 'd'};
+	string::LinearString <char> input_string(alphabet, ext::vector<char>{'a', 'b', 'c'});
+	auto resulting_automata = stringology::matching::HammingMatchingAutomaton::construct(input_string, 2);
+
+	automaton::NFA < char, unsigned > res(0);
+	res.setInputAlphabet(alphabet);
+	res.setStates(ext::set<unsigned> {0, 1, 2, 3, 5, 6, 7, 10, 11});
+	res.setFinalStates(ext::set<unsigned> {3, 7, 11});
+
+	// exact matching chains of the three error levels
+	res.addTransition(0, 'a', 1);
+
+	res.addTransition(1, 'b', 2);
+	res.addTransition(5, 'b', 6);
+
+	res.addTransition(2, 'c', 3);
+	res.addTransition(6, 'c', 7);
+	res.addTransition(10, 'c', 11);
+
+	// self loop on the initial state
+	res.addTransition(0, 'a', 0);
+	res.addTransition(0, 'b', 0);
+	res.addTransition(0, 'c', 0);
+	res.addTransition(0, 'd', 0);
+
+	// mismatch on the first pattern symbol 'a'
+	res.addTransition(0, 'b', 5);
+	res.addTransition(0, 'c', 5);
+	res.addTransition(0, 'd', 5);
+
+	// mismatch on the second pattern symbol 'b'
+	res.addTransition(1, 'a', 6);
+	res.addTransition(1, 'c', 6);
+	res.addTransition(1, 'd', 6);
+	res.addTransition(5, 'a', 10);
+	res.addTransition(5, 'c', 10);
+	res.addTransition(5, 'd', 10);
+
+	// mismatch on the third pattern symbol 'c'
+	res.addTransition(2, 'a', 7);
+	res.addTransition(2, 'b', 7);
+	res.addTransition(2, 'd', 7);
+	res.addTransition(6, 'a', 11);
+	res.addTransition(6, 'b', 11);
+	res.addTransition(6, 'd', 11);
+
+	CPPUNIT_ASSERT(resulting_automata == res);
+}
+
+void HammingMatchingAutomatonTest::setUp() { }
+
+void HammingMatchingAutomatonTest::tearDown() { }
diff --git a/alib2algo/test-src/stringology/matching/HammingMatchingAutomatonTest.h b/alib2algo/test-src/stringology/matching/HammingMatchingAutomatonTest.h
new file mode 100644
index 0000000000000000000000000000000000000000..6471d078bb4e38aad97e4bab528f9366660dcb6b
--- /dev/null
+++ b/alib2algo/test-src/stringology/matching/HammingMatchingAutomatonTest.h
@@ -0,0 +1,21 @@
+#ifndef HAMMING_MATCHING_AUTOMATA_TEST_H_
+#define HAMMING_MATCHING_AUTOMATA_TEST_H_
+
+#include <cppunit/extensions/HelperMacros.h>
+
+/**
+ * CppUnit fixture for stringology::matching::HammingMatchingAutomaton.
+ */
+class HammingMatchingAutomatonTest : public CppUnit::TestFixture {
+	CPPUNIT_TEST_SUITE(HammingMatchingAutomatonTest);
+	CPPUNIT_TEST(testSimpleConstruction);
+	CPPUNIT_TEST_SUITE_END();
+
+public:
+	void setUp ( );
+	void tearDown ( );
+
+	void testSimpleConstruction();
+};
+
+#endif //HAMMING_MATCHING_AUTOMATA_TEST_H_