diff --git a/alib2algo/src/stringology/matching/HammingSequenceMatchingAutomaton.h b/alib2algo/src/stringology/matching/HammingSequenceMatchingAutomaton.h index 70a9ae2126ea28071938708e562f4b66161b087a..a8eda045f484005c1e23b056905b335966eef6fa 100644 --- a/alib2algo/src/stringology/matching/HammingSequenceMatchingAutomaton.h +++ b/alib2algo/src/stringology/matching/HammingSequenceMatchingAutomaton.h @@ -10,7 +10,9 @@ #include <automaton/FSM/NFA.h> #include <stringology/matching/HammingMatchingAutomaton.h> +#include <automaton/simplify/UnreachableStatesRemover.h> #include <string/LinearString.h> +#include <string/WildcardLinearString.h> namespace stringology { @@ -20,12 +22,21 @@ namespace matching { class HammingSequenceMatchingAutomaton { public: /** - * Creates Hamming matching automata for sequence matching. + * Creates Hamming matching automata for sequence matching from LinearString. * * @return automata for aproximate sequence matching using Hamming method. */ template < class SymbolType > static automaton::NFA < SymbolType, ext::pair<unsigned int, unsigned int> > construct(const string::LinearString < SymbolType > & pattern, unsigned int allowed_errors); + + /** + * Creates Hamming matching automata for sequence matching from WildcardLinearString. Sequence-matching self-loops are added only on states whose pattern symbol is not the wildcard, and they are never labelled with the wildcard symbol. + * + * @return automata for approximate sequence matching using Hamming method, after UnreachableStatesRemover::remove has been applied to it. 
+ */ + template < class SymbolType > + static automaton::NFA < SymbolType, ext::pair<unsigned int, unsigned int> > construct(const string::WildcardLinearString < SymbolType > & pattern, unsigned int allowed_errors); + }; template < class SymbolType > @@ -47,6 +58,30 @@ automaton::NFA < SymbolType, ext::pair<unsigned int, unsigned int> > HammingSequ return result; } +template < class SymbolType > +automaton::NFA < SymbolType, ext::pair<unsigned int, unsigned int> > HammingSequenceMatchingAutomaton::construct(const string::WildcardLinearString < SymbolType > & pattern, unsigned int allowed_errors) { + automaton::NFA < SymbolType, ext::pair<unsigned int, unsigned int > > result = stringology::matching::HammingMatchingAutomaton::construct_unclean(pattern, allowed_errors); /* start from the automaton built by construct_unclean; the loops below are added on top of it */ + + SymbolType wildcard = pattern.getWildcardSymbol(); + ext::set<SymbolType> alphabet_without_wildcard = pattern.getAlphabet(); + alphabet_without_wildcard.erase(wildcard); /* works on a copy of the alphabet, so the pattern itself is not modified */ + + for (unsigned int j = 0; j<allowed_errors + 1; j++) { /* j = 0..allowed_errors, presumably the number of errors used so far (confirm against HammingMatchingAutomaton). NOTE(review): allowed_errors + 1 wraps to 0 for the maximum unsigned value, in which case no loop is added. */ + for (unsigned int i = j; i<pattern.getContent().size(); i++) { /* i starts at j, so only states (i, j) with i >= j are visited */ + auto current_state = ext::make_pair(i, j); + + if (pattern.getContent()[i] != wildcard) { /* a wildcard position gets no self-loop from this function */ + for (const SymbolType & symbol : alphabet_without_wildcard) { + if (symbol != pattern.getContent()[i]) { /* skip the symbol the pattern holds at position i */ + result.addTransition(current_state, symbol, current_state); /* self-loop: reading symbol keeps the automaton in (i, j) */ + } + } + } + } + } + + return automaton::simplify::UnreachableStatesRemover::remove(result); +} } /* namespace matching */ diff --git a/alib2algo/test-src/stringology/matching/HammingSequenceMatchingAutomatonTest.cpp b/alib2algo/test-src/stringology/matching/HammingSequenceMatchingAutomatonTest.cpp index 880ffc46a6faaedf95f98aadfefe1f6662289d22..2c0ee5041678ffb69c61a84bfdf014e60098b87b 100644 --- a/alib2algo/test-src/stringology/matching/HammingSequenceMatchingAutomatonTest.cpp +++ b/alib2algo/test-src/stringology/matching/HammingSequenceMatchingAutomatonTest.cpp @@ -86,6 +86,52 @@ void HammingSequenceMatchingAutomatonTest::testSimpleConstruction() { 
CPPUNIT_ASSERT(resulting_automata == test); } +void HammingSequenceMatchingAutomatonTest::testSimpleWildcardConstruction() { /* Builds the automaton for the pattern a, @, b (with '@' as the wildcard symbol) and 2 allowed errors, then compares it with the hand-built NFA res. */ + ext::set<char> alphabet{'a', 'b', '@'}; + string::WildcardLinearString <char> input_string(alphabet, ext::vector<char>{'a', '@', 'b'}, '@'); + auto resulting_automata = stringology::matching::HammingSequenceMatchingAutomaton::construct(input_string, 2); + + typedef ext::pair<unsigned int, unsigned int> State; + + automaton::NFA < char, State > res(ext::make_pair(0,0)); + res.setInputAlphabet(alphabet); + + State q0 = ext::make_pair(0,0); + State q1 = ext::make_pair(1,0); + State q2 = ext::make_pair(2,0); + State q3 = ext::make_pair(3,0); + State q4 = ext::make_pair(1,1); + State q5 = ext::make_pair(2,1); + State q6 = ext::make_pair(3,1); + State q7 = ext::make_pair(3,2); + + res.setStates(ext::set<State> {q0, q1, q2, q3, q4, q5, q6, q7}); + res.setFinalStates(ext::set<State> {q3, q6, q7}); + + res.addTransition(q0, 'a', q0); // initial loops + res.addTransition(q0, 'b', q0); + + res.addTransition(q0, 'a', q1); // 3 simple matching automata (third is not connected) + + res.addTransition(q1, 'a', q2); + res.addTransition(q1, 'b', q2); + res.addTransition(q4, 'a', q5); + res.addTransition(q4, 'b', q5); + + res.addTransition(q2, 'b', q3); + res.addTransition(q5, 'b', q6); + + res.addTransition(q0, 'b', q4); // error transitions + + res.addTransition(q2, 'a', q6); + res.addTransition(q5, 'a', q7); + + res.addTransition(q2, 'a', q2); // loops for sequence matching (none on q1 and q4, whose pattern symbol is the wildcard) + res.addTransition(q5, 'a', q5); + + CPPUNIT_ASSERT(resulting_automata == res); +} + void HammingSequenceMatchingAutomatonTest::setUp() { } void HammingSequenceMatchingAutomatonTest::tearDown() { } diff --git a/alib2algo/test-src/stringology/matching/HammingSequenceMatchingAutomatonTest.h b/alib2algo/test-src/stringology/matching/HammingSequenceMatchingAutomatonTest.h index 33e9424737985090a88b76ae37a23765370891cd..c33a109a6c461d698e12c8270f6bad2706fc3e23 100644 --- 
a/alib2algo/test-src/stringology/matching/HammingSequenceMatchingAutomatonTest.h +++ b/alib2algo/test-src/stringology/matching/HammingSequenceMatchingAutomatonTest.h @@ -5,13 +5,15 @@ class HammingSequenceMatchingAutomatonTest : public CppUnit::TestFixture { CPPUNIT_TEST_SUITE(HammingSequenceMatchingAutomatonTest); - CPPUNIT_TEST(testSimpleConstruction); - CPPUNIT_TEST_SUITE_END(); + CPPUNIT_TEST(testSimpleConstruction); + CPPUNIT_TEST(testSimpleWildcardConstruction); /* adds the wildcard-pattern test to this suite */ + CPPUNIT_TEST_SUITE_END(); public: void setUp ( ); void tearDown ( ); void testSimpleConstruction(); + void testSimpleWildcardConstruction(); /* checks construct() on a WildcardLinearString pattern against a hand-built NFA */ }; #endif //HAMMING_SEQUENCE_MATCHING_AUTOMATA_TEST_H_