From a3972834707d39f9c25e2172c7af70f5e2703e24 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20=C4=8Capek?= <tomas@capek.io>
Date: Fri, 13 Apr 2018 18:57:54 +0200
Subject: [PATCH] Implement WildcardLinearString into HammingMatchingAutomaton.

---
 .../matching/HammingMatchingAutomaton.h       | 89 ++++++++++++++++++-
 .../matching/HammingMatchingAutomatonTest.cpp | 47 +++++++++-
 .../matching/HammingMatchingAutomatonTest.h   |  6 +-
 3 files changed, 138 insertions(+), 4 deletions(-)

diff --git a/alib2algo/src/stringology/matching/HammingMatchingAutomaton.h b/alib2algo/src/stringology/matching/HammingMatchingAutomaton.h
index 495856e8e1..8ea6eabbdc 100644
--- a/alib2algo/src/stringology/matching/HammingMatchingAutomaton.h
+++ b/alib2algo/src/stringology/matching/HammingMatchingAutomaton.h
@@ -3,7 +3,7 @@
  *
  *  Created on: 12. 3. 2018
  *      Author: Tomas Capek
- */
+*/
 
 #ifndef _HAMMING_MATCHING_AUTOMATON_H__
 #define _HAMMING_MATCHING_AUTOMATON_H__
@@ -11,6 +11,7 @@
 #include <automaton/FSM/NFA.h>
 #include <automaton/simplify/UnreachableStatesRemover.h>
 #include <string/LinearString.h>
+#include <string/WildcardLinearString.h>
 
 namespace stringology {
 
@@ -25,6 +26,22 @@ public:
 	 */
 	template < class SymbolType >
 	static automaton::NFA < SymbolType, ext::pair<unsigned int, unsigned int> > construct(const string::LinearString < SymbolType > & pattern, unsigned int allowed_errors);
+
+	/**
+	 * Creates a Hamming matching automaton for a pattern that may contain the wildcard symbol; unreachable states are removed from the result.
+	 *
+	 * @return automaton for approximate string matching using the Hamming distance (at most allowed_errors mismatches)
+	 */
+	template < class SymbolType >
+	static automaton::NFA < SymbolType, ext::pair<unsigned int, unsigned int> > construct(const string::WildcardLinearString < SymbolType > & pattern, unsigned int allowed_errors);
+
+	/**
+	 * Creates a Hamming matching automaton for a pattern that may contain the wildcard symbol, without removing unreachable states.
+	 *
+	 * @return automaton for approximate string matching using the Hamming distance. Unreachable states are left in this automaton.
+	 */
+	template < class SymbolType >
+	static automaton::NFA < SymbolType, ext::pair<unsigned int, unsigned int> > construct_unclean(const string::WildcardLinearString < SymbolType > & pattern, unsigned int allowed_errors);
 };
 
 template < class SymbolType >
@@ -76,6 +93,76 @@ automaton::NFA < SymbolType, ext::pair<unsigned int, unsigned int> > HammingMatc
 }
 
 
+template < class SymbolType >
+automaton::NFA < SymbolType, ext::pair<unsigned int, unsigned int> > HammingMatchingAutomaton::construct_unclean(const string::WildcardLinearString < SymbolType > & pattern, unsigned int allowed_errors) {
+	automaton::NFA < SymbolType, ext::pair<unsigned int, unsigned int > > result( ext::make_pair(0, 0) );
+	result.setInputAlphabet(pattern.getAlphabet());
+	// state (i, j): i pattern symbols read, j mismatches used; transitions are only added on non-wildcard symbols and no state is removed here
+	SymbolType wildcard = pattern.getWildcardSymbol();
+	ext::set<SymbolType> alphabet_without_wildcard = pattern.getAlphabet();
+	alphabet_without_wildcard.erase(wildcard);
+
+	// create the states of k+1 parallel automata (SFOECO type = exact matching), where k is allowed_errors; the last state of each copy is final
+	for (unsigned int i = 0; i<pattern.getContent().size() + 1; i++) {
+		for (unsigned int j = 0; j<allowed_errors + 1; j++) {
+			result.addState(ext::make_pair(i, j));
+			if (i == pattern.getContent().size()) {
+				result.addFinalState(ext::make_pair(i, j));
+			}
+		}
+	}
+	// self-loops on every state (0, i) for each non-wildcard symbol
+	for (unsigned int i = 0; i<allowed_errors + 1; i++) {
+		for (const SymbolType& symbol : alphabet_without_wildcard) {
+			auto initial_state = ext::make_pair(0, i);
+
+			result.addTransition(initial_state, symbol, initial_state);
+		}
+	}
+	// matching transitions (i, j) -> (i+1, j) inside each of the parallel automata
+	for (unsigned int i = 0; i<pattern.getContent().size(); i++) {
+		for (unsigned int j = 0; j < allowed_errors + 1; j++) {
+			auto from = ext::make_pair(i, j);
+			auto to = ext::make_pair(i+1, j);
+			if (pattern.getContent()[i] == pattern.getWildcardSymbol()) { // same symbol as the local 'wildcard'; such a position advances on any non-wildcard symbol
+				for (const SymbolType& symbol : alphabet_without_wildcard) {
+					result.addTransition(from, symbol, to);
+				}
+			} else {
+				result.addTransition(from, pattern.getContent()[i], to);
+			}
+		}
+	}
+
+	// add diagonal (error) transitions (i, j) -> (i+1, j+1) on every non-wildcard symbol that differs from the pattern symbol at position i
+	for (unsigned int i = 0; i<pattern.getContent().size(); i++) {
+		for (unsigned int j = 0; j<allowed_errors; j++) {
+			auto from = ext::make_pair(i, j);
+			auto to = ext::make_pair(i + 1, j + 1);
+
+			if (pattern.getContent()[i] == wildcard) {
+				continue; // wildcard positions get no error transition
+			}
+
+			for ( const SymbolType & symbol : alphabet_without_wildcard ) {
+				if (symbol != pattern.getContent()[i]) {
+					result.addTransition(from, symbol, to);
+				}
+			}
+		}
+	}
+
+	return result;
+}
+
+template < class SymbolType >
+automaton::NFA < SymbolType, ext::pair<unsigned int, unsigned int> > HammingMatchingAutomaton::construct(const string::WildcardLinearString < SymbolType > & pattern, unsigned int allowed_errors) {
+	auto result = HammingMatchingAutomaton::construct_unclean(pattern, allowed_errors);
+
+	// remove all unreachable states from the constructed automaton
+	return automaton::simplify::UnreachableStatesRemover::remove(result);
+}
+
 } /* namespace matching */
 
 } /* namespace stringology */
diff --git a/alib2algo/test-src/stringology/matching/HammingMatchingAutomatonTest.cpp b/alib2algo/test-src/stringology/matching/HammingMatchingAutomatonTest.cpp
index fa005f1846..0b25ab87d4 100644
--- a/alib2algo/test-src/stringology/matching/HammingMatchingAutomatonTest.cpp
+++ b/alib2algo/test-src/stringology/matching/HammingMatchingAutomatonTest.cpp
@@ -1,11 +1,11 @@
 #include <stringology/matching/HammingMatchingAutomaton.h>
 #include <automaton/FSM/NFA.h>
 #include <string/LinearString.h>
+#include <string/WildcardLinearString.h>
 
 #include "HammingMatchingAutomatonTest.h"
 
 
-
 CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( HammingMatchingAutomatonTest, "stringology" );
 CPPUNIT_TEST_SUITE_REGISTRATION ( HammingMatchingAutomatonTest );
 
@@ -67,6 +67,51 @@ void HammingMatchingAutomatonTest::testSimpleConstruction() {
     CPPUNIT_ASSERT(resulting_automata == res);
 }
 
+
+void HammingMatchingAutomatonTest::testSimpleWildcardConstruction() {
+  ext::set<char> alphabet{'a', 'b', '@'};
+  string::WildcardLinearString <char> input_string(alphabet, ext::vector<char>{'a', '@', 'b'}, '@');
+  auto resulting_automata = stringology::matching::HammingMatchingAutomaton::construct(input_string, 2);
+  // expected automaton for pattern a@b ('@' is the wildcard) with at most 2 errors, built by hand below
+  typedef ext::pair<unsigned int, unsigned int> State;
+
+  automaton::NFA < char, State > res(ext::make_pair(0,0));
+  res.setInputAlphabet(alphabet);
+
+  State q0 = ext::make_pair(0,0);
+  State q1 = ext::make_pair(1,0);
+  State q2 = ext::make_pair(2,0);
+  State q3 = ext::make_pair(3,0);
+  State q4 = ext::make_pair(1,1);
+  State q5 = ext::make_pair(2,1);
+  State q6 = ext::make_pair(3,1);
+  State q7 = ext::make_pair(3,2);
+
+  res.setStates(ext::set<State> {q0, q1, q2, q3, q4, q5, q6, q7});
+  res.setFinalStates(ext::set<State> {q3, q6, q7});
+
+  res.addTransition(q0, 'a', q0); // initial loops
+  res.addTransition(q0, 'b', q0);
+
+  res.addTransition(q0, 'a', q1); // 3 simple matching automata (the third contributes only its final state q7)
+
+  res.addTransition(q1, 'a', q2);
+  res.addTransition(q1, 'b', q2);
+  res.addTransition(q4, 'a', q5);
+  res.addTransition(q4, 'b', q5);
+
+  res.addTransition(q2, 'b', q3);
+  res.addTransition(q5, 'b', q6);
+
+  res.addTransition(q0, 'b', q4); // error transitions
+
+  res.addTransition(q2, 'a', q6);
+  res.addTransition(q5, 'a', q7);
+
+  CPPUNIT_ASSERT(resulting_automata == res);
+}
+
+
 void HammingMatchingAutomatonTest::setUp() { }
 
 void HammingMatchingAutomatonTest::tearDown() { }
diff --git a/alib2algo/test-src/stringology/matching/HammingMatchingAutomatonTest.h b/alib2algo/test-src/stringology/matching/HammingMatchingAutomatonTest.h
index 6471d078bb..caff8e0d7d 100644
--- a/alib2algo/test-src/stringology/matching/HammingMatchingAutomatonTest.h
+++ b/alib2algo/test-src/stringology/matching/HammingMatchingAutomatonTest.h
@@ -5,13 +5,15 @@
 
 class HammingMatchingAutomatonTest : public CppUnit::TestFixture {
 	CPPUNIT_TEST_SUITE(HammingMatchingAutomatonTest);
-        CPPUNIT_TEST(testSimpleConstruction);
-    CPPUNIT_TEST_SUITE_END();
+    CPPUNIT_TEST(testSimpleConstruction);
+		CPPUNIT_TEST(testSimpleWildcardConstruction);		
+  CPPUNIT_TEST_SUITE_END();
 
 public:
     void setUp ( );
     void tearDown ( );
 
     void testSimpleConstruction();
+		void testSimpleWildcardConstruction(); // construction from a WildcardLinearString pattern
 };
 #endif //HAMMING_MATCHING_AUTOMATA_TEST_H_
-- 
GitLab