From 82aa1ce9585e24eccdec25dfd0b9568a8691d89d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20=C4=8Capek?= <tomas@capek.io>
Date: Thu, 19 Apr 2018 09:46:47 +0200
Subject: [PATCH] Add WildcardLinearString support to
 GeneralizedLevenshteinSequenceMatchingAutomaton

---
 ...izedLevenshteinSequenceMatchingAutomaton.h | 53 +++++++++++-
 ...venshteinSequenceMatchingAutomatonTest.cpp | 83 +++++++++++++++++++
 ...LevenshteinSequenceMatchingAutomatonTest.h |  6 +-
 3 files changed, 139 insertions(+), 3 deletions(-)

diff --git a/alib2algo/src/stringology/matching/GeneralizedLevenshteinSequenceMatchingAutomaton.h b/alib2algo/src/stringology/matching/GeneralizedLevenshteinSequenceMatchingAutomaton.h
index dcc053c945..73b716623a 100644
--- a/alib2algo/src/stringology/matching/GeneralizedLevenshteinSequenceMatchingAutomaton.h
+++ b/alib2algo/src/stringology/matching/GeneralizedLevenshteinSequenceMatchingAutomaton.h
@@ -20,12 +20,21 @@ namespace matching {
 class GeneralizedLevenshteinSequenceMatchingAutomaton {
 public:
 	/**
-	 * Creates Generalized Levenshtein matching automata for sequence matching.
+	 * Creates a Generalized Levenshtein matching automaton for sequence matching from a LinearString.
 	 *
 	 * @return automata for aproximate sequence matching using Levenshtein method.
 	 */
 	template < class SymbolType >
 	static automaton::EpsilonNFA < SymbolType, void, ext::pair<unsigned int, unsigned int> > construct(const string::LinearString < SymbolType > & pattern, unsigned int allowed_errors);
+
+	/**
+	 * Creates a Generalized Levenshtein matching automaton for sequence matching from a WildcardLinearString.
+	 *
+	 * @return automaton for approximate sequence matching using the generalized Levenshtein method.
+	 */
+	template < class SymbolType >
+	static automaton::EpsilonNFA < SymbolType, void, ext::pair<unsigned int, unsigned int> > construct(const string::WildcardLinearString < SymbolType > & pattern, unsigned int allowed_errors);
+
 };
 
 template < class SymbolType >
@@ -59,6 +68,48 @@ automaton::EpsilonNFA < SymbolType, void, ext::pair<unsigned int, unsigned int>
 	return result;
 }
 
+template < class SymbolType >
+automaton::EpsilonNFA < SymbolType, void, ext::pair<unsigned int, unsigned int> > GeneralizedLevenshteinSequenceMatchingAutomaton::construct(const string::WildcardLinearString < SymbolType > & pattern, unsigned int allowed_errors) {
+	// Start from the plain matching automaton; only self-loops are added on top of it.
+	auto result = stringology::matching::GeneralizedLevenshteinMatchingAutomaton::construct(pattern, allowed_errors);
+
+	const auto & content = pattern.getContent();
+	const SymbolType wildcard = pattern.getWildcardSymbol();
+	ext::set<SymbolType> loop_symbols = pattern.getAlphabet();
+	loop_symbols.erase(wildcard); // the wildcard itself never labels a loop
+
+	// States (position, errors): loop on every symbol other than the one at that position.
+	for (unsigned int errors = 0; errors < allowed_errors + 1; ++errors) {
+		for (unsigned int position = errors; position < content.size(); ++position) {
+			if (content[position] == wildcard)
+				continue;
+
+			const auto state = ext::make_pair(position, errors);
+			for (const SymbolType & symbol : loop_symbols) {
+				if (symbol != content[position])
+					result.addTransition(state, symbol, state);
+			}
+		}
+	}
+
+	// Transpose states, addressed as (pattern length + 1 + position, errors), get the same loops.
+	for (unsigned int errors = 0; errors < allowed_errors; ++errors) {
+		for (unsigned int position = errors; position + 1 < content.size(); ++position) {
+			if (content[position] == wildcard)
+				continue;
+
+			const auto state = ext::make_pair(content.size() + 1 + position, errors);
+			for (const SymbolType & symbol : loop_symbols) {
+				if (symbol != content[position])
+					result.addTransition(state, symbol, state);
+			}
+		}
+	}
+
+	return result;
+}
+
+
 
 } /* namespace matching */
 
diff --git a/alib2algo/test-src/stringology/matching/GeneralizedLevenshteinSequenceMatchingAutomatonTest.cpp b/alib2algo/test-src/stringology/matching/GeneralizedLevenshteinSequenceMatchingAutomatonTest.cpp
index d4d786c582..282552e68c 100644
--- a/alib2algo/test-src/stringology/matching/GeneralizedLevenshteinSequenceMatchingAutomatonTest.cpp
+++ b/alib2algo/test-src/stringology/matching/GeneralizedLevenshteinSequenceMatchingAutomatonTest.cpp
@@ -131,6 +131,89 @@ void GeneralizedLevenshteinSequenceMatchingAutomatonTest::testSimpleConstruction
   CPPUNIT_ASSERT(resulting_automata == test);
 }
 
+void GeneralizedLevenshteinSequenceMatchingAutomatonTest::testSimpleWildcardConstruction() { // pattern "a@b" ('@' = wildcard), 2 allowed errors, compared against a hand-built automaton
+  ext::set<char> alphabet{'a', 'b', '@'}; // '@' is passed as the wildcard symbol below
+  string::WildcardLinearString <char> input_string(alphabet, ext::vector<char>{'a', '@', 'b'}, '@');
+  auto resulting_automata = stringology::matching::GeneralizedLevenshteinSequenceMatchingAutomaton::construct(input_string, 2);
+
+  typedef ext::pair<unsigned int, unsigned int> State;
+
+  automaton::EpsilonNFA < char, void, State > res(ext::make_pair(0,0)); // expected automaton, initial state (0,0)
+  res.setInputAlphabet(alphabet);
+
+  State q0 = ext::make_pair(0,0);
+  State q1 = ext::make_pair(1,0);
+  State q2 = ext::make_pair(2,0);
+  State q3 = ext::make_pair(3,0);
+  State q4 = ext::make_pair(1,1);
+  State q5 = ext::make_pair(2,1);
+  State q6 = ext::make_pair(3,1);
+  State q7 = ext::make_pair(2,2);
+  State q8 = ext::make_pair(3,2);
+
+  State r3 = ext::make_pair(5,1); // r1..r3 are used only by the transpose transitions at the end
+  State r2 = ext::make_pair(5,0);
+  State r1 = ext::make_pair(4,0);
+
+  res.setStates(ext::set<State> {q0, q1, q2, q3, q4, q5, q6, q7, q8, r1, r2, r3});
+  res.setFinalStates(ext::set<State> {q3, q6, q8}); // the states whose first component is 3, the pattern length
+
+  res.addTransition(q0, 'a', q0); // initial loops
+  res.addTransition(q0, 'b', q0);
+
+  res.addTransition(q0, 'a', q1); // 3 simple matching automata (one per value 0, 1, 2 of the second state component)
+
+  res.addTransition(q1, 'a', q2); // wildcard position: both 'a' and 'b' advance
+  res.addTransition(q1, 'b', q2);
+  res.addTransition(q4, 'a', q5);
+  res.addTransition(q4, 'b', q5);
+
+  res.addTransition(q2, 'b', q3);
+  res.addTransition(q5, 'b', q6);
+  res.addTransition(q7, 'b', q8);
+
+  res.addTransition(q0, 'b', q4); // error transitions for replace
+
+  res.addTransition(q2, 'a', q6);
+  res.addTransition(q5, 'a', q8);
+
+  res.addTransition(q0, q4); // delete transitions (no input symbol)
+  res.addTransition(q1, q5);
+  res.addTransition(q2, q6);
+
+  res.addTransition(q4, q7);
+  res.addTransition(q5, q8);
+
+  res.addTransition(q1, 'a', q4); // same first component, second + 1, on any symbol - presumably the insert transitions
+  res.addTransition(q1, 'b', q4);
+
+  res.addTransition(q2, 'a', q5);
+  res.addTransition(q2, 'b', q5);
+
+  res.addTransition(q5, 'a', q7);
+  res.addTransition(q5, 'b', q7);
+
+  res.addTransition(q2, 'a', q2); // sequence matching loops: pattern has 'b' at index 2, so these states loop on 'a'
+  res.addTransition(q5, 'a', q5);
+  res.addTransition(q7, 'a', q7);
+
+  res.addTransition(r1, 'b', r1); // sequence matching loop on a transpose state
+
+  res.addTransition(q0, 'a', r1); // transpose states
+  res.addTransition(q0, 'b', r1);
+  res.addTransition(r1, 'a', q5);
+
+  res.addTransition(q1, 'b', r2);
+  res.addTransition(r2, 'a', q6);
+  res.addTransition(r2, 'b', q6);
+
+  res.addTransition(q4, 'b', r3);
+  res.addTransition(r3, 'a', q8);
+  res.addTransition(r3, 'b', q8);
+
+  CPPUNIT_ASSERT(resulting_automata == res);
+}
+
 void GeneralizedLevenshteinSequenceMatchingAutomatonTest::setUp() { }
 
 void GeneralizedLevenshteinSequenceMatchingAutomatonTest::tearDown() { }
diff --git a/alib2algo/test-src/stringology/matching/GeneralizedLevenshteinSequenceMatchingAutomatonTest.h b/alib2algo/test-src/stringology/matching/GeneralizedLevenshteinSequenceMatchingAutomatonTest.h
index 5e02ace748..c58d1d44d3 100644
--- a/alib2algo/test-src/stringology/matching/GeneralizedLevenshteinSequenceMatchingAutomatonTest.h
+++ b/alib2algo/test-src/stringology/matching/GeneralizedLevenshteinSequenceMatchingAutomatonTest.h
@@ -5,13 +5,15 @@
 
 class GeneralizedLevenshteinSequenceMatchingAutomatonTest : public CppUnit::TestFixture {
 	CPPUNIT_TEST_SUITE(GeneralizedLevenshteinSequenceMatchingAutomatonTest);
-        CPPUNIT_TEST(testSimpleConstruction);
-    CPPUNIT_TEST_SUITE_END();
+	CPPUNIT_TEST(testSimpleConstruction);
+	CPPUNIT_TEST(testSimpleWildcardConstruction);
+	CPPUNIT_TEST_SUITE_END();
 
 public:
     void setUp ( );
     void tearDown ( );
 
     void testSimpleConstruction();
+    void testSimpleWildcardConstruction(); // construction from a WildcardLinearString pattern
 };
 #endif //GENERALIZED_LEVENSHTEIN_SEQUENCE_MATCHING_AUTOMATA_TEST_H_
-- 
GitLab