From 4c2f457da61b4b017d8bda1571deddf99e2a286f Mon Sep 17 00:00:00 2001
From: Tomas Capek <tomas@capek.io>
Date: Wed, 28 Mar 2018 19:14:36 +0200
Subject: [PATCH] Refactor LevenshteinMatchingAutomaton.

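It now uses ext::pair<unsigned int, unsigned int> as the state type and
is built on top of HammingMatchingAutomaton: the Hamming automaton is
constructed first and the epsilon (deletion) and insertion transitions
are added to it. States are no longer numbered by hand, and the
UnreachableStatesRemover pass is no longer needed.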
---
 .../matching/LevenshteinMatchingAutomaton.cpp |   2 +-
 .../matching/LevenshteinMatchingAutomaton.h   |  72 ++++------
 .../LevenshteinMatchingAutomatonTest.cpp      | 125 ++++++++++--------
 3 files changed, 95 insertions(+), 104 deletions(-)

diff --git a/alib2algo/src/stringology/matching/LevenshteinMatchingAutomaton.cpp b/alib2algo/src/stringology/matching/LevenshteinMatchingAutomaton.cpp
index 1f53ed3a59..f2ac367f9d 100644
--- a/alib2algo/src/stringology/matching/LevenshteinMatchingAutomaton.cpp
+++ b/alib2algo/src/stringology/matching/LevenshteinMatchingAutomaton.cpp
@@ -12,7 +12,7 @@ namespace stringology {
 
 namespace matching {
 
-auto LevenshteinMatchingAutomatonLinearString = registration::AbstractRegister <LevenshteinMatchingAutomaton, automaton::EpsilonNFA < DefaultSymbolType, unsigned, unsigned>, const string::LinearString < > &, unsigned > ( LevenshteinMatchingAutomaton::construct );
+auto LevenshteinMatchingAutomatonLinearString = registration::AbstractRegister <LevenshteinMatchingAutomaton, automaton::EpsilonNFA < DefaultSymbolType, void, ext::pair<unsigned int, unsigned int>>, const string::LinearString < > &, unsigned > ( LevenshteinMatchingAutomaton::construct );
 
 } /* namespace matching */
 
diff --git a/alib2algo/src/stringology/matching/LevenshteinMatchingAutomaton.h b/alib2algo/src/stringology/matching/LevenshteinMatchingAutomaton.h
index 5cde7d4e19..c6dd1424f0 100644
--- a/alib2algo/src/stringology/matching/LevenshteinMatchingAutomaton.h
+++ b/alib2algo/src/stringology/matching/LevenshteinMatchingAutomaton.h
@@ -5,12 +5,13 @@
  *      Author: Tomas Capek
  */
 
-#ifndef _EXACT_MATCHING_AUTOMATON_H__
-#define _EXACT_MATCHING_AUTOMATON_H__
+#ifndef _LEVENSHTEIN_MATCHING_AUTOMATON_H__
+#define _LEVENSHTEIN_MATCHING_AUTOMATON_H__
 
 #include <automaton/FSM/EpsilonNFA.h>
-#include <automaton/simplify/UnreachableStatesRemover.h>
 #include <string/LinearString.h>
+#include <stringology/matching/HammingMatchingAutomaton.h>
+
 
 namespace stringology {
 
@@ -24,63 +25,44 @@ public:
 	 * @return automata for aproximate string matching using Hamming algorithm
 	 */
 	template < class SymbolType >
-	static automaton::EpsilonNFA < SymbolType, unsigned, unsigned > construct(const string::LinearString < SymbolType > & pattern, unsigned int allowed_errors);
+	static automaton::EpsilonNFA < SymbolType, void, ext::pair<unsigned int, unsigned int> > construct(const string::LinearString < SymbolType > & pattern, unsigned int allowed_errors);
 };
 
 
 template < class SymbolType >
-	automaton::EpsilonNFA < SymbolType, unsigned, unsigned > LevenshteinMatchingAutomaton::construct(const string::LinearString < SymbolType > & pattern, unsigned int allowed_errors) {
-	automaton::EpsilonNFA < SymbolType, unsigned, unsigned > res( 0 );
-	res.setInputAlphabet(pattern.getAlphabet());
-
-	// add k+1 paralel automatas (sfoeco type = exact matching) (where k is allowed_errors)
-	unsigned current_state = 0;
-	for (unsigned i = 0; i <= allowed_errors; i++) {
-		if ( current_state > 0 ) {
-			++current_state;
-			res.addState(current_state);
-		}
+	automaton::EpsilonNFA < SymbolType, void, ext::pair<unsigned int, unsigned int> >  LevenshteinMatchingAutomaton::construct(const string::LinearString < SymbolType > & pattern, unsigned int allowed_errors) {
+	auto hamming_matching_automaton = stringology::matching::HammingMatchingAutomaton::construct(pattern, allowed_errors);
 
-		for(const SymbolType& symbol : pattern.getAlphabet()) {
-			res.addTransition( current_state, symbol, current_state);
-		}
+	automaton::EpsilonNFA < SymbolType, void, ext::pair<unsigned int, unsigned int> > result (hamming_matching_automaton);
 
-		for(const SymbolType& symbol : pattern.getContent()) {
-			++current_state;
-			res.addState( current_state );
-			res.addTransition( current_state - 1, symbol, current_state );
-		}
-		res.addFinalState( current_state );
-	}
+	for (unsigned int i=0; i<pattern.getContent().size(); i++) {
+		for (unsigned int j=0; j<allowed_errors; j++) {
+			if (i < j) {
+				continue;
+			}
 
-	for (unsigned int i = 0; i < pattern.getContent().size(); i++) {
-		for (unsigned int j = 0; j < allowed_errors; j++) {
-			unsigned int states_count = pattern.getContent().size() + 1;
+			auto from = ext::make_pair(i, j);
+			auto to = ext::make_pair(i + 1, j + 1);
 
-			unsigned int from_state = j*states_count + i;
-			unsigned int to_state = (j+1)*states_count + i + 1;
+			// add diagonal epsilon transition representing deletion
+			result.addTransition(from, to);
 
-			for ( const SymbolType& symbol : pattern.getAlphabet()) {
-				if (symbol != pattern.getContent()[i]) {
-					// add diagonal transition on mistake
-					res.addTransition(from_state, symbol, to_state);
+			if (i == j) {
+				continue;
+			}
 
-					if ( i > j && i - 1 < pattern.getContent().size() ) {
-						// condition limits following to upper triangle && non-final states
+			to = ext::make_pair(i, j + 1);
 
-						// add vertical transition representing insertion
-						res.addTransition(from_state, symbol, (j + 1) * states_count + i);
-					}
+			for (const SymbolType& symbol : pattern.getAlphabet()) {
+				if (symbol != pattern.getContent()[i]) {
+					// add horizontal transition representing insertion
+					result.addTransition(from, symbol, to);
 				}
 			}
-
-			// add epsilon transition representing deletion
-			res.addTransition(from_state, to_state);
 		}
 	}
 
-	// remove all inaccessible states from starting state
-	return automaton::simplify::UnreachableStatesRemover::remove(res);
+	return result;
 }
 
 
@@ -88,4 +70,4 @@ template < class SymbolType >
 
 } /* namespace stringology */
 
-#endif /* _HAMMING_MATCHING_AUTOMATON_H__ */
+#endif /* _LEVENSHTEIN_MATCHING_AUTOMATON_H__ */
diff --git a/alib2algo/test-src/stringology/matching/LevenshteinMatchingAutomatonTest.cpp b/alib2algo/test-src/stringology/matching/LevenshteinMatchingAutomatonTest.cpp
index ce4c0fea04..4f9ab10ac9 100644
--- a/alib2algo/test-src/stringology/matching/LevenshteinMatchingAutomatonTest.cpp
+++ b/alib2algo/test-src/stringology/matching/LevenshteinMatchingAutomatonTest.cpp
@@ -14,66 +14,75 @@ void LevenshteinMatchingAutomatonTest::testSimpleConstruction() {
     string::LinearString <char> input_string(alphabet, ext::vector<char>{'a', 'b', 'c'});
     auto resulting_automata = stringology::matching::LevenshteinMatchingAutomaton::construct(input_string, 2);
 
-    automaton::EpsilonNFA < char, unsigned, unsigned > res(0);
-    res.setInputAlphabet(ext::set<char>{'a', 'b', 'c', 'd'});
-    res.setStates(ext::set<unsigned> {0, 1, 2, 3, 5, 6, 7, 10, 11});
-    res.setFinalStates(ext::set<unsigned> {3, 7, 11});
-
-    res.addTransition(0, 'a', 1); // paralel exact matching automatas
-
-    res.addTransition(1, 'b', 2);
-    res.addTransition(5, 'b', 6);
-
-    res.addTransition(2, 'c', 3);
-    res.addTransition(6, 'c', 7);
-    res.addTransition(10, 'c', 11);
-
-    res.addTransition(0, 'a', 0); // initial state's loops
-    res.addTransition(0, 'b', 0);
-    res.addTransition(0, 'c', 0);
-    res.addTransition(0, 'd', 0);
-
-    res.addTransition(0, 'b', 5); // first mistake
-    res.addTransition(0, 'c', 5);
-    res.addTransition(0, 'd', 5);
-
-    res.addTransition(0, 5); // deletion
-
-    res.addTransition(1, 'a', 5); // insertion
-    res.addTransition(1, 'c', 5);
-    res.addTransition(1, 'd', 5);
-
-    res.addTransition(1, 'a', 6);
-    res.addTransition(1, 'c', 6);
-    res.addTransition(1, 'd', 6);
+    typedef ext::pair<unsigned int, unsigned int> State;
 
-    res.addTransition(1, 6); // deletion
-
-    res.addTransition(2, 'a', 6); // insertion
-    res.addTransition(2, 'b', 6);
-    res.addTransition(2, 'd', 6);
-
-    res.addTransition(6, 'a', 10); // insertion
-    res.addTransition(6, 'b', 10);
-    res.addTransition(6, 'd', 10);
-
-    res.addTransition(5, 'a', 10);
-    res.addTransition(5, 'c', 10);
-    res.addTransition(5, 'd', 10);
-
-    res.addTransition(5, 10); // deletion
-
-    res.addTransition(2, 'a', 7);
-    res.addTransition(2, 'b', 7);
-    res.addTransition(2, 'd', 7);
-
-    res.addTransition(2, 7); // deletion
-
-    res.addTransition(6, 'a', 11);
-    res.addTransition(6, 'b', 11);
-    res.addTransition(6, 'd', 11);
+    automaton::EpsilonNFA < char, void, State > res(ext::make_pair(0,0));
+    res.setInputAlphabet(ext::set<char>{'a', 'b', 'c', 'd'});
 
-    res.addTransition(6, 11);
+    State q0 = ext::make_pair(0,0);
+    State q1 = ext::make_pair(1,0);
+    State q2 = ext::make_pair(2,0);
+    State q3 = ext::make_pair(3,0);
+    State q4 = ext::make_pair(1,1);
+    State q5 = ext::make_pair(2,1);
+    State q6 = ext::make_pair(3,1);
+    State q7 = ext::make_pair(2,2);
+    State q8 = ext::make_pair(3,2);
+
+    res.setStates(ext::set<State> {q0, q1, q2, q3, q4, q5, q6, q7, q8});
+    res.setFinalStates(ext::set<State> {q3, q6, q8});
+
+    res.addTransition(q0, 'a', q1); // vertical transitions (exact matching automata)
+
+    res.addTransition(q1, 'b', q2);
+    res.addTransition(q4, 'b', q5);
+
+    res.addTransition(q2, 'c', q3);
+    res.addTransition(q5, 'c', q6);
+    res.addTransition(q7, 'c', q8);
+
+    res.addTransition(q0, 'a', q0); // loops in initial state
+    res.addTransition(q0, 'b', q0);
+    res.addTransition(q0, 'c', q0);
+    res.addTransition(q0, 'd', q0);
+
+    res.addTransition(q0, 'b', q4); // diagonal transitions representing replace
+    res.addTransition(q0, 'c', q4);
+    res.addTransition(q0, 'd', q4);
+
+    res.addTransition(q0, q4); // deletion
+
+    res.addTransition(q1, 'a', q5);
+    res.addTransition(q1, 'c', q5);
+    res.addTransition(q1, 'd', q5);
+    res.addTransition(q4, 'a', q7);
+    res.addTransition(q4, 'c', q7);
+    res.addTransition(q4, 'd', q7);
+
+    res.addTransition(q1, q5); // deletion
+    res.addTransition(q4, q7);
+
+    res.addTransition(q2, 'a', q6);
+    res.addTransition(q2, 'b', q6);
+    res.addTransition(q2, 'd', q6);
+    res.addTransition(q5, 'a', q8);
+    res.addTransition(q5, 'b', q8);
+    res.addTransition(q5, 'd', q8);
+
+    res.addTransition(q2, q6); // deletion
+    res.addTransition(q5, q8);
+
+    res.addTransition(q1, 'a', q4); // insertions
+    res.addTransition(q1, 'c', q4);
+    res.addTransition(q1, 'd', q4);
+
+    res.addTransition(q2, 'a', q5);
+    res.addTransition(q2, 'b', q5);
+    res.addTransition(q2, 'd', q5);
+
+    res.addTransition(q5, 'a', q7);
+    res.addTransition(q5, 'b', q7);
+    res.addTransition(q5, 'd', q7);
 
     CPPUNIT_ASSERT(resulting_automata == res);
 }
-- 
GitLab