From 9489b6a42c40abfeee4af25ef5db84941da45a48 Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Fri, 24 Jan 2020 10:36:27 +0200
Subject: [PATCH] Quantum Leap string pattern matching algorithm

---
 .../QuickSearchBadCharacterShiftTable.cpp     | 15 ----
 .../properties/QuickSearchShiftTable.cpp      | 15 ++++
 ...erShiftTable.h => QuickSearchShiftTable.h} | 14 ++--
 .../ReversedQuickSearchShiftTable.cpp         | 15 ++++
 .../ReversedQuickSearchShiftTable.h           | 55 ++++++++++++++
 .../QuantumLeapUsingQuickSearchShift.cpp      | 15 ++++
 .../exact/QuantumLeapUsingQuickSearchShift.h  | 74 +++++++++++++++++++
 alib2algo/src/stringology/exact/QuickSearch.h |  4 +-
 .../test-src/tests/exactMatching.cpp          |  2 +
 9 files changed, 185 insertions(+), 24 deletions(-)
 delete mode 100644 alib2algo/src/string/properties/QuickSearchBadCharacterShiftTable.cpp
 create mode 100644 alib2algo/src/string/properties/QuickSearchShiftTable.cpp
 rename alib2algo/src/string/properties/{QuickSearchBadCharacterShiftTable.h => QuickSearchShiftTable.h} (66%)
 create mode 100644 alib2algo/src/string/properties/ReversedQuickSearchShiftTable.cpp
 create mode 100644 alib2algo/src/string/properties/ReversedQuickSearchShiftTable.h
 create mode 100644 alib2algo/src/stringology/exact/QuantumLeapUsingQuickSearchShift.cpp
 create mode 100644 alib2algo/src/stringology/exact/QuantumLeapUsingQuickSearchShift.h

diff --git a/alib2algo/src/string/properties/QuickSearchBadCharacterShiftTable.cpp b/alib2algo/src/string/properties/QuickSearchBadCharacterShiftTable.cpp
deleted file mode 100644
index cf39042760..0000000000
--- a/alib2algo/src/string/properties/QuickSearchBadCharacterShiftTable.cpp
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * QuickSearchBadCharacterShiftTable.cpp
- *
- *  Created on: 23. 2. 2018
- *	  Author: Michal Cvach
- */
-
-#include "QuickSearchBadCharacterShiftTable.h"
-#include <registration/AlgoRegistration.hpp>
-
-namespace {
-
-auto QuickSearchBadCharacterShiftTableLinearString = registration::AbstractRegister < string::properties::QuickSearchBadCharacterShiftTable, ext::map < DefaultSymbolType, size_t >, const string::LinearString < > & > ( string::properties::QuickSearchBadCharacterShiftTable::qsbcs );
-
-} /* namespace */
diff --git a/alib2algo/src/string/properties/QuickSearchShiftTable.cpp b/alib2algo/src/string/properties/QuickSearchShiftTable.cpp
new file mode 100644
index 0000000000..079eaa4ebe
--- /dev/null
+++ b/alib2algo/src/string/properties/QuickSearchShiftTable.cpp
@@ -0,0 +1,15 @@
+/*
+ * QuickSearchShiftTable.cpp
+ *
+ *  Created on: 23. 2. 2018
+ *	  Author: Michal Cvach
+ */
+
+#include "QuickSearchShiftTable.h"
+#include <registration/AlgoRegistration.hpp>
+
+namespace {
+// Namespace-scope registration object binding QuickSearchShiftTable::qss for LinearString < > input (see registration/AlgoRegistration.hpp).
+auto QuickSearchShiftTableLinearString = registration::AbstractRegister < string::properties::QuickSearchShiftTable, ext::map < DefaultSymbolType, size_t >, const string::LinearString < > & > ( string::properties::QuickSearchShiftTable::qss );
+
+} /* namespace */
diff --git a/alib2algo/src/string/properties/QuickSearchBadCharacterShiftTable.h b/alib2algo/src/string/properties/QuickSearchShiftTable.h
similarity index 66%
rename from alib2algo/src/string/properties/QuickSearchBadCharacterShiftTable.h
rename to alib2algo/src/string/properties/QuickSearchShiftTable.h
index cba6760ab4..0e186613d8 100644
--- a/alib2algo/src/string/properties/QuickSearchBadCharacterShiftTable.h
+++ b/alib2algo/src/string/properties/QuickSearchShiftTable.h
@@ -1,12 +1,12 @@
 /*
- * QuickSearchBadCharacterShiftTable.h
+ * QuickSearchShiftTable.h
  *
  *  Created on: 23. 2. 2018
  *	  Author: Michal Cvach
  */
 
-#ifndef _STRINGOLOGY_QUICK_SEARCH_BAD_CHARACTER_SHIFT_TABLE_H_
-#define _STRINGOLOGY_QUICK_SEARCH_BAD_CHARACTER_SHIFT_TABLE_H_
+#ifndef _STRINGOLOGY_QUICK_SEARCH_SHIFT_TABLE_H_
+#define _STRINGOLOGY_QUICK_SEARCH_SHIFT_TABLE_H_
 
 #include <set>
 #include <map>
@@ -20,19 +20,19 @@ namespace properties {
 /**
 * Computation of BCS table for the QuickSearch algorithm, as presented in the Daniel M. Sunday article.
 */
-class QuickSearchBadCharacterShiftTable {
+class QuickSearchShiftTable {
 public:
 	/**
 	 * Creates a bad character shift table which can be later used for the QuickSearch algorithm.
 	 * @return the BCS table in form of a map where key is the character from an alphabet and value is the shift.
 	 */
 	template < class SymbolType >
-	static ext::map < SymbolType, size_t > qsbcs ( const string::LinearString < SymbolType > & pattern );
+	static ext::map < SymbolType, size_t > qss ( const string::LinearString < SymbolType > & pattern );
 
 };
 
 template < class SymbolType >
-ext::map<SymbolType, size_t> QuickSearchBadCharacterShiftTable::qsbcs(const string::LinearString < SymbolType >& pattern) {
+ext::map<SymbolType, size_t> QuickSearchShiftTable::qss(const string::LinearString < SymbolType >& pattern) {
 	ext::map<SymbolType, size_t> bcs;
 
 	/* Initialization of BCS. */
@@ -50,4 +50,4 @@ ext::map<SymbolType, size_t> QuickSearchBadCharacterShiftTable::qsbcs(const stri
 
 } /* namespace string */
 
-#endif /* _STRINGOLOGY_QUICK_SEARCH_BAD_CHARACTER_SHIFT_TABLE_H_ */
+#endif /* _STRINGOLOGY_QUICK_SEARCH_SHIFT_TABLE_H_ */
diff --git a/alib2algo/src/string/properties/ReversedQuickSearchShiftTable.cpp b/alib2algo/src/string/properties/ReversedQuickSearchShiftTable.cpp
new file mode 100644
index 0000000000..9a99278fc8
--- /dev/null
+++ b/alib2algo/src/string/properties/ReversedQuickSearchShiftTable.cpp
@@ -0,0 +1,15 @@
+/*
+ * ReversedQuickSearchShiftTable.cpp
+ *
+ *  Created on: 24. 1. 2020
+ *      Author: Jan Travnicek
+ */
+
+#include "ReversedQuickSearchShiftTable.h"
+#include <registration/AlgoRegistration.hpp>
+
+namespace {
+// Namespace-scope registration object binding ReversedQuickSearchShiftTable::rqss for LinearString < > input (see registration/AlgoRegistration.hpp).
+auto ReversedQuickSearchShiftTableLinearString = registration::AbstractRegister < string::properties::ReversedQuickSearchShiftTable, ext::map < DefaultSymbolType, size_t >, const string::LinearString < > & > ( string::properties::ReversedQuickSearchShiftTable::rqss );
+
+} /* namespace */
diff --git a/alib2algo/src/string/properties/ReversedQuickSearchShiftTable.h b/alib2algo/src/string/properties/ReversedQuickSearchShiftTable.h
new file mode 100644
index 0000000000..cd28cfe4f6
--- /dev/null
+++ b/alib2algo/src/string/properties/ReversedQuickSearchShiftTable.h
@@ -0,0 +1,55 @@
+/*
+ * ReversedQuickSearchShiftTable.h
+ *
+ *  Created on: 24. 1. 2020
+ *      Author: Jan Travnicek
+ */
+
+#ifndef _STRINGOLOGY_REVERSED_QUICK_SEARCH_SHIFT_TABLE_H_
+#define _STRINGOLOGY_REVERSED_QUICK_SEARCH_SHIFT_TABLE_H_
+
+#include <alib/set>
+#include <alib/map>
+
+#include <string/LinearString.h>
+
+namespace string {
+
+namespace properties {
+
+/**
+ * Computation of the reversed (right-to-left) quick search shift table of a pattern.
+ * Each alphabet symbol maps to 1 + the index of its leftmost occurrence in the pattern, or to pattern length + 1 if it does not occur.
+ */
+class ReversedQuickSearchShiftTable {
+public:
+	/**
+	 * Builds the reversed quick search shift table from the alphabet and the content of the given pattern.
+	 * @return map from each symbol of the pattern's alphabet to its shift
+	 */
+	template < class SymbolType >
+	static ext::map < SymbolType, size_t > rqss ( const string::LinearString < SymbolType > & pattern );
+
+};
+
+template < class SymbolType >
+ext::map < SymbolType, size_t > ReversedQuickSearchShiftTable::rqss ( const string::LinearString < SymbolType > & pattern ) {
+	const auto & content = pattern.getContent ( );
+	ext::map < SymbolType, size_t > shifts;
+
+	/* Every alphabet symbol starts with the maximal shift: pattern length plus one. */
+	for ( const SymbolType & symbol : pattern.getAlphabet ( ) )
+		shifts.insert ( std::make_pair ( symbol, content.size ( ) + 1 ) );
+
+	/* Walk the pattern right to left; the leftmost occurrence of a symbol is written last and therefore wins. */
+	for ( size_t position = content.size ( ); position > 0; -- position )
+		shifts[content.at ( position - 1 )] = position;
+
+	return shifts;
+}
+
+} /* namespace properties */
+
+} /* namespace string */
+
+#endif /* _STRINGOLOGY_REVERSED_QUICK_SEARCH_SHIFT_TABLE_H_ */
diff --git a/alib2algo/src/stringology/exact/QuantumLeapUsingQuickSearchShift.cpp b/alib2algo/src/stringology/exact/QuantumLeapUsingQuickSearchShift.cpp
new file mode 100644
index 0000000000..2982f6c378
--- /dev/null
+++ b/alib2algo/src/stringology/exact/QuantumLeapUsingQuickSearchShift.cpp
@@ -0,0 +1,15 @@
+/*
+ * QuantumLeapUsingQuickSearchShift.cpp
+ *
+ *  Created on: 24. 1. 2020
+ *      Author: Jan Travnicek
+ */
+
+#include "QuantumLeapUsingQuickSearchShift.h"
+#include <registration/AlgoRegistration.hpp>
+
+namespace {
+// Namespace-scope registration object binding QuantumLeapUsingQuickSearchShift::match ( subject, pattern, z ) for LinearString < > inputs (see registration/AlgoRegistration.hpp).
+auto QuantumLeapUsingQuickSearchShiftLinearStringLinearString = registration::AbstractRegister < stringology::exact::QuantumLeapUsingQuickSearchShift, ext::set < unsigned >, const string::LinearString < > &, const string::LinearString < > &, size_t > ( stringology::exact::QuantumLeapUsingQuickSearchShift::match );
+
+} /* namespace */
diff --git a/alib2algo/src/stringology/exact/QuantumLeapUsingQuickSearchShift.h b/alib2algo/src/stringology/exact/QuantumLeapUsingQuickSearchShift.h
new file mode 100644
index 0000000000..cce556d466
--- /dev/null
+++ b/alib2algo/src/stringology/exact/QuantumLeapUsingQuickSearchShift.h
@@ -0,0 +1,74 @@
+/*
+ * QuantumLeapUsingQuickSearchShift.h
+ *
+ *  Created on: 24. 1. 2020
+ *      Author: Jan Travnicek
+ */
+
+#ifndef _QUICK_SEARCH_USING_QUICK_SEARCH_SHIFT_H_
+#define _QUICK_SEARCH_USING_QUICK_SEARCH_SHIFT_H_
+
+#include <alib/set>
+#include <alib/map>
+
+#include <string/LinearString.h>
+
+#include <string/properties/QuickSearchShiftTable.h>
+#include <string/properties/ReversedQuickSearchShiftTable.h>
+
+namespace stringology {
+
+namespace exact {
+
+/**
+ * Implementation of Quantum Leap exact matching: a forward quick search shift is combined with a reversed quick search shift evaluated z positions ahead.
+ */
+class QuantumLeapUsingQuickSearchShift {
+public:
+	/**
+	 * Search for pattern in linear string; z is the leap distance: the window advances by z when the forward shift exceeds the z-relative backward shift, otherwise by the forward shift.
+	 * @return set of offsets into string at which an occurrence of pattern starts
+	 */
+	template < class SymbolType >
+	static ext::set < unsigned > match ( const string::LinearString < SymbolType > & string, const string::LinearString < SymbolType > & pattern, size_t z );
+};
+
+template < class SymbolType >
+ext::set < unsigned > QuantumLeapUsingQuickSearchShift::match ( const string::LinearString < SymbolType > & string, const string::LinearString < SymbolType > & pattern, size_t z ) {
+	// Collects every offset of string at which pattern occurs; z is the leap distance and z == 0 disables leaping (plain forward shifts only).
+	ext::set < unsigned > occ;
+	ext::map < SymbolType, size_t > fqss = string::properties::QuickSearchShiftTable::qss ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
+	ext::map < SymbolType, size_t > bqss = string::properties::ReversedQuickSearchShiftTable::rqss ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
+	for ( const SymbolType & symbol : pattern.getAlphabet ( ) )
+		bqss [ symbol ] = z - bqss [ symbol ]; // backward shift re-expressed relative to the current offset; unsigned wrap-around for z smaller than the shift yields a huge value, which in practice just suppresses the leap
+
+	size_t haystack_offset = 0;
+
+	while ( haystack_offset + pattern.getContent ( ).size ( ) <= string.getContent ( ).size ( ) ) {
+		size_t i = 0;
+		while ( i < pattern.getContent ( ).size ( ) && string.getContent ( ) [ haystack_offset + i ] == pattern.getContent ( ) [ i ] )
+			i ++;
+
+		 // Yay, there is match!!!
+		if ( i == pattern.getContent ( ).size ( ) ) occ.insert ( haystack_offset );
+
+		if ( haystack_offset + pattern.getContent().size() == string.getContent().size() ) // this is needed only because there is no terminating character in the string ...
+			break; // Here we don't do any more shifts if the pattern is already aligned at the utter end of the text
+
+		size_t shf = fqss [ string.getContent ( ) [ haystack_offset + pattern.getContent ( ).size ( ) ] ];
+
+		size_t shb = z;
+		if ( haystack_offset + z - 1 < string.getContent ( ).size ( ) ) // this condition is needed because at worst MAX ( z - m, 0 ) additional characters are needed in the subject after its end
+			shb = bqss [ string.getContent ( ) [ haystack_offset + z - 1 ] ];
+		// Leap by z only when the forward shift exceeds the backward one; a zero z must never be used as the shift, it would leave haystack_offset unchanged and loop forever.
+		haystack_offset += ( z != 0 && shf > shb ) ? z : shf;
+	}
+
+	return occ;
+}
+
+} /* namespace exact */
+
+} /* namespace stringology */
+
+#endif /* _QUICK_SEARCH_USING_QUICK_SEARCH_SHIFT_H_ */
diff --git a/alib2algo/src/stringology/exact/QuickSearch.h b/alib2algo/src/stringology/exact/QuickSearch.h
index af47eb9305..32e10365ea 100644
--- a/alib2algo/src/stringology/exact/QuickSearch.h
+++ b/alib2algo/src/stringology/exact/QuickSearch.h
@@ -14,7 +14,7 @@
 
 #include <string/LinearString.h>
 
-#include <string/properties/QuickSearchBadCharacterShiftTable.h>
+#include <string/properties/QuickSearchShiftTable.h>
 
 #include <global/GlobalData.h>
 
@@ -41,7 +41,7 @@ ext::set<unsigned> QuickSearch::match(const string::LinearString < SymbolType >&
 	ext::set<unsigned> occ;
 
 	measurements::start ( "Preprocess", measurements::Type::PREPROCESS );
-	ext::map<SymbolType, size_t> bcs = string::properties::QuickSearchBadCharacterShiftTable::qsbcs(pattern); //NOTE: the subjects alphabet must be a subset or equal to the pattern
+	ext::map<SymbolType, size_t> bcs = string::properties::QuickSearchShiftTable::qss(pattern); //NOTE: the subjects alphabet must be a subset or equal to the pattern
 	measurements::end ( );
 
 	if(common::GlobalData::verbose)
diff --git a/alib2integrationtest/test-src/tests/exactMatching.cpp b/alib2integrationtest/test-src/tests/exactMatching.cpp
index 8266d4a16e..9f1421d952 100644
--- a/alib2integrationtest/test-src/tests/exactMatching.cpp
+++ b/alib2integrationtest/test-src/tests/exactMatching.cpp
@@ -1,5 +1,6 @@
 #include <catch2/catch.hpp>
 #include <alib/vector>
+#include <alib/string>
 
 #include "testing/TimeoutAqlTest.hpp"
 #include "testing/TestFiles.hpp"
@@ -31,6 +32,7 @@ TEST_CASE ( "ExactMatching", "[integration]" ) {
 			std::make_tuple ( "Exact Reversed Boyer Moore Horspool", " stringology::exact::ReversedBoyerMooreHorspool $subject $pattern", true ),
 			std::make_tuple ( "Quick Search", "stringology::exact::QuickSearch $subject $pattern", true ),
 			std::make_tuple ( "Exact Dead Zone Using Bad Character Shift", "stringology::exact::DeadZoneUsingBadCharacterShift $subject $pattern", true ),
+			std::make_tuple ( "Exact Quantum Leap Using Quick Search Shift", "stringology::exact::QuantumLeapUsingQuickSearchShift $subject $pattern " + ext::to_string ( 2 * PATTERN_SIZE ), true ),
 			std::make_tuple ( "Exact Matching Automaton", "automaton::run::Occurrences <(stringology::matching::ExactMatchingAutomaton $pattern | automaton::determinize::Determinize -) $subject", true ),
 			std::make_tuple ( "DAWG Factors", "stringology::indexing::ExactSuffixAutomaton $subject | stringology::query::SuffixAutomatonFactors - $pattern", false ),
 			std::make_tuple ( "BNDM Matcher", "stringology::matching::BNDMMatcherConstruction $pattern | stringology::query::BNDMOccurrences - $subject", false ),
-- 
GitLab