From 1c08550da5b10f956db4932e839d1b4ae7dffb9d Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Fri, 19 Jun 2020 18:52:15 +0200
Subject: [PATCH] Quantum leap stringology algo

---
 ...ersedQuickSearchBadCharacterShiftTable.cpp | 15 +++
 ...eversedQuickSearchBadCharacterShiftTable.h | 53 ++++++++++
 .../src/stringology/exact/QuantumLeap.cpp     | 15 +++
 alib2algo/src/stringology/exact/QuantumLeap.h | 96 +++++++++++++++++++
 .../test-src/tests/exactMatching.cpp          |  1 +
 5 files changed, 180 insertions(+)
 create mode 100644 alib2algo/src/string/properties/ReversedQuickSearchBadCharacterShiftTable.cpp
 create mode 100644 alib2algo/src/string/properties/ReversedQuickSearchBadCharacterShiftTable.h
 create mode 100644 alib2algo/src/stringology/exact/QuantumLeap.cpp
 create mode 100644 alib2algo/src/stringology/exact/QuantumLeap.h

diff --git a/alib2algo/src/string/properties/ReversedQuickSearchBadCharacterShiftTable.cpp b/alib2algo/src/string/properties/ReversedQuickSearchBadCharacterShiftTable.cpp
new file mode 100644
index 0000000000..13888b8f77
--- /dev/null
+++ b/alib2algo/src/string/properties/ReversedQuickSearchBadCharacterShiftTable.cpp
@@ -0,0 +1,15 @@
+/*
+ * ReversedQuickSearchBadCharacterShiftTable.cpp
+ *
+ *  Created on: 23. 2. 2018
+ *	  Author: Michal Cvach
+ */
+
+#include "ReversedQuickSearchBadCharacterShiftTable.h"
+#include <registration/AlgoRegistration.hpp>
+
+namespace {
+/* Registers qsbcs for the default LinearString < > instantiation, returning ext::map < DefaultSymbolType, size_t > (presumably so the algorithm can be invoked by name - confirm against AlgoRegistration.hpp). */
+auto ReversedQuickSearchBadCharacterShiftTableLinearString = registration::AbstractRegister < string::properties::ReversedQuickSearchBadCharacterShiftTable, ext::map < DefaultSymbolType, size_t >, const string::LinearString < > & > ( string::properties::ReversedQuickSearchBadCharacterShiftTable::qsbcs );
+
+} /* namespace */
diff --git a/alib2algo/src/string/properties/ReversedQuickSearchBadCharacterShiftTable.h b/alib2algo/src/string/properties/ReversedQuickSearchBadCharacterShiftTable.h
new file mode 100644
index 0000000000..977cdf35e0
--- /dev/null
+++ b/alib2algo/src/string/properties/ReversedQuickSearchBadCharacterShiftTable.h
@@ -0,0 +1,53 @@
+/*
+ * ReversedQuickSearchBadCharacterShiftTable.h
+ *
+ *  Created on: 23. 2. 2018
+ *	  Author: Michal Cvach
+ */
+
+#ifndef _STRINGOLOGY_REVERSED_QUICK_SEARCH_BAD_CHARACTER_SHIFT_TABLE_H_
+#define _STRINGOLOGY_REVERSED_QUICK_SEARCH_BAD_CHARACTER_SHIFT_TABLE_H_
+
+#include <set>
+#include <map>
+
+#include <string/LinearString.h>
+
+namespace string {
+
+namespace properties {
+
+/**
+* Computation of the reversed (mirrored) bad character shift (BCS) table of the QuickSearch algorithm: the shifts are derived from the leftmost occurrence of each symbol in the pattern.
+*/
+class ReversedQuickSearchBadCharacterShiftTable {
+public:
+	/**
+	 * Creates the reversed bad character shift table of a pattern: every symbol of the pattern's alphabet is mapped to the 1-based index of its leftmost occurrence in the pattern, or to pattern length + 1 when it does not occur.
+	 * @return the BCS table in form of a map where key is the character from an alphabet and value is the shift.
+	 */
+	template < class SymbolType >
+	static ext::map < SymbolType, size_t > qsbcs ( const string::LinearString < SymbolType > & pattern );
+
+};
+
+/* Builds the reversed bad character shift table: symbol -> 1-based index of its leftmost occurrence in the pattern, or pattern length + 1 if absent. */
+template < class SymbolType >
+ext::map < SymbolType, size_t > ReversedQuickSearchBadCharacterShiftTable::qsbcs ( const string::LinearString < SymbolType > & pattern ) {
+	const auto & symbols = pattern.getContent ( );
+	const size_t absentShift = symbols.size ( ) + 1; /* shift of alphabet symbols that never occur in the pattern */
+	ext::map < SymbolType, size_t > shifts;
+	for ( const SymbolType & letter : pattern.getAlphabet ( ) )
+		shifts.insert ( std::make_pair ( letter, absentShift ) );
+
+	/* Walk right to left so that the leftmost occurrence of each symbol is written last and therefore wins. */
+	for ( size_t pos = symbols.size ( ); pos > 0; -- pos )
+		shifts [ symbols [ pos - 1 ] ] = pos;
+	return shifts;
+}
+
+} /* namespace properties */
+
+} /* namespace string */
+
+#endif /* _STRINGOLOGY_REVERSED_QUICK_SEARCH_BAD_CHARACTER_SHIFT_TABLE_H_ */
diff --git a/alib2algo/src/stringology/exact/QuantumLeap.cpp b/alib2algo/src/stringology/exact/QuantumLeap.cpp
new file mode 100644
index 0000000000..6f03074e6b
--- /dev/null
+++ b/alib2algo/src/stringology/exact/QuantumLeap.cpp
@@ -0,0 +1,15 @@
+/*
+ * QuantumLeap.cpp
+ *
+ *  Created on: 19. 6. 2020
+ *      Author: Jan Travnicek
+ */
+
+#include "QuantumLeap.h"
+#include <registration/AlgoRegistration.hpp>
+
+namespace {
+/* Registers match for two default LinearString < > arguments plus a size_t, returning ext::set < unsigned > (presumably so the algorithm can be invoked by name - confirm against AlgoRegistration.hpp). */
+auto QuantumLeapLinearStringLinearString = registration::AbstractRegister < stringology::exact::QuantumLeap, ext::set < unsigned >, const string::LinearString < > &, const string::LinearString < > &, size_t > ( stringology::exact::QuantumLeap::match );
+
+} /* namespace */
diff --git a/alib2algo/src/stringology/exact/QuantumLeap.h b/alib2algo/src/stringology/exact/QuantumLeap.h
new file mode 100644
index 0000000000..ee48ea9f3f
--- /dev/null
+++ b/alib2algo/src/stringology/exact/QuantumLeap.h
@@ -0,0 +1,96 @@
+/*
+ * QuantumLeap.h
+ *
+ *  Created on: 19. 6. 2020
+ *      Author: Jan Travnicek
+ */
+
+#ifndef STRINGOLOGY_QUANTUM_LEAP_H_
+#define STRINGOLOGY_QUANTUM_LEAP_H_
+
+#include <alib/set>
+#include <alib/map>
+#include <alib/measure>
+
+#include <string/LinearString.h>
+#include <alphabet/EndSymbol.h>
+
+#include <string/properties/QuickSearchBadCharacterShiftTable.h>
+#include <string/properties/ReversedQuickSearchBadCharacterShiftTable.h>
+
+#include <global/GlobalData.h>
+
+namespace stringology {
+
+namespace exact {
+
+/**
+ * Exact pattern matching that combines a forward Quick Search shift with a reversed Quick Search shift
+ * taken from the symbol a given number of positions (the leap) ahead; it advances by the whole leap when the two shifts cross.
+ */
+class QuantumLeap {
+public:
+	/**
+	 * Search for pattern in linear string; the unnamed third parameter is the length of the leap.
+	 * @return set of occurrences (start indexes) of the pattern in the string
+	 */
+	template < class SymbolType >
+	static ext::set < unsigned > match ( const string::LinearString < SymbolType > & string, const string::LinearString < SymbolType > & pattern, size_t );
+
+};
+
+/* Quantum Leap exact matching: returns the start indexes of all occurrences of pattern in string; z is the leap length (0 is treated as 1). */
+template < class SymbolType >
+ext::set < unsigned > QuantumLeap::match ( const string::LinearString < SymbolType > & string, const string::LinearString < SymbolType > & pattern, size_t z ) {
+	/* A leap of 0 would add no padding (the lookups below would read out of bounds) and could stall the scan, so use at least 1. */
+	const size_t leap = ( z == 0 ) ? 1 : z;
+
+	const std::vector < SymbolType > & needle = pattern.getContent ( );
+	const std::vector < SymbolType > & text = string.getContent ( );
+	const size_t m = needle.size ( );
+	const size_t n = text.size ( );
+
+	/* Work on a copy of the subject padded with leap end symbols so that haystack [ i + m ] and haystack [ i + leap - 1 ] always exist. */
+	std::vector < SymbolType > haystack;
+	haystack.reserve ( n + leap );
+	haystack.insert ( haystack.end ( ), text.begin ( ), text.end ( ) );
+	haystack.insert ( haystack.end ( ), leap, alphabet::EndSymbol::instance < SymbolType > ( ) );
+
+	measurements::start ( "Preprocess", measurements::Type::PREPROCESS );
+	/* NOTE: the subject's alphabet must be a subset of (or equal to) the pattern's alphabet, otherwise the at ( ) lookups in the search loop fail. */
+	ext::map < SymbolType, size_t > fbcs = string::properties::QuickSearchBadCharacterShiftTable::qsbcs ( pattern );
+	ext::map < SymbolType, size_t > bbcs = string::properties::ReversedQuickSearchBadCharacterShiftTable::qsbcs ( pattern );
+	/* The padding symbol is registered with the shift of a symbol that does not occur in the pattern. */
+	fbcs.insert ( alphabet::EndSymbol::instance < SymbolType > ( ), m + 1 );
+	bbcs.insert ( alphabet::EndSymbol::instance < SymbolType > ( ), m + 1 );
+	/* Turn each backward shift into the largest forward shift it still permits; a backward shift of at least the leap permits none (0 instead of an unsigned wrap-around). */
+	for ( std::pair < const SymbolType, size_t > & entry : bbcs )
+		entry.second = ( entry.second < leap ) ? leap - entry.second : 0;
+	measurements::end ( );
+
+	if ( common::GlobalData::verbose ) {
+		common::Streams::log << "fbcs = " << fbcs << std::endl;
+		common::Streams::log << "bbcs = " << bbcs << std::endl;
+	}
+
+	ext::set < unsigned > occ;
+	measurements::start ( "Algorithm", measurements::Type::ALGORITHM );
+	for ( size_t i = 0; i + m <= n; ) {
+		size_t j = 0;
+		while ( j < m && haystack [ i + j ] == needle [ j ] )
+			++ j;
+		if ( j == m ) occ.insert ( i ); /* the whole pattern matched at i */
+		const size_t sf = fbcs.at ( haystack [ i + m ] );
+		const size_t sb = bbcs.at ( haystack [ i + leap - 1 ] );
+		/* sf <= sb: the shifts do not cross, only the Quick Search shift is safe. Otherwise no occurrence starts within the next leap positions, so skip them all (but never less than sf). */
+		i += ( sf <= sb || sf >= leap ) ? sf : leap;
+	}
+	measurements::end ( );
+	return occ;
+}
+
+} /* namespace exact */
+
+} /* namespace stringology */
+
+#endif /* STRINGOLOGY_QUANTUM_LEAP_H_ */
diff --git a/alib2integrationtest/test-src/tests/exactMatching.cpp b/alib2integrationtest/test-src/tests/exactMatching.cpp
index 7e6119a75b..696efd2c95 100644
--- a/alib2integrationtest/test-src/tests/exactMatching.cpp
+++ b/alib2integrationtest/test-src/tests/exactMatching.cpp
@@ -25,6 +25,7 @@ static std::string qGenString ( size_t min_len, size_t max_len, size_t alph_len,
 
 TEST_CASE ( "ExactMatching", "[integration]" ) {
 	auto definition = GENERATE ( as < std::tuple < std::string, std::string, bool > > ( ),
+			std::make_tuple ( "Exact Quantum Leap", "stringology::exact::QuantumLeap $subject $pattern 10", true ),
 			std::make_tuple ( "Exact Boyer Moore", "stringology::exact::BoyerMoore $subject $pattern", true ),
 			std::make_tuple ( "Exact Knuth Morris Pratt", "stringology::exact::KnuthMorrisPratt $subject $pattern", false ),
 			std::make_tuple ( "Exact Boyer Moore Horspool", " stringology::exact::BoyerMooreHorspool $subject $pattern", true ),
-- 
GitLab