From d44964c58090c802f5e3df230459ab977f8aa931 Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Tue, 6 Dec 2016 10:03:12 +0100
Subject: [PATCH] template some stringology algorithms

---
 .../generate/RandomSubstringFactory.cpp       | 19 ---------
 .../string/generate/RandomSubstringFactory.h  | 21 +++++++++-
 .../stringology/exact/BoyerMooreHorspool.cpp  | 32 ---------------
 .../stringology/exact/BoyerMooreHorspool.h    | 36 +++++++++++++++-
 .../exact/DeadZoneUsingBadCharacterShift.cpp  | 32 ---------------
 .../exact/DeadZoneUsingBadCharacterShift.h    | 41 +++++++++++++++++--
 .../stringology/exact/ExactFactorMatch.cpp    | 19 ---------
 .../src/stringology/exact/ExactFactorMatch.h  | 26 +++++++++++-
 .../exact/ReversedBoyerMooreHorspool.cpp      | 29 -------------
 .../exact/ReversedBoyerMooreHorspool.h        | 33 ++++++++++++++-
 10 files changed, 149 insertions(+), 139 deletions(-)

diff --git a/alib2algo/src/string/generate/RandomSubstringFactory.cpp b/alib2algo/src/string/generate/RandomSubstringFactory.cpp
index 2f945b3da6..f624a03a54 100644
--- a/alib2algo/src/string/generate/RandomSubstringFactory.cpp
+++ b/alib2algo/src/string/generate/RandomSubstringFactory.cpp
@@ -7,12 +7,6 @@
 
 #include "RandomSubstringFactory.h"
 
-#include <algorithm>
-#include <random>
-#include <exception/CommonException.h>
-
-#include <string/LinearString.h>
-
 namespace string {
 
 namespace generate {
@@ -21,19 +15,6 @@ string::String RandomSubstringFactory::generateSubstring ( size_t size, const st
 	return dispatch ( size, v.getData ( ) );
 }
 
-string::LinearString < > RandomSubstringFactory::generateSubstring ( size_t size, const string::LinearString < > & string ) {
-	if ( size > string.getContent ( ).size ( ) )
-		throw exception::CommonException ( "String not long enough" );
-
-	size_t begin = std::random_devices::semirandom ( ) % ( string.getContent ( ).size ( ) - size + 1 );
-
-	std::vector < alphabet::Symbol > data ( string.getContent ( ).begin ( ) + begin, string.getContent ( ).begin ( ) + begin + size );
-
-	return LinearString < > {
-			   string.getAlphabet ( ), data
-	};
-}
-
 auto RandomSubstringFactoryLinearString = RandomSubstringFactory::RegistratorWrapper < string::LinearString < >, string::LinearString < > > ( RandomSubstringFactory::generateSubstring );
 
 } /* namespace generate */
diff --git a/alib2algo/src/string/generate/RandomSubstringFactory.h b/alib2algo/src/string/generate/RandomSubstringFactory.h
index 584b321722..e1ac8c0f58 100644
--- a/alib2algo/src/string/generate/RandomSubstringFactory.h
+++ b/alib2algo/src/string/generate/RandomSubstringFactory.h
@@ -12,6 +12,12 @@
 #include <string/String.h>
 #include <string/StringFeatures.h>
 
+#include <algorithm>
+#include <random>
+#include <exception/CommonException.h>
+
+#include <string/LinearString.h>
+
 namespace string {
 
 namespace generate {
@@ -20,10 +26,36 @@ class RandomSubstringFactory : public std::SingleDispatchFirstStaticParam < Rand
 public:
 	static string::String generateSubstring ( size_t size, const string::String & );
 
-	static string::LinearString < > generateSubstring ( size_t size, const string::LinearString < > & );
+	template < class SymbolType >
+	static string::LinearString < SymbolType > generateSubstring ( size_t size, const string::LinearString < SymbolType > & );
 
 };
 
+/**
+ * Cuts a substring of the requested length out of a linear string, starting at an offset
+ * derived from std::random_devices::semirandom ( ).
+ *
+ * @param size number of symbols the substring shall have
+ * @param string the linear string the substring is taken from
+ * @return linear string with the alphabet of the source string and size consecutive symbols of it
+ * @throws exception::CommonException when size exceeds the number of symbols in the source string
+ */
+template < class SymbolType >
+string::LinearString < SymbolType > RandomSubstringFactory::generateSubstring ( size_t size, const string::LinearString < SymbolType > & string ) {
+	const auto & content = string.getContent ( );
+	const size_t length = content.size ( );
+
+	if ( length < size )
+		throw exception::CommonException ( "String not long enough" );
+
+	// there are length - size + 1 admissible start offsets; reduce the random value into that range
+	const size_t offset = std::random_devices::semirandom ( ) % ( length - size + 1 );
+
+	const auto first = content.begin ( ) + offset;
+	std::vector < SymbolType > data ( first, first + size );
+
+	return LinearString < SymbolType > ( string.getAlphabet ( ), data );
+}
+
 } /* namespace generate */
 
 } /* namespace string */
diff --git a/alib2algo/src/stringology/exact/BoyerMooreHorspool.cpp b/alib2algo/src/stringology/exact/BoyerMooreHorspool.cpp
index 56831ebd76..d0107b4f9d 100644
--- a/alib2algo/src/stringology/exact/BoyerMooreHorspool.cpp
+++ b/alib2algo/src/stringology/exact/BoyerMooreHorspool.cpp
@@ -6,13 +6,6 @@
  */
 
 #include "BoyerMooreHorspool.h"
-#include <string/properties/BadCharacterShiftTable.h>
-
-#include <string/LinearString.h>
-#include <alphabet/Symbol.h>
-
-#include <map>
-#include <measure>
 
 namespace stringology {
 
@@ -22,31 +15,6 @@ std::set<unsigned> BoyerMooreHorspool::match(const string::String& subject, cons
 	return dispatch(subject.getData(), pattern.getData());
 }
 
-std::set<unsigned> BoyerMooreHorspool::match(const string::LinearString < >& string, const string::LinearString < >& pattern) {
-	std::set<unsigned> occ;
-
-	measurements::start ( "Preprocess", measurements::Type::PREPROCESS );
-	std::map<alphabet::Symbol, size_t> bcs = string::properties::BadCharacterShiftTable::bcs(pattern); //NOTE: the subjects alphabet must be a subset or equal to the pattern
-	measurements::end ( );
-
-	measurements::start ( "Algorithm", measurements::Type::ALGORITHM );
-	size_t haystack_offset = 0;
-	while(haystack_offset + pattern.getContent().size() <= string.getContent().size()) {
-		size_t i = pattern.getContent().size();
-		while(i > 0 && string.getContent()[haystack_offset + i - 1] == pattern.getContent()[i - 1]) {
-			i--;
-		}
-
-		// Yay, there is match!!!
-		if(i == 0) occ.insert(haystack_offset);
-		haystack_offset += bcs[string.getContent()[haystack_offset + pattern.getContent().size() - 1]];
-		//std::cout << haystack_offset << std::endl;
-	}
-	measurements::end ( );
-
-	return occ;
-}
-
 auto BoyerMooreHorpoolLinearStringLinearString = BoyerMooreHorspool::RegistratorWrapper<std::set<unsigned>, string::LinearString < >, string::LinearString < >>(BoyerMooreHorspool::match);
 
 } /* namespace exact */
diff --git a/alib2algo/src/stringology/exact/BoyerMooreHorspool.h b/alib2algo/src/stringology/exact/BoyerMooreHorspool.h
index 5cbe934193..e147d26fcf 100644
--- a/alib2algo/src/stringology/exact/BoyerMooreHorspool.h
+++ b/alib2algo/src/stringology/exact/BoyerMooreHorspool.h
@@ -9,9 +9,16 @@
 #define _STRINGOLOGY_BOYER_MOORE_HORSPOOL_H_
 
 #include <set>
+#include <map>
+#include <measure>
+
 #include <core/multipleDispatch.hpp>
+
 #include <string/String.h>
 #include <string/StringFeatures.h>
+#include <string/LinearString.h>
+
+#include <string/properties/BadCharacterShiftTable.h>
 
 namespace stringology {
 
@@ -29,10 +36,62 @@ public:
 	 */
 	static std::set < unsigned > match ( const string::String & subject, const string::String & pattern );
 
-	static std::set < unsigned > match ( const string::LinearString < > & subject, const string::LinearString < > & pattern );
+	template < class SymbolType >
+	static std::set < unsigned > match ( const string::LinearString < SymbolType > & subject, const string::LinearString < SymbolType > & pattern );
 
 };
 
+/**
+ * Finds all occurrences of pattern in a linear string, comparing each window right to left
+ * and advancing by the bad character shift of the symbol under the window's last position.
+ *
+ * @param string the subject that is searched
+ * @param pattern the pattern that is searched for
+ * @return offsets into the subject at which an occurrence of the pattern starts
+ */
+template < class SymbolType >
+std::set<unsigned> BoyerMooreHorspool::match(const string::LinearString < SymbolType >& string, const string::LinearString < SymbolType >& pattern) {
+	std::set<unsigned> occ;
+
+	const auto & text = string.getContent();
+	const auto & needle = pattern.getContent();
+	const size_t patternSize = needle.size();
+
+	// An empty pattern occurs at every offset 0..|text|. It is answered up front because
+	// the loop below indexes text[offset + patternSize - 1], which would underflow.
+	if(patternSize == 0) {
+		for(size_t offset = 0; offset <= text.size(); ++offset)
+			occ.insert(offset);
+		return occ;
+	}
+
+	measurements::start ( "Preprocess", measurements::Type::PREPROCESS );
+	const std::map<SymbolType, size_t> bcs = string::properties::BadCharacterShiftTable::bcs(pattern);
+	measurements::end ( );
+
+	measurements::start ( "Algorithm", measurements::Type::ALGORITHM );
+	size_t haystack_offset = 0;
+	while(haystack_offset + patternSize <= text.size()) {
+		size_t i = patternSize;
+		while(i > 0 && text[haystack_offset + i - 1] == needle[i - 1]) {
+			i--;
+		}
+
+		// Yay, there is match!!!
+		if(i == 0) occ.insert(haystack_offset);
+
+		// The shift is looked up with find(): operator[] would insert a zero shift for a symbol
+		// without an entry and the loop would never advance. NOTE(review): assumes the table
+		// covers every symbol of the pattern's alphabet, so a missing symbol cannot occur in
+		// the pattern and skipping a whole window is safe - confirm against BadCharacterShiftTable.
+		const auto shift = bcs.find(text[haystack_offset + patternSize - 1]);
+		haystack_offset += shift == bcs.end() ? patternSize : shift->second;
+	}
+	measurements::end ( );
+
+	return occ;
+}
+
 } /* namespace exact */
 
 } /* namespace stringology */
diff --git a/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.cpp b/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.cpp
index 831366295e..5006c77bc4 100644
--- a/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.cpp
+++ b/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.cpp
@@ -6,13 +6,6 @@
  */
 
 #include "DeadZoneUsingBadCharacterShift.h"
-#include <string/properties/BadCharacterShiftTable.h>
-#include <string/properties/ReversedBadCharacterShiftTable.h>
-
-#include <string/LinearString.h>
-#include <alphabet/Symbol.h>
-
-#include <map>
 
 namespace stringology {
 
@@ -22,31 +15,6 @@ std::set < unsigned > DeadZoneUsingBadCharacterShift::match ( const string::Stri
 	return dispatch ( subject.getData ( ), pattern.getData ( ) );
 }
 
-std::set < unsigned > DeadZoneUsingBadCharacterShift::match ( const string::LinearString < > & string, const string::LinearString < > & pattern ) {
-	std::set < unsigned > occ;
-	std::map < alphabet::Symbol, size_t > fbcs = string::properties::BadCharacterShiftTable::bcs ( pattern );     // NOTE: the subjects alphabet must be a subset or equal to the pattern
-	std::map < alphabet::Symbol, size_t > bbcs = string::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
-
-	match_rec ( occ, string, pattern, fbcs, bbcs, 0, string.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1 );
-	return occ;
-}
-
-void DeadZoneUsingBadCharacterShift::match_rec ( std::set < unsigned > & occ, const string::LinearString < > & string, const string::LinearString < > & pattern, std::map < alphabet::Symbol, size_t > & fbcs, std::map < alphabet::Symbol, size_t > & bbcs, int low, int high ) {
-	if ( low >= high ) return;
-
-	int middle = ( low + high ) / 2;
-	size_t i = 0;
-
-	while ( i < pattern.getContent ( ).size ( ) && string.getContent ( )[middle + i] == pattern.getContent ( )[i] )
-		i++;
-
-	 // Yay, there is match!!!
-	if ( i == pattern.getContent ( ).size ( ) ) occ.insert ( middle );
-
-	match_rec ( occ, string, pattern, fbcs, bbcs, low, middle - bbcs[string.getContent ( )[middle]] + 1 );
-	match_rec ( occ, string, pattern, fbcs, bbcs, middle + fbcs[string.getContent ( )[middle + pattern.getContent ( ).size ( ) - 1]], high );
-}
-
 auto DeadZoneUsingBadCharacterShiftLinearStringLinearString = DeadZoneUsingBadCharacterShift::RegistratorWrapper < std::set < unsigned >, string::LinearString < >, string::LinearString < > > ( DeadZoneUsingBadCharacterShift::match );
 
 } /* namespace exact */
diff --git a/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.h b/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.h
index 18136315af..694f495805 100644
--- a/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.h
+++ b/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.h
@@ -9,9 +9,15 @@
 #define _DEAD_ZONE_USING_BAD_CHARACTER_SHIFT_H_
 
 #include <set>
+#include <map>
+
 #include <core/multipleDispatch.hpp>
 #include <string/String.h>
 #include <string/StringFeatures.h>
+#include <string/LinearString.h>
+
+#include <string/properties/BadCharacterShiftTable.h>
+#include <string/properties/ReversedBadCharacterShiftTable.h>
 
 namespace stringology {
 
@@ -21,6 +27,9 @@ namespace exact {
  * Implementation of DeadZone matching using bcs as shifting method to both directions
  */
 class DeadZoneUsingBadCharacterShift : public std::DoubleDispatch < DeadZoneUsingBadCharacterShift, std::set < unsigned >, const string::StringBase &, const string::StringBase & > {
+	template < class SymbolType >
+	static void match_rec ( std::set < unsigned > & occ, const string::LinearString < SymbolType > & string, const string::LinearString < SymbolType > & pattern, std::map < SymbolType, size_t > & fbcs, std::map < SymbolType, size_t > & bbcs, int low, int high );
+
 public:
 	/**
 	 * Search for pattern in linear string.
@@ -28,11 +37,86 @@ public:
 	 */
 	static std::set < unsigned > match ( const string::String & subject, const string::String & pattern );
 
-	static std::set < unsigned > match ( const string::LinearString < > & subject, const string::LinearString < > & pattern );
-	static void match_rec ( std::set < unsigned > & occ, const string::LinearString < > & string, const string::LinearString < > & pattern, std::map < alphabet::Symbol, size_t > & fbcs, std::map < alphabet::Symbol, size_t > & bbcs, int low, int high );
-
+	template < class SymbolType >
+	static std::set < unsigned > match ( const string::LinearString < SymbolType > & subject, const string::LinearString < SymbolType > & pattern );
 };
 
+/**
+ * Finds all occurrences of pattern in a linear string by recursive dead-zone search,
+ * narrowing the range left and right of each probed offset with bad character shifts.
+ *
+ * @param string the subject that is searched
+ * @param pattern the pattern that is searched for
+ * @return offsets into the subject at which an occurrence of the pattern starts
+ */
+template < class SymbolType >
+std::set < unsigned > DeadZoneUsingBadCharacterShift::match ( const string::LinearString < SymbolType > & string, const string::LinearString < SymbolType > & pattern ) {
+	std::set < unsigned > occ;
+
+	const size_t textSize = string.getContent ( ).size ( );
+	const size_t patternSize = pattern.getContent ( ).size ( );
+
+	// An empty pattern occurs at every offset 0..|text|. It is answered here because
+	// match_rec indexes the subject at middle + patternSize - 1.
+	if ( patternSize == 0 ) {
+		for ( size_t offset = 0; offset <= textSize; ++offset )
+			occ.insert ( offset );
+		return occ;
+	}
+
+	// A pattern longer than the subject cannot occur; returning early also keeps the
+	// unsigned subtraction below from wrapping around.
+	if ( patternSize > textSize ) return occ;
+
+	std::map < SymbolType, size_t > fbcs = string::properties::BadCharacterShiftTable::bcs ( pattern );
+	std::map < SymbolType, size_t > bbcs = string::properties::ReversedBadCharacterShiftTable::bcs ( pattern );
+
+	match_rec ( occ, string, pattern, fbcs, bbcs, 0, static_cast < int > ( textSize - patternSize + 1 ) );
+	return occ;
+}
+
+/**
+ * Searches the candidate start offsets from low (inclusive) to high (exclusive): probes the
+ * middle offset and recurses into what remains on either side after applying the shifts.
+ *
+ * @param occ set collecting the offsets of the occurrences found
+ * @param string the subject that is searched
+ * @param pattern the pattern that is searched for; expected to be non-empty
+ * @param fbcs shift table consulted with the symbol under the pattern's last position
+ * @param bbcs shift table consulted with the symbol under the pattern's first position
+ * @param low first candidate offset of the range
+ * @param high one past the last candidate offset of the range
+ */
+template < class SymbolType >
+void DeadZoneUsingBadCharacterShift::match_rec ( std::set < unsigned > & occ, const string::LinearString < SymbolType > & string, const string::LinearString < SymbolType > & pattern, std::map < SymbolType, size_t > & fbcs, std::map < SymbolType, size_t > & bbcs, int low, int high ) {
+	if ( low >= high ) return;
+
+	const auto & text = string.getContent ( );
+	const auto & needle = pattern.getContent ( );
+	const int patternSize = static_cast < int > ( needle.size ( ) );
+
+	int middle = ( low + high ) / 2;
+	size_t i = 0;
+
+	while ( i < needle.size ( ) && text[middle + i] == needle[i] )
+		i++;
+
+	 // Yay, there is match!!!
+	if ( i == needle.size ( ) ) occ.insert ( middle );
+
+	// Shifts are looked up with find ( ): operator[] would insert a zero shift for a symbol
+	// without an entry and the recursion would then never terminate.
+	// NOTE(review): assumes both tables cover every symbol of the pattern's alphabet, so a
+	// missing symbol cannot occur in the pattern and a full pattern length may be skipped.
+	const auto backward = bbcs.find ( text[middle] );
+	const auto forward = fbcs.find ( text[middle + patternSize - 1] );
+	const int backwardShift = backward == bbcs.end ( ) ? patternSize : static_cast < int > ( backward->second );
+	const int forwardShift = forward == fbcs.end ( ) ? patternSize : static_cast < int > ( forward->second );
+
+	match_rec ( occ, string, pattern, fbcs, bbcs, low, middle - backwardShift + 1 );
+	match_rec ( occ, string, pattern, fbcs, bbcs, middle + forwardShift, high );
+}
+
 } /* namespace exact */
 
 } /* namespace stringology */
diff --git a/alib2algo/src/stringology/exact/ExactFactorMatch.cpp b/alib2algo/src/stringology/exact/ExactFactorMatch.cpp
index 1afd9953be..3ad442f933 100644
--- a/alib2algo/src/stringology/exact/ExactFactorMatch.cpp
+++ b/alib2algo/src/stringology/exact/ExactFactorMatch.cpp
@@ -6,9 +6,6 @@
  */
 
 #include "ExactFactorMatch.h"
-#include <string/LinearString.h>
-
-#include <deque>
 
 namespace stringology {
 
@@ -18,22 +15,6 @@ std::set < unsigned > ExactFactorMatch::match ( const string::String & subject,
 	return dispatch ( subject.getData ( ), pattern.getData ( ) );
 }
 
-std::set < unsigned > ExactFactorMatch::match ( const string::LinearString < > & subject, const string::LinearString < > & pattern ) {
-	std::set < unsigned > occ;
-
-	for ( unsigned i = 0; i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ); i++ ) {
-		unsigned j = 0;
-
-		for ( ; j < pattern.getContent ( ).size ( ); j++ )
-			if ( pattern.getContent ( )[j] != subject.getContent ( )[i + j] ) break;
-
-		if ( j == pattern.getContent ( ).size ( ) )
-			occ.insert ( i );
-	}
-
-	return occ;
-}
-
 auto ExactFactorMatchLinearString = ExactFactorMatch::RegistratorWrapper < std::set < unsigned >, string::LinearString < > > ( ExactFactorMatch::match );
 
 } /* namespace exact */
diff --git a/alib2algo/src/stringology/exact/ExactFactorMatch.h b/alib2algo/src/stringology/exact/ExactFactorMatch.h
index 2818f2e4b9..3e7a7401e3 100644
--- a/alib2algo/src/stringology/exact/ExactFactorMatch.h
+++ b/alib2algo/src/stringology/exact/ExactFactorMatch.h
@@ -8,10 +8,14 @@
 #ifndef _EXACT_MATCH_H__
 #define _EXACT_MATCH_H__
 
-#include <string/String.h>
 #include <set>
+#include <deque>
+
 #include <core/multipleDispatch.hpp>
+
+#include <string/String.h>
 #include <string/StringFeatures.h>
+#include <string/LinearString.h>
 
 namespace stringology {
 
@@ -25,9 +29,39 @@ public:
 	 */
 	static std::set<unsigned> match(const string::String& subject, const string::String& pattern);
 
-	static std::set<unsigned> match(const string::LinearString < >& subject, const string::LinearString < >& pattern);
+	template < class SymbolType >
+	static std::set<unsigned> match(const string::LinearString < SymbolType >& subject, const string::LinearString < SymbolType >& pattern);
 };
 
+/**
+ * Naive exact matching: tries every start offset of the subject at which the pattern still
+ * fits and compares the pattern against the subject symbol by symbol.
+ *
+ * @param subject the linear string that is searched
+ * @param pattern the linear string that is searched for
+ * @return offsets into the subject at which an occurrence of the pattern starts
+ */
+template < class SymbolType >
+std::set < unsigned > ExactFactorMatch::match ( const string::LinearString < SymbolType > & subject, const string::LinearString < SymbolType > & pattern ) {
+	const auto & text = subject.getContent ( );
+	const auto & needle = pattern.getContent ( );
+
+	std::set < unsigned > occ;
+
+	for ( unsigned start = 0; start + needle.size ( ) <= text.size ( ); ++start ) {
+		bool mismatch = false;
+
+		// comparison of one alignment stops at the first differing symbol
+		for ( unsigned k = 0; k < needle.size ( ) && !mismatch; ++k )
+			mismatch = needle[k] != text[start + k];
+
+		if ( !mismatch )
+			occ.insert ( start );
+	}
+
+	return occ;
+}
+
 } /* namespace exact */
 
 } /* namespace stringology */
diff --git a/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.cpp b/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.cpp
index fc189d7cad..6b6d1d8804 100644
--- a/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.cpp
+++ b/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.cpp
@@ -6,12 +6,6 @@
  */
 
 #include "ReversedBoyerMooreHorspool.h"
-#include <string/properties/ReversedBadCharacterShiftTable.h>
-
-#include <string/LinearString.h>
-#include <alphabet/Symbol.h>
-
-#include <map>
 
 namespace stringology {
 
@@ -21,29 +15,6 @@ std::set < unsigned > ReversedBoyerMooreHorspool::match ( const string::String &
 	return dispatch ( subject.getData ( ), pattern.getData ( ) );
 }
 
-std::set < unsigned > ReversedBoyerMooreHorspool::match ( const string::LinearString < > & string, const string::LinearString < > & pattern ) {
-	std::set < unsigned > occ;
-	std::map < alphabet::Symbol, size_t > bcs = string::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
-
-	int haystack_offset = string.getContent ( ).size ( ) - pattern.getContent ( ).size ( );
-
-	while ( haystack_offset >= 0 ) {
-		size_t i = 0;
-
-		while ( i < pattern.getContent ( ).size ( ) && string.getContent ( )[haystack_offset + i] == pattern.getContent ( )[i] )
-			i++;
-
-		 // Yay, there is match!!!
-		if ( i == pattern.getContent ( ).size ( ) ) occ.insert ( haystack_offset );
-
-		haystack_offset -= bcs[string.getContent ( )[haystack_offset]];
-
-		// std::cout << haystack_offset << std::endl;
-	}
-
-	return occ;
-}
-
 auto ReversedBoyerMooreHorpoolLinearStringLinearString = ReversedBoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, string::LinearString < >, string::LinearString < > > ( ReversedBoyerMooreHorspool::match );
 
 } /* namespace exact */
diff --git a/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.h b/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.h
index 207b4d7864..803ab30fcd 100644
--- a/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.h
+++ b/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.h
@@ -9,9 +9,15 @@
 #define _STRINGOLOGY_REVERSED_BOYER_MOORE_HORSPOOL_H_
 
 #include <set>
+#include <map>
+
 #include <core/multipleDispatch.hpp>
+
 #include <string/String.h>
 #include <string/StringFeatures.h>
+#include <string/LinearString.h>
+
+#include <string/properties/ReversedBadCharacterShiftTable.h>
 
 namespace stringology {
 
@@ -29,10 +35,67 @@ public:
 	 */
 	static std::set < unsigned > match ( const string::String & subject, const string::String & pattern );
 
-	static std::set < unsigned > match ( const string::LinearString < > & subject, const string::LinearString < > & pattern );
+	template < class SymbolType >
+	static std::set < unsigned > match ( const string::LinearString < SymbolType > & subject, const string::LinearString < SymbolType > & pattern );
 
 };
 
+/**
+ * Finds all occurrences of pattern in a linear string, starting with the last window of the
+ * subject and moving towards its start by the shift of the symbol under the window's first position.
+ *
+ * @param string the subject that is searched
+ * @param pattern the pattern that is searched for
+ * @return offsets into the subject at which an occurrence of the pattern starts
+ */
+template < class SymbolType >
+std::set < unsigned > ReversedBoyerMooreHorspool::match ( const string::LinearString < SymbolType > & string, const string::LinearString < SymbolType > & pattern ) {
+	std::set < unsigned > occ;
+
+	const auto & text = string.getContent ( );
+	const auto & needle = pattern.getContent ( );
+	const size_t patternSize = needle.size ( );
+
+	// An empty pattern occurs at every offset 0..|text|. It is answered up front because
+	// the shift lookup below reads text[offset], which for offset == |text| is out of bounds.
+	if ( patternSize == 0 ) {
+		for ( size_t offset = 0; offset <= text.size ( ); ++offset )
+			occ.insert ( offset );
+		return occ;
+	}
+
+	// A pattern longer than the subject cannot occur; returning early also keeps the
+	// unsigned subtraction below from wrapping around.
+	if ( patternSize > text.size ( ) ) return occ;
+
+	const std::map < SymbolType, size_t > bcs = string::properties::ReversedBadCharacterShiftTable::bcs ( pattern );
+
+	size_t haystack_offset = text.size ( ) - patternSize;
+
+	while ( true ) {
+		size_t i = 0;
+
+		while ( i < patternSize && text[haystack_offset + i] == needle[i] )
+			i++;
+
+		 // Yay, there is match!!!
+		if ( i == patternSize ) occ.insert ( haystack_offset );
+
+		// The shift is looked up with find ( ): operator[] would insert a zero shift for a symbol
+		// without an entry and the loop would never move. NOTE(review): assumes the table covers
+		// every symbol of the pattern's alphabet, so a missing symbol cannot occur in the pattern
+		// and a full pattern length may be skipped - confirm against ReversedBadCharacterShiftTable.
+		const auto entry = bcs.find ( text[haystack_offset] );
+		const size_t shift = entry == bcs.end ( ) ? patternSize : entry->second;
+
+		// a shift past the start of the subject ends the search
+		if ( shift > haystack_offset ) break;
+		haystack_offset -= shift;
+	}
+
+	return occ;
+}
+
 } /* namespace exact */
 
 } /* namespace stringology */
-- 
GitLab