From 1c08550da5b10f956db4932e839d1b4ae7dffb9d Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Fri, 19 Jun 2020 18:52:15 +0200 Subject: [PATCH] Quantum leap stringology algo --- ...ersedQuickSearchBadCharacterShiftTable.cpp | 15 +++ ...eversedQuickSearchBadCharacterShiftTable.h | 53 ++++++++++ .../src/stringology/exact/QuantumLeap.cpp | 15 +++ alib2algo/src/stringology/exact/QuantumLeap.h | 96 +++++++++++++++++++ .../test-src/tests/exactMatching.cpp | 1 + 5 files changed, 180 insertions(+) create mode 100644 alib2algo/src/string/properties/ReversedQuickSearchBadCharacterShiftTable.cpp create mode 100644 alib2algo/src/string/properties/ReversedQuickSearchBadCharacterShiftTable.h create mode 100644 alib2algo/src/stringology/exact/QuantumLeap.cpp create mode 100644 alib2algo/src/stringology/exact/QuantumLeap.h diff --git a/alib2algo/src/string/properties/ReversedQuickSearchBadCharacterShiftTable.cpp b/alib2algo/src/string/properties/ReversedQuickSearchBadCharacterShiftTable.cpp new file mode 100644 index 0000000000..13888b8f77 --- /dev/null +++ b/alib2algo/src/string/properties/ReversedQuickSearchBadCharacterShiftTable.cpp @@ -0,0 +1,15 @@ +/* + * ReversedQuickSearchBadCharacterShiftTable.cpp + * + * Created on: 23. 2. 
2018
+ *      Author: Michal Cvach
+ */
+
+#include "ReversedQuickSearchBadCharacterShiftTable.h"
+#include <registration/AlgoRegistration.hpp>
+
+namespace {
+
+auto ReversedQuickSearchBadCharacterShiftTableLinearString = registration::AbstractRegister < string::properties::ReversedQuickSearchBadCharacterShiftTable, ext::map < DefaultSymbolType, size_t >, const string::LinearString < > & > ( string::properties::ReversedQuickSearchBadCharacterShiftTable::qsbcs );
+
+} /* namespace */
diff --git a/alib2algo/src/string/properties/ReversedQuickSearchBadCharacterShiftTable.h b/alib2algo/src/string/properties/ReversedQuickSearchBadCharacterShiftTable.h
new file mode 100644
index 0000000000..977cdf35e0
--- /dev/null
+++ b/alib2algo/src/string/properties/ReversedQuickSearchBadCharacterShiftTable.h
@@ -0,0 +1,53 @@
+/*
+ * ReversedQuickSearchBadCharacterShiftTable.h
+ *
+ *  Created on: 23. 2. 2018
+ *      Author: Michal Cvach
+ */
+
+#ifndef STRINGOLOGY_REVERSED_QUICK_SEARCH_BAD_CHARACTER_SHIFT_TABLE_H_
+#define STRINGOLOGY_REVERSED_QUICK_SEARCH_BAD_CHARACTER_SHIFT_TABLE_H_
+
+#include <set>
+#include <map>
+
+#include <string/LinearString.h>
+
+namespace string {
+
+namespace properties {
+
+/**
+ * Computation of the reversed counterpart of the QuickSearch bad character shift (BCS) table: shifts are derived from the leftmost occurrence of a symbol in the pattern.
+ */
+class ReversedQuickSearchBadCharacterShiftTable {
+public:
+	/**
+	 * Creates the reversed bad character shift table of a pattern.
+	 * @param pattern the pattern to compute the table for
+	 * @return map where key is a symbol of the pattern's alphabet and value is the 1-based index of its leftmost occurrence in the pattern, or pattern length + 1 when the symbol does not occur in the pattern
+	 */
+	template < class SymbolType >
+	static ext::map < SymbolType, size_t > qsbcs ( const string::LinearString < SymbolType > & pattern );
+};
+
+template < class SymbolType >
+ext::map<SymbolType, size_t> ReversedQuickSearchBadCharacterShiftTable::qsbcs(const string::LinearString < SymbolType >& pattern) {
+	ext::map<SymbolType, size_t> bcs;
+
+	/* Initialization of BCS: symbols that do not occur in the pattern keep the shift of pattern length + 1. */
+	for(const SymbolType & symbol : pattern.getAlphabet ( ) )
+		bcs.insert(std::make_pair(symbol, pattern.getContent().size() + 1));
+
+	/* Filling out BCS from the last symbol to the first, so the leftmost occurrence of each symbol is written last and wins. */
+	for ( size_t i = pattern.getContent ( ).size ( ); i > 0; -- i )
+		bcs [ pattern.getContent ( ) [ i - 1 ] ] = i;
+
+	return bcs;
+}
+
+} /* namespace properties */
+
+} /* namespace string */
+
+#endif /* STRINGOLOGY_REVERSED_QUICK_SEARCH_BAD_CHARACTER_SHIFT_TABLE_H_ */
diff --git a/alib2algo/src/stringology/exact/QuantumLeap.cpp b/alib2algo/src/stringology/exact/QuantumLeap.cpp
new file mode 100644
index 0000000000..6f03074e6b
--- /dev/null
+++ b/alib2algo/src/stringology/exact/QuantumLeap.cpp
@@ -0,0 +1,15 @@
+/*
+ * QuantumLeap.cpp
+ *
+ *  Created on: 19. 6. 2020
+ *      Author: Jan Travnicek
+ */
+
+#include "QuantumLeap.h"
+#include <registration/AlgoRegistration.hpp>
+
+namespace {
+
+auto QuantumLeapLinearStringLinearString = registration::AbstractRegister < stringology::exact::QuantumLeap, ext::set < unsigned >, const string::LinearString < > &, const string::LinearString < > &, size_t > ( stringology::exact::QuantumLeap::match );
+
+} /* namespace */
diff --git a/alib2algo/src/stringology/exact/QuantumLeap.h b/alib2algo/src/stringology/exact/QuantumLeap.h
new file mode 100644
index 0000000000..ee48ea9f3f
--- /dev/null
+++ b/alib2algo/src/stringology/exact/QuantumLeap.h
@@ -0,0 +1,96 @@
+/*
+ * QuantumLeap.h
+ *
+ *  Created on: 19. 6.
2020
+ *      Author: Jan Travnicek
+ */
+
+#ifndef STRINGOLOGY_QUANTUM_LEAP_H_
+#define STRINGOLOGY_QUANTUM_LEAP_H_
+
+#include <alib/set>
+#include <alib/map>
+#include <alib/measure>
+
+#include <string/LinearString.h>
+#include <alphabet/EndSymbol.h>
+
+#include <string/properties/QuickSearchBadCharacterShiftTable.h>
+#include <string/properties/ReversedQuickSearchBadCharacterShiftTable.h>
+
+#include <global/GlobalData.h>
+
+namespace stringology {
+
+namespace exact {
+
+/**
+ * Exact pattern matching that combines the QuickSearch shift of the current window with a reversed QuickSearch shift probed z positions ahead.
+ * The window leaps by z when the forward shift exceeds the offset obtained from the reversed table, otherwise it moves by the forward shift.
+ */
+class QuantumLeap {
+public:
+	/**
+	 * Search for pattern in linear string.
+	 * @param string the text to search in; its symbols are looked up with at ( ) in the shift tables built from the pattern
+	 * @param pattern the pattern to look for
+	 * @param z the leap distance; a value of 0 is treated as 1
+	 * @return set of positions in string where an occurrence of pattern starts
+	 */
+	template < class SymbolType >
+	static ext::set < unsigned > match ( const string::LinearString < SymbolType > & string, const string::LinearString < SymbolType > & pattern, size_t z );
+};
+
+template < class SymbolType >
+ext::set<unsigned> QuantumLeap::match (const string::LinearString < SymbolType >& string, const string::LinearString < SymbolType >& pattern, size_t z) {
+	/* with z == 0 no padding would be appended and both probes in the search loop would read outside of the haystack */
+	if ( z == 0 ) z = 1;
+
+	ext::set<unsigned> occ;
+	size_t m = pattern.getContent ( ).size ( );
+	size_t n = string.getContent ( ).size ( );
+	const std::vector < SymbolType > & needle = pattern.getContent ( );
+	std::vector < SymbolType > haystack = string.getContent ( );
+
+	/* pad the haystack with z end symbols so that the probes at i + m and i + z - 1 stay inside of it */
+	for ( size_t j = 0; j < z; ++ j )
+		haystack.push_back ( alphabet::EndSymbol::instance < SymbolType > ( ) );
+
+	measurements::start ( "Preprocess", measurements::Type::PREPROCESS );
+	ext::map<SymbolType, size_t> fbcs = string::properties::QuickSearchBadCharacterShiftTable::qsbcs(pattern); //NOTE: the subjects alphabet must be a subset or equal to the pattern
+	ext::map<SymbolType, size_t> bbcs = string::properties::ReversedQuickSearchBadCharacterShiftTable::qsbcs(pattern); //NOTE: the subjects alphabet must be a subset or equal to the pattern
+
+	fbcs.insert ( alphabet::EndSymbol::instance < SymbolType > ( ), needle.size ( ) + 1);
+	bbcs.insert ( alphabet::EndSymbol::instance < SymbolType > ( ), needle.size ( ) + 1);
+	/* turn each reversed shift into an offset from i; for z smaller than the shift the unsigned subtraction wraps around to a huge value, so the forward shift is taken in the loop */
+	for ( std::pair < const SymbolType, size_t > & entry : bbcs )
+		entry.second = z - entry.second;
+	measurements::end ( );
+
+	if(common::GlobalData::verbose) {
+		common::Streams::log << "fbcs = " << fbcs << std::endl;
+		common::Streams::log << "bbcs = " << bbcs << std::endl;
+	}
+
+	measurements::start ( "Algorithm", measurements::Type::ALGORITHM );
+	size_t i = 0;
+	while(i + m <= n) {
+		size_t j = 0;
+		while(j < m && haystack[i + j] == needle[j])
+			++ j;
+		// the whole pattern matched at position i
+		if(j == m) occ.insert(i);
+		size_t sf = fbcs.at ( haystack [ i + m ] ); // forward shift given by the symbol right behind the window
+		size_t sb = bbcs.at ( haystack [ i + z - 1 ] ); // offset from i derived from the symbol z - 1 positions ahead
+		i += ( sf <= sb ) ? sf : z; // leap by z only when the forward shift already exceeds that offset
+	}
+	measurements::end ( );
+
+	return occ;
+}
+
+} /* namespace exact */
+
+} /* namespace stringology */
+
+#endif /* STRINGOLOGY_QUANTUM_LEAP_H_ */
diff --git a/alib2integrationtest/test-src/tests/exactMatching.cpp b/alib2integrationtest/test-src/tests/exactMatching.cpp
index 7e6119a75b..696efd2c95 100644
--- a/alib2integrationtest/test-src/tests/exactMatching.cpp
+++ b/alib2integrationtest/test-src/tests/exactMatching.cpp
@@ -25,6 +25,7 @@ static std::string qGenString ( size_t min_len, size_t max_len, size_t alph_len,
 
 TEST_CASE ( "ExactMatching", "[integration]" ) {
 	auto definition = GENERATE ( as < std::tuple < std::string, std::string, bool > > ( ),
+			std::make_tuple ( "Exact Quantum Leap", "stringology::exact::QuantumLeap $subject $pattern 10", true ),
 			std::make_tuple ( "Exact Boyer Moore", "stringology::exact::BoyerMoore $subject $pattern", true ),
 			std::make_tuple ( "Exact Knuth Morris Pratt", "stringology::exact::KnuthMorrisPratt $subject $pattern", false ),
 			std::make_tuple ( "Exact Boyer Moore Horspool", " stringology::exact::BoyerMooreHorspool $subject $pattern", true ),
-- 
GitLab