From d44964c58090c802f5e3df230459ab977f8aa931 Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Tue, 6 Dec 2016 10:03:12 +0100 Subject: [PATCH] template some stringology algorithms --- .../generate/RandomSubstringFactory.cpp | 19 --------- .../string/generate/RandomSubstringFactory.h | 21 +++++++++- .../stringology/exact/BoyerMooreHorspool.cpp | 32 --------------- .../stringology/exact/BoyerMooreHorspool.h | 36 +++++++++++++++- .../exact/DeadZoneUsingBadCharacterShift.cpp | 32 --------------- .../exact/DeadZoneUsingBadCharacterShift.h | 41 +++++++++++++++++-- .../stringology/exact/ExactFactorMatch.cpp | 19 --------- .../src/stringology/exact/ExactFactorMatch.h | 26 +++++++++++- .../exact/ReversedBoyerMooreHorspool.cpp | 29 ------------- .../exact/ReversedBoyerMooreHorspool.h | 33 ++++++++++++++- 10 files changed, 149 insertions(+), 139 deletions(-) diff --git a/alib2algo/src/string/generate/RandomSubstringFactory.cpp b/alib2algo/src/string/generate/RandomSubstringFactory.cpp index 2f945b3da6..f624a03a54 100644 --- a/alib2algo/src/string/generate/RandomSubstringFactory.cpp +++ b/alib2algo/src/string/generate/RandomSubstringFactory.cpp @@ -7,12 +7,6 @@ #include "RandomSubstringFactory.h" -#include <algorithm> -#include <random> -#include <exception/CommonException.h> - -#include <string/LinearString.h> - namespace string { namespace generate { @@ -21,19 +15,6 @@ string::String RandomSubstringFactory::generateSubstring ( size_t size, const st return dispatch ( size, v.getData ( ) ); } -string::LinearString < > RandomSubstringFactory::generateSubstring ( size_t size, const string::LinearString < > & string ) { - if ( size > string.getContent ( ).size ( ) ) - throw exception::CommonException ( "String not long enough" ); - - size_t begin = std::random_devices::semirandom ( ) % ( string.getContent ( ).size ( ) - size + 1 ); - - std::vector < alphabet::Symbol > data ( string.getContent ( ).begin ( ) + begin, string.getContent ( ).begin ( ) + begin + size ); - - return LinearString < > { - string.getAlphabet ( ), data - }; -} - auto RandomSubstringFactoryLinearString = RandomSubstringFactory::RegistratorWrapper < string::LinearString < >, string::LinearString < > > ( RandomSubstringFactory::generateSubstring ); } /* namespace generate */ diff --git a/alib2algo/src/string/generate/RandomSubstringFactory.h b/alib2algo/src/string/generate/RandomSubstringFactory.h index 584b321722..e1ac8c0f58 100644 --- a/alib2algo/src/string/generate/RandomSubstringFactory.h +++ b/alib2algo/src/string/generate/RandomSubstringFactory.h @@ -12,6 +12,12 @@ #include <string/String.h> #include <string/StringFeatures.h> +#include <algorithm> +#include <random> +#include <exception/CommonException.h> + +#include <string/LinearString.h> + namespace string { namespace generate { @@ -20,10 +26,23 @@ class RandomSubstringFactory : public std::SingleDispatchFirstStaticParam < Rand public: static string::String generateSubstring ( size_t size, const string::String & ); - static string::LinearString < > generateSubstring ( size_t size, const string::LinearString < > & ); + template < class SymbolType > + static string::LinearString < SymbolType > generateSubstring ( size_t size, const string::LinearString < SymbolType > & ); }; +template < class SymbolType > +string::LinearString < SymbolType > RandomSubstringFactory::generateSubstring ( size_t size, const string::LinearString < SymbolType > & string ) { + if ( size > string.getContent ( ).size ( ) ) + throw exception::CommonException ( "String not long enough" ); + + size_t begin = std::random_devices::semirandom ( ) % ( string.getContent ( ).size ( ) - size + 1 ); + + std::vector < SymbolType > data ( string.getContent ( ).begin ( ) + begin, string.getContent ( ).begin ( ) + begin + size ); + + return LinearString < SymbolType > ( string.getAlphabet ( ), data ); +} + } /* namespace generate */ } /* namespace string */ diff --git a/alib2algo/src/stringology/exact/BoyerMooreHorspool.cpp b/alib2algo/src/stringology/exact/BoyerMooreHorspool.cpp index 56831ebd76..d0107b4f9d 100644 --- a/alib2algo/src/stringology/exact/BoyerMooreHorspool.cpp +++ b/alib2algo/src/stringology/exact/BoyerMooreHorspool.cpp @@ -6,13 +6,6 @@ */ #include "BoyerMooreHorspool.h" -#include <string/properties/BadCharacterShiftTable.h> - -#include <string/LinearString.h> -#include <alphabet/Symbol.h> - -#include <map> -#include <measure> namespace stringology { @@ -22,31 +15,6 @@ std::set<unsigned> BoyerMooreHorspool::match(const string::String& subject, cons return dispatch(subject.getData(), pattern.getData()); } -std::set<unsigned> BoyerMooreHorspool::match(const string::LinearString < >& string, const string::LinearString < >& pattern) { - std::set<unsigned> occ; - - measurements::start ( "Preprocess", measurements::Type::PREPROCESS ); - std::map<alphabet::Symbol, size_t> bcs = string::properties::BadCharacterShiftTable::bcs(pattern); //NOTE: the subjects alphabet must be a subset or equal to the pattern - measurements::end ( ); - - measurements::start ( "Algorithm", measurements::Type::ALGORITHM ); - size_t haystack_offset = 0; - while(haystack_offset + pattern.getContent().size() <= string.getContent().size()) { - size_t i = pattern.getContent().size(); - while(i > 0 && string.getContent()[haystack_offset + i - 1] == pattern.getContent()[i - 1]) { - i--; - } - - // Yay, there is match!!! - if(i == 0) occ.insert(haystack_offset); - haystack_offset += bcs[string.getContent()[haystack_offset + pattern.getContent().size() - 1]]; - //std::cout << haystack_offset << std::endl; - } - measurements::end ( ); - - return occ; -} - auto BoyerMooreHorpoolLinearStringLinearString = BoyerMooreHorspool::RegistratorWrapper<std::set<unsigned>, string::LinearString < >, string::LinearString < >>(BoyerMooreHorspool::match); } /* namespace exact */ diff --git a/alib2algo/src/stringology/exact/BoyerMooreHorspool.h b/alib2algo/src/stringology/exact/BoyerMooreHorspool.h index 5cbe934193..e147d26fcf 100644 --- a/alib2algo/src/stringology/exact/BoyerMooreHorspool.h +++ b/alib2algo/src/stringology/exact/BoyerMooreHorspool.h @@ -9,9 +9,16 @@ #define _STRINGOLOGY_BOYER_MOORE_HORSPOOL_H_ #include <set> +#include <map> +#include <measure> + #include <core/multipleDispatch.hpp> + #include <string/String.h> #include <string/StringFeatures.h> +#include <string/LinearString.h> + +#include <string/properties/BadCharacterShiftTable.h> namespace stringology { @@ -29,10 +36,37 @@ public: */ static std::set < unsigned > match ( const string::String & subject, const string::String & pattern ); - static std::set < unsigned > match ( const string::LinearString < > & subject, const string::LinearString < > & pattern ); + template < class SymbolType > + static std::set < unsigned > match ( const string::LinearString < SymbolType > & subject, const string::LinearString < SymbolType > & pattern ); }; +template < class SymbolType > +std::set<unsigned> BoyerMooreHorspool::match(const string::LinearString < SymbolType >& string, const string::LinearString < SymbolType >& pattern) { + std::set<unsigned> occ; + + measurements::start ( "Preprocess", measurements::Type::PREPROCESS ); + std::map<SymbolType, size_t> bcs = string::properties::BadCharacterShiftTable::bcs(pattern); //NOTE: the subjects alphabet must be a subset or equal to the pattern + measurements::end ( ); + + measurements::start ( "Algorithm", measurements::Type::ALGORITHM ); + size_t haystack_offset = 0; + while(haystack_offset + pattern.getContent().size() <= string.getContent().size()) { + size_t i = pattern.getContent().size(); + while(i > 0 && string.getContent()[haystack_offset + i - 1] == pattern.getContent()[i - 1]) { + i--; + } + + // Yay, there is match!!! + if(i == 0) occ.insert(haystack_offset); + haystack_offset += bcs[string.getContent()[haystack_offset + pattern.getContent().size() - 1]]; + //std::cout << haystack_offset << std::endl; + } + measurements::end ( ); + + return occ; +} + } /* namespace exact */ } /* namespace stringology */ diff --git a/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.cpp b/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.cpp index 831366295e..5006c77bc4 100644 --- a/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.cpp +++ b/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.cpp @@ -6,13 +6,6 @@ */ #include "DeadZoneUsingBadCharacterShift.h" -#include <string/properties/BadCharacterShiftTable.h> -#include <string/properties/ReversedBadCharacterShiftTable.h> - -#include <string/LinearString.h> -#include <alphabet/Symbol.h> - -#include <map> namespace stringology { @@ -22,31 +15,6 @@ std::set < unsigned > DeadZoneUsingBadCharacterShift::match ( const string::Stri return dispatch ( subject.getData ( ), pattern.getData ( ) ); } -std::set < unsigned > DeadZoneUsingBadCharacterShift::match ( const string::LinearString < > & string, const string::LinearString < > & pattern ) { - std::set < unsigned > occ; - std::map < alphabet::Symbol, size_t > fbcs = string::properties::BadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern - std::map < alphabet::Symbol, size_t > bbcs = string::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern - - match_rec ( occ, string, pattern, fbcs, bbcs, 0, string.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1 ); - return occ; -} - -void DeadZoneUsingBadCharacterShift::match_rec ( std::set < unsigned > & occ, const string::LinearString < > & string, const string::LinearString < > & pattern, std::map < alphabet::Symbol, size_t > & fbcs, std::map < alphabet::Symbol, size_t > & bbcs, int low, int high ) { - if ( low >= high ) return; - - int middle = ( low + high ) / 2; - size_t i = 0; - - while ( i < pattern.getContent ( ).size ( ) && string.getContent ( )[middle + i] == pattern.getContent ( )[i] ) - i++; - - // Yay, there is match!!! - if ( i == pattern.getContent ( ).size ( ) ) occ.insert ( middle ); - - match_rec ( occ, string, pattern, fbcs, bbcs, low, middle - bbcs[string.getContent ( )[middle]] + 1 ); - match_rec ( occ, string, pattern, fbcs, bbcs, middle + fbcs[string.getContent ( )[middle + pattern.getContent ( ).size ( ) - 1]], high ); -} - auto DeadZoneUsingBadCharacterShiftLinearStringLinearString = DeadZoneUsingBadCharacterShift::RegistratorWrapper < std::set < unsigned >, string::LinearString < >, string::LinearString < > > ( DeadZoneUsingBadCharacterShift::match ); } /* namespace exact */ diff --git a/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.h b/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.h index 18136315af..694f495805 100644 --- a/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.h +++ b/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.h @@ -9,9 +9,15 @@ #define _DEAD_ZONE_USING_BAD_CHARACTER_SHIFT_H_ #include <set> +#include <map> + #include <core/multipleDispatch.hpp> #include <string/String.h> #include <string/StringFeatures.h> +#include <string/LinearString.h> + +#include <string/properties/BadCharacterShiftTable.h> +#include <string/properties/ReversedBadCharacterShiftTable.h> namespace stringology { @@ -21,6 +27,9 @@ namespace exact { * Implementation of DeadZone matching using bcs as shifting method to both directions */ class DeadZoneUsingBadCharacterShift : public std::DoubleDispatch < DeadZoneUsingBadCharacterShift, std::set < unsigned >, const string::StringBase &, const string::StringBase & > { + template < class SymbolType > + static void match_rec ( std::set < unsigned > & occ, const string::LinearString < SymbolType > & string, const string::LinearString < SymbolType > & pattern, std::map < SymbolType, size_t > & fbcs, std::map < SymbolType, size_t > & bbcs, int low, int high ); + public: /** * Search for pattern in linear string. @@ -28,11 +37,37 @@ public: */ static std::set < unsigned > match ( const string::String & subject, const string::String & pattern ); - static std::set < unsigned > match ( const string::LinearString < > & subject, const string::LinearString < > & pattern ); - static void match_rec ( std::set < unsigned > & occ, const string::LinearString < > & string, const string::LinearString < > & pattern, std::map < alphabet::Symbol, size_t > & fbcs, std::map < alphabet::Symbol, size_t > & bbcs, int low, int high ); - + template < class SymbolType > + static std::set < unsigned > match ( const string::LinearString < SymbolType > & subject, const string::LinearString < SymbolType > & pattern ); }; +template < class SymbolType > +std::set < unsigned > DeadZoneUsingBadCharacterShift::match ( const string::LinearString < SymbolType > & string, const string::LinearString < SymbolType > & pattern ) { + std::set < unsigned > occ; + std::map < SymbolType, size_t > fbcs = string::properties::BadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern + std::map < SymbolType, size_t > bbcs = string::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern + + match_rec ( occ, string, pattern, fbcs, bbcs, 0, string.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1 ); + return occ; +} + +template < class SymbolType > +void DeadZoneUsingBadCharacterShift::match_rec ( std::set < unsigned > & occ, const string::LinearString < SymbolType > & string, const string::LinearString < SymbolType > & pattern, std::map < SymbolType, size_t > & fbcs, std::map < SymbolType, size_t > & bbcs, int low, int high ) { + if ( low >= high ) return; + + int middle = ( low + high ) / 2; + size_t i = 0; + + while ( i < pattern.getContent ( ).size ( ) && string.getContent ( )[middle + i] == pattern.getContent ( )[i] ) + i++; + + // Yay, there is match!!! + if ( i == pattern.getContent ( ).size ( ) ) occ.insert ( middle ); + + match_rec ( occ, string, pattern, fbcs, bbcs, low, middle - bbcs[string.getContent ( )[middle]] + 1 ); + match_rec ( occ, string, pattern, fbcs, bbcs, middle + fbcs[string.getContent ( )[middle + pattern.getContent ( ).size ( ) - 1]], high ); +} + } /* namespace exact */ } /* namespace stringology */ diff --git a/alib2algo/src/stringology/exact/ExactFactorMatch.cpp b/alib2algo/src/stringology/exact/ExactFactorMatch.cpp index 1afd9953be..3ad442f933 100644 --- a/alib2algo/src/stringology/exact/ExactFactorMatch.cpp +++ b/alib2algo/src/stringology/exact/ExactFactorMatch.cpp @@ -6,9 +6,6 @@ */ #include "ExactFactorMatch.h" -#include <string/LinearString.h> - -#include <deque> namespace stringology { @@ -18,22 +15,6 @@ std::set < unsigned > ExactFactorMatch::match ( const string::String & subject, return dispatch ( subject.getData ( ), pattern.getData ( ) ); } -std::set < unsigned > ExactFactorMatch::match ( const string::LinearString < > & subject, const string::LinearString < > & pattern ) { - std::set < unsigned > occ; - - for ( unsigned i = 0; i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ); i++ ) { - unsigned j = 0; - - for ( ; j < pattern.getContent ( ).size ( ); j++ ) - if ( pattern.getContent ( )[j] != subject.getContent ( )[i + j] ) break; - - if ( j == pattern.getContent ( ).size ( ) ) - occ.insert ( i ); - } - - return occ; -} - auto ExactFactorMatchLinearString = ExactFactorMatch::RegistratorWrapper < std::set < unsigned >, string::LinearString < > > ( ExactFactorMatch::match ); } /* namespace exact */ diff --git a/alib2algo/src/stringology/exact/ExactFactorMatch.h b/alib2algo/src/stringology/exact/ExactFactorMatch.h index 2818f2e4b9..3e7a7401e3 100644 --- a/alib2algo/src/stringology/exact/ExactFactorMatch.h +++ b/alib2algo/src/stringology/exact/ExactFactorMatch.h @@ -8,10 +8,14 @@ #ifndef _EXACT_MATCH_H__ #define _EXACT_MATCH_H__ -#include <string/String.h> #include <set> +#include <deque> + #include <core/multipleDispatch.hpp> + +#include <string/String.h> #include <string/StringFeatures.h> +#include <string/LinearString.h> namespace stringology { @@ -25,9 +29,27 @@ public: */ static std::set<unsigned> match(const string::String& subject, const string::String& pattern); - static std::set<unsigned> match(const string::LinearString < >& subject, const string::LinearString < >& pattern); + template < class SymbolType > + static std::set<unsigned> match(const string::LinearString < SymbolType >& subject, const string::LinearString < SymbolType >& pattern); }; +template < class SymbolType > +std::set < unsigned > ExactFactorMatch::match ( const string::LinearString < SymbolType > & subject, const string::LinearString < SymbolType > & pattern ) { + std::set < unsigned > occ; + + for ( unsigned i = 0; i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ); i++ ) { + unsigned j = 0; + + for ( ; j < pattern.getContent ( ).size ( ); j++ ) + if ( pattern.getContent ( )[j] != subject.getContent ( )[i + j] ) break; + + if ( j == pattern.getContent ( ).size ( ) ) + occ.insert ( i ); + } + + return occ; +} + } /* namespace exact */ } /* namespace stringology */ diff --git a/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.cpp b/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.cpp index fc189d7cad..6b6d1d8804 100644 --- a/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.cpp +++ b/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.cpp @@ -6,12 +6,6 @@ */ #include "ReversedBoyerMooreHorspool.h" -#include <string/properties/ReversedBadCharacterShiftTable.h> - -#include <string/LinearString.h> -#include <alphabet/Symbol.h> - -#include <map> namespace stringology { @@ -21,29 +15,6 @@ std::set < unsigned > ReversedBoyerMooreHorspool::match ( const string::String & return dispatch ( subject.getData ( ), pattern.getData ( ) ); } -std::set < unsigned > ReversedBoyerMooreHorspool::match ( const string::LinearString < > & string, const string::LinearString < > & pattern ) { - std::set < unsigned > occ; - std::map < alphabet::Symbol, size_t > bcs = string::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern - - int haystack_offset = string.getContent ( ).size ( ) - pattern.getContent ( ).size ( ); - - while ( haystack_offset >= 0 ) { - size_t i = 0; - - while ( i < pattern.getContent ( ).size ( ) && string.getContent ( )[haystack_offset + i] == pattern.getContent ( )[i] ) - i++; - - // Yay, there is match!!! - if ( i == pattern.getContent ( ).size ( ) ) occ.insert ( haystack_offset ); - - haystack_offset -= bcs[string.getContent ( )[haystack_offset]]; - - // std::cout << haystack_offset << std::endl; - } - - return occ; -} - auto ReversedBoyerMooreHorpoolLinearStringLinearString = ReversedBoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, string::LinearString < >, string::LinearString < > > ( ReversedBoyerMooreHorspool::match ); } /* namespace exact */ diff --git a/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.h b/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.h index 207b4d7864..803ab30fcd 100644 --- a/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.h +++ b/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.h @@ -9,9 +9,15 @@ #define _STRINGOLOGY_REVERSED_BOYER_MOORE_HORSPOOL_H_ #include <set> +#include <map> + #include <core/multipleDispatch.hpp> + #include <string/String.h> #include <string/StringFeatures.h> +#include <string/LinearString.h> + +#include <string/properties/ReversedBadCharacterShiftTable.h> namespace stringology { @@ -29,10 +35,35 @@ public: */ static std::set < unsigned > match ( const string::String & subject, const string::String & pattern ); - static std::set < unsigned > match ( const string::LinearString < > & subject, const string::LinearString < > & pattern ); + template < class SymbolType > + static std::set < unsigned > match ( const string::LinearString < SymbolType > & subject, const string::LinearString < SymbolType > & pattern ); }; +template < class SymbolType > +std::set < unsigned > ReversedBoyerMooreHorspool::match ( const string::LinearString < SymbolType > & string, const string::LinearString < SymbolType > & pattern ) { + std::set < unsigned > occ; + std::map < SymbolType, size_t > bcs = string::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern + + int haystack_offset = string.getContent ( ).size ( ) - pattern.getContent ( ).size ( ); + + while ( haystack_offset >= 0 ) { + size_t i = 0; + + while ( i < pattern.getContent ( ).size ( ) && string.getContent ( )[haystack_offset + i] == pattern.getContent ( )[i] ) + i++; + + // Yay, there is match!!! + if ( i == pattern.getContent ( ).size ( ) ) occ.insert ( haystack_offset ); + + haystack_offset -= bcs[string.getContent ( )[haystack_offset]]; + + // std::cout << haystack_offset << std::endl; + } + + return occ; +} + } /* namespace exact */ } /* namespace stringology */ -- GitLab