Move the LinearString overloads of RandomSubstringFactory, BoyerMooreHorspool,
DeadZoneUsingBadCharacterShift, ExactFactorMatch and ReversedBoyerMooreHorspool
from the .cpp files into their headers as templates over SymbolType.

The added template code is additionally hardened: shift tables are read with
find instead of operator[], empty patterns and patterns longer than the subject
are handled explicitly, and each definition is documented. Removed and context
lines are untouched. The index lines still carry the blob ids of the original
patch, so the post-image ids of the five headers are stale.

diff --git a/alib2algo/src/string/generate/RandomSubstringFactory.cpp b/alib2algo/src/string/generate/RandomSubstringFactory.cpp
index 2f945b3da6bb24abdbf48e33c01f016737acaf23..f624a03a54b9752151090b29b3affb093f4b7a52 100644
--- a/alib2algo/src/string/generate/RandomSubstringFactory.cpp
+++ b/alib2algo/src/string/generate/RandomSubstringFactory.cpp
@@ -7,12 +7,6 @@
 
 #include "RandomSubstringFactory.h"
 
-#include <algorithm>
-#include <random>
-#include <exception/CommonException.h>
-
-#include <string/LinearString.h>
-
 namespace string {
 
 namespace generate {
@@ -21,19 +15,6 @@ string::String RandomSubstringFactory::generateSubstring ( size_t size, const st
 	return dispatch ( size, v.getData ( ) );
 }
 
-string::LinearString < > RandomSubstringFactory::generateSubstring ( size_t size, const string::LinearString < > & string ) {
-	if ( size > string.getContent ( ).size ( ) )
-		throw exception::CommonException ( "String not long enough" );
-
-	size_t begin = std::random_devices::semirandom ( ) % ( string.getContent ( ).size ( ) - size + 1 );
-
-	std::vector < alphabet::Symbol > data ( string.getContent ( ).begin ( ) + begin, string.getContent ( ).begin ( ) + begin + size );
-
-	return LinearString < > {
-		string.getAlphabet ( ), data
-	};
-}
-
 auto RandomSubstringFactoryLinearString = RandomSubstringFactory::RegistratorWrapper < string::LinearString < >, string::LinearString < > > ( RandomSubstringFactory::generateSubstring );
 
 } /* namespace generate */
diff --git a/alib2algo/src/string/generate/RandomSubstringFactory.h b/alib2algo/src/string/generate/RandomSubstringFactory.h
index 584b3217226a79d0a8ef78991dd7fb05153868a5..e1ac8c0f58800fdcca1dd478e1ddc42978fb17d4 100644
--- a/alib2algo/src/string/generate/RandomSubstringFactory.h
+++ b/alib2algo/src/string/generate/RandomSubstringFactory.h
@@ -12,6 +12,12 @@
 #include <string/String.h>
 #include <string/StringFeatures.h>
 
+#include <algorithm>
+#include <random>
+#include <exception/CommonException.h>
+
+#include <string/LinearString.h>
+
 namespace string {
 
 namespace generate {
@@ -20,10 +26,34 @@ class RandomSubstringFactory : public std::SingleDispatchFirstStaticParam < Rand
 public:
 	static string::String generateSubstring ( size_t size, const string::String & );
 
-	static string::LinearString < > generateSubstring ( size_t size, const string::LinearString < > & );
+	template < class SymbolType >
+	static string::LinearString < SymbolType > generateSubstring ( size_t size, const string::LinearString < SymbolType > & );
 
 };
 
+/**
+ * Extracts a contiguous factor of the given length from a linear string; the start offset is drawn from std::random_devices::semirandom.
+ *
+ * @param size length of the factor to extract
+ * @param string source string; the result reuses its alphabet
+ * @return linear string holding size consecutive symbols of the source
+ * @throws exception::CommonException when size exceeds the length of the source
+ */
+template < class SymbolType >
+string::LinearString < SymbolType > RandomSubstringFactory::generateSubstring ( size_t size, const string::LinearString < SymbolType > & string ) {
+	const auto & content = string.getContent ( );
+
+	if ( size > content.size ( ) )
+		throw exception::CommonException ( "String not long enough" );
+
+	// size <= content.size ( ) holds here, so the modulus is at least 1 and begin + size never passes the end
+	size_t begin = std::random_devices::semirandom ( ) % ( content.size ( ) - size + 1 );
+
+	std::vector < SymbolType > data ( content.begin ( ) + begin, content.begin ( ) + begin + size );
+
+	return LinearString < SymbolType > ( string.getAlphabet ( ), data );
+}
+
 } /* namespace generate */
 
 } /* namespace string */
diff --git a/alib2algo/src/stringology/exact/BoyerMooreHorspool.cpp b/alib2algo/src/stringology/exact/BoyerMooreHorspool.cpp
index 56831ebd76dce209782710b537b5399bd0ac6d1b..d0107b4f9d2dbb8416fef32dc4ca70396132bb38 100644
--- a/alib2algo/src/stringology/exact/BoyerMooreHorspool.cpp
+++ b/alib2algo/src/stringology/exact/BoyerMooreHorspool.cpp
@@ -6,13 +6,6 @@
  */
 
 #include "BoyerMooreHorspool.h"
-#include <string/properties/BadCharacterShiftTable.h>
-
-#include <string/LinearString.h>
-#include <alphabet/Symbol.h>
-
-#include <map>
-#include <measure>
 
 namespace stringology {
 
@@ -22,31 +15,6 @@ std::set<unsigned> BoyerMooreHorspool::match(const string::String& subject, cons
 	return dispatch(subject.getData(), pattern.getData());
 }
 
-std::set<unsigned> BoyerMooreHorspool::match(const string::LinearString < >& string, const string::LinearString < >& pattern) {
-	std::set<unsigned> occ;
-
-	measurements::start ( "Preprocess", measurements::Type::PREPROCESS );
-	std::map<alphabet::Symbol, size_t> bcs = string::properties::BadCharacterShiftTable::bcs(pattern); //NOTE: the subjects alphabet must be a subset or equal to the pattern
-	measurements::end ( );
-
-	measurements::start ( "Algorithm", measurements::Type::ALGORITHM );
-	size_t haystack_offset = 0;
-	while(haystack_offset + pattern.getContent().size() <= string.getContent().size()) {
-		size_t i = pattern.getContent().size();
-		while(i > 0 && string.getContent()[haystack_offset + i - 1] == pattern.getContent()[i - 1]) {
-			i--;
-		}
-
-		// Yay, there is match!!!
-		if(i == 0) occ.insert(haystack_offset);
-		haystack_offset += bcs[string.getContent()[haystack_offset + pattern.getContent().size() - 1]];
-		//std::cout << haystack_offset << std::endl;
-	}
-	measurements::end ( );
-
-	return occ;
-}
-
 auto BoyerMooreHorpoolLinearStringLinearString = BoyerMooreHorspool::RegistratorWrapper<std::set<unsigned>, string::LinearString < >, string::LinearString < >>(BoyerMooreHorspool::match);
 
 } /* namespace exact */
diff --git a/alib2algo/src/stringology/exact/BoyerMooreHorspool.h b/alib2algo/src/stringology/exact/BoyerMooreHorspool.h
index 5cbe9341935ae8c224f0dbad859ed14632fc21c1..e147d26fcfb7e980c604661a5d4c6557302cd24b 100644
--- a/alib2algo/src/stringology/exact/BoyerMooreHorspool.h
+++ b/alib2algo/src/stringology/exact/BoyerMooreHorspool.h
@@ -9,9 +9,16 @@
 #define _STRINGOLOGY_BOYER_MOORE_HORSPOOL_H_
 
 #include <set>
+#include <map>
+#include <measure>
+
 #include <core/multipleDispatch.hpp>
+
 #include <string/String.h>
 #include <string/StringFeatures.h>
+#include <string/LinearString.h>
+
+#include <string/properties/BadCharacterShiftTable.h>
 
 namespace stringology {
 
@@ -29,10 +36,55 @@ public:
 	 */
 	static std::set < unsigned > match ( const string::String & subject, const string::String & pattern );
 
-	static std::set < unsigned > match ( const string::LinearString < > & subject, const string::LinearString < > & pattern );
+	template < class SymbolType >
+	static std::set < unsigned > match ( const string::LinearString < SymbolType > & subject, const string::LinearString < SymbolType > & pattern );
 
 };
 
+/**
+ * Boyer-Moore-Horspool search over linear strings.
+ *
+ * @param string subject (text) to search in
+ * @param pattern pattern to search for
+ * @return start offsets of all occurrences of pattern in string
+ */
+template < class SymbolType >
+std::set<unsigned> BoyerMooreHorspool::match(const string::LinearString < SymbolType >& string, const string::LinearString < SymbolType >& pattern) {
+	std::set<unsigned> occ;
+
+	const auto & text = string.getContent();
+	const auto & needle = pattern.getContent();
+
+	// An empty pattern occurs at every offset (same answer as ExactFactorMatch); the shift lookup below would index before the window start.
+	if(needle.empty()) {
+		for(size_t offset = 0; offset <= text.size(); offset++) occ.insert(offset);
+		return occ;
+	}
+
+	measurements::start ( "Preprocess", measurements::Type::PREPROCESS );
+	const std::map<SymbolType, size_t> bcs = string::properties::BadCharacterShiftTable::bcs(pattern); //NOTE: the subjects alphabet must be a subset or equal to the pattern
+	measurements::end ( );
+
+	measurements::start ( "Algorithm", measurements::Type::ALGORITHM );
+	size_t haystack_offset = 0;
+	while(haystack_offset + needle.size() <= text.size()) {
+		size_t i = needle.size();
+		while(i > 0 && text[haystack_offset + i - 1] == needle[i - 1]) {
+			i--;
+		}
+
+		// Yay, there is match!!!
+		if(i == 0) occ.insert(haystack_offset);
+
+		// Look the shift up without inserting; a symbol missing from the table does not occur in the pattern, so the whole window can be skipped (a default-inserted 0 would loop forever).
+		auto shift = bcs.find(text[haystack_offset + needle.size() - 1]);
+		haystack_offset += shift != bcs.end() ? shift->second : needle.size();
+	}
+	measurements::end ( );
+
+	return occ;
+}
+
 } /* namespace exact */
 
 } /* namespace stringology */
diff --git a/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.cpp b/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.cpp
index 831366295e14c0a0e99e1f1a86c47b2d64b30575..5006c77bc4c7dc319185a1548b8cbad0bc45ab71 100644
--- a/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.cpp
+++ b/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.cpp
@@ -6,13 +6,6 @@
  */
 
 #include "DeadZoneUsingBadCharacterShift.h"
-#include <string/properties/BadCharacterShiftTable.h>
-#include <string/properties/ReversedBadCharacterShiftTable.h>
-
-#include <string/LinearString.h>
-#include <alphabet/Symbol.h>
-
-#include <map>
 
 namespace stringology {
 
@@ -22,31 +15,6 @@ std::set < unsigned > DeadZoneUsingBadCharacterShift::match ( const string::Stri
 	return dispatch ( subject.getData ( ), pattern.getData ( ) );
 }
 
-std::set < unsigned > DeadZoneUsingBadCharacterShift::match ( const string::LinearString < > & string, const string::LinearString < > & pattern ) {
-	std::set < unsigned > occ;
-	std::map < alphabet::Symbol, size_t > fbcs = string::properties::BadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
-	std::map < alphabet::Symbol, size_t > bbcs = string::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
-
-	match_rec ( occ, string, pattern, fbcs, bbcs, 0, string.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1 );
-	return occ;
-}
-
-void DeadZoneUsingBadCharacterShift::match_rec ( std::set < unsigned > & occ, const string::LinearString < > & string, const string::LinearString < > & pattern, std::map < alphabet::Symbol, size_t > & fbcs, std::map < alphabet::Symbol, size_t > & bbcs, int low, int high ) {
-	if ( low >= high ) return;
-
-	int middle = ( low + high ) / 2;
-	size_t i = 0;
-
-	while ( i < pattern.getContent ( ).size ( ) && string.getContent ( )[middle + i] == pattern.getContent ( )[i] )
-		i++;
-
-	// Yay, there is match!!!
-	if ( i == pattern.getContent ( ).size ( ) ) occ.insert ( middle );
-
-	match_rec ( occ, string, pattern, fbcs, bbcs, low, middle - bbcs[string.getContent ( )[middle]] + 1 );
-	match_rec ( occ, string, pattern, fbcs, bbcs, middle + fbcs[string.getContent ( )[middle + pattern.getContent ( ).size ( ) - 1]], high );
-}
-
 auto DeadZoneUsingBadCharacterShiftLinearStringLinearString = DeadZoneUsingBadCharacterShift::RegistratorWrapper < std::set < unsigned >, string::LinearString < >, string::LinearString < > > ( DeadZoneUsingBadCharacterShift::match );
 
 } /* namespace exact */
diff --git a/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.h b/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.h
index 18136315af8a2e9169727c646ca290b659554e41..694f495805eba509d4720409c1cf0d0767865f90 100644
--- a/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.h
+++ b/alib2algo/src/stringology/exact/DeadZoneUsingBadCharacterShift.h
@@ -9,9 +9,15 @@
 #define _DEAD_ZONE_USING_BAD_CHARACTER_SHIFT_H_
 
 #include <set>
+#include <map>
+
 #include <core/multipleDispatch.hpp>
 #include <string/String.h>
 #include <string/StringFeatures.h>
+#include <string/LinearString.h>
+
+#include <string/properties/BadCharacterShiftTable.h>
+#include <string/properties/ReversedBadCharacterShiftTable.h>
 
 namespace stringology {
 
@@ -21,6 +27,9 @@ namespace exact {
  * Implementation of DeadZone matching using bcs as shifting method to both directions
  */
 class DeadZoneUsingBadCharacterShift : public std::DoubleDispatch < DeadZoneUsingBadCharacterShift, std::set < unsigned >, const string::StringBase &, const string::StringBase & > {
+	template < class SymbolType >
+	static void match_rec ( std::set < unsigned > & occ, const string::LinearString < SymbolType > & string, const string::LinearString < SymbolType > & pattern, std::map < SymbolType, size_t > & fbcs, std::map < SymbolType, size_t > & bbcs, int low, int high );
+
 public:
 	/**
 	 * Search for pattern in linear string.
@@ -28,11 +37,69 @@ public:
 	 */
 	static std::set < unsigned > match ( const string::String & subject, const string::String & pattern );
 
-	static std::set < unsigned > match ( const string::LinearString < > & subject, const string::LinearString < > & pattern );
-	static void match_rec ( std::set < unsigned > & occ, const string::LinearString < > & string, const string::LinearString < > & pattern, std::map < alphabet::Symbol, size_t > & fbcs, std::map < alphabet::Symbol, size_t > & bbcs, int low, int high );
-
+	template < class SymbolType >
+	static std::set < unsigned > match ( const string::LinearString < SymbolType > & subject, const string::LinearString < SymbolType > & pattern );
 };
 
+/**
+ * DeadZone search that shifts in both directions using bad character shift tables.
+ *
+ * @param string subject (text) to search in
+ * @param pattern pattern to search for
+ * @return start offsets of all occurrences of pattern in string
+ */
+template < class SymbolType >
+std::set < unsigned > DeadZoneUsingBadCharacterShift::match ( const string::LinearString < SymbolType > & string, const string::LinearString < SymbolType > & pattern ) {
+	std::set < unsigned > occ;
+
+	const size_t textSize = string.getContent ( ).size ( );
+	const size_t patternSize = pattern.getContent ( ).size ( );
+
+	// A pattern longer than the subject cannot occur; returning here keeps the unsigned difference below from wrapping.
+	if ( patternSize > textSize ) return occ;
+
+	// An empty pattern occurs at every offset (same answer as ExactFactorMatch); match_rec would index outside the subject for it.
+	if ( patternSize == 0 ) {
+		for ( size_t offset = 0; offset <= textSize; offset++ ) occ.insert ( offset );
+		return occ;
+	}
+
+	std::map < SymbolType, size_t > fbcs = string::properties::BadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
+	std::map < SymbolType, size_t > bbcs = string::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
+
+	match_rec ( occ, string, pattern, fbcs, bbcs, 0, textSize - patternSize + 1 );
+	return occ;
+}
+
+/**
+ * Searches the window [ low, high ) of candidate start offsets: tests the middle offset, then recurses into the ranges left and right of it that the shift tables do not rule out.
+ */
+template < class SymbolType >
+void DeadZoneUsingBadCharacterShift::match_rec ( std::set < unsigned > & occ, const string::LinearString < SymbolType > & string, const string::LinearString < SymbolType > & pattern, std::map < SymbolType, size_t > & fbcs, std::map < SymbolType, size_t > & bbcs, int low, int high ) {
+	if ( low >= high ) return;
+
+	const auto & text = string.getContent ( );
+	const auto & needle = pattern.getContent ( );
+
+	int middle = ( low + high ) / 2;
+	size_t i = 0;
+
+	while ( i < needle.size ( ) && text[middle + i] == needle[i] )
+		i++;
+
+	// Yay, there is match!!!
+	if ( i == needle.size ( ) ) occ.insert ( middle );
+
+	// Look shifts up without inserting; a symbol missing from a table does not occur in the pattern, so the full pattern length is skipped (a default-inserted 0 would recurse forever).
+	auto backwardEntry = bbcs.find ( text[middle] );
+	auto forwardEntry = fbcs.find ( text[middle + needle.size ( ) - 1] );
+	const int backwardShift = static_cast < int > ( backwardEntry != bbcs.end ( ) ? backwardEntry->second : needle.size ( ) );
+	const int forwardShift = static_cast < int > ( forwardEntry != fbcs.end ( ) ? forwardEntry->second : needle.size ( ) );
+
+	match_rec ( occ, string, pattern, fbcs, bbcs, low, middle - backwardShift + 1 );
+	match_rec ( occ, string, pattern, fbcs, bbcs, middle + forwardShift, high );
+}
+
 } /* namespace exact */
 
 } /* namespace stringology */
diff --git a/alib2algo/src/stringology/exact/ExactFactorMatch.cpp b/alib2algo/src/stringology/exact/ExactFactorMatch.cpp
index 1afd9953be6f7adddd7c8c7df65bb64613c5b938..3ad442f933086939245aff1429bfab408e535711 100644
--- a/alib2algo/src/stringology/exact/ExactFactorMatch.cpp
+++ b/alib2algo/src/stringology/exact/ExactFactorMatch.cpp
@@ -6,9 +6,6 @@
  */
 
 #include "ExactFactorMatch.h"
-#include <string/LinearString.h>
-
-#include <deque>
 
 namespace stringology {
 
@@ -18,22 +15,6 @@ std::set < unsigned > ExactFactorMatch::match ( const string::String & subject,
 	return dispatch ( subject.getData ( ), pattern.getData ( ) );
 }
 
-std::set < unsigned > ExactFactorMatch::match ( const string::LinearString < > & subject, const string::LinearString < > & pattern ) {
-	std::set < unsigned > occ;
-
-	for ( unsigned i = 0; i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ); i++ ) {
-		unsigned j = 0;
-
-		for ( ; j < pattern.getContent ( ).size ( ); j++ )
-			if ( pattern.getContent ( )[j] != subject.getContent ( )[i + j] ) break;
-
-		if ( j == pattern.getContent ( ).size ( ) )
-			occ.insert ( i );
-	}
-
-	return occ;
-}
-
 auto ExactFactorMatchLinearString = ExactFactorMatch::RegistratorWrapper < std::set < unsigned >, string::LinearString < > > ( ExactFactorMatch::match );
 
 } /* namespace exact */
diff --git a/alib2algo/src/stringology/exact/ExactFactorMatch.h b/alib2algo/src/stringology/exact/ExactFactorMatch.h
index 2818f2e4b9e833daf1ea483e7d6064b544d18e19..3e7a7401e3d6e984e5c3cfae2f312d3abf7cd2f0 100644
--- a/alib2algo/src/stringology/exact/ExactFactorMatch.h
+++ b/alib2algo/src/stringology/exact/ExactFactorMatch.h
@@ -8,10 +8,14 @@
 #ifndef _EXACT_MATCH_H__
 #define _EXACT_MATCH_H__
 
-#include <string/String.h>
 #include <set>
+#include <deque>
+
 #include <core/multipleDispatch.hpp>
+
+#include <string/String.h>
 #include <string/StringFeatures.h>
+#include <string/LinearString.h>
 
 namespace stringology {
 
@@ -25,9 +29,38 @@ public:
 	 */
 	static std::set<unsigned> match(const string::String& subject, const string::String& pattern);
 
-	static std::set<unsigned> match(const string::LinearString < >& subject, const string::LinearString < >& pattern);
+	template < class SymbolType >
+	static std::set<unsigned> match(const string::LinearString < SymbolType >& subject, const string::LinearString < SymbolType >& pattern);
 };
 
+/**
+ * Naive exact matching: compares the pattern against every candidate offset of the subject.
+ *
+ * @param subject text to search in
+ * @param pattern pattern to search for
+ * @return start offsets of all occurrences of pattern in subject; an empty pattern occurs at every offset
+ */
+template < class SymbolType >
+std::set < unsigned > ExactFactorMatch::match ( const string::LinearString < SymbolType > & subject, const string::LinearString < SymbolType > & pattern ) {
+	std::set < unsigned > occ;
+
+	const auto & text = subject.getContent ( );
+	const auto & needle = pattern.getContent ( );
+
+	// size_t indices have the same type as the sizes they are compared with, so the loop bounds need no conversion
+	for ( size_t i = 0; i + needle.size ( ) <= text.size ( ); i++ ) {
+		size_t j = 0;
+
+		for ( ; j < needle.size ( ); j++ )
+			if ( needle[j] != text[i + j] ) break;
+
+		if ( j == needle.size ( ) )
+			occ.insert ( i );
+	}
+
+	return occ;
+}
+
 } /* namespace exact */
 
 } /* namespace stringology */
diff --git a/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.cpp b/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.cpp
index fc189d7cad4c02fe4634fbcbae1e893e97338fc0..6b6d1d8804790bd9297ba798f3483809da33db6d 100644
--- a/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.cpp
+++ b/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.cpp
@@ -6,12 +6,6 @@
  */
 
 #include "ReversedBoyerMooreHorspool.h"
-#include <string/properties/ReversedBadCharacterShiftTable.h>
-
-#include <string/LinearString.h>
-#include <alphabet/Symbol.h>
-
-#include <map>
 
 namespace stringology {
 
@@ -21,29 +15,6 @@ std::set < unsigned > ReversedBoyerMooreHorspool::match ( const string::String &
 	return dispatch ( subject.getData ( ), pattern.getData ( ) );
 }
 
-std::set < unsigned > ReversedBoyerMooreHorspool::match ( const string::LinearString < > & string, const string::LinearString < > & pattern ) {
-	std::set < unsigned > occ;
-	std::map < alphabet::Symbol, size_t > bcs = string::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
-
-	int haystack_offset = string.getContent ( ).size ( ) - pattern.getContent ( ).size ( );
-
-	while ( haystack_offset >= 0 ) {
-		size_t i = 0;
-
-		while ( i < pattern.getContent ( ).size ( ) && string.getContent ( )[haystack_offset + i] == pattern.getContent ( )[i] )
-			i++;
-
-		// Yay, there is match!!!
-		if ( i == pattern.getContent ( ).size ( ) ) occ.insert ( haystack_offset );
-
-		haystack_offset -= bcs[string.getContent ( )[haystack_offset]];
-
-		// std::cout << haystack_offset << std::endl;
-	}
-
-	return occ;
-}
-
 auto ReversedBoyerMooreHorpoolLinearStringLinearString = ReversedBoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, string::LinearString < >, string::LinearString < > > ( ReversedBoyerMooreHorspool::match );
 
 } /* namespace exact */
diff --git a/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.h b/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.h
index 207b4d78642c9afc9668aa05edc25f2ecfdc941f..803ab30fcd94f1b45edae06c38b9dd2f7402a284 100644
--- a/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.h
+++ b/alib2algo/src/stringology/exact/ReversedBoyerMooreHorspool.h
@@ -9,9 +9,15 @@
 #define _STRINGOLOGY_REVERSED_BOYER_MOORE_HORSPOOL_H_
 
 #include <set>
+#include <map>
+
 #include <core/multipleDispatch.hpp>
+
 #include <string/String.h>
 #include <string/StringFeatures.h>
+#include <string/LinearString.h>
+
+#include <string/properties/ReversedBadCharacterShiftTable.h>
 
 namespace stringology {
 
@@ -29,10 +35,55 @@ public:
 	 */
 	static std::set < unsigned > match ( const string::String & subject, const string::String & pattern );
 
-	static std::set < unsigned > match ( const string::LinearString < > & subject, const string::LinearString < > & pattern );
+	template < class SymbolType >
+	static std::set < unsigned > match ( const string::LinearString < SymbolType > & subject, const string::LinearString < SymbolType > & pattern );
 
 };
 
+/**
+ * Boyer-Moore-Horspool search that scans the subject from its end towards its beginning.
+ *
+ * @param string subject (text) to search in
+ * @param pattern pattern to search for
+ * @return start offsets of all occurrences of pattern in string
+ */
+template < class SymbolType >
+std::set < unsigned > ReversedBoyerMooreHorspool::match ( const string::LinearString < SymbolType > & string, const string::LinearString < SymbolType > & pattern ) {
+	std::set < unsigned > occ;
+
+	const auto & text = string.getContent ( );
+	const auto & needle = pattern.getContent ( );
+
+	// A pattern longer than the subject cannot occur; returning here keeps the unsigned difference below from wrapping.
+	if ( needle.size ( ) > text.size ( ) ) return occ;
+
+	// An empty pattern occurs at every offset (same answer as ExactFactorMatch); the shift lookup below would read past the end of the subject.
+	if ( needle.empty ( ) ) {
+		for ( size_t offset = 0; offset <= text.size ( ); offset++ ) occ.insert ( offset );
+		return occ;
+	}
+
+	const std::map < SymbolType, size_t > bcs = string::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
+
+	int haystack_offset = text.size ( ) - needle.size ( );
+
+	while ( haystack_offset >= 0 ) {
+		size_t i = 0;
+
+		while ( i < needle.size ( ) && text[haystack_offset + i] == needle[i] )
+			i++;
+
+		// Yay, there is match!!!
+		if ( i == needle.size ( ) ) occ.insert ( haystack_offset );
+
+		// Look the shift up without inserting; a symbol missing from the table does not occur in the pattern, so the whole window can be skipped (a default-inserted 0 would loop forever).
+		auto shift = bcs.find ( text[haystack_offset] );
+		haystack_offset -= shift != bcs.end ( ) ? shift->second : needle.size ( );
+	}
+
+	return occ;
+}
+
 } /* namespace exact */
 
 } /* namespace stringology */