/* * QuickSearch.h * * Created on: 23. 2. 2018 * Author: Michal Cvach */ #ifndef _STRINGOLOGY_QUICK_SEARCH_H_ #define _STRINGOLOGY_QUICK_SEARCH_H_ #include <set> #include <map> #include <alib/measure> #include <string/LinearString.h> #include <string/properties/QuickSearchBadCharacterShiftTable.h> #include <global/GlobalData.h> namespace stringology { namespace exact { /** * Implementation of the QuickSearch substring matching algorithm as presented in the Daniel M. Sunday article. */ class QuickSearch { public: /** * Search for pattern in linear string. * @return set of occurences */ template < class SymbolType > static ext::set < unsigned > match ( const string::LinearString < SymbolType > & subject, const string::LinearString < SymbolType > & pattern ); }; template < class SymbolType > ext::set<unsigned> QuickSearch::match(const string::LinearString < SymbolType >& string, const string::LinearString < SymbolType >& pattern) { ext::set<unsigned> occ; measurements::start ( "Preprocess", measurements::Type::PREPROCESS ); ext::map<SymbolType, size_t> bcs = string::properties::QuickSearchBadCharacterShiftTable::qsbcs(pattern); //NOTE: the subjects alphabet must be a subset or equal to the pattern measurements::end ( ); if(common::GlobalData::verbose) common::Streams::log << "bcs = " << bcs << std::endl; measurements::start ( "Algorithm", measurements::Type::ALGORITHM ); size_t i = 0; size_t j; while( i + pattern.getContent().size() <= string.getContent().size() ) { for ( j = 0; j < pattern.getContent().size(); j++ ) if ( pattern.getContent()[j] != string.getContent()[i+j]) break; if ( j == pattern.getContent ( ).size ( ) ) { occ.insert(i); } if ( i + pattern.getContent().size() == string.getContent().size() ) { break; // Here we don't do any more shifts if the pattern is already aligned at the utter end of the text } i += bcs[string.getContent()[i+pattern.getContent().size()]]; } measurements::end ( ); return occ; } } /* namespace exact */ } /* namespace stringology */ #endif /* _STRINGOLOGY_QUICK_SEARCH_H_ */