Skip to content
Snippets Groups Projects

Jirakjan bp rebase

Merged Jan Trávníček requested to merge jirakjan-bp-rebase into master
7 files
+ 27
7
Compare changes
  • Side-by-side
  • Inline
Files
7
+ 88
0
/*
* CGR.h
*
* Created on: 19. 3. 2020
* Author: Jan Jirak
*/
#ifndef _STRINGOLOGY_CGR_H_
#define _STRINGOLOGY_CGR_H_
#include <alib/measure>
#include <alib/set>
#include <alib/vector>
#include <string/LinearString.h>
#include <string/properties/Repetition.h>
namespace stringology {
namespace exact {
/**
* Implementation of the CGR algorithm from article "Constant-space string-matching in sublinear average time"
* Maxim Crochemore and Leszek Gasieniec and Wojciech Rytter
*/
class CGR {
public:
/**
* Search for pattern in linear string.
* @return set set of occurences
*/
template < class SymbolType >
static ext::set < unsigned > match ( const string::LinearString < SymbolType > & subject, const string::LinearString < SymbolType > & pattern );
};
template < class SymbolType >
ext::set < unsigned > CGR::match ( const string::LinearString < SymbolType > & subject, const string::LinearString < SymbolType > & pattern ) {
ext::set<unsigned> occ;
const auto & text = subject.getContent();
const auto & pat = pattern.getContent();
size_t n = text.size(), m = pat.size();
size_t repSize, p , q ;
measurements::start ( "Preprocess", measurements::Type::PREPROCESS );
std::tie( repSize , p , q ) = string::properties::Repetition::construct(pattern) ;
measurements::end ( );
measurements::start ( "Algorithm", measurements::Type::ALGORITHM );
// for repSize == 0 or 1 use naive solution
if ( repSize == 0 || repSize == 1 ) {
for ( size_t i = 0 ; i <= n - m ; ++ i ) {
size_t j = 0 ;
while ( j < m && text[i + j ] == pat[j] ) ++ j ;
if ( j == m ) occ.insert(i) ;
}
measurements::end();
return occ ;
}
size_t i = repSize/2 ;
while ( i <= n - m ) {
bool leftmostMismatch = std::equal(text.begin() + i + p , text.begin() + i + p + repSize/2 , text.begin() + i + q ) ;
if ( leftmostMismatch ) {
for ( size_t i_0 = i - repSize / 2 ; i_0 <= i ; ++ i_0 ){
if ( std::equal( pat.begin() , pat.end() , text.begin() + i_0 ) ) occ.insert(i_0) ;
}
}
i += repSize / 2 ;
}
// check last positions, where i jump into last window
for ( size_t j = i - repSize / 2 ; j <= n - m ; ++ j ) {
if ( std::equal( pat.begin() , pat.end() , text.begin() + j ) ) occ.insert(j) ;
}
measurements::end();
return occ ;
}
} /* namespace exact */
} /* namespace stringology */
#endif /* _STRINGOLOGY_TAILED_SUBSTRING_H_ */
Loading