Skip to content
Snippets Groups Projects
Commit 21e2c5e8 authored by Jan Jirák's avatar Jan Jirák Committed by Jan Trávníček
Browse files

CGR complete

parent 2aeb709d
No related branches found
No related tags found
1 merge request: !153 Jirakjan bp rebase
This commit is part of merge request !153. Comments created here will be created in the context of that merge request.
/*
* CGR.cpp
*
* Created on: 19. 3. 2020
* Author: Jan Jirak
*/
#include "CGR.h"
#include <registration/AlgoRegistration.hpp>
namespace {
// File-local registration object: constructs a registration::AbstractRegister for
// stringology::exact::CGR::match with return type ext::set < unsigned > and two
// const string::LinearString < > & parameters (subject, pattern).
// NOTE(review): presumably this is what makes the algorithm callable by name
// (e.g. "stringology::exact::CGR $subject $pattern") — confirm against AlgoRegistration.hpp.
auto CGR = registration::AbstractRegister < stringology::exact::CGR, ext::set < unsigned >, const string::LinearString < > &, const string::LinearString < > & > ( stringology::exact::CGR::match );
} /* namespace */
/*
* CGR.h
*
* Created on: 19. 3. 2020
* Author: Jan Jirak
*/
#ifndef _STRINGOLOGY_CGR_H_
#define _STRINGOLOGY_CGR_H_
#include <alib/measure>
#include <alib/set>
#include <alib/vector>
#include <string/LinearString.h>
#include <string/properties/Repetition.h>
namespace stringology {
namespace exact {
/**
* Implementation of the CGR exact string-matching algorithm from the article
* "Constant-space string-matching in sublinear average time"
* by Maxime Crochemore, Leszek Gasieniec and Wojciech Rytter.
*/
class CGR {
public:
/**
* Search for all occurrences of a pattern in a linear string.
*
* @tparam SymbolType type of the symbols of both the subject and the pattern
* @param subject the string that is searched
* @param pattern the string that is searched for
* @return set of positions in subject at which an occurrence of pattern starts
*/
template < class SymbolType >
static ext::set < unsigned > match ( const string::LinearString < SymbolType > & subject, const string::LinearString < SymbolType > & pattern );
};
/**
 * Find every position of subject at which pattern occurs.
 *
 * The pattern is first analysed by string::properties::Repetition::construct, which
 * yields a triple ( repSize, p, q ). When repSize is 0 or 1 the search falls back to
 * checking every alignment. Otherwise the text is probed every repSize / 2 positions:
 * at probe position i the text fragments of length repSize / 2 starting at i + p and
 * i + q are compared, and only if they are equal are the alignments
 * i - repSize / 2 .. i verified against the whole pattern. The alignments left over
 * after the last probe are verified unconditionally.
 *
 * @tparam SymbolType type of the symbols of both strings
 * @param subject the string that is searched
 * @param pattern the string that is searched for
 * @return set of start positions of all occurrences of pattern in subject;
 *         empty when pattern is longer than subject
 */
template < class SymbolType >
ext::set < unsigned > CGR::match ( const string::LinearString < SymbolType > & subject, const string::LinearString < SymbolType > & pattern ) {
	ext::set < unsigned > occ;
	const auto & text = subject.getContent ( );
	const auto & pat = pattern.getContent ( );
	const size_t n = text.size ( );
	const size_t m = pat.size ( );

	// A pattern longer than the text cannot occur. Without this guard n - m would wrap
	// around (size_t is unsigned) and every loop below would read past the end of text.
	if ( m > n )
		return occ;

	// Last alignment at which the whole pattern still fits into the text.
	const size_t lastStart = n - m;

	// Full verification of a single alignment; pos must not exceed lastStart.
	auto occursAt = [ & ] ( size_t pos ) {
		return std::equal ( pat.begin ( ), pat.end ( ), text.begin ( ) + pos );
	};

	size_t repSize, p, q;
	std::tie ( repSize, p, q ) = string::properties::Repetition::construct ( pattern );

	// for repSize == 0 or 1 use naive solution
	if ( repSize == 0 || repSize == 1 ) {
		for ( size_t i = 0; i <= lastStart; ++ i )
			if ( occursAt ( i ) )
				occ.insert ( i );

		return occ;
	}

	// repSize >= 2 here, so the step is at least 1 and the probing loop always advances.
	const size_t step = repSize / 2;

	size_t i = step;
	while ( i <= lastStart ) {
		// Cheap filter: compare the two text samples at offsets p and q of this alignment.
		// NOTE(review): assumes p + step <= m and q + step <= m so that both samples stay
		// inside the text — confirm against Repetition::construct.
		const bool samplesEqual = std::equal ( text.begin ( ) + i + p, text.begin ( ) + i + p + step, text.begin ( ) + i + q );

		if ( samplesEqual )
			// Verify every alignment of the window that ends at the probe position.
			for ( size_t candidate = i - step; candidate <= i; ++ candidate )
				if ( occursAt ( candidate ) )
					occ.insert ( candidate );

		i += step;
	}

	// The last probe jumped past lastStart; verify the alignments of that final,
	// unprobed window directly.
	for ( size_t j = i - step; j <= lastStart; ++ j )
		if ( occursAt ( j ) )
			occ.insert ( j );

	return occ;
}
} /* namespace exact */
} /* namespace stringology */
#endif /* _STRINGOLOGY_CGR_H_ */
......@@ -30,6 +30,7 @@ TEST_CASE ( "ExactMatching", "[integration]" ) {
std::make_tuple ( "Exact Quite Naive", "stringology::exact::QuiteNaive $subject $pattern", false ),
std::make_tuple ( "Exact Galil Seiferas", "stringology::exact::GalilSeiferas $subject $pattern", false ),
std::make_tuple ( "Exact Sequential Sampling", "stringology::exact::SequentialSampling $subject $pattern", false ),
std::make_tuple ( "Exact CGR", "stringology::exact::CGR $subject $pattern", false ),
std::make_tuple ( "Exact Boyer Moore", "stringology::exact::BoyerMoore $subject $pattern", true ),
std::make_tuple ( "Exact Knuth Morris Pratt", "stringology::exact::KnuthMorrisPratt $subject $pattern", false ),
std::make_tuple ( "Exact Boyer Moore Horspool", " stringology::exact::BoyerMooreHorspool $subject $pattern", true ),
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment