Skip to content
Snippets Groups Projects
Commit 4a66dea5 authored by Tomáš Pecka's avatar Tomáš Pecka Committed by Jan Trávníček
Browse files

algo: String Matching: Boyer-Moore-Horspool

Conflicts:
	astringology2/src/astringology.cpp
parent ff35da6e
No related branches found
No related tags found
No related merge requests found
/*
* BoyerMooreHorspool.cpp
*
* Created on: 5. 11. 2014
* Author: Radomir Polach, Tomas Pecka
*/
#include "BoyerMooreHorspool.h"
#include <exception/AlibException.h>
#include <string/LinearString.h>
#include <alphabet/Symbol.h>
#include <map>
namespace stringology {
namespace exact {
/**
 * Type-erased entry point: dispatches on the concrete types held by subject
 * and pattern through the visitor interface and returns the occurrences that
 * the selected Visit overload stored.
 *
 * @param subject string to search in
 * @param pattern string to search for
 * @return set of occurrences written by the visitor
 */
std::set<unsigned> BoyerMooreHorspool::match(const string::String& subject, const string::String& pattern)
{
	std::set<unsigned> occurrences;
	/* The visitor receives the result set as an opaque pointer and fills it in. */
	void* const sink = static_cast<void*>(&occurrences);
	Accept(sink, subject.getData(), pattern.getData(), BoyerMooreHorspool::BOYER_MOORE_HORSPOOL);
	return occurrences;
}
/**
 * Boyer-Moore-Horspool exact matching of a linear pattern in a linear string.
 *
 * The bad character shift (BCS) table maps every symbol of the pattern except
 * the last one to its distance from the end of the pattern; every other symbol
 * shifts by the whole pattern length. After each attempt the window is moved by
 * the BCS entry of the symbol aligned with the LAST position of the window,
 * regardless of where the comparison failed. That is the only symbol for which
 * the table guarantees that no occurrence is skipped.
 *
 * @param string  the text (haystack) to search in
 * @param pattern the pattern (needle) to search for
 * @return set of offsets in the text at which the pattern occurs; the empty
 *         pattern occurs at every offset from 0 to the text length inclusive
 */
std::set<unsigned> BoyerMooreHorspool::match(const string::LinearString& string, const string::LinearString& pattern)
{
	std::set<unsigned> occ;

	const auto& haystack = string.getContent();
	const auto& needle = pattern.getContent();
	const size_t haystackSize = haystack.size();
	const size_t needleSize = needle.size();

	/* The empty pattern is handled separately: the code below relies on
	 * needleSize - 1, which would wrap around for an unsigned zero. */
	if(needleSize == 0)
	{
		for(size_t offset = 0; offset <= haystackSize; offset++)
			occ.insert(static_cast<unsigned>(offset));
		return occ;
	}

	/* Filling out BCS, ignoring the last character so that every stored shift
	 * is at least 1. Later (more to the right) occurrences overwrite earlier
	 * ones, which yields the smallest safe shift. */
	std::map<alphabet::Symbol, size_t> bcs;
	for(size_t i = 0; i + 1 < needleSize; i++)
		bcs[needle.at(i)] = needleSize - i - 1;

	size_t offset = 0;
	while(offset + needleSize <= haystackSize)
	{
		/* Compare the window with the pattern from right to left. */
		size_t remaining = needleSize;
		while(remaining > 0 && haystack.at(offset + remaining - 1) == needle.at(remaining - 1))
			remaining--;

		if(remaining == 0)
			occ.insert(static_cast<unsigned>(offset));

		/* Shift by the symbol under the last window position. Symbols absent
		 * from the table shift by the whole pattern length; find() is used so
		 * that a lookup never inserts a zero shift into the table. */
		const auto shift = bcs.find(haystack.at(offset + needleSize - 1));
		offset += (shift == bcs.end()) ? needleSize : shift->second;
	}

	return occ;
}
/**
 * Visitor overload for Epsilon strings; this string type is not supported.
 *
 * @throws exception::AlibException always
 */
void BoyerMooreHorspool::Visit(void* /* data */, const string::Epsilon& /* subject */, const string::Epsilon& /* pattern */) const
{
	throw exception::AlibException("Unsupported string type Epsilon");
}
/**
 * Visitor overload for linear strings: runs the LinearString matcher and
 * stores its result in the set that data points to.
 *
 * @param data    pointer to the std::set<unsigned> that receives the occurrences
 * @param subject string to search in
 * @param pattern string to search for
 */
void BoyerMooreHorspool::Visit(void* data, const string::LinearString& subject, const string::LinearString& pattern) const
{
	std::set<unsigned>& occurrences = *static_cast<std::set<unsigned>*>(data);
	occurrences = BoyerMooreHorspool::match(subject, pattern);
}
/**
 * Visitor overload for cyclic strings; this string type is not supported.
 *
 * @throws exception::AlibException always
 */
void BoyerMooreHorspool::Visit(void* /* data */, const string::CyclicString& /* subject */, const string::CyclicString& /* pattern */) const
{
	throw exception::AlibException("Unsupported string type CyclicString");
}
/* Definition of the shared visitor instance that match(String, String) hands to Accept(). */
const BoyerMooreHorspool BoyerMooreHorspool::BOYER_MOORE_HORSPOOL;
} /* namespace exact */
} /* namespace stringology */
/*
* BoyerMooreHorspool.h
*
* Created on: 5. 11. 2014
* Author: Radomir Polach, Tomas Pecka
*/
#ifndef _BOYER_MOORE_HORSPOOL_H
#define _BOYER_MOORE_HORSPOOL_H
#include <string/String.h>
namespace stringology {
namespace exact {
/**
* Implementation of BMH (Boyer-Moore-Horspool exact string matching) for the
* MI(E+\eps)-EVY course 2014.
* To get rid of zeros in the BCS (bad character shift) table, the last
* character of the pattern is ignored when the table is filled.
*/
class BoyerMooreHorspool : public string::VisitableStringBase::const_same_visitor_type {
public:
/**
* Search for pattern in subject. The String overload dispatches on the
* concrete string types through the visitor interface; the LinearString
* overload performs the search itself.
* @param subject string to search in
* @param pattern string to search for
* @return set of occurrences (offsets in subject at which pattern was found)
*/
static std::set<unsigned> match(const string::String& subject, const string::String& pattern);
static std::set<unsigned> match(const string::LinearString& subject, const string::LinearString& pattern);
private:
/**
* Visitor callbacks, one per pair of equal string types. The void* argument
* is the std::set<unsigned> that receives the result. Only the LinearString
* pair is supported; the Epsilon and CyclicString overloads throw
* exception::AlibException.
*/
void Visit(void*, const string::Epsilon& subject, const string::Epsilon& pattern) const;
void Visit(void*, const string::LinearString& subject, const string::LinearString& pattern) const;
void Visit(void*, const string::CyclicString& subject, const string::CyclicString& pattern) const;
/** Visitor instance passed to Accept() by match(String, String). */
static const BoyerMooreHorspool BOYER_MOORE_HORSPOOL;
};
} /* namespace exact */
} /* namespace stringology */
#endif /* _BOYER_MOORE_HORSPOOL_H */
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include <container/Container.h> #include <container/Container.h>
   
#include <string/naive/ExactMatch.h> #include <string/naive/ExactMatch.h>
#include <stringology/exact/BoyerMooreHorspool.h>
#include <stringology/exact/ExactMatchingAutomaton.h> #include <stringology/exact/ExactMatchingAutomaton.h>
#include <stringology/exact/BorderArray.h> #include <stringology/exact/BorderArray.h>
   
...@@ -25,6 +26,7 @@ int main(int argc, char* argv[]) { ...@@ -25,6 +26,7 @@ int main(int argc, char* argv[]) {
std::vector<std::string> allowed; std::vector<std::string> allowed;
allowed.push_back("exactMatchingAutomaton"); allowed.push_back("exactMatchingAutomaton");
allowed.push_back("exactMatch"); allowed.push_back("exactMatch");
allowed.push_back("boyerMooreHorspool");
allowed.push_back("borderArray"); allowed.push_back("borderArray");
TCLAP::ValuesConstraint<std::string> allowedVals( allowed ); TCLAP::ValuesConstraint<std::string> allowedVals( allowed );
   
...@@ -67,6 +69,12 @@ int main(int argc, char* argv[]) { ...@@ -67,6 +69,12 @@ int main(int argc, char* argv[]) {
std::set<unsigned> res = string::naive::ExactMatch::match(subject, pattern); std::set<unsigned> res = string::naive::ExactMatch::match(subject, pattern);
alib::XmlDataFactory::toStdout(res); alib::XmlDataFactory::toStdout(res);
return 0; return 0;
} else if( algorithm.getValue() == "boyerMooreHorspool") {
string::String subject = alib::XmlDataFactory::fromTokens<string::String>(subjectTokens);
string::String pattern = alib::XmlDataFactory::fromTokens<string::String>(patternTokens);
std::set<unsigned> res = stringology::exact::BoyerMooreHorspool::match(subject, pattern);
alib::XmlDataFactory::toStdout(res);
return 0;
} else if( algorithm.getValue() == "exactMatchingAutomaton") { } else if( algorithm.getValue() == "exactMatchingAutomaton") {
string::String pattern = alib::XmlDataFactory::fromTokens<string::String>(patternTokens); string::String pattern = alib::XmlDataFactory::fromTokens<string::String>(patternTokens);
automaton::Automaton automaton = stringology::exact::ExactMatchingAutomaton::construct(pattern); automaton::Automaton automaton = stringology::exact::ExactMatchingAutomaton::construct(pattern);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment