diff --git a/alib2algo/src/stringology/exact/BoyerMooreHorspool.cpp b/alib2algo/src/stringology/exact/BoyerMooreHorspool.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a2f6876bf41ee57360f9040b9f2b2678b2e1bec0
--- /dev/null
+++ b/alib2algo/src/stringology/exact/BoyerMooreHorspool.cpp
@@ -0,0 +1,103 @@
+/*
+ * BoyerMooreHorspool.cpp
+ *
+ *  Created on: 5. 11. 2014
+ *      Author: Radomir Polach, Tomas Pecka
+ */
+
+#include "BoyerMooreHorspool.h"
+
+#include <exception/AlibException.h>
+#include <string/LinearString.h>
+#include <alphabet/Symbol.h>
+
+#include <map>
+#include <set>
+
+namespace stringology {
+
+namespace exact {
+
+/*
+ * Type-erased entry point: hands both strings to the visitor machinery (Accept),
+ * which is expected to invoke the Visit overload matching their concrete types.
+ * The Visit overload for linear strings writes the occurrences into the set passed as void*.
+ */
+std::set<unsigned> BoyerMooreHorspool::match(const string::String& subject, const string::String& pattern) {
+	std::set<unsigned> data;
+	Accept(static_cast<void*>(&data), subject.getData(), pattern.getData(), BoyerMooreHorspool::BOYER_MOORE_HORSPOOL);
+	return data;
+}
+
+/*
+ * Boyer-Moore-Horspool search for pattern in subject.
+ *
+ * Returns the offsets in subject at which pattern occurs. An empty pattern
+ * is reported at every offset 0..|subject|.
+ */
+std::set<unsigned> BoyerMooreHorspool::match(const string::LinearString& subject, const string::LinearString& pattern)
+{
+	const auto& haystack = subject.getContent();
+	const auto& needle = pattern.getContent();
+	const size_t needleSize = needle.size();
+
+	std::set<unsigned> occ;
+
+	/*
+	 * Bad character shift table: distance from the rightmost occurrence of a
+	 * symbol in the pattern (last position excluded, so no shift is zero) to
+	 * the end of the pattern. Symbols missing from the table shift by the
+	 * whole pattern length. "i + 1 <" keeps the bound safe for an empty pattern.
+	 */
+	std::map<alphabet::Symbol, size_t> bcs;
+	for(size_t i = 0; i + 1 < needleSize; i++)
+		bcs[needle.at(i)] = needleSize - i - 1;
+
+	size_t offset = 0;
+	while(offset + needleSize <= haystack.size())
+	{
+		/* Compare the window with the pattern from right to left. */
+		size_t i = needleSize;
+		while(i > 0 && haystack.at(offset + i - 1) == needle.at(i - 1))
+			i--;
+
+		if(i == 0)
+		{
+			/* Whole window matched. */
+			occ.insert(offset);
+			offset++;
+			continue;
+		}
+
+		/*
+		 * Shift by the symbol aligned with the LAST pattern position, as
+		 * Horspool prescribes. Shifting by the mismatching symbol instead
+		 * can jump over occurrences (e.g. pattern "abab" in "xbabab").
+		 */
+		const auto shift = bcs.find(haystack.at(offset + needleSize - 1));
+		offset += (shift == bcs.end()) ? needleSize : shift->second;
+	}
+	return occ;
+}
+
+/* Epsilon operands are rejected. */
+void BoyerMooreHorspool::Visit(void*, const string::Epsilon&, const string::Epsilon&) const {
+	throw exception::AlibException("Unsupported string type Epsilon");
+}
+
+/* Runs the search on two linear strings and stores the result in the set that data points to. */
+void BoyerMooreHorspool::Visit(void* data, const string::LinearString& subject, const string::LinearString& pattern) const {
+	std::set<unsigned>& res = *static_cast<std::set<unsigned>*>(data);
+	res = this->match(subject, pattern);
+}
+
+/* CyclicString operands are rejected. */
+void BoyerMooreHorspool::Visit(void*, const string::CyclicString&, const string::CyclicString&) const {
+	throw exception::AlibException("Unsupported string type CyclicString");
+}
+
+const BoyerMooreHorspool BoyerMooreHorspool::BOYER_MOORE_HORSPOOL;
+
+} /* namespace exact */
+
+} /* namespace stringology */
diff --git a/alib2algo/src/stringology/exact/BoyerMooreHorspool.h b/alib2algo/src/stringology/exact/BoyerMooreHorspool.h
new file mode 100644
index 0000000000000000000000000000000000000000..e42f44b3e10eae21dca2c898d1eaa1f64d83f595
--- /dev/null
+++ b/alib2algo/src/stringology/exact/BoyerMooreHorspool.h
@@ -0,0 +1,50 @@
+/*
+ * BoyerMooreHorspool.h
+ *
+ *  Created on: 5. 11. 2014
+ *      Author: Radomir Polach, Tomas Pecka
+ */
+
+#ifndef _BOYER_MOORE_HORSPOOL_H
+#define _BOYER_MOORE_HORSPOOL_H
+
+#include <set>
+
+#include <string/String.h>
+
+namespace stringology {
+
+namespace exact {
+
+/**
+ * Implementation of BMH for MI(E+\eps)-EVY course 2014
+ * To get rid of zeros in the BCS table, the last pattern character is ignored
+ * when the table is built.
+ */
+class BoyerMooreHorspool : public string::VisitableStringBase::const_same_visitor_type {
+public:
+	/**
+	 * Search for pattern in subject.
+	 * Epsilon and CyclicString operands are rejected with AlibException.
+	 * @return set of offsets in subject at which pattern occurs
+	 */
+	static std::set<unsigned> match(const string::String& subject, const string::String& pattern);
+
+	/**
+	 * Search for pattern in a linear string.
+	 * @return set of offsets in subject at which pattern occurs
+	 */
+	static std::set<unsigned> match(const string::LinearString& subject, const string::LinearString& pattern);
+private:
+	void Visit(void*, const string::Epsilon& subject, const string::Epsilon& pattern) const;
+	void Visit(void*, const string::LinearString& subject, const string::LinearString& pattern) const;
+	void Visit(void*, const string::CyclicString& subject, const string::CyclicString& pattern) const;
+
+	static const BoyerMooreHorspool BOYER_MOORE_HORSPOOL;
+};
+
+} /* namespace exact */
+
+} /* namespace stringology */
+
+#endif /* _BOYER_MOORE_HORSPOOL_H */
diff --git a/astringology2/src/astringology.cpp b/astringology2/src/astringology.cpp
index ebb6e83b19260e868a5b9bdeddfd2e8ff7b08bdf..57f71437c6d97fdd4db48b72fd23784955d4fc15 100644
--- a/astringology2/src/astringology.cpp
+++ b/astringology2/src/astringology.cpp
@@ -15,6 +15,7 @@
 #include <container/Container.h>
 
 #include <string/naive/ExactMatch.h>
+#include <stringology/exact/BoyerMooreHorspool.h>
 #include <stringology/exact/ExactMatchingAutomaton.h>
 #include <stringology/exact/BorderArray.h>
 
@@ -25,6 +26,7 @@ int main(int argc, char* argv[]) {
 		std::vector<std::string> allowed;
 		allowed.push_back("exactMatchingAutomaton");
 		allowed.push_back("exactMatch");
+		allowed.push_back("boyerMooreHorspool");
 		allowed.push_back("borderArray");
 		TCLAP::ValuesConstraint<std::string> allowedVals( allowed );
 
@@ -67,6 +69,12 @@ int main(int argc, char* argv[]) {
 			std::set<unsigned> res = string::naive::ExactMatch::match(subject, pattern);
 			alib::XmlDataFactory::toStdout(res);
 			return 0;
+		} else if( algorithm.getValue() == "boyerMooreHorspool") {
+			string::String subject = alib::XmlDataFactory::fromTokens<string::String>(subjectTokens);
+			string::String pattern = alib::XmlDataFactory::fromTokens<string::String>(patternTokens);
+			std::set<unsigned> res = stringology::exact::BoyerMooreHorspool::match(subject, pattern);
+			alib::XmlDataFactory::toStdout(res);
+			return 0;
 		} else if( algorithm.getValue() == "exactMatchingAutomaton") {
 			string::String pattern = alib::XmlDataFactory::fromTokens<string::String>(patternTokens);
 			automaton::Automaton automaton = stringology::exact::ExactMatchingAutomaton::construct(pattern);