From 0c3a03ec90828305d2e91a85aef411df394f8c0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <peckato1@fit.cvut.cz> Date: Sat, 1 Nov 2014 19:20:14 +0100 Subject: [PATCH] algo: stringology: Border Array --- .../src/stringology/exact/BorderArray.cpp | 64 +++++++++++++++++++ alib2algo/src/stringology/exact/BorderArray.h | 41 ++++++++++++ .../stringology/exact/borderArrayTest.cpp | 23 +++++++ .../stringology/exact/borderArrayTest.h | 19 ++++++ alib2data/src/string/String.cpp | 4 +- astringology2/src/astringology.cpp | 11 +++- 6 files changed, 159 insertions(+), 3 deletions(-) create mode 100644 alib2algo/src/stringology/exact/BorderArray.cpp create mode 100644 alib2algo/src/stringology/exact/BorderArray.h create mode 100644 alib2algo/test-src/stringology/exact/borderArrayTest.cpp create mode 100644 alib2algo/test-src/stringology/exact/borderArrayTest.h diff --git a/alib2algo/src/stringology/exact/BorderArray.cpp b/alib2algo/src/stringology/exact/BorderArray.cpp new file mode 100644 index 0000000000..6d8ad7274b --- /dev/null +++ b/alib2algo/src/stringology/exact/BorderArray.cpp @@ -0,0 +1,64 @@ +/* + * BorderArray.cpp + * + * Created on: 1. 11. 
2014
+ * Author: Tomas Pecka
+ */
+
+#include "BorderArray.h"
+
+#include <container/ObjectsVector.h>
+#include <container/ObjectsPair.h>
+#include <exception/AlibException.h>
+#include <object/Object.h>
+#include <primitive/Integer.h>
+#include <string/LinearString.h>
+
+namespace stringology {
+
+namespace exact {
+
+// Dispatches on the wrapped string type: the LinearString Visit() fills `out`, the other overloads throw.
+std::vector<unsigned> BorderArray::construct(const string::String& string) {
+	std::vector<unsigned> out;
+	string.getData().Accept((void*) &out, BorderArray::BORDER_ARRAY);
+	return out;
+}
+
+// Returns size() + 1 entries: res[k] is the length of the longest border of the prefix of length k.
+std::vector<unsigned> BorderArray::construct(const string::LinearString& string) {
+	const auto& w = string.getContent();
+	// Entries are value-initialised to 0, which already covers res[0] and res[1]. Assigning
+	// res[1] explicitly would write out of bounds for an empty string (res has one element).
+	std::vector<unsigned> res(w.size() + 1);
+
+	for(size_t i = 1; i < w.size(); i++) {
+		// Shrink the candidate border via shorter borders until w[i] extends it or it is empty.
+		unsigned b = res[i];
+		while (b > 0 && w[i] != w[b])
+			b = res[b];
+		res[i + 1] = (w[i] == w[b]) ? b + 1 : 0;
+	}
+
+	return res;
+}
+
+// Visitor callback: `data` is the std::vector<unsigned>* handed to Accept() by construct().
+void BorderArray::Visit(void* data, const string::LinearString& string) const {
+	std::vector<unsigned> & out = *(std::vector<unsigned>*) data;
+	out = this->construct(string);
+}
+
+void BorderArray::Visit(void*, const string::Epsilon&) const {
+	throw exception::AlibException("Unsupported string type Epsilon");
+}
+
+void BorderArray::Visit(void*, const string::CyclicString&) const {
+	throw exception::AlibException("Unsupported string type CyclicString");
+}
+
+const BorderArray BorderArray::BORDER_ARRAY;
+
+} /* namespace exact */
+
+} /* namespace stringology */
diff --git a/alib2algo/src/stringology/exact/BorderArray.h b/alib2algo/src/stringology/exact/BorderArray.h
new file mode 100644
index 0000000000..c1283cbefa
--- /dev/null
+++ b/alib2algo/src/stringology/exact/BorderArray.h
@@ -0,0 +1,41 @@
+/*
+ * BorderArray.h
+ *
+ * Created on: 1. 11. 
2014
+ * Author: Tomas Pecka
+ */
+
+#ifndef _BORDER_ARRAY_H_
+#define _BORDER_ARRAY_H_
+
+#include <vector>
+#include <string/String.h>
+
+namespace stringology {
+
+namespace exact {
+
+class BorderArray : public string::VisitableStringBase::const_visitor_type {
+public:
+	/**
+	 * Computes the border array of a string; only LinearString is supported, other string types throw exception::AlibException.
+	 * @param string string to compute border array for
+	 * @return Vector of length |string| + 1 whose i-th entry is the length of the longest border of the prefix of length i
+	 */
+	static std::vector<unsigned> construct(const string::String& string);
+	/** Same computation applied to a LinearString directly, without going through the visitor. */
+	static std::vector<unsigned> construct(const string::LinearString& string);
+
+private:
+	void Visit(void*, const string::Epsilon& pattern) const;
+	void Visit(void*, const string::LinearString& pattern) const;
+	void Visit(void*, const string::CyclicString& pattern) const;
+	/** Shared visitor instance that construct(const string::String&) passes to Accept(). */
+	static const BorderArray BORDER_ARRAY;
+};
+
+} /* namespace exact */
+
+} /* namespace stringology */
+
+#endif /* _BORDER_ARRAY_H_ */
diff --git a/alib2algo/test-src/stringology/exact/borderArrayTest.cpp b/alib2algo/test-src/stringology/exact/borderArrayTest.cpp
new file mode 100644
index 0000000000..ca607cdb10
--- /dev/null
+++ b/alib2algo/test-src/stringology/exact/borderArrayTest.cpp
@@ -0,0 +1,23 @@
+#include "borderArrayTest.h"
+
+#include "string/String.h"
+#include "stringology/exact/BorderArray.h"
+
+#define CPPUNIT_IMPLY(x, y) CPPUNIT_ASSERT(!(x) || (y))
+
+CPPUNIT_TEST_SUITE_REGISTRATION( borderArrayTest );
+
+void borderArrayTest::setUp() {
+}
+
+void borderArrayTest::tearDown() {
+}
+
+void borderArrayTest::testBorderArray() {
+	string::String string = string::stringFrom("alfalfaalf");
+	std::vector<unsigned> borderArray = stringology::exact::BorderArray::construct(string);
+	// Entry i = longest border of the length-i prefix (assumes stringFrom() yields one symbol per char).
+	std::vector<unsigned> expected {0, 0, 0, 0, 1, 2, 3, 4, 1, 2, 3};
+	CPPUNIT_ASSERT(borderArray == expected);
+}
+
diff --git a/alib2algo/test-src/stringology/exact/borderArrayTest.h b/alib2algo/test-src/stringology/exact/borderArrayTest.h
new file mode 100644
index 
0000000000..6d5fcb293c --- /dev/null +++ b/alib2algo/test-src/stringology/exact/borderArrayTest.h @@ -0,0 +1,19 @@ +#ifndef BORDER_ARRAY_TEST +#define BORDER_ARRAY_TEST + +#include <cppunit/extensions/HelperMacros.h> +/* CppUnit fixture whose suite registers the single test case testBorderArray. */ +class borderArrayTest : public CppUnit::TestFixture +{ + CPPUNIT_TEST_SUITE( borderArrayTest ); + CPPUNIT_TEST( testBorderArray ); + CPPUNIT_TEST_SUITE_END(); + +public: + void setUp(); + void tearDown(); + /* Checks the output of stringology::exact::BorderArray::construct for a fixed input string. */ + void testBorderArray(); +}; + +#endif // BORDER_ARRAY_TEST diff --git a/alib2data/src/string/String.cpp b/alib2data/src/string/String.cpp index 8fc68125b5..8d08af4309 100644 --- a/alib2data/src/string/String.cpp +++ b/alib2data/src/string/String.cpp @@ -11,11 +11,11 @@ namespace string { -string::String symbolFrom( const alphabet::Symbol& symbol ) { +string::String stringFrom( const alphabet::Symbol& symbol ) { return string::String { string::LinearString { std::vector<alphabet::Symbol> { symbol } } }; } -string::String symbolFrom( const std::string& string ) { +string::String stringFrom( const std::string& string ) { return string::String { string::LinearString { string } }; } diff --git a/astringology2/src/astringology.cpp b/astringology2/src/astringology.cpp index c5f9a00f34..ebb6e83b19 100644 --- a/astringology2/src/astringology.cpp +++ b/astringology2/src/astringology.cpp @@ -12,8 +12,11 @@ #include <exception/AlibException.h> #include <string/String.h> #include <automaton/Automaton.h> +#include <container/Container.h> + #include <string/naive/ExactMatch.h> #include <stringology/exact/ExactMatchingAutomaton.h> +#include <stringology/exact/BorderArray.h> int main(int argc, char* argv[]) { try { @@ -22,6 +25,7 @@ int main(int argc, char* argv[]) { std::vector<std::string> allowed; allowed.push_back("exactMatchingAutomaton"); allowed.push_back("exactMatch"); + allowed.push_back("borderArray"); TCLAP::ValuesConstraint<std::string> allowedVals( allowed ); TCLAP::ValueArg<std::string> algorithm( "a", "algorithm", "Execute algorithm", false, 
"exactMatch", &allowedVals); @@ -31,7 +35,7 @@ int main(int argc, char* argv[]) { cmd.add( subject ); TCLAP::ValueArg<std::string> pattern( "p", "pattern", "Pattern string from file", false, "-", "file"); - cmd.add( pattern ); + cmd.add( pattern ); cmd.parse(argc,argv); @@ -68,6 +72,11 @@ int main(int argc, char* argv[]) { automaton::Automaton automaton = stringology::exact::ExactMatchingAutomaton::construct(pattern); alib::XmlDataFactory::toStdout(automaton); return 0; + } else if( algorithm.getValue() == "borderArray") { + string::String subject = alib::XmlDataFactory::fromTokens<string::String>(subjectTokens); + std::vector<unsigned> borderArray = stringology::exact::BorderArray::construct(subject); + alib::XmlDataFactory::toStdout(borderArray); + return 0; } else { throw exception::AlibException( "Invalid algorithm" ); return 1; -- GitLab