From 1d66e5646f22ee3472688ad172ea8859d6e603ba Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Fri, 5 May 2017 13:20:31 +0200 Subject: [PATCH] remove BNDM from experimental --- .../BackwardNondeterministicDAWGMatching.cpp | 130 ------------------ .../BackwardNondeterministicDAWGMatching.hpp | 53 ------- ...ckwardNondeterministicDAWGMatchingTest.cpp | 55 -------- ...BackwardNondeterministicDAWGMatchingTest.h | 18 --- 4 files changed, 256 deletions(-) delete mode 100644 alib2algo_experimental/src/stringology/exact/BackwardNondeterministicDAWGMatching.cpp delete mode 100644 alib2algo_experimental/src/stringology/exact/BackwardNondeterministicDAWGMatching.hpp delete mode 100644 alib2algo_experimental/test-src/stringology/exact/BackwardNondeterministicDAWGMatchingTest.cpp delete mode 100644 alib2algo_experimental/test-src/stringology/exact/BackwardNondeterministicDAWGMatchingTest.h diff --git a/alib2algo_experimental/src/stringology/exact/BackwardNondeterministicDAWGMatching.cpp b/alib2algo_experimental/src/stringology/exact/BackwardNondeterministicDAWGMatching.cpp deleted file mode 100644 index f0262689ac..0000000000 --- a/alib2algo_experimental/src/stringology/exact/BackwardNondeterministicDAWGMatching.cpp +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Author: Radovan Cerveny - */ - -#include "BackwardNondeterministicDAWGMatching.hpp" - -#include <string/LinearString.h> -#include <alphabet/Symbol.h> - -#include <map> -#include <bitset> -#include <measure> - -namespace stringology { - -namespace exact { - -template < size_t BitmaskBitCount > -std::set < unsigned > BackwardNondeterministicDAWGMatching::matchTemplate ( const string::String & subject, const string::String & pattern ) { - return dispatch ( subject.getData ( ), pattern.getData ( ) ); -} - -template < size_t BitmaskBitCount > -std::set < unsigned > BackwardNondeterministicDAWGMatching::matchTemplate ( const string::LinearString < > & subject, const string::LinearString < > & pattern ) { - std::set < unsigned > occ; - - // Setup helper variables - using BitmaskType = std::bitset < BitmaskBitCount >; - bool patternIsLong = BitmaskBitCount < pattern.getContent ( ).size ( ); - size_t bitmaskLength = patternIsLong ? BitmaskBitCount : pattern.getContent ( ).size ( ); - - measurements::start ( "Preprocess", measurements::Type::PREPROCESS ); - - std::map < DefaultSymbolType, BitmaskType > symbolBitmaskLookupTable; - - // Initialize the bitmasks with zeros for each symbol in the alphabet - for ( const auto & symbol : pattern.getAlphabet ( ) ) - symbolBitmaskLookupTable[symbol] = BitmaskType ( 0 ); - - // Mark the position in the bitmask for each symbol in the pattern - for ( size_t i = 0; i < bitmaskLength; i++ ) - symbolBitmaskLookupTable[pattern.getContent ( ).at ( i )].set ( bitmaskLength - i - 1 ); - - measurements::end ( ); - - measurements::start ( "Algorithm", measurements::Type::ALGORITHM ); - - size_t posInSubject = 0; - BitmaskType currentBitmask; - - while ( posInSubject <= subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) ) { - size_t posInPattern = bitmaskLength; - size_t lastPosOfFactor = bitmaskLength; - - // Set the bitmask to all ones - currentBitmask.set ( ); - - while ( posInPattern > 0 && currentBitmask.any ( ) ) { - currentBitmask &= symbolBitmaskLookupTable[subject.getContent ( ).at ( posInSubject + posInPattern - 1 )]; - posInPattern--; - - // Test whether the most significant bit is set - if ( currentBitmask.test ( bitmaskLength - 1 ) ) { - if ( posInPattern > 0 ) { - lastPosOfFactor = posInPattern; - } else { - if ( !patternIsLong ) { - // Yay, there is match!!! - occ.insert ( posInSubject ); - } else { - // if the pattern is longer then BitmaskBitCount characters switch to brute force check - size_t k = bitmaskLength; - - while ( k < pattern.getContent ( ).size ( ) && pattern.getContent ( ).at ( k ) == subject.getContent ( ).at ( posInSubject + k ) ) k++; - - if ( k == pattern.getContent ( ).size ( ) ) - // Yay, there is match!!! - occ.insert ( posInSubject ); - } - } - } - - currentBitmask <<= 1; - } - - posInSubject += lastPosOfFactor; - } - - measurements::end ( ); - - return occ; -} - -std::set < unsigned > BackwardNondeterministicDAWGMatching::match ( const string::String & subject, const string::String & pattern ) { - return BackwardNondeterministicDAWGMatching::match32 ( subject, pattern ); -} - -std::set < unsigned > BackwardNondeterministicDAWGMatching::match ( const string::LinearString < > & subject, const string::LinearString < > & pattern ) { - return BackwardNondeterministicDAWGMatching::match32 ( subject, pattern ); -} - -std::set < unsigned > BackwardNondeterministicDAWGMatching::match32 ( const string::String & subject, const string::String & pattern ) { - return BackwardNondeterministicDAWGMatching::matchTemplate < 32 > ( subject, pattern ); -} - -std::set < unsigned > BackwardNondeterministicDAWGMatching::match32 ( const string::LinearString < > & subject, const string::LinearString < > & pattern ) { - return BackwardNondeterministicDAWGMatching::matchTemplate < 32 > ( subject, pattern ); -} - -std::set < unsigned > BackwardNondeterministicDAWGMatching::match64 ( const string::String & subject, const string::String & pattern ) { - return BackwardNondeterministicDAWGMatching::matchTemplate < 64 > ( subject, pattern ); -} - -std::set < unsigned > BackwardNondeterministicDAWGMatching::match64 ( const string::LinearString < > & subject, const string::LinearString < > & pattern ) { - return BackwardNondeterministicDAWGMatching::matchTemplate < 64 > ( subject, pattern ); -} - -std::set < unsigned > BackwardNondeterministicDAWGMatching::match128 ( const string::String & subject, const string::String & pattern ) { - return BackwardNondeterministicDAWGMatching::matchTemplate < 128 > ( subject, pattern ); -} - -std::set < unsigned > BackwardNondeterministicDAWGMatching::match128 ( const string::LinearString < > & subject, const string::LinearString < > & pattern ) { - return BackwardNondeterministicDAWGMatching::matchTemplate < 128 > ( subject, pattern ); -} - -auto BackwardNondeterministicDAWGMatchingLinearStringLinearString = BackwardNondeterministicDAWGMatching::RegistratorWrapper < std::set < unsigned >, string::LinearString < >, string::LinearString < > > ( BackwardNondeterministicDAWGMatching::match ); - -} /* namespace exact */ - -} /* namespace stringology */ diff --git a/alib2algo_experimental/src/stringology/exact/BackwardNondeterministicDAWGMatching.hpp b/alib2algo_experimental/src/stringology/exact/BackwardNondeterministicDAWGMatching.hpp deleted file mode 100644 index b3b6b1f339..0000000000 --- a/alib2algo_experimental/src/stringology/exact/BackwardNondeterministicDAWGMatching.hpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Author: Radovan Cerveny - */ - -#ifndef STRINGOLOGY_BACKWARD_NONDETERMINISTIC_DAWG_MATCHING_HPP__ -#define STRINGOLOGY_BACKWARD_NONDETERMINISTIC_DAWG_MATCHING_HPP__ - -#include <string/String.h> -#include <string/StringFeatures.h> -#include <core/multipleDispatch.hpp> - -#include <set> - -namespace stringology { - -namespace exact { - -/** - * Implementation of Backward Nondeterministic DAWG Matching using bit parallelism with 32/64/128bit bitmask and brute force switch for longer patterns. - */ -class BackwardNondeterministicDAWGMatching : public std::DoubleDispatch < BackwardNondeterministicDAWGMatching, std::set < unsigned >, const string::StringBase &, const string::StringBase & > { -private: - /** - * Search for pattern in linear string. - * @return set set of occurences - */ - template <size_t BitmaskBitCount> - static std::set < unsigned > matchTemplate ( const string::String & subject, const string::String & pattern ); - - template <size_t BitmaskBitCount > - static std::set < unsigned > matchTemplate ( const string::LinearString < > & subject, const string::LinearString < > & pattern ); -public: - - // Defaults to 32 bits - static std::set < unsigned > match ( const string::String & subject, const string::String & pattern ); - static std::set < unsigned > match ( const string::LinearString < > & subject, const string::LinearString < > & pattern ); - - static std::set < unsigned > match32 ( const string::String & subject, const string::String & pattern ); - static std::set < unsigned > match32 ( const string::LinearString < > & subject, const string::LinearString < > & pattern ); - - static std::set < unsigned > match64 ( const string::String & subject, const string::String & pattern ); - static std::set < unsigned > match64 ( const string::LinearString < > & subject, const string::LinearString < > & pattern ); - - static std::set < unsigned > match128 ( const string::String & subject, const string::String & pattern ); - static std::set < unsigned > match128 ( const string::LinearString < > & subject, const string::LinearString < > & pattern ); - -}; - -} /* namespace exact */ - -} /* namespace stringology */ - -#endif /* STRINGOLOGY_BACKWARD_NONDETERMINISTIC_DAWG_MATCHING_HPP__ */ diff --git a/alib2algo_experimental/test-src/stringology/exact/BackwardNondeterministicDAWGMatchingTest.cpp b/alib2algo_experimental/test-src/stringology/exact/BackwardNondeterministicDAWGMatchingTest.cpp deleted file mode 100644 index e7fe488512..0000000000 --- a/alib2algo_experimental/test-src/stringology/exact/BackwardNondeterministicDAWGMatchingTest.cpp +++ /dev/null @@ -1,55 +0,0 @@ -#include "BackwardNondeterministicDAWGMatchingTest.h" - -#include "string/String.h" -#include "stringology/exact/BackwardNondeterministicDAWGMatching.hpp" -#include "stringology/exact/ExactFactorMatch.h" - -#include "string/generate/RandomStringFactory.h" -#include "string/generate/RandomSubstringFactory.h" - -CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( BackwardNondeterministicDAWGMatchingTest, "stringology" ); -CPPUNIT_TEST_SUITE_REGISTRATION ( BackwardNondeterministicDAWGMatchingTest ); - -void BackwardNondeterministicDAWGMatchingTest::setUp ( ) { -} - -void BackwardNondeterministicDAWGMatchingTest::tearDown ( ) { -} - -void BackwardNondeterministicDAWGMatchingTest::testBNDM ( ) { - - std::vector<std::string> subjects; - std::vector<std::string> patterns; - std::vector<std::set<unsigned>> expectedOccs; - - subjects.push_back("a"); patterns.push_back("a"); expectedOccs.push_back({0}); - subjects.push_back("a"); patterns.push_back("b"); expectedOccs.push_back({}); - subjects.push_back("alfalfalfa"); patterns.push_back("alfalfalfa"); expectedOccs.push_back({0}); - subjects.push_back("alfalfalfa"); patterns.push_back("blfalfalfa"); expectedOccs.push_back({}); - subjects.push_back("alfalfalfa"); patterns.push_back("alfalfalfb"); expectedOccs.push_back({}); - subjects.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa"); patterns.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa"); expectedOccs.push_back({0}); - subjects.push_back("alfalfalfaalfalfalfaabfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa"); patterns.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa"); expectedOccs.push_back({}); - subjects.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfaa"); patterns.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfaa"); expectedOccs.push_back({0}); - subjects.push_back("atggccttgcc"); patterns.push_back("gcc"); expectedOccs.push_back({3,8}); - subjects.push_back("aaaaaaaaaa"); patterns.push_back("a"); expectedOccs.push_back({0,1,2,3,4,5,6,7,8,9}); - - - for(size_t i = 0; i < subjects.size(); ++i) { - string::String subject = string::stringFrom ( subjects[i] ); - string::String pattern = string::stringFrom ( patterns[i] ); - std::set < unsigned > res = stringology::exact::BackwardNondeterministicDAWGMatching::match ( subject, pattern ); - CPPUNIT_ASSERT ( res == expectedOccs[i] ); - res = stringology::exact::BackwardNondeterministicDAWGMatching::match64 ( subject, pattern ); - CPPUNIT_ASSERT ( res == expectedOccs[i] ); - res = stringology::exact::BackwardNondeterministicDAWGMatching::match128 ( subject, pattern ); - CPPUNIT_ASSERT ( res == expectedOccs[i] ); - std::cout << subjects[i] << ' ' << patterns[i] << ' ' << res << std::endl; - } - - auto longSubject = string::generate::RandomStringFactory::generateLinearString (64 * 64 * 64, 512, false, true); - auto longPattern = string::generate::RandomSubstringFactory::generateSubstring(64 * 32 * 32, longSubject ); - std::set < unsigned > res = stringology::exact::BackwardNondeterministicDAWGMatching::match ( longSubject, longPattern ); - std::set < unsigned > resRef = stringology::exact::ExactFactorMatch::match ( longSubject, longPattern ); - std::cout << "long: " << res << std::endl; - CPPUNIT_ASSERT ( res == resRef); -} diff --git a/alib2algo_experimental/test-src/stringology/exact/BackwardNondeterministicDAWGMatchingTest.h b/alib2algo_experimental/test-src/stringology/exact/BackwardNondeterministicDAWGMatchingTest.h deleted file mode 100644 index 016be2e42a..0000000000 --- a/alib2algo_experimental/test-src/stringology/exact/BackwardNondeterministicDAWGMatchingTest.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef BACKWARD_NONDETERMINISTIC_DAWG_MATCHING_TEST_H_ -#define BACKWARD_NONDETERMINISTIC_DAWG_MATCHING_TEST_H_ - -#include <cppunit/extensions/HelperMacros.h> - -class BackwardNondeterministicDAWGMatchingTest : public CppUnit::TestFixture { - CPPUNIT_TEST_SUITE ( BackwardNondeterministicDAWGMatchingTest ); - CPPUNIT_TEST ( testBNDM ); - CPPUNIT_TEST_SUITE_END ( ); - -public: - void setUp ( ); - void tearDown ( ); - - void testBNDM ( ); -}; - -#endif // BACKWARD_NONDETERMINISTIC_DAWG_MATCHING_TEST_H_ -- GitLab