From a2c1733702666053a8f61635fda121a113f41b36 Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Wed, 17 May 2017 15:41:02 +0200 Subject: [PATCH] move dawg and oracle matching to algo --- .../query/BackwardDAWGMatching.cpp | 69 +++++++++++++++++ .../stringology/query}/BackwardDAWGMatching.h | 20 ++--- .../query/BackwardOracleMatching.cpp | 65 ++++++++++++++++ .../query}/BackwardOracleMatching.h | 20 ++--- .../stringology/query/DAWGMatcherTest.cpp | 55 ++++++++++++++ .../stringology/query}/DAWGMatcherTest.h | 0 .../stringology/query/OracleMatcherTest.cpp | 53 +++++++++++++ .../stringology/query/OracleMatcherTest.h | 19 +++++ .../exact/BackwardDAWGMatching.cpp | 76 ------------------- .../exact/BackwardOracleMatching.cpp | 72 ------------------ .../stringology/exact/DAWGMatcherTest.cpp | 52 ------------- .../stringology/exact/OracleMatcherTest.cpp | 50 ------------ .../stringology/exact/OracleMatcherTest.h | 19 ----- astringology2/src/astringology.cpp | 12 +-- 14 files changed, 289 insertions(+), 293 deletions(-) create mode 100644 alib2algo/src/stringology/query/BackwardDAWGMatching.cpp rename {alib2algo_experimental/src/stringology/exact => alib2algo/src/stringology/query}/BackwardDAWGMatching.h (53%) create mode 100644 alib2algo/src/stringology/query/BackwardOracleMatching.cpp rename {alib2algo_experimental/src/stringology/exact => alib2algo/src/stringology/query}/BackwardOracleMatching.h (53%) create mode 100644 alib2algo/test-src/stringology/query/DAWGMatcherTest.cpp rename {alib2algo_experimental/test-src/stringology/exact => alib2algo/test-src/stringology/query}/DAWGMatcherTest.h (100%) create mode 100644 alib2algo/test-src/stringology/query/OracleMatcherTest.cpp create mode 100644 alib2algo/test-src/stringology/query/OracleMatcherTest.h delete mode 100644 alib2algo_experimental/src/stringology/exact/BackwardDAWGMatching.cpp delete mode 100644 alib2algo_experimental/src/stringology/exact/BackwardOracleMatching.cpp delete mode 100644 
alib2algo_experimental/test-src/stringology/exact/DAWGMatcherTest.cpp delete mode 100644 alib2algo_experimental/test-src/stringology/exact/OracleMatcherTest.cpp delete mode 100644 alib2algo_experimental/test-src/stringology/exact/OracleMatcherTest.h diff --git a/alib2algo/src/stringology/query/BackwardDAWGMatching.cpp b/alib2algo/src/stringology/query/BackwardDAWGMatching.cpp new file mode 100644 index 0000000000..e93db0dd56 --- /dev/null +++ b/alib2algo/src/stringology/query/BackwardDAWGMatching.cpp @@ -0,0 +1,69 @@ +/* + * Author: Radovan Cerveny + */ + +#include "BackwardDAWGMatching.h" +#include "stringology/matching/DAWGMatcherConstruction.h" +#include "stringology/properties/BackboneLength.h" + +#include <string/LinearString.h> +#include <alphabet/Symbol.h> + +#include <algorithm> +#include <map> +#include <measure> + +namespace stringology { + +namespace query { + +std::set < unsigned > BackwardDAWGMatching::match ( const string::String & subject, const automaton::Automaton & suffixAutomaton ) { + return dispatch ( subject.getData ( ), suffixAutomaton.getData ( ) ); +} + +std::set < unsigned > BackwardDAWGMatching::match ( const string::LinearString < > & subject, const automaton::DFA < > & suffixAutomaton ) { /* Scans each window of the subject right to left through suffixAutomaton and returns the start positions of all windows that are read without failing; the window length is the backbone length of the automaton. A pattern longer than the subject yields an empty set. */ + std::set < unsigned > occ; + + size_t patternSize = stringology::properties::BackboneLength::length ( suffixAutomaton ); + size_t subjectSize = subject.getContent ( ).size ( ); + + const DefaultStateType failState = DefaultStateType ( -1 ); + + size_t posInSubject = 0; + + while ( patternSize <= subjectSize && posInSubject <= subjectSize - patternSize ) { /* guard first: the subtraction is unsigned and would wrap around when the pattern is longer than the subject */ + DefaultStateType currentState = suffixAutomaton.getInitialState ( ); + + size_t posInPattern = patternSize; + size_t lastPrefixPos = posInPattern; + + while ( posInPattern > 0 && currentState != failState ) { + auto transition = suffixAutomaton.getTransitions ( ).find ( { currentState, subject.getContent ( ).at ( posInSubject + posInPattern - 1 ) } ); + + if ( transition == suffixAutomaton.getTransitions ( ).end ( ) ) +
currentState = failState; + else + currentState = transition->second; + + posInPattern--; + + // found a prefix of nonreversed pattern that does not correspond to the entire pattern + if ( ( posInPattern != 0 ) && ( suffixAutomaton.getFinalStates ( ).find ( currentState ) != suffixAutomaton.getFinalStates ( ).end ( ) ) ) + lastPrefixPos = posInPattern; + } + + if ( currentState != failState ) + // Yay, there is match!!! + occ.insert ( posInSubject ); + + posInSubject += std::max < size_t > ( lastPrefixPos, 1 ); /* lastPrefixPos is 0 only when patternSize is 0; always advance by at least one position so the loop terminates */ + } + + return occ; +} + +auto BackwardDAWGMatchingLinearStringLinearString = BackwardDAWGMatching::RegistratorWrapper < std::set < unsigned >, string::LinearString < >, automaton::DFA < > > ( BackwardDAWGMatching::match ); + +} /* namespace query */ + +} /* namespace stringology */ diff --git a/alib2algo_experimental/src/stringology/exact/BackwardDAWGMatching.h b/alib2algo/src/stringology/query/BackwardDAWGMatching.h similarity index 53% rename from alib2algo_experimental/src/stringology/exact/BackwardDAWGMatching.h rename to alib2algo/src/stringology/query/BackwardDAWGMatching.h index a1aac21748..9b2bc62846 100644 --- a/alib2algo_experimental/src/stringology/exact/BackwardDAWGMatching.h +++ b/alib2algo/src/stringology/query/BackwardDAWGMatching.h @@ -7,29 +7,31 @@ #include <string/String.h> #include <string/StringFeatures.h> +#include <automaton/Automaton.h> +#include <automaton/AutomatonFeatures.h> #include <core/multipleDispatch.hpp> #include <set> namespace stringology { -namespace exact { +namespace query { /** * Implementation of Backward DAWG Matching. */ -class BackwardDAWGMatching : public std::DoubleDispatch < BackwardDAWGMatching, std::set < unsigned >, const string::StringBase &, const string::StringBase & > { +class BackwardDAWGMatching : public std::DoubleDispatch < BackwardDAWGMatching, std::set < unsigned >, const string::StringBase &, const automaton::AutomatonBase & > { public: - /** - * Search for pattern in linear string.
- * @return set set of occurences - */ - static std::set < unsigned > match ( const string::String & subject, const string::String & pattern ); - static std::set < unsigned > match ( const string::LinearString < > & subject, const string::LinearString < > & pattern ); + /** + * Search for pattern in linear string. + * @return set set of occurences + */ + static std::set < unsigned > match ( const string::String & subject, const automaton::Automaton & suffixAutomaton ); + static std::set < unsigned > match ( const string::LinearString < > & subject, const automaton::DFA < > & suffixAutomaton ); }; -} /* namespace exact */ +} /* namespace query */ } /* namespace stringology */ diff --git a/alib2algo/src/stringology/query/BackwardOracleMatching.cpp b/alib2algo/src/stringology/query/BackwardOracleMatching.cpp new file mode 100644 index 0000000000..57da6ea3b5 --- /dev/null +++ b/alib2algo/src/stringology/query/BackwardOracleMatching.cpp @@ -0,0 +1,65 @@ +/* + * Author: Radovan Cerveny + */ + +#include "BackwardOracleMatching.h" +#include "stringology/matching/OracleMatcherConstruction.h" +#include "stringology/properties/BackboneLength.h" + +#include <string/LinearString.h> +#include <alphabet/Symbol.h> + +#include <algorithm> +#include <map> +#include <measure> + +namespace stringology { + +namespace query { + +std::set < unsigned > BackwardOracleMatching::match ( const string::String & subject, const automaton::Automaton & factorOracle ) { + return dispatch ( subject.getData ( ), factorOracle.getData ( ) ); +} + +std::set < unsigned > BackwardOracleMatching::match ( const string::LinearString < > & subject, const automaton::DFA < > & factorOracle ) { /* Scans each window of the subject right to left through factorOracle and returns the start positions of all windows that are read without failing; the window length is the backbone length of the automaton. A pattern longer than the subject yields an empty set. */ + std::set < unsigned > occ; + + size_t patternSize = stringology::properties::BackboneLength::length ( factorOracle ); + size_t subjectSize = subject.getContent ( ).size ( ); + + const DefaultStateType failState = DefaultStateType ( -1 ); + + size_t posInSubject = 0; + + while ( patternSize <= subjectSize && posInSubject <= subjectSize - patternSize
) { /* guard first: the subtraction is unsigned and would wrap around when the pattern is longer than the subject */ + + DefaultStateType currentState = factorOracle.getInitialState ( ); + + size_t posInPattern = patternSize; + + while ( posInPattern > 0 && currentState != failState ) { + auto transition = factorOracle.getTransitions ( ).find ( { currentState, subject.getContent ( ).at ( posInSubject + posInPattern - 1 ) } ); + + if ( transition == factorOracle.getTransitions ( ).end ( ) ) + currentState = failState; + else + currentState = transition->second; + + posInPattern--; + } + + if ( currentState != failState ) + // Yay, there is match!!! + occ.insert ( posInSubject ); + + posInSubject += posInPattern + 1; /* shift past the symbol at which the backward scan stopped; always at least one position */ + } + + return occ; +} + +auto BackwardOracleMatchingLinearStringLinearString = BackwardOracleMatching::RegistratorWrapper < std::set < unsigned >, string::LinearString < >, automaton::DFA < > > ( BackwardOracleMatching::match ); + +} /* namespace query */ + +} /* namespace stringology */ diff --git a/alib2algo_experimental/src/stringology/exact/BackwardOracleMatching.h b/alib2algo/src/stringology/query/BackwardOracleMatching.h similarity index 53% rename from alib2algo_experimental/src/stringology/exact/BackwardOracleMatching.h rename to alib2algo/src/stringology/query/BackwardOracleMatching.h index 6f2320fc12..c6df8549c6 100644 --- a/alib2algo_experimental/src/stringology/exact/BackwardOracleMatching.h +++ b/alib2algo/src/stringology/query/BackwardOracleMatching.h @@ -7,29 +7,31 @@ #include <string/String.h> #include <string/StringFeatures.h> +#include <automaton/Automaton.h> +#include <automaton/AutomatonFeatures.h> #include <core/multipleDispatch.hpp> #include <set> namespace stringology { -namespace exact { +namespace query { /** * Implementation of Backward Oracle Matching.
*/ -class BackwardOracleMatching : public std::DoubleDispatch <BackwardOracleMatching, std::set < unsigned >, const string::StringBase &, const string::StringBase & > { +class BackwardOracleMatching : public std::DoubleDispatch <BackwardOracleMatching, std::set < unsigned >, const string::StringBase &, const automaton::AutomatonBase & > { public: - /** - * Search for pattern in linear string. - * @return set set of occurences - */ - static std::set < unsigned > match ( const string::String & subject, const string::String & pattern ); - static std::set < unsigned > match ( const string::LinearString < > & subject, const string::LinearString < > & pattern ); + /** + * Search for pattern in linear string. + * @return set set of occurences + */ + static std::set < unsigned > match ( const string::String & subject, const automaton::Automaton & factorOracle ); + static std::set < unsigned > match ( const string::LinearString < > & subject, const automaton::DFA < > & factorOracle ); }; -} /* namespace exact */ +} /* namespace query */ } /* namespace stringology */ diff --git a/alib2algo/test-src/stringology/query/DAWGMatcherTest.cpp b/alib2algo/test-src/stringology/query/DAWGMatcherTest.cpp new file mode 100644 index 0000000000..0a724ab8b0 --- /dev/null +++ b/alib2algo/test-src/stringology/query/DAWGMatcherTest.cpp @@ -0,0 +1,55 @@ +#include "DAWGMatcherTest.h" + +#include "string/LinearString.h" +#include "stringology/matching/DAWGMatcherConstruction.h" +#include "stringology/query/BackwardDAWGMatching.h" +#include "stringology/exact/ExactFactorMatch.h" + +#include "string/generate/RandomStringFactory.h" +#include "string/generate/RandomSubstringFactory.h" + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( DAWGMatcherTest, "stringology" ); +CPPUNIT_TEST_SUITE_REGISTRATION ( DAWGMatcherTest ); + +void DAWGMatcherTest::setUp ( ) { +} + +void DAWGMatcherTest::tearDown ( ) { +} + +void DAWGMatcherTest::testBackwardDAWGMatching ( ) { + std::vector<std::string> subjects; + 
std::vector<std::string> patterns; + std::vector<std::set<unsigned>> expectedOccs; + + subjects.push_back("a"); patterns.push_back("a"); expectedOccs.push_back({0}); + subjects.push_back("a"); patterns.push_back("b"); expectedOccs.push_back({}); + subjects.push_back("alfalfalfa"); patterns.push_back("alfalfalfa"); expectedOccs.push_back({0}); + subjects.push_back("alfalfalfa"); patterns.push_back("blfalfalfa"); expectedOccs.push_back({}); + subjects.push_back("alfalfalfa"); patterns.push_back("alfalfalfb"); expectedOccs.push_back({}); + subjects.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa"); patterns.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa"); expectedOccs.push_back({0}); + subjects.push_back("alfalfalfaalfalfalfaabfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa"); patterns.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa"); expectedOccs.push_back({}); + subjects.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfaa"); patterns.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfaa"); expectedOccs.push_back({0}); + subjects.push_back("atggccttgcc"); patterns.push_back("gcc"); expectedOccs.push_back({3,8}); + subjects.push_back("aaaaaaaaaa"); patterns.push_back("a"); expectedOccs.push_back({0,1,2,3,4,5,6,7,8,9}); + subjects.push_back("aaaaaaaaaa"); patterns.push_back("aa"); expectedOccs.push_back({0,1,2,3,4,5,6,7,8}); + + + for(size_t i = 0; i < subjects.size(); ++i) { + string::String subject = string::stringFrom ( subjects[i] ); + string::String pattern = string::stringFrom ( patterns[i] ); + automaton::Automaton suffixAutomaton = stringology::matching::DAWGMatcherConstruction::construct ( pattern ); + std::set < unsigned > res = stringology::query::BackwardDAWGMatching::match ( subject, suffixAutomaton ); + std::cout << subjects[i] << ' ' << patterns[i] << ' ' << res << std::endl; + CPPUNIT_ASSERT ( res == expectedOccs[i] ); + } + 
+ auto longSubject = string::generate::RandomStringFactory::generateLinearString (64 * 64, 512, false, true); + auto longPattern = string::generate::RandomSubstringFactory::generateSubstring(64 * 5, longSubject ); + automaton::DFA < > suffixAutomaton = stringology::matching::DAWGMatcherConstruction::construct ( longPattern ); + std::set < unsigned > res = stringology::query::BackwardDAWGMatching::match ( longSubject, suffixAutomaton ); + std::set < unsigned > resRef = stringology::exact::ExactFactorMatch::match ( longSubject, longPattern ); + std::cout << "long: " << res << std::endl; + CPPUNIT_ASSERT ( res == resRef); +} + diff --git a/alib2algo_experimental/test-src/stringology/exact/DAWGMatcherTest.h b/alib2algo/test-src/stringology/query/DAWGMatcherTest.h similarity index 100% rename from alib2algo_experimental/test-src/stringology/exact/DAWGMatcherTest.h rename to alib2algo/test-src/stringology/query/DAWGMatcherTest.h diff --git a/alib2algo/test-src/stringology/query/OracleMatcherTest.cpp b/alib2algo/test-src/stringology/query/OracleMatcherTest.cpp new file mode 100644 index 0000000000..e537c3a2fd --- /dev/null +++ b/alib2algo/test-src/stringology/query/OracleMatcherTest.cpp @@ -0,0 +1,53 @@ +#include "OracleMatcherTest.h" + +#include "string/LinearString.h" +#include "stringology/matching/OracleMatcherConstruction.h" +#include "stringology/query/BackwardOracleMatching.h" +#include "stringology/exact/ExactFactorMatch.h" + +#include "string/generate/RandomStringFactory.h" +#include "string/generate/RandomSubstringFactory.h" + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( OracleMatcherTest, "stringology" ); +CPPUNIT_TEST_SUITE_REGISTRATION ( OracleMatcherTest ); + +void OracleMatcherTest::setUp ( ) { +} + +void OracleMatcherTest::tearDown ( ) { +} + +void OracleMatcherTest::testBackwardOracleMatching ( ) { + std::vector<std::string> subjects; + std::vector<std::string> patterns; + std::vector<std::set<unsigned>> expectedOccs; + + subjects.push_back("a"); 
patterns.push_back("a"); expectedOccs.push_back({0}); + subjects.push_back("a"); patterns.push_back("b"); expectedOccs.push_back({}); + subjects.push_back("alfalfalfa"); patterns.push_back("alfalfalfa"); expectedOccs.push_back({0}); + subjects.push_back("alfalfalfa"); patterns.push_back("blfalfalfa"); expectedOccs.push_back({}); + subjects.push_back("alfalfalfa"); patterns.push_back("alfalfalfb"); expectedOccs.push_back({}); + subjects.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa"); patterns.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa"); expectedOccs.push_back({0}); + subjects.push_back("alfalfalfaalfalfalfaabfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa"); patterns.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa"); expectedOccs.push_back({}); + subjects.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfaa"); patterns.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfaa"); expectedOccs.push_back({0}); + subjects.push_back("atggccttgcc"); patterns.push_back("gcc"); expectedOccs.push_back({3,8}); + subjects.push_back("aaaaaaaaaa"); patterns.push_back("a"); expectedOccs.push_back({0,1,2,3,4,5,6,7,8,9}); + + + for(size_t i = 0; i < subjects.size(); ++i) { + string::String subject = string::stringFrom ( subjects[i] ); + string::String pattern = string::stringFrom ( patterns[i] ); + automaton::Automaton oracleAutomaton = stringology::matching::OracleMatcherConstruction::construct ( pattern ); + std::set < unsigned > res = stringology::query::BackwardOracleMatching::match ( subject, oracleAutomaton ); + std::cout << subjects[i] << ' ' << patterns[i] << ' ' << res << std::endl; + CPPUNIT_ASSERT ( res == expectedOccs[i] ); + } + + auto longSubject = string::generate::RandomStringFactory::generateLinearString (64 * 64, 512, false, true); + auto longPattern = string::generate::RandomSubstringFactory::generateSubstring(64 * 5, longSubject ); + 
automaton::DFA < > oracleAutomaton = stringology::matching::OracleMatcherConstruction::construct ( longPattern ); + std::set < unsigned > res = stringology::query::BackwardOracleMatching::match ( longSubject, oracleAutomaton ); + std::set < unsigned > resRef = stringology::exact::ExactFactorMatch::match ( longSubject, longPattern ); + std::cout << "long: " << res << std::endl; + CPPUNIT_ASSERT ( res == resRef); +} diff --git a/alib2algo/test-src/stringology/query/OracleMatcherTest.h b/alib2algo/test-src/stringology/query/OracleMatcherTest.h new file mode 100644 index 0000000000..886fc99358 --- /dev/null +++ b/alib2algo/test-src/stringology/query/OracleMatcherTest.h @@ -0,0 +1,19 @@ +#ifndef ORACLE_MATCHER_TEST_H_ +#define ORACLE_MATCHER_TEST_H_ + +#include <cppunit/extensions/HelperMacros.h> + +class OracleMatcherTest : public CppUnit::TestFixture { + CPPUNIT_TEST_SUITE ( OracleMatcherTest ); + CPPUNIT_TEST ( testBackwardOracleMatching ); + CPPUNIT_TEST_SUITE_END ( ); + +public: + void setUp ( ); + void tearDown ( ); + + void testFactorOracleConstruction ( ); + void testBackwardOracleMatching ( ); +}; + +#endif // ORACLE_MATCHER_TEST_H_ diff --git a/alib2algo_experimental/src/stringology/exact/BackwardDAWGMatching.cpp b/alib2algo_experimental/src/stringology/exact/BackwardDAWGMatching.cpp deleted file mode 100644 index e9423e82a7..0000000000 --- a/alib2algo_experimental/src/stringology/exact/BackwardDAWGMatching.cpp +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Author: Radovan Cerveny - */ - -#include "BackwardDAWGMatching.h" -#include "stringology/matching/DAWGMatcherConstruction.h" - -#include <string/LinearString.h> -#include <alphabet/Symbol.h> - -#include <algorithm> -#include <map> -#include <bitset> -#include <measure> - -namespace stringology { - -namespace exact { - -std::set < unsigned > BackwardDAWGMatching::match ( const string::String & subject, const string::String & pattern ) { - return dispatch ( subject.getData ( ), pattern.getData ( ) ); -} - -std::set < 
unsigned > BackwardDAWGMatching::match ( const string::LinearString < > & subject, const string::LinearString < > & pattern ) { - std::set < unsigned > occ; - - measurements::start ( "Preprocess", measurements::Type::PREPROCESS ); - - automaton::DFA<> suffixAutomaton = stringology::matching::DAWGMatcherConstruction::construct ( pattern ); - - measurements::end ( ); - - measurements::start ( "Algorithm", measurements::Type::ALGORITHM ); - - const DefaultStateType failState = DefaultStateType ( -1 ); - - size_t posInSubject = 0; - - while ( posInSubject <= subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) ) { - DefaultStateType currentState = suffixAutomaton.getInitialState ( ); - - size_t posInPattern = pattern.getContent ( ).size ( ); - size_t lastPrefixPos = posInPattern; - - while ( posInPattern > 0 && currentState != failState ) { - auto transition = suffixAutomaton.getTransitions ( ).find ( { currentState, subject.getContent ( ).at ( posInSubject + posInPattern - 1 ) } ); - - if ( transition == suffixAutomaton.getTransitions ( ).end ( ) ) - currentState = failState; - else - currentState = transition->second; - - posInPattern--; - - // found a prefix of nonreversed pattern that does not correspond to the entire pattern - if ( ( posInPattern != 0 ) && ( suffixAutomaton.getFinalStates ( ).find ( currentState ) != suffixAutomaton.getFinalStates ( ).end ( ) ) ) - lastPrefixPos = posInPattern; - } - - if ( currentState != failState ) - // Yay, there is match!!! 
- occ.insert ( posInSubject ); - - posInSubject += lastPrefixPos; - } - - measurements::end ( ); - - return occ; -} - -auto BackwardDAWGMatchingLinearStringLinearString = BackwardDAWGMatching::RegistratorWrapper < std::set < unsigned >, string::LinearString < >, string::LinearString < > > ( BackwardDAWGMatching::match ); - -} /* namespace exact */ - -} /* namespace stringology */ diff --git a/alib2algo_experimental/src/stringology/exact/BackwardOracleMatching.cpp b/alib2algo_experimental/src/stringology/exact/BackwardOracleMatching.cpp deleted file mode 100644 index 7fdff52298..0000000000 --- a/alib2algo_experimental/src/stringology/exact/BackwardOracleMatching.cpp +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Author: Radovan Cerveny - */ - -#include "BackwardOracleMatching.h" -#include "stringology/matching/OracleMatcherConstruction.h" - -#include <string/LinearString.h> -#include <alphabet/Symbol.h> - -#include <algorithm> -#include <map> -#include <bitset> -#include <measure> - -namespace stringology { - -namespace exact { - -std::set < unsigned > BackwardOracleMatching::match ( const string::String & subject, const string::String & pattern ) { - return dispatch ( subject.getData ( ), pattern.getData ( ) ); -} - -std::set < unsigned > BackwardOracleMatching::match ( const string::LinearString < > & subject, const string::LinearString < > & pattern ) { - std::set < unsigned > occ; - - measurements::start ( "Preprocess", measurements::Type::PREPROCESS ); - - automaton::DFA<> factorOracle = stringology::matching::OracleMatcherConstruction::construct ( pattern ); - - measurements::end ( ); - - measurements::start ( "Algorithm", measurements::Type::ALGORITHM ); - - const DefaultStateType failState = DefaultStateType ( -1 ); - - size_t posInSubject = 0; - - while ( posInSubject <= subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) ) { - - DefaultStateType currentState = factorOracle.getInitialState ( ); - - size_t posInPattern = pattern.getContent ( ).size ( 
); - - while ( posInPattern > 0 && currentState != failState ) { - auto transition = factorOracle.getTransitions ( ).find ( { currentState, subject.getContent ( ).at ( posInSubject + posInPattern - 1 ) } ); - - if ( transition == factorOracle.getTransitions ( ).end ( ) ) - currentState = failState; - else - currentState = transition->second; - - posInPattern--; - } - - if ( currentState != failState ) - // Yay, there is match!!! - occ.insert ( posInSubject ); - - posInSubject += posInPattern + 1; - } - - measurements::end ( ); - - return occ; -} - -auto BackwardOracleMatchingLinearStringLinearString = BackwardOracleMatching::RegistratorWrapper < std::set < unsigned >, string::LinearString < >, string::LinearString < > > ( BackwardOracleMatching::match ); - -} /* namespace exact */ - -} /* namespace stringology */ diff --git a/alib2algo_experimental/test-src/stringology/exact/DAWGMatcherTest.cpp b/alib2algo_experimental/test-src/stringology/exact/DAWGMatcherTest.cpp deleted file mode 100644 index 94063aa444..0000000000 --- a/alib2algo_experimental/test-src/stringology/exact/DAWGMatcherTest.cpp +++ /dev/null @@ -1,52 +0,0 @@ -#include "DAWGMatcherTest.h" - -#include "string/LinearString.h" -#include "stringology/exact/BackwardDAWGMatching.h" -#include "stringology/exact/ExactFactorMatch.h" - -#include "string/generate/RandomStringFactory.h" -#include "string/generate/RandomSubstringFactory.h" - -CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( DAWGMatcherTest, "stringology" ); -CPPUNIT_TEST_SUITE_REGISTRATION ( DAWGMatcherTest ); - -void DAWGMatcherTest::setUp ( ) { -} - -void DAWGMatcherTest::tearDown ( ) { -} - -void DAWGMatcherTest::testBackwardDAWGMatching ( ) { - std::vector<std::string> subjects; - std::vector<std::string> patterns; - std::vector<std::set<unsigned>> expectedOccs; - - subjects.push_back("a"); patterns.push_back("a"); expectedOccs.push_back({0}); - subjects.push_back("a"); patterns.push_back("b"); expectedOccs.push_back({}); - 
subjects.push_back("alfalfalfa"); patterns.push_back("alfalfalfa"); expectedOccs.push_back({0}); - subjects.push_back("alfalfalfa"); patterns.push_back("blfalfalfa"); expectedOccs.push_back({}); - subjects.push_back("alfalfalfa"); patterns.push_back("alfalfalfb"); expectedOccs.push_back({}); - subjects.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa"); patterns.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa"); expectedOccs.push_back({0}); - subjects.push_back("alfalfalfaalfalfalfaabfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa"); patterns.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa"); expectedOccs.push_back({}); - subjects.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfaa"); patterns.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfaa"); expectedOccs.push_back({0}); - subjects.push_back("atggccttgcc"); patterns.push_back("gcc"); expectedOccs.push_back({3,8}); - subjects.push_back("aaaaaaaaaa"); patterns.push_back("a"); expectedOccs.push_back({0,1,2,3,4,5,6,7,8,9}); - subjects.push_back("aaaaaaaaaa"); patterns.push_back("aa"); expectedOccs.push_back({0,1,2,3,4,5,6,7,8}); - - - for(size_t i = 0; i < subjects.size(); ++i) { - string::String subject = string::stringFrom ( subjects[i] ); - string::String pattern = string::stringFrom ( patterns[i] ); - std::set < unsigned > res = stringology::exact::BackwardDAWGMatching::match ( subject, pattern ); - std::cout << subjects[i] << ' ' << patterns[i] << ' ' << res << std::endl; - CPPUNIT_ASSERT ( res == expectedOccs[i] ); - } - - auto longSubject = string::generate::RandomStringFactory::generateLinearString (64 * 64, 512, false, true); - auto longPattern = string::generate::RandomSubstringFactory::generateSubstring(64 * 32, longSubject ); - std::set < unsigned > res = stringology::exact::BackwardDAWGMatching::match ( longSubject, longPattern ); - std::set < unsigned > resRef = 
stringology::exact::ExactFactorMatch::match ( longSubject, longPattern ); - std::cout << "long: " << res << std::endl; - CPPUNIT_ASSERT ( res == resRef); -} - diff --git a/alib2algo_experimental/test-src/stringology/exact/OracleMatcherTest.cpp b/alib2algo_experimental/test-src/stringology/exact/OracleMatcherTest.cpp deleted file mode 100644 index b0c3a7d4e6..0000000000 --- a/alib2algo_experimental/test-src/stringology/exact/OracleMatcherTest.cpp +++ /dev/null @@ -1,50 +0,0 @@ -#include "OracleMatcherTest.h" - -#include "string/LinearString.h" -#include "stringology/exact/BackwardOracleMatching.h" -#include "stringology/exact/ExactFactorMatch.h" - -#include "string/generate/RandomStringFactory.h" -#include "string/generate/RandomSubstringFactory.h" - -CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( OracleMatcherTest, "stringology" ); -CPPUNIT_TEST_SUITE_REGISTRATION ( OracleMatcherTest ); - -void OracleMatcherTest::setUp ( ) { -} - -void OracleMatcherTest::tearDown ( ) { -} - -void OracleMatcherTest::testBackwardOracleMatching ( ) { - std::vector<std::string> subjects; - std::vector<std::string> patterns; - std::vector<std::set<unsigned>> expectedOccs; - - subjects.push_back("a"); patterns.push_back("a"); expectedOccs.push_back({0}); - subjects.push_back("a"); patterns.push_back("b"); expectedOccs.push_back({}); - subjects.push_back("alfalfalfa"); patterns.push_back("alfalfalfa"); expectedOccs.push_back({0}); - subjects.push_back("alfalfalfa"); patterns.push_back("blfalfalfa"); expectedOccs.push_back({}); - subjects.push_back("alfalfalfa"); patterns.push_back("alfalfalfb"); expectedOccs.push_back({}); - subjects.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa"); patterns.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa"); expectedOccs.push_back({0}); - subjects.push_back("alfalfalfaalfalfalfaabfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa"); 
patterns.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa"); expectedOccs.push_back({}); - subjects.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfaa"); patterns.push_back("alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfaa"); expectedOccs.push_back({0}); - subjects.push_back("atggccttgcc"); patterns.push_back("gcc"); expectedOccs.push_back({3,8}); - subjects.push_back("aaaaaaaaaa"); patterns.push_back("a"); expectedOccs.push_back({0,1,2,3,4,5,6,7,8,9}); - - - for(size_t i = 0; i < subjects.size(); ++i) { - string::String subject = string::stringFrom ( subjects[i] ); - string::String pattern = string::stringFrom ( patterns[i] ); - std::set < unsigned > res = stringology::exact::BackwardOracleMatching::match ( subject, pattern ); - std::cout << subjects[i] << ' ' << patterns[i] << ' ' << res << std::endl; - CPPUNIT_ASSERT ( res == expectedOccs[i] ); - } - - auto longSubject = string::generate::RandomStringFactory::generateLinearString (64 * 64, 512, false, true); - auto longPattern = string::generate::RandomSubstringFactory::generateSubstring(64 * 32, longSubject ); - std::set < unsigned > res = stringology::exact::BackwardOracleMatching::match ( longSubject, longPattern ); - std::set < unsigned > resRef = stringology::exact::ExactFactorMatch::match ( longSubject, longPattern ); - std::cout << "long: " << res << std::endl; - CPPUNIT_ASSERT ( res == resRef); -} diff --git a/alib2algo_experimental/test-src/stringology/exact/OracleMatcherTest.h b/alib2algo_experimental/test-src/stringology/exact/OracleMatcherTest.h deleted file mode 100644 index f1d4980908..0000000000 --- a/alib2algo_experimental/test-src/stringology/exact/OracleMatcherTest.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef ORACLE_MATCHER_TEST_H_ -#define ORACLE_MATCHER_TEST_H_ - -#include <cppunit/extensions/HelperMacros.h> - -class OracleMatcherTest : public CppUnit::TestFixture { - CPPUNIT_TEST_SUITE ( OracleMatcherTest ); - CPPUNIT_TEST ( 
testBackwardOracleMatching ); - CPPUNIT_TEST_SUITE_END ( ); - -public: - void setUp ( ); - void tearDown ( ); - - void testFactorOracleConstruction ( ); - void testBackwardOracleMatching ( ); -}; - -#endif // ORACLE_MATCHER_TEST_H_ diff --git a/astringology2/src/astringology.cpp b/astringology2/src/astringology.cpp index 3f3159f4fe..eb0bfe8ea4 100644 --- a/astringology2/src/astringology.cpp +++ b/astringology2/src/astringology.cpp @@ -25,8 +25,8 @@ #include <stringology/exact/ReversedBoyerMooreHorspool.h> #include <stringology/exact/DeadZoneUsingBadCharacterShift.h> #include <stringology/query/BNDMOccurrences.h> -#include <stringology/exact/BackwardOracleMatching.h> -#include <stringology/exact/BackwardDAWGMatching.h> +#include <stringology/query/BackwardOracleMatching.h> +#include <stringology/query/BackwardDAWGMatching.h> #include <stringology/matching/ExactMatchingAutomaton.h> #include <stringology/indexing/NondeterministicExactFactorAutomaton.h> #include <stringology/indexing/ExactSubsequenceAutomaton.h> @@ -182,12 +182,12 @@ int main ( int argc, char * argv[] ) { alib::XmlDataFactory::toStdout ( res ); } else if ( algorithm.getValue ( ) == "backwardOracleMatching" ) { string::String subject = alib::XmlDataFactory::fromTokens ( std::move ( sax::FromXMLParserHelper::parseInput(true, subjectInput).front ( ) ) ); - string::String pattern = alib::XmlDataFactory::fromTokens ( std::move ( sax::FromXMLParserHelper::parseInput(true, patternInput).front ( ) ) ); + automaton::Automaton pattern = alib::XmlDataFactory::fromTokens ( std::move ( sax::FromXMLParserHelper::parseInput(true, patternInput).front ( ) ) ); measurements::end ( ); measurements::start ( "Algorithm", measurements::Type::MAIN ); - std::set < unsigned > res = stringology::exact::BackwardOracleMatching::match ( subject, pattern ); + std::set < unsigned > res = stringology::query::BackwardOracleMatching::match ( subject, pattern ); measurements::end ( ); measurements::start ( "Output write", 
measurements::Type::AUXILIARY ); @@ -195,12 +195,12 @@ int main ( int argc, char * argv[] ) { alib::XmlDataFactory::toStdout ( res ); } else if ( algorithm.getValue ( ) == "backwardDAWGMatching" ) { string::String subject = alib::XmlDataFactory::fromTokens ( std::move ( sax::FromXMLParserHelper::parseInput(true, subjectInput).front ( ) ) ); - string::String pattern = alib::XmlDataFactory::fromTokens ( std::move ( sax::FromXMLParserHelper::parseInput(true, patternInput).front ( ) ) ); + automaton::Automaton pattern = alib::XmlDataFactory::fromTokens ( std::move ( sax::FromXMLParserHelper::parseInput(true, patternInput).front ( ) ) ); measurements::end ( ); measurements::start ( "Algorithm", measurements::Type::MAIN ); - std::set < unsigned > res = stringology::exact::BackwardDAWGMatching::match ( subject, pattern ); + std::set < unsigned > res = stringology::query::BackwardDAWGMatching::match ( subject, pattern ); measurements::end ( ); measurements::start ( "Output write", measurements::Type::AUXILIARY ); -- GitLab