Skip to content
Snippets Groups Projects
Commit e41c9c0c authored by Radovan Červený's avatar Radovan Červený
Browse files

suffix automaton construction fix, backward dawg matching implemented

parent ba0c2010
No related branches found
No related tags found
1 merge request: !16 BP_cervera3 - algorithms
/*
* Author: Radovan Cerveny
*/
#include "BackwardDAWGMatching.hpp"
#include "SuffixAutomaton.hpp"
#include <exception/AlibException.h>
#include <string/LinearString.h>
#include <alphabet/Symbol.h>
#include <algorithm>
#include <map>
#include <bitset>
#include <measure>
namespace stringology {
namespace exact {
/**
 * Generic entry point of Backward DAWG Matching.
 *
 * Hands the underlying data of both strings over to the dispatcher instance,
 * which is expected to route the call to the overload registered for their
 * concrete types.
 *
 * @param subject string to search in
 * @param pattern string to search for
 * @return whatever the dispatched overload returns (a set of occurrence positions)
 */
std::set < unsigned > BackwardDAWGMatching::match ( const string::String & subject, const string::String & pattern ) {
	BackwardDAWGMatching & dispatcher = getInstance ( );

	return dispatcher.dispatch ( subject.getData ( ), pattern.getData ( ) );
}
/**
 * Backward DAWG Matching of a linear pattern in a linear subject.
 *
 * A suffix automaton is constructed for the reversed pattern. Every window of
 * the subject (as long as the pattern) is then read from its right end towards
 * its left end through that automaton. If the whole window is consumed without
 * leaving the automaton, the window is an occurrence of the pattern.
 *
 * @param subject string to search in
 * @param pattern string to search for
 * @return set of start positions in subject at which pattern occurs; empty
 *         when the pattern is longer than the subject
 */
std::set < unsigned > BackwardDAWGMatching::match ( const string::LinearString & subject, const string::LinearString & pattern ) {
	std::set < unsigned > occ;

	measurements::start ( "Preprocess", measurements::Type::PREPROCESS );
	// Work on a copy of the pattern content: the automaton is built over the reversed pattern.
	auto patternData = pattern.getContent ( );
	std::reverse ( patternData.begin ( ), patternData.end ( ) );
	const string::LinearString reversedPattern ( std::move ( patternData ) );
	automaton::DFA suffixAutomaton = SuffixAutomaton::construct ( reversedPattern );
	measurements::end ( );

	measurements::start ( "Algorithm", measurements::Type::ALGORITHM );
	// Sentinel marking "no transition was defined for the symbol just read".
	const automaton::State failState = automaton::State ( -1 );

	// Loop-invariant lookups hoisted out of the scanning loops.
	const auto & subjectData = subject.getContent ( );
	const auto & transitions = suffixAutomaton.getTransitions ( );
	const size_t subjectSize = subjectData.size ( );
	const size_t patternSize = reversedPattern.getContent ( ).size ( );

	size_t posInSubject = 0;

	// Written as an addition on purpose: `subjectSize - patternSize` is unsigned and
	// would wrap around when the pattern is longer than the subject, sending the
	// scan past the end of the subject.
	while ( posInSubject + patternSize <= subjectSize ) {
		automaton::State currentState = suffixAutomaton.getInitialState ( );
		size_t posInPattern = patternSize;

		// Read the current window right to left until it is exhausted or the automaton rejects.
		while ( posInPattern > 0 && currentState != failState ) {
			auto transition = transitions.find ( { currentState, subjectData.at ( posInSubject + posInPattern - 1 ) } );

			if ( transition == transitions.end ( ) )
				currentState = failState;
			else
				currentState = transition->second;

			posInPattern--;
		}

		// The whole window was accepted, so the pattern occurs at this position.
		if ( currentState != failState )
			occ.insert ( posInSubject );

		// After a rejection, posInPattern is the window offset of the rejected symbol.
		// The window suffix starting at that symbol is not a factor of the pattern, so
		// no occurrence can start at or before it; continue right behind it.
		// After a match, posInPattern is 0 and the window advances by one.
		posInSubject += posInPattern + 1;
	}

	measurements::end ( );

	return occ;
}
// Namespace-scope registration object: passes the LinearString x LinearString overload of
// match to the dispatcher instance returned by getInstance ( ), so that the generic
// String-based match can presumably reach it through dispatch ( ).
auto BackwardDAWGMatchingLinearStringLinearString = BackwardDAWGMatching::RegistratorWrapper < std::set < unsigned >, string::LinearString, string::LinearString > ( BackwardDAWGMatching::getInstance ( ), BackwardDAWGMatching::match );
} /* namespace exact */
} /* namespace stringology */
/*
* Author: Radovan Cerveny
*/
// The guard is named after this header (it previously carried the BackwardOracleMatching
// name) and avoids double underscores, which are reserved identifiers.
#ifndef STRINGOLOGY_BACKWARD_DAWG_MATCHING_HPP_
#define STRINGOLOGY_BACKWARD_DAWG_MATCHING_HPP_

#include <string/String.h>
#include <string/StringFeatures.h>
#include <core/multipleDispatch.hpp>

#include <set>

namespace stringology {

namespace exact {

/**
 * Implementation of Backward DAWG Matching.
 */
class BackwardDAWGMatching : public std::DoubleDispatch < std::set < unsigned >, string::StringBase, string::StringBase > {
public:
	/**
	 * Search for pattern in subject; the call is dispatched on the data of both strings.
	 * @param subject string to search in
	 * @param pattern string to search for
	 * @return set of occurrences
	 */
	static std::set < unsigned > match ( const string::String & subject, const string::String & pattern );

	/**
	 * Search for pattern in linear string.
	 * @param subject linear string to search in
	 * @param pattern linear string to search for
	 * @return set of occurrences
	 */
	static std::set < unsigned > match ( const string::LinearString & subject, const string::LinearString & pattern );

	/**
	 * @return the single, function-local static instance of the dispatcher
	 */
	static BackwardDAWGMatching & getInstance ( ) {
		static BackwardDAWGMatching res;

		return res;
	}

};

} /* namespace exact */

} /* namespace stringology */

#endif /* STRINGOLOGY_BACKWARD_DAWG_MATCHING_HPP_ */
......@@ -108,13 +108,18 @@ void SuffixAutomaton::suffixAutomatonAddSymbol ( automaton::DFA & suffixAutomato
for ( const auto & transition : suffixAutomaton.getTransitionsFromState ( qState ) )
suffixAutomaton.addTransition ( cloneState, transition.first.second, transition.second );
 
while ( kState != automaton::State ( -1 ) && suffixAutomaton.getTransitions ( ).find ( { kState, symbol } ) != suffixAutomaton.getTransitions ( ).end ( ) && suffixAutomaton.getTransitions ( ).find ( { kState, symbol } )->second == qState ) {
while ( kState != automaton::State ( -1 )
&& suffixAutomaton.getTransitions ( ).find ( { kState, symbol } ) != suffixAutomaton.getTransitions ( ).end ( )
&& suffixAutomaton.getTransitions ( ).find ( { kState, symbol } )->second == qState ) {
suffixAutomaton.removeTransition ( kState, symbol, qState );
suffixAutomaton.addTransition ( kState, symbol, cloneState );
kState = suffixLinks.find ( kState )->second.first;
}
suffixLinks.find ( qState )->second.first = cloneState;
suffixLinks.find ( newState )->second.first = cloneState;
}
}
lastState = newState;
}
 
......
......@@ -72,8 +72,8 @@ void FactorOracleAutomatonTest::testBackwardOracleMatching ( ) {
CPPUNIT_ASSERT ( res == expectedOccs[i] );
}
 
auto longSubject = string::generate::RandomStringFactory::generateLinearString (64 * 64 * 64, 512, false, true);
auto longPattern = string::generate::RandomSubstringFactory::generateSubstring(64 * 32 * 32, longSubject );
auto longSubject = string::generate::RandomStringFactory::generateLinearString (64 * 64, 512, false, true);
auto longPattern = string::generate::RandomSubstringFactory::generateSubstring(64 * 32, longSubject );
std::set < unsigned > res = stringology::exact::BackwardOracleMatching::match ( longSubject, longPattern );
std::cout << "long: " << res << std::endl;
CPPUNIT_ASSERT ( res.size() > 0 );
......
......@@ -2,6 +2,7 @@
 
#include "string/LinearString.h"
#include "stringology/exact/SuffixAutomaton.hpp"
#include "stringology/exact/BackwardDAWGMatching.hpp"
 
#include "string/generate/RandomStringFactory.h"
#include "string/generate/RandomSubstringFactory.h"
......@@ -47,3 +48,37 @@ void SuffixAutomatonTest::testSuffixAutomatonConstruction ( ) {
 
CPPUNIT_ASSERT ( suffixAutomaton == refSuffixAutomaton );
}
/**
 * Exercises BackwardDAWGMatching::match on a table of hand-written
 * subject / pattern pairs with known occurrence sets, and then on a randomly
 * generated subject with a pattern taken from it as a substring.
 */
void SuffixAutomatonTest::testBackwardDAWGMatching ( ) {
	std::vector < std::string > subjects;
	std::vector < std::string > patterns;
	std::vector < std::set < unsigned > > expectedOccs;

	// Appends one test case to the three parallel tables.
	const auto addCase = [ & ] ( const std::string & text, const std::string & needle, const std::set < unsigned > & occurrences ) {
		subjects.push_back ( text );
		patterns.push_back ( needle );
		expectedOccs.push_back ( occurrences );
	};

	addCase ( "a", "a", { 0 } );
	addCase ( "a", "b", { } );
	addCase ( "alfalfalfa", "alfalfalfa", { 0 } );
	addCase ( "alfalfalfa", "blfalfalfa", { } );
	addCase ( "alfalfalfa", "alfalfalfb", { } );
	addCase ( "alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa", "alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa", { 0 } );
	addCase ( "alfalfalfaalfalfalfaabfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa", "alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa", { } );
	addCase ( "alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfaa", "alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfaa", { 0 } );
	addCase ( "atggccttgcc", "gcc", { 3, 8 } );
	addCase ( "aaaaaaaaaa", "a", { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 } );

	// Hand-written cases: the result must equal the expected occurrence set exactly.
	for ( size_t i = 0; i != subjects.size ( ); ++i ) {
		string::String subject = string::stringFrom ( subjects[i] );
		string::String pattern = string::stringFrom ( patterns[i] );

		std::set < unsigned > res = stringology::exact::BackwardDAWGMatching::match ( subject, pattern );

		std::cout << subjects[i] << ' ' << patterns[i] << ' ' << res << std::endl;
		CPPUNIT_ASSERT ( res == expectedOccs[i] );
	}

	// Random case: the pattern is generated as a substring of the subject, so at least one occurrence is expected.
	auto randomSubject = string::generate::RandomStringFactory::generateLinearString ( 64 * 64, 512, false, true );
	auto randomPattern = string::generate::RandomSubstringFactory::generateSubstring ( 64 * 32, randomSubject );

	std::set < unsigned > res = stringology::exact::BackwardDAWGMatching::match ( randomSubject, randomPattern );

	std::cout << "long: " << res << std::endl;
	CPPUNIT_ASSERT ( res.size() > 0 );
}
......@@ -6,6 +6,7 @@
class SuffixAutomatonTest : public CppUnit::TestFixture {
CPPUNIT_TEST_SUITE ( SuffixAutomatonTest );
CPPUNIT_TEST ( testSuffixAutomatonConstruction );
CPPUNIT_TEST ( testBackwardDAWGMatching );
CPPUNIT_TEST_SUITE_END ( );
 
public:
......@@ -13,6 +14,8 @@ public:
void tearDown ( );
 
void testSuffixAutomatonConstruction ( );
void testBackwardDAWGMatching ( );
};
 
#endif // SUFFIX_AUTOMATON_TEST_HPP_
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment