Skip to content
Snippets Groups Projects
Commit 38af1164 authored by Jan Trávníček's avatar Jan Trávníček
Browse files

- pass alphabet to bcs and automata compute algos

parent 696081cb
No related branches found
No related tags found
No related merge requests found
......@@ -14,15 +14,13 @@ namespace arbology {
 
namespace exact {
 
/**
* Entry point for a wrapped ranked tree pattern with an explicitly supplied alphabet.
* Forwards the alphabet together with the pattern's underlying data ( pattern.getData ( ) )
* to the dispatcher returned by getInstance ( ) and returns whatever the dispatched overload produces.
*/
std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const std::set < alphabet::RankedSymbol > & alphabet, const tree::RankedTreeWrapper & pattern ) {
return getInstance ( ).dispatch ( alphabet, pattern.getData ( ) );
}
/**
* Entry point for a wrapped ranked tree pattern that takes no separate alphabet argument.
* NOTE(review): the body contains two consecutive return statements, so the second one is unreachable.
* This looks like the pre-change and post-change lines of a diff shown together - confirm which one is intended.
*/
std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const tree::RankedTreeWrapper & pattern ) {
// delegates to the two-argument overload using the pattern's own alphabet
return bcs ( pattern.getAlphabet ( ), pattern );
// dispatches directly on the pattern's underlying data (never reached while the return above is present)
return getInstance ( ).dispatch ( pattern.getData ( ) );
}
 
std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const std::set < alphabet::RankedSymbol > & alphabet, const tree::PrefixRankedBarPattern & pattern ) {
std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const tree::PrefixRankedBarPattern & pattern ) {
const std::set < alphabet::RankedSymbol > & alphabet = pattern.getAlphabet ( );
std::map < alphabet::RankedSymbol, size_t > bcs;
 
// initialisation of bcs table to the size of the pattern
......@@ -75,10 +73,6 @@ std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const
 
auto BadCharacterShiftTablePrefixRankedBarPattern = BadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedBarPattern > ( BadCharacterShiftTable::getInstance ( ), BadCharacterShiftTable::bcs );
 
/**
* Convenience overload for a prefix ranked bar pattern.
* Delegates to the two-argument bcs overload, passing the pattern's own alphabet ( pattern.getAlphabet ( ) ).
*/
std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const tree::PrefixRankedBarPattern & pattern ) {
return bcs ( pattern.getAlphabet ( ), pattern );
}
} /* namespace exact */
 
} /* namespace arbology */
......@@ -24,7 +24,7 @@ namespace exact {
* Computation of BCS table for BMH from MI(E+\eps)-EVY course 2014
* To get rid of zeros in BCS table we ignore last haystack character
*/
class BadCharacterShiftTable : public std::SingleDispatchFirstStaticParam < std::map < alphabet::RankedSymbol, size_t >, const std::set < alphabet::RankedSymbol > &, tree::RankedTreeBase > {
class BadCharacterShiftTable : public std::SingleDispatch < std::map < alphabet::RankedSymbol, size_t >, tree::RankedTreeBase > {
public:
/**
* Search for pattern in linear string.
......@@ -32,16 +32,8 @@ public:
*/
static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::RankedTreeWrapper & pattern );
 
/**
* Search for pattern in linear string.
* @return set of occurrences
*/
static std::map < alphabet::RankedSymbol, size_t > bcs ( const std::set < alphabet::RankedSymbol > & alphabet, const tree::RankedTreeWrapper & pattern );
static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::PrefixRankedBarPattern & pattern );
 
static std::map < alphabet::RankedSymbol, size_t > bcs ( const std::set < alphabet::RankedSymbol > & alphabet, const tree::PrefixRankedBarPattern & pattern );
static BadCharacterShiftTable & getInstance ( ) {
static BadCharacterShiftTable res;
 
......
......@@ -33,7 +33,7 @@ auto BoyerMooreHorspoolPrefixRankedBarTreePrefixRankedBarTree = BoyerMooreHorspo
 
std::set < unsigned > BoyerMooreHorspool::match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarPattern & pattern ) {
std::set < unsigned > occ;
std::map < alphabet::RankedSymbol, size_t > bcs = BadCharacterShiftTable::bcs ( subject.getAlphabet ( ), pattern );
std::map < alphabet::RankedSymbol, size_t > bcs = BadCharacterShiftTable::bcs ( pattern ); //NOTE: the subject's alphabet must be a subset of (or equal to) the pattern's alphabet
std::vector < int > subjectSubtreeJumpTable = SubtreeJumpTable::compute ( subject );
 
// index to the subject
......
......@@ -140,7 +140,9 @@ bool Accept::accept(const automaton::DPDA& automaton, const string::LinearString
 
break;
}
if(transition == transitions.end()) return false;
for(auto pop : std::get<2>(transition->first)) pushdownStore.pop_back();
 
state = transition->second.first;
......
......@@ -41,8 +41,8 @@ std::set<unsigned> Occurrences::occurrences(const automaton::DFA& automaton, con
 
auto transitions = automaton.getTransitionsFromState(state);
auto next = transitions.find(std::make_pair(state, symbol));
if(next == transitions.end()) { //makes sense to reset the automaton when there is an "unknown" symbol on the input
state = automaton.getInitialState();
if(next == transitions.end()) {
throw exception::AlibException("Transition not present");
} else {
state = next->second;
}
......@@ -116,7 +116,10 @@ std::set<unsigned> Occurrences::occurrences(const automaton::DPDA& automaton, co
 
break;
}
if(transition == transitions.end()) return {};
if(transition == transitions.end())
throw exception::AlibException("Transition not present");
for(auto pop : std::get<2>(transition->first)) pushdownStore.pop_back();
 
state = transition->second.first;
......
......@@ -27,7 +27,8 @@ label::Label Result::result(const automaton::DFA& automaton, const string::Linea
for(const alphabet::Symbol& symbol : string.getContent()) {
auto transitions = automaton.getTransitionsFromState(state);
auto next = transitions.find(std::make_pair(state, symbol));
if(next == transitions.end()) return label::Label(label::LabelSetLabel(std::set<label::Label>{}));
if(next == transitions.end())
throw exception::AlibException("Transition not present");
state = next->second;
}
return state.getName();
......@@ -60,7 +61,10 @@ label::Label Result::result(const automaton::DPDA& automaton, const string::Line
 
break;
}
if(transition == transitions.end()) return label::Label(label::LabelSetLabel(std::set<label::Label>{}));
if(transition == transitions.end())
throw exception::AlibException("Transition not present");
for(auto pop : std::get<2>(transition->first)) pushdownStore.pop_back();
 
state = transition->second.first;
......
......@@ -15,15 +15,12 @@ namespace stringology {
 
namespace exact {
 
/**
 * Entry point for a wrapped string pattern with an explicitly supplied alphabet.
 * Forwards the alphabet together with the pattern's underlying data (pattern.getData())
 * to the dispatcher returned by getInstance() and returns whatever the dispatched overload produces.
 */
std::map<alphabet::Symbol, size_t> BadCharacterShiftTable::bcs(const std::set<alphabet::Symbol>& alphabet, const string::String& pattern) {
return getInstance().dispatch(alphabet, pattern.getData());
}
/**
 * Entry point for a wrapped string pattern that takes no separate alphabet argument.
 * NOTE(review): the body contains two consecutive return statements, so the second one is unreachable.
 * This looks like the pre-change and post-change lines of a diff shown together - confirm which one is intended.
 */
std::map<alphabet::Symbol, size_t> BadCharacterShiftTable::bcs(const string::String& pattern) {
// delegates to the two-argument overload using the pattern's own alphabet
return bcs(pattern.getAlphabet(), pattern);
// dispatches directly on the pattern's underlying data (never reached while the return above is present)
return getInstance().dispatch(pattern.getData());
}
 
std::map<alphabet::Symbol, size_t> BadCharacterShiftTable::bcs(const std::set<alphabet::Symbol>& alphabet, const string::LinearString& pattern) {
std::map<alphabet::Symbol, size_t> BadCharacterShiftTable::bcs(const string::LinearString& pattern) {
const std::set<alphabet::Symbol>& alphabet = pattern.getAlphabet();
std::map<alphabet::Symbol, size_t> bcs;
 
/* Initialization of BCS to the length of the needle. */
......@@ -46,10 +43,6 @@ std::map<alphabet::Symbol, size_t> BadCharacterShiftTable::bcs(const std::set<al
 
auto BadCharacterShiftTableLinearString = BadCharacterShiftTable::RegistratorWrapper<std::map<alphabet::Symbol, size_t>, string::LinearString>(BadCharacterShiftTable::getInstance(), BadCharacterShiftTable::bcs);
 
/**
 * Convenience overload for a linear string pattern.
 * Delegates to the two-argument bcs overload, passing the pattern's own alphabet (pattern.getAlphabet()).
 */
std::map<alphabet::Symbol, size_t> BadCharacterShiftTable::bcs(const string::LinearString& pattern) {
return bcs(pattern.getAlphabet(), pattern);
}
} /* namespace exact */
 
} /* namespace stringology */
......@@ -23,7 +23,7 @@ namespace exact {
* Computation of BCS table for BMH from MI(E+\eps)-EVY course 2014
* To get rid of zeros in BCS table we ignore last haystack character
*/
class BadCharacterShiftTable : public std::SingleDispatchFirstStaticParam < std::map < alphabet::Symbol, size_t >, const std::set < alphabet::Symbol > &, string::StringBase > {
class BadCharacterShiftTable : public std::SingleDispatch < std::map < alphabet::Symbol, size_t >, string::StringBase > {
public:
/**
* Search for pattern in linear string.
......@@ -31,16 +31,8 @@ public:
*/
static std::map < alphabet::Symbol, size_t > bcs ( const string::String & pattern );
 
/**
* Search for pattern in linear string.
* @return set of occurrences
*/
static std::map < alphabet::Symbol, size_t > bcs ( const std::set < alphabet::Symbol > & alphabet, const string::String & pattern );
static std::map < alphabet::Symbol, size_t > bcs ( const string::LinearString & pattern );
 
static std::map < alphabet::Symbol, size_t > bcs ( const std::set < alphabet::Symbol > & alphabet, const string::LinearString & pattern );
static BadCharacterShiftTable & getInstance ( ) {
static BadCharacterShiftTable res;
 
......
......@@ -25,18 +25,18 @@ std::set<unsigned> BoyerMooreHorspool::match(const string::String& subject, cons
std::set<unsigned> BoyerMooreHorspool::match(const string::LinearString& string, const string::LinearString& pattern)
{
std::set<unsigned> occ;
std::map<alphabet::Symbol, size_t> bcs = BadCharacterShiftTable::bcs(string.getAlphabet(), pattern);
std::map<alphabet::Symbol, size_t> bcs = BadCharacterShiftTable::bcs(pattern); //NOTE: the subject's alphabet must be a subset of (or equal to) the pattern's alphabet
 
size_t haystack_offset = 0;
while(haystack_offset + pattern.getContent().size() <= string.getContent().size()) {
size_t i = pattern.getContent().size();
while(i > 0 && string.getContent().at(haystack_offset + i - 1) == pattern.getContent().at(i - 1)) {
while(i > 0 && string.getContent()[haystack_offset + i - 1] == pattern.getContent()[i - 1]) {
i--;
}
 
// Yay, there is match!!!
if(i == 0) occ.insert(haystack_offset);
haystack_offset += bcs[string.getContent().at(haystack_offset + pattern.getContent().size() - 1)];
haystack_offset += bcs[string.getContent()[haystack_offset + pattern.getContent().size() - 1]];
//std::cout << haystack_offset << std::endl;
}
return occ;
......
......@@ -209,8 +209,8 @@ function runTestPattern {
outputResults
}
 
runTestSubtree "Exact Boyer Moore Horspool" "./aarbology2 -a boyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarTree -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set"
runTestSubtree "Exact Boyer Moore Horspool" "./aarbology2 -a boyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarTree -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"
runTestSubtree "Exact Subtree Automaton" "./arun2 -t occurrences -a <(./aarbology2 -a exactSubtreeMatchingAutomaton -p \"\$PATTERN_FILE\" | ./adeterminize2) -i \"\$SUBJECT_FILE\" | ./astat2 -p size --set"
 
runTestPattern "Exact Boyer Moore Horspool" "./aarbology2 -a boyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set"
runTestPattern "Exact Boyer Moore Horspool" "./aarbology2 -a boyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"
runTestPattern "Exact Knuth Morris Pratt" "./aarbology2 -a knuthMorrisPratt -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set"
......@@ -163,6 +163,6 @@ function runTest {
outputResults
}
 
runTest "Exact Boyer Moore Horspool" "./astringology2 -a boyerMooreHorspool -s \"\$SUBJECT_FILE\" -p \"\$PATTERN_FILE\" | ./astat2 -p size --set"
runTest "Exact Matching Automaton" "./arun2 -t occurrences -a <(./astringology2 -a exactMatchingAutomaton -p \"\$PATTERN_FILE\" | ./adeterminize2) -i \"\$SUBJECT_FILE\" | ./astat2 -p size --set"
runTest "Exact Boyer Moore Horspool" "./astringology2 -a boyerMooreHorspool -s \"\$SUBJECT_FILE\" -p <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\")) | ./astat2 -p size --set"
runTest "Exact Matching Automaton" "./arun2 -t occurrences -a <(./astringology2 -a exactMatchingAutomaton -p <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\")) | ./adeterminize2) -i \"\$SUBJECT_FILE\" | ./astat2 -p size --set"
 
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment