From 38af11645e0cc5f965d602579f812ace689dbd40 Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Fri, 9 Oct 2015 15:40:00 +0200 Subject: [PATCH] - pass alphabet to bcs and automata compute algos --- .../src/arbology/exact/BadCharacterShiftTable.cpp | 14 ++++---------- .../src/arbology/exact/BadCharacterShiftTable.h | 10 +--------- .../src/arbology/exact/BoyerMooreHorspool.cpp | 2 +- alib2algo/src/automaton/run/Accept.cpp | 2 ++ alib2algo/src/automaton/run/Occurrences.cpp | 9 ++++++--- alib2algo/src/automaton/run/Result.cpp | 8 ++++++-- .../stringology/exact/BadCharacterShiftTable.cpp | 13 +++---------- .../src/stringology/exact/BadCharacterShiftTable.h | 10 +--------- .../src/stringology/exact/BoyerMooreHorspool.cpp | 6 +++--- tests.aarbology.sh | 4 ++-- tests.astringology.sh | 4 ++-- 11 files changed, 31 insertions(+), 51 deletions(-) diff --git a/alib2algo/src/arbology/exact/BadCharacterShiftTable.cpp b/alib2algo/src/arbology/exact/BadCharacterShiftTable.cpp index dabeda654b..6a77943a82 100644 --- a/alib2algo/src/arbology/exact/BadCharacterShiftTable.cpp +++ b/alib2algo/src/arbology/exact/BadCharacterShiftTable.cpp @@ -14,15 +14,13 @@ namespace arbology { namespace exact { -std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const std::set < alphabet::RankedSymbol > & alphabet, const tree::RankedTreeWrapper & pattern ) { - return getInstance ( ).dispatch ( alphabet, pattern.getData ( ) ); -} - std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const tree::RankedTreeWrapper & pattern ) { - return bcs ( pattern.getAlphabet ( ), pattern ); + return getInstance ( ).dispatch ( pattern.getData ( ) ); } -std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const std::set < alphabet::RankedSymbol > & alphabet, const tree::PrefixRankedBarPattern & pattern ) { +std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const tree::PrefixRankedBarPattern & pattern ) { + const std::set < alphabet::RankedSymbol > & alphabet = pattern.getAlphabet ( ); + std::map < alphabet::RankedSymbol, size_t > bcs; // initialisation of bcs table to the size of the pattern @@ -75,10 +73,6 @@ std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const auto BadCharacterShiftTablePrefixRankedBarPattern = BadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedBarPattern > ( BadCharacterShiftTable::getInstance ( ), BadCharacterShiftTable::bcs ); -std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const tree::PrefixRankedBarPattern & pattern ) { - return bcs ( pattern.getAlphabet ( ), pattern ); -} - } /* namespace exact */ } /* namespace arbology */ diff --git a/alib2algo/src/arbology/exact/BadCharacterShiftTable.h b/alib2algo/src/arbology/exact/BadCharacterShiftTable.h index 4534d4feb4..256c2cecbf 100644 --- a/alib2algo/src/arbology/exact/BadCharacterShiftTable.h +++ b/alib2algo/src/arbology/exact/BadCharacterShiftTable.h @@ -24,7 +24,7 @@ namespace exact { * Computation of BCS table for BMH from MI(E+\eps)-EVY course 2014 * To get rid of zeros in BCS table we ignore last haystack character */ -class BadCharacterShiftTable : public std::SingleDispatchFirstStaticParam < std::map < alphabet::RankedSymbol, size_t >, const std::set < alphabet::RankedSymbol > &, tree::RankedTreeBase > { +class BadCharacterShiftTable : public std::SingleDispatch < std::map < alphabet::RankedSymbol, size_t >, tree::RankedTreeBase > { public: /** * Search for pattern in linear string. @@ -32,16 +32,8 @@ public: */ static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::RankedTreeWrapper & pattern ); - /** - * Search for pattern in linear string. - * @return set set of occurences - */ - static std::map < alphabet::RankedSymbol, size_t > bcs ( const std::set < alphabet::RankedSymbol > & alphabet, const tree::RankedTreeWrapper & pattern ); - static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::PrefixRankedBarPattern & pattern ); - static std::map < alphabet::RankedSymbol, size_t > bcs ( const std::set < alphabet::RankedSymbol > & alphabet, const tree::PrefixRankedBarPattern & pattern ); - static BadCharacterShiftTable & getInstance ( ) { static BadCharacterShiftTable res; diff --git a/alib2algo/src/arbology/exact/BoyerMooreHorspool.cpp b/alib2algo/src/arbology/exact/BoyerMooreHorspool.cpp index e5cf94f113..468de4a994 100644 --- a/alib2algo/src/arbology/exact/BoyerMooreHorspool.cpp +++ b/alib2algo/src/arbology/exact/BoyerMooreHorspool.cpp @@ -33,7 +33,7 @@ auto BoyerMooreHorspoolPrefixRankedBarTreePrefixRankedBarTree = BoyerMooreHorspo std::set < unsigned > BoyerMooreHorspool::match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarPattern & pattern ) { std::set < unsigned > occ; - std::map < alphabet::RankedSymbol, size_t > bcs = BadCharacterShiftTable::bcs ( subject.getAlphabet ( ), pattern ); + std::map < alphabet::RankedSymbol, size_t > bcs = BadCharacterShiftTable::bcs ( pattern ); //NOTE: the subjects alphabet must be a subset or equal to the pattern std::vector < int > subjectSubtreeJumpTable = SubtreeJumpTable::compute ( subject ); // index to the subject diff --git a/alib2algo/src/automaton/run/Accept.cpp b/alib2algo/src/automaton/run/Accept.cpp index da4e7e7896..280c2ab34d 100644 --- a/alib2algo/src/automaton/run/Accept.cpp +++ b/alib2algo/src/automaton/run/Accept.cpp @@ -140,7 +140,9 @@ bool Accept::accept(const automaton::DPDA& automaton, const string::LinearString break; } + if(transition == transitions.end()) return false; + for(auto pop : std::get<2>(transition->first)) pushdownStore.pop_back(); state = transition->second.first; diff --git a/alib2algo/src/automaton/run/Occurrences.cpp b/alib2algo/src/automaton/run/Occurrences.cpp index 1dd72424c5..bd92b23a5d 100644 --- a/alib2algo/src/automaton/run/Occurrences.cpp +++ b/alib2algo/src/automaton/run/Occurrences.cpp @@ -41,8 +41,8 @@ std::set<unsigned> Occurrences::occurrences(const automaton::DFA& automaton, con auto transitions = automaton.getTransitionsFromState(state); auto next = transitions.find(std::make_pair(state, symbol)); - if(next == transitions.end()) { //makes sence to reset the automaton when there is "unknown" symbol on the input - state = automaton.getInitialState(); + if(next == transitions.end()) { + throw exception::AlibException("Transition not present"); } else { state = next->second; } @@ -116,7 +116,10 @@ std::set<unsigned> Occurrences::occurrences(const automaton::DPDA& automaton, co break; } - if(transition == transitions.end()) return {}; + + if(transition == transitions.end()) + throw exception::AlibException("Transition not present"); + for(auto pop : std::get<2>(transition->first)) pushdownStore.pop_back(); state = transition->second.first; diff --git a/alib2algo/src/automaton/run/Result.cpp b/alib2algo/src/automaton/run/Result.cpp index 6c6e476179..ab2a81a0fd 100644 --- a/alib2algo/src/automaton/run/Result.cpp +++ b/alib2algo/src/automaton/run/Result.cpp @@ -27,7 +27,8 @@ label::Label Result::result(const automaton::DFA& automaton, const string::Linea for(const alphabet::Symbol& symbol : string.getContent()) { auto transitions = automaton.getTransitionsFromState(state); auto next = transitions.find(std::make_pair(state, symbol)); - if(next == transitions.end()) return label::Label(label::LabelSetLabel(std::set<label::Label>{})); + if(next == transitions.end()) + throw exception::AlibException("Transition not present"); state = next->second; } return state.getName(); @@ -60,7 +61,10 @@ label::Label Result::result(const automaton::DPDA& automaton, const string::Line break; } - if(transition == transitions.end()) return label::Label(label::LabelSetLabel(std::set<label::Label>{})); + + if(transition == transitions.end()) + throw exception::AlibException("Transition not present"); + for(auto pop : std::get<2>(transition->first)) pushdownStore.pop_back(); state = transition->second.first; diff --git a/alib2algo/src/stringology/exact/BadCharacterShiftTable.cpp b/alib2algo/src/stringology/exact/BadCharacterShiftTable.cpp index e3a9c6ad28..49fc99ce98 100644 --- a/alib2algo/src/stringology/exact/BadCharacterShiftTable.cpp +++ b/alib2algo/src/stringology/exact/BadCharacterShiftTable.cpp @@ -15,15 +15,12 @@ namespace stringology { namespace exact { -std::map<alphabet::Symbol, size_t> BadCharacterShiftTable::bcs(const std::set<alphabet::Symbol>& alphabet, const string::String& pattern) { - return getInstance().dispatch(alphabet, pattern.getData()); -} - std::map<alphabet::Symbol, size_t> BadCharacterShiftTable::bcs(const string::String& pattern) { - return bcs(pattern.getAlphabet(), pattern); + return getInstance().dispatch(pattern.getData()); } -std::map<alphabet::Symbol, size_t> BadCharacterShiftTable::bcs(const std::set<alphabet::Symbol>& alphabet, const string::LinearString& pattern) { +std::map<alphabet::Symbol, size_t> BadCharacterShiftTable::bcs(const string::LinearString& pattern) { + const std::set<alphabet::Symbol>& alphabet = pattern.getAlphabet(); std::map<alphabet::Symbol, size_t> bcs; /* Initialization of BCS to the length of the needle. */ @@ -46,10 +43,6 @@ std::map<alphabet::Symbol, size_t> BadCharacterShiftTable::bcs(const std::set<al auto BadCharacterShiftTableLinearString = BadCharacterShiftTable::RegistratorWrapper<std::map<alphabet::Symbol, size_t>, string::LinearString>(BadCharacterShiftTable::getInstance(), BadCharacterShiftTable::bcs); -std::map<alphabet::Symbol, size_t> BadCharacterShiftTable::bcs(const string::LinearString& pattern) { - return bcs(pattern.getAlphabet(), pattern); -} - } /* namespace exact */ } /* namespace stringology */ diff --git a/alib2algo/src/stringology/exact/BadCharacterShiftTable.h b/alib2algo/src/stringology/exact/BadCharacterShiftTable.h index d840582268..f53ca94298 100644 --- a/alib2algo/src/stringology/exact/BadCharacterShiftTable.h +++ b/alib2algo/src/stringology/exact/BadCharacterShiftTable.h @@ -23,7 +23,7 @@ namespace exact { * Computation of BCS table for BMH from MI(E+\eps)-EVY course 2014 * To get rid of zeros in BCS table we ignore last haystack character */ -class BadCharacterShiftTable : public std::SingleDispatchFirstStaticParam < std::map < alphabet::Symbol, size_t >, const std::set < alphabet::Symbol > &, string::StringBase > { +class BadCharacterShiftTable : public std::SingleDispatch < std::map < alphabet::Symbol, size_t >, string::StringBase > { public: /** * Search for pattern in linear string. @@ -31,16 +31,8 @@ public: */ static std::map < alphabet::Symbol, size_t > bcs ( const string::String & pattern ); - /** - * Search for pattern in linear string. - * @return set set of occurences - */ - static std::map < alphabet::Symbol, size_t > bcs ( const std::set < alphabet::Symbol > & alphabet, const string::String & pattern ); - static std::map < alphabet::Symbol, size_t > bcs ( const string::LinearString & pattern ); - static std::map < alphabet::Symbol, size_t > bcs ( const std::set < alphabet::Symbol > & alphabet, const string::LinearString & pattern ); - static BadCharacterShiftTable & getInstance ( ) { static BadCharacterShiftTable res; diff --git a/alib2algo/src/stringology/exact/BoyerMooreHorspool.cpp b/alib2algo/src/stringology/exact/BoyerMooreHorspool.cpp index 836c27c7fe..5678acaab7 100644 --- a/alib2algo/src/stringology/exact/BoyerMooreHorspool.cpp +++ b/alib2algo/src/stringology/exact/BoyerMooreHorspool.cpp @@ -25,18 +25,18 @@ std::set<unsigned> BoyerMooreHorspool::match(const string::String& subject, cons std::set<unsigned> BoyerMooreHorspool::match(const string::LinearString& string, const string::LinearString& pattern) { std::set<unsigned> occ; - std::map<alphabet::Symbol, size_t> bcs = BadCharacterShiftTable::bcs(string.getAlphabet(), pattern); + std::map<alphabet::Symbol, size_t> bcs = BadCharacterShiftTable::bcs(pattern); //NOTE: the subjects alphabet must be a subset or equal to the pattern size_t haystack_offset = 0; while(haystack_offset + pattern.getContent().size() <= string.getContent().size()) { size_t i = pattern.getContent().size(); - while(i > 0 && string.getContent().at(haystack_offset + i - 1) == pattern.getContent().at(i - 1)) { + while(i > 0 && string.getContent()[haystack_offset + i - 1] == pattern.getContent()[i - 1]) { i--; } // Yay, there is match!!! if(i == 0) occ.insert(haystack_offset); - haystack_offset += bcs[string.getContent().at(haystack_offset + pattern.getContent().size() - 1)]; + haystack_offset += bcs[string.getContent()[haystack_offset + pattern.getContent().size() - 1]]; //std::cout << haystack_offset << std::endl; } return occ; diff --git a/tests.aarbology.sh b/tests.aarbology.sh index d0dbdedf63..d1a5fc9d28 100755 --- a/tests.aarbology.sh +++ b/tests.aarbology.sh @@ -209,8 +209,8 @@ function runTestPattern { outputResults } -runTestSubtree "Exact Boyer Moore Horspool" "./aarbology2 -a boyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarTree -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set" +runTestSubtree "Exact Boyer Moore Horspool" "./aarbology2 -a boyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarTree -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set" runTestSubtree "Exact Subtree Automaton" "./arun2 -t occurrences -a <(./aarbology2 -a exactSubtreeMatchingAutomaton -p \"\$PATTERN_FILE\" | ./adeterminize2) -i \"\$SUBJECT_FILE\" | ./astat2 -p size --set" -runTestPattern "Exact Boyer Moore Horspool" "./aarbology2 -a boyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set" +runTestPattern "Exact Boyer Moore Horspool" "./aarbology2 -a boyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set" runTestPattern "Exact Knuth Morris Pratt" "./aarbology2 -a knuthMorrisPratt -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set" diff --git a/tests.astringology.sh b/tests.astringology.sh index 974d5f288d..4dda5001c2 100755 --- a/tests.astringology.sh +++ b/tests.astringology.sh @@ -163,6 +163,6 @@ function runTest { outputResults } -runTest "Exact Boyer Moore Horspool" "./astringology2 -a boyerMooreHorspool -s \"\$SUBJECT_FILE\" -p \"\$PATTERN_FILE\" | ./astat2 -p size --set" -runTest "Exact Matching Automaton" "./arun2 -t occurrences -a <(./astringology2 -a exactMatchingAutomaton -p \"\$PATTERN_FILE\" | ./adeterminize2) -i \"\$SUBJECT_FILE\" | ./astat2 -p size --set" +runTest "Exact Boyer Moore Horspool" "./astringology2 -a boyerMooreHorspool -s \"\$SUBJECT_FILE\" -p <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\")) | ./astat2 -p size --set" +runTest "Exact Matching Automaton" "./arun2 -t occurrences -a <(./astringology2 -a exactMatchingAutomaton -p <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\")) | ./adeterminize2) -i \"\$SUBJECT_FILE\" | ./astat2 -p size --set" -- GitLab