From 38af11645e0cc5f965d602579f812ace689dbd40 Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Fri, 9 Oct 2015 15:40:00 +0200
Subject: [PATCH] - pass alphabet to bcs and automata compute algos

---
 .../src/arbology/exact/BadCharacterShiftTable.cpp  | 14 ++++----------
 .../src/arbology/exact/BadCharacterShiftTable.h    | 10 +---------
 .../src/arbology/exact/BoyerMooreHorspool.cpp      |  2 +-
 alib2algo/src/automaton/run/Accept.cpp             |  2 ++
 alib2algo/src/automaton/run/Occurrences.cpp        |  9 ++++++---
 alib2algo/src/automaton/run/Result.cpp             |  8 ++++++--
 .../stringology/exact/BadCharacterShiftTable.cpp   | 13 +++----------
 .../src/stringology/exact/BadCharacterShiftTable.h | 10 +---------
 .../src/stringology/exact/BoyerMooreHorspool.cpp   |  6 +++---
 tests.aarbology.sh                                 |  4 ++--
 tests.astringology.sh                              |  4 ++--
 11 files changed, 31 insertions(+), 51 deletions(-)

diff --git a/alib2algo/src/arbology/exact/BadCharacterShiftTable.cpp b/alib2algo/src/arbology/exact/BadCharacterShiftTable.cpp
index dabeda654b..6a77943a82 100644
--- a/alib2algo/src/arbology/exact/BadCharacterShiftTable.cpp
+++ b/alib2algo/src/arbology/exact/BadCharacterShiftTable.cpp
@@ -14,15 +14,13 @@ namespace arbology {
 
 namespace exact {
 
-std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const std::set < alphabet::RankedSymbol > & alphabet, const tree::RankedTreeWrapper & pattern ) {
-	return getInstance ( ).dispatch ( alphabet, pattern.getData ( ) );
-}
-
 std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const tree::RankedTreeWrapper & pattern ) {
-	return bcs ( pattern.getAlphabet ( ), pattern );
+	return getInstance ( ).dispatch ( pattern.getData ( ) );
 }
 
-std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const std::set < alphabet::RankedSymbol > & alphabet, const tree::PrefixRankedBarPattern & pattern ) {
+std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const tree::PrefixRankedBarPattern & pattern ) {
+	const std::set < alphabet::RankedSymbol > & alphabet = pattern.getAlphabet ( );
+
 	std::map < alphabet::RankedSymbol, size_t > bcs;
 
 	 // initialisation of bcs table to the size of the pattern
@@ -75,10 +73,6 @@ std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const
 
 auto BadCharacterShiftTablePrefixRankedBarPattern = BadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedBarPattern > ( BadCharacterShiftTable::getInstance ( ), BadCharacterShiftTable::bcs );
 
-std::map < alphabet::RankedSymbol, size_t > BadCharacterShiftTable::bcs ( const tree::PrefixRankedBarPattern & pattern ) {
-	return bcs ( pattern.getAlphabet ( ), pattern );
-}
-
 } /* namespace exact */
 
 } /* namespace arbology */
diff --git a/alib2algo/src/arbology/exact/BadCharacterShiftTable.h b/alib2algo/src/arbology/exact/BadCharacterShiftTable.h
index 4534d4feb4..256c2cecbf 100644
--- a/alib2algo/src/arbology/exact/BadCharacterShiftTable.h
+++ b/alib2algo/src/arbology/exact/BadCharacterShiftTable.h
@@ -24,7 +24,7 @@ namespace exact {
  * Computation of BCS table for BMH from MI(E+\eps)-EVY course 2014
  * To get rid of zeros in BCS table we ignore last haystack character
  */
-class BadCharacterShiftTable : public std::SingleDispatchFirstStaticParam < std::map < alphabet::RankedSymbol, size_t >, const std::set < alphabet::RankedSymbol > &, tree::RankedTreeBase > {
+class BadCharacterShiftTable : public std::SingleDispatch < std::map < alphabet::RankedSymbol, size_t >, tree::RankedTreeBase > {
 public:
 	/**
 	 * Search for pattern in linear string.
@@ -32,16 +32,8 @@ public:
 	 */
 	static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::RankedTreeWrapper & pattern );
 
-	/**
-	 * Search for pattern in linear string.
-	 * @return set set of occurences
-	 */
-	static std::map < alphabet::RankedSymbol, size_t > bcs ( const std::set < alphabet::RankedSymbol > & alphabet, const tree::RankedTreeWrapper & pattern );
-
 	static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::PrefixRankedBarPattern & pattern );
 
-	static std::map < alphabet::RankedSymbol, size_t > bcs ( const std::set < alphabet::RankedSymbol > & alphabet, const tree::PrefixRankedBarPattern & pattern );
-
 	static BadCharacterShiftTable & getInstance ( ) {
 		static BadCharacterShiftTable res;
 
diff --git a/alib2algo/src/arbology/exact/BoyerMooreHorspool.cpp b/alib2algo/src/arbology/exact/BoyerMooreHorspool.cpp
index e5cf94f113..468de4a994 100644
--- a/alib2algo/src/arbology/exact/BoyerMooreHorspool.cpp
+++ b/alib2algo/src/arbology/exact/BoyerMooreHorspool.cpp
@@ -33,7 +33,7 @@ auto BoyerMooreHorspoolPrefixRankedBarTreePrefixRankedBarTree = BoyerMooreHorspo
 
 std::set < unsigned > BoyerMooreHorspool::match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarPattern & pattern ) {
 	std::set < unsigned > occ;
-	std::map < alphabet::RankedSymbol, size_t > bcs = BadCharacterShiftTable::bcs ( subject.getAlphabet ( ), pattern );
+	std::map < alphabet::RankedSymbol, size_t > bcs = BadCharacterShiftTable::bcs ( pattern ); // NOTE: the subject's alphabet must be a subset of (or equal to) the pattern's alphabet, because the table is built from the pattern's alphabet only
 	std::vector < int > subjectSubtreeJumpTable = SubtreeJumpTable::compute ( subject );
 
 	 // index to the subject
diff --git a/alib2algo/src/automaton/run/Accept.cpp b/alib2algo/src/automaton/run/Accept.cpp
index da4e7e7896..280c2ab34d 100644
--- a/alib2algo/src/automaton/run/Accept.cpp
+++ b/alib2algo/src/automaton/run/Accept.cpp
@@ -140,7 +140,9 @@ bool Accept::accept(const automaton::DPDA& automaton, const string::LinearString
 
 			break;
 		}
+
 		if(transition == transitions.end()) return false;
+
 		for(auto pop : std::get<2>(transition->first)) pushdownStore.pop_back();
 
 		state = transition->second.first;
diff --git a/alib2algo/src/automaton/run/Occurrences.cpp b/alib2algo/src/automaton/run/Occurrences.cpp
index 1dd72424c5..bd92b23a5d 100644
--- a/alib2algo/src/automaton/run/Occurrences.cpp
+++ b/alib2algo/src/automaton/run/Occurrences.cpp
@@ -41,8 +41,8 @@ std::set<unsigned> Occurrences::occurrences(const automaton::DFA& automaton, con
 
 		auto transitions = automaton.getTransitionsFromState(state);
 		auto next = transitions.find(std::make_pair(state, symbol));
-		if(next == transitions.end()) { //makes sence to reset the automaton when there is "unknown" symbol on the input
-			state = automaton.getInitialState();
+		if(next == transitions.end()) {
+			throw exception::AlibException("Transition not present");
 		} else {
 			state = next->second;
 		}
@@ -116,7 +116,10 @@ std::set<unsigned> Occurrences::occurrences(const automaton::DPDA& automaton, co
 
 			break;
 		}
-		if(transition == transitions.end()) return {};
+
+		if(transition == transitions.end())
+			throw exception::AlibException("Transition not present");
+
 		for(auto pop : std::get<2>(transition->first)) pushdownStore.pop_back();
 
 		state = transition->second.first;
diff --git a/alib2algo/src/automaton/run/Result.cpp b/alib2algo/src/automaton/run/Result.cpp
index 6c6e476179..ab2a81a0fd 100644
--- a/alib2algo/src/automaton/run/Result.cpp
+++ b/alib2algo/src/automaton/run/Result.cpp
@@ -27,7 +27,8 @@ label::Label Result::result(const automaton::DFA& automaton, const string::Linea
 	for(const alphabet::Symbol& symbol : string.getContent()) {
 		auto transitions = automaton.getTransitionsFromState(state);
 		auto next = transitions.find(std::make_pair(state, symbol));
-		if(next == transitions.end()) return label::Label(label::LabelSetLabel(std::set<label::Label>{}));
+		if(next == transitions.end())
+			throw exception::AlibException("Transition not present");
 		state = next->second;
 	}
 	return state.getName();
@@ -60,7 +61,10 @@ label::Label Result::result(const automaton::DPDA& automaton, const string::Line
 
 			break;
 		}
-		if(transition == transitions.end()) return label::Label(label::LabelSetLabel(std::set<label::Label>{}));
+
+		if(transition == transitions.end())
+			throw exception::AlibException("Transition not present");
+
 		for(auto pop : std::get<2>(transition->first)) pushdownStore.pop_back();
 
 		state = transition->second.first;
diff --git a/alib2algo/src/stringology/exact/BadCharacterShiftTable.cpp b/alib2algo/src/stringology/exact/BadCharacterShiftTable.cpp
index e3a9c6ad28..49fc99ce98 100644
--- a/alib2algo/src/stringology/exact/BadCharacterShiftTable.cpp
+++ b/alib2algo/src/stringology/exact/BadCharacterShiftTable.cpp
@@ -15,15 +15,12 @@ namespace stringology {
 
 namespace exact {
 
-std::map<alphabet::Symbol, size_t> BadCharacterShiftTable::bcs(const std::set<alphabet::Symbol>& alphabet, const string::String& pattern) {
-	return getInstance().dispatch(alphabet, pattern.getData());
-}
-
 std::map<alphabet::Symbol, size_t> BadCharacterShiftTable::bcs(const string::String& pattern) {
-	return bcs(pattern.getAlphabet(), pattern);
+	return getInstance().dispatch(pattern.getData());
 }
 
-std::map<alphabet::Symbol, size_t> BadCharacterShiftTable::bcs(const std::set<alphabet::Symbol>& alphabet, const string::LinearString& pattern) {
+std::map<alphabet::Symbol, size_t> BadCharacterShiftTable::bcs(const string::LinearString& pattern) {
+	const std::set<alphabet::Symbol>& alphabet = pattern.getAlphabet();
 	std::map<alphabet::Symbol, size_t> bcs;
 
 	/* Initialization of BCS to the length of the needle. */
@@ -46,10 +43,6 @@ std::map<alphabet::Symbol, size_t> BadCharacterShiftTable::bcs(const std::set<al
 
 auto BadCharacterShiftTableLinearString = BadCharacterShiftTable::RegistratorWrapper<std::map<alphabet::Symbol, size_t>, string::LinearString>(BadCharacterShiftTable::getInstance(), BadCharacterShiftTable::bcs);
 
-std::map<alphabet::Symbol, size_t> BadCharacterShiftTable::bcs(const string::LinearString& pattern) {
-	return bcs(pattern.getAlphabet(), pattern);
-}
-
 } /* namespace exact */
 
 } /* namespace stringology */
diff --git a/alib2algo/src/stringology/exact/BadCharacterShiftTable.h b/alib2algo/src/stringology/exact/BadCharacterShiftTable.h
index d840582268..f53ca94298 100644
--- a/alib2algo/src/stringology/exact/BadCharacterShiftTable.h
+++ b/alib2algo/src/stringology/exact/BadCharacterShiftTable.h
@@ -23,7 +23,7 @@ namespace exact {
  * Computation of BCS table for BMH from MI(E+\eps)-EVY course 2014
  * To get rid of zeros in BCS table we ignore last haystack character
  */
-class BadCharacterShiftTable : public std::SingleDispatchFirstStaticParam < std::map < alphabet::Symbol, size_t >, const std::set < alphabet::Symbol > &, string::StringBase > {
+class BadCharacterShiftTable : public std::SingleDispatch < std::map < alphabet::Symbol, size_t >, string::StringBase > {
 public:
 	/**
 	 * Search for pattern in linear string.
@@ -31,16 +31,8 @@ public:
 	 */
 	static std::map < alphabet::Symbol, size_t > bcs ( const string::String & pattern );
 
-	/**
-	 * Search for pattern in linear string.
-	 * @return set set of occurences
-	 */
-	static std::map < alphabet::Symbol, size_t > bcs ( const std::set < alphabet::Symbol > & alphabet, const string::String & pattern );
-
 	static std::map < alphabet::Symbol, size_t > bcs ( const string::LinearString & pattern );
 
-	static std::map < alphabet::Symbol, size_t > bcs ( const std::set < alphabet::Symbol > & alphabet, const string::LinearString & pattern );
-
 	static BadCharacterShiftTable & getInstance ( ) {
 		static BadCharacterShiftTable res;
 
diff --git a/alib2algo/src/stringology/exact/BoyerMooreHorspool.cpp b/alib2algo/src/stringology/exact/BoyerMooreHorspool.cpp
index 836c27c7fe..5678acaab7 100644
--- a/alib2algo/src/stringology/exact/BoyerMooreHorspool.cpp
+++ b/alib2algo/src/stringology/exact/BoyerMooreHorspool.cpp
@@ -25,18 +25,18 @@ std::set<unsigned> BoyerMooreHorspool::match(const string::String& subject, cons
 std::set<unsigned> BoyerMooreHorspool::match(const string::LinearString& string, const string::LinearString& pattern)
 {
 	std::set<unsigned> occ;
-	std::map<alphabet::Symbol, size_t> bcs = BadCharacterShiftTable::bcs(string.getAlphabet(), pattern);
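+	std::map<alphabet::Symbol, size_t> bcs = BadCharacterShiftTable::bcs(pattern); // NOTE: the subject's alphabet must be a subset of (or equal to) the pattern's alphabet; the bcs[...] lookup below would default-insert a zero shift for an unknown symbol and the search would stop advancing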
 
 	size_t haystack_offset = 0;
 	while(haystack_offset + pattern.getContent().size() <= string.getContent().size()) {
 		size_t i = pattern.getContent().size();
-		while(i > 0 && string.getContent().at(haystack_offset + i - 1) == pattern.getContent().at(i - 1)) {
+		while(i > 0 && string.getContent()[haystack_offset + i - 1] == pattern.getContent()[i - 1]) {
 			i--;
 		}
 
 		// Yay, there is match!!!
 		if(i == 0) occ.insert(haystack_offset);
-		haystack_offset += bcs[string.getContent().at(haystack_offset + pattern.getContent().size() - 1)];
+		haystack_offset += bcs[string.getContent()[haystack_offset + pattern.getContent().size() - 1]];
 		//std::cout << haystack_offset << std::endl;
 	}
 	return occ;
diff --git a/tests.aarbology.sh b/tests.aarbology.sh
index d0dbdedf63..d1a5fc9d28 100755
--- a/tests.aarbology.sh
+++ b/tests.aarbology.sh
@@ -209,8 +209,8 @@ function runTestPattern {
 	outputResults
 }
 
-runTestSubtree "Exact Boyer Moore Horspool" "./aarbology2 -a boyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarTree -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set"
+runTestSubtree "Exact Boyer Moore Horspool" "./aarbology2 -a boyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarTree -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"
 runTestSubtree "Exact Subtree Automaton" "./arun2 -t occurrences -a <(./aarbology2 -a exactSubtreeMatchingAutomaton -p \"\$PATTERN_FILE\" | ./adeterminize2) -i \"\$SUBJECT_FILE\" | ./astat2 -p size --set"
 
-runTestPattern "Exact Boyer Moore Horspool" "./aarbology2 -a boyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set"
+runTestPattern "Exact Boyer Moore Horspool" "./aarbology2 -a boyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"
 runTestPattern "Exact Knuth Morris Pratt" "./aarbology2 -a knuthMorrisPratt -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set"
diff --git a/tests.astringology.sh b/tests.astringology.sh
index 974d5f288d..4dda5001c2 100755
--- a/tests.astringology.sh
+++ b/tests.astringology.sh
@@ -163,6 +163,6 @@ function runTest {
 	outputResults
 }
 
-runTest "Exact Boyer Moore Horspool" "./astringology2 -a boyerMooreHorspool -s \"\$SUBJECT_FILE\" -p \"\$PATTERN_FILE\" | ./astat2 -p size --set"
-runTest "Exact Matching Automaton" "./arun2 -t occurrences -a <(./astringology2 -a exactMatchingAutomaton -p \"\$PATTERN_FILE\" | ./adeterminize2) -i \"\$SUBJECT_FILE\" | ./astat2 -p size --set"
+runTest "Exact Boyer Moore Horspool" "./astringology2 -a boyerMooreHorspool -s \"\$SUBJECT_FILE\" -p <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\")) | ./astat2 -p size --set"
+runTest "Exact Matching Automaton" "./arun2 -t occurrences -a <(./astringology2 -a exactMatchingAutomaton -p <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\")) | ./adeterminize2) -i \"\$SUBJECT_FILE\" | ./astat2 -p size --set"
 
-- 
GitLab