From 36c66ddead7fe44d7cc9fbda93515d37616c717d Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Fri, 12 Feb 2016 12:32:32 +0100
Subject: [PATCH] deadzone on trees on PrefixRanked notation

---
 ...neUsingBadCharacterShiftAndBorderArray.cpp | 51 +++++++++++++++++++
 ...ZoneUsingBadCharacterShiftAndBorderArray.h |  4 ++
 tests.aarbology.sh                            |  5 +-
 3 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/alib2algo/src/arbology/exact/DeadZoneUsingBadCharacterShiftAndBorderArray.cpp b/alib2algo/src/arbology/exact/DeadZoneUsingBadCharacterShiftAndBorderArray.cpp
index 6592f0b27d..156c6b9712 100644
--- a/alib2algo/src/arbology/exact/DeadZoneUsingBadCharacterShiftAndBorderArray.cpp
+++ b/alib2algo/src/arbology/exact/DeadZoneUsingBadCharacterShiftAndBorderArray.cpp
@@ -14,6 +14,8 @@
 #include <tree/Tree.h>
 #include <tree/ranked/PrefixRankedBarTree.h>
 #include <tree/ranked/PrefixRankedBarPattern.h>
+#include <tree/ranked/PrefixRankedTree.h>
+#include <tree/ranked/PrefixRankedPattern.h>
 #include <alphabet/RankedSymbol.h>
 
 #include <map>
@@ -76,6 +78,55 @@ void DeadZoneUsingBadCharacterShiftAndBorderArray::match_rec ( std::set < unsign
 
 auto DeadZoneUsingBadCharacterShiftAndBorderArrayPrefixRankedBarTreePrefixRankedBarPattern = DeadZoneUsingBadCharacterShiftAndBorderArray::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree, tree::PrefixRankedBarPattern > ( DeadZoneUsingBadCharacterShiftAndBorderArray::getInstance ( ), DeadZoneUsingBadCharacterShiftAndBorderArray::match );
 
+std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedTree & pattern ) {
+	return match ( subject, tree::PrefixRankedPattern ( pattern ) );
+}
+
+auto DeadZoneUsingBadCharacterShiftAndBorderArrayPrefixRankedTreePrefixRankedTree = DeadZoneUsingBadCharacterShiftAndBorderArray::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedTree, tree::PrefixRankedTree > ( DeadZoneUsingBadCharacterShiftAndBorderArray::getInstance ( ), DeadZoneUsingBadCharacterShiftAndBorderArray::match );
+
+std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedPattern & pattern ) {
+	std::set < unsigned > occ;
+	std::map < alphabet::RankedSymbol, size_t > bbcs = ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
+	std::vector < size_t > fba = BorderArrayNaive::ba ( pattern );
+	std::vector < int > subjectSubtreeJumpTable = SubtreeJumpTable::compute ( subject );
+
+	match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, 0, subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1 );
+	return occ;
+}
+
+void DeadZoneUsingBadCharacterShiftAndBorderArray::match_rec ( std::set < unsigned > & occ, const tree::PrefixRankedTree & subject, const tree::PrefixRankedPattern & pattern, std::vector < size_t > & fba, std::map < alphabet::RankedSymbol, size_t > & bbcs, std::vector < int > & subjectSubtreeJumpTable, int low, int high ) {
+	if ( low >= high ) return;
+
+	int i = ( low + high ) / 2;
+
+	 // index to the pattern
+	unsigned j = 0;
+
+	 // offset to the subject
+	unsigned offset = i;
+
+	while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
+		if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] )
+			 // match of symbol
+			offset++;
+		else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) )
+			 // match of variable with subtree
+			offset = subjectSubtreeJumpTable[offset];
+		else
+			break;
+
+		j++;
+	}
+
+	 // match was found
+	if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );
+
+	match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, low, i - bbcs[subject.getContent ( )[i]] + 1 );
+	match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, i + j - fba[j], high );
+}
+
+auto DeadZoneUsingBadCharacterShiftAndBorderArrayPrefixRankedTreePrefixRankedPattern = DeadZoneUsingBadCharacterShiftAndBorderArray::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedTree, tree::PrefixRankedPattern > ( DeadZoneUsingBadCharacterShiftAndBorderArray::getInstance ( ), DeadZoneUsingBadCharacterShiftAndBorderArray::match );
+
 } /* namespace exact */
 
 } /* namespace arbology */
diff --git a/alib2algo/src/arbology/exact/DeadZoneUsingBadCharacterShiftAndBorderArray.h b/alib2algo/src/arbology/exact/DeadZoneUsingBadCharacterShiftAndBorderArray.h
index 50a4354da8..020eff5239 100644
--- a/alib2algo/src/arbology/exact/DeadZoneUsingBadCharacterShiftAndBorderArray.h
+++ b/alib2algo/src/arbology/exact/DeadZoneUsingBadCharacterShiftAndBorderArray.h
@@ -33,6 +33,10 @@ public:
 	static std::set < unsigned > match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarPattern & pattern );
 	static void match_rec ( std::set < unsigned > & occ, const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarPattern & pattern, std::vector < size_t > & fba, std::map < alphabet::RankedSymbol, size_t > & bbcs, std::vector < int > & subjectSubtreeJumpTable, int low, int high );
 
+	static std::set < unsigned > match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedTree & pattern );
+	static std::set < unsigned > match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedPattern & pattern );
+	static void match_rec ( std::set < unsigned > & occ, const tree::PrefixRankedTree & subject, const tree::PrefixRankedPattern & pattern, std::vector < size_t > & fba, std::map < alphabet::RankedSymbol, size_t > & bbcs, std::vector < int > & subjectSubtreeJumpTable, int low, int high );
+
 	static DeadZoneUsingBadCharacterShiftAndBorderArray & getInstance ( ) {
 		static DeadZoneUsingBadCharacterShiftAndBorderArray res;
 
diff --git a/tests.aarbology.sh b/tests.aarbology.sh
index 3912b4f007..792efe4916 100755
--- a/tests.aarbology.sh
+++ b/tests.aarbology.sh
@@ -257,8 +257,6 @@ function runTestPatternEnds {
 	outputResults
 }
 
-runTestPattern "Exact Dead Zone Using Bad Character Shift And Border Array (Pattern PrefixRankedBar)" "./aarbology2 -a deadZoneUsingBadCharacterShiftAndBorderArray -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"
-
 runTestPatternEnds "Exact Pattern Matching Automaton (PrefixRanked)" "./aarbology2 -a exactPatternMatchingAutomaton -p <(./acast2 -t PrefixRankedPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <( ./alphabetManip2 -o get -i \"\$SUBJECT_FILE\" ) ) ) | ./adeterminize2 | ./arun2 -t occurrences -a - -i <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" | ./acast2 -t LinearString ) | ./astat2 -p size --set"
 
 RAND_SIZE_SUBJECT=100
@@ -273,5 +271,8 @@ runTestPattern "Exact Reversed Boyer Moore Horspool (Pattern PrefixRankedBar)" "
 runTestPattern "Exact Reversed Boyer Moore Horspool (Pattern PrefixRanked)" "./aarbology2 -a reversedBoyerMooreHorspool -s <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"
 runTestPattern "Exact Knuth Morris Pratt (Pattern PrefixRankedBar)" "./aarbology2 -a knuthMorrisPratt -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set"
 runTestPattern "Exact Knuth Morris Pratt (Pattern PrefixRanked)" "./aarbology2 -a knuthMorrisPratt -s <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set"
+runTestPattern "Exact Dead Zone Using Bad Character Shift And Border Array (Pattern PrefixRanked)" "./aarbology2 -a deadZoneUsingBadCharacterShiftAndBorderArray -s <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"
+runTestPattern "Exact Dead Zone Using Bad Character Shift And Border Array (Pattern PrefixRankedBar)" "./aarbology2 -a deadZoneUsingBadCharacterShiftAndBorderArray -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"
+
 runTestPattern "Exact Pattern Matching Automaton (Pattern Tree)" "./arun2 -t occurrences -a <(./aarbology2 -a exactPatternMatchingAutomaton -p <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\")) | ./adeterminize2) -i \"\$SUBJECT_FILE\" | ./astat2 -p size --set"
 runTestPattern "Exact Pattern Matching Automaton (PrefixRankedBar)" "./aarbology2 -a exactPatternMatchingAutomaton -p <(./acast2 -t PrefixRankedBarPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <( ./alphabetManip2 -o get -i \"\$SUBJECT_FILE\" ) ) ) | ./adeterminize2 | ./arun2 -t occurrences -a - -i <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" | ./acast2 -t LinearString ) | ./astat2 -p size --set"
-- 
GitLab