From a0866678c47dfd36e3cb1139f206368c226437bc Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Sat, 23 Jul 2016 21:49:34 +0200
Subject: [PATCH] add reversed variants for nonlinear bltpm

---
 .../exact/ReversedBadCharacterShiftTable.cpp  |  32 +++--
 .../exact/ReversedBadCharacterShiftTable.h    |   2 +
 .../exact/ReversedBoyerMooreHorspool.cpp      | 118 ++++++++++++++++++
 .../exact/ReversedBoyerMooreHorspool.h        |   2 +
 tests.aarbology.sh                            |   2 +
 5 files changed, 147 insertions(+), 9 deletions(-)

diff --git a/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.cpp b/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.cpp
index e8e345eb76..96171faaf2 100644
--- a/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.cpp
+++ b/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.cpp
@@ -8,7 +8,9 @@
 #include "ReversedBadCharacterShiftTable.h"
 
 #include <tree/ranked/PrefixRankedBarPattern.h>
+#include <tree/ranked/PrefixRankedBarNonlinearPattern.h>
 #include <tree/ranked/PrefixRankedPattern.h>
+#include <tree/ranked/PrefixRankedNonlinearPattern.h>
 
 namespace arbology {
 
@@ -19,13 +21,19 @@ std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs
 }
 
 std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs ( const tree::PrefixRankedBarPattern & pattern ) {
+	return bcs ( tree::PrefixRankedBarNonlinearPattern ( pattern ) ); // build a nonlinear bar pattern from the linear one and reuse that overload's table computation
+}
+
+auto ReversedBadCharacterShiftTablePrefixRankedBarPattern = ReversedBadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedBarPattern > ( ReversedBadCharacterShiftTable::bcs ); // presumably makes this overload reachable through the class's dispatch for tree::PrefixRankedBarPattern -- confirm against RegistratorWrapper
+
+std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs ( const tree::PrefixRankedBarNonlinearPattern & pattern ) {
 	const std::set < alphabet::RankedSymbol > & alphabet = pattern.getAlphabet ( );
 
 	std::map < alphabet::RankedSymbol, size_t > bcs;
 
 	 // initialisation of bcs table to the size of the pattern
 	for ( const alphabet::RankedSymbol & symbol : alphabet ) {
-		if ( ( symbol == pattern.getSubtreeWildcard ( ) ) || ( symbol == pattern.getVariablesBar ( ) ) ) continue;
+		if ( ( symbol == pattern.getSubtreeWildcard ( ) ) || ( pattern.getNonlinearVariables ( ).count ( symbol ) ) || ( symbol == pattern.getVariablesBar ( ) ) ) continue;
 
 		bcs.insert ( std::make_pair ( symbol, pattern.getContent ( ).size ( ) ) );
 	}
@@ -41,7 +49,7 @@ std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs
 	// limit the shift by occurrence of the last variable
 
 	for ( const alphabet::RankedSymbol & symbol : alphabet ) {
-		if ( ( symbol == pattern.getSubtreeWildcard ( ) ) || ( symbol == pattern.getVariablesBar ( ) ) ) continue;
+		if ( ( symbol == pattern.getSubtreeWildcard ( ) ) || ( pattern.getNonlinearVariables ( ).count ( symbol ) ) || ( symbol == pattern.getVariablesBar ( ) ) ) continue;
 
 		size_t tmp = firstSBarOffset;
 
@@ -61,7 +69,7 @@ std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs
 
 	 // limit the shift by position of symbols within the pattern
 	for ( unsigned i = pattern.getContent ( ).size ( ) - 1; i >= 1; i-- ) { // first symbol is not concerned
-		if ( ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) ) || ( pattern.getContent ( )[i] == pattern.getVariablesBar ( ) ) ) continue;
+		if ( ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) ) || ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[i] ) ) || ( pattern.getContent ( )[i] == pattern.getVariablesBar ( ) ) ) continue;
 
 		size_t tmp = i;
 
@@ -72,16 +80,22 @@ std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs
 	return bcs;
 }
 
-auto ReversedBadCharacterShiftTablePrefixRankedBarPattern = ReversedBadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedBarPattern > ( ReversedBadCharacterShiftTable::bcs );
+auto ReversedBadCharacterShiftTablePrefixRankedBarNonlinearPattern = ReversedBadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedBarNonlinearPattern > ( ReversedBadCharacterShiftTable::bcs );
 
 std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs ( const tree::PrefixRankedPattern & pattern ) {
+	return bcs ( tree::PrefixRankedNonlinearPattern ( pattern ) ); // build a nonlinear pattern from the linear one and reuse that overload's table computation
+}
+
+auto ReversedBadCharacterShiftTablePrefixRankedPattern = ReversedBadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedPattern > ( ReversedBadCharacterShiftTable::bcs ); // presumably makes this overload reachable through the class's dispatch for tree::PrefixRankedPattern -- confirm against RegistratorWrapper
+
+std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs ( const tree::PrefixRankedNonlinearPattern & pattern ) {
 	const std::set < alphabet::RankedSymbol > & alphabet = pattern.getAlphabet ( );
 
 	std::map < alphabet::RankedSymbol, size_t > bcs;
 
 	 // initialisation of bcs table to the size of the pattern
 	for ( const alphabet::RankedSymbol & symbol : alphabet ) {
-		if ( symbol == pattern.getSubtreeWildcard ( ) ) continue;
+		if ( symbol == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( symbol ) ) continue;
 
 		bcs.insert ( std::make_pair ( symbol, pattern.getContent ( ).size ( ) ) );
 	}
@@ -91,14 +105,14 @@ std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs
 	unsigned firstSOffset = pattern.getContent ( ).size ( ) + 1;
 
 	for ( int i = ( int ) pattern.getContent ( ).size ( ) - 1; i >= 0; i-- )
-		if ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) )
+		if ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[i] ) )
 			firstSOffset = i;
 
 	if ( firstSOffset == 0 ) firstSOffset = 1;
 
 	 // limit the shift by occurrence of the last variable
 	for ( const alphabet::RankedSymbol & symbol : alphabet ) {
-		if ( symbol == pattern.getSubtreeWildcard ( ) ) continue;
+		if ( symbol == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( symbol ) ) continue;
 
 		if ( bcs[symbol] > firstSOffset )
 			bcs[symbol] = firstSOffset;
@@ -106,7 +120,7 @@ std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs
 
 	 // limit the shift by position of symbols within the pattern
 	for ( unsigned i = pattern.getContent ( ).size ( ) - 1; i >= 1; i-- ) { // first symbol is not concerned
-		if ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) ) continue;
+		if ( pattern.getContent ( )[i] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[i] ) ) continue;
 		size_t tmp = i;
 
 		if ( bcs[pattern.getContent ( )[i]] > tmp )
@@ -116,7 +130,7 @@ std::map < alphabet::RankedSymbol, size_t > ReversedBadCharacterShiftTable::bcs
 	return bcs;
 }
 
-auto ReversedBadCharacterShiftTablePrefixRankedPattern = ReversedBadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedPattern > ( ReversedBadCharacterShiftTable::bcs );
+auto ReversedBadCharacterShiftTablePrefixRankedNonlinearPattern = ReversedBadCharacterShiftTable::RegistratorWrapper < std::map < alphabet::RankedSymbol, size_t >, tree::PrefixRankedNonlinearPattern > ( ReversedBadCharacterShiftTable::bcs );
 
 } /* namespace exact */
 
diff --git a/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.h b/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.h
index 909c4301ca..2699bfe9d8 100644
--- a/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.h
+++ b/alib2algo/src/arbology/exact/ReversedBadCharacterShiftTable.h
@@ -33,7 +33,9 @@ public:
 	static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::RankedTreeWrapper & pattern );
 
 	static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::PrefixRankedBarPattern & pattern );
+	static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::PrefixRankedBarNonlinearPattern & pattern );
 	static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::PrefixRankedPattern & pattern );
+	static std::map < alphabet::RankedSymbol, size_t > bcs ( const tree::PrefixRankedNonlinearPattern & pattern );
 
 };
 
diff --git a/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.cpp b/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.cpp
index c6ddbfb88e..6d79deba94 100644
--- a/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.cpp
+++ b/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.cpp
@@ -8,12 +8,15 @@
 #include "ReversedBoyerMooreHorspool.h"
 #include "ReversedBadCharacterShiftTable.h"
 #include "SubtreeJumpTable.h"
+#include "ExactSubtreeRepeatsNaive.h"
 
 #include <tree/Tree.h>
 #include <tree/ranked/PrefixRankedTree.h>
 #include <tree/ranked/PrefixRankedBarTree.h>
 #include <tree/ranked/PrefixRankedPattern.h>
 #include <tree/ranked/PrefixRankedBarPattern.h>
+#include <tree/ranked/PrefixRankedNonlinearPattern.h>
+#include <tree/ranked/PrefixRankedBarNonlinearPattern.h>
 #include <alphabet/RankedSymbol.h>
 
 #include <map>
@@ -75,6 +78,64 @@ std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRank
 
 auto ReversedBoyerMooreHorspoolPrefixRankedBarTreePrefixRankedBarPattern = ReversedBoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree, tree::PrefixRankedBarPattern > ( ReversedBoyerMooreHorspool::match );
 
+std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarNonlinearPattern & pattern ) { // returns every index into subject.getContent ( ) at which the whole pattern was matched
+	std::set < unsigned > occ; // start indexes of the occurrences found so far
+	std::map < alphabet::RankedSymbol, size_t > bcs = ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subject's alphabet must be a subset of (or equal to) the pattern's alphabet
+	std::vector < int > subjectSubtreeJumpTable = SubtreeJumpTable::compute ( subject ); // used below to move offset over the subtree matched by a variable
+	std::map < alphabet::RankedSymbol, alphabet::Symbol > variablesSetting; // nonlinear variable -> repeat symbol it was bound to in the current attempt
+
+	tree::PrefixRankedBarTree repeats = ExactSubtreeRepeatsNaive::repeats ( subject ); // presumably equal subtrees of the subject get equal symbols here, so comparing symbols compares subtrees -- confirm against ExactSubtreeRepeatsNaive
+
+	 // index to the subject; it only decreases, so candidate positions are visited from right to left
+	int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1;
+
+	 // main loop of the algorithm over all possible indexes where the pattern can start
+	while ( i >= 0 ) {
+		 // forget the variable bindings of the previous attempt
+		variablesSetting.clear();
+
+		 // index to the pattern
+		unsigned j = 0;
+
+		 // offset to the subject, advanced while the attempt at position i proceeds
+		unsigned offset = i;
+
+		while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
+			if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) {
+				 // match of symbol
+				offset = offset + 1;
+				j = j + 1;
+			} else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[j] ) ) && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) ) {
+				 // nonlinear variable: a repeated occurrence must see the same repeat symbol as its first occurrence, otherwise the attempt fails
+				if ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) {
+					auto setting = variablesSetting.find ( pattern.getContent ( )[ j ] );
+
+					if ( setting != variablesSetting.end ( ) && repeats.getContent ( )[ offset ].getSymbol ( ) != setting->second )
+						break;
+
+					variablesSetting.insert ( std::make_pair ( pattern.getContent ( )[ j ], repeats.getContent( )[ offset ].getSymbol ( ) ) ); // insert leaves an already existing binding untouched
+				}
+
+				 // match of variable with subtree
+				offset = subjectSubtreeJumpTable[offset];
+				j = j + 2; // skips the variable and the pattern symbol after it (presumably the variable's bar -- confirm)
+			} else {
+				break;
+			}
+		}
+
+		 // match was found: the whole pattern was consumed
+		if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i );
+
+		 // shift heuristics; bcs[...] yields 0 for a symbol missing from the table, which would stall the loop (hence the NOTE above). NOTE(review): for a one-symbol pattern i starts at subject.getContent ( ).size ( ) and this indexes one past the end -- confirm that cannot occur
+		i -= bcs[subject.getContent ( )[i]];
+	}
+
+	return occ;
+}
+
+auto ReversedBoyerMooreHorspoolPrefixRankedBarTreePrefixRankedBarNonlinearPattern = ReversedBoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree, tree::PrefixRankedBarNonlinearPattern > ( ReversedBoyerMooreHorspool::match ); // presumably makes this overload reachable through the class's dispatch -- confirm against RegistratorWrapper
+
 std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedTree & pattern ) {
 	return match ( subject, tree::PrefixRankedPattern ( pattern ) );
 }
@@ -123,6 +184,63 @@ std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRank
 
 auto ReversedBoyerMooreHorspoolPrefixRankedBarTreePrefixRankedPattern = ReversedBoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedTree, tree::PrefixRankedPattern > ( ReversedBoyerMooreHorspool::match );
 
+std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedNonlinearPattern & pattern ) { // returns every index into subject.getContent ( ) at which the whole pattern was matched
+	std::set < unsigned > occ; // start indexes of the occurrences found so far
+	std::map < alphabet::RankedSymbol, size_t > bcs = ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subject's alphabet must be a subset of (or equal to) the pattern's alphabet
+	std::vector < int > subjectSubtreeJumpTable = SubtreeJumpTable::compute ( subject ); // used below to move offset over the subtree matched by a variable
+	std::map < alphabet::RankedSymbol, alphabet::Symbol > variablesSetting; // nonlinear variable -> repeat symbol it was bound to in the current attempt
+
+	tree::PrefixRankedTree repeats = ExactSubtreeRepeatsNaive::repeats ( subject ); // presumably equal subtrees of the subject get equal symbols here, so comparing symbols compares subtrees -- confirm against ExactSubtreeRepeatsNaive
+
+	 // index to the subject; it only decreases, so candidate positions are visited from right to left
+	int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1;
+
+	 // main loop of the algorithm over all possible indexes where the pattern can start
+	while ( i >= 0 ) {
+		 // forget the variable bindings of the previous attempt
+		variablesSetting.clear();
+
+		 // index to the pattern
+		unsigned j = 0;
+
+		 // offset to the subject, advanced while the attempt at position i proceeds
+		unsigned offset = i;
+
+		while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
+			if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] )
+				 // match of symbol
+				offset = offset + 1;
+			else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) {
+				 // nonlinear variable: a repeated occurrence must see the same repeat symbol as its first occurrence, otherwise the attempt fails
+				if ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) {
+					auto setting = variablesSetting.find ( pattern.getContent ( )[ j ] );
+
+					if ( setting != variablesSetting.end ( ) && repeats.getContent ( )[ offset ].getSymbol ( ) != setting->second )
+						break;
+
+					variablesSetting.insert ( std::make_pair ( pattern.getContent ( )[ j ], repeats.getContent( )[ offset ].getSymbol ( ) ) ); // insert leaves an already existing binding untouched
+				}
+
+				 // match of variable with subtree
+				offset = subjectSubtreeJumpTable[offset];
+			} else
+				break;
+
+			j = j + 1; // reached only after a symbol or variable match; both break statements above leave j short of the pattern size
+		}
+
+		 // match was found: the whole pattern was consumed
+		if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i );
+
+		 // shift heuristics; bcs[...] yields 0 for a symbol missing from the table, which would stall the loop (hence the NOTE above). NOTE(review): for a one-symbol pattern i starts at subject.getContent ( ).size ( ) and this indexes one past the end -- confirm that cannot occur
+		i -= bcs[subject.getContent ( )[i]];
+	}
+
+	return occ;
+}
+
+auto ReversedBoyerMooreHorspoolPrefixRankedBarTreePrefixRankedNonlinearPattern = ReversedBoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedTree, tree::PrefixRankedNonlinearPattern > ( ReversedBoyerMooreHorspool::match ); // NOTE(review): the variable name says PrefixRankedBarTree although the registered subject type is tree::PrefixRankedTree
+
 } /* namespace exact */
 
 } /* namespace arbology */
diff --git a/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.h b/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.h
index 9837c35d37..ebde38651c 100644
--- a/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.h
+++ b/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.h
@@ -30,8 +30,10 @@ public:
 
 	static std::set < unsigned > match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarTree & pattern );
 	static std::set < unsigned > match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarPattern & pattern );
+	static std::set < unsigned > match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarNonlinearPattern & pattern );
 	static std::set < unsigned > match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedTree & pattern );
 	static std::set < unsigned > match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedPattern & pattern );
+	static std::set < unsigned > match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedNonlinearPattern & pattern );
 
 };
 
diff --git a/tests.aarbology.sh b/tests.aarbology.sh
index 7fa8f9b88a..fee36759d7 100755
--- a/tests.aarbology.sh
+++ b/tests.aarbology.sh
@@ -367,6 +367,8 @@ runTestNonlinearPattern "Exact Pattern Match (NonlinearPattern PrefixRankedBar)"
 runTestNonlinearPattern "Exact Boyer Moore Horspool (NonlinearPattern PrefixRankedBar)" "./aarbology2 -a boyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarNonlinearPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"
 runTestPattern "Exact Reversed Boyer Moore Horspool (Pattern PrefixRankedBar)" "./aarbology2 -a reversedBoyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"
 runTestPattern "Exact Reversed Boyer Moore Horspool (Pattern PrefixRanked)" "./aarbology2 -a reversedBoyerMooreHorspool -s <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"
+runTestNonlinearPattern "Exact Reversed Boyer Moore Horspool (NonlinearPattern PrefixRankedBar)" "./aarbology2 -a reversedBoyerMooreHorspool -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarNonlinearPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"
+runTestNonlinearPattern "Exact Reversed Boyer Moore Horspool (NonlinearPattern PrefixRanked)" "./aarbology2 -a reversedBoyerMooreHorspool -s <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedNonlinearPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"
 runTestPattern "Exact Knuth Morris Pratt (Pattern PrefixRankedBar)" "./aarbology2 -a knuthMorrisPratt -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedBarPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set"
 runTestPattern "Exact Knuth Morris Pratt (Pattern PrefixRanked)" "./aarbology2 -a knuthMorrisPratt -s <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size --set"
 runTestPattern "Exact Dead Zone Using Bad Character Shift And Border Array (Pattern PrefixRanked)" "./aarbology2 -a deadZoneUsingBadCharacterShiftAndBorderArray -s <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) -p <( ./acast2 -t PrefixRankedPattern -i <(./alphabetManip2 -o add -i \"\$PATTERN_FILE\" -a <(./alphabetManip2 -o get -i \"\$SUBJECT_FILE\"))) | ./astat2 -p size --set"
-- 
GitLab