From 27f2cd4384a927b5a971e84d594ab979dca7499c Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Tue, 30 Jan 2018 08:49:49 +0100
Subject: [PATCH] nonlinear tree pattern automaton: save space by sharing states for non-repeated subtrees

---
 .../ExactNonlinearTreePatternAutomaton.h      | 33 ++++++++++++++-----
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/alib2algo/src/arbology/exact/ExactNonlinearTreePatternAutomaton.h b/alib2algo/src/arbology/exact/ExactNonlinearTreePatternAutomaton.h
index 6fdb164606..13ceb138d3 100644
--- a/alib2algo/src/arbology/exact/ExactNonlinearTreePatternAutomaton.h
+++ b/alib2algo/src/arbology/exact/ExactNonlinearTreePatternAutomaton.h
@@ -28,13 +28,13 @@ class ExactNonlinearTreePatternAutomaton {
 	static automaton::InputDrivenNPDA < common::ranked_symbol < SymbolType, RankType >, char, ext::pair < unsigned, unsigned > > constructInternal ( const tree::PrefixRankedTree < SymbolType, RankType > & tree, const common::ranked_symbol < SymbolType, RankType > & subtreeWildcard, const common::ranked_symbol < SymbolType, RankType > & currentNonlinearVariable, const ext::set < common::ranked_symbol < SymbolType, RankType > > & nonlinearVariables );
 
 	template < class SymbolType, class RankType >
-	static void constructTail ( automaton::InputDrivenNPDA < common::ranked_symbol < SymbolType, RankType >, char, ext::pair < unsigned, unsigned > > & res, const tree::PrefixRankedTree < SymbolType, RankType > & tree, const common::ranked_symbol < SymbolType, RankType > & subtreeWildcard, const common::ranked_symbol < SymbolType, RankType > & currentNonlinearVariable, const ext::set < common::ranked_symbol < SymbolType, RankType > > & nonlinearVariables, unsigned subtreeId, typename ext::vector < common::ranked_symbol < SymbolType, RankType > >::const_iterator rankedSymbolsIter, unsigned i, typename ext::vector < common::ranked_symbol < unsigned, RankType > >::const_iterator subtreeRepeatsIter );
+	static void constructTail ( automaton::InputDrivenNPDA < common::ranked_symbol < SymbolType, RankType >, char, ext::pair < unsigned, unsigned > > & res, const tree::PrefixRankedTree < SymbolType, RankType > & tree, const common::ranked_symbol < SymbolType, RankType > & subtreeWildcard, const common::ranked_symbol < SymbolType, RankType > & currentNonlinearVariable, const ext::set < common::ranked_symbol < SymbolType, RankType > > & nonlinearVariables, unsigned subtreeId, bool nonlinearJumps, typename ext::vector < common::ranked_symbol < SymbolType, RankType > >::const_iterator rankedSymbolsIter, unsigned i, typename ext::vector < common::ranked_symbol < unsigned, RankType > >::const_iterator subtreeRepeatsIter );
 
 	template < class SymbolType, class RankType >
 	static automaton::InputDrivenNPDA < common::ranked_symbol < SymbolType, RankType >, char, ext::pair < unsigned, unsigned > > constructInternal ( const tree::PrefixRankedBarTree < SymbolType, RankType > & tree, const common::ranked_symbol < SymbolType, RankType > & subtreeWildcard, const common::ranked_symbol < SymbolType, RankType > & currentNonlinearVariable, const ext::set < common::ranked_symbol < SymbolType, RankType > > & nonlinearVariables, const common::ranked_symbol < SymbolType, RankType > & variablesBar );
 
 	template < class SymbolType, class RankType >
-	static void constructTail ( automaton::InputDrivenNPDA < common::ranked_symbol < SymbolType, RankType >, char, ext::pair < unsigned, unsigned > > & res, const tree::PrefixRankedBarTree < SymbolType, RankType > & tree, const common::ranked_symbol < SymbolType, RankType > & subtreeWildcard, const common::ranked_symbol < SymbolType, RankType > & currentNonlinearVariable, const ext::set < common::ranked_symbol < SymbolType, RankType > > & nonlinearVariables, const common::ranked_symbol < SymbolType, RankType > & variablesBar, unsigned subtreeId, typename ext::vector < common::ranked_symbol < SymbolType, RankType > >::const_iterator rankedSymbolsIter, unsigned i, typename ext::vector < common::ranked_symbol < unsigned, RankType > >::const_iterator subtreeRepeatsIter );
+	static void constructTail ( automaton::InputDrivenNPDA < common::ranked_symbol < SymbolType, RankType >, char, ext::pair < unsigned, unsigned > > & res, const tree::PrefixRankedBarTree < SymbolType, RankType > & tree, const common::ranked_symbol < SymbolType, RankType > & subtreeWildcard, const common::ranked_symbol < SymbolType, RankType > & currentNonlinearVariable, const ext::set < common::ranked_symbol < SymbolType, RankType > > & nonlinearVariables, const common::ranked_symbol < SymbolType, RankType > & variablesBar, unsigned subtreeId, bool nonlinearJumps, typename ext::vector < common::ranked_symbol < SymbolType, RankType > >::const_iterator rankedSymbolsIter, unsigned i, typename ext::vector < common::ranked_symbol < unsigned, RankType > >::const_iterator subtreeRepeatsIter );
 
 public:
 	/**
@@ -53,7 +53,7 @@ public:
 };
 
 template < class SymbolType, class RankType >
-void ExactNonlinearTreePatternAutomaton::constructTail ( automaton::InputDrivenNPDA < common::ranked_symbol < SymbolType, RankType >, char, ext::pair < unsigned, unsigned > > & res, const tree::PrefixRankedTree < SymbolType, RankType > & tree, const common::ranked_symbol < SymbolType, RankType > & subtreeWildcard, const common::ranked_symbol < SymbolType, RankType > & currentNonlinearVariable, const ext::set < common::ranked_symbol < SymbolType, RankType > > & nonlinearVariables, unsigned subtreeId, typename ext::vector < common::ranked_symbol < SymbolType, RankType > >::const_iterator rankedSymbolsIter, unsigned i, typename ext::vector < common::ranked_symbol < unsigned, RankType > >::const_iterator subtreeRepeatsIter ) {
+void ExactNonlinearTreePatternAutomaton::constructTail ( automaton::InputDrivenNPDA < common::ranked_symbol < SymbolType, RankType >, char, ext::pair < unsigned, unsigned > > & res, const tree::PrefixRankedTree < SymbolType, RankType > & tree, const common::ranked_symbol < SymbolType, RankType > & subtreeWildcard, const common::ranked_symbol < SymbolType, RankType > & currentNonlinearVariable, const ext::set < common::ranked_symbol < SymbolType, RankType > > & nonlinearVariables, unsigned subtreeId, bool nonlinearJumps, typename ext::vector < common::ranked_symbol < SymbolType, RankType > >::const_iterator rankedSymbolsIter, unsigned i, typename ext::vector < common::ranked_symbol < unsigned, RankType > >::const_iterator subtreeRepeatsIter ) {
 	ext::deque < std::pair < size_t, unsigned > > subtreeJumps;
 	ext::deque < unsigned > subtreeRepeatsStack;
 
@@ -74,7 +74,7 @@ void ExactNonlinearTreePatternAutomaton::constructTail ( automaton::InputDrivenN
 			res.addTransition ( source, subtreeWildcard, currentState );
 
 			for ( const common::ranked_symbol < SymbolType, RankType > & nonlinearVariable : nonlinearVariables )
-				if ( nonlinearVariable != currentNonlinearVariable || subtreeId == subtreeRepeatsStack.back ( ) )
+				if ( nonlinearVariable != currentNonlinearVariable || ( subtreeId == subtreeRepeatsStack.back ( ) && nonlinearJumps ) )
 					res.addTransition ( source, nonlinearVariable, currentState );
 
 			if ( subtreeJumps.size ( ) ) {
@@ -94,6 +94,10 @@ automaton::InputDrivenNPDA < common::ranked_symbol < SymbolType, RankType >, cha
 
 	tree::PrefixRankedTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( tree );
 
+	std::vector < unsigned > repeatsFrequency ( repeats.getContent ( ).size ( ), 0 );
+	for ( const common::ranked_symbol < unsigned, RankType > & repeat : repeats.getContent ( ) )
+		++ repeatsFrequency [ repeat.getSymbol ( ) ];
+
 	for ( const common::ranked_symbol < SymbolType, RankType > & symbol : tree.getAlphabet ( ) ) {
 		res.addInputSymbol ( symbol );
 		res.setPushdownStoreOperation ( symbol, ext::vector < char > ( 1, S ), ext::vector < char > ( ( size_t ) symbol.getRank ( ), S ) );
@@ -135,12 +139,16 @@ automaton::InputDrivenNPDA < common::ranked_symbol < SymbolType, RankType >, cha
 					res.addTransition ( source, nonlinearVariable, currentState );
 				else {
 					unsigned subtreeId = subtreeRepeatsStack.back ( );
+					bool multiRepeat = repeatsFrequency [ subtreeId ] > 1;
+					if ( ! multiRepeat )
+						subtreeId = repeats.getContent ( ) [ 0 ].getSymbol ( );
+
 					ext::pair < unsigned, unsigned > targetState = ext::make_pair ( i, subtreeId + 1 );
 
 					res.addState ( targetState );
 					res.addTransition ( source, nonlinearVariable, targetState );
 
-					constructTail ( res, tree, subtreeWildcard, currentNonlinearVariable, nonlinearVariables, subtreeId, rankedSymbolsIter, i, subtreeRepeatsIter );
+					constructTail ( res, tree, subtreeWildcard, currentNonlinearVariable, nonlinearVariables, subtreeId, multiRepeat, rankedSymbolsIter, i, subtreeRepeatsIter );
 				}
 
 			subtreeJumps.pop_back ( );
@@ -159,7 +167,7 @@ automaton::InputDrivenNPDA < common::ranked_symbol < SymbolType, RankType >, cha
 }
 
 template < class SymbolType, class RankType >
-void ExactNonlinearTreePatternAutomaton::constructTail ( automaton::InputDrivenNPDA < common::ranked_symbol < SymbolType, RankType >, char, ext::pair < unsigned, unsigned > > & res, const tree::PrefixRankedBarTree < SymbolType, RankType > & tree, const common::ranked_symbol < SymbolType, RankType > & subtreeWildcard, const common::ranked_symbol < SymbolType, RankType > & currentNonlinearVariable, const ext::set < common::ranked_symbol < SymbolType, RankType > > & nonlinearVariables, const common::ranked_symbol < SymbolType, RankType > & variablesBar, unsigned subtreeId, typename ext::vector < common::ranked_symbol < SymbolType, RankType > >::const_iterator rankedSymbolsIter, unsigned i, typename ext::vector < common::ranked_symbol < unsigned, RankType > >::const_iterator subtreeRepeatsIter ) {
+void ExactNonlinearTreePatternAutomaton::constructTail ( automaton::InputDrivenNPDA < common::ranked_symbol < SymbolType, RankType >, char, ext::pair < unsigned, unsigned > > & res, const tree::PrefixRankedBarTree < SymbolType, RankType > & tree, const common::ranked_symbol < SymbolType, RankType > & subtreeWildcard, const common::ranked_symbol < SymbolType, RankType > & currentNonlinearVariable, const ext::set < common::ranked_symbol < SymbolType, RankType > > & nonlinearVariables, const common::ranked_symbol < SymbolType, RankType > & variablesBar, unsigned subtreeId, bool nonlinearJumps, typename ext::vector < common::ranked_symbol < SymbolType, RankType > >::const_iterator rankedSymbolsIter, unsigned i, typename ext::vector < common::ranked_symbol < unsigned, RankType > >::const_iterator subtreeRepeatsIter ) {
 	ext::deque < unsigned > subtreeJumps;
 	ext::deque < unsigned > subtreeRepeatsStack;
 
@@ -183,7 +191,7 @@ void ExactNonlinearTreePatternAutomaton::constructTail ( automaton::InputDrivenN
 				res.addTransition ( middle, variablesBar, currentState );
 
 				for ( const common::ranked_symbol < SymbolType, RankType > & nonlinearVariable : nonlinearVariables )
-					if ( nonlinearVariable != currentNonlinearVariable || subtreeId == subtreeRepeatsStack.back ( ) )
+					if ( nonlinearVariable != currentNonlinearVariable || ( subtreeId == subtreeRepeatsStack.back ( ) && nonlinearJumps ) )
 						res.addTransition ( source, nonlinearVariable, middle );
 
 				subtreeJumps.pop_back ( );
@@ -203,6 +211,11 @@ automaton::InputDrivenNPDA < common::ranked_symbol < SymbolType, RankType >, cha
 
 	tree::PrefixRankedBarTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( tree );
 
+	std::vector < unsigned > repeatsFrequency ( repeats.getContent ( ).size ( ), 0 );
+	for ( const common::ranked_symbol < unsigned, RankType > & repeat : repeats.getContent ( ) )
+		if ( ! repeats.getBars ( ).count ( repeat ) )
+			++ repeatsFrequency [ repeat.getSymbol ( ) ];
+
 	for ( const common::ranked_symbol < SymbolType, RankType > & symbol : tree.getAlphabet ( ) ) {
 		res.addInputSymbol ( symbol );
 		if ( tree.getBars ( ).count ( symbol ) )
@@ -252,6 +265,10 @@ automaton::InputDrivenNPDA < common::ranked_symbol < SymbolType, RankType >, cha
 					res.addTransition ( source, nonlinearVariable, middle );
 				else {
 					unsigned subtreeId = subtreeRepeatsStack.back ( );
+					bool multiRepeat = repeatsFrequency [ subtreeId ] > 1;
+					if ( ! multiRepeat )
+						subtreeId = repeats.getContent ( ) [ 0 ].getSymbol ( );
+
 					ext::pair < unsigned, unsigned > targetState = ext::make_pair ( i, subtreeId + 1 );
 					ext::pair < unsigned, unsigned > middleState = ext::make_pair ( ~0 - i, subtreeId + 1 );
 
@@ -261,7 +278,7 @@ automaton::InputDrivenNPDA < common::ranked_symbol < SymbolType, RankType >, cha
 					res.addTransition ( source, nonlinearVariable, middleState );
 					res.addTransition ( middleState, variablesBar, targetState );
 
-					constructTail ( res, tree, subtreeWildcard, currentNonlinearVariable, nonlinearVariables, variablesBar, subtreeId, rankedSymbolsIter, i, subtreeRepeatsIter );
+					constructTail ( res, tree, subtreeWildcard, currentNonlinearVariable, nonlinearVariables, variablesBar, subtreeId, multiRepeat, rankedSymbolsIter, i, subtreeRepeatsIter );
 				}
 
 			subtreeJumps.pop_back ( );
-- 
GitLab