From d34777acbf8a521781c2db4e89c54ff2bd49cd9a Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Thu, 26 Oct 2017 13:52:00 +0200
Subject: [PATCH] suffix automaton creation in stringology binary

---
 .../stringology/indexing/ExactSuffixAutomaton.h  | 16 +++++++++++-----
 astringology2/src/astringology.cpp               |  6 +++++-
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/alib2algo/src/stringology/indexing/ExactSuffixAutomaton.h b/alib2algo/src/stringology/indexing/ExactSuffixAutomaton.h
index c190c5ea1a..fa2ecfde36 100644
--- a/alib2algo/src/stringology/indexing/ExactSuffixAutomaton.h
+++ b/alib2algo/src/stringology/indexing/ExactSuffixAutomaton.h
@@ -8,6 +8,8 @@
 #include <automaton/FSM/DFA.h>
 #include <string/LinearString.h>
 
+#include <global/GlobalData.h>
+
 namespace stringology {
 
 namespace indexing {
@@ -32,8 +34,15 @@ automaton::DFA < SymbolType, unsigned > ExactSuffixAutomaton::construct ( const
 	std::vector < std::pair < unsigned, int > > suffixLinks = { { ( unsigned ) -1, 0 } }; //vector is fine, the state number is exactly the index to the vector
 	unsigned lastState = 0;
 
-	for ( const SymbolType & symbol : pattern.getContent ( ) )
+	if ( common::GlobalData::verbose )
+		std::clog << "String size " << pattern.getContent ( ).size ( ) << std::endl;
+
+	for ( const SymbolType & symbol : pattern.getContent ( ) ) {
+		if ( common::GlobalData::verbose && lastState % 1000 == 0 )
+			std::clog << "Progress " << lastState << std::endl;
+
 		suffixAutomatonAddSymbol ( suffixAutomaton, symbol, suffixLinks, lastState );
+	}
 
 	while ( lastState != ( unsigned ) -1 ) {
 		suffixAutomaton.addFinalState ( lastState );
@@ -79,10 +88,7 @@ void ExactSuffixAutomaton::suffixAutomatonAddSymbol ( automaton::DFA < SymbolTyp
 			for ( const auto & transition : suffixAutomaton.getTransitionsFromState ( qState ) )
 				suffixAutomaton.addTransition ( cloneState, transition.first.second, transition.second );
 
-			while ( kState != ( unsigned ) -1
-				&& suffixAutomaton.getTransitions ( ).find ( { kState, symbol } ) != suffixAutomaton.getTransitions ( ).end ( )
-				&& suffixAutomaton.getTransitions ( ).find ( { kState, symbol } )->second == qState ) {
-				suffixAutomaton.removeTransition ( kState, symbol, qState );
+			while ( kState != ( unsigned ) -1 && suffixAutomaton.removeTransition ( kState, symbol, qState ) ) {
 				suffixAutomaton.addTransition ( kState, symbol, cloneState );
 				kState = suffixLinks [ kState ].first;
 			}
diff --git a/astringology2/src/astringology.cpp b/astringology2/src/astringology.cpp
index 4a05c94f1c..b40fa1c087 100644
--- a/astringology2/src/astringology.cpp
+++ b/astringology2/src/astringology.cpp
@@ -45,6 +45,7 @@ int main ( int argc, char * argv[] ) {
 		allowed.push_back ( "bndmMatcher" );
 		allowed.push_back ( "compressedBitParallelIndex" );
 		allowed.push_back ( "suffixArray" );
+		allowed.push_back ( "suffixAutomaton" );
 
 		allowed.push_back ( "borderArray" );
 		allowed.push_back ( "badCharacterShiftTable" );
@@ -95,7 +96,8 @@ int main ( int argc, char * argv[] ) {
 		  || algorithm.getValue ( ) == "positionHeap"
 		  || algorithm.getValue ( ) == "bitParallelIndex"
 		  || algorithm.getValue ( ) == "compressedBitParallelIndex"
-		  || algorithm.getValue ( ) == "suffixArray" ) {
+		  || algorithm.getValue ( ) == "suffixArray"
+		  || algorithm.getValue ( ) == "suffixAutomaton" ) {
 			std::string input;
 			if ( subjectInput.getValue ( ).size ( ) == 0 )
 				input = "-";
@@ -193,6 +195,8 @@ int main ( int argc, char * argv[] ) {
 			cliCommand = "execute stringology::indexing::CompressedBitParallelIndexConstruction $subject > $output";
 		} else if ( algorithm.getValue ( ) == "suffixArray" ) {
 			cliCommand = "execute stringology::indexing::SuffixArrayNaive $subject > $output";
+		} else if ( algorithm.getValue ( ) == "suffixAutomaton" ) {
+			cliCommand = "execute stringology::indexing::ExactSuffixAutomaton $subject > $output";
 
 		} else if ( algorithm.getValue ( ) == "bndmMatcher" ) {
 			cliCommand = "execute stringology::matching::BNDMMatcherConstruction $pattern > $output";
-- 
GitLab