From cfd78d2d4dd06651ec506ed2157806251833da06 Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Thu, 26 Oct 2017 13:52:39 +0200
Subject: [PATCH] add suffix automaton query algo

---
 .../query/SuffixAutomatonFactors.cpp          | 19 +++++
 .../query/SuffixAutomatonFactors.h            | 76 +++++++++++++++++++
 aquery2/src/aquery.cpp                        |  4 +
 3 files changed, 99 insertions(+)
 create mode 100644 alib2algo/src/stringology/query/SuffixAutomatonFactors.cpp
 create mode 100644 alib2algo/src/stringology/query/SuffixAutomatonFactors.h

diff --git a/alib2algo/src/stringology/query/SuffixAutomatonFactors.cpp b/alib2algo/src/stringology/query/SuffixAutomatonFactors.cpp
new file mode 100644
index 0000000000..7d457596d7
--- /dev/null
+++ b/alib2algo/src/stringology/query/SuffixAutomatonFactors.cpp
@@ -0,0 +1,19 @@
+/*
+ * SuffixAutomatonFactors.cpp
+ *
+ *  Created on: 10. 1. 2017
+ *      Author: Jan Travnicek
+ */
+
+#include "SuffixAutomatonFactors.h"
+#include <registration/AlgoRegistration.hpp>
+
+namespace stringology {
+
+namespace query {
+// Hands SuffixAutomatonFactors::query (default-parameter DFA and LinearString, result ext::set < unsigned >) to registration::AbstractRegister; presumably this is what lets the "execute stringology::query::SuffixAutomatonFactors" command find it - confirm against AlgoRegistration.hpp.
+auto SuffixAutomatonFactorsLinearString = registration::AbstractRegister < SuffixAutomatonFactors, ext::set < unsigned >, const automaton::DFA < > &, const string::LinearString < > & > ( SuffixAutomatonFactors::query );
+
+} /* namespace query */
+
+} /* namespace stringology */
diff --git a/alib2algo/src/stringology/query/SuffixAutomatonFactors.h b/alib2algo/src/stringology/query/SuffixAutomatonFactors.h
new file mode 100644
index 0000000000..42a0340b78
--- /dev/null
+++ b/alib2algo/src/stringology/query/SuffixAutomatonFactors.h
@@ -0,0 +1,95 @@
+/*
+ * SuffixAutomatonFactors.h
+ *
+ *  Created on: 10. 1. 2017
+ *      Author: Jan Travnicek
+ */
+
+#ifndef SUFFIX_AUTOMATON_FACTORS_H_
+#define SUFFIX_AUTOMATON_FACTORS_H_
+
+#include <deque>
+
+#include <automaton/FSM/DFA.h>
+#include <string/LinearString.h>
+
+#include <automaton/run/Run.h>
+
+namespace stringology {
+
+namespace query {
+
+/**
+ * Find all occurrences of a string in a text indexed by its suffix automaton.
+ *
+ * Source: ??
+ */
+
+class SuffixAutomatonFactors {
+public:
+	/**
+	 * Query a suffix automaton.
+	 *
+	 * @param suffixAutomaton suffix automaton of the subject text; it has to be acyclic, the searches in the implementation do not terminate otherwise
+	 * @param string the pattern to look for in the subject
+	 * @return for each occurrence of the pattern the position just past its last symbol (start index + pattern length), empty set when the pattern does not occur
+	 */
+	template < class SymbolType, class StateType >
+	static ext::set < unsigned > query ( const automaton::DFA < SymbolType, StateType > & suffixAutomaton, const string::LinearString < SymbolType > & string );
+
+};
+
+template < class SymbolType, class StateType >
+ext::set < unsigned > SuffixAutomatonFactors::query ( const automaton::DFA < SymbolType, StateType > & suffixAutomaton, const string::LinearString < SymbolType > & string ) {
+	// Read the pattern; the first tuple element presumably tells whether the automaton could consume all of it (confirm against Run::calculateState).
+	ext::tuple < bool, StateType, ext::set < unsigned > > run = automaton::run::Run::calculateState ( suffixAutomaton, string );
+	if ( ! std::get < 0 > ( run ) )
+		return { };
+
+	// Enumerate every path leaving the state reached by the pattern. A path that ends in a final state
+	// spells the rest of a suffix of the subject, so its length is the distance between the end of one
+	// occurrence of the pattern and the end of the subject.
+	std::deque < std::pair < StateType, unsigned > > open = { { std::get < 1 > ( run ), 0u } };
+	ext::vector < unsigned > tmp;
+	while ( ! open.empty ( ) ) {
+		std::pair < StateType, unsigned > cur = std::move ( open.back ( ) );
+		open.pop_back ( );
+
+		if ( suffixAutomaton.getFinalStates ( ).count ( cur.first ) )
+			tmp.push_back ( cur.second );
+
+		for ( const auto & transition : suffixAutomaton.getTransitionsFromState ( cur.first ) )
+			open.emplace_back ( transition.second, cur.second + 1 );
+	}
+
+	// The subject length equals the length of the longest path from the initial state. It cannot be
+	// derived from the pattern's state alone: longest outgoing path + pattern length is only correct
+	// when the first occurrence starts at index 0. Level k holds the states reachable in exactly k steps.
+	unsigned subjectSize = 0;
+	ext::set < StateType > level;
+	level.insert ( suffixAutomaton.getInitialState ( ) );
+	while ( true ) {
+		ext::set < StateType > next;
+		for ( const StateType & state : level )
+			for ( const auto & transition : suffixAutomaton.getTransitionsFromState ( state ) )
+				next.insert ( transition.second );
+
+		if ( next.empty ( ) )
+			break;
+
+		level = std::move ( next );
+		++ subjectSize;
+	}
+
+	ext::set < unsigned > res;
+	for ( unsigned dist : tmp )
+		res.insert ( subjectSize - dist );
+
+	return res;
+}
+
+} /* namespace query */
+
+} /* namespace stringology */
+
+#endif /* SUFFIX_AUTOMATON_FACTORS_H_ */
diff --git a/aquery2/src/aquery.cpp b/aquery2/src/aquery.cpp
index ca1733ebfd..2f5fdfe86c 100644
--- a/aquery2/src/aquery.cpp
+++ b/aquery2/src/aquery.cpp
@@ -25,6 +25,7 @@ int main ( int argc, char * argv[] ) {
 		std::vector < std::string > allowed;
 		allowed.push_back ( "suffixTrieFactors" );
 		allowed.push_back ( "suffixArrayFactors" );
+		allowed.push_back ( "suffixAutomatonFactors" );
 		allowed.push_back ( "positionHeapFactors" );
 		allowed.push_back ( "bitParallelismFactors" );
 		allowed.push_back ( "compressedBitParallelismFactors" );
@@ -68,6 +69,7 @@ int main ( int argc, char * argv[] ) {
 
 		if ( query.getValue ( ) == "suffixTrieFactors"
 		  || query.getValue ( ) == "suffixArrayFactors"
+		  || query.getValue ( ) == "suffixAutomatonFactors"
 		  || query.getValue ( ) == "positionHeapFactors"
 		  || query.getValue ( ) == "bitParallelismFactors"
 		  || query.getValue ( ) == "compressedBitParallelismFactors"
@@ -96,6 +98,8 @@ int main ( int argc, char * argv[] ) {
 			cliCommand = "execute stringology::query::SuffixTrieFactors $index $pattern > $output";
 		} else if ( query.getValue ( ) == "suffixArrayFactors" ) {
 			cliCommand = "execute stringology::query::SuffixArrayFactors $index $pattern > $output";
+		} else if ( query.getValue ( ) == "suffixAutomatonFactors" ) {
+			cliCommand = "execute stringology::query::SuffixAutomatonFactors $index $pattern > $output";
 		} else if ( query.getValue ( ) == "positionHeapFactors" ) {
 			cliCommand = "execute stringology::query::PositionHeapFactors $index $pattern > $output";
 		} else if ( query.getValue ( ) == "bitParallelismFactors" ) {
-- 
GitLab