From cfd78d2d4dd06651ec506ed2157806251833da06 Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Thu, 26 Oct 2017 13:52:39 +0200 Subject: [PATCH] add suffix automaton query algo --- .../query/SuffixAutomatonFactors.cpp | 19 +++++ .../query/SuffixAutomatonFactors.h | 76 +++++++++++++++++++ aquery2/src/aquery.cpp | 4 + 3 files changed, 99 insertions(+) create mode 100644 alib2algo/src/stringology/query/SuffixAutomatonFactors.cpp create mode 100644 alib2algo/src/stringology/query/SuffixAutomatonFactors.h diff --git a/alib2algo/src/stringology/query/SuffixAutomatonFactors.cpp b/alib2algo/src/stringology/query/SuffixAutomatonFactors.cpp new file mode 100644 index 0000000000..7d457596d7 --- /dev/null +++ b/alib2algo/src/stringology/query/SuffixAutomatonFactors.cpp @@ -0,0 +1,19 @@ +/* + * SuffixAutomatonFactors.cpp + * + * Created on: 10. 1. 2017 + * Author: Jan Travnicek + */ + +#include "SuffixAutomatonFactors.h" +#include <registration/AlgoRegistration.hpp> + +namespace stringology { + +namespace query { + +/* Registers SuffixAutomatonFactors::query for ( const automaton::DFA < > &, const string::LinearString < > & ) arguments with an ext::set < unsigned > result. NOTE(review): presumably this registration is what makes the algorithm available to the command line by name -- confirm against registration::AbstractRegister. */ auto SuffixAutomatonFactorsLinearString = registration::AbstractRegister < SuffixAutomatonFactors, ext::set < unsigned >, const automaton::DFA < > &, const string::LinearString < > & > ( SuffixAutomatonFactors::query ); + +} /* namespace query */ + +} /* namespace stringology */ diff --git a/alib2algo/src/stringology/query/SuffixAutomatonFactors.h b/alib2algo/src/stringology/query/SuffixAutomatonFactors.h new file mode 100644 index 0000000000..42a0340b78 --- /dev/null +++ b/alib2algo/src/stringology/query/SuffixAutomatonFactors.h @@ -0,0 +1,76 @@ +/* + * SuffixAutomatonFactors.h + * + * Created on: 10. 1. 2017 + * Author: Jan Travnicek + */ + +#ifndef SUFFIX_AUTOMATON_FACTORS_H_ +#define SUFFIX_AUTOMATON_FACTORS_H_ + +#include <automaton/FSM/DFA.h> +#include <string/LinearString.h> + +#include <automaton/run/Run.h> +# + +namespace stringology { + +namespace query { + +/** + * Query a suffix automaton (given as a DFA) for a given linear string. + * + * Source: ?? 
+ */ + +class SuffixAutomatonFactors { +public: + /** + * Query a suffix automaton for a pattern. + * + * The pattern is first run through the automaton with automaton::run::Run::calculateState; when the first element of the returned tuple is false the result is empty. Otherwise all paths leaving the reached state are followed and one value is reported for every final state encountered. + * + * @param suffixAutomaton suffix automaton to query + * @param string string to query by + * @return occurrences of factors; each value is ( longest path length from the reached state + pattern length ) minus ( path length to a final state ) -- presumably the position in the indexed subject at which an occurrence ends, TODO confirm + */ + template < class SymbolType, class StateType > + static ext::set < unsigned > query ( const automaton::DFA < SymbolType, StateType > & suffixAutomaton, const string::LinearString < SymbolType > & string ); + +}; + +/* Definition of SuffixAutomatonFactors::query; see the declaration in the class for the contract. */ template < class SymbolType, class StateType > +ext::set < unsigned > SuffixAutomatonFactors::query ( const automaton::DFA < SymbolType, StateType > & suffixAutomaton, const string::LinearString < SymbolType > & string ) { + /* run the pattern; element 0 is the success flag, element 1 the state that was reached */ ext::tuple < bool, StateType, ext::set < unsigned > > run = automaton::run::Run::calculateState ( suffixAutomaton, string ); + if ( ! std::get < 0 > ( run ) ) + return { }; + + /* used as a stack ( back / pop_back ); each entry pairs a state with the number of transitions taken from the reached state */ std::deque < std::pair < StateType, unsigned > > open = { { std::get < 1 > ( run ), 0u } }; + /* path lengths at which a final state was encountered */ ext::vector < unsigned > tmp; + /* longest path length seen so far */ unsigned max = 0; + /* no visited set is kept: a state reachable by several paths is processed once per path, and a cycle reachable from the start state would keep this loop from terminating */ while ( ! open.empty ( ) ) { + std::pair < StateType, unsigned > cur = std::move ( open.back ( ) ); + open.pop_back ( ); + + if ( suffixAutomaton.getFinalStates ( ).count ( cur.first ) ) + tmp.push_back ( cur.second ); + + if ( cur.second > max ) + max = cur.second; + + for ( const auto & transition : suffixAutomaton.getTransitionsFromState ( cur.first ) ) + open.emplace_back ( transition.second, cur.second + 1 ); + } + + /* longest path plus pattern length; presumably the length of the indexed subject -- TODO confirm */ unsigned subjectSize = max + string.getContent ( ).size ( ); + + ext::set < unsigned > res; + /* convert each recorded path length into a value counted from the front */ for ( unsigned dist : tmp ) + res.insert ( subjectSize - dist ); + + return res; +} + +} /* namespace query */ + +} /* namespace stringology */ + +#endif /* SUFFIX_AUTOMATON_FACTORS_H_ */ diff --git a/aquery2/src/aquery.cpp b/aquery2/src/aquery.cpp index ca1733ebfd..2f5fdfe86c 100644 --- a/aquery2/src/aquery.cpp +++ b/aquery2/src/aquery.cpp @@ -25,6 +25,7 @@ int main ( int argc, char * argv[] ) { std::vector < std::string > allowed; allowed.push_back ( "suffixTrieFactors" ); allowed.push_back ( 
"suffixArrayFactors" ); + allowed.push_back ( "suffixAutomatonFactors" ); allowed.push_back ( "positionHeapFactors" ); allowed.push_back ( "bitParallelismFactors" ); allowed.push_back ( "compressedBitParallelismFactors" ); @@ -68,6 +69,7 @@ int main ( int argc, char * argv[] ) { if ( query.getValue ( ) == "suffixTrieFactors" || query.getValue ( ) == "suffixArrayFactors" + || query.getValue ( ) == "suffixAutomatonFactors" || query.getValue ( ) == "positionHeapFactors" || query.getValue ( ) == "bitParallelismFactors" || query.getValue ( ) == "compressedBitParallelismFactors" @@ -96,6 +98,8 @@ int main ( int argc, char * argv[] ) { cliCommand = "execute stringology::query::SuffixTrieFactors $index $pattern > $output"; } else if ( query.getValue ( ) == "suffixArrayFactors" ) { cliCommand = "execute stringology::query::SuffixArrayFactors $index $pattern > $output"; + } else if ( query.getValue ( ) == "suffixAutomatonFactors" ) { + cliCommand = "execute stringology::query::SuffixAutomatonFactors $index $pattern > $output"; } else if ( query.getValue ( ) == "positionHeapFactors" ) { cliCommand = "execute stringology::query::PositionHeapFactors $index $pattern > $output"; } else if ( query.getValue ( ) == "bitParallelismFactors" ) { -- GitLab