diff --git a/alib2algo/src/stringology/query/SuffixAutomatonFactors.cpp b/alib2algo/src/stringology/query/SuffixAutomatonFactors.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7d457596d7174279b98fce32c893d1847f0c5665 --- /dev/null +++ b/alib2algo/src/stringology/query/SuffixAutomatonFactors.cpp @@ -0,0 +1,19 @@ +/* + * SuffixAutomatonFactors.cpp + * + * Created on: 10. 1. 2017 + * Author: Jan Travnicek + */ + +#include "SuffixAutomatonFactors.h" +#include <registration/AlgoRegistration.hpp> + +namespace stringology { + +namespace query { + +auto SuffixAutomatonFactorsLinearString = registration::AbstractRegister < SuffixAutomatonFactors, ext::set < unsigned >, const automaton::DFA < > &, const string::LinearString < > & > ( SuffixAutomatonFactors::query ); /* constructs a registration::AbstractRegister object for SuffixAutomatonFactors::query taking DFA < > and LinearString < > (default template arguments) and returning ext::set < unsigned >; presumably this is what lets the command line resolve the algorithm by name - confirm against AlgoRegistration.hpp */ + +} /* namespace query */ + +} /* namespace stringology */ diff --git a/alib2algo/src/stringology/query/SuffixAutomatonFactors.h b/alib2algo/src/stringology/query/SuffixAutomatonFactors.h new file mode 100644 index 0000000000000000000000000000000000000000..42a0340b780cdba9742b03b4be5f27c98fe75e46 --- /dev/null +++ b/alib2algo/src/stringology/query/SuffixAutomatonFactors.h @@ -0,0 +1,76 @@ +/* + * SuffixAutomatonFactors.h + * + * Created on: 10. 1. 2017 + * Author: Jan Travnicek + */ + +#ifndef SUFFIX_AUTOMATON_FACTORS_H_ +#define SUFFIX_AUTOMATON_FACTORS_H_ + +#include <automaton/FSM/DFA.h> +#include <string/LinearString.h> + +#include <automaton/run/Run.h> /* NOTE(review): query ( ) below uses std::deque but <deque> is not included here directly - presumably it arrives transitively; confirm */ +# + +namespace stringology { + +namespace query { + +/** + * Query suffix automaton for given string. + * + * Source: ?? 
+ */ + +class SuffixAutomatonFactors { +public: + /** + * Query a suffix automaton; the automaton is first run over the string and then every path leaving the reached state is walked + * @param suffixAutomaton suffix automaton to query + * @param string string to query by + * @return occurrences of factors, one value per distance at which a final state was met; empty set when the run reports failure + */ + template < class SymbolType, class StateType > + static ext::set < unsigned > query ( const automaton::DFA < SymbolType, StateType > & suffixAutomaton, const string::LinearString < SymbolType > & string ); + +}; + +template < class SymbolType, class StateType > +ext::set < unsigned > SuffixAutomatonFactors::query ( const automaton::DFA < SymbolType, StateType > & suffixAutomaton, const string::LinearString < SymbolType > & string ) { + ext::tuple < bool, StateType, ext::set < unsigned > > run = automaton::run::Run::calculateState ( suffixAutomaton, string ); /* run = ( flag, state, set ); only the flag and the state are read below */ + if ( ! std::get < 0 > ( run ) ) /* flag is false - presumably the automaton could not read the whole string; confirm against Run::calculateState */ + return { }; + + std::deque < std::pair < StateType, unsigned > > open = { { std::get < 1 > ( run ), 0u } }; /* used as a stack of ( state, number of transitions taken since the state the run ended in ) */ + ext::vector < unsigned > tmp; /* distances at which a final state was popped */ + unsigned max = 0; /* largest distance popped so far */ + while ( ! open.empty ( ) ) { /* NOTE(review): there is no visited set - a state is processed once per path that leads to it, and the loop only ends if no cycle is reachable; presumably guaranteed for a suffix automaton, confirm */ + std::pair < StateType, unsigned > cur = std::move ( open.back ( ) ); + open.pop_back ( ); + + if ( suffixAutomaton.getFinalStates ( ).count ( cur.first ) ) + tmp.push_back ( cur.second ); + + if ( cur.second > max ) + max = cur.second; + + for ( const auto & transition : suffixAutomaton.getTransitionsFromState ( cur.first ) ) /* follow every outgoing transition, one step further from the start state of the walk */ + open.emplace_back ( transition.second, cur.second + 1 ); + } + + unsigned subjectSize = max + string.getContent ( ).size ( ); /* NOTE(review): treats longest explored distance plus pattern length as the size of the indexed text - confirm this holds for every automaton passed in */ + + ext::set < unsigned > res; + for ( unsigned dist : tmp ) + res.insert ( subjectSize - dist ); /* every dist is at most max, so the difference is never smaller than the pattern length */ + + return res; +} + +} /* namespace query */ + +} /* namespace stringology */ + +#endif /* SUFFIX_AUTOMATON_FACTORS_H_ */ diff --git a/aquery2/src/aquery.cpp b/aquery2/src/aquery.cpp index ca1733ebfd7aa4b9aa3188c633d0e82fa3fb9b11..2f5fdfe86c2966c3938bdc5cf04abfe47e975f28 100644 --- a/aquery2/src/aquery.cpp +++ b/aquery2/src/aquery.cpp @@ -25,6 +25,7 @@ int main ( int argc, char * argv[] ) { std::vector < std::string > allowed; allowed.push_back ( 
"suffixTrieFactors" ); allowed.push_back ( "suffixArrayFactors" ); + allowed.push_back ( "suffixAutomatonFactors" ); allowed.push_back ( "positionHeapFactors" ); allowed.push_back ( "bitParallelismFactors" ); allowed.push_back ( "compressedBitParallelismFactors" ); @@ -68,6 +69,7 @@ int main ( int argc, char * argv[] ) { if ( query.getValue ( ) == "suffixTrieFactors" || query.getValue ( ) == "suffixArrayFactors" + || query.getValue ( ) == "suffixAutomatonFactors" || query.getValue ( ) == "positionHeapFactors" || query.getValue ( ) == "bitParallelismFactors" || query.getValue ( ) == "compressedBitParallelismFactors" @@ -96,6 +98,8 @@ int main ( int argc, char * argv[] ) { cliCommand = "execute stringology::query::SuffixTrieFactors $index $pattern > $output"; } else if ( query.getValue ( ) == "suffixArrayFactors" ) { cliCommand = "execute stringology::query::SuffixArrayFactors $index $pattern > $output"; + } else if ( query.getValue ( ) == "suffixAutomatonFactors" ) { + cliCommand = "execute stringology::query::SuffixAutomatonFactors $index $pattern > $output"; } else if ( query.getValue ( ) == "positionHeapFactors" ) { cliCommand = "execute stringology::query::PositionHeapFactors $index $pattern > $output"; } else if ( query.getValue ( ) == "bitParallelismFactors" ) {