From d34777acbf8a521781c2db4e89c54ff2bd49cd9a Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Thu, 26 Oct 2017 13:52:00 +0200 Subject: [PATCH] suffix automaton creation in stringology binary --- .../stringology/indexing/ExactSuffixAutomaton.h | 16 +++++++++++----- astringology2/src/astringology.cpp | 6 +++++- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/alib2algo/src/stringology/indexing/ExactSuffixAutomaton.h b/alib2algo/src/stringology/indexing/ExactSuffixAutomaton.h index c190c5ea1a..fa2ecfde36 100644 --- a/alib2algo/src/stringology/indexing/ExactSuffixAutomaton.h +++ b/alib2algo/src/stringology/indexing/ExactSuffixAutomaton.h @@ -8,6 +8,8 @@ #include <automaton/FSM/DFA.h> #include <string/LinearString.h> +#include <global/GlobalData.h> + namespace stringology { namespace indexing { @@ -32,8 +34,15 @@ automaton::DFA < SymbolType, unsigned > ExactSuffixAutomaton::construct ( const std::vector < std::pair < unsigned, int > > suffixLinks = { { ( unsigned ) -1, 0 } }; //vector is fine, the state number is exactly the index to the vector unsigned lastState = 0; - for ( const SymbolType & symbol : pattern.getContent ( ) ) + if ( common::GlobalData::verbose ) + std::clog << "String size " << pattern.getContent ( ).size ( ) << std::endl; + + for ( const SymbolType & symbol : pattern.getContent ( ) ) { + if ( common::GlobalData::verbose && lastState % 1000 == 0 ) + std::clog << "Progress " << lastState << std::endl; + suffixAutomatonAddSymbol ( suffixAutomaton, symbol, suffixLinks, lastState ); + } while ( lastState != ( unsigned ) -1 ) { suffixAutomaton.addFinalState ( lastState ); @@ -79,10 +88,7 @@ void ExactSuffixAutomaton::suffixAutomatonAddSymbol ( automaton::DFA < SymbolTyp for ( const auto & transition : suffixAutomaton.getTransitionsFromState ( qState ) ) suffixAutomaton.addTransition ( cloneState, transition.first.second, transition.second ); - while ( kState != ( unsigned ) -1 - && suffixAutomaton.getTransitions ( ).find ( { kState, symbol } ) != suffixAutomaton.getTransitions ( ).end ( ) - && suffixAutomaton.getTransitions ( ).find ( { kState, symbol } )->second == qState ) { - suffixAutomaton.removeTransition ( kState, symbol, qState ); + while ( kState != ( unsigned ) -1 && suffixAutomaton.removeTransition ( kState, symbol, qState ) ) { suffixAutomaton.addTransition ( kState, symbol, cloneState ); kState = suffixLinks [ kState ].first; } diff --git a/astringology2/src/astringology.cpp b/astringology2/src/astringology.cpp index 4a05c94f1c..b40fa1c087 100644 --- a/astringology2/src/astringology.cpp +++ b/astringology2/src/astringology.cpp @@ -45,6 +45,7 @@ int main ( int argc, char * argv[] ) { allowed.push_back ( "bndmMatcher" ); allowed.push_back ( "compressedBitParallelIndex" ); allowed.push_back ( "suffixArray" ); + allowed.push_back ( "suffixAutomaton" ); allowed.push_back ( "borderArray" ); allowed.push_back ( "badCharacterShiftTable" ); @@ -95,7 +96,8 @@ int main ( int argc, char * argv[] ) { || algorithm.getValue ( ) == "positionHeap" || algorithm.getValue ( ) == "bitParallelIndex" || algorithm.getValue ( ) == "compressedBitParallelIndex" - || algorithm.getValue ( ) == "suffixArray" ) { + || algorithm.getValue ( ) == "suffixArray" + || algorithm.getValue ( ) == "suffixAutomaton" ) { std::string input; if ( subjectInput.getValue ( ).size ( ) == 0 ) input = "-"; @@ -193,6 +195,8 @@ int main ( int argc, char * argv[] ) { cliCommand = "execute stringology::indexing::CompressedBitParallelIndexConstruction $subject > $output"; } else if ( algorithm.getValue ( ) == "suffixArray" ) { cliCommand = "execute stringology::indexing::SuffixArrayNaive $subject > $output"; + } else if ( algorithm.getValue ( ) == "suffixAutomaton" ) { + cliCommand = "execute stringology::indexing::ExactSuffixAutomaton $subject > $output"; } else if ( algorithm.getValue ( ) == "bndmMatcher" ) { cliCommand = "execute stringology::matching::BNDMMatcherConstruction $pattern > $output"; -- GitLab