Skip to content
Snippets Groups Projects
Commit 6192d262 authored by Tomáš Pecka, committed by Tomáš Pecka
Browse files

algo: Split NondeterministicExactSuffixEpsilonAutomaton into NFA and EpsilonNFA variants

parent 9fbf893f
No related branches found
No related tags found
1 merge request: !96 "Bp shushiri rebase"
...@@ -7,6 +7,6 @@ ...@@ -7,6 +7,6 @@
   
namespace { namespace {
   
auto SuffixAutomatonNondeterministicLinearString = registration::AbstractRegister < stringology::indexing::NondeterministicExactSuffixAutomaton, automaton::EpsilonNFA < DefaultSymbolType, unsigned >, const string::LinearString < > & > ( stringology::indexing::NondeterministicExactSuffixAutomaton::construct ); auto SuffixAutomatonNondeterministicLinearString = registration::AbstractRegister < stringology::indexing::NondeterministicExactSuffixAutomaton, automaton::NFA < DefaultSymbolType, unsigned >, const string::LinearString < > & > ( stringology::indexing::NondeterministicExactSuffixAutomaton::construct );
   
} /* namespace */ } /* namespace */
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#ifndef NONDETERMINISTIC_EXACT_SUFFIX_AUTOMATON_H_ #ifndef NONDETERMINISTIC_EXACT_SUFFIX_AUTOMATON_H_
#define NONDETERMINISTIC_EXACT_SUFFIX_AUTOMATON_H_ #define NONDETERMINISTIC_EXACT_SUFFIX_AUTOMATON_H_
   
#include <automaton/FSM/EpsilonNFA.h> #include <automaton/FSM/NFA.h>
#include <string/LinearString.h> #include <string/LinearString.h>
   
namespace stringology { namespace stringology {
...@@ -19,13 +19,13 @@ public: ...@@ -19,13 +19,13 @@ public:
* @return nondeterministic suffix automaton for given pattern. * @return nondeterministic suffix automaton for given pattern.
*/ */
template < class SymbolType > template < class SymbolType >
static automaton::EpsilonNFA < SymbolType, unsigned > construct ( const string::LinearString < SymbolType > & pattern ); static automaton::NFA < SymbolType, unsigned > construct ( const string::LinearString < SymbolType > & pattern );
   
}; };
   
template < class SymbolType > template < class SymbolType >
automaton::EpsilonNFA < SymbolType, unsigned > NondeterministicExactSuffixAutomaton::construct ( const string::LinearString < SymbolType > & pattern ) { automaton::NFA < SymbolType, unsigned > NondeterministicExactSuffixAutomaton::construct ( const string::LinearString < SymbolType > & pattern ) {
automaton::EpsilonNFA < SymbolType, unsigned > nfaSuffixAutomaton ( 0 ); automaton::NFA < SymbolType, unsigned > nfaSuffixAutomaton ( 0 );
   
nfaSuffixAutomaton.setInputAlphabet ( pattern.getAlphabet ( ) ); nfaSuffixAutomaton.setInputAlphabet ( pattern.getAlphabet ( ) );
   
...@@ -33,10 +33,10 @@ automaton::EpsilonNFA < SymbolType, unsigned > NondeterministicExactSuffixAutoma ...@@ -33,10 +33,10 @@ automaton::EpsilonNFA < SymbolType, unsigned > NondeterministicExactSuffixAutoma
for ( const SymbolType & symbol : pattern.getContent ( ) ) { for ( const SymbolType & symbol : pattern.getContent ( ) ) {
nfaSuffixAutomaton.addState ( ++ i ); nfaSuffixAutomaton.addState ( ++ i );
nfaSuffixAutomaton.addTransition ( i - 1, symbol, i ); nfaSuffixAutomaton.addTransition ( i - 1, symbol, i );
nfaSuffixAutomaton.addTransition ( 0, i ); nfaSuffixAutomaton.addTransition ( 0, symbol, i );
} }
   
nfaSuffixAutomaton.addFinalState ( i ); nfaSuffixAutomaton.setFinalStates ( { 0, i } );
   
return nfaSuffixAutomaton; return nfaSuffixAutomaton;
} }
......
/*
* Author: Radovan Cerveny
*/
#include "NondeterministicExactSuffixEpsilonAutomaton.h"
#include <registration/AlgoRegistration.hpp>
namespace {
// Registration object for NondeterministicExactSuffixEpsilonAutomaton::construct.
// The template arguments name the algorithm class, the result type
// ( automaton::EpsilonNFA < DefaultSymbolType, unsigned > ) and the single parameter type
// ( const string::LinearString < > & ).
// The anonymous namespace gives the object internal linkage, so its name cannot clash with
// identically named registration objects in other translation units.
// NOTE(review): what registration::AbstractRegister does with the callback is defined outside this file.
auto SuffixAutomatonNondeterministicLinearString = registration::AbstractRegister < stringology::indexing::NondeterministicExactSuffixEpsilonAutomaton, automaton::EpsilonNFA < DefaultSymbolType, unsigned >, const string::LinearString < > & > ( stringology::indexing::NondeterministicExactSuffixEpsilonAutomaton::construct );
} /* namespace */
/*
* Author: Radovan Cerveny
*/
#ifndef NONDETERMINISTIC_EXACT_SUFFIX_EPSILON_AUTOMATON_H_
#define NONDETERMINISTIC_EXACT_SUFFIX_EPSILON_AUTOMATON_H_
#include <automaton/FSM/EpsilonNFA.h>
#include <string/LinearString.h>
namespace stringology {
namespace indexing {
/**
 * Constructs a nondeterministic suffix automaton with epsilon transitions for a linear string.
 * The class only groups the static construct method.
 */
class NondeterministicExactSuffixEpsilonAutomaton {
public:
/**
 * Builds an epsilon-NFA over the alphabet of the pattern with states 0 .. n, where n is the
 * number of symbols in the pattern. State 0 is initial, state k - 1 goes to state k on the k-th
 * symbol of the pattern, state 0 has an epsilon transition to every other state, and state n
 * is the only final state.
 *
 * @tparam SymbolType type of the symbols of the pattern
 * @param pattern the string to build the automaton for
 * @return nondeterministic suffix automaton (with epsilon transitions) for given pattern.
 */
template < class SymbolType >
static automaton::EpsilonNFA < SymbolType, unsigned > construct ( const string::LinearString < SymbolType > & pattern );
};
/**
 * Builds the epsilon-NFA suffix automaton of the pattern.
 *
 * The states form a chain 0 -> 1 -> ... -> n labelled by the consecutive symbols of the
 * pattern. Every chain state other than 0 is additionally reachable from state 0 by an
 * epsilon transition, and the last chain state is final. For an empty pattern the automaton
 * consists of state 0 only, which is then final.
 *
 * @tparam SymbolType type of the symbols of the pattern
 * @param pattern the string to build the automaton for
 * @return the constructed automaton with epsilon transitions
 */
template < class SymbolType >
automaton::EpsilonNFA < SymbolType, unsigned > NondeterministicExactSuffixEpsilonAutomaton::construct ( const string::LinearString < SymbolType > & pattern ) {
	automaton::EpsilonNFA < SymbolType, unsigned > suffixAutomaton ( 0 );

	suffixAutomaton.setInputAlphabet ( pattern.getAlphabet ( ) );

	// End of the chain built so far; it starts at the initial state 0.
	unsigned chainEnd = 0;

	for ( const SymbolType & letter : pattern.getContent ( ) ) {
		unsigned appended = chainEnd + 1;

		suffixAutomaton.addState ( appended );

		// Extend the chain by one symbol of the pattern.
		suffixAutomaton.addTransition ( chainEnd, letter, appended );

		// Epsilon shortcut from the initial state into the new chain state.
		suffixAutomaton.addTransition ( 0, appended );

		chainEnd = appended;
	}

	// Only the end of the chain accepts.
	suffixAutomaton.addFinalState ( chainEnd );

	return suffixAutomaton;
}
} /* namespace indexing */
} /* namespace stringology */
#endif /* NONDETERMINISTIC_EXACT_SUFFIX_EPSILON_AUTOMATON_H_ */
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
#include <automaton/simplify/Minimize.h> #include <automaton/simplify/Minimize.h>
#include <automaton/simplify/Rename.h> #include <automaton/simplify/Rename.h>
#include <automaton/simplify/EpsilonRemoverIncoming.h> #include <automaton/simplify/EpsilonRemoverIncoming.h>
#include <stringology/indexing/NondeterministicExactSuffixAutomaton.h> #include <stringology/indexing/NondeterministicExactSuffixEpsilonAutomaton.h>
#include <stringology/indexing/ExactSuffixAutomaton.h> #include <stringology/indexing/ExactSuffixAutomaton.h>
   
namespace stringology { namespace stringology {
...@@ -36,7 +36,7 @@ indexes::stringology::SuffixAutomaton < SymbolType > NaiveDAWGMatcherConstructio ...@@ -36,7 +36,7 @@ indexes::stringology::SuffixAutomaton < SymbolType > NaiveDAWGMatcherConstructio
reverse ( patternData.begin ( ), patternData.end ( ) ); reverse ( patternData.begin ( ), patternData.end ( ) );
string::LinearString < SymbolType > reversedPattern ( pattern.getAlphabet ( ), std::move ( patternData ) ); string::LinearString < SymbolType > reversedPattern ( pattern.getAlphabet ( ), std::move ( patternData ) );
   
automaton::EpsilonNFA < SymbolType, unsigned > nfaSuffixAutomaton = stringology::indexing::NondeterministicExactSuffixAutomaton::construct ( reversedPattern ); automaton::EpsilonNFA < SymbolType, unsigned > nfaSuffixAutomaton = stringology::indexing::NondeterministicExactSuffixEpsilonAutomaton::construct ( reversedPattern );
automaton::DFA < SymbolType, unsigned > res = automaton::simplify::Rename::rename ( automaton::simplify::Minimize::minimize ( automaton::determinize::Determinize::determinize ( automaton::simplify::EpsilonRemoverIncoming::remove ( nfaSuffixAutomaton ) ) ) ); automaton::DFA < SymbolType, unsigned > res = automaton::simplify::Rename::rename ( automaton::simplify::Minimize::minimize ( automaton::determinize::Determinize::determinize ( automaton::simplify::EpsilonRemoverIncoming::remove ( nfaSuffixAutomaton ) ) ) );
   
return indexes::stringology::SuffixAutomaton < SymbolType > ( std::move ( res ), pattern.getContent ( ).size ( ) ); return indexes::stringology::SuffixAutomaton < SymbolType > ( std::move ( res ), pattern.getContent ( ).size ( ) );
......
...@@ -2,6 +2,14 @@ ...@@ -2,6 +2,14 @@
   
#include "string/LinearString.h" #include "string/LinearString.h"
#include "stringology/indexing/ExactSuffixAutomaton.h" #include "stringology/indexing/ExactSuffixAutomaton.h"
#include "stringology/indexing/NondeterministicExactSuffixAutomaton.h"
#include "stringology/indexing/NondeterministicExactSuffixEpsilonAutomaton.h"
#include "automaton/determinize/Determinize.h"
#include "automaton/simplify/Minimize.h"
#include "automaton/simplify/Normalize.h"
#include "automaton/simplify/Trim.h"
#include "automaton/simplify/EpsilonRemoverIncoming.h"
   
TEST_CASE ( "Exact Suffix Automaton", "[unit][algo][stringology][indexing]" ) { TEST_CASE ( "Exact Suffix Automaton", "[unit][algo][stringology][indexing]" ) {
SECTION ( "Test construction" ) { SECTION ( "Test construction" ) {
...@@ -9,6 +17,8 @@ TEST_CASE ( "Exact Suffix Automaton", "[unit][algo][stringology][indexing]" ) { ...@@ -9,6 +17,8 @@ TEST_CASE ( "Exact Suffix Automaton", "[unit][algo][stringology][indexing]" ) {
string::LinearString < char > pattern ( "atatac" ); string::LinearString < char > pattern ( "atatac" );
   
indexes::stringology::SuffixAutomaton < char > suffixAutomaton = stringology::indexing::ExactSuffixAutomaton::construct ( pattern ); indexes::stringology::SuffixAutomaton < char > suffixAutomaton = stringology::indexing::ExactSuffixAutomaton::construct ( pattern );
automaton::NFA < char, unsigned > sa = stringology::indexing::NondeterministicExactSuffixAutomaton::construct ( pattern );
automaton::EpsilonNFA < char, unsigned > ensa = stringology::indexing::NondeterministicExactSuffixEpsilonAutomaton::construct ( pattern );
   
automaton::DFA < char, unsigned > tmp ( 0 ); automaton::DFA < char, unsigned > tmp ( 0 );
   
...@@ -33,9 +43,32 @@ TEST_CASE ( "Exact Suffix Automaton", "[unit][algo][stringology][indexing]" ) { ...@@ -33,9 +43,32 @@ TEST_CASE ( "Exact Suffix Automaton", "[unit][algo][stringology][indexing]" ) {
tmp.addTransition ( 5, 'c', 6 ); tmp.addTransition ( 5, 'c', 6 );
   
indexes::stringology::SuffixAutomaton < char > refSuffixAutomaton ( std::move ( tmp ), 6 ); indexes::stringology::SuffixAutomaton < char > refSuffixAutomaton ( std::move ( tmp ), 6 );
REQUIRE ( suffixAutomaton == refSuffixAutomaton );
const automaton::DFA < char, unsigned > & sa_dfa = suffixAutomaton.getAutomaton ( );
CHECK ( automaton::simplify::Normalize::normalize ( automaton::simplify::Minimize::minimize ( automaton::simplify::Trim::trim ( automaton::determinize::Determinize::determinize ( sa ) ) ) ) ==
automaton::simplify::Normalize::normalize ( automaton::simplify::Minimize::minimize ( automaton::simplify::Trim::trim ( automaton::determinize::Determinize::determinize ( sa_dfa ) ) ) ) );
   
CHECK ( suffixAutomaton == refSuffixAutomaton ); CHECK ( automaton::simplify::Normalize::normalize ( automaton::simplify::Minimize::minimize ( automaton::simplify::Trim::trim ( automaton::determinize::Determinize::determinize ( automaton::simplify::EpsilonRemoverIncoming::remove ( ensa ) ) ) ) ) ==
automaton::simplify::Normalize::normalize ( automaton::simplify::Minimize::minimize ( automaton::simplify::Trim::trim ( automaton::determinize::Determinize::determinize ( sa_dfa ) ) ) ) );
   
core::normalize < indexes::stringology::SuffixAutomaton < char > >::eval ( std::move ( refSuffixAutomaton ) ); core::normalize < indexes::stringology::SuffixAutomaton < char > >::eval ( std::move ( refSuffixAutomaton ) );
} }
SECTION ( "Test equality" ) {
auto pattern = GENERATE ( string::LinearString < char > ( "" ), string::LinearString < char > ( "acacaca" ), string::LinearString < char > ( "hello world" ) );
indexes::stringology::SuffixAutomaton < char > suffixAutomaton = stringology::indexing::ExactSuffixAutomaton::construct ( pattern );
automaton::NFA < char, unsigned > sa = stringology::indexing::NondeterministicExactSuffixAutomaton::construct ( pattern );
automaton::EpsilonNFA < char, unsigned > ensa = stringology::indexing::NondeterministicExactSuffixEpsilonAutomaton::construct ( pattern );
const automaton::DFA < char, unsigned > & sa_dfa = suffixAutomaton.getAutomaton ( );
CHECK ( automaton::simplify::Normalize::normalize ( automaton::simplify::Minimize::minimize ( automaton::simplify::Trim::trim ( automaton::determinize::Determinize::determinize ( sa ) ) ) ) ==
automaton::simplify::Normalize::normalize ( automaton::simplify::Minimize::minimize ( automaton::simplify::Trim::trim ( automaton::determinize::Determinize::determinize ( sa_dfa ) ) ) ) );
CHECK ( automaton::simplify::Normalize::normalize ( automaton::simplify::Minimize::minimize ( automaton::simplify::Trim::trim ( automaton::determinize::Determinize::determinize ( automaton::simplify::EpsilonRemoverIncoming::remove ( ensa ) ) ) ) ) ==
automaton::simplify::Normalize::normalize ( automaton::simplify::Minimize::minimize ( automaton::simplify::Trim::trim ( automaton::determinize::Determinize::determinize ( sa_dfa ) ) ) ) );
}
} }
...@@ -6,9 +6,7 @@ ...@@ -6,9 +6,7 @@
#define EXACT_COVERS_COMPUTATION_H #define EXACT_COVERS_COMPUTATION_H
   
#include <string/LinearString.h> #include <string/LinearString.h>
#include <automaton/FSM/EpsilonNFA.h>
#include <automaton/FSM/NFA.h> #include <automaton/FSM/NFA.h>
#include <automaton/simplify/efficient/EpsilonRemoverIncoming.h>
#include <stringology/indexing/NondeterministicExactSuffixAutomaton.h> #include <stringology/indexing/NondeterministicExactSuffixAutomaton.h>
   
namespace stringology::cover { namespace stringology::cover {
...@@ -29,8 +27,7 @@ public: ...@@ -29,8 +27,7 @@ public:
template < class SymbolType > template < class SymbolType >
ext::set < string::LinearString < SymbolType > > ExactCoversComputation::compute ( const string::LinearString < SymbolType > & pattern ) { ext::set < string::LinearString < SymbolType > > ExactCoversComputation::compute ( const string::LinearString < SymbolType > & pattern ) {
   
automaton::EpsilonNFA < SymbolType, unsigned > suffixNDAwithEpsilonTransitions = stringology::indexing::NondeterministicExactSuffixAutomaton::construct ( pattern ); automaton::NFA < SymbolType, unsigned > suffixNDA = stringology::indexing::NondeterministicExactSuffixAutomaton::construct ( pattern );
automaton::NFA < SymbolType, unsigned > suffixNDA = automaton::simplify::efficient::EpsilonRemoverIncoming::remove ( suffixNDAwithEpsilonTransitions );
   
ext::set < unsigned > previousState ( { suffixNDA.getInitialState ( ) } ); ext::set < unsigned > previousState ( { suffixNDA.getInitialState ( ) } );
ext::vector < SymbolType > inputString = pattern.getContent ( ); ext::vector < SymbolType > inputString = pattern.getContent ( );
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment