Skip to content
Snippets Groups Projects
Commit 2951f910 authored by Radovan Červený
Browse files

naive (maybe temporary) construction of suffix automaton

parent 6b949661
No related branches found
No related tags found
1 merge request: !16 BP_cervera3 - algorithms
......@@ -5,26 +5,23 @@
#include "FactorOracleAutomaton.hpp"
#include <exception/AlibException.h>
#include <string/LinearString.h>
#include <string/Epsilon.h>
#include <deque>
 
namespace stringology {
 
namespace exact {
 
automaton::Automaton FactorOracleAutomaton::construct ( const string::String & text ) {
return getInstance ( ).dispatch ( text.getData ( ) );
automaton::Automaton FactorOracleAutomaton::construct ( const string::String & pattern ) {
return getInstance ( ).dispatch ( pattern.getData ( ) );
}
 
automaton::DFA FactorOracleAutomaton::construct ( const string::LinearString & text ) {
automaton::DFA FactorOracleAutomaton::construct ( const string::LinearString & pattern ) {
automaton::DFA oracle ( automaton::State ( 0 ) );
 
std::map < automaton::State, automaton::State > supplyFunction { { automaton::State ( 0 ), automaton::State ( -1 ) } };
 
oracle.setInputAlphabet ( text.getAlphabet ( ) );
oracle.setInputAlphabet ( pattern.getAlphabet ( ) );
 
for ( const alphabet::Symbol & symbol : text.getContent ( ) )
for ( const alphabet::Symbol & symbol : pattern.getContent ( ) )
oracleAddLetter ( oracle, symbol, supplyFunction );
 
return oracle;
......
/*
* Author: Radovan Cerveny
*/
#include "SuffixAutomaton.hpp"
#include <exception/AlibException.h>
#include <string/Epsilon.h>
#include <label/LabelSetLabel.h>
#include "../../automaton/determinize/Determinize.h"
#include "../../automaton/simplify/Minimize.h"
#include "../../automaton/simplify/EpsilonRemoverIncoming.h"
namespace stringology {
namespace exact {
/**
 * Generic entry point: extracts the data object wrapped by the pattern and
 * hands it to the dispatcher of the SuffixAutomaton singleton.
 *
 * @param pattern wrapped string the automaton is built for
 * @return the automaton produced by the dispatched construct overload
 */
automaton::Automaton SuffixAutomaton::construct ( const string::String & pattern ) {
	const auto & patternData = pattern.getData ( );

	return getInstance ( ).dispatch ( patternData );
}
/**
 * Naive construction of the minimal suffix automaton of a linear string:
 * epsilon-NFA -> epsilon removal -> determinization -> minimization ->
 * removal of the state labelled by the empty label set.
 *
 * @param pattern linear string whose alphabet and content drive the construction
 * @return the minimized DFA with the empty-label-set state and all transitions into it removed
 */
automaton::DFA SuffixAutomaton::construct ( const string::LinearString & pattern ) {
	automaton::EpsilonNFA backbone ( automaton::State ( 0 ) );

	backbone.setInputAlphabet ( pattern.getAlphabet ( ) );

	// Build a chain 0 -> 1 -> ... -> n over the pattern symbols; the initial
	// state additionally gets an epsilon transition to every state of the chain.
	int last = 0;

	for ( const alphabet::Symbol & symbol : pattern.getContent ( ) ) {
		const int next = last + 1;

		backbone.addState ( automaton::State ( next ) );
		backbone.addTransition ( automaton::State ( last ), symbol, automaton::State ( next ) );
		backbone.addTransition ( automaton::State ( 0 ), automaton::State ( next ) );

		last = next;
	}

	// Only the end of the chain is final (state 0 itself for an empty pattern).
	backbone.addFinalState ( automaton::State ( last ) );

	automaton::DFA result = automaton::simplify::Minimize::minimize (
		automaton::determinize::Determinize::determinize (
			automaton::simplify::EpsilonRemoverIncoming::remove ( backbone ) ) );

	// State labelled by the empty label set; presumably the fail (sink) state
	// introduced by determinization - confirm against Determinize.
	automaton::State failState = automaton::State ( label::Label ( label::LabelSetLabel ( { } ) ) );

	// Work on a copy of the incoming transitions so that removing them from
	// the automaton cannot disturb the iteration.
	const auto incoming = result.getTransitionsToState ( failState );

	for ( const auto & edge : incoming )
		result.removeTransition ( edge.first.first, edge.first.second, edge.second );

	result.removeState ( failState );

	return result;
}
// Namespace-scope RegistratorWrapper < DFA, LinearString > built from the
// SuffixAutomaton singleton and the construct overload set.
// NOTE(review): presumably this registers the LinearString overload with the
// dispatcher during static initialisation - confirm in core/multipleDispatch.hpp.
auto SuffixAutomatonLinearString = SuffixAutomaton::RegistratorWrapper < automaton::DFA, string::LinearString > ( SuffixAutomaton::getInstance ( ), SuffixAutomaton::construct );
} /* namespace exact */
} /* namespace stringology */
/*
* Author: Radovan Cerveny
*/
#ifndef SUFFIX_AUTOMATON_HPP_
#define SUFFIX_AUTOMATON_HPP_
#include <automaton/Automaton.h>
#include <automaton/FSM/DFA.h>
#include <automaton/FSM/EpsilonNFA.h>
#include <string/LinearString.h>
#include <string/String.h>
#include <core/multipleDispatch.hpp>
namespace stringology {
namespace exact {
/**
 * Construction of suffix automata, dispatched on the concrete string type.
 */
class SuffixAutomaton : public std::SingleDispatch < automaton::Automaton, string::StringBase > {
public:
	/**
	 * Naive construction of minimal suffix automaton for given pattern - EpsNFA -> NFA -> DFA -> minDFA -> removeErrorState.
	 * This overload accepts the generic string wrapper.
	 * @param pattern string the automaton is built for
	 * @return minimal suffix automaton for given pattern.
	 */
	static automaton::Automaton construct ( const string::String & pattern );

	/**
	 * Same naive construction for a linear string.
	 * @param pattern linear string the automaton is built for
	 * @return minimal suffix automaton for given pattern as a DFA.
	 */
	static automaton::DFA construct ( const string::LinearString & pattern );

	/**
	 * @return reference to the single function-local static instance of this class.
	 */
	static SuffixAutomaton & getInstance ( ) {
		static SuffixAutomaton instance;

		return instance;
	}

};
} /* namespace exact */
} /* namespace stringology */
#endif /* SUFFIX_AUTOMATON_HPP_ */
#include "SuffixAutomatonTest.h"
#include "string/LinearString.h"
#include "stringology/exact/SuffixAutomaton.hpp"
#include "string/generate/RandomStringFactory.h"
#include "string/generate/RandomSubstringFactory.h"
// Implication assertion: fails only when x holds and y does not.
// It is not used by the test visible in this file.
#define CPPUNIT_IMPLY( x, y ) CPPUNIT_ASSERT ( !( x ) || ( y ) )
// Register the suite under the name "stringology" and also through the
// unnamed registration macro.
CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( SuffixAutomatonTest, "stringology" );
CPPUNIT_TEST_SUITE_REGISTRATION ( SuffixAutomatonTest );
// Fixture set-up hook; intentionally empty, the test needs no shared state.
void SuffixAutomatonTest::setUp ( ) {
}
// Fixture tear-down hook; intentionally empty, there is nothing to release.
void SuffixAutomatonTest::tearDown ( ) {
}
void SuffixAutomatonTest::testSuffixAutomatonConstruction ( ) {
string::LinearString pattern ( "atatac" );
automaton::DFA suffix = stringology::exact::SuffixAutomaton::construct ( pattern );
// FIXME
}
#ifndef SUFFIX_AUTOMATON_TEST_HPP_
#define SUFFIX_AUTOMATON_TEST_HPP_
#include <cppunit/extensions/HelperMacros.h>
// CppUnit fixture for stringology::exact::SuffixAutomaton.
class SuffixAutomatonTest : public CppUnit::TestFixture {
// Suite declaration: lists the single test method of this fixture.
CPPUNIT_TEST_SUITE ( SuffixAutomatonTest );
CPPUNIT_TEST ( testSuffixAutomatonConstruction );
CPPUNIT_TEST_SUITE_END ( );
public:
// Fixture hooks; both are defined with empty bodies in the implementation file.
void setUp ( );
void tearDown ( );
// Constructs the suffix automaton of a fixed pattern.
void testSuffixAutomatonConstruction ( );
};
#endif // SUFFIX_AUTOMATON_TEST_HPP_
......@@ -30,6 +30,7 @@
#include <stringology/exact/ExactNondeterministicSubsequenceAutomaton.h>
#include <stringology/exact/ExactMultiNondeterministicSubsequenceAutomaton.h>
#include <stringology/exact/FactorOracleAutomaton.hpp>
#include <stringology/exact/SuffixAutomaton.hpp>
#include <stringology/exact/BorderArray.h>
#include <stringology/indexing/SuffixTrie.h>
 
......@@ -44,6 +45,7 @@ int main ( int argc, char * argv[] ) {
allowed.push_back ( "exactNondeterministicSubsequenceAutomaton" );
allowed.push_back ( "exactMultiNondeterministicSubsequenceAutomaton" );
allowed.push_back ( "factorOracleAutomaton" );
allowed.push_back ( "suffixAutomaton" );
allowed.push_back ( "exactFactorMatch" );
allowed.push_back ( "boyerMooreHorspool" );
allowed.push_back ( "reversedBoyerMooreHorspool" );
......@@ -217,6 +219,18 @@ int main ( int argc, char * argv[] ) {
measurements::end ( );
measurements::start ( "Output write", measurements::Type::AUXILIARY );
 
alib::XmlDataFactory::toStdout ( automaton );
} else if ( algorithm.getValue ( ) == "suffixAutomaton" ) {
string::String pattern = alib::XmlDataFactory::fromTokens < string::String > ( std::move ( sax::FromXMLParserHelper::parseInput(true, patternInput).front ( ) ) );
measurements::end ( );
measurements::start ( "Algorithm", measurements::Type::MAIN );
automaton::Automaton automaton = stringology::exact::SuffixAutomaton::construct ( pattern );
measurements::end ( );
measurements::start ( "Output write", measurements::Type::AUXILIARY );
alib::XmlDataFactory::toStdout ( automaton );
} else if ( algorithm.getValue ( ) == "borderArray" ) {
string::String subject = alib::XmlDataFactory::fromTokens < string::String > ( std::move ( sax::FromXMLParserHelper::parseInput(true, subjectInput).front ( ) ) );
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment