diff --git a/alib2algo/src/stringology/exact/FactorOracleAutomaton.cpp b/alib2algo/src/stringology/exact/FactorOracleAutomaton.cpp index 1d83402cea1f02155fe46da723c71ec7cdcfb578..bb5e031909d19aa8d0c066c19a20ec147c12d4a6 100644 --- a/alib2algo/src/stringology/exact/FactorOracleAutomaton.cpp +++ b/alib2algo/src/stringology/exact/FactorOracleAutomaton.cpp @@ -5,26 +5,23 @@ #include "FactorOracleAutomaton.hpp" #include <exception/AlibException.h> #include <string/LinearString.h> -#include <string/Epsilon.h> - -#include <deque> namespace stringology { namespace exact { -automaton::Automaton FactorOracleAutomaton::construct ( const string::String & text ) { - return getInstance ( ).dispatch ( text.getData ( ) ); +automaton::Automaton FactorOracleAutomaton::construct ( const string::String & pattern ) { + return getInstance ( ).dispatch ( pattern.getData ( ) ); } -automaton::DFA FactorOracleAutomaton::construct ( const string::LinearString & text ) { +automaton::DFA FactorOracleAutomaton::construct ( const string::LinearString & pattern ) { automaton::DFA oracle ( automaton::State ( 0 ) ); std::map < automaton::State, automaton::State > supplyFunction { { automaton::State ( 0 ), automaton::State ( -1 ) } }; - oracle.setInputAlphabet ( text.getAlphabet ( ) ); + oracle.setInputAlphabet ( pattern.getAlphabet ( ) ); - for ( const alphabet::Symbol & symbol : text.getContent ( ) ) + for ( const alphabet::Symbol & symbol : pattern.getContent ( ) ) oracleAddLetter ( oracle, symbol, supplyFunction ); return oracle; diff --git a/alib2algo/src/stringology/exact/SuffixAutomaton.cpp b/alib2algo/src/stringology/exact/SuffixAutomaton.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1f8de9b4f3f56c565d2db46b985a8306975d8b2c --- /dev/null +++ b/alib2algo/src/stringology/exact/SuffixAutomaton.cpp @@ -0,0 +1,55 @@ +/* + * Author: Radovan Cerveny + */ + +#include "SuffixAutomaton.hpp" +#include <exception/AlibException.h> +#include <string/Epsilon.h> +#include <label/LabelSetLabel.h> +#include "../../automaton/determinize/Determinize.h" +#include "../../automaton/simplify/Minimize.h" +#include "../../automaton/simplify/EpsilonRemoverIncoming.h" + +namespace stringology { + +namespace exact { + +automaton::Automaton SuffixAutomaton::construct ( const string::String & pattern ) { + return getInstance ( ).dispatch ( pattern.getData ( ) ); +} + +automaton::DFA SuffixAutomaton::construct ( const string::LinearString & pattern ) { + automaton::EpsilonNFA nfaSuffixAutomaton ( automaton::State ( 0 ) ); + + nfaSuffixAutomaton.setInputAlphabet ( pattern.getAlphabet ( ) ); + + int i = 0; + + for ( const alphabet::Symbol & symbol : pattern.getContent ( ) ) { + i++; + nfaSuffixAutomaton.addState ( automaton::State ( i ) ); + nfaSuffixAutomaton.addTransition ( automaton::State ( i - 1 ), symbol, automaton::State ( i ) ); + nfaSuffixAutomaton.addTransition ( automaton::State ( 0 ), automaton::State ( i ) ); + } + + nfaSuffixAutomaton.addFinalState ( automaton::State ( i ) ); + + automaton::DFA minimalSuffixAutomaton = automaton::simplify::Minimize::minimize ( automaton::determinize::Determinize::determinize ( automaton::simplify::EpsilonRemoverIncoming::remove ( nfaSuffixAutomaton ) ) ); + + automaton::State failState = automaton::State ( label::Label ( label::LabelSetLabel ( { } ) ) ); + + auto transitionsToFailState = minimalSuffixAutomaton.getTransitionsToState ( failState ); + + for ( const auto & transition : transitionsToFailState ) + minimalSuffixAutomaton.removeTransition ( transition.first.first, transition.first.second, transition.second ); + + minimalSuffixAutomaton.removeState ( failState ); + + return minimalSuffixAutomaton; +} + +auto SuffixAutomatonLinearString = SuffixAutomaton::RegistratorWrapper < automaton::DFA, string::LinearString > ( SuffixAutomaton::getInstance ( ), SuffixAutomaton::construct ); + +} /* namespace exact */ + +} /* namespace stringology */ diff --git a/alib2algo/src/stringology/exact/SuffixAutomaton.hpp b/alib2algo/src/stringology/exact/SuffixAutomaton.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8b2c4c8367bd1902e344f2ca4a4d286abab4b5cb --- /dev/null +++ b/alib2algo/src/stringology/exact/SuffixAutomaton.hpp @@ -0,0 +1,41 @@ +/* + * Author: Radovan Cerveny + */ + +#ifndef SUFFIX_AUTOMATON_HPP_ +#define SUFFIX_AUTOMATON_HPP_ + +#include <automaton/Automaton.h> +#include <automaton/FSM/DFA.h> +#include <automaton/FSM/EpsilonNFA.h> +#include <string/LinearString.h> +#include <string/String.h> +#include <core/multipleDispatch.hpp> + +namespace stringology { + +namespace exact { + +class SuffixAutomaton : public std::SingleDispatch < automaton::Automaton, string::StringBase > { +public: + /** + * Naive construction of minimal suffix automaton for given pattern - EpsNFA -> NFA -> DFA -> minDFA -> removeErrorState. + * @return minimal suffix automaton for given pattern. + */ + static automaton::Automaton construct ( const string::String & pattern ); + + static automaton::DFA construct ( const string::LinearString & pattern ); + + static SuffixAutomaton & getInstance ( ) { + static SuffixAutomaton res; + + return res; + } + +}; + +} /* namespace exact */ + +} /* namespace stringology */ + +#endif /* SUFFIX_AUTOMATON_HPP_ */ diff --git a/alib2algo/test-src/stringology/exact/SuffixAutomatonTest.cpp b/alib2algo/test-src/stringology/exact/SuffixAutomatonTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..70d729ff52d1eb8606cd225b2df4f962addb70ec --- /dev/null +++ b/alib2algo/test-src/stringology/exact/SuffixAutomatonTest.cpp @@ -0,0 +1,27 @@ +#include "SuffixAutomatonTest.h" + +#include "string/LinearString.h" +#include "stringology/exact/SuffixAutomaton.hpp" + +#include "string/generate/RandomStringFactory.h" +#include "string/generate/RandomSubstringFactory.h" + +#define CPPUNIT_IMPLY( x, y ) CPPUNIT_ASSERT ( !( x ) || ( y ) ) + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( SuffixAutomatonTest, "stringology" ); +CPPUNIT_TEST_SUITE_REGISTRATION ( SuffixAutomatonTest ); + +void SuffixAutomatonTest::setUp ( ) { +} + +void SuffixAutomatonTest::tearDown ( ) { +} + +void SuffixAutomatonTest::testSuffixAutomatonConstruction ( ) { + + string::LinearString pattern ( "atatac" ); + + automaton::DFA suffix = stringology::exact::SuffixAutomaton::construct ( pattern ); + + // FIXME +} diff --git a/alib2algo/test-src/stringology/exact/SuffixAutomatonTest.h b/alib2algo/test-src/stringology/exact/SuffixAutomatonTest.h new file mode 100644 index 0000000000000000000000000000000000000000..edfc02045c8287ae54d9f15c9a23668261670519 --- /dev/null +++ b/alib2algo/test-src/stringology/exact/SuffixAutomatonTest.h @@ -0,0 +1,18 @@ +#ifndef SUFFIX_AUTOMATON_TEST_HPP_ +#define SUFFIX_AUTOMATON_TEST_HPP_ + +#include <cppunit/extensions/HelperMacros.h> + +class SuffixAutomatonTest : public CppUnit::TestFixture { + CPPUNIT_TEST_SUITE ( SuffixAutomatonTest ); + CPPUNIT_TEST ( testSuffixAutomatonConstruction ); + CPPUNIT_TEST_SUITE_END ( ); + +public: + void setUp ( ); + void tearDown ( ); + + void testSuffixAutomatonConstruction ( ); +}; + +#endif // SUFFIX_AUTOMATON_TEST_HPP_ diff --git a/astringology2/src/astringology.cpp b/astringology2/src/astringology.cpp index 46c5dcf358fb909576a6859c5f5fbe0f4a7bb0dc..27e78d3e7cf3ad48d1ce58880a04134b6eaf0ee8 100644 --- a/astringology2/src/astringology.cpp +++ b/astringology2/src/astringology.cpp @@ -30,6 +30,7 @@ #include <stringology/exact/ExactNondeterministicSubsequenceAutomaton.h> #include <stringology/exact/ExactMultiNondeterministicSubsequenceAutomaton.h> #include <stringology/exact/FactorOracleAutomaton.hpp> +#include <stringology/exact/SuffixAutomaton.hpp> #include <stringology/exact/BorderArray.h> #include <stringology/indexing/SuffixTrie.h> @@ -44,6 +45,7 @@ int main ( int argc, char * argv[] ) { allowed.push_back ( "exactNondeterministicSubsequenceAutomaton" ); allowed.push_back ( "exactMultiNondeterministicSubsequenceAutomaton" ); allowed.push_back ( "factorOracleAutomaton" ); + allowed.push_back ( "suffixAutomaton" ); allowed.push_back ( "exactFactorMatch" ); allowed.push_back ( "boyerMooreHorspool" ); allowed.push_back ( "reversedBoyerMooreHorspool" ); @@ -217,6 +219,18 @@ int main ( int argc, char * argv[] ) { measurements::end ( ); measurements::start ( "Output write", measurements::Type::AUXILIARY ); + alib::XmlDataFactory::toStdout ( automaton ); + } else if ( algorithm.getValue ( ) == "suffixAutomaton" ) { + string::String pattern = alib::XmlDataFactory::fromTokens < string::String > ( std::move ( sax::FromXMLParserHelper::parseInput(true, patternInput).front ( ) ) ); + + measurements::end ( ); + measurements::start ( "Algorithm", measurements::Type::MAIN ); + + automaton::Automaton automaton = stringology::exact::SuffixAutomaton::construct ( pattern ); + + measurements::end ( ); + measurements::start ( "Output write", measurements::Type::AUXILIARY ); + alib::XmlDataFactory::toStdout ( automaton ); } else if ( algorithm.getValue ( ) == "borderArray" ) { string::String subject = alib::XmlDataFactory::fromTokens < string::String > ( std::move ( sax::FromXMLParserHelper::parseInput(true, subjectInput).front ( ) ) );