diff --git a/alib2algo/src/stringology/exact/BackwardNondeterministicDAWGMatching.hpp b/alib2algo/src/stringology/exact/BackwardNondeterministicDAWGMatching.hpp
index a6af70d8942b903c6fd27a58b12ca36115ceeeda..6ebe51d72a1fe0a41e90ddb996df5742185fe046 100644
--- a/alib2algo/src/stringology/exact/BackwardNondeterministicDAWGMatching.hpp
+++ b/alib2algo/src/stringology/exact/BackwardNondeterministicDAWGMatching.hpp
@@ -1,10 +1,9 @@
-
 /*
  * Author: Radovan Cerveny
  */
 
-#ifndef _STRINGOLOGY_BACKWARD_NONDETERMINISTIC_DAWG_MATCHING_H_
-#define _STRINGOLOGY_BACKWARD_NONDETERMINISTIC_DAWG_MATCHING_H_
+#ifndef STRINGOLOGY_BACKWARD_NONDETERMINISTIC_DAWG_MATCHING_HPP_
+#define STRINGOLOGY_BACKWARD_NONDETERMINISTIC_DAWG_MATCHING_HPP_
 
 #include <string/String.h>
 #include <string/StringFeatures.h>
@@ -58,4 +57,4 @@ public:
 
 } /* namespace stringology */
 
-#endif /* _STRINGOLOGY_BACKWARD_NONDETERMINISTIC_DAWG_MATCHING_H_ */
+#endif /* STRINGOLOGY_BACKWARD_NONDETERMINISTIC_DAWG_MATCHING_HPP_ */
diff --git a/alib2algo/src/stringology/exact/FactorOracleAutomaton.cpp b/alib2algo/src/stringology/exact/FactorOracleAutomaton.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1d83402cea1f02155fe46da723c71ec7cdcfb578
--- /dev/null
+++ b/alib2algo/src/stringology/exact/FactorOracleAutomaton.cpp
@@ -0,0 +1,84 @@
+/*
+ * Author: Radovan Cerveny
+ */
+
+#include "FactorOracleAutomaton.hpp"
+#include <exception/AlibException.h>
+#include <string/LinearString.h>
+#include <string/Epsilon.h>
+
+#include <deque>
+
+namespace stringology {
+
+namespace exact {
+
+/* Entry point for any string: hands the wrapped string data to the dispatcher instance. */
+automaton::Automaton FactorOracleAutomaton::construct ( const string::String & pattern ) {
+	return getInstance ( ).dispatch ( pattern.getData ( ) );
+}
+
+/*
+ * Builds the oracle incrementally, one pattern symbol at a time.
+ * NOTE(review): state 0 is never added to the final states although the usual factor oracle
+ * definition makes every state accepting - confirm this is intended before relying on it.
+ */
+automaton::DFA FactorOracleAutomaton::construct ( const string::LinearString & pattern ) {
+	automaton::DFA oracle ( automaton::State ( 0 ) );
+
+	/* State ( -1 ) marks "no supply state"; oracleAddLetter stops walking the chain when it meets it. */
+	std::map < automaton::State, automaton::State > supplyFunction { { automaton::State ( 0 ), automaton::State ( -1 ) } };
+
+	oracle.setInputAlphabet ( pattern.getAlphabet ( ) );
+
+	for ( const alphabet::Symbol & symbol : pattern.getContent ( ) )
+		oracleAddLetter ( oracle, symbol, supplyFunction );
+
+	return oracle;
+}
+
+/*
+ * Appends state m + 1 (m being the last state so far) reached on symbol, then walks the supply
+ * chain of state m: every state on it lacking a transition on symbol gets one to the new state.
+ * The walk ends at the first state that already has such a transition - its target becomes the
+ * supply state of the new state - or at the undefined marker, in which case the supply is state 0.
+ */
+void FactorOracleAutomaton::oracleAddLetter ( automaton::DFA & oracle, const alphabet::Symbol & symbol, std::map < automaton::State, automaton::State > & supplyFunction ) {
+	const automaton::State undefinedState ( -1 );
+
+	/* states are numbered 0 .. m in creation order, so the newest one is size - 1 */
+	const int m = static_cast < int > ( oracle.getStates ( ).size ( ) ) - 1;
+
+	const automaton::State lastState ( m );
+	const automaton::State newState ( m + 1 );
+
+	oracle.addState ( newState );
+	oracle.addFinalState ( newState );
+
+	oracle.addTransition ( lastState, symbol, newState );
+
+	automaton::State kState = supplyFunction.find ( lastState )->second;
+	automaton::State supplyState ( 0 );
+
+	while ( kState != undefinedState ) {
+		const auto & transitions = oracle.getTransitions ( );
+		const auto existing = transitions.find ( { kState, symbol } );
+
+		if ( existing != transitions.end ( ) ) {
+			/* reuse the iterator instead of looking the transition up a second time */
+			supplyState = existing->second;
+			break;
+		}
+
+		oracle.addTransition ( kState, symbol, newState );
+		kState = supplyFunction.find ( kState )->second;
+	}
+
+	supplyFunction.insert ( { newState, supplyState } );
+}
+
+auto FactorOracleAutomatonLinearString = FactorOracleAutomaton::RegistratorWrapper < automaton::DFA, string::LinearString > ( FactorOracleAutomaton::getInstance ( ), FactorOracleAutomaton::construct );
+
+} /* namespace exact */
+
+} /* namespace stringology */
diff --git a/alib2algo/src/stringology/exact/FactorOracleAutomaton.hpp b/alib2algo/src/stringology/exact/FactorOracleAutomaton.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..043eab4a78797ea9d763104dfee0888701cbbe94
--- /dev/null
+++ b/alib2algo/src/stringology/exact/FactorOracleAutomaton.hpp
@@ -0,0 +1,63 @@
+/*
+ * Author: Radovan Cerveny
+ */
+
+#ifndef STRINGOLOGY_FACTOR_ORACLE_AUTOMATON_HPP_
+#define STRINGOLOGY_FACTOR_ORACLE_AUTOMATON_HPP_
+
+#include <automaton/Automaton.h>
+#include <automaton/FSM/DFA.h>
+#include <string/LinearString.h>
+#include <string/String.h>
+#include <core/multipleDispatch.hpp>
+
+#include <map>
+
+namespace stringology {
+
+namespace exact {
+
+/**
+ * Factor oracle construction; a SingleDispatch over string::StringBase producing automaton::Automaton.
+ */
+class FactorOracleAutomaton : public std::SingleDispatch < automaton::Automaton, string::StringBase > {
+private:
+	/**
+	 * Extends the oracle by one state reached on the given symbol from the most recently added state.
+	 * @param oracle automaton built so far; receives the new state and its transitions
+	 * @param symbol next symbol of the pattern
+	 * @param supplyFunction supply state of every state created so far; gains the entry for the new state
+	 */
+	static void oracleAddLetter ( automaton::DFA & oracle, const alphabet::Symbol & symbol, std::map < automaton::State, automaton::State > & supplyFunction );
+
+public:
+	/**
+	 * Constructs factor oracle automaton for given pattern.
+	 * @param pattern string to build the oracle for
+	 * @return factor oracle automaton for given pattern
+	 */
+	static automaton::Automaton construct ( const string::String & pattern );
+
+	/**
+	 * Constructs factor oracle automaton for given linear string pattern.
+	 * @param pattern linear string to build the oracle for
+	 * @return deterministic factor oracle automaton for given pattern
+	 */
+	static automaton::DFA construct ( const string::LinearString & pattern );
+
+	/**
+	 * @return the single shared dispatcher instance
+	 */
+	static FactorOracleAutomaton & getInstance ( ) {
+		static FactorOracleAutomaton res;
+
+		return res;
+	}
+
+};
+
+} /* namespace exact */
+
+} /* namespace stringology */
+
+#endif /* STRINGOLOGY_FACTOR_ORACLE_AUTOMATON_HPP_ */
diff --git a/alib2algo/test-src/stringology/exact/BackwardNondeterministicDAWGMatchingTest.h b/alib2algo/test-src/stringology/exact/BackwardNondeterministicDAWGMatchingTest.h
index 6d1116d69181ee13b219ddd44d4dc48527957644..016be2e42a20aca90297cf6438516c885f9bd402 100644
--- a/alib2algo/test-src/stringology/exact/BackwardNondeterministicDAWGMatchingTest.h
+++ b/alib2algo/test-src/stringology/exact/BackwardNondeterministicDAWGMatchingTest.h
@@ -1,5 +1,5 @@
-#ifndef BACKWARD_NONDETERMINISTIC_DAWG_MATCHING_TEST
-#define BACKWARD_NONDETERMINISTIC_DAWG_MATCHING_TEST
+#ifndef BACKWARD_NONDETERMINISTIC_DAWG_MATCHING_TEST_H_
+#define BACKWARD_NONDETERMINISTIC_DAWG_MATCHING_TEST_H_
 
 #include <cppunit/extensions/HelperMacros.h>
 
@@ -15,4 +15,4 @@ public:
 	void testBNDM ( );
 };
 
-#endif // BACKWARD_NONDETERMINISTIC_DAWG_MATCHING_TEST
+#endif // BACKWARD_NONDETERMINISTIC_DAWG_MATCHING_TEST_H_
diff --git a/alib2algo/test-src/stringology/exact/FactorOracleAutomatonTest.cpp b/alib2algo/test-src/stringology/exact/FactorOracleAutomatonTest.cpp
new file mode 100644
index
0000000000000000000000000000000000000000..e62d4d083d7f6463fb70ee346ee9ad36adaf0e6e
--- /dev/null
+++ b/alib2algo/test-src/stringology/exact/FactorOracleAutomatonTest.cpp
@@ -0,0 +1,46 @@
+#include "FactorOracleAutomatonTest.h"
+
+#include "string/LinearString.h"
+#include "stringology/exact/FactorOracleAutomaton.hpp"
+
+#include "string/generate/RandomStringFactory.h"
+#include "string/generate/RandomSubstringFactory.h"
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( FactorOracleAutomatonTest, "stringology" );
+CPPUNIT_TEST_SUITE_REGISTRATION ( FactorOracleAutomatonTest );
+
+void FactorOracleAutomatonTest::setUp ( ) {
+}
+
+void FactorOracleAutomatonTest::tearDown ( ) {
+}
+
+/* Compares the constructed oracle of "atatac" against a reference automaton assembled by hand. */
+void FactorOracleAutomatonTest::testFactorOracleConstruction ( ) {
+	string::LinearString pattern ( "atatac" );
+
+	automaton::DFA oracle = stringology::exact::FactorOracleAutomaton::construct ( pattern );
+
+	automaton::DFA refOracle ( automaton::State ( 0 ) );
+
+	refOracle.setInputAlphabet ( pattern.getAlphabet ( ) );
+
+	/* one state per pattern symbol, all of them final; state 0 stays non-final */
+	for ( int i = 1; i <= 6; ++i ) {
+		refOracle.addState ( automaton::State ( i ) );
+		refOracle.addFinalState ( automaton::State ( i ) );
+	}
+
+	refOracle.addTransition ( automaton::State ( 0 ), alphabet::symbolFrom ( 'a' ), automaton::State ( 1 ) );
+	refOracle.addTransition ( automaton::State ( 0 ), alphabet::symbolFrom ( 't' ), automaton::State ( 2 ) );
+	refOracle.addTransition ( automaton::State ( 0 ), alphabet::symbolFrom ( 'c' ), automaton::State ( 6 ) );
+	refOracle.addTransition ( automaton::State ( 1 ), alphabet::symbolFrom ( 't' ), automaton::State ( 2 ) );
+	refOracle.addTransition ( automaton::State ( 1 ), alphabet::symbolFrom ( 'c' ), automaton::State ( 6 ) );
+	refOracle.addTransition ( automaton::State ( 2 ), alphabet::symbolFrom ( 'a' ), automaton::State ( 3 ) );
+	refOracle.addTransition ( automaton::State ( 3 ), alphabet::symbolFrom ( 't' ), automaton::State ( 4 ) );
+	refOracle.addTransition ( automaton::State ( 3 ), alphabet::symbolFrom ( 'c' ), automaton::State ( 6 ) );
+	refOracle.addTransition ( automaton::State ( 4 ), alphabet::symbolFrom ( 'a' ), automaton::State ( 5 ) );
+	refOracle.addTransition ( automaton::State ( 5 ), alphabet::symbolFrom ( 'c' ), automaton::State ( 6 ) );
+
+	CPPUNIT_ASSERT ( oracle == refOracle );
+}
diff --git a/alib2algo/test-src/stringology/exact/FactorOracleAutomatonTest.h b/alib2algo/test-src/stringology/exact/FactorOracleAutomatonTest.h
new file mode 100644
index 0000000000000000000000000000000000000000..c301ea8e7a8dbfd29f51dc85906420420795b3bc
--- /dev/null
+++ b/alib2algo/test-src/stringology/exact/FactorOracleAutomatonTest.h
@@ -0,0 +1,19 @@
+#ifndef FACTOR_ORACLE_AUTOMATON_TEST_H_
+#define FACTOR_ORACLE_AUTOMATON_TEST_H_
+
+#include <cppunit/extensions/HelperMacros.h>
+
+/* CppUnit fixture with a single test case, testFactorOracleConstruction. */
+class FactorOracleAutomatonTest : public CppUnit::TestFixture {
+	CPPUNIT_TEST_SUITE ( FactorOracleAutomatonTest );
+	CPPUNIT_TEST ( testFactorOracleConstruction );
+	CPPUNIT_TEST_SUITE_END ( );
+
+public:
+	void setUp ( );
+	void tearDown ( );
+
+	void testFactorOracleConstruction ( );
+};
+
+#endif // FACTOR_ORACLE_AUTOMATON_TEST_H_
diff --git a/astringology2/src/astringology.cpp b/astringology2/src/astringology.cpp
index 52352d0cca36595813e55ccda1dcd1c2f1c1fcde..46c5dcf358fb909576a6859c5f5fbe0f4a7bb0dc 100644
--- a/astringology2/src/astringology.cpp
+++ b/astringology2/src/astringology.cpp
@@ -29,6 +29,7 @@
 #include <stringology/exact/ExactSubsequenceAutomaton.h>
 #include <stringology/exact/ExactNondeterministicSubsequenceAutomaton.h>
 #include <stringology/exact/ExactMultiNondeterministicSubsequenceAutomaton.h>
+#include <stringology/exact/FactorOracleAutomaton.hpp>
 #include <stringology/exact/BorderArray.h>
 
 #include <stringology/indexing/SuffixTrie.h>
@@ -42,6 +43,7 @@ int main ( int argc, char * argv[] ) {
 		allowed.push_back ( "exactSubsequenceAutomaton" );
 		allowed.push_back (
"exactNondeterministicSubsequenceAutomaton" ); allowed.push_back ( "exactMultiNondeterministicSubsequenceAutomaton" ); + allowed.push_back ( "factorOracleAutomaton" ); allowed.push_back ( "exactFactorMatch" ); allowed.push_back ( "boyerMooreHorspool" ); allowed.push_back ( "reversedBoyerMooreHorspool" ); @@ -203,6 +205,18 @@ int main ( int argc, char * argv[] ) { measurements::end ( ); measurements::start ( "Output write", measurements::Type::AUXILIARY ); + alib::XmlDataFactory::toStdout ( automaton ); + } else if ( algorithm.getValue ( ) == "factorOracleAutomaton" ) { + string::String pattern = alib::XmlDataFactory::fromTokens < string::String > ( std::move ( sax::FromXMLParserHelper::parseInput(true, patternInput).front ( ) ) ); + + measurements::end ( ); + measurements::start ( "Algorithm", measurements::Type::MAIN ); + + automaton::Automaton automaton = stringology::exact::FactorOracleAutomaton::construct ( pattern ); + + measurements::end ( ); + measurements::start ( "Output write", measurements::Type::AUXILIARY ); + alib::XmlDataFactory::toStdout ( automaton ); } else if ( algorithm.getValue ( ) == "borderArray" ) { string::String subject = alib::XmlDataFactory::fromTokens < string::String > ( std::move ( sax::FromXMLParserHelper::parseInput(true, subjectInput).front ( ) ) );