diff --git a/alib2algo/src/alphabet/generate/AsStringAlphabet.cpp b/alib2algo/src/alphabet/generate/AsStringAlphabet.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..62aefb00b613c517f651ea889abd33fffd4a294c
--- /dev/null
+++ b/alib2algo/src/alphabet/generate/AsStringAlphabet.cpp
@@ -0,0 +1,19 @@
+#include "AsStringAlphabet.h"
+
+#include <registration/AlgoRegistration.hpp>
+
+namespace {
+
+auto AsStringAlphabetChar = registration::AbstractRegister < alphabet::generate::AsStringAlphabet, ext::set < std::string >, const ext::set < char > & > ( alphabet::generate::AsStringAlphabet::asStringAlphabet < char >, "alphabet" ).setDocumentation (
+"Converts a character alphabet to alphabet of string type.\n\
+\n\
+@param alphabet the converted alphabet\n\
+@return string typed alphabet" );
+
+auto AsStringAlphabetInt = registration::AbstractRegister < alphabet::generate::AsStringAlphabet, ext::set < std::string >, const ext::set < int > & > ( alphabet::generate::AsStringAlphabet::asStringAlphabet < int >, "alphabet" ).setDocumentation (
+"Converts an integer alphabet to alphabet of string type.\n\
+\n\
+@param alphabet the converted alphabet\n\
+@return string typed alphabet" );
+
+} /* namespace */
diff --git a/alib2algo/src/alphabet/generate/AsStringAlphabet.h b/alib2algo/src/alphabet/generate/AsStringAlphabet.h
new file mode 100644
index 0000000000000000000000000000000000000000..f8d5c3e80df34cf4e5ea5aa091061fde8ae880b2
--- /dev/null
+++ b/alib2algo/src/alphabet/generate/AsStringAlphabet.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include <alib/set>
+#include <ext/string>
+
+namespace alphabet::generate {
+
+/**
+ * Conversion of an alphabet of arbitrary symbol type to an alphabet of strings.
+ */
+class AsStringAlphabet {
+public:
+/**
+ * Converts every symbol of the given alphabet with ext::to_string and collects the results.
+ *
+ * \tparam T the type of symbols of the converted alphabet
+ *
+ * \param alphabet the converted alphabet
+ *
+ * \return string typed alphabet
+ */
+template < class T >
+static ext::set < std::string > asStringAlphabet ( const ext::set < T > & alphabet ) {
+	ext::set < std::string > stringAlphabet;
+
+	for ( const T & letter : alphabet )
+		stringAlphabet.insert ( ext::to_string ( letter ) );
+
+	return stringAlphabet;
+}
+
+};
+
+} /* namespace alphabet::generate */
diff --git 
a/alib2algo/src/alphabet/generate/GenerateAlphabet.cpp b/alib2algo/src/alphabet/generate/GenerateAlphabet.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..72d83fb3f50bda63964dff60fe2b10b33f5e4159
--- /dev/null
+++ b/alib2algo/src/alphabet/generate/GenerateAlphabet.cpp
@@ -0,0 +1,21 @@
+#include "GenerateAlphabet.h"
+
+#include <registration/AlgoRegistration.hpp>
+
+namespace {
+
+auto GenerateCharacterAlphabet = registration::AbstractRegister < alphabet::generate::GenerateAlphabet, ext::set < char >, size_t, bool, bool > ( alphabet::generate::GenerateAlphabet::generateCharacterAlphabet, "alphabetSize", "randomizeAlphabet", "lowerCase" ).setDocumentation (
+"Generates a random alphabet.\n\
+\n\
+@param alphabetSize size of the alphabet (the number of symbols selected from a-z or A-Z)\n\
+@param randomizeAlphabet selects random symbols from a-z range if true\n\
+@param lowerCase if true the generator produces lower case character, upper case is used if false\n\
+@return random alphabet" );
+
+auto GenerateIntegerAlphabet = registration::AbstractRegister < alphabet::generate::GenerateAlphabet, ext::set < int >, size_t > ( alphabet::generate::GenerateAlphabet::generateIntegerAlphabet, "alphabetSize" ).setDocumentation (
+"Generates an alphabet of integer symbols 0 .. alphabetSize - 1.\n\
+\n\
+@param alphabetSize size of the alphabet (positive integer)\n\
+@return alphabet of alphabetSize integer symbols" );
+
+} /* namespace */
diff --git a/alib2algo/src/alphabet/generate/GenerateAlphabet.h b/alib2algo/src/alphabet/generate/GenerateAlphabet.h
new file mode 100644
index 0000000000000000000000000000000000000000..0a70eac2becd713c6b918ecfd887cf3c0dfc2aac
--- /dev/null
+++ b/alib2algo/src/alphabet/generate/GenerateAlphabet.h
@@ -0,0 +1,74 @@
+#pragma once
+
+#include <string>
+#include <algorithm>
+
+#include <ext/deque>
+#include <ext/random>
+
+#include <alib/set>
+
+#include <exception/CommonException.h>
+
+namespace alphabet::generate {
+
+/**
+ * Generators of character and integer alphabets.
+ */
+class GenerateAlphabet {
+public:
+/**
+ * Generates an alphabet of letters of the english alphabet.
+ *
+ * \param alphabetSize size of the alphabet (the number of symbols selected from a-z or A-Z, at most 26)
+ * \param randomizeAlphabet selects random letters if true, the first alphabetSize letters are used if false
+ * \param lowerCase if true the generator produces lower case characters, upper case is used if false
+ *
+ * \return alphabet of alphabetSize letters
+ *
+ * \throws exception::CommonException if alphabetSize exceeds 26
+ */
+static ext::set < char > generateCharacterAlphabet ( size_t alphabetSize, bool randomizeAlphabet, bool lowerCase ) {
+	constexpr size_t ENGLISH_ALPHABET_SIZE = 26;
+	if ( alphabetSize > ENGLISH_ALPHABET_SIZE )
+		throw exception::CommonException ( "Alphabet too big." );
+
+	ext::deque < char > alphabet;
+	for ( char i = 'a'; i <= 'z'; i++ )
+		alphabet.push_back ( lowerCase ? i : i + 'A' - 'a');
+
+	if ( randomizeAlphabet )
+		shuffle ( alphabet.begin ( ), alphabet.end ( ), ext::random_devices::semirandom );
+
+	/* keep only the requested number of (possibly shuffled) letters */
+	alphabet.resize ( alphabetSize );
+
+	return ext::set < char > ( alphabet.begin ( ), alphabet.end ( ) );
+}
+
+/**
+ * Generates an alphabet of integer symbols 0, 1, ..., alphabetSize - 1.
+ *
+ * \param alphabetSize size of the alphabet (positive integer)
+ *
+ * \return alphabet of exactly alphabetSize integer symbols
+ *
+ * \throws exception::CommonException if alphabetSize is zero
+ */
+static ext::set < int > generateIntegerAlphabet ( size_t alphabetSize ) {
+	if ( alphabetSize == 0 )
+		throw exception::CommonException ( "Alphabet size must be greater than 0." );
+
+	ext::set < int > alphabet;
+
+	/* Random draws from 0 .. alphabetSize - 1 collide inside the set and would yield fewer symbols
+	 * than requested; the only alphabet of the requested size over that range is the range itself. */
+	for ( size_t i = 0; i < alphabetSize; ++ i )
+		alphabet.insert ( static_cast < int > ( i ) );
+
+	return alphabet;
+}
+
+};
+
+} /* namespace alphabet::generate */
diff --git a/alib2algo/src/string/generate/RandomStringFactory.cpp b/alib2algo/src/string/generate/RandomStringFactory.cpp index 958ddd69cd8235b95cdf8dbeb18accc4f243e113..cef2d55b02b92c107ccca7b22031e5ac5a497c8f 100644 --- a/alib2algo/src/string/generate/RandomStringFactory.cpp +++ b/alib2algo/src/string/generate/RandomStringFactory.cpp @@ -1,69 +1,8 @@ #include "RandomStringFactory.h" -#include <ext/algorithm> - #include <registration/AlgoRegistration.hpp> -namespace string::generate { - -string::LinearString < std::string > string::generate::RandomStringFactory::generateLinearString ( size_t size, size_t alphabetSize, bool randomizedAlphabet, bool integerSymbols ) { - if ( !integerSymbols ) return generateLinearString ( size, alphabetSize, randomizedAlphabet ); - - if ( alphabetSize <= 0 ) - throw exception::CommonException ( "Alphabet size must be greater than 0." 
); - - ext::vector < std::string > elems; - - for ( size_t i = 0; i < size; i++ ) - elems.push_back ( ext::to_string ( static_cast < int > ( ext::random_devices::semirandom ( ) % alphabetSize ) ) ); - - return string::LinearString < std::string > ( elems ); -} - namespace { - constexpr unsigned ENGLISH_ALPHABET_SIZE = 26; -} - -string::LinearString < std::string > string::generate::RandomStringFactory::generateLinearString ( size_t size, size_t alphabetSize, bool randomizedAlphabet ) { - if ( alphabetSize > ENGLISH_ALPHABET_SIZE ) - throw exception::CommonException ( "Too big alphabet." ); - - if ( alphabetSize <= 0 ) - throw exception::CommonException ( "Alphabet size must be greater than 0." ); - - ext::vector < std::string > alphabet; - alphabet.reserve ( 'z' - 'a' + 1 ); - - for(char i = 'a'; i <= 'z'; i++) - alphabet.emplace_back ( 1, i ); - - if ( randomizedAlphabet ) shuffle ( alphabet.begin ( ), alphabet.end ( ), ext::random_devices::semirandom ); - - alphabet.resize ( alphabetSize ); - - return string::generate::RandomStringFactory::generateLinearString ( size, std::move ( alphabet ) ); -} - -} /* namespace string::generate */ - -namespace { - -auto GenerateLinearString1 = registration::AbstractRegister < string::generate::RandomStringFactory, string::LinearString < std::string >, size_t, size_t, bool, bool > ( string::generate::RandomStringFactory::generateLinearString, abstraction::AlgorithmCategories::AlgorithmCategory::DEFAULT, "size", "alphabetSize", "randomizedAlphabet", "integerSymbols" ).setDocumentation ( -"Generates a random string of given size.\n\ -\n\ -@param size the length of the generated string\n\ -@param alphabetSize size of the alphabet (1-26 for characters and 0-INT_MAX for integers)\n\ -@param randomizedAlphabet selects random symbols from a-z range if true\n\ -@param integerSymbols use integers as symbols in the generated string is true, randomize alphabet is not used if integer alphabet is requested\n\ -@return random string" ); - 
-auto GenerateLinearString2 = registration::AbstractRegister < string::generate::RandomStringFactory, string::LinearString < std::string >, size_t, size_t, bool > ( string::generate::RandomStringFactory::generateLinearString, abstraction::AlgorithmCategories::AlgorithmCategory::DEFAULT, "size", "alphabetSize", "randomizedAlphabet" ).setDocumentation ( -"Generates a random string of given size.\n\ -\n\ -@param size the length of the generated string\n\ -@param alphabetSize size of the alphabet (1-26 for characters)\n\ -@param randomizedAlphabet selects random symbols from a-z range if true\n\ -@return random string" ); auto GenerateLinearString3 = registration::AbstractRegister < string::generate::RandomStringFactory, string::LinearString < >, size_t, ext::set < DefaultSymbolType > > ( string::generate::RandomStringFactory::generateLinearString, abstraction::AlgorithmCategories::AlgorithmCategory::DEFAULT, "size", "alphabet" ).setDocumentation ( "Generates a random string of given size\n\ @@ -72,11 +11,4 @@ auto GenerateLinearString3 = registration::AbstractRegister < string::generate:: @param alphabet alphabet of the generated string\n\ @return random string" ); -auto GenerateLinearString4 = registration::AbstractRegister < string::generate::RandomStringFactory, string::LinearString < >, size_t, ext::vector < DefaultSymbolType > > ( string::generate::RandomStringFactory::generateLinearString, abstraction::AlgorithmCategories::AlgorithmCategory::DEFAULT, "size", "alphabet" ).setDocumentation ( -"Generates a random string of given size\n\ -\n\ -@param size the length of the generated string\n\ -@param alphabet alphabet of the generated string\n\ -@return random string" ); - } /* namespace */ diff --git a/alib2algo/src/string/generate/RandomStringFactory.h b/alib2algo/src/string/generate/RandomStringFactory.h index 7da732d305595bd5a8c9f36e818b85cd13f82af2..1a82f916815381472ca920a77d2c7d5853cb388c 100644 --- a/alib2algo/src/string/generate/RandomStringFactory.h +++ 
b/alib2algo/src/string/generate/RandomStringFactory.h @@ -18,30 +18,6 @@ namespace generate { * */ class RandomStringFactory { -public: - /** - * Generates a random string of given size. - * - * \param size the length of the generated string - * \param alphabetSize size of the alphabet (1-26 for characters and 0-INT_MAX for integers) - * \param randomizedAlphabet selects random symbols from a-z range if true - * \param integerSymbols use integers as symbols in the generated string is true, randomize alphabet is not used if integer alphabet is requested - * - * \return random string - */ - static string::LinearString < std::string > generateLinearString ( size_t size, size_t alphabetSize, bool randomizedAlphabet, bool integerSymbols ); - - /** - * Generates a random string of given size. - * - * \param size the length of the generated string - * \param alphabetSize size of the alphabet (1-26 for characters) - * \param randomizedAlphabet selects random symbols from a-z range if true - * - * \return random string - */ - static string::LinearString < std::string > generateLinearString ( size_t size, size_t alphabetSize, bool randomizedAlphabet ); - /** * Generates a random string of given size * \tparam SymbolType the type of symbols of the random string @@ -52,8 +28,9 @@ public: * \return random string */ template < class SymbolType > - static string::LinearString < SymbolType > generateLinearString ( size_t size, ext::set < SymbolType > alphabet ); + static string::LinearString < SymbolType > generateLinearString ( size_t size, ext::vector < SymbolType > alphabet ); +public: /** * Generates a random string of given size * \tparam SymbolType the type of symbols of the random string @@ -64,7 +41,7 @@ public: * \return random string */ template < class SymbolType > - static string::LinearString < SymbolType > generateLinearString ( size_t size, ext::vector < SymbolType > alphabet ); + static string::LinearString < SymbolType > generateLinearString ( size_t size, 
ext::set < SymbolType > alphabet ); }; template < class SymbolType > diff --git a/alib2algo/test-src/stringology/matching/BackwardNondeterministicDAWGMatchingTest.cpp b/alib2algo/test-src/stringology/matching/BackwardNondeterministicDAWGMatchingTest.cpp index b585ade1a9f6d537b93b78cdb3cf321621de2535..60acc3b80760f72ed96416ea40dab0caf7ff7742 100644 --- a/alib2algo/test-src/stringology/matching/BackwardNondeterministicDAWGMatchingTest.cpp +++ b/alib2algo/test-src/stringology/matching/BackwardNondeterministicDAWGMatchingTest.cpp @@ -10,6 +10,8 @@ #include <string/generate/RandomStringFactory.h> #include <string/generate/RandomSubstringFactory.h> +#include <alphabet/generate/GenerateAlphabet.h> + TEST_CASE ( "Backward Nondeterministic DAWG", "[unit][algo][stringology][matching]" ) { SECTION ( "Simple" ) { ext::vector<std::string> subjects; @@ -37,9 +39,9 @@ TEST_CASE ( "Backward Nondeterministic DAWG", "[unit][algo][stringology][matchin INFO ( subjects[i] << ' ' << patterns[i] << ' ' << ext::to_string ( res1 ) ); } - auto longSubject = string::generate::RandomStringFactory::generateLinearString (64 * 64 * 64, 512, false, true); + auto longSubject = string::generate::RandomStringFactory::generateLinearString (64 * 64 * 64, alphabet::generate::GenerateAlphabet::generateIntegerAlphabet ( 512 ) ); auto longPattern = string::generate::RandomSubstringFactory::generateSubstring(64 * 32 * 32, longSubject ); - indexes::stringology::BitSetIndex < std::string > pattern = stringology::matching::BNDMMatcherConstruction::construct ( longPattern ); + indexes::stringology::BitSetIndex < int > pattern = stringology::matching::BNDMMatcherConstruction::construct ( longPattern ); ext::set < unsigned > res = stringology::query::BNDMOccurrences::query ( pattern, longSubject ); ext::set < unsigned > ref = stringology::exact::ExactFactorMatch::match ( longSubject, longPattern ); INFO ( "long: " << ext::to_string ( res ) ); diff --git a/alib2algo/test-src/stringology/query/DAWGMatcherTest.cpp 
b/alib2algo/test-src/stringology/query/DAWGMatcherTest.cpp index b13a7c8a1908fd50d1c59edb3faaec66a7d14fde..c970e1c725a3addf548791600aa5224b156d1465 100644 --- a/alib2algo/test-src/stringology/query/DAWGMatcherTest.cpp +++ b/alib2algo/test-src/stringology/query/DAWGMatcherTest.cpp @@ -8,6 +8,8 @@ #include "string/generate/RandomStringFactory.h" #include "string/generate/RandomSubstringFactory.h" +#include <alphabet/generate/GenerateAlphabet.h> + TEST_CASE ( "DAWG Matcher", "[unit][algo][stringology][query]" ) { SECTION ( "Test Backward DAWG Matching" ) { ext::vector<std::string> subjects; @@ -36,9 +38,9 @@ TEST_CASE ( "DAWG Matcher", "[unit][algo][stringology][query]" ) { CHECK ( res == expectedOccs[i] ); } - auto longSubject = string::generate::RandomStringFactory::generateLinearString (64 * 64, 512, false, true); + auto longSubject = string::generate::RandomStringFactory::generateLinearString (64 * 64, alphabet::generate::GenerateAlphabet::generateIntegerAlphabet ( 512 ) ); auto longPattern = string::generate::RandomSubstringFactory::generateSubstring(64 * 5, longSubject ); - indexes::stringology::SuffixAutomaton < std::string > suffixAutomaton = stringology::matching::DAWGMatcherConstruction::construct ( longPattern ); + indexes::stringology::SuffixAutomaton < int > suffixAutomaton = stringology::matching::DAWGMatcherConstruction::construct ( longPattern ); ext::set < unsigned > res = stringology::query::BackwardDAWGMatching::match ( longSubject, suffixAutomaton ); ext::set < unsigned > resRef = stringology::exact::ExactFactorMatch::match ( longSubject, longPattern ); INFO ( "long: " << ext::to_string ( res ) ); diff --git a/alib2algo/test-src/stringology/query/OracleMatcherTest.cpp b/alib2algo/test-src/stringology/query/OracleMatcherTest.cpp index 75122cec6042baca259ec525cd024abf758ca7a1..cbdf1c57d71ec9af83c070d53eaadbdd0088e7f7 100644 --- a/alib2algo/test-src/stringology/query/OracleMatcherTest.cpp +++ 
b/alib2algo/test-src/stringology/query/OracleMatcherTest.cpp @@ -8,6 +8,8 @@ #include "string/generate/RandomStringFactory.h" #include "string/generate/RandomSubstringFactory.h" +#include <alphabet/generate/GenerateAlphabet.h> + TEST_CASE ( "Oracle Matcher", "[unit][algo][stringology][query]" ) { SECTION ( "Test Backward Oracle Matching" ) { ext::vector<std::string> subjects; @@ -35,9 +37,9 @@ TEST_CASE ( "Oracle Matcher", "[unit][algo][stringology][query]" ) { CHECK ( res == expectedOccs[i] ); } - auto longSubject = string::generate::RandomStringFactory::generateLinearString (64 * 64, 512, false, true); + auto longSubject = string::generate::RandomStringFactory::generateLinearString (64 * 64, alphabet::generate::GenerateAlphabet::generateIntegerAlphabet ( 512 ) ); auto longPattern = string::generate::RandomSubstringFactory::generateSubstring(64 * 5, longSubject ); - automaton::DFA < std::string, unsigned > oracleAutomaton = stringology::matching::OracleMatcherConstruction::construct ( longPattern ); + automaton::DFA < int, unsigned > oracleAutomaton = stringology::matching::OracleMatcherConstruction::construct ( longPattern ); ext::set < unsigned > res = stringology::query::BackwardOracleMatching::match ( longSubject, oracleAutomaton ); ext::set < unsigned > resRef = stringology::exact::ExactFactorMatch::match ( longSubject, longPattern ); INFO ( "long: " << ext::to_string ( res ) ); diff --git a/alib2algo_experimental/test-src/stringology/query/CompactSuffixAutomatonQueryTest.cpp b/alib2algo_experimental/test-src/stringology/query/CompactSuffixAutomatonQueryTest.cpp index 9c44630d4a59805f53e7374011acb59616789317..e2fb0f8de8f3226288370b1845cc6f01495bc260 100644 --- a/alib2algo_experimental/test-src/stringology/query/CompactSuffixAutomatonQueryTest.cpp +++ b/alib2algo_experimental/test-src/stringology/query/CompactSuffixAutomatonQueryTest.cpp @@ -8,6 +8,8 @@ #include <string/generate/RandomStringFactory.h> #include <string/generate/RandomSubstringFactory.h> 
+#include <alphabet/generate/GenerateAlphabet.h> + TEST_CASE ( "CDAWG", "[unit][stringology]" ) { ext::vector<std::string> subjects; ext::vector<std::string> patterns; @@ -36,10 +38,10 @@ TEST_CASE ( "CDAWG", "[unit][stringology]" ) { CHECK ( res == ref ); } - auto longSubject = string::generate::RandomStringFactory::generateLinearString ( 4000, 26, false, true ); + auto longSubject = string::generate::RandomStringFactory::generateLinearString ( 4000, alphabet::generate::GenerateAlphabet::generateIntegerAlphabet ( 26 ) ); auto longPattern = string::generate::RandomSubstringFactory::generateSubstring ( 2, longSubject ); - indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < std::string > index = stringology::indexing::ExperimentalCompactSuffixAutomatonConstruct::construct ( longSubject ); + indexes::stringology::CompactSuffixAutomatonTerminatingSymbol < int > index = stringology::indexing::ExperimentalCompactSuffixAutomatonConstruct::construct ( longSubject ); ext::set < unsigned > res = stringology::query::CompactSuffixAutomatonFactors::query ( index, longPattern ); ext::set < unsigned > ref = stringology::exact::ExactFactorMatch::match ( longSubject, longPattern ); diff --git a/tests/cppaql/approximateMatching.cpp b/tests/cppaql/approximateMatching.cpp index cd2f6c2f30fd425376347dff621ae8a6973fd64e..c88fb74c7c4bb7eed1a011cf2d7e936011789504 100644 --- a/tests/cppaql/approximateMatching.cpp +++ b/tests/cppaql/approximateMatching.cpp @@ -23,7 +23,7 @@ static std::string qExtendAlphabet ( const std::string & s1, const std::string & } static std::string qGenString ( const size_t & len, const size_t &alph_len, const std::string & var ) { - return ext::concat ( "execute string::generate::RandomStringFactory ( size_t )", rand ( ) % len + 1, "( size_t )", rand ( ) % alph_len + 1, "true | string::simplify::NormalizeAlphabet - > $", var ); + return ext::concat ( "execute string::generate::RandomStringFactory ( size_t )", rand ( ) % len + 1, 
"<(alphabet::generate::GenerateAlphabet ( size_t )", rand ( ) % alph_len + 1, "true true) | string::simplify::NormalizeAlphabet - > $", var ); } static std::string qCreateMatchingAutomaton ( const std::string & algo, const std::string & pattern, const std::string & error, const std::string & var ) { diff --git a/tests/cppaql/exactMatching.cpp b/tests/cppaql/exactMatching.cpp index a7490977e7ccb4a6eb8f9d8785bf40afe776bcf7..ecd81609dc098dec3a37b80ec30485e7d0a1fb6a 100644 --- a/tests/cppaql/exactMatching.cpp +++ b/tests/cppaql/exactMatching.cpp @@ -15,7 +15,7 @@ static std::string qExtendAlphabet ( const std::string & s1, const std::string & } static std::string qGenString ( size_t min_len, size_t max_len, size_t alph_len, const std::string & var ) { - return ext::concat ( "execute string::generate::RandomStringFactory ( size_t )", rand ( ) % ( max_len - min_len + 1 ) + min_len + 1, "( size_t )", rand ( ) % alph_len + 1, "true | string::simplify::NormalizeAlphabet - > $", var ); + return ext::concat ( "execute string::generate::RandomStringFactory ( size_t )", rand ( ) % ( max_len - min_len + 1 ) + min_len + 1, " <(alphabet::generate::GenerateAlphabet (size_t)", rand ( ) % alph_len + 1, "true true) | string::simplify::NormalizeAlphabet - > $", var ); } TEST_CASE ( "ExactMatching", "[integration]" ) {