From d0c88a2b2191248c593c04295186dbb3d1c0cca1 Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Mon, 29 Sep 2014 21:24:15 +0200
Subject: [PATCH] random regexp generator -- not so good... yet...

---
Review notes (this section is not part of the commit message):

 * RandomRegExpFactory.cpp: the hard-coded three- and four-way splits of the
   leaves could leave child 0 with zero or with -1 leaves (the latter was then
   converted to a huge size_t). They are replaced by a single loop that keeps
   one leaf in reserve for every child still to be served. The includes
   needed for rand/srand/time/std::string were added.
 * arand.cpp: --help and --height both returned 'h', so --help ended up in
   the height case with optarg == NULL; --help now returns 'H'. The height is
   parsed with std::stoi, the error messages of the RE branch talk about the
   right options, negative sizes are rejected and a missing or unknown type
   is reported as an error.
 * RandomRegExpFactory.h: the banner named the wrong file.
 * The line breaks and the indentation of this patch were restored from a
   copy with collapsed whitespace. Blank context lines of arand.cpp were
   placed so that the original hunk line counts are met; verify them against
   the tree before applying. The blob ids on the index lines are those of the
   original commit.

 .../src/generator/RandomRegExpFactory.cpp     | 176 ++++++++++++++++++
 alib2algo/src/generator/RandomRegExpFactory.h |  39 ++++
 arand2/src/arand.cpp                          | 103 +++++++---
 tests.aconversion.sh                          |   2 +-
 4 files changed, 292 insertions(+), 28 deletions(-)
 create mode 100644 alib2algo/src/generator/RandomRegExpFactory.cpp
 create mode 100644 alib2algo/src/generator/RandomRegExpFactory.h

diff --git a/alib2algo/src/generator/RandomRegExpFactory.cpp b/alib2algo/src/generator/RandomRegExpFactory.cpp
new file mode 100644
index 0000000000..7dfd8eca9e
--- /dev/null
+++ b/alib2algo/src/generator/RandomRegExpFactory.cpp
@@ -0,0 +1,176 @@
+/*
+ * RandomRegExpFactory.cpp
+ *
+ *  Created on: 27. 3. 2014
+ *      Author: Jan Travnicek
+ */
+
+#include "RandomRegExpFactory.h"
+
+#include <algorithm>
+#include <cstdlib>
+#include <ctime>
+#include <string>
+
+namespace generator {
+
+/**
+ * Generates a random regexp over a randomly chosen alphabet of lowercase letters.
+ *
+ * The C random number generator is reseeded from the current time on every call.
+ *
+ * @param leafNodes number of leaves to distribute in the syntax tree
+ * @param height maximal nesting depth of operators
+ * @param alphabetSize number of distinct letters to draw from 'a'..'z' (1 to 26)
+ * @return the generated regexp
+ * @throws exception::AlibException if alphabetSize is 0 or greater than 26
+ */
+regexp::UnboundedRegExp RandomRegExpFactory::generateUnboundedRegExp( size_t leafNodes, size_t height, size_t alphabetSize ) {
+	srand( time( NULL ) );
+
+	if(alphabetSize > 26)
+		throw exception::AlibException("Too big alphabet.");
+
+	if( alphabetSize == 0 )
+		throw exception::AlibException( "Alphabet size must be greater than 0." );
+
+	// the set ignores duplicates, so letters are drawn until enough distinct ones are collected
+	std::set<alphabet::Symbol> alphabet;
+	while( alphabet.size( ) < alphabetSize ) {
+		std::string s( 1, rand() % 26 + 'a' );
+		alphabet::Symbol symbol = alphabet::symbolFrom (s);
+		alphabet.insert( symbol );
+	}
+
+	return RandomRegExpFactory::generateUnboundedRegExp( leafNodes, height, alphabet );
+}
+
+/**
+ * Generates a random regexp over the given alphabet.
+ *
+ * The pool of leaves consists of the symbols of the alphabet and of the empty and
+ * epsilon regexps; the latter two are added once, twice (more than 6 symbols) or
+ * three times (more than 16 symbols).
+ *
+ * @param leafNodes number of leaves to distribute in the syntax tree
+ * @param height maximal nesting depth of operators
+ * @param alphabet symbols allowed in the regexp (1 to 26 symbols)
+ * @return the generated regexp
+ * @throws exception::AlibException if the alphabet is empty or has more than 26 symbols
+ */
+regexp::UnboundedRegExp RandomRegExpFactory::generateUnboundedRegExp( size_t leafNodes, size_t height, std::set<alphabet::Symbol> alphabet) {
+
+	if( alphabet.size() > 26)
+		throw exception::AlibException("Too big alphabet.");
+
+	if( alphabet.empty() )
+		throw exception::AlibException( "Alphabet size must be greater than 0." );
+
+	std::vector<const regexp::UnboundedRegExpElement*> elems;
+
+	{
+		elems.push_back(new regexp::UnboundedRegExpEmpty());
+		elems.push_back(new regexp::UnboundedRegExpEpsilon());
+	}
+	if(alphabet.size() > 6) {
+		elems.push_back(new regexp::UnboundedRegExpEmpty());
+		elems.push_back(new regexp::UnboundedRegExpEpsilon());
+	}
+	if(alphabet.size() > 16) {
+		elems.push_back(new regexp::UnboundedRegExpEmpty());
+		elems.push_back(new regexp::UnboundedRegExpEpsilon());
+	}
+
+	for(const alphabet::Symbol& symbol : alphabet) {
+		elems.push_back(new regexp::UnboundedRegExpSymbol(symbol));
+	}
+
+	regexp::UnboundedRegExp res = RandomRegExpFactory::SimpleUnboundedRegExp( leafNodes, height, elems );
+
+	for(const regexp::UnboundedRegExpElement* elem : elems ) {
+		delete elem;
+	}
+	return res;
+}
+
+/**
+ * Generates a random syntax tree and wraps it into a regexp.
+ *
+ * @param n number of leaves to distribute in the syntax tree
+ * @param h maximal nesting depth of operators
+ * @param elems pool of leaf prototypes, must not be empty
+ * @return the generated regexp
+ */
+regexp::UnboundedRegExp RandomRegExpFactory::SimpleUnboundedRegExp( size_t n, size_t h, const std::vector<const regexp::UnboundedRegExpElement*> & elems) {
+
+	const regexp::UnboundedRegExpElement* elem = SimpleUnboundedRegExpElement(n, h, elems);
+	regexp::UnboundedRegExp res(std::move(*elem));
+	delete elem;
+	return res;
+}
+
+/**
+ * Recursively generates a random syntax tree.
+ *
+ * A clone of a random leaf prototype is returned when n or h is zero. Otherwise
+ * an iteration (one child), or a concatenation or an alternation of two to four
+ * children is created; the n leaves are split among the children so that every
+ * child gets at least one of them.
+ *
+ * @param n number of leaves to distribute in the subtree
+ * @param h maximal nesting depth of operators in the subtree
+ * @param elems pool of leaf prototypes, must not be empty
+ * @return newly allocated subtree, the caller is responsible for deleting it
+ */
+const regexp::UnboundedRegExpElement* RandomRegExpFactory::SimpleUnboundedRegExpElement(size_t n, size_t h, const std::vector<const regexp::UnboundedRegExpElement*> & elems) {
+	if(h == 0 || n == 0) {
+		return elems[ rand( ) % elems.size( ) ]->clone();
+	} else {
+		unsigned childNodes = rand( ) % 10;
+		if(childNodes < 3) childNodes = 1;
+		else if(childNodes < 6) childNodes = 2;
+		else if(childNodes < 8) childNodes = 3;
+		else childNodes = 4;
+
+		childNodes = childNodes > n ? n : childNodes;
+
+		if(childNodes == 1) {
+			const regexp::UnboundedRegExpElement* subElem = SimpleUnboundedRegExpElement(n, h - 1, elems);
+			regexp::UnboundedRegExpIteration iter(std::move(*subElem));
+			delete subElem;
+			return new regexp::UnboundedRegExpIteration(iter);
+		}
+
+		// Split the n leaves among the children. While the size of child i is chosen,
+		// i leaves are kept in reserve so that children 0 .. i - 1 get at least one each.
+		size_t subSizes[4];
+		size_t remaining = n;
+		for(unsigned i = childNodes - 1; i > 0; i--) {
+			subSizes[i] = rand( ) % ( remaining - i ) + 1;
+			remaining -= subSizes[i];
+		}
+		subSizes[0] = remaining;
+
+		int nodeType = rand( ) % 2;
+		if(nodeType == 0) {
+			regexp::UnboundedRegExpConcatenation con;
+			for(unsigned i = 0; i < childNodes; i++) {
+				const regexp::UnboundedRegExpElement* subElem = SimpleUnboundedRegExpElement(subSizes[i], h - 1, elems);
+				con.appendElement(std::move(*subElem));
+				delete subElem;
+			}
+			return new regexp::UnboundedRegExpConcatenation(con);
+		} else {
+			regexp::UnboundedRegExpAlternation con;
+			for(unsigned i = 0; i < childNodes; i++) {
+				const regexp::UnboundedRegExpElement* subElem = SimpleUnboundedRegExpElement(subSizes[i], h - 1, elems);
+				con.appendElement(std::move(*subElem));
+				delete subElem;
+			}
+			return new regexp::UnboundedRegExpAlternation(con);
+		}
+
+	}
+}
+
+}
diff --git a/alib2algo/src/generator/RandomRegExpFactory.h b/alib2algo/src/generator/RandomRegExpFactory.h
new file mode 100644
index 0000000000..0459faf3ac
--- /dev/null
+++ b/alib2algo/src/generator/RandomRegExpFactory.h
@@ -0,0 +1,39 @@
+/*
+ * RandomRegExpFactory.h
+ *
+ *  Created on: 27. 3. 2014
+ *      Author: Jan Travnicek
+ */
+
+#ifndef RANDOM_REG_EXP_FACTORY_H_
+#define RANDOM_REG_EXP_FACTORY_H_
+
+#include <set>
+#include <vector>
+
+#include <exception/AlibException.h>
+#include <regexp/unbounded/UnboundedRegExp.h>
+#include <regexp/unbounded/UnboundedRegExpElements.h>
+
+namespace generator {
+
+/**
+ * Generator of random unbounded regular expressions.
+ */
+class RandomRegExpFactory {
+public:
+	/** Generates a random regexp over alphabetSize (1 to 26) randomly chosen lowercase letters. */
+	static regexp::UnboundedRegExp generateUnboundedRegExp( size_t leafNodes, size_t height, size_t alphabetSize );
+	/** Generates a random regexp over the given alphabet (1 to 26 symbols). */
+	static regexp::UnboundedRegExp generateUnboundedRegExp( size_t leafNodes, size_t height, std::set<alphabet::Symbol> alphabet);
+
+private:
+	/** Generates a random syntax tree and wraps it into a regexp. */
+	static regexp::UnboundedRegExp SimpleUnboundedRegExp( size_t n, size_t h, const std::vector<const regexp::UnboundedRegExpElement*> & elems);
+	/** Generates a random syntax tree, the caller is responsible for deleting it. */
+	static const regexp::UnboundedRegExpElement* SimpleUnboundedRegExpElement(size_t n, size_t h, const std::vector<const regexp::UnboundedRegExpElement*> & elems);
+};
+
+}
+
+#endif /* RANDOM_REG_EXP_FACTORY_H_ */
diff --git a/arand2/src/arand.cpp b/arand2/src/arand.cpp
index f7898fbbf2..2f04a34819 100644
--- a/arand2/src/arand.cpp
+++ b/arand2/src/arand.cpp
@@ -10,18 +10,25 @@
 #include <climits>
 #include <cfloat>
 #include <getopt.h>
+#include <cstring>
 
 #include <automaton/FSM/NFA.h>
+#include <regexp/unbounded/UnboundedRegExp.h>
 #include <factory/DataFactory.hpp>
 #include "generator/RandomAutomatonFactory.h"
+#include "generator/RandomRegExpFactory.h"
 
 void help( void )
 {
 	std::cout << "arand" << std::endl;
 	std::cout << "Generates random automaton." << std::endl;
-	std::cout << "Usage: arand -n=N -d=D -a=A" << std::endl << std::endl;
+	std::cout << "Usage: arand -t type -n=N -d=D -a=A" << std::endl << std::endl;
+	std::cout << "     --help \t Shows this help." << std::endl;
+	std::cout << " -t, --type FSM/RE. " << std::endl;
 	std::cout << " -n, --nodes=N \t Number of nodes of automata." << std::endl;
 	std::cout << " -d, --density=D \t Percentage of transitions to generate. 1.0 = automata graph is tree, 100.0 = every possible transition." << std::endl;
+	std::cout << " -l, --leafNodes=N \t Number of leaf nodes of regexp." << std::endl;
+	std::cout << " -h, --height=N \t Height of regexp." << std::endl;
 	std::cout << " -a, --alphabet=A \t Alphabet size." << std::endl;
 }
 
@@ -30,31 +37,37 @@ int main(int argc, char* argv[])
 {
 	static struct option long_options[] = {
 		// common
-		{"help",	no_argument,		NULL, 'h'},
-//		{"type",	required_argument,	NULL, 't'},
+		{"help",	no_argument,		NULL, 'H'},	// long only, -h is taken by --height
+		{"alphabet",	required_argument,	NULL, 'a' },
+		{"type",	required_argument,	NULL, 't'},
 
 		// automaton options
 		{"density",	required_argument,	NULL, 'd' },
 		{"nodes",	required_argument,	NULL, 'n' },
-		{"alphabet",	required_argument,	NULL, 'a' },
+
+		// regexp options
+		{"leafNodes",	required_argument,	NULL, 'l' },
+		{"height",	required_argument,	NULL, 'h' },
+
+		// terminating entry required by getopt_long
 		{0, 0, 0, '\0'}
 	};
 
 	int long_index = 0, opt = 0;
 
 //	string type;
-	int statesCount = INT_MAX, alphabetSize = INT_MAX;
+	int statesCount = INT_MAX, alphabetSize = INT_MAX, leafNodes = INT_MAX, height = INT_MAX;
 	double density = DBL_MAX;
+	std::string type;
 
-	while( ( opt = getopt_long( argc, argv,"ht:d:n:a:", long_options, & long_index ) ) != -1 )
+	while( ( opt = getopt_long( argc, argv,"a:t:d:n:l:h:", long_options, & long_index ) ) != -1 )
 	{
 		switch( opt )
 		{
-		/*
+
 		case 't':
 			type.assign( optarg, strlen( optarg ) );
 			break;
-			*/
 
 		case 'n':
 			statesCount = std::stoi( optarg, nullptr, 10 );
@@ -64,40 +77,76 @@ int main(int argc, char* argv[])
 			density = std::stod( optarg, nullptr );
 			break;
 
+		case 'l':
+			leafNodes = std::stoi( optarg, nullptr, 10 );
+			break;
+
+		case 'h':
+			height = std::stoi( optarg, nullptr, 10 );
+			break;
+
 		case 'a':
 			alphabetSize = std::stoi( optarg, nullptr, 10 );
 			break;
 
 		case 'v':
-		case 'h':
+		case 'H':
 		default:
 			help( );
 			return 0;
 		}
 	}
 
-	if( density < 1 || density > 100 )
-	{
-		//TODO: floating point arithmetic
-		std::cerr << "You must specify density as a number between 1 and 100." << std::endl;
-		return 1;
+	if( type == "FSM" ) {
 
-	}
+		if( density < 1 || density > 100 )
+		{
+			//TODO: floating point arithmetic
+			std::cerr << "You must specify density as a number between 1 and 100." << std::endl;
+			return 1;
+		}
 
-	if( statesCount == INT_MAX )
-	{
-		std::cerr << "You must specify number of states." << std::endl;
-		return 1;
-	}
+		if( statesCount == INT_MAX )
+		{
+			std::cerr << "You must specify number of states." << std::endl;
+			return 1;
+		}
 
-	if( alphabetSize == INT_MAX )
-	{
-		std::cerr << "You must specify alphabet or at least its size." << std::endl;
-		return 1;
-	}
+		if( alphabetSize == INT_MAX )
+		{
+			std::cerr << "You must specify alphabet or at least its size." << std::endl;
+			return 1;
+		}
+
+		automaton::NFA res = generator::RandomAutomatonFactory::generateNFA( statesCount, alphabetSize, density );
+		alib::DataFactory::toStdout(res);
+	} else if( type == "RE" ) {
 
-	automaton::NFA res = generator::RandomAutomatonFactory::generateNFA( statesCount, alphabetSize, density );
-	alib::DataFactory::toStdout(res);
+		// both sizes are passed on as size_t, so negative numbers must be rejected here
+		if( height == INT_MAX || height < 0 )
+		{
+			std::cerr << "You must specify height of the regexp as a non-negative number." << std::endl;
+			return 1;
+		}
+
+		if( leafNodes == INT_MAX || leafNodes < 0 )
+		{
+			std::cerr << "You must specify number of leaf nodes of the regexp as a non-negative number." << std::endl;
+			return 1;
+		}
+
+		if( alphabetSize == INT_MAX )
+		{
+			std::cerr << "You must specify alphabet or at least its size." << std::endl;
+			return 1;
+		}
+
+		regexp::UnboundedRegExp res = generator::RandomRegExpFactory::generateUnboundedRegExp(leafNodes, height, alphabetSize );
+		alib::DataFactory::toStdout(res);
+	} else {
+		std::cerr << "You must specify type of the generated object, either FSM or RE." << std::endl;
+		return 1;
+	}
 
 	return 0;
 }
diff --git a/tests.aconversion.sh b/tests.aconversion.sh
index 42a8e65f8a..2216a35f9f 100755
--- a/tests.aconversion.sh
+++ b/tests.aconversion.sh
@@ -48,7 +48,7 @@ function log {
 }
 
 function generateNFA {
-	./arand2 -d $RAND_DENSITY -n $(( $RANDOM % $RAND_STATES + 1 )) -a $(( $RANDOM % $RAND_ALPHABET + 1 )) 2>/dev/null
+	./arand2 -t FSM -d $RAND_DENSITY -n $(( $RANDOM % $RAND_STATES + 1 )) -a $(( $RANDOM % $RAND_ALPHABET + 1 )) 2>/dev/null
 }
 
 # $1 = command for conversion. Output of such command must be (eps-)NFA !!
-- 
GitLab