From d0c88a2b2191248c593c04295186dbb3d1c0cca1 Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Mon, 29 Sep 2014 21:24:15 +0200
Subject: [PATCH] random regexp generator -- not so good... yet...

---
 .../src/generator/RandomRegExpFactory.cpp     | 145 ++++++++++++++++++
 alib2algo/src/generator/RandomRegExpFactory.h |  32 ++++
 arand2/src/arand.cpp                          |  98 ++++++++----
 tests.aconversion.sh                          |   2 +-
 4 files changed, 249 insertions(+), 28 deletions(-)
 create mode 100644 alib2algo/src/generator/RandomRegExpFactory.cpp
 create mode 100644 alib2algo/src/generator/RandomRegExpFactory.h

diff --git a/alib2algo/src/generator/RandomRegExpFactory.cpp b/alib2algo/src/generator/RandomRegExpFactory.cpp
new file mode 100644
index 0000000000..7dfd8eca9e
--- /dev/null
+++ b/alib2algo/src/generator/RandomRegExpFactory.cpp
@@ -0,0 +1,145 @@
+/*
+ * RandomRegExpFactory.cpp
+ *
+ *  Created on: 27. 3. 2014
+ *	  Author: Jan Travnicek
+ */
+
+#include "RandomRegExpFactory.h"
+
+#include <algorithm>
+
+namespace generator {
+
+regexp::UnboundedRegExp RandomRegExpFactory::generateUnboundedRegExp( size_t leafNodes, size_t height, size_t alphabetSize ) {
+	// The C PRNG is reseeded from the wall clock on every call.
+	srand( time( NULL ) );
+	// Symbols are single letters 'a'..'z', so at most 26 distinct ones exist.
+	if( alphabetSize > 26 )
+		throw exception::AlibException("Too big alphabet.");
+	if( alphabetSize == 0 )
+		throw exception::AlibException( "Alphabet size must be greater than 0." );
+
+	// Keep drawing random letters until enough distinct symbols were collected.
+	std::set<alphabet::Symbol> symbols;
+	while( symbols.size( ) < alphabetSize ) {
+		std::string letter( 1, rand() % 26 + 'a' );
+		symbols.insert( alphabet::symbolFrom( letter ) );
+	}
+
+	return RandomRegExpFactory::generateUnboundedRegExp( leafNodes, height, symbols );
+}
+
+regexp::UnboundedRegExp RandomRegExpFactory::generateUnboundedRegExp( size_t leafNodes, size_t height, std::set<alphabet::Symbol> alphabet) {
+	// Reject unsupported alphabet sizes before anything is allocated.
+	const size_t symbolCount = alphabet.size();
+	if( symbolCount > 26 )
+		throw exception::AlibException("Too big alphabet.");
+	if( symbolCount == 0 )
+		throw exception::AlibException( "Alphabet size must be greater than 0." );
+
+	// Pool of prototype leaves handed to the recursive generator, which clones
+	// randomly chosen entries; entries that occur more than once are drawn more often.
+	std::vector<const regexp::UnboundedRegExpElement*> prototypes;
+	// One empty/epsilon pair always, a second pair for more than 6 symbols and
+	// a third pair for more than 16 symbols.
+	size_t specialPairs = 1;
+	if( symbolCount > 6 )
+		specialPairs++;
+	if( symbolCount > 16 )
+		specialPairs++;
+	for( size_t i = 0; i < specialPairs; i++ ) {
+		prototypes.push_back(new regexp::UnboundedRegExpEmpty());
+		prototypes.push_back(new regexp::UnboundedRegExpEpsilon());
+	}
+
+	// One symbol leaf per symbol of the alphabet.
+	for(const alphabet::Symbol& symbol : alphabet)
+		prototypes.push_back(new regexp::UnboundedRegExpSymbol(symbol));
+
+	regexp::UnboundedRegExp res = RandomRegExpFactory::SimpleUnboundedRegExp( leafNodes, height, prototypes );
+
+	// Release the prototypes now that the expression has been generated.
+	for(const regexp::UnboundedRegExpElement* prototype : prototypes )
+		delete prototype;
+	return res;
+}
+
+regexp::UnboundedRegExp RandomRegExpFactory::SimpleUnboundedRegExp( size_t n, size_t h, const std::vector<const regexp::UnboundedRegExpElement*> & elems) {
+	// Generate the root element, wrap it into a regexp and free the temporary tree.
+	const regexp::UnboundedRegExpElement* root = SimpleUnboundedRegExpElement(n, h, elems);
+	regexp::UnboundedRegExp wrapped(std::move(*root));
+	delete root;
+	return wrapped;
+}
+
+/*
+ * Recursively builds a random regexp subtree.
+ *
+ * n is the leaf budget handed to this subtree and h the remaining height; when
+ * either is 0, a clone of a random entry of elems is returned as a leaf (elems
+ * must not be empty, its size is used as a modulus). Otherwise the node becomes
+ * an iteration (1 child) or a concatenation/alternation (2 to 4 children).
+ * The returned pointer is owned by the caller; the callers in this file delete it.
+ */
+const regexp::UnboundedRegExpElement* RandomRegExpFactory::SimpleUnboundedRegExpElement(size_t n, size_t h, const std::vector<const regexp::UnboundedRegExpElement*> & elems) {
+	if(h == 0 || n == 0) {
+		// Budget or height exhausted: finish the branch with a random leaf.
+		return elems[ rand( ) % elems.size( ) ]->clone();
+	} else {
+		// Arity before clamping: 1 child 3/10, 2 children 3/10, 3 children 2/10, 4 children 2/10.
+		unsigned childNodes = rand( ) % 10;
+		if(childNodes <  3) childNodes = 1;
+		else if(childNodes <  6) childNodes = 2;
+		else if(childNodes <  8) childNodes = 3;
+		else childNodes = 4;
+
+		// Never create more children than there are leaves to hand out.
+		childNodes = childNodes > n ? n : childNodes;
+
+		// Split the n leaves so that every child gets at least one. While sizing
+		// child i, the i children with lower indices still need a leaf each, hence
+		// the modulus of remaining - i. The previous fixed modulus of n - 1 could
+		// leave child 0 with 0 leaves (3 children) or with -1 (4 children), which
+		// wrapped around to a huge size_t leaf budget in the recursive call.
+		size_t subSizes[4];
+		size_t remaining = n;
+		for(unsigned i = childNodes - 1; i > 0; i--) {
+			subSizes[i] = rand( ) % ( remaining - i ) + 1;
+			remaining -= subSizes[i];
+		}
+		subSizes[0] = remaining;
+
+		if(childNodes == 1) {
+			// A single child is wrapped into an iteration and keeps the whole budget.
+			const regexp::UnboundedRegExpElement* subElem = SimpleUnboundedRegExpElement(n, h - 1, elems);
+			regexp::UnboundedRegExpIteration iter(std::move(*subElem));
+			delete subElem;
+			return new regexp::UnboundedRegExpIteration(iter);
+		}
+
+		// Two or more children: concatenation or alternation, chosen by a coin flip.
+		int nodeType = rand( ) % 2;
+		if(nodeType == 0) {
+			regexp::UnboundedRegExpConcatenation con;
+			for(unsigned i = 0; i < childNodes; i++) {
+				// Children are appended in index order; the temporary subtree is freed afterwards.
+				const regexp::UnboundedRegExpElement* subElem = SimpleUnboundedRegExpElement(subSizes[i], h - 1, elems);
+				con.appendElement(std::move(*subElem));
+				delete subElem;
+			}
+			return new regexp::UnboundedRegExpConcatenation(con);
+		} else {
+			regexp::UnboundedRegExpAlternation con;
+			for(unsigned i = 0; i < childNodes; i++) {
+				// Children are appended in index order; the temporary subtree is freed afterwards.
+				const regexp::UnboundedRegExpElement* subElem = SimpleUnboundedRegExpElement(subSizes[i], h - 1, elems);
+				con.appendElement(std::move(*subElem));
+				delete subElem;
+			}
+			return new regexp::UnboundedRegExpAlternation(con);
+		}
+	}
+}
+
+}
diff --git a/alib2algo/src/generator/RandomRegExpFactory.h b/alib2algo/src/generator/RandomRegExpFactory.h
new file mode 100644
index 0000000000..0459faf3ac
--- /dev/null
+++ b/alib2algo/src/generator/RandomRegExpFactory.h
@@ -0,0 +1,32 @@
+/*
+ * RandomRegExpFactory.h
+ *
+ *  Created on: 27. 3. 2014
+ *      Author: Jan Travnicek
+ */
+
+#ifndef RANDOM_REG_EXP_FACTORY_H_
+#define RANDOM_REG_EXP_FACTORY_H_
+
+#include <set>
+#include <vector>
+
+#include <exception/AlibException.h>
+#include <regexp/unbounded/UnboundedRegExp.h>
+#include <regexp/unbounded/UnboundedRegExpElements.h>
+
+namespace generator {
+
+class RandomRegExpFactory {	// only static members: generator of random unbounded regular expressions
+public:
+	static regexp::UnboundedRegExp generateUnboundedRegExp( size_t leafNodes, size_t height, size_t alphabetSize );	// draws alphabetSize (1..26) random lowercase letters, then delegates to the overload below
+	static regexp::UnboundedRegExp generateUnboundedRegExp( size_t leafNodes, size_t height, std::set<alphabet::Symbol> alphabet);	// throws AlibException for an empty alphabet or one with more than 26 symbols
+
+private:
+	static regexp::UnboundedRegExp SimpleUnboundedRegExp( size_t n, size_t h, const std::vector<const regexp::UnboundedRegExpElement*> & elems);	// wraps the generated root element into a regexp
+	static const regexp::UnboundedRegExpElement* SimpleUnboundedRegExpElement(size_t n, size_t h, const std::vector<const regexp::UnboundedRegExpElement*> & elems);	// recursive worker; the caller deletes the result
+};
+
+}
+
+#endif /* RANDOM_REG_EXP_FACTORY_H_ */
diff --git a/arand2/src/arand.cpp b/arand2/src/arand.cpp
index f7898fbbf2..2f04a34819 100644
--- a/arand2/src/arand.cpp
+++ b/arand2/src/arand.cpp
@@ -10,18 +10,24 @@
 #include <climits>
 #include <cfloat>
 #include <getopt.h>
+#include <cstring>
 
 #include <automaton/FSM/NFA.h>
+#include <regexp/unbounded/UnboundedRegExp.h>
 #include <factory/DataFactory.hpp>
 #include "generator/RandomAutomatonFactory.h"
+#include "generator/RandomRegExpFactory.h"
 
 void help( void )
 {
 	std::cout << "arand" << std::endl;
 	std::cout << "Generates random automaton." << std::endl;
-	std::cout << "Usage: arand -n=N -d=D -a=A" << std::endl << std::endl;
+	std::cout << "Usage: arand -t type -n=N -d=D -a=A" << std::endl << std::endl;
+	std::cout << "  -t, --type FSM/RE. " << std::endl;
 	std::cout << "  -n, --nodes=N \t Number of nodes of automata." << std::endl;
 	std::cout << "  -d, --density=D \t Percentage of transitions to generate. 1.0 = automata graph is tree, 100.0 = every possible transition." << std::endl;
+	std::cout << "  -l, --leafNodes=N \t Number of leaf nodes of regexp." << std::endl;
+	std::cout << "  -h, --height=N \t Height of regexp." << std::endl;
 	std::cout << "  -a, --alphabet=A \t Alphabet size." << std::endl;
 }
 
@@ -30,31 +36,37 @@ int main(int argc, char* argv[])
 {
 	static struct option long_options[] = {
 	  // common
-	  {"help",	no_argument,		NULL, 'h'},
-//	  {"type",	required_argument,	NULL, 't'},
+	  {"help",		no_argument,		NULL,	'H'},	// not 'h': that value now belongs to --height, whose case parses optarg; 'H' has no case and reaches default, which prints the help
+	  {"alphabet",		required_argument,	NULL,	'a' },
+	  {"type",		required_argument,	NULL,	't'},
 
 	  // automaton options
 	  {"density",		required_argument,	NULL,	'd' },
 	  {"nodes",		required_argument,	NULL,	'n' },
-	  {"alphabet",		required_argument,	NULL,	'a' },
+
+	  // regexp options
+	  {"leafNodes",		required_argument,	NULL,	'l' },
+	  {"height",		required_argument,	NULL,	'h' },
+
+	  // terminating entry
 	  {0,			0,			0,	'\0'}
 	};
 
 	int long_index = 0, opt = 0;
 
 	// string type;
-	int statesCount = INT_MAX, alphabetSize = INT_MAX;
+	int statesCount = INT_MAX, alphabetSize = INT_MAX, leafNodes = INT_MAX, height = INT_MAX;
 	double density = DBL_MAX;
+	std::string type;
 
-	while( ( opt = getopt_long( argc, argv,"ht:d:n:a:", long_options, & long_index ) )  != -1 )
+	while( ( opt = getopt_long( argc, argv,"a:t:d:n:l:h:", long_options, & long_index ) )  != -1 )
 	{
 		switch( opt )
 		{
-		/*
+
 		case 't':
 			type.assign( optarg, strlen( optarg ) );
 			break;
-		 */
 
 		case 'n':
 			statesCount = std::stoi( optarg, nullptr, 10 );
@@ -64,40 +76,72 @@ int main(int argc, char* argv[])
 			density = std::stod( optarg, nullptr );
 			break;
 
+		case 'l':
+			leafNodes = std::stoi( optarg, nullptr, 10 );
+			break;
+
+		case 'h':
+			height = std::stoi( optarg, nullptr, 10 );	// height is an int like the other counts; it used to be parsed with std::stod
+			break;
+
 		case 'a':
 			alphabetSize = std::stoi( optarg, nullptr, 10 );
 			break;
 
 		case 'v':
-		case 'h':
 		default:
 			help( );
 			return 0;
 		}
 	}
 
-	if( density < 1 || density > 100 )
-	{
-		//TODO: floating point arithmetic
-		std::cerr << "You must specify density as a number between 1 and 100." << std::endl;
-		return 1;
+	if( type == "FSM" ) {
 
-	}
+		if( density < 1 || density > 100 )
+		{
+			//TODO: floating point arithmetic
+			std::cerr << "You must specify density as a number between 1 and 100." << std::endl;
+			return 1;
+		}
 
-	if( statesCount == INT_MAX )
-	{
-		std::cerr << "You must specify number of states." << std::endl;
-		return 1;
-	}
+		if( statesCount == INT_MAX )
+		{
+			std::cerr << "You must specify number of states." << std::endl;
+			return 1;
+		}
 
-	if( alphabetSize == INT_MAX )
-	{
-		std::cerr << "You must specify alphabet or at least its size." << std::endl;
-		return 1;
-	}
+		if( alphabetSize == INT_MAX )
+		{
+			std::cerr << "You must specify alphabet or at least its size." << std::endl;
+			return 1;
+		}
+
+		automaton::NFA res = generator::RandomAutomatonFactory::generateNFA( statesCount, alphabetSize, density );
+		alib::DataFactory::toStdout(res);
+	} else if( type == "RE" ) {
 
-	automaton::NFA res = generator::RandomAutomatonFactory::generateNFA( statesCount, alphabetSize, density );
-	alib::DataFactory::toStdout(res);
+		// INT_MAX is the "not given" default; negative values would wrap around as size_t.
+		if( height == INT_MAX || height < 0 )
+		{
+			std::cerr << "You must specify height of the regexp as a non-negative number." << std::endl;
+			return 1;
+		}
+		if( leafNodes == INT_MAX || leafNodes < 0 )
+		{
+			std::cerr << "You must specify number of leaf nodes as a non-negative number." << std::endl;
+			return 1;
+		}
+		if( alphabetSize == INT_MAX )
+		{
+			std::cerr << "You must specify alphabet or at least its size." << std::endl;
+			return 1;
+		}
+		regexp::UnboundedRegExp res = generator::RandomRegExpFactory::generateUnboundedRegExp(leafNodes, height, alphabetSize );
+		alib::DataFactory::toStdout(res);
+	} else {
+		std::cerr << "You must specify type of the generated object: FSM or RE." << std::endl;
+		return 1;
+	}
 
 	return 0;
 }
diff --git a/tests.aconversion.sh b/tests.aconversion.sh
index 42a8e65f8a..2216a35f9f 100755
--- a/tests.aconversion.sh
+++ b/tests.aconversion.sh
@@ -48,7 +48,7 @@ function log {
 }
 
 function generateNFA {
-	./arand2 -d $RAND_DENSITY -n $(( $RANDOM % $RAND_STATES + 1 )) -a $(( $RANDOM % $RAND_ALPHABET + 1 )) 2>/dev/null
+	./arand2 -t FSM -d $RAND_DENSITY -n $(( $RANDOM % $RAND_STATES + 1 )) -a $(( $RANDOM % $RAND_ALPHABET + 1 )) 2>/dev/null
 }
 
 # $1 = command for conversion. Output of such command must be (eps-)NFA !!
-- 
GitLab