Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
RandomTreeFactory.cpp 13.20 KiB
/*
 * RandomTreeFactory.cpp
 *
 *  Created on: 18. 3. 2015
 *	  Author: Stepan Plachy
 */

#include "RandomTreeFactory.h"

#include <vector>
#include <map>
#include <set>
#include <algorithm>
#include <cstdlib>
#include <ctime>
#include <cmath>

#include <iostream>
#include <random>

#include <alphabet/SubtreeWildcardSymbol.h>
#include <alphabet/NonlinearVariableSymbol.h>
#include <exception/CommonException.h>

namespace tree {

namespace generate {

struct Node {
	char symbol;
	int depth;
	Node * right;
	Node * child = NULL;
	int rank = 0;

	Node ( ) : depth ( 0 ) { }
	Node ( Node * parent ) : depth ( parent->depth + 1 ) {
		if ( parent->child == NULL ) {
			parent->child = this;
			right = this;
		} else {
			right = parent->child->right;
			parent->child->right = this;
		}

		parent->rank++;
	}

	~Node ( ) {
		if ( child == NULL ) return;

		Node * ch = child;

		do {
			Node * tmp = ch->right;
			delete ch;
			ch = tmp;
		} while ( ch != child );
	}

	void rotateLeftBranch ( ) {
		if ( child != NULL ) child->rotateLeftBranch ( );

		if ( rank > 1 ) {
			int angle = std::random_devices::semirandom ( ) % rank;
			Node * newChild = child;

			for ( int i = 0; i < angle; i++ )
				newChild = newChild->right;
			child = newChild;
		}
	}

	void generateUnrankedSymbols ( const std::vector < char > & alphabet ) {
		symbol = alphabet[std::random_devices::semirandom ( ) % alphabet.size ( )];
		Node * nextChild = child;

		for ( int i = 0; i < rank; i++ ) {
			nextChild->generateUnrankedSymbols ( alphabet );
			nextChild = nextChild->right;
		}
	}

	void generateRankedSymbols ( const std::map < int, std::vector < char > > rankedAlphabet ) {
		const std::vector < char > & alphabet = rankedAlphabet.at ( rank );

		symbol = alphabet[std::random_devices::semirandom ( ) % alphabet.size ( )];
		Node * nextChild = child;

		for ( int i = 0; i < rank; i++ ) {
			nextChild->generateRankedSymbols ( rankedAlphabet );
			nextChild = nextChild->right;
		}
	}

	void fillRanks ( std::map < int, std::vector < char > > & rankedAlphabet ) {
		rankedAlphabet[rank];
		Node * nextChild = child;

		for ( int i = 0; i < rank; i++ ) {
			nextChild->fillRanks ( rankedAlphabet );
			nextChild = nextChild->right;
		}
	}

	std::tree < alphabet::Symbol > createUnrankedNode ( ) {
		std::vector < std::tree < alphabet::Symbol > > children;
		Node * nextChild = child;

		for ( int i = 0; i < rank; i++ ) {
			children.emplace_back ( std::tree < alphabet::Symbol > ( nextChild->createUnrankedNode ( ) ) );
			nextChild = nextChild->right;
		}

		return std::tree < alphabet::Symbol > ( alphabet::symbolFrom ( symbol ), std::move ( children ) );
	}

	std::tree < alphabet::Symbol > createUnrankedPatternNode ( ) {
		if ( rank == 0 ) {
			return std::tree < alphabet::Symbol > ( alphabet::Symbol ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD ), { } );
		} else {
			std::vector < std::tree < alphabet::Symbol > > children;
			Node * nextChild = child;

			for ( int i = 0; i < rank; i++ ) {
				children.emplace_back ( std::tree < alphabet::Symbol > ( nextChild->createUnrankedPatternNode ( ) ) );
				nextChild = nextChild->right;
			}

			return std::tree < alphabet::Symbol > ( alphabet::symbolFrom ( symbol ), std::move ( children ) );
		}
	}

	std::tree < alphabet::RankedSymbol > createRankedNode ( ) {
		std::vector < std::tree < alphabet::RankedSymbol > > children;
		Node * nextChild = child;

		for ( int i = 0; i < rank; i++ ) {
			children.emplace_back ( std::tree < alphabet::RankedSymbol > ( nextChild->createRankedNode ( ) ) );
			nextChild = nextChild->right;
		}

		return std::tree < alphabet::RankedSymbol > ( alphabet::RankedSymbol ( symbol, rank ), std::move ( children ) );
	}

	std::tree < alphabet::RankedSymbol > createRankedPatternNode ( ) {
		if ( rank == 0 ) {
			return std::tree < alphabet::RankedSymbol > ( alphabet::RankedSymbol ( alphabet::Symbol ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD ), 0 ), { } );
		} else {
			std::vector < std::tree < alphabet::RankedSymbol > > children;
			Node * nextChild = child;

			for ( int i = 0; i < rank; i++ ) {
				children.emplace_back ( std::tree < alphabet::RankedSymbol > ( nextChild->createRankedPatternNode ( ) ) );
				nextChild = nextChild->right;
			}

			return std::tree < alphabet::RankedSymbol > ( alphabet::RankedSymbol ( symbol, rank ), std::move ( children ) );
		}
	}

	std::tree < alphabet::RankedSymbol > createRankedNonlinearPatternNode ( bool singleNonlinearVariable ) {
		if ( rank == 0 ) {
			if ( singleNonlinearVariable )
				return std::tree < alphabet::RankedSymbol > ( alphabet::RankedSymbol ( alphabet::Symbol ( alphabet::NonlinearVariableSymbol ( alphabet::symbolFrom ( "A" ) ) ), 0 ), { } );
			else
				return std::tree < alphabet::RankedSymbol > ( alphabet::RankedSymbol ( alphabet::Symbol ( alphabet::NonlinearVariableSymbol ( symbol ) ), 0 ), { } );
		} else {
			std::vector < std::tree < alphabet::RankedSymbol > > children;
			Node * nextChild = child;

			for ( int i = 0; i < rank; i++ ) {
				children.emplace_back ( std::tree < alphabet::RankedSymbol > ( nextChild->createRankedNonlinearPatternNode ( singleNonlinearVariable ) ) );
				nextChild = nextChild->right;
			}

			return std::tree < alphabet::RankedSymbol > ( alphabet::RankedSymbol ( symbol, rank ), std::move ( children ) );
		}
	}

	void nicePrint ( std::ostream & os = std::cout, const std::string & prefix = "", const bool last = true ) const {
		os << prefix;

		std::string nextPrefix ( prefix );

		if ( last ) {
			os << "\\-";
			nextPrefix += "  ";
		} else {
			os << "|-";
			nextPrefix += "| ";
		}

		os << symbol << " (" << rank << ")" << std::endl;

		Node * nextChild = child;

		for ( int i = 0; i < rank; i++ ) {
			 // os << nextPrefix << "|" << std::endl;
			nextChild->nicePrint ( os, nextPrefix, i == rank - 1 );
			nextChild = nextChild->right;
		}
	}

};

std::vector < char > generateUnrankedAlphabet ( int maxAlphabetSize, bool randomizedAlphabet ) {
	std::vector < char > symbols ( 26 );
	for ( int i = 0; i < 26; i++ ) symbols[i] = i + 'a';

	if ( randomizedAlphabet ) shuffle ( symbols.begin ( ), symbols.end ( ), std::random_devices::semirandom );

	return std::vector < char > ( symbols.begin ( ), symbols.begin ( ) + maxAlphabetSize );
}

void generateRankedAlphabet ( std::map < int, std::vector < char > > & rankedAlphabet, int maxAlphabetSize, bool randomizedAlphabet ) {
	int ranksCount = rankedAlphabet.size ( );
	std::vector < char > unrankedAlphabet = generateUnrankedAlphabet ( maxAlphabetSize > ranksCount ? maxAlphabetSize : ranksCount, randomizedAlphabet );

	std::set < int > rankSeparators;

	rankSeparators.insert ( 0 );
	rankSeparators.insert ( unrankedAlphabet.size ( ) );

	while ( ( int ) rankSeparators.size ( ) != ranksCount + 1 /*&& rankSeparators.size() != maxRank + 2*/ ) rankSeparators.insert ( std::random_devices::semirandom ( ) % unrankedAlphabet.size ( ) );

	std::set < int >::iterator it = rankSeparators.begin ( );

	for ( std::map < int, std::vector < char > >::iterator i = rankedAlphabet.begin ( ); i != rankedAlphabet.end ( ); ++i ) {
		std::set < int >::iterator prevIt = it++;
		i->second.insert ( i->second.begin ( ), unrankedAlphabet.begin ( ) + * prevIt, unrankedAlphabet.begin ( ) + * it );
	}
}

Node * generateTreeStructure ( int depth, int nodesCount, int maxRank = INT_MAX ) {
	if ( depth >= nodesCount ) throw exception::CommonException ( "number of nodes is too small" );

	if ( ( maxRank != INT_MAX ) && ( pow ( maxRank, depth + 1 ) - 1 < nodesCount ) ) throw exception::CommonException ( "number of nodes is too small" );

	std::vector < Node * > nodes ( nodesCount );

	 // generate path depth long
	Node * root = nodes[0] = new Node ( );

	for ( int i = 1; i <= depth; i++ ) nodes[i] = new Node ( nodes[i - 1] );

	 // move final leaf to end
	nodes[nodesCount - 1] = nodes[depth];

	int availableNodesIndex = depth;
	int finalNodesIndex = nodesCount - 2;

	while ( finalNodesIndex >= availableNodesIndex ) {
		int randomIndex = std::random_devices::semirandom ( ) % availableNodesIndex;
		Node * parent = nodes[randomIndex];
		Node * node = new Node ( parent );

		 // put node to end if it reached depth limit
		if ( node->depth < depth ) {
			nodes[availableNodesIndex] = node;
			availableNodesIndex++;
		} else {
			nodes[finalNodesIndex] = node;
			finalNodesIndex--;
		}

		 // put parent node to end if it reached rank limit
		if ( parent->rank >= maxRank ) {
			nodes[randomIndex] = nodes[availableNodesIndex - 1];
			nodes[finalNodesIndex] = parent;
			availableNodesIndex--;
			finalNodesIndex--;
		}
	}

	 // move generated path to random branch
	root->rotateLeftBranch ( );
	return root;
}

UnrankedTree RandomTreeFactory::generateUnrankedTree ( int depth, int nodesCount, int maxAlphabetSize, bool randomizedAlphabet, int maxRank ) {
	Node * root = generateTreeStructure ( depth, nodesCount, maxRank );
	std::vector < char > alphabet = generateUnrankedAlphabet ( maxAlphabetSize, randomizedAlphabet );

	root->generateUnrankedSymbols ( alphabet );

	std::set < alphabet::Symbol > treeAlphabet;

	for ( char it : alphabet )
		treeAlphabet.insert ( alphabet::symbolFrom ( it ) );

	UnrankedTree tree ( treeAlphabet, root->createUnrankedNode ( ) );
	delete root;
	return tree;
}

UnrankedPattern RandomTreeFactory::generateUnrankedPattern ( int depth, int nodesCount, int maxAlphabetSize, bool randomizedAlphabet, int maxRank ) {
	Node * root = generateTreeStructure ( depth, nodesCount, maxRank );
	std::vector < char > alphabet = generateUnrankedAlphabet ( maxAlphabetSize, randomizedAlphabet );

	root->generateUnrankedSymbols ( alphabet );

	std::set < alphabet::Symbol > treeAlphabet;

	for ( std::vector < char >::const_iterator it = alphabet.begin ( ); it != alphabet.end ( ); ++it )
		treeAlphabet.insert ( alphabet::symbolFrom ( * it ) );

	alphabet::Symbol subtreeWildcard ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD );
	treeAlphabet.insert ( subtreeWildcard );
	UnrankedPattern tree ( std::move ( subtreeWildcard ), treeAlphabet, root->createUnrankedPatternNode ( ) );
	delete root;
	return tree;
}

RankedTree RandomTreeFactory::generateRankedTree ( int depth, int nodesCount, int maxAlphabetSize, bool randomizedAlphabet, int maxRank ) {
	Node * root = generateTreeStructure ( depth, nodesCount, maxRank );
	std::map < int, std::vector < char > > rankedAlphabet;

	root->fillRanks ( rankedAlphabet );
	generateRankedAlphabet ( rankedAlphabet, maxAlphabetSize, randomizedAlphabet );
	root->generateRankedSymbols ( rankedAlphabet );

	std::set < alphabet::RankedSymbol > treeRankedAlphabet;

	for ( const std::pair < int, std::vector < char > > & it : rankedAlphabet )
		for ( char i : it.second )
			treeRankedAlphabet.insert ( alphabet::RankedSymbol ( i, it.first ) );

	RankedTree tree ( treeRankedAlphabet, root->createRankedNode ( ) );
	delete root;
	return tree;
}

RankedPattern RandomTreeFactory::generateRankedPattern ( int depth, int nodesCount, int maxAlphabetSize, bool randomizedAlphabet, int maxRank ) {
	Node * root = generateTreeStructure ( depth, nodesCount, maxRank );
	std::map < int, std::vector < char > > rankedAlphabet;

	root->fillRanks ( rankedAlphabet );
	generateRankedAlphabet ( rankedAlphabet, maxAlphabetSize, randomizedAlphabet );
	root->generateRankedSymbols ( rankedAlphabet );

	std::set < alphabet::RankedSymbol > treeRankedAlphabet;

	for ( const std::pair < int, std::vector < char > > & it : rankedAlphabet )
		for ( char i : it.second )
			treeRankedAlphabet.insert ( alphabet::RankedSymbol ( i, it.first ) );
	alphabet::RankedSymbol subtreeWildcard ( alphabet::Symbol ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD ), 0 );
	treeRankedAlphabet.insert ( subtreeWildcard );
	RankedPattern tree ( std::move ( subtreeWildcard ), treeRankedAlphabet, root->createRankedPatternNode ( ) );
	delete root;
	return tree;
}

RankedNonlinearPattern RandomTreeFactory::generateRankedNonlinearPattern ( int depth, int nodesCount, int maxAlphabetSize, bool randomizedAlphabet, bool singleNonlinearVariable, int maxRank ) {
	Node * root = generateTreeStructure ( depth, nodesCount, maxRank );
	std::map < int, std::vector < char > > rankedAlphabet;

	root->fillRanks ( rankedAlphabet );
	generateRankedAlphabet ( rankedAlphabet, maxAlphabetSize, randomizedAlphabet );
	root->generateRankedSymbols ( rankedAlphabet );

	std::set < alphabet::RankedSymbol > treeRankedAlphabet;
	std::set < alphabet::RankedSymbol > nonlinearVariables;

	for ( const std::pair < int, std::vector < char > > & it : rankedAlphabet )
		for ( char i : it.second )
			treeRankedAlphabet.insert ( alphabet::RankedSymbol ( i, it.first ) );

	if ( singleNonlinearVariable )
		nonlinearVariables.insert ( alphabet::RankedSymbol ( alphabet::Symbol ( alphabet::NonlinearVariableSymbol ( alphabet::symbolFrom ( "A" ) ) ), 0 ) );
	else
		for ( char i : rankedAlphabet [ 0 ] )
			nonlinearVariables.insert ( alphabet::RankedSymbol ( alphabet::Symbol ( alphabet::NonlinearVariableSymbol ( i ) ), 0 ) );

	alphabet::RankedSymbol subtreeWildcard ( alphabet::Symbol ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD ), 0 );
	treeRankedAlphabet.insert ( subtreeWildcard );
	treeRankedAlphabet.insert ( nonlinearVariables.begin ( ), nonlinearVariables.end ( ) );
	RankedNonlinearPattern tree ( std::move ( subtreeWildcard ), nonlinearVariables, treeRankedAlphabet, root->createRankedNonlinearPatternNode ( singleNonlinearVariable ) );
	delete root;
	return tree;
}

} /* namespace generate */

} /* namespace automaton */