From cb2a9db438f164a531be3be8ae2f4b1fa638c964 Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Thu, 28 Aug 2014 09:44:24 +0200
Subject: [PATCH] determinization of nfa

---
 adeterminize2/makefile                        |  73 +++++++++++
 adeterminize2/src/adeterminize.cpp            | 124 ++++++++++++++++++
 .../src/determinize/nfa/NFADeterminizer.cpp   |  80 +++++++++++
 .../src/determinize/nfa/NFADeterminizer.h     |  56 ++++++++
 4 files changed, 333 insertions(+)
 create mode 100644 adeterminize2/makefile
 create mode 100644 adeterminize2/src/adeterminize.cpp
 create mode 100644 alib2algo/src/determinize/nfa/NFADeterminizer.cpp
 create mode 100644 alib2algo/src/determinize/nfa/NFADeterminizer.h

diff --git a/adeterminize2/makefile b/adeterminize2/makefile
new file mode 100644
index 0000000000..e972c74ff4
--- /dev/null
+++ b/adeterminize2/makefile
@@ -0,0 +1,73 @@
+SHELL:=/bin/bash
+EXECUTABLE:=adeterminize2
+
+LDFLAGS= -L../alib2data/lib -L../alib2algo/lib -rdynamic -lxml2 -lalib2data -lalib2algo -Wl,-rpath,.
+# The find pattern is quoted so the shell cannot expand it against files in the current directory.
+OBJECTS:=$(patsubst src/%.cpp, obj/%.o, $(shell find src/ -name "*.cpp"))
+
+.PHONY: all build clean
+
+all: build
+
+# Link the objects produced by the recursive build in obj/ into the executable.
+bin/$(EXECUTABLE): obj/ $(OBJECTS)
+	mkdir -p bin
+	$(CXX) $(OBJECTS) -o $@ $(LDFLAGS)
+# Generate obj/makefile, the makefile run recursively below obj/; the first echo truncates so a regeneration never appends to a stale copy.
+obj/makefile: makefile
+	mkdir -p $(dir $@)
+	echo "SHELL:=/bin/bash" > $@
+	echo "SRCDIR:=" >> $@
+	echo "DEPTH:=" >> $@
+	echo "" >> $@
+	echo "CXXFLAGS:= -std=c++11 -Og -g -c -Wall -pedantic -Wextra -I../../\$$(DEPTH)alib2data/src/ -I../../\$$(DEPTH)alib2algo/src -I/usr/include/libxml2/" >> $@
+	echo "" >> $@
+	echo "SOURCES:= \$$(shell find ../\$$(DEPTH)src/\$$(SRCDIR) -maxdepth 1 -type f -name \"*.cpp\")" >> $@
+	echo "DEPENDENCIES:= \$$(patsubst ../\$$(DEPTH)src/\$$(SRCDIR)%.cpp, ../\$$(DEPTH)obj/\$$(SRCDIR)%.d, \$$(SOURCES))" >> $@
+	echo "OBJECTS:= \$$(patsubst %.d, %.o, \$$(DEPENDENCIES))" >> $@
+	echo "SOURCES_DIRS:= \$$(shell find ../\$$(DEPTH)src/\$$(SRCDIR) -maxdepth 1 -mindepth 1 -type d)" >> $@
+	echo "OBJECTS_DIRS:= \$$(patsubst ../\$$(DEPTH)src/\$$(SRCDIR)%, %/, \$$(SOURCES_DIRS))" >> $@
+	echo "OBJECTS_DIRS_MAKEFILES:= \$$(patsubst %, %makefile, \$$(OBJECTS_DIRS))" >> $@
+	echo "" >> $@
+	echo ".PHONY: all" >> $@
+	echo ".PRECIOUS: \$$(DEPENDENCIES) \$$(OBJECTS_DIRS_MAKEFILES)" >> $@
+	echo "" >> $@
+	echo "all: \$$(OBJECTS_DIRS) \$$(OBJECTS)" >> $@
+	echo "" >> $@
+	echo "%.d:" >> $@
+	echo "	@echo \"\$$(shell sha1sum <<< \"\$$@\" | sed \"s/  -//g\") = \\$$\$$(shell (\\$$\$$(CXX) -MM \\$$\$$(CXXFLAGS) \$$(patsubst ../\$$(DEPTH)obj/\$$(SRCDIR)%.d,../\$$(DEPTH)src/\$$(SRCDIR)%.cpp, \$$@) 2>/dev/null || echo \\\"\$$(patsubst ../\$$(DEPTH)obj/\$$(SRCDIR)%.d,../\$$(DEPTH)src/\$$(SRCDIR)%.cpp, \$$@) FORCE\\\") | sed \\\"s/.*://g;s/\\\\\\\\\\\\\\\\//g\\\")\" >> \$$@" >> $@
+	echo "	@echo \"\$$(patsubst %.d,%.o, \$$@): \\$$\$$(\$$(shell sha1sum <<< \"\$$@\" | sed \"s/  -//g\"))\" >> \$$@" >> $@
+	echo "	@echo \"	\\$$\$$(CXX) \\$$\$$(CXXFLAGS) \\$$\$$< -o \$$(patsubst %.d,%.o, \$$@)\" >> \$$@" >> $@
+	echo "" >> $@
+	echo "%/makefile:" >> $@
+	echo "	mkdir -p \$$(dir \$$@)" >> $@
+	echo "	cp makefile \$$@" >> $@
+	echo "" >> $@
+	echo "%/: FORCE | %/makefile" >> $@
+	echo "	@accesstime=\`stat -c %Y \$$@\` && \\" >> $@
+	echo "	\$$(MAKE) -C \$$@ SRCDIR=\$$(SRCDIR)\$$(notdir \$$(patsubst %/, %, \$$@))/ DEPTH=\$$(DEPTH)../ && \\" >> $@
+	echo "	accesstime2=\`stat -c %Y \$$@\` && \\" >> $@
+	echo "	if [ "\$$\$$accesstime" -ne "\$$\$$accesstime2" ]; then \\" >> $@
+	echo "		touch .; \\" >> $@
+	echo "	fi" >> $@
+	echo "" >> $@
+	echo "FORCE:" >> $@
+	echo "" >> $@
+	echo "-include \$$(DEPENDENCIES)" >> $@
+# Always recurse into obj/ (FORCE); obj/makefile is an order-only prerequisite, so it only has to exist first.
+obj/: FORCE | obj/makefile
+	$(MAKE) -C $@
+# Object files are created by the recursive make in obj/, so they depend only on that target.
+$(OBJECTS): obj/
+
+# Goal behind the default 'all' target: link the executable.
+build: bin/$(EXECUTABLE)
+
+
+# Remove stray *.o / *.d files and the bin, lib, obj, test-bin and test-obj directories.
+clean:
+	$(RM) -r *.o *.d bin lib obj test-bin test-obj
+# Target without prerequisites or recipe; rules listing it are re-run on every invocation.
+FORCE:
+
diff --git a/adeterminize2/src/adeterminize.cpp b/adeterminize2/src/adeterminize.cpp
new file mode 100644
index 0000000000..144431885a
--- /dev/null
+++ b/adeterminize2/src/adeterminize.cpp
@@ -0,0 +1,124 @@
+#include <iostream>
+#include <getopt.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "automaton/Automaton.h"
+#include "automaton/FSM/NFA.h"
+#include "automaton/FSM/DFA.h"
+//#include "automaton/PDA/PDA.h"
+#include "factory/DataFactory.hpp"
+#include "exception/AlibException.h"
+
+#include "determinize/nfa/NFADeterminizer.h"
+//#include "idpda/IdpdaDeterminizer.h"
+//#include "vpa/VpaDeterminizer.h"
+//#include "vpa/VpaDeterminizer2.h"
+//#include "vpa/VpaDeterminizer3.h"
+//#include "rhdpda/RhdpdaDeterminizer.h"
+//#include "rhdpda/RhdpdaDeterminizer2.h"
+//#include "rhdpda/RhdpdaDeterminizer3.h"
+//#include "rhdpda/RhdpdaDeterminizer4.h"
+
+#define VERSION "0.0.1"
+
+#define TYPE_FSM "fsm"
+#define TYPE_IDPDA "idpda"
+#define TYPE_VPA "vpa"
+#define TYPE_RHDPDA "rhdpda"
+
+#define VERSION_1 "1"
+#define VERSION_2 "2"
+#define VERSION_3 "3"
+#define VERSION_4 "4"
+
+
+/**
+ * Prints the version line, usage and the list of accepted options to standard output.
+ */
+void printHelp() {
+	std::cout << "adeterminize " << VERSION << std::endl;
+	std::cout << "Determinize various types of automaton." << std::endl;
+	std::cout << "Usage: adeterminize -t TYPE [SWITCH...]" << std::endl;
+	std::cout << "Possible arguments:" << std::endl;
+	std::cout << "  -t, --type=TYPE \t Specifies type of input automaton, possible values are 'FSM' for finite-state machine, 'IDPDA' for input-driven pushdown automaton, 'VPA' for visible pushdown automaton and 'RHDPDA' for real-time height deterministic pushdown automaton." << std::endl;
+	std::cout << "  -a, --algorithm=VERSION_OF_ALGORITHM \t Specifies version of algorithm. This argument works only with VPA or RHDPDA type. VPA determinization has 3 versions (use numbers 1, 2 and 3) and RHDPDA determinization has 4 versions (use numbers 1, 2, 3 and 4). Default value is always number 1." << std::endl;
+	std::cout << "  -v, --version \t Displays version and this help message." << std::endl;
+	std::cout << "  -h, --help \t Displays this help message." << std::endl << std::endl;
+}
+
+
+/**
+ * Converts given type to lower case.
+ *
+ * @param type name of the automaton type, e.g. as passed on the command line
+ * @return copy of type with every character converted by tolower
+ */
+std::string getLowerCaseType(std::string type) {
+	for (char& c : type) {
+		// tolower is undefined for negative char values, hence the detour via unsigned char
+		c = static_cast<char>(tolower(static_cast<unsigned char>(c)));
+	}
+	return type;
+}
+
+// Entry point. Exit codes: 0 success or help shown, 1 AlibException (written to stdout), 2 usage error, 127 any other exception.
+int main(int argc, char** argv) {
+	static struct option longOptions[] = {
+	  {"type", required_argument, nullptr, 't'},
+	  {"algorithm", required_argument, nullptr, 'a'},
+	  {"version", no_argument, nullptr, 'v'},
+	  {"help", no_argument, nullptr, 'h'},
+	  {0, 0, 0, 0}
+	};
+
+	int longIndex = 0;
+	int opt = 0;
+	std::string type;
+	std::string version = "1"; // algorithm version; only read by the determinizers that are still commented out below
+
+	while ((opt = getopt_long(argc, argv, "t:a:vh", longOptions, &longIndex)) != -1) {
+		switch(opt) {
+			case 't':
+				type = optarg;
+				break;
+			case 'a':
+				version = optarg;
+				break;
+			case 'v': // the help text starts with the version line
+			case 'h':
+				printHelp();
+				return 0;
+			default: // unrecognised option or missing argument is a usage error, not success
+				printHelp();
+				return 2;
+		}
+	}
+
+	type = getLowerCaseType(type);
+
+	try {
+		if (type == TYPE_FSM) {
+			automaton::NFA nfa = alib::DataFactory::fromStdin<automaton::NFA>();
+			automaton::DFA dfa = determinize::NFADeterminizer::determinize(nfa);
+			alib::DataFactory::toStdout(dfa);
+			return 0;
+		}
+/*		} else if (type == TYPE_IDPDA) {
+			return new idpda::IdpdaDeterminizer((PDA*) automaton);
+		} else if (type == TYPE_VPA) {
+			return getVpaDeterminizer(automaton, version);
+		} else if (type == TYPE_RHDPDA) {
+			return getRhdpdaDeterminizer(automaton, version);
+		}*/
+
+		printHelp(); // reached whenever -t is missing or names a type that is not handled above
+		return 2;
+	} catch (const exception::AlibException& e) {
+		alib::DataFactory::toStdout(e);
+		return 1;
+	} catch (...) {
+		std::cerr << "adeterminize: unknown exception" << std::endl;
+		return 127;
+	}
+}
diff --git a/alib2algo/src/determinize/nfa/NFADeterminizer.cpp b/alib2algo/src/determinize/nfa/NFADeterminizer.cpp
new file mode 100644
index 0000000000..9af97b5284
--- /dev/null
+++ b/alib2algo/src/determinize/nfa/NFADeterminizer.cpp
@@ -0,0 +1,80 @@
+/*
+ * NFADeterminizer.cpp
+ *
+ *  Created on: 16. 1. 2014
+ *	  Author: Jan Vesely
+ */
+
+#include "NFADeterminizer.h"
+#include "label/LabelSetLabel.h"
+
+#include <deque>
+#include <algorithm>
+
+namespace determinize {
+
+
+// Builds the DFA state whose label is the set of the names of the given NFA states.
+automaton::State NFADeterminizer::createDFAState(const std::set<automaton::State>& nfaStates) {
+	std::set<label::Label> names;
+	for(const auto& nfaState : nfaStates)
+		names.insert(nfaState.getName());
+	return automaton::State(label::Label(label::LabelSetLabel(names)));
+}
+
+// Inverse of createDFAState: yields one NFA state per label held in the DFA state's LabelSetLabel (unchecked cast).
+std::set<automaton::State> NFADeterminizer::recreateNFAStates(const automaton::State& dfaState) {
+	std::set<automaton::State> nfaStates;
+	for (const auto& memberLabel : static_cast<const label::LabelSetLabel&>(dfaState.getName().getData()).getData())
+		nfaStates.insert(automaton::State(memberLabel));
+	return nfaStates;
+}
+
+automaton::DFA NFADeterminizer::determinize(const automaton::NFA& nfa) {
+	// The initial DFA state stands for the set of all initial NFA states.
+	automaton::State initialState(createDFAState(nfa.getInitialStates()));
+	automaton::DFA res(initialState);
+	res.setInputSymbols(nfa.getInputAlphabet());
+	const auto& transitions = nfa.getTransitions(); // bound once so find() and end() below use the same container
+
+	// Worklist of DFA states whose outgoing transitions have not been built yet.
+	std::deque<automaton::State> todo;
+	todo.push_back(initialState);
+
+	do {
+		automaton::State state = todo.front();
+		todo.pop_front();
+
+		// The NFA states behind `state` do not depend on the input symbol,
+		// so they are decoded once here instead of once per symbol.
+		const std::set<automaton::State> sourceNFAStates = recreateNFAStates(state);
+
+		for (const auto& input : nfa.getInputAlphabet()) {
+			// Union of the targets of every member NFA state on this symbol (may be empty).
+			std::set<automaton::State> targetNFAStates;
+			for(const auto& nfaState : sourceNFAStates) {
+				auto iter = transitions.find(std::make_pair(nfaState, input));
+				if(iter != transitions.end()) {
+					targetNFAStates.insert(iter->second.begin(), iter->second.end());
+				}
+			}
+			automaton::State dfaState = createDFAState(targetNFAStates);
+			// Only states not seen before are queued; addState is taken to return false for a known state.
+			bool existed = !res.addState(dfaState);
+			res.addTransition(state, input, dfaState);
+			if(!existed) todo.push_back(dfaState);
+		}
+	} while(!todo.empty());
+
+	// A DFA state is final when at least one of the NFA states it stands for is final.
+	for (const auto& dfaState : res.getStates()) {
+		std::set<automaton::State> nfaStates = recreateNFAStates(dfaState);
+		if(std::any_of(nfaStates.begin(), nfaStates.end(), [&](const automaton::State& nfaState) { return nfa.getFinalStates().count(nfaState) != 0; })) {
+			res.addFinalState(dfaState);
+		}
+	}
+
+	return res;
+}
+
+}
diff --git a/alib2algo/src/determinize/nfa/NFADeterminizer.h b/alib2algo/src/determinize/nfa/NFADeterminizer.h
new file mode 100644
index 0000000000..b0a7697fa0
--- /dev/null
+++ b/alib2algo/src/determinize/nfa/NFADeterminizer.h
@@ -0,0 +1,56 @@
+/*
+ * NFADeterminizer.h
+ *
+ *  Created on: 16. 1. 2014
+ *	  Author: Jan Vesely
+ */
+
+#ifndef NFA_DETERMINIZER_H_
+#define NFA_DETERMINIZER_H_
+
+#include <set>
+
+#include "automaton/common/State.h"
+#include "automaton/FSM/NFA.h"
+#include "automaton/FSM/DFA.h"
+
+namespace determinize {
+
+/**
+ * Determinizes a nondeterministic fsm; every state of the result stands for a set of states of the input.
+ */
+class NFADeterminizer {
+
+private:
+
+	/**
+	 * Creates the state of the deterministic fsm that represents the given set of states of the
+	 *  nondeterministic fsm. The state is only constructed, it is not added to any automaton.
+	 *
+	 * @param nfaStates set of states from nondeterministic fsm which represents state of deterministic fsm
+	 * @return state of deterministic fsm
+	 */
+	static automaton::State createDFAState(const std::set<automaton::State>& nfaStates);
+
+	/**
+	 * Recovers the set of states of the nondeterministic fsm that is represented by a state
+	 *  built by createDFAState.
+	 *
+	 * @param dfaState state of deterministic fsm; its label data is cast to
+	 *  label::LabelSetLabel without a check, so it must come from createDFAState
+	 * @return set of states from nondeterministic fsm
+	 */
+	static std::set<automaton::State> recreateNFAStates(const automaton::State& dfaState);
+public:
+	/**
+	 * Runs determinization algorithm on the given nondeterministic fsm.
+	 * @param nfa nondeterministic finite-state machine given for determinization
+	 * @return deterministic fsm over the same input alphabet
+	 */
+	static automaton::DFA determinize(const automaton::NFA& nfa);
+
+};
+
+} /* namespace determinize */
+
+#endif /* NFA_DETERMINIZER_H_ */
-- 
GitLab