From cb2a9db438f164a531be3be8ae2f4b1fa638c964 Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Thu, 28 Aug 2014 09:44:24 +0200
Subject: [PATCH] determinization of nfa

---
 adeterminize2/makefile                        |  73 +++++++++++
 adeterminize2/src/adeterminize.cpp            | 124 ++++++++++++++++++
 .../src/determinize/nfa/NFADeterminizer.cpp   |  80 +++++++++++
 .../src/determinize/nfa/NFADeterminizer.h     |  56 ++++++++
 4 files changed, 333 insertions(+)
 create mode 100644 adeterminize2/makefile
 create mode 100644 adeterminize2/src/adeterminize.cpp
 create mode 100644 alib2algo/src/determinize/nfa/NFADeterminizer.cpp
 create mode 100644 alib2algo/src/determinize/nfa/NFADeterminizer.h

diff --git a/adeterminize2/makefile b/adeterminize2/makefile
new file mode 100644
index 0000000000..e972c74ff4
--- /dev/null
+++ b/adeterminize2/makefile
@@ -0,0 +1,73 @@
+SHELL:=/bin/bash
+EXECUTABLE:=adeterminize2
+
+LDFLAGS= -L../alib2data/lib -L../alib2algo/lib -rdynamic -lxml2 -lalib2data -lalib2algo -Wl,-rpath,.
+
+OBJECTS:=$(patsubst src/%.cpp, obj/%.o, $(shell find src/ -name *cpp))
+
+.PHONY: all build clean
+
+all: build
+
+bin/$(EXECUTABLE): obj/ $(OBJECTS)
+	mkdir -p bin
+	$(CXX) $(OBJECTS) -o $@ $(LDFLAGS)
+
+# obj/makefile is generated by the echo lines below; its own %/makefile rule copies it into each obj/
+# subdirectory and re-runs make there with SRCDIR/DEPTH set. The first echo truncates, the rest append.
+obj/makefile: makefile
+	mkdir -p $(dir $@)
+	echo "SHELL:=/bin/bash" > $@
+	echo "SRCDIR:=" >> $@
+	echo "DEPTH:=" >> $@
+	echo "" >> $@
+	echo "CXXFLAGS:= -std=c++11 -Og -g -c -Wall -pedantic -Wextra -I../../\$$(DEPTH)alib2data/src/ -I../../\$$(DEPTH)alib2algo/src -I/usr/include/libxml2/" >> $@
+	echo "" >> $@
+	echo "SOURCES:= \$$(shell find ../\$$(DEPTH)src/\$$(SRCDIR) -maxdepth 1 -type f -name \"*.cpp\")" >> $@
+	echo "DEPENDENCIES:= \$$(patsubst ../\$$(DEPTH)src/\$$(SRCDIR)%.cpp, ../\$$(DEPTH)obj/\$$(SRCDIR)%.d, \$$(SOURCES))" >> $@
+	echo "OBJECTS:= \$$(patsubst %.d, %.o, \$$(DEPENDENCIES))" >> $@
+	echo "SOURCES_DIRS:= \$$(shell find ../\$$(DEPTH)src/\$$(SRCDIR) -maxdepth 1 -mindepth 1 -type d)" >> $@
+	echo "OBJECTS_DIRS:= \$$(patsubst ../\$$(DEPTH)src/\$$(SRCDIR)%, %/, \$$(SOURCES_DIRS))" >> $@
+	echo "OBJECTS_DIRS_MAKEFILES:= \$$(patsubst %, %makefile, \$$(OBJECTS_DIRS))" >> $@
+	echo "" >> $@
+	echo ".PHONY: all" >> $@
+	echo ".PRECIOUS: \$$(DEPENDENCIES) \$$(OBJECTS_DIRS_MAKEFILES)" >> $@
+	echo "" >> $@
+	echo "all: \$$(OBJECTS_DIRS) \$$(OBJECTS)" >> $@
+	echo "" >> $@
+	echo "%.d:" >> $@
+	echo "	@echo \"\$$(shell sha1sum <<< \"\$$@\" | sed \"s/ -//g\") = \\$$\$$(shell (\\$$\$$(CXX) -MM \\$$\$$(CXXFLAGS) \$$(patsubst ../\$$(DEPTH)obj/\$$(SRCDIR)%.d,../\$$(DEPTH)src/\$$(SRCDIR)%.cpp, \$$@) 2>/dev/null || echo \\\"\$$(patsubst ../\$$(DEPTH)obj/\$$(SRCDIR)%.d,../\$$(DEPTH)src/\$$(SRCDIR)%.cpp, \$$@) FORCE\\\") | sed \\\"s/.*://g;s/\\\\\\\\\\\\\\\\//g\\\")\" >> \$$@" >> $@
+	echo "	@echo \"\$$(patsubst %.d,%.o, \$$@): \\$$\$$(\$$(shell sha1sum <<< \"\$$@\" | sed \"s/ -//g\"))\" >> \$$@" >> $@
+	echo "	@echo \"	\\$$\$$(CXX) \\$$\$$(CXXFLAGS) \\$$\$$< -o \$$(patsubst %.d,%.o, \$$@)\" >> \$$@" >> $@
+	echo "" >> $@
+	echo "%/makefile:" >> $@
+	echo "	mkdir -p \$$(dir \$$@)" >> $@
+	echo "	cp makefile \$$@" >> $@
+	echo "" >> $@
+	echo "%/: FORCE | %/makefile" >> $@
+	echo "	@accesstime=\`stat -c %Y \$$@\` && \\" >> $@
+	echo "	\$$(MAKE) -C \$$@ SRCDIR=\$$(SRCDIR)\$$(notdir \$$(patsubst %/, %, \$$@))/ DEPTH=\$$(DEPTH)../ && \\" >> $@
+	echo "	accesstime2=\`stat -c %Y \$$@\` && \\" >> $@
+	echo "	if [ "\$$\$$accesstime" -ne "\$$\$$accesstime2" ]; then \\" >> $@
+	echo "		touch .; \\" >> $@
+	echo "	fi" >> $@
+	echo "" >> $@
+	echo "FORCE:" >> $@
+	echo "" >> $@
+	echo "-include \$$(DEPENDENCIES)" >> $@
+
+obj/: FORCE | obj/makefile
+	$(MAKE) -C $@
+
+$(OBJECTS): obj/
+
+
+build: bin/$(EXECUTABLE)
+
+
+
+clean:
+	$(RM) -r *.o *.d bin lib obj test-bin test-obj
+
+FORCE:
+
diff --git a/adeterminize2/src/adeterminize.cpp b/adeterminize2/src/adeterminize.cpp
new file mode 100644
index 0000000000..144431885a
--- /dev/null
+++ b/adeterminize2/src/adeterminize.cpp
@@ -0,0 +1,124 @@
+#include <iostream>
+#include <getopt.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "automaton/Automaton.h"
+#include "automaton/FSM/NFA.h"
+#include "automaton/FSM/DFA.h"
+//#include "automaton/PDA/PDA.h"
+#include "factory/DataFactory.hpp"
+#include "exception/AlibException.h"
+
+#include "determinize/nfa/NFADeterminizer.h"
+//#include "idpda/IdpdaDeterminizer.h"
+//#include "vpa/VpaDeterminizer.h"
+//#include "vpa/VpaDeterminizer2.h"
+//#include "vpa/VpaDeterminizer3.h"
+//#include "rhdpda/RhdpdaDeterminizer.h"
+//#include "rhdpda/RhdpdaDeterminizer2.h"
+//#include "rhdpda/RhdpdaDeterminizer3.h"
+//#include "rhdpda/RhdpdaDeterminizer4.h"
+
+#define VERSION "0.0.1"
+
+#define TYPE_FSM "fsm"
+#define TYPE_IDPDA "idpda"
+#define TYPE_VPA "vpa"
+#define TYPE_RHDPDA "rhdpda"
+
+#define VERSION_1 "1"
+#define VERSION_2 "2"
+#define VERSION_3 "3"
+#define VERSION_4 "4"
+
+
+/**
+ * Prints help to standard output.
+ */
+void printHelp() {
+	// NOTE: main() currently dispatches only the 'fsm' type and parses -a without using it; the other types are listed for its disabled branches.
+	std::cout << "adeterminize " << VERSION << std::endl;
+	std::cout << "Determinize various types of automaton." << std::endl;
+	std::cout << "Usage: adeterminize -t TYPE [SWITCH...]" << std::endl;
+	std::cout << "Possible arguments:" << std::endl;
+	std::cout << " -t, --type=TYPE \t Specifies type of input automaton, possible values are 'FSM' for finite-state machine, 'IDPDA' for input-driven pushdown automaton, 'VPA' for visible pushdown automaton and 'RHDPDA' for real-time height deterministic pushdown automaton." << std::endl;
+	std::cout << " -a, --algorithm=VERSION_OF_ALGORITHM \t Specifies version of algorithm. This argument works only with VPA or RHDPDA type. VPA determinization has 3 versions (use numbers 1, 2 and 3) and RHDPDA determinization has 4 versions (use numbers 1, 2, 3 and 4). Default value is always number 1." << std::endl;
+	std::cout << " -h, --help \t Displays this help message." << std::endl;
+	std::cout << std::endl;
+}
+
+/**
+ * Converts given type to lower case.
+ * Characters go through unsigned char first: tolower() is undefined for negative values other than EOF.
+ * @param type type name as given on the command line
+ * @return the same text with every character converted by tolower()
+ */
+std::string getLowerCaseType(std::string type) {
+	std::string lowerCaseType;
+	for (const char c : type) {
+		lowerCaseType.push_back(static_cast<char>(tolower(static_cast<unsigned char>(c))));
+	}
+	return lowerCaseType;
+}
+
+// Entry point: parses -t/-a/-v/-h; for type 'fsm' it passes the automaton obtained from DataFactory::fromStdin through NFADeterminizer to DataFactory::toStdout.
+int main(int argc, char** argv) {
+	static struct option longOptions[] = {
+		{"type", required_argument, NULL, 't'},
+		{"algorithm", required_argument, NULL, 'a'},
+		{"version", no_argument, NULL, 'v'},
+		{"help", no_argument, NULL, 'h'},
+		{0, 0, 0, 0}
+	};
+
+	int longIndex = 0;
+	int opt = 0;
+	std::string type;
+	std::string version = "1";
+
+	while ((opt = getopt_long(argc, argv, "t:a:vh", longOptions, &longIndex)) != -1) {
+		switch(opt) {
+		case 't':
+			type = optarg;
+			break;
+		case 'a':
+			version = optarg; // only the disabled VPA/RHDPDA branches below would read this
+			break;
+		case 'v':
+		case 'h':
+		default:
+			printHelp();
+			return 0;
+		}
+	}
+
+	type = getLowerCaseType(type);
+	try {
+		if (type == TYPE_FSM) {
+			automaton::NFA nfa = alib::DataFactory::fromStdin<automaton::NFA>();
+			automaton::DFA dfa = determinize::NFADeterminizer::determinize(nfa);
+			alib::DataFactory::toStdout(dfa);
+			return 0;
+		}
+
+/*		} else if (type == TYPE_IDPDA) {
+			return new idpda::IdpdaDeterminizer((PDA*) automaton);
+
+		} else if (type == TYPE_VPA) {
+			return getVpaDeterminizer(automaton, version);
+
+		} else if (type == TYPE_RHDPDA) {
+			return getRhdpdaDeterminizer(automaton, version);
+		}*/
+
+		printHelp(); // reached whenever TYPE is missing or is anything other than 'fsm'
+		return 2;
+	} catch (const exception::AlibException& e) {
+		alib::DataFactory::toStdout(e);
+		return 1;
+	} catch (...) {
+		std::cerr << "adeterminize: unexpected exception" << std::endl; // say why we exit with 127 instead of failing silently
+		return 127;
+	}
+}
diff --git a/alib2algo/src/determinize/nfa/NFADeterminizer.cpp b/alib2algo/src/determinize/nfa/NFADeterminizer.cpp
new file mode 100644
index 0000000000..9af97b5284
--- /dev/null
+++ b/alib2algo/src/determinize/nfa/NFADeterminizer.cpp
@@ -0,0 +1,80 @@
+/*
+ * NFADeterminizer.cpp
+ *
+ * Created on: 16. 1. 
2014
+ * Author: Jan Vesely
+ */
+
+#include "NFADeterminizer.h"
+#include "label/LabelSetLabel.h"
+
+#include <deque>
+#include <algorithm>
+
+namespace determinize {
+
+
+automaton::State NFADeterminizer::createDFAState(const std::set<automaton::State>& nfaStates) {
+	std::set<label::Label> labelSet;
+	for(const automaton::State& state : nfaStates) {
+		labelSet.insert(state.getName());
+	}
+	return automaton::State(label::Label(label::LabelSetLabel(labelSet)));
+}
+
+std::set<automaton::State> NFADeterminizer::recreateNFAStates(const automaton::State& dfaState) {
+	std::set<automaton::State> states;
+	for (const auto& label : static_cast<const label::LabelSetLabel&>(dfaState.getName().getData()).getData()) {
+		states.insert(automaton::State(label));
+	}
+	return states;
+}
+
+automaton::DFA NFADeterminizer::determinize(const automaton::NFA& nfa) {
+	// The DFA starts in the state that stands for the set of all initial NFA states.
+	automaton::State initialState(createDFAState(nfa.getInitialStates()));
+	automaton::DFA res(initialState);
+	res.setInputSymbols(nfa.getInputAlphabet());
+	const auto& transitions = nfa.getTransitions(); // bound once so find() and end() below refer to the same map
+
+	// Worklist of DFA states whose outgoing transitions have not been built yet.
+	std::deque<automaton::State> todo;
+	todo.push_back(initialState);
+
+	do {
+		automaton::State state = todo.front();
+		todo.pop_front();
+		// The NFA states behind this DFA state do not depend on the input symbol, so decode them once.
+		const std::set<automaton::State> sourceNFAStates = recreateNFAStates(state);
+
+		for (const auto& input : nfa.getInputAlphabet()) {
+			// Union of the targets of all NFA transitions leaving the source states on this input.
+			std::set<automaton::State> targetNFAStates;
+			for (const auto& nfaState : sourceNFAStates) {
+				auto iter = transitions.find(std::make_pair(nfaState, input));
+				if (iter != transitions.end()) {
+					targetNFAStates.insert(iter->second.begin(), iter->second.end());
+				}
+			}
+			// An empty target set still becomes a DFA state, so a transition is added for every symbol.
+			automaton::State dfaState = createDFAState(targetNFAStates);
+
+			// A false result from addState() is taken to mean the state already existed; only new states are queued.
+			bool existed = !res.addState(dfaState);
+			res.addTransition(state, input, dfaState);
+			if (!existed) todo.push_back(dfaState);
+		}
+	} while (!todo.empty());
+
+	// A DFA state is final when at least one of the NFA states it stands for is final.
+	for (const auto& dfaState : res.getStates()) {
+		std::set<automaton::State> nfaStates = recreateNFAStates(dfaState);
+		if (std::any_of(nfaStates.begin(), nfaStates.end(), [&](const automaton::State& nfaState) { return nfa.getFinalStates().count(nfaState) != 0; })) {
+			res.addFinalState(dfaState);
+		}
+	}
+
+	return res;
+}
+
+}
diff --git a/alib2algo/src/determinize/nfa/NFADeterminizer.h b/alib2algo/src/determinize/nfa/NFADeterminizer.h
new file mode 100644
index 0000000000..b0a7697fa0
--- /dev/null
+++ b/alib2algo/src/determinize/nfa/NFADeterminizer.h
@@ -0,0 +1,56 @@
+/*
+ * NFADeterminizer.h
+ *
+ * Created on: 16. 1. 2014
+ * Author: Jan Vesely
+ */
+
+#ifndef NFA_DETERMINIZER_H_
+#define NFA_DETERMINIZER_H_
+
+#include <set>
+
+#include "automaton/common/State.h"
+#include "automaton/FSM/NFA.h"
+#include "automaton/FSM/DFA.h"
+
+namespace determinize {
+
+/**
+ * Determinization of a nondeterministic finite automaton (NFA) into a DFA; all members are static.
+ */
+class NFADeterminizer {
+
+private:
+
+	/**
+	 * Builds the DFA state that stands for a set of NFA states: its name is a LabelSetLabel holding the names
+	 * of all states in the set. The state is only constructed here, it is not added to any automaton.
+	 *
+	 * @param nfaStates set of states of the nondeterministic automaton
+	 * @return state of the deterministic automaton named after that set
+	 */
+	static automaton::State createDFAState(const std::set<automaton::State>& nfaStates);
+
+	/**
+	 * Inverse of createDFAState: rebuilds the set of NFA states from the labels stored in the name of
+	 * a DFA state. The name is cast to LabelSetLabel without any check, so only states produced by
+	 * createDFAState may be passed in.
+	 *
+	 * @param dfaState state of the deterministic automaton created by createDFAState
+	 * @return set of states of the nondeterministic automaton that dfaState stands for
+	 */
+	static std::set<automaton::State> recreateNFAStates(const automaton::State& dfaState);
+public:
+	/**
+	 * Determinizes the given automaton by subset construction; a state is final if its set holds a final NFA state.
+	 * @param nfa nondeterministic finite automaton to determinize
+	 * @return deterministic automaton built from nfa, over the same input alphabet
+	 */
+	static automaton::DFA determinize(const automaton::NFA& nfa);
+
+};
+
+} /* namespace determinize */
+
+#endif /* NFA_DETERMINIZER_H_ */
-- 
GitLab