From 533ae4e676e1a3765d828003fd33224929c8e98b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <tomaspecka@gmail.com>
Date: Sat, 18 Jan 2014 22:05:24 +0100
Subject: [PATCH] aTrim: removes unreachable states from NFA

---
 atrim/.cproject         | 127 ++++++++++++++++++++++++++++++++++++++++
 atrim/.project          |  27 +++++++++
 atrim/src/TrimNFA.cpp   |  75 ++++++++++++++++++++++++
 atrim/src/TrimNFA.h     |  42 +++++++++++++
 atrim/src/atrim.fsm.cpp |  67 +++++++++++++++++++++
 5 files changed, 338 insertions(+)
 create mode 100644 atrim/.cproject
 create mode 100644 atrim/.project
 create mode 100644 atrim/src/TrimNFA.cpp
 create mode 100644 atrim/src/TrimNFA.h
 create mode 100644 atrim/src/atrim.fsm.cpp

diff --git a/atrim/.cproject b/atrim/.cproject
new file mode 100644
index 0000000000..6e6d8fdead
--- /dev/null
+++ b/atrim/.cproject
@@ -0,0 +1,127 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
+	<storageModule moduleId="org.eclipse.cdt.core.settings">
+		<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.819905794">
+			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.819905794" moduleId="org.eclipse.cdt.core.settings" name="Debug">
+				<externalSettings/>
+				<extensions>
+					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+				</extensions>
+			</storageModule>
+			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+				<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.819905794" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
+					<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.819905794." name="/" resourcePath="">
+						<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.462046071" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
+							<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.1884562873" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
+							<builder buildPath="${workspace_loc:/atrim}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.409450738" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
+							<tool id="cdt.managedbuild.tool.gnu.archiver.base.1496303476" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
+							<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1722117447" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
+								<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.1839138361" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1380190232" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.option.include.paths.1888796955" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+									<listOptionValue builtIn="false" value="/usr/include/libxml2"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/alib/src}&quot;"/>
+								</option>
+								<option id="gnu.cpp.compiler.option.other.other.1520737263" superClass="gnu.cpp.compiler.option.other.other" value="-c -fmessage-length=0 -std=c++11" valueType="string"/>
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.236265274" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.667742770" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
+								<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.1827815185" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
+								<option id="gnu.c.compiler.exe.debug.option.debugging.level.305154781" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2035410571" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.826768714" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
+							<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1048652837" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
+								<option id="gnu.cpp.link.option.paths.559553255" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:/alib/Debug}&quot;"/>
+								</option>
+								<option id="gnu.cpp.link.option.libs.1545242354" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
+									<listOptionValue builtIn="false" value="alib"/>
+								</option>
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1037307452" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
+								</inputType>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.1548705558" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
+								<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1378762293" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+							</tool>
+						</toolChain>
+					</folderInfo>
+				</configuration>
+			</storageModule>
+			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+		</cconfiguration>
+		<cconfiguration id="cdt.managedbuild.config.gnu.exe.release.2103082938">
+			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.2103082938" moduleId="org.eclipse.cdt.core.settings" name="Release">
+				<externalSettings/>
+				<extensions>
+					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+				</extensions>
+			</storageModule>
+			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+				<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.2103082938" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
+					<folderInfo id="cdt.managedbuild.config.gnu.exe.release.2103082938." name="/" resourcePath="">
+						<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.1535578457" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
+							<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1117175553" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
+							<builder buildPath="${workspace_loc:/atrim}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.122424405" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
+							<tool id="cdt.managedbuild.tool.gnu.archiver.base.1285561884" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
+							<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1222750546" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
+								<option id="gnu.cpp.compiler.exe.release.option.optimization.level.294064619" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.exe.release.option.debugging.level.1584535047" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.410432698" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.1291884828" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
+								<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1693265517" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
+								<option id="gnu.c.compiler.exe.release.option.debugging.level.313311830" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1411621768" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.1077048210" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
+							<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.220779305" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.74506423" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
+								</inputType>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.1285706984" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
+								<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1757893385" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+							</tool>
+						</toolChain>
+					</folderInfo>
+				</configuration>
+			</storageModule>
+			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+		</cconfiguration>
+	</storageModule>
+	<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+		<project id="atrim.cdt.managedbuild.target.gnu.exe.128051496" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
+	</storageModule>
+	<storageModule moduleId="scannerConfiguration">
+		<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.819905794;cdt.managedbuild.config.gnu.exe.debug.819905794.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1722117447;cdt.managedbuild.tool.gnu.cpp.compiler.input.236265274">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.819905794;cdt.managedbuild.config.gnu.exe.debug.819905794.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.667742770;cdt.managedbuild.tool.gnu.c.compiler.input.2035410571">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.2103082938;cdt.managedbuild.config.gnu.exe.release.2103082938.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.1291884828;cdt.managedbuild.tool.gnu.c.compiler.input.1411621768">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+		</scannerConfigBuildInfo>
+		<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.2103082938;cdt.managedbuild.config.gnu.exe.release.2103082938.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1222750546;cdt.managedbuild.tool.gnu.cpp.compiler.input.410432698">
+			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+		</scannerConfigBuildInfo>
+	</storageModule>
+	<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
+	<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
+	<storageModule moduleId="refreshScope"/>
+</cproject>
diff --git a/atrim/.project b/atrim/.project
new file mode 100644
index 0000000000..50b1ef0466
--- /dev/null
+++ b/atrim/.project
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>atrim</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
+			<triggers>clean,full,incremental,</triggers>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
+			<triggers>full,incremental,</triggers>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.cdt.core.cnature</nature>
+		<nature>org.eclipse.cdt.core.ccnature</nature>
+		<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
+		<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
+	</natures>
+</projectDescription>
diff --git a/atrim/src/TrimNFA.cpp b/atrim/src/TrimNFA.cpp
new file mode 100644
index 0000000000..d8796faa0d
--- /dev/null
+++ b/atrim/src/TrimNFA.cpp
@@ -0,0 +1,75 @@
+/*
+ * TrimNFA.cpp
+ *
+ *  Created on: 18. 1. 2014
+ *      Author: tomas
+ */
+
+#include "TrimNFA.h"
+
+using namespace alib;
+using namespace automaton;
+using namespace std;
+using namespace trim;
+
+namespace trim
+{
+
+// Binds the automaton to trim by reference (no copy is made) and rejects any
+// input that does not have exactly one initial state by throwing AlibException.
+TrimNFA::TrimNFA( const FSM & fsm ) : m_origFSM( fsm ) {
+    if( fsm.getInitialStates( ).size( ) != 1 ) throw AlibException( "NFA must have exactly one initial state." );
+}
+
+/**
+ * Builds and returns the trimmed automaton: the states reachable from the initial
+ * state, the whole input alphabet, every transition leaving a reachable state, the
+ * initial state and the reachable final states. The result is assembled in a local
+ * object, so calling remove() again does not re-add anything to a filled automaton.
+ */
+const FSM TrimNFA::remove( void )
+{
+    const set<State> reachableStates = findReachableStates( );
+    FSM trimmed;
+    for( const auto & state : reachableStates )
+        trimmed.addState( state );
+    for( const auto & symbol : m_origFSM.getInputAlphabet( ) )
+        trimmed.addInputSymbol( symbol );
+    // A transition is kept iff its source is reachable; its target is then reachable as well.
+    for( const auto & transition : m_origFSM.getTransitions( ) )
+        if( reachableStates.count( transition.getFrom( ) ) != 0 )
+            trimmed.addTransition( transition );
+    for( const auto & state : m_origFSM.getInitialStates( ) )
+        trimmed.addInitialState( state );
+    for( const auto & state : m_origFSM.getFinalStates( ) )
+        if( reachableStates.count( state ) != 0 )
+            trimmed.addFinalState( state );
+    return trimmed;
+}
+
+
+/**
+ * Computes the states reachable from the single initial state (Melichar, alg. 2.29): starting
+ * from { q0 }, targets of transitions leaving the current set are added until nothing changes.
+ */
+set<State> TrimNFA::findReachableStates( void ) const
+{
+    // Successors of each state; queried with find() below so no empty entries get inserted.
+    map<State, set<State>> successorsOf;
+    for( const auto & transition : m_origFSM.getTransitions( ) )
+        successorsOf[ transition.getFrom( ) ].insert( transition.getTo( ) );
+    set<State> previous, current;
+    current.insert( * m_origFSM.getInitialStates( ).begin( ) );
+    do {
+        previous = current;
+        for( const auto & state : previous ) {
+            const auto successors = successorsOf.find( state );
+            if( successors != successorsOf.end( ) )
+                current.insert( successors->second.begin( ), successors->second.end( ) );
+        }
+    } while( current.size( ) != previous.size( ) ); // current only grows: equal sizes mean a fixed point
+    return current;
+}
+
+
+} /* namespace trim */
diff --git a/atrim/src/TrimNFA.h b/atrim/src/TrimNFA.h
new file mode 100644
index 0000000000..6fc972033b
--- /dev/null
+++ b/atrim/src/TrimNFA.h
@@ -0,0 +1,42 @@
+/*
+ * TrimNFA.h
+ *
+ *  Created on: 18. 1. 2014
+ *      Author: tomas
+ */
+
+#ifndef TRIMNFA_H_
+#define TRIMNFA_H_
+
+#include <automaton/FSM/FSM.h>
+#include <AlibException.h>
+
+#include <algorithm>
+#include <iterator>
+#include <map>
+#include <set>
+
+namespace trim
+{
+
+#define isInSet(x,set) ( (set).find((x)) != (set).end()) // true iff x is found in set; as a macro it is not scoped by namespace trim
+
+/** Removes unreachable states from an NFA (Melichar, algorithm 2.29; in effect a BFS/DFS from the
+ *  initial state). Only a reference to the input automaton is kept, so it must outlive this object. */
+class TrimNFA
+{
+public:
+    /** @throws AlibException if fsm does not have exactly one initial state. */
+    explicit TrimNFA( const automaton::FSM & fsm );
+    /** @return automaton containing only the states reachable from the initial state. */
+    const automaton::FSM remove( void );
+private:
+    /** @return the states reachable from the initial state of the input automaton. */
+    std::set<automaton::State> findReachableStates( void ) const;
+    const automaton::FSM & m_origFSM; // input automaton, referenced but not owned
+    automaton::FSM m_FSM;
+};
+
+} /* namespace trim */
+
+#endif /* TRIMNFA_H_ */
diff --git a/atrim/src/atrim.fsm.cpp b/atrim/src/atrim.fsm.cpp
new file mode 100644
index 0000000000..b52484255d
--- /dev/null
+++ b/atrim/src/atrim.fsm.cpp
@@ -0,0 +1,67 @@
+    #include <iostream>
+
+#include <AutomatonFactory.h>
+#include <AlibException.h>
+#include <automaton/AutomatonParser.h>
+
+#include <sax/SaxInterface.h>
+#include <sax/ParserException.h>
+
+#include "TrimNFA.h"
+
+using namespace std;
+using namespace automaton;
+using namespace alib;
+using namespace sax;
+
+using namespace trim;
+
+/**
+ * atrim entry point: parses an automaton from the file named by the single optional
+ * argument (or from standard input when no file is given), removes its unreachable
+ * states with TrimNFA and writes the result to standard output via toXML().
+ * Returns 1 after printing the help (-h) and -1 when an AlibException is caught; the
+ * exception message goes to standard error so it cannot get mixed into the output.
+ */
+int main(int argc, char** argv) {
+    int fileParameterIndex = -1; // index of the input file in argv, -1 means "read standard input"
+
+    try {
+        for( int i = 1; i < argc; i++ )
+        {
+            if( string( "-h" ).compare( argv[i] ) == 0 )
+            {
+                std::cout << "Removes unreachable states from NFA." << std::endl;
+                std::cout << "Usage: atrim [automaton.xml]" << std::endl;
+                return 1;
+            }
+            if( fileParameterIndex != -1 )
+                throw AlibException("Only one file can be passed as parameter - " + string(argv[i]) + " " + string(argv[fileParameterIndex]));
+            fileParameterIndex = i;
+        }
+
+        std::list<Token> tokens;
+        if(fileParameterIndex != -1)
+        {
+            SaxInterface::parseFile(argv[fileParameterIndex],tokens);
+        }
+        else
+        {
+            // No file given: read all of standard input into a string first.
+            string input(istreambuf_iterator<char>(cin), (istreambuf_iterator<char>()));
+            SaxInterface::parseMemory(input, tokens);
+        }
+
+        UnknownAutomaton automaton = AutomatonParser::parse( tokens );
+        FSM fsm = AutomatonFactory::buildFSM( automaton );
+
+        TrimNFA tr ( fsm ); // fsm must stay alive while tr is used: tr only keeps a reference
+        tr.remove().toXML( cout );
+
+    } catch (AlibException& e) {
+        cerr << e.what() << endl;
+        return -1;
+    }
+
+    cout.flush();
+}
-- 
GitLab