From a095e228ea523cc8fb2773d586f6c5d06cda146e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <peckato1@fit.cvut.cz> Date: Sat, 22 Mar 2014 20:12:08 +0100 Subject: [PATCH] atrim.grammar: executable, unproductive nonterminal search --- atrim.grammar/.cproject | 125 ++++++++++++++++++ atrim.grammar/.project | 27 ++++ atrim.grammar/makefile | 20 +++ .../src/ContextFreeGrammarTransformations.cpp | 53 ++++++++ .../src/ContextFreeGrammarTransformations.h | 39 ++++++ atrim.grammar/src/atrim.grammar.cpp | 86 ++++++++++++ 6 files changed, 350 insertions(+) create mode 100644 atrim.grammar/.cproject create mode 100644 atrim.grammar/.project create mode 100644 atrim.grammar/makefile create mode 100644 atrim.grammar/src/ContextFreeGrammarTransformations.cpp create mode 100644 atrim.grammar/src/ContextFreeGrammarTransformations.h create mode 100644 atrim.grammar/src/atrim.grammar.cpp diff --git a/atrim.grammar/.cproject b/atrim.grammar/.cproject new file mode 100644 index 0000000000..e9326becf3 --- /dev/null +++ b/atrim.grammar/.cproject @@ -0,0 +1,125 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage"> + <storageModule moduleId="org.eclipse.cdt.core.settings"> + <cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.1781936632"> + <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.1781936632" moduleId="org.eclipse.cdt.core.settings" name="Debug"> + <externalSettings/> + <extensions> + <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + 
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/> + </extensions> + </storageModule> + <storageModule moduleId="cdtBuildSystem" version="4.0.0"> + <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.1781936632" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug"> + <folderInfo id="cdt.managedbuild.config.gnu.exe.debug.1781936632." name="/" resourcePath=""> + <toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.856451800" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug"> + <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.282600984" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/> + <builder buildPath="${workspace_loc:/atrim.grammar}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.1729375420" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/> + <tool id="cdt.managedbuild.tool.gnu.archiver.base.1831807084" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/> + <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.601242030" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug"> + <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.1798038208" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/> + <option 
id="gnu.cpp.compiler.exe.debug.option.debugging.level.1142476857" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/> + <option id="gnu.cpp.compiler.option.include.paths.995676746" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath"> + <listOptionValue builtIn="false" value=""${workspace_loc:/alib/src}""/> + </option> + <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.222501119" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/> + </tool> + <tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.1484224413" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug"> + <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.322188981" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/> + <option id="gnu.c.compiler.exe.debug.option.debugging.level.805309112" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/> + <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.949170540" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/> + </tool> + <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1963106341" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/> + <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.385413238" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug"> + <option id="gnu.cpp.link.option.paths.1129615670" superClass="gnu.cpp.link.option.paths" valueType="libPaths"> + <listOptionValue builtIn="false" value=""${workspace_loc:/alib/lib}""/> + <listOptionValue builtIn="false" value="/usr/include/libxml2"/> + </option> + <option id="gnu.cpp.link.option.libs.1835756621" superClass="gnu.cpp.link.option.libs" 
valueType="libs"> + <listOptionValue builtIn="false" value="alib"/> + <listOptionValue builtIn="false" value="xml2"/> + </option> + <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1876252850" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input"> + <additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/> + <additionalInput kind="additionalinput" paths="$(LIBS)"/> + </inputType> + </tool> + <tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.751703985" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug"> + <inputType id="cdt.managedbuild.tool.gnu.assembler.input.1410855404" superClass="cdt.managedbuild.tool.gnu.assembler.input"/> + </tool> + </toolChain> + </folderInfo> + </configuration> + </storageModule> + <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/> + </cconfiguration> + <cconfiguration id="cdt.managedbuild.config.gnu.exe.release.762106393"> + <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.762106393" moduleId="org.eclipse.cdt.core.settings" name="Release"> + <externalSettings/> + <extensions> + <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> + <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/> + </extensions> + </storageModule> + <storageModule moduleId="cdtBuildSystem" version="4.0.0"> + <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" 
buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.762106393" name="Release" parent="cdt.managedbuild.config.gnu.exe.release"> + <folderInfo id="cdt.managedbuild.config.gnu.exe.release.762106393." name="/" resourcePath=""> + <toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.104881597" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release"> + <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.2052430383" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/> + <builder buildPath="${workspace_loc:/atrim.grammar}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.1392074195" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/> + <tool id="cdt.managedbuild.tool.gnu.archiver.base.560847626" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/> + <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1747884076" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release"> + <option id="gnu.cpp.compiler.exe.release.option.optimization.level.637150912" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/> + <option id="gnu.cpp.compiler.exe.release.option.debugging.level.1558043500" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/> + <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1110483355" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/> + </tool> + <tool 
id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.2009695907" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release"> + <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1736915700" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/> + <option id="gnu.c.compiler.exe.release.option.debugging.level.1545546989" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/> + <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1501890992" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/> + </tool> + <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.1622201612" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/> + <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.1624702050" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release"> + <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.960050635" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input"> + <additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/> + <additionalInput kind="additionalinput" paths="$(LIBS)"/> + </inputType> + </tool> + <tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.1252221022" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release"> + <inputType id="cdt.managedbuild.tool.gnu.assembler.input.311814789" superClass="cdt.managedbuild.tool.gnu.assembler.input"/> + </tool> + </toolChain> + </folderInfo> + </configuration> + </storageModule> + <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/> + </cconfiguration> + </storageModule> + <storageModule moduleId="cdtBuildSystem" version="4.0.0"> + <project id="atrim.grammar.cdt.managedbuild.target.gnu.exe.1122442323" name="Executable" 
projectType="cdt.managedbuild.target.gnu.exe"/> + </storageModule> + <storageModule moduleId="scannerConfiguration"> + <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/> + <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.762106393;cdt.managedbuild.config.gnu.exe.release.762106393.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1747884076;cdt.managedbuild.tool.gnu.cpp.compiler.input.1110483355"> + <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/> + </scannerConfigBuildInfo> + <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.1781936632;cdt.managedbuild.config.gnu.exe.debug.1781936632.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.601242030;cdt.managedbuild.tool.gnu.cpp.compiler.input.222501119"> + <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/> + </scannerConfigBuildInfo> + <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.762106393;cdt.managedbuild.config.gnu.exe.release.762106393.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.2009695907;cdt.managedbuild.tool.gnu.c.compiler.input.1501890992"> + <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/> + </scannerConfigBuildInfo> + <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.1781936632;cdt.managedbuild.config.gnu.exe.debug.1781936632.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.1484224413;cdt.managedbuild.tool.gnu.c.compiler.input.949170540"> + <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/> + </scannerConfigBuildInfo> + </storageModule> + <storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/> +</cproject> diff --git a/atrim.grammar/.project b/atrim.grammar/.project new file mode 100644 index 0000000000..2ffaa4340a --- /dev/null +++ b/atrim.grammar/.project @@ -0,0 +1,27 @@ +<?xml version="1.0" encoding="UTF-8"?> 
+<projectDescription>
+	<name>atrim.grammar</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
+			<triggers>clean,full,incremental,</triggers>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
+			<triggers>full,incremental,</triggers>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.cdt.core.cnature</nature>
+		<nature>org.eclipse.cdt.core.ccnature</nature>
+		<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
+		<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
+	</natures>
+</projectDescription>
diff --git a/atrim.grammar/makefile b/atrim.grammar/makefile
new file mode 100644
index 0000000000..b4de9e9c2d
--- /dev/null
+++ b/atrim.grammar/makefile
@@ -0,0 +1,24 @@
+CC=g++
+EXECUTABLE=atrim.grammar
+CCFLAGS= -std=c++11 -O2 -c -Wall -I../alib/src -I/usr/include/libxml2
+LDFLAGS= -L../alib/lib -lxml2 -lalib -Wl,-rpath,.
+
+# The pattern is quoted so that find, not the shell, expands it.
+SOURCES=$(shell find src/ -name '*.cpp')
+OBJECTS=$(patsubst src/%.cpp, obj/%.o, $(SOURCES))
+
+# all and clean name actions, not files.
+.PHONY: all clean
+
+all: $(SOURCES) bin/$(EXECUTABLE)
+
+bin/$(EXECUTABLE): $(OBJECTS)
+	mkdir -p bin
+	$(CC) $(OBJECTS) -o $@ $(LDFLAGS)
+
+obj/%.o: src/%.cpp
+	mkdir -p $(dir $@)
+	$(CC) $(CCFLAGS) $< -o $@
+
+clean:
+	$(RM) -r *.o *.d bin obj
diff --git a/atrim.grammar/src/ContextFreeGrammarTransformations.cpp b/atrim.grammar/src/ContextFreeGrammarTransformations.cpp
new file mode 100644
index 0000000000..209b9c637f
--- /dev/null
+++ b/atrim.grammar/src/ContextFreeGrammarTransformations.cpp
@@ -0,0 +1,53 @@
+/*
+ * ContextFreeGrammarTransformations.cpp
+ *
+ * Created on: 22. 3.
2014
+ *      Author: tomas
+ */
+
+#include "ContextFreeGrammarTransformations.h"
+
+using namespace std;
+using namespace grammar;
+
+/** Melichar 3.6, steps 1-3: returns the nonterminals that can derive a string of terminals. */
+set<Symbol> ContextFreeGrammarTransformations::getProductiveNonTerminals( const ContextFreeGrammar & grammar )
+{
+	// Looked up once: keeps the call out of the loops and gives every membership test one set object.
+	const auto & terminals = grammar.getTerminalSymbols( );
+	set<Symbol> productive;
+	bool changed = true;
+
+	// Iterate to a fixpoint: stop once a whole pass over the rules adds nothing new.
+	while( changed )
+	{
+		changed = false;
+		for( const auto & rule : grammar.getRules( ) )
+		{
+			// Judged per rule: one failing rule must not block the rules that follow it.
+			bool valid = true;
+			for( const auto & symbol : rule.getRightSide( ) )
+			{
+				if( ! isInSet( symbol, productive ) && ! isInSet( symbol, terminals ) )
+				{
+					valid = false;
+					break;
+				}
+			}
+
+			// A rule with an empty right side passes trivially.
+			if( valid && productive.insert( rule.getLeftSide( ).front( ) ).second )
+				changed = true;
+		}
+	}
+
+	return productive;
+}
+
+/** Melichar 3.6: L( grammar ) is empty iff the start symbol is not productive. */
+bool ContextFreeGrammarTransformations::isLanguageEmpty( const grammar::ContextFreeGrammar & grammar )
+{
+	// Kept in a local so that the membership test runs against a single set object.
+	const set<Symbol> productive = getProductiveNonTerminals( grammar );
+	return productive.count( grammar.getStartSymbol( ) ) == 0;
+}
diff --git a/atrim.grammar/src/ContextFreeGrammarTransformations.h b/atrim.grammar/src/ContextFreeGrammarTransformations.h
new file mode 100644
index 0000000000..b74a15a3c9
--- /dev/null
+++ b/atrim.grammar/src/ContextFreeGrammarTransformations.h
@@ -0,0 +1,39 @@
+/*
+ * ContextFreeGrammarTransformations.h
+ *
+ * Created on: 22. 3.
2014
+ *      Author: tomas
+ */
+
+#ifndef CONTEXTFREEGRAMMARTRANSFORMATIONS_H_
+#define CONTEXTFREEGRAMMARTRANSFORMATIONS_H_
+
+#include <deque>
+#include <set>
+
+#include <grammar/ContextFree/ContextFreeGrammar.h>
+
+/** True iff x is an element of set; a function rather than a macro so each argument is evaluated once. */
+template<class Element, class Container>
+bool isInSet( const Element & x, const Container & set ) { return set.find( x ) != set.end( ); }
+
+/** Implements algorithms from Melichar, chapter 3.3 */
+class ContextFreeGrammarTransformations
+{
+public:
+	/**
+	 * Melichar 3.6 - decides whether L( grammar ) is the empty language.
+	 *
+	 * Several steps are implemented in ContextFreeGrammarTransformations::getProductiveNonTerminals( ).
+	 * @see getProductiveNonTerminals
+	 */
+	static bool isLanguageEmpty( const grammar::ContextFreeGrammar & grammar );
+
+private:
+	/**
+	 * Implements steps 1 through 3 in Melichar 3.6
+	 */
+	static std::set<alphabet::Symbol> getProductiveNonTerminals( const grammar::ContextFreeGrammar & grammar );
+};
+
+#endif /* CONTEXTFREEGRAMMARTRANSFORMATIONS_H_ */
diff --git a/atrim.grammar/src/atrim.grammar.cpp b/atrim.grammar/src/atrim.grammar.cpp
new file mode 100644
index 0000000000..e81e79cfc2
--- /dev/null
+++ b/atrim.grammar/src/atrim.grammar.cpp
@@ -0,0 +1,98 @@
+#include <iostream>
+#include <getopt.h>
+
+#include <AlibException.h>
+#include <GrammarFactory.h>
+#include <grammar/GrammarParser.h>
+
+#include <sax/SaxInterface.h>
+#include <sax/ParserException.h>
+
+#include "ContextFreeGrammarTransformations.h"
+
+using namespace std;
+using namespace grammar;
+using namespace alib;
+using namespace sax;
+
+/** Prints the version banner and the command line usage to stdout. */
+void help( void )
+{
+	cout << "atrim.grammar 0.01" << endl;
+	cout << "Removes unreachable and unproductive rules from CFG grammar. Input is read from stdin or from the given file." << endl;
+	cout << "Usage: atrim.grammar [--unproductive] [--unreachable] [file]" << endl << endl;
+	cout << "If neither --unproductive nor --unreachable option is used, both unproductive and unreachable nonterminals are removed." << endl;
+	cout << endl;
+	cout << " --unproductive \t Removes unproductive nonterminals (Those which has no derivation X =>* w, w being string of terminals)." << endl;
+	cout << " --unreachable \t Removes unreachable nonterminals." << endl;
+	cout << " -h, --help \t shows this." << endl;
+
+	cout << endl;
+}
+
+/**
+ * Reads a context free grammar in XML from the file named by the first non-option argument
+ * (from stdin when there is none) and writes it to stdout as XML.
+ *
+ * @return 0 on success or after printing help on request, 1 after an unknown option
+ * NOTE(review): exceptions (presumably AlibException / ParserException) are not caught here - confirm.
+ */
+int main(int argc, char* argv[])
+{
+	// Set to 1 by getopt_long when the matching long option is present.
+	int del_unreachable = 0, del_unproductive = 0;
+
+	struct option long_options[] = {
+		{"help", no_argument, nullptr, 'h'},
+		{"unproductive", no_argument, & del_unproductive, 1},
+		{"unreachable", no_argument, & del_unreachable, 1},
+		{0, 0, 0, 0}
+	};
+
+	int long_index = 0, opt = 0;
+
+	while( ( opt = getopt_long( argc, argv, "h", long_options, & long_index ) ) != -1 )
+	{
+		switch( opt )
+		{
+			case 0: // flag option, getopt_long has already stored its value
+				break;
+
+			case 'h':
+				help( );
+				return 0;
+
+			default: // unknown option: show the usage and report failure
+				help( );
+				return 1;
+		}
+	}
+
+	list<Token> tokens;
+	if(optind == argc)
+	{
+		// No file argument: consume stdin up to EOF.
+		string input(istreambuf_iterator<char>(cin), (istreambuf_iterator<char>()));
+		SaxInterface::parseMemory(input, tokens);
+	}
+	else
+	{
+		SaxInterface::parseFile(argv[optind],tokens);
+	}
+
+	ContextFreeGrammar cfg = GrammarFactory::buildContextFreeGrammar( GrammarParser::parse( tokens ) );
+
+	// default behaviour, no switches
+	if( ! del_unreachable && ! del_unproductive )
+		del_unreachable = del_unproductive = 1;
+
+	// Neither transformation exists yet, so the grammar is currently written back without any trimming.
+	if( del_unproductive )
+		; // cfg = ContextFreeGrammarTransformations::removeUnproductiveSymbols( cfg );
+	if( del_unreachable )
+		; // cfg = ContextFreeGrammarTransformations::removeUnreachableSymbols( cfg );
+
+	cfg.toXML( cout );
+
+	return 0;
+}
-- 
GitLab