From 8da9d69fdebeb48c6225a3e6c1cdd83d78feb036 Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Sun, 7 Sep 2014 19:03:53 +0200 Subject: [PATCH] aoptimize.regexp -> atrim2 --- alib2algo/src/regexp/RegExpOptimize.cpp | 10 ++++++ alib2algo/src/regexp/RegExpOptimize.h | 6 ++++ alib2data/src/regexp/formal/FormalRegExp.h | 2 +- .../src/regexp/unbounded/UnboundedRegExp.h | 2 +- aoptimize.regexp/makefile | 20 ----------- aoptimize.regexp/src/aoptimize.regexp.cpp | 31 ----------------- atrim2/src/atrim.cpp | 34 +++++++++++++++++-- 7 files changed, 49 insertions(+), 56 deletions(-) delete mode 100644 aoptimize.regexp/makefile delete mode 100644 aoptimize.regexp/src/aoptimize.regexp.cpp diff --git a/alib2algo/src/regexp/RegExpOptimize.cpp b/alib2algo/src/regexp/RegExpOptimize.cpp index e4c84ffc63..03574a2935 100644 --- a/alib2algo/src/regexp/RegExpOptimize.cpp +++ b/alib2algo/src/regexp/RegExpOptimize.cpp @@ -13,6 +13,16 @@ namespace regexp { +FormalRegExp RegExpOptimize::optimize( FormalRegExp const & regexp ) +{ + throw exception::AlibException("Unimplemented"); +} + +void RegExpOptimize::optimize( FormalRegExpElement & element ) +{ + throw exception::AlibException("Unimplemented"); +} + UnboundedRegExp RegExpOptimize::optimize( UnboundedRegExp const & regexp ) { UnboundedRegExpElement* optimized = optimize( & regexp.getRegExp( ) ); diff --git a/alib2algo/src/regexp/RegExpOptimize.h b/alib2algo/src/regexp/RegExpOptimize.h index 0e97409eee..8f6ff40ee2 100644 --- a/alib2algo/src/regexp/RegExpOptimize.h +++ b/alib2algo/src/regexp/RegExpOptimize.h @@ -15,6 +15,9 @@ #include <regexp/unbounded/UnboundedRegExp.h> #include <regexp/unbounded/UnboundedRegExpElements.h> +#include <regexp/formal/FormalRegExp.h> +#include <regexp/formal/FormalRegExpElements.h> + #include <exception/AlibException.h> namespace regexp { @@ -59,6 +62,9 @@ class RegExpOptimize public: regexp::UnboundedRegExp optimize( const regexp::UnboundedRegExp & regexp ); void optimize( 
regexp::UnboundedRegExpElement & regexp ); + + regexp::FormalRegExp optimize( const regexp::FormalRegExp & regexp ); + void optimize( regexp::FormalRegExpElement & regexp ); private: regexp::UnboundedRegExpElement * optimize( regexp::UnboundedRegExpElement const * const & node ); regexp::UnboundedRegExpElement * optimize( regexp::UnboundedRegExpAlternation const * const & node ); diff --git a/alib2data/src/regexp/formal/FormalRegExp.h b/alib2data/src/regexp/formal/FormalRegExp.h index 56e2edbf96..3c0b587b4c 100644 --- a/alib2data/src/regexp/formal/FormalRegExp.h +++ b/alib2data/src/regexp/formal/FormalRegExp.h @@ -30,6 +30,7 @@ protected: std::set<alphabet::Symbol> alphabet; +public: /** * @copydoc FormalRegExpElement::clone() const */ @@ -40,7 +41,6 @@ protected: */ virtual RegExpBase* plunder() &&; -public: FormalRegExp(); explicit FormalRegExp(const UnboundedRegExp& other); FormalRegExp(const std::set<alphabet::Symbol>& alphabet, const FormalRegExpElement& regExp); diff --git a/alib2data/src/regexp/unbounded/UnboundedRegExp.h b/alib2data/src/regexp/unbounded/UnboundedRegExp.h index 9d2d046213..b89c862693 100644 --- a/alib2data/src/regexp/unbounded/UnboundedRegExp.h +++ b/alib2data/src/regexp/unbounded/UnboundedRegExp.h @@ -30,6 +30,7 @@ protected: std::set<alphabet::Symbol> alphabet; +public: /** * @copydoc UnboundedRegExpElement::clone() const */ @@ -40,7 +41,6 @@ protected: */ virtual RegExpBase* plunder() &&; -public: UnboundedRegExp(); explicit UnboundedRegExp(const FormalRegExp& other); UnboundedRegExp(const std::set<alphabet::Symbol>& alphabet, const UnboundedRegExpElement& regExp); diff --git a/aoptimize.regexp/makefile b/aoptimize.regexp/makefile deleted file mode 100644 index 228b24923d..0000000000 --- a/aoptimize.regexp/makefile +++ /dev/null @@ -1,20 +0,0 @@ -CC=g++ -EXECUTABLE=aoptimize.regexp -CCFLAGS= -std=c++11 -O2 -c -Wall -I../alib/src -I../libaregexptree/src -I/usr/include/libxml2 -LDFLAGS= -L../alib/lib -L../libaregexptree/lib -lxml2 
-laregexptree -lalib -Wl,-rpath,. - -SOURCES=$(shell find src/ -name *cpp) -OBJECTS=$(patsubst src/%.cpp, obj/%.o, $(SOURCES)) - -all: $(SOURCES) bin/$(EXECUTABLE) - -bin/$(EXECUTABLE): $(OBJECTS) - mkdir -p bin - $(CC) $(OBJECTS) -o $@ $(LDFLAGS) - -obj/%.o: src/%.cpp - mkdir -p $(dir $@) - $(CC) $(CCFLAGS) $< -o $@ - -clean: - $(RM) -r *.o *.d bin obj diff --git a/aoptimize.regexp/src/aoptimize.regexp.cpp b/aoptimize.regexp/src/aoptimize.regexp.cpp deleted file mode 100644 index 6398348e5e..0000000000 --- a/aoptimize.regexp/src/aoptimize.regexp.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include <iostream> -#include "regexp/RegExpParser.h" -#include "AlibException.h" -#include "sax/SaxInterface.h" - -#include "RegExpOptimize.h" - - -using namespace std; -using namespace regexp; -using namespace sax; -using namespace alib; - -int main(int argc, char** argv) { - list<Token> tokens; - - if (argc > 1) { - SaxInterface::parseFile(argv[1], tokens); - } else { - string input(istreambuf_iterator<char>(cin), - (istreambuf_iterator<char>())); - SaxInterface::parseMemory(input, tokens); - } - - RegExp regexp = RegExpParser::parse(tokens); - RegExpOptimize opt; - regexp = opt.optimize(regexp); - regexp.toXML(cout); - - return 0; -} diff --git a/atrim2/src/atrim.cpp b/atrim2/src/atrim.cpp index cb650b45bf..59561d3fc7 100644 --- a/atrim2/src/atrim.cpp +++ b/atrim2/src/atrim.cpp @@ -6,15 +6,17 @@ #include "trim/grammar/TrimCFG.h" #include "trim/automaton/TrimFSM.h" +#include "regexp/RegExpOptimize.h" void help( void ) { std::cout << "atrim 0.01" << std::endl; - std::cout << "Removes unreachable and useless states from FSM, productive and unreachable nonterminals from CFG." << std::endl; + std::cout << "Removes unreachable and useless states from FSM, unproductive and unreachable nonterminals from CFG. 
Simplifies representation of RE" << std::endl; std::cout << "Usage: atrim [-u] [-r]" << std::endl << std::endl; std::cout << "If neither --useless nor --unreachable option is used, both useless and unreachable states (or symbols) are removed." << std::endl; std::cout << std::endl; - std::cout << " -u, --useless \t Removes useless states only (works with FSM only)." << std::endl; - std::cout << " -r, --unreachable \t Removes unreachable states only. (works with FSM or CFG)." << std::endl; + std::cout << " -u, --useless \t Removes useless states. (works with FSM only)" << std::endl; + std::cout << " -r, --unreachable \t Removes unreachable states. (works with FSM or CFG)" << std::endl; + std::cout << " -s, --simplify \t Simplifies representation. (works with RE)" << std::endl; std::cout << " -h, --help \t shows this." << std::endl; std::cout << std::endl; } @@ -102,6 +104,32 @@ automaton::Automaton trimAutomaton(const automaton::Automaton& g, bool del_unrea throw exception::AlibException("Unsupported automaton type"); } +template<typename T> +regexp::RegExpBase* dynamicOptimize(const regexp::RegExp& r) { + const T* rp = dynamic_cast<const T*>(&r.getData()); + + if(rp) { + T res(*rp); + regexp::RegExpOptimize opt; + res = opt.optimize( res ); + return std::move(res).plunder(); + } else { + return NULL; + } +} + +regexp::RegExp optimizeRegExp(const regexp::RegExp& r) { + regexp::RegExpBase* res = NULL; + + res = dynamicOptimize<regexp::UnboundedRegExp>(r); + if(res) return regexp::RegExp(*res); + + res = dynamicOptimize<regexp::FormalRegExp>(r); + if(res) return regexp::RegExp(*res); + + throw exception::AlibException("Unsupported regexp type"); +} + int main(int argc, char* argv[]) { bool del_useless = false, del_unreachables = false; -- GitLab