From 14d6dd7606f611231905daceb9e960c23dea94a9 Mon Sep 17 00:00:00 2001
From: Jan Travnicek <jan.travnicek@.fit.cvut.cz>
Date: Mon, 18 Mar 2019 15:15:49 +0100
Subject: [PATCH] enable A8 regexp optimize

---
 .../src/regexp/simplify/RegExpOptimize.h      |  8 +-
 .../simplify/RegExpOptimizeUnboundedPart.hpp  | 82 +++++++++++--------
 .../regexp/simplify/RegExpOptimizeTest.cpp    | 12 +--
 .../test-src/tests/regexpOptimizeTest.cpp     | 36 ++++++++
 4 files changed, 92 insertions(+), 46 deletions(-)
 create mode 100644 alib2integrationtest/test-src/tests/regexpOptimizeTest.cpp

diff --git a/alib2algo/src/regexp/simplify/RegExpOptimize.h b/alib2algo/src/regexp/simplify/RegExpOptimize.h
index 10b969194c..414c8894c0 100644
--- a/alib2algo/src/regexp/simplify/RegExpOptimize.h
+++ b/alib2algo/src/regexp/simplify/RegExpOptimize.h
@@ -42,8 +42,8 @@ namespace simplify {
  *  - A5 : -> : x(yz) = (xy)z = xyz
  *  - A6 : -> : \ex = x\e = x
  *  - A7 : -> : \0x = x\0 = \0
- *  - A8 : -> : x( y + z ) = xy + xz
- *  - A9 : -> : ( x + y )z = xz + yz
+ *  - A8 : <- : x( y + z ) = xy + xz
+ *  - A9 : <- : ( x + y )z = xz + yz
  *  - A10: <- : x* = \e + x*x
  *  - A11: <- : x* = ( \e + x )*
  *  - V1 : -> : \0* = \e
@@ -123,8 +123,8 @@ private:
 		static bool A5( regexp::UnboundedRegExpConcatenation < SymbolType > & node );
 		static bool A6( regexp::UnboundedRegExpConcatenation < SymbolType > & node );
 		static bool A7( regexp::UnboundedRegExpConcatenation < SymbolType > & node );
-		static bool A8( regexp::UnboundedRegExpConcatenation < SymbolType > & node );
-		static bool A9( regexp::UnboundedRegExpConcatenation < SymbolType > & node );
+		static bool A8( regexp::UnboundedRegExpAlternation < SymbolType > & node );
+		static bool A9( regexp::UnboundedRegExpAlternation < SymbolType > & node );
 		static bool A10( regexp::UnboundedRegExpAlternation < SymbolType > & node );
 		static bool A11( regexp::UnboundedRegExpIteration < SymbolType > & node );
 		static bool V1( regexp::UnboundedRegExpIteration < SymbolType > & node );
diff --git a/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.hpp b/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.hpp
index 9ee980b87f..95d2501e16 100644
--- a/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.hpp
+++ b/alib2algo/src/regexp/simplify/RegExpOptimizeUnboundedPart.hpp
@@ -13,12 +13,14 @@ namespace simplify {
 
 template < class SymbolType >
 void RegExpOptimize::optimize( UnboundedRegExpAlternation < SymbolType > & alt ) {
-	while ( Unbounded < SymbolType >::A10 ( alt ) || Unbounded < SymbolType >::V2 ( alt ) || Unbounded < SymbolType >::V5 ( alt ) || Unbounded < SymbolType >::V6 ( alt ) );
+	while ( Unbounded < SymbolType >::A1( alt ) || Unbounded < SymbolType >::A2( alt ) || Unbounded < SymbolType >::A3( alt ) || Unbounded < SymbolType >::A4( alt ) );
+
+	while ( Unbounded < SymbolType >::A8( alt ) || Unbounded < SymbolType >::A9( alt ) || Unbounded < SymbolType >::A10 ( alt ) || Unbounded < SymbolType >::V2 ( alt ) || Unbounded < SymbolType >::V5 ( alt ) || Unbounded < SymbolType >::V6 ( alt ) );
 
 	for( size_t i = 0; i < alt.getChildren ( ).size ( ); i++ )
 		alt.setChild ( std::move ( alt.getChild ( i ) ).template accept < ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > >, RegExpOptimize::Unbounded < SymbolType > > ( true ), i );
 
-	while ( Unbounded < SymbolType >::A1( alt ) || Unbounded < SymbolType >::A2( alt ) || Unbounded < SymbolType >::A3( alt ) || Unbounded < SymbolType >::A4( alt ) || Unbounded < SymbolType >::A10( alt ) || Unbounded < SymbolType >::V2( alt ) || Unbounded < SymbolType >::V5( alt ) || Unbounded < SymbolType >::V6( alt ) || Unbounded < SymbolType >::X1( alt ) );
+	while ( Unbounded < SymbolType >::A1( alt ) || Unbounded < SymbolType >::A2( alt ) || Unbounded < SymbolType >::A3( alt ) || Unbounded < SymbolType >::A4( alt ) || Unbounded < SymbolType >::A8( alt ) || Unbounded < SymbolType >::A9( alt ) || Unbounded < SymbolType >::A10( alt ) || Unbounded < SymbolType >::V2( alt ) || Unbounded < SymbolType >::V5( alt ) || Unbounded < SymbolType >::V6( alt ) || Unbounded < SymbolType >::X1( alt ) );
 
 	for( size_t i = 0; i < alt.getChildren ( ).size ( ); i++ )
 		alt.setChild ( std::move ( alt.getChild ( i ) ).template accept < ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > >, RegExpOptimize::Unbounded < SymbolType > > ( false ), i );
@@ -26,12 +28,14 @@ void RegExpOptimize::optimize( UnboundedRegExpAlternation < SymbolType > & alt )
 
 template < class SymbolType >
 void RegExpOptimize::optimize( UnboundedRegExpConcatenation < SymbolType > & concat ) {
+	while ( Unbounded < SymbolType >::A5( concat ) || Unbounded < SymbolType >::A6( concat ) || Unbounded < SymbolType >::A7( concat ) );
+
 	while ( Unbounded < SymbolType >::V8 ( concat ) || Unbounded < SymbolType >::V8R ( concat ) || Unbounded < SymbolType >::V9( concat ) );
 
 	for( size_t i = 0; i < concat.getChildren ( ).size ( ); i++ )
 		concat.setChild ( std::move ( concat.getChild ( i ) ).template accept < ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > >, RegExpOptimize::Unbounded < SymbolType > > ( true ), i );
 
-	while ( Unbounded < SymbolType >::A5( concat ) || Unbounded < SymbolType >::A6( concat ) || Unbounded < SymbolType >::A7( concat ) || Unbounded < SymbolType >::A8( concat ) || Unbounded < SymbolType >::A9( concat ) || Unbounded < SymbolType >::V8( concat ) || Unbounded < SymbolType >::V8R( concat ) || Unbounded < SymbolType >::V9( concat ) );
+	while ( Unbounded < SymbolType >::A5( concat ) || Unbounded < SymbolType >::A6( concat ) || Unbounded < SymbolType >::A7( concat ) || Unbounded < SymbolType >::V8( concat ) || Unbounded < SymbolType >::V8R( concat ) || Unbounded < SymbolType >::V9( concat ) );
 
 	for( size_t i = 0; i < concat.getChildren ( ).size ( ); i++ )
 		concat.setChild ( std::move ( concat.getChild ( i ) ).template accept < ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > >, RegExpOptimize::Unbounded < SymbolType > > ( false ), i );
@@ -266,45 +270,51 @@ bool RegExpOptimize::Unbounded < SymbolType >::A7( UnboundedRegExpConcatenation
   * @return bool true if optimization applied else false
   */
 template < class SymbolType >
-bool RegExpOptimize::Unbounded < SymbolType >::A8( UnboundedRegExpConcatenation < SymbolType > & /* node */) {
-/*
-	bool optimized = false;
+bool RegExpOptimize::Unbounded < SymbolType >::A8( UnboundedRegExpAlternation < SymbolType > & node ) { // A8 applied right-to-left: xy + xz -> x( y + z )
+	std::map < ext::reference_wrapper < UnboundedRegExpElement < SymbolType > >, ext::vector < ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > > > data; // leading element -> alternatives starting with it
 
-	for( auto it = std::next( node->elements.begin( ) ); it != node->elements.end( ); )
-	{
-		UnboundedRegExpAlternation < SymbolType > * alt = dynamic_cast<UnboundedRegExpAlternation < SymbolType >*>( * it );
-		if( ! alt )
-		{
-			it ++;
-			continue;
+	for ( UnboundedRegExpElement < SymbolType > & element : node ) {
+		UnboundedRegExpConcatenation < SymbolType > * childConcat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & element );
+		if ( childConcat ) {
+			data [ ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > ( childConcat->getChild ( 0 ) ) ].push_back ( ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > ( element ) ); // NOTE(review): assumes the concatenation has at least one child - confirm
+		} else {
+			data [ ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > ( element ) ].push_back ( ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > ( element ) ); // a non-concatenation is its own leading element
 		}
+	}
 
-		// take everything to the left and copy it as prefix of every element in alternation.
-		UnboundedRegExpConcatenation < SymbolType > leftPart;
-		leftPart.elements.insert( leftPart.elements.end( ), node->elements.begin( ), it );
-
-		for( auto altIt = alt->elements.begin( ); altIt != alt->elements.end( ); altIt ++ )
-		{
-			UnboundedRegExpConcatenation < SymbolType > * altElem = new UnboundedRegExpConcatenation < SymbolType >( );
-			altElem->elements.push_back( leftPart );
-			altElem->elements.push_back( * altIt );
+	if ( data.size ( ) == node.getChildren ( ).size ( ) )
+		return false; // one group per alternative: no shared leading element, nothing to factor out
 
-			* altIt = altElem;
+	UnboundedRegExpAlternation < SymbolType > res;
+	for ( std::pair < ext::reference_wrapper < UnboundedRegExpElement < SymbolType > >, ext::vector < ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > > > && entry : ext::make_mover ( data ) ) {
+		if ( entry.second.size ( ) == 1 ) {
+			res.appendElement ( std::move ( entry.second.front ( ).get ( ) ) ); // lone alternative is carried over unchanged
+		} else {
+			UnboundedRegExpConcatenation < SymbolType > innerConcat;
+			innerConcat.appendElement ( std::move ( entry.first.get ( ) ) ); // the shared leading element x
+			UnboundedRegExpAlternation < SymbolType > innerAlt;
+			for ( ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > & innerEntry : entry.second ) {
+				UnboundedRegExpElement < SymbolType > & innerEntryElement = innerEntry.get ( );
+				UnboundedRegExpConcatenation < SymbolType > * innerEntryConcat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & innerEntryElement );
+				if ( innerEntryConcat ) {
+					if ( innerEntryConcat->getElements ( ).size ( ) == 1 ) {
+						innerAlt.appendElement ( UnboundedRegExpEpsilon < SymbolType > ( ) ); // concatenation held only x, its remainder is epsilon
+					} else {
+						innerEntryConcat->erase ( innerEntryConcat->begin ( ) ); // drop the leading x, keep the remainder
+						innerAlt.appendElement ( std::move ( * innerEntryConcat ) );
+					}
+				} else {
+					innerAlt.appendElement ( UnboundedRegExpEpsilon < SymbolType > ( ) ); // alternative was x itself, its remainder is epsilon
+				}
+			}
+			innerConcat.appendElement ( std::move ( innerAlt ) );
+			res.appendElement ( Unbounded < SymbolType >::visit ( std::move ( innerConcat ), true ) ); // factored x( ... ) is passed through the Unbounded visitor before being stored
 		}
-
-		UnboundedRegExpElement < SymbolType > * optIt = optimize( * it );
-		delete *it;
-		*it = optIt;
-
-		it = node->elements.erase( node->elements.begin( ), it );
-
-		optimized = true;
-		it ++;
 	}
 
-	return optimized;
-*/
-	return false; //TODO
+	node = std::move ( res ); // res is not used afterwards, so hand it over instead of copying it
+
+	return true; // node was rewritten: honour the documented "true if optimization applied"
 }
 
 /**
@@ -313,7 +323,7 @@ bool RegExpOptimize::Unbounded < SymbolType >::A8( UnboundedRegExpConcatenation
   * @return bool true if optimization applied else false
   */
 template < class SymbolType >
-bool RegExpOptimize::Unbounded < SymbolType >::A9( UnboundedRegExpConcatenation < SymbolType > & /* node */) {
+bool RegExpOptimize::Unbounded < SymbolType >::A9( UnboundedRegExpAlternation < SymbolType > & /* node */) {
 /*
 	bool optimized = false;
 
diff --git a/alib2algo/test-src/regexp/simplify/RegExpOptimizeTest.cpp b/alib2algo/test-src/regexp/simplify/RegExpOptimizeTest.cpp
index 747477283e..cff6baacc3 100644
--- a/alib2algo/test-src/regexp/simplify/RegExpOptimizeTest.cpp
+++ b/alib2algo/test-src/regexp/simplify/RegExpOptimizeTest.cpp
@@ -13,7 +13,7 @@ TEST_CASE ( "RegExp Optimize", "[unit][algo][regexp][simplify]" ) {
 	SECTION ( "Test simple axioms" ) {
 		auto testcase = GENERATE (
 				std::make_pair ("a+a", "a" ),
-				std::make_pair ( "(a+a)b + (#0 b + (#0 a + (#0 b + a)))", "a + a b" ),
+				std::make_pair ( "(a+a)b + (#0 b + (#0 a + (#0 b + a)))", "a ( b + #E )" ),
 				//std::make_pair ( "a z + a b* b z", "a b* z" ),
 				std::make_pair ( "a***", "a*" ),
 				std::make_pair ( "(a*+b*)*", "(a+b)*" ),
@@ -95,14 +95,14 @@ TEST_CASE ( "RegExp Optimize", "[unit][algo][regexp][simplify]" ) {
 
 			regexp::UnboundedRegExp < char > res = regexp::simplify::RegExpOptimize::optimize ( regexp );
 
+			regexp::UnboundedRegExpAlternation < char > alt6;
+			alt6.appendElement ( b );
+			alt6.appendElement ( regexp::UnboundedRegExpEpsilon < char > { } );
 			regexp::UnboundedRegExpConcatenation < char > con6;
 			con6.appendElement ( a );
-			con6.appendElement ( b );
-			regexp::UnboundedRegExpAlternation < char > alt6;
-			alt6.appendElement ( a );
-			alt6.appendElement ( con6 );
+			con6.appendElement ( alt6 );
 
-			regexp::UnboundedRegExp < char > regexpRes( regexp::UnboundedRegExpStructure < char > { alt6 } );
+			regexp::UnboundedRegExp < char > regexpRes( regexp::UnboundedRegExpStructure < char > { con6 } );
 
 			CAPTURE ( regexp, res, regexpRes );
 			CHECK ( regexpRes == res );
diff --git a/alib2integrationtest/test-src/tests/regexpOptimizeTest.cpp b/alib2integrationtest/test-src/tests/regexpOptimizeTest.cpp
new file mode 100644
index 0000000000..28b250f97a
--- /dev/null
+++ b/alib2integrationtest/test-src/tests/regexpOptimizeTest.cpp
@@ -0,0 +1,36 @@
+#include <catch2/catch.hpp>
+#include <alib/vector>
+
+#include "testing/TimeoutAqlTest.hpp"
+#include "testing/TestFiles.hpp"
+
+const unsigned LEAF_NODES   = 10; // first generator argument is drawn from 1 .. LEAF_NODES
+const unsigned HEIGHT = 6; // second generator argument is drawn from 1 .. HEIGHT
+const double   ALPHABET_SIZE  = 2; // third generator argument, emitted with a (size_t) cast prefix
+const size_t   ITERATIONS = 100; // number of random regexps checked by the test case
+
+const std::string qGenRE ( ) { // builds the AQL command that generates one random regexp
+	std::string query = "execute regexp::generate::RandomRegExpFactory ";
+	// the two rand ( ) draws keep their original order: LEAF_NODES bound first, HEIGHT bound second
+	query += "(size_t)" + std::to_string ( rand ( ) % LEAF_NODES + 1 ) + " ";
+	query += "(size_t)" + std::to_string ( rand ( ) % HEIGHT + 1 ) + " ";
+	query += "(size_t)" + std::to_string ( static_cast < size_t > ( ALPHABET_SIZE ) ) + " ";
+	query += "(bool)false ";
+	return query;
+}
+
+TEST_CASE ( "RE optimize test", "[integration]" ) { // compares the automaton of a random regexp with the automaton of its RegExpOptimize result
+	static const std::string qMinimize ( "automaton::simplify::efficient::EpsilonRemoverIncoming - | automaton::determinize::Determinize - | "
+			"automaton::simplify::Trim - | automaton::simplify::Minimize - | automaton::simplify::Normalize -" ); // same pipeline is appended to both sides of the comparison
+
+	SECTION ( "Random tests" ) {
+		for ( size_t i = 0; i < ITERATIONS; i++ ) {
+			ext::vector < std::string > qs = {
+				qGenRE ( ) + " > $gen", // keep the generated regexp in $gen for the following queries
+				"execute string::Compose $gen > /tmp/file", // NOTE(review): fixed path that no later query reads - presumably a debugging dump; confirm it is wanted and safe under parallel runs
+				"quit compare::AutomatonCompare <( $gen | regexp::convert::ToAutomaton - | " + qMinimize + " )" + " <( $gen | regexp::simplify::RegExpOptimize - | regexp::convert::ToAutomaton - | " + qMinimize + ")" // left: original regexp, right: optimized regexp
+			};
+			TimeoutAqlTest ( 10s, qs ); // 10s is presumably the time limit for one iteration - confirm in TimeoutAqlTest
+		}
+	}
+}
-- 
GitLab