From 06e10eaa598b32f8aefa0f08bce29910b36a8d8a Mon Sep 17 00:00:00 2001
From: Tomas Pecka <peckato1@fit.cvut.cz>
Date: Wed, 8 Dec 2021 12:17:32 +0100
Subject: [PATCH] algo: check if language of regexp is exactly epsilon

Introduce a new algorithm, regexp::properties::LanguageIsEpsilon, to
check whether the language of a regexp is exactly equal to the language
of the epsilon regexp.
This will later be useful in checking whether the language of a regexp
is infinite.
---
 .../regexp/properties/LanguageIsEpsilon.cpp   |  11 +
 .../src/regexp/properties/LanguageIsEpsilon.h | 191 ++++++++++++++++++
 .../properties/RegExpPropertiesTest.cpp       |  46 +++++
 3 files changed, 248 insertions(+)
 create mode 100644 alib2algo/src/regexp/properties/LanguageIsEpsilon.cpp
 create mode 100644 alib2algo/src/regexp/properties/LanguageIsEpsilon.h

diff --git a/alib2algo/src/regexp/properties/LanguageIsEpsilon.cpp b/alib2algo/src/regexp/properties/LanguageIsEpsilon.cpp
new file mode 100644
index 0000000000..20e7775a4b
--- /dev/null
+++ b/alib2algo/src/regexp/properties/LanguageIsEpsilon.cpp
@@ -0,0 +1,11 @@
+#include "LanguageIsEpsilon.h"
+#include <regexp/formal/FormalRegExpElements.h>
+#include <regexp/unbounded/UnboundedRegExpElements.h>
+#include <registration/AlgoRegistration.hpp>
+
+namespace {
+// Registers languageIsEpsilon for the formal and the unbounded regexp representation via registration::AbstractRegister; the variables are not referenced elsewhere in this file.
+auto FormalRegExp = registration::AbstractRegister < regexp::properties::LanguageIsEpsilon, bool, const regexp::FormalRegExp < > & > ( regexp::properties::LanguageIsEpsilon::languageIsEpsilon );
+auto UnboundedRegExp = registration::AbstractRegister < regexp::properties::LanguageIsEpsilon, bool, const regexp::UnboundedRegExp < > & > ( regexp::properties::LanguageIsEpsilon::languageIsEpsilon );
+
+} /* namespace */
diff --git a/alib2algo/src/regexp/properties/LanguageIsEpsilon.h b/alib2algo/src/regexp/properties/LanguageIsEpsilon.h
new file mode 100644
index 0000000000..35125ece5c
--- /dev/null
+++ b/alib2algo/src/regexp/properties/LanguageIsEpsilon.h
@@ -0,0 +1,191 @@
+#pragma once
+
+#include <algorithm>
+#include <regexp/formal/FormalRegExp.h>
+#include <regexp/formal/FormalRegExpElements.h>
+#include <regexp/properties/LanguageIsEmpty.h>
+#include <regexp/unbounded/UnboundedRegExp.h>
+#include <regexp/unbounded/UnboundedRegExpElements.h>
+
+namespace regexp::properties {
+
+/**
+ * Determines whether a regular expression (or its subtree) describes exactly the language {\eps}, i.e. the language containing only the empty word ({\eps} = h(regexp)).
+ *
+ */
+class LanguageIsEpsilon {
+public:
+	/**
+	 * @brief Determines whether a regular expression (or its subtree) describes exactly the language {\eps}, i.e. only the empty word ({\eps} = h(regexp)).
+	 * @tparam SymbolType the type of symbol in the tested regular expression
+	 * @param regexp the regexp to test
+	 * @return true if the language described by the regular expression is exactly {\eps}, false otherwise
+	 */
+	template < class SymbolType >
+	static bool languageIsEpsilon(const regexp::FormalRegExpElement < SymbolType > & regexp);
+
+	/**
+	 * @overload
+	 */
+	template < class SymbolType >
+	static bool languageIsEpsilon(const regexp::FormalRegExpStructure < SymbolType > & regexp);
+
+	/**
+	 * @overload
+	 */
+	template < class SymbolType >
+	static bool languageIsEpsilon(const regexp::FormalRegExp < SymbolType > & regexp);
+
+	/**
+	 * @overload
+	 */
+	template < class SymbolType >
+	static bool languageIsEpsilon(const regexp::UnboundedRegExpElement < SymbolType > & regexp);
+
+	/**
+	 * @overload
+	 */
+	template < class SymbolType >
+	static bool languageIsEpsilon(const regexp::UnboundedRegExpStructure < SymbolType > & regexp);
+
+	/**
+	 * @overload
+	 */
+	template < class SymbolType >
+	static bool languageIsEpsilon(const regexp::UnboundedRegExp < SymbolType > & regexp);
+	/** @brief Visitor passed to accept ( ) by the unbounded overloads; one visit overload per unbounded regexp node type. */
+	template < class SymbolType >
+	class Unbounded {
+	public:
+		static bool visit(const regexp::UnboundedRegExpAlternation < SymbolType > & alternation);
+		static bool visit(const regexp::UnboundedRegExpConcatenation < SymbolType > & concatenation);
+		static bool visit(const regexp::UnboundedRegExpIteration < SymbolType > & iteration);
+		static bool visit(const regexp::UnboundedRegExpSymbol < SymbolType > & symbol);
+		static bool visit(const regexp::UnboundedRegExpEmpty < SymbolType > & empty);
+		static bool visit(const regexp::UnboundedRegExpEpsilon < SymbolType > & epsilon);
+	};
+	/** @brief Visitor passed to accept ( ) by the formal overloads; one visit overload per formal regexp node type. */
+	template < class SymbolType >
+	class Formal {
+	public:
+		static bool visit(const regexp::FormalRegExpAlternation < SymbolType > & alternation);
+		static bool visit(const regexp::FormalRegExpConcatenation < SymbolType > & concatenation);
+		static bool visit(const regexp::FormalRegExpIteration < SymbolType > & iteration);
+		static bool visit(const regexp::FormalRegExpSymbol < SymbolType > & symbol);
+		static bool visit(const regexp::FormalRegExpEmpty < SymbolType > & empty);
+		static bool visit(const regexp::FormalRegExpEpsilon < SymbolType > & epsilon);
+	};
+};
+
+// ----------------------------------------------------------------------------
+
+template < class SymbolType >
+bool LanguageIsEpsilon::languageIsEpsilon(const regexp::FormalRegExpElement < SymbolType > & regexp) {
+	return regexp.template accept<bool, LanguageIsEpsilon::Formal < SymbolType >>(); // evaluate this node with the Formal visitor
+}
+
+template < class SymbolType >
+bool LanguageIsEpsilon::languageIsEpsilon(const regexp::FormalRegExpStructure < SymbolType > & regexp) {
+	return languageIsEpsilon(regexp.getStructure()); // delegate to the overload matching the result of getStructure ( )
+}
+
+template < class SymbolType >
+bool LanguageIsEpsilon::languageIsEpsilon(const regexp::FormalRegExp < SymbolType > & regexp) {
+	return languageIsEpsilon(regexp.getRegExp()); // delegate to the overload matching the result of getRegExp ( )
+}
+
+// ----------------------------------------------------------------------------
+
+template < class SymbolType >
+bool LanguageIsEpsilon::languageIsEpsilon(const regexp::UnboundedRegExpElement < SymbolType > & regexp) {
+	return regexp.template accept<bool, LanguageIsEpsilon::Unbounded < SymbolType >>(); // evaluate this node with the Unbounded visitor
+}
+
+template < class SymbolType >
+bool LanguageIsEpsilon::languageIsEpsilon(const regexp::UnboundedRegExpStructure < SymbolType > & regexp) {
+	return languageIsEpsilon(regexp.getStructure()); // delegate to the overload matching the result of getStructure ( )
+}
+
+template < class SymbolType >
+bool LanguageIsEpsilon::languageIsEpsilon(const regexp::UnboundedRegExp < SymbolType > & regexp) {
+	return languageIsEpsilon(regexp.getRegExp()); // delegate to the overload matching the result of getRegExp ( )
+}
+
+// ---------------------------------------------------------------------------
+
+template < class SymbolType >
+bool LanguageIsEpsilon::Unbounded < SymbolType >::visit(const regexp::UnboundedRegExpAlternation < SymbolType > & alternation) {
+	bool sawEpsilon = false; // {eps} needs at least one {eps} operand; every other operand must denote the empty language
+	for ( const auto & operand : alternation.getElements ( ) ) {
+		if ( operand.template accept < bool, LanguageIsEpsilon::Unbounded < SymbolType > > ( ) ) sawEpsilon = true;
+		else if ( ! LanguageIsEmpty::isLanguageEmpty ( operand ) ) return false; // operand is neither {eps} nor empty
+	}
+	return sawEpsilon;
+}
+
+template < class SymbolType >
+bool LanguageIsEpsilon::Unbounded < SymbolType >::visit(const regexp::UnboundedRegExpConcatenation < SymbolType > & concatenation) {
+	const auto & factors = concatenation.getElements ( ); // every factor must itself denote {eps}
+	const auto isEps = [ ] ( const UnboundedRegExpElement < SymbolType > & factor ) { return factor.template accept < bool, LanguageIsEpsilon::Unbounded < SymbolType > > ( ); };
+	return std::all_of ( factors.begin ( ), factors.end ( ), isEps );
+}
+
+template < class SymbolType >
+bool LanguageIsEpsilon::Unbounded < SymbolType >::visit(const regexp::UnboundedRegExpIteration < SymbolType > & iteration) {
+	return LanguageIsEmpty::isLanguageEmpty ( iteration.getElement ( ) ) || iteration.getElement().template accept<bool, LanguageIsEpsilon::Unbounded < SymbolType > >(); // iterated language is empty or {eps}
+}
+
+template < class SymbolType >
+bool LanguageIsEpsilon::Unbounded < SymbolType >::visit(const regexp::UnboundedRegExpSymbol < SymbolType > &) {
+	return false; // a symbol node is never reported as {eps}
+}
+
+template < class SymbolType >
+bool LanguageIsEpsilon::Unbounded < SymbolType >::visit(const regexp::UnboundedRegExpEpsilon < SymbolType > &) {
+	return true; // the epsilon node is the base case that yields true
+}
+
+template < class SymbolType >
+bool LanguageIsEpsilon::Unbounded < SymbolType >::visit(const regexp::UnboundedRegExpEmpty < SymbolType > &) {
+	return false; // the empty language contains no word at all, so it is not {eps}
+}
+
+// ----------------------------------------------------------------------------
+
+template < class SymbolType >
+bool LanguageIsEpsilon::Formal < SymbolType >::visit(const regexp::FormalRegExpAlternation < SymbolType > & alternation) {
+	// L + R is exactly {eps} iff each operand is either empty or {eps}, and at least one of them is {eps} (i.e. not both are empty).
+	auto leftEmpty = LanguageIsEmpty::isLanguageEmpty ( alternation.getLeftElement ( ) );
+	auto rightEmpty = LanguageIsEmpty::isLanguageEmpty ( alternation.getRightElement ( ) ); // emptiness of the right operand
+	auto leftEps = alternation.getLeftElement().template accept<bool, LanguageIsEpsilon::Formal < SymbolType >>();
+	auto rightEps = alternation.getRightElement().template accept<bool, LanguageIsEpsilon::Formal < SymbolType >>();
+
+	return ! ( leftEmpty && rightEmpty ) && ( leftEmpty || leftEps ) && ( rightEmpty || rightEps );
+}
+
+template < class SymbolType >
+bool LanguageIsEpsilon::Formal < SymbolType >::visit(const regexp::FormalRegExpConcatenation < SymbolType > & concatenation) {
+	return concatenation.getLeftElement().template accept<bool, LanguageIsEpsilon::Formal < SymbolType >>() && concatenation.getRightElement().template accept<bool, LanguageIsEpsilon::Formal < SymbolType >>(); // both factors must denote {eps}
+}
+
+template < class SymbolType >
+bool LanguageIsEpsilon::Formal < SymbolType >::visit(const regexp::FormalRegExpIteration < SymbolType > & iteration) {
+	return LanguageIsEmpty::isLanguageEmpty ( iteration.getElement ( ) ) || iteration.getElement().template accept<bool, LanguageIsEpsilon::Formal < SymbolType > >(); // iterated language is empty or {eps}
+}
+
+template < class SymbolType >
+bool LanguageIsEpsilon::Formal < SymbolType >::visit(const regexp::FormalRegExpSymbol < SymbolType > &) {
+	return false; // a symbol node is never reported as {eps}
+}
+
+template < class SymbolType >
+bool LanguageIsEpsilon::Formal < SymbolType >::visit(const regexp::FormalRegExpEmpty < SymbolType > &) {
+	return false; // the empty language contains no word at all, so it is not {eps}
+}
+
+template < class SymbolType >
+bool LanguageIsEpsilon::Formal < SymbolType >::visit(const regexp::FormalRegExpEpsilon < SymbolType > &) {
+	return true; // the epsilon node is the base case that yields true
+}
+
+}
diff --git a/alib2algo/test-src/regexp/properties/RegExpPropertiesTest.cpp b/alib2algo/test-src/regexp/properties/RegExpPropertiesTest.cpp
index ae23c06412..48c3bdab73 100644
--- a/alib2algo/test-src/regexp/properties/RegExpPropertiesTest.cpp
+++ b/alib2algo/test-src/regexp/properties/RegExpPropertiesTest.cpp
@@ -4,6 +4,7 @@
 #include <regexp/string/UnboundedRegExp.h>
 #include <regexp/properties/LanguageContainsEpsilon.h>
 #include <regexp/properties/LanguageIsEmpty.h>
+#include <regexp/properties/LanguageIsEpsilon.h>
 
 TEST_CASE ( "RegExp properties", "[unit][algo][regexp][properties]" ) {
 	SECTION ( "Contains epsilon" ) {
@@ -65,4 +66,49 @@ TEST_CASE ( "RegExp properties", "[unit][algo][regexp][properties]" ) {
 			CHECK(! regexp::properties::LanguageIsEmpty::isLanguageEmpty(re));
 		}
 	}
+
+	SECTION ( "Is exactly epsilon language" ) {
+		std::string inp;
+		bool expected;
+		// Each numbered SECTION supplies one regexp string and the expected verdict; the statements after the sections perform the actual check.
+		SECTION ( "1" ) {
+			inp = "(#E #0 ) + ( #0 a + (b ( #0 (a*) ) ) )";
+			expected = false;
+		}
+
+		SECTION ( "2" ) {
+			inp = "#E";
+			expected = true;
+		}
+
+		SECTION ( "3" ) {
+			inp = "#0";
+			expected = false;
+		}
+
+		SECTION ( "4" ) {
+			inp = "#E + #0";
+			expected = true;
+		}
+
+		SECTION ( "5" ) {
+			inp = "(#E** + #0)* + #E + a b c";
+			expected = false;
+		}
+
+		SECTION ( "6" ) {
+			inp = "(#E** + #0)* + #E + #0*";
+			expected = true;
+		}
+
+		SECTION ( "7" ) {
+			inp = "a* + #E";
+			expected = false;
+		}
+		// NOTE(review): the input is parsed as an UnboundedRegExp only; the FormalRegExp overloads are not exercised by this section.
+		INFO ( inp );
+		INFO ( expected );
+		regexp::UnboundedRegExp < > reU = factory::StringDataFactory::fromString ( inp );
+		CHECK ( regexp::properties::LanguageIsEpsilon::languageIsEpsilon ( reU ) == expected );
+	}
 }
-- 
GitLab