From 06e10eaa598b32f8aefa0f08bce29910b36a8d8a Mon Sep 17 00:00:00 2001 From: Tomas Pecka <peckato1@fit.cvut.cz> Date: Wed, 8 Dec 2021 12:17:32 +0100 Subject: [PATCH] algo: check if language of regexp is exactly epsilon Introduce a new algorithm, regexp::properties::LanguageIsEpsilon to check whether a language of a regexp is exactly equal to a language of epsilon regexp. This will be later useful in checking whether the language of a regexp is infinite. --- .../regexp/properties/LanguageIsEpsilon.cpp | 11 + .../src/regexp/properties/LanguageIsEpsilon.h | 191 ++++++++++++++++++ .../properties/RegExpPropertiesTest.cpp | 46 +++++ 3 files changed, 248 insertions(+) create mode 100644 alib2algo/src/regexp/properties/LanguageIsEpsilon.cpp create mode 100644 alib2algo/src/regexp/properties/LanguageIsEpsilon.h diff --git a/alib2algo/src/regexp/properties/LanguageIsEpsilon.cpp b/alib2algo/src/regexp/properties/LanguageIsEpsilon.cpp new file mode 100644 index 0000000000..20e7775a4b --- /dev/null +++ b/alib2algo/src/regexp/properties/LanguageIsEpsilon.cpp @@ -0,0 +1,11 @@ +#include "LanguageIsEpsilon.h" +#include <regexp/formal/FormalRegExpElements.h> +#include <regexp/unbounded/UnboundedRegExpElements.h> +#include <registration/AlgoRegistration.hpp> + +namespace { /* file-local objects: one registration::AbstractRegister per languageIsEpsilon overload taking a whole regexp (FormalRegExp, UnboundedRegExp) */ + +auto FormalRegExp = registration::AbstractRegister < regexp::properties::LanguageIsEpsilon, bool, const regexp::FormalRegExp < > & > ( regexp::properties::LanguageIsEpsilon::languageIsEpsilon ); +auto UnboundedRegExp = registration::AbstractRegister < regexp::properties::LanguageIsEpsilon, bool, const regexp::UnboundedRegExp < > & > ( regexp::properties::LanguageIsEpsilon::languageIsEpsilon ); + +} /* namespace */ diff --git a/alib2algo/src/regexp/properties/LanguageIsEpsilon.h b/alib2algo/src/regexp/properties/LanguageIsEpsilon.h new file mode 100644 index 0000000000..35125ece5c --- /dev/null +++ b/alib2algo/src/regexp/properties/LanguageIsEpsilon.h @@ -0,0 +1,191 @@ +#pragma once + +#include <algorithm> 
+#include <regexp/formal/FormalRegExp.h> +#include <regexp/formal/FormalRegExpElements.h> +#include <regexp/properties/LanguageIsEmpty.h> +#include <regexp/unbounded/UnboundedRegExp.h> +#include <regexp/unbounded/UnboundedRegExpElements.h> + +namespace regexp::properties { + +/** + * Determines whether a regular expression (or its subtree) describes exactly the language { epsilon }, i.e. the language whose only word is the empty word (\eps = h(regexp)). + * The empty language is not { epsilon }: the visitors return false for Empty and Symbol nodes and true for Epsilon nodes. + */ +class LanguageIsEpsilon { +public: + /** + * @brief Determines whether a regular expression (or its subtree) describes exactly the language { epsilon } (\eps = h(regexp)). + * @tparam SymbolType the type of symbol in the tested regular expression + * @param regexp the regexp to test + * @return true if the language described by the regular expression is exactly { epsilon }, false otherwise + */ + template < class SymbolType > + static bool languageIsEpsilon(const regexp::FormalRegExpElement < SymbolType > & regexp); + + /** + * @overload + */ + template < class SymbolType > + static bool languageIsEpsilon(const regexp::FormalRegExpStructure < SymbolType > & regexp); + + /** + * @overload + */ + template < class SymbolType > + static bool languageIsEpsilon(const regexp::FormalRegExp < SymbolType > & regexp); + + /** + * @overload + */ + template < class SymbolType > + static bool languageIsEpsilon(const regexp::UnboundedRegExpElement < SymbolType > & regexp); + + /** + * @overload + */ + template < class SymbolType > + static bool languageIsEpsilon(const regexp::UnboundedRegExpStructure < SymbolType > & regexp); + + /** + * @overload + */ + template < class SymbolType > + static bool languageIsEpsilon(const regexp::UnboundedRegExp < SymbolType > & regexp); + + template < class SymbolType > + class Unbounded { /* visitor passed to accept() by the UnboundedRegExpElement overload; one static visit per Unbounded node kind, each returning whether that node's language is exactly { epsilon } */ + public: + static bool visit(const regexp::UnboundedRegExpAlternation < SymbolType > & alternation); + static bool visit(const regexp::UnboundedRegExpConcatenation < SymbolType > & concatenation); + static bool visit(const regexp::UnboundedRegExpIteration < SymbolType > & 
iteration); + static bool visit(const regexp::UnboundedRegExpSymbol < SymbolType > & symbol); + static bool visit(const regexp::UnboundedRegExpEmpty < SymbolType > & empty); + static bool visit(const regexp::UnboundedRegExpEpsilon < SymbolType > & epsilon); + }; + + template < class SymbolType > + class Formal { /* visitor passed to accept() by the FormalRegExpElement overload; one static visit per Formal node kind, each returning whether that node's language is exactly { epsilon } */ + public: + static bool visit(const regexp::FormalRegExpAlternation < SymbolType > & alternation); + static bool visit(const regexp::FormalRegExpConcatenation < SymbolType > & concatenation); + static bool visit(const regexp::FormalRegExpIteration < SymbolType > & iteration); + static bool visit(const regexp::FormalRegExpSymbol < SymbolType > & symbol); + static bool visit(const regexp::FormalRegExpEmpty < SymbolType > & empty); + static bool visit(const regexp::FormalRegExpEpsilon < SymbolType > & epsilon); + }; +}; + +// ---------------------------------------------------------------------------- + +template < class SymbolType > +bool LanguageIsEpsilon::languageIsEpsilon(const regexp::FormalRegExpElement < SymbolType > & regexp) { + return regexp.template accept<bool, LanguageIsEpsilon::Formal < SymbolType >>(); /* the Formal visitor decides per node kind; NOTE(review): accept presumably selects the visit overload of the node's concrete type - confirm */ +} + +template < class SymbolType > +bool LanguageIsEpsilon::languageIsEpsilon(const regexp::FormalRegExpStructure < SymbolType > & regexp) { + return languageIsEpsilon(regexp.getStructure()); /* forwards to the overload that matches what getStructure() returns */ +} + +template < class SymbolType > +bool LanguageIsEpsilon::languageIsEpsilon(const regexp::FormalRegExp < SymbolType > & regexp) { + return languageIsEpsilon(regexp.getRegExp()); /* forwards to the overload that matches what getRegExp() returns */ +} + +// ---------------------------------------------------------------------------- + +template < class SymbolType > +bool LanguageIsEpsilon::languageIsEpsilon(const regexp::UnboundedRegExpElement < SymbolType > & regexp) { + return regexp.template accept<bool, LanguageIsEpsilon::Unbounded < SymbolType >>(); /* the Unbounded visitor decides per node kind */ +} + +template < class SymbolType > +bool LanguageIsEpsilon::languageIsEpsilon(const regexp::UnboundedRegExpStructure < SymbolType > & regexp) { + return 
languageIsEpsilon(regexp.getStructure()); +} + +template < class SymbolType > +bool LanguageIsEpsilon::languageIsEpsilon(const regexp::UnboundedRegExp < SymbolType > & regexp) { + return languageIsEpsilon(regexp.getRegExp()); /* forwards to the overload that matches what getRegExp() returns */ +} + +// --------------------------------------------------------------------------- + +template < class SymbolType > +bool LanguageIsEpsilon::Unbounded < SymbolType >::visit(const regexp::UnboundedRegExpAlternation < SymbolType > & alternation) { /* true iff every alternative is { epsilon } or empty, and at least one alternative is { epsilon } */ + bool existsEpsilon = false; /* set by the lambda once a visited alternative is exactly { epsilon }; stays false when there are no alternatives */ + return std::all_of ( alternation.getElements ( ).begin ( ), alternation.getElements ( ).end ( ), [ & ] ( const auto & e ) { + bool isEpsilon = e.template accept < bool, LanguageIsEpsilon::Unbounded < SymbolType > > ( ); + existsEpsilon = existsEpsilon || isEpsilon; + return isEpsilon || LanguageIsEmpty::isLanguageEmpty ( e ); /* any other alternative stops all_of with false */ + } ) && existsEpsilon; /* all alternatives empty (or none at all) is not { epsilon } */ +} + +template < class SymbolType > +bool LanguageIsEpsilon::Unbounded < SymbolType >::visit(const regexp::UnboundedRegExpConcatenation < SymbolType > & concatenation) { /* true iff every factor is exactly { epsilon }; with no factors all_of yields true */ + return std::all_of ( concatenation.getElements ( ).begin ( ), concatenation.getElements ( ).end ( ), [ ] ( const UnboundedRegExpElement < SymbolType > & element ) { + return element.template accept < bool, LanguageIsEpsilon::Unbounded < SymbolType > > ( ); + } ); +} + +template < class SymbolType > +bool LanguageIsEpsilon::Unbounded < SymbolType >::visit(const regexp::UnboundedRegExpIteration < SymbolType > & iteration) { /* x* is { epsilon } exactly when x is empty or x is { epsilon } */ + return LanguageIsEmpty::isLanguageEmpty ( iteration.getElement ( ) ) || iteration.getElement().template accept<bool, LanguageIsEpsilon::Unbounded < SymbolType > >(); +} + +template < class SymbolType > +bool LanguageIsEpsilon::Unbounded < SymbolType >::visit(const regexp::UnboundedRegExpSymbol < SymbolType > &) { /* a symbol node is never { epsilon } */ + return false; +} + +template < class SymbolType > +bool LanguageIsEpsilon::Unbounded < SymbolType >::visit(const regexp::UnboundedRegExpEpsilon < SymbolType > &) { /* base case: the epsilon node itself */ + return true; +} + +template < class SymbolType > +bool 
LanguageIsEpsilon::Unbounded < SymbolType >::visit(const regexp::UnboundedRegExpEmpty < SymbolType > &) { /* the empty language contains no word, so it is not { epsilon } */ + return false; +} + +// ---------------------------------------------------------------------------- + +template < class SymbolType > +bool LanguageIsEpsilon::Formal < SymbolType >::visit(const regexp::FormalRegExpAlternation < SymbolType > & alternation) { + // each operand must be {eps} or empty, and the two operands must not both be empty + auto leftEmpty = LanguageIsEmpty::isLanguageEmpty ( alternation.getLeftElement ( ) ); + auto rightEmpty = LanguageIsEmpty::isLanguageEmpty ( alternation.getRightElement ( ) ); /* must probe the right operand; probing the left one twice misjudges eps|empty and empty|eps */ + auto leftEps = alternation.getLeftElement().template accept<bool, LanguageIsEpsilon::Formal < SymbolType >>(); + auto rightEps = alternation.getRightElement().template accept<bool, LanguageIsEpsilon::Formal < SymbolType >>(); + + return ! ( leftEmpty && rightEmpty ) && ( leftEmpty || leftEps ) && ( rightEmpty || rightEps ); +} + +template < class SymbolType > +bool LanguageIsEpsilon::Formal < SymbolType >::visit(const regexp::FormalRegExpConcatenation < SymbolType > & concatenation) { /* true iff both operands are exactly { epsilon } */ + return concatenation.getLeftElement().template accept<bool, LanguageIsEpsilon::Formal < SymbolType >>() && concatenation.getRightElement().template accept<bool, LanguageIsEpsilon::Formal < SymbolType >>(); +} + +template < class SymbolType > +bool LanguageIsEpsilon::Formal < SymbolType >::visit(const regexp::FormalRegExpIteration < SymbolType > & iteration) { /* x* is { epsilon } exactly when x is empty or x is { epsilon } */ + return LanguageIsEmpty::isLanguageEmpty ( iteration.getElement ( ) ) || iteration.getElement().template accept<bool, LanguageIsEpsilon::Formal < SymbolType > >(); +} + +template < class SymbolType > +bool LanguageIsEpsilon::Formal < SymbolType >::visit(const regexp::FormalRegExpSymbol < SymbolType > &) { /* a symbol node is never { epsilon } */ + return false; +} + +template < class SymbolType > +bool LanguageIsEpsilon::Formal < SymbolType >::visit(const regexp::FormalRegExpEmpty < SymbolType > &) { /* the empty language contains no word, so it is not { epsilon } */ + return false; +} + +template < class SymbolType > +bool 
LanguageIsEpsilon::Formal < SymbolType >::visit(const regexp::FormalRegExpEpsilon < SymbolType > &) { /* base case: the epsilon node itself */ + return true; +} + +} /* namespace regexp::properties */ diff --git a/alib2algo/test-src/regexp/properties/RegExpPropertiesTest.cpp b/alib2algo/test-src/regexp/properties/RegExpPropertiesTest.cpp index ae23c06412..48c3bdab73 100644 --- a/alib2algo/test-src/regexp/properties/RegExpPropertiesTest.cpp +++ b/alib2algo/test-src/regexp/properties/RegExpPropertiesTest.cpp @@ -4,6 +4,7 @@ #include <regexp/string/UnboundedRegExp.h> #include <regexp/properties/LanguageContainsEpsilon.h> #include <regexp/properties/LanguageIsEmpty.h> +#include <regexp/properties/LanguageIsEpsilon.h> TEST_CASE ( "RegExp properties", "[unit][algo][regexp][properties]" ) { SECTION ( "Contains epsilon" ) { @@ -65,4 +66,49 @@ TEST_CASE ( "RegExp properties", "[unit][algo][regexp][properties]" ) { CHECK(! regexp::properties::LanguageIsEmpty::isLanguageEmpty(re)); } } + + SECTION ( "Is exactly epsilon language" ) { /* each numbered sub-section sets one input string and the expected verdict; the shared check runs after them */ + std::string inp; + bool expected; + + SECTION ( "1" ) { + inp = "(#E #0 ) + ( #0 a + (b ( #0 (a*) ) ) )"; + expected = false; + } + + SECTION ( "2" ) { + inp = "#E"; + expected = true; + } + + SECTION ( "3" ) { + inp = "#0"; + expected = false; + } + + SECTION ( "4" ) { + inp = "#E + #0"; + expected = true; + } + + SECTION ( "5" ) { + inp = "(#E** + #0)* + #E + a b c"; + expected = false; + } + + SECTION ( "6" ) { + inp = "(#E** + #0)* + #E + #0*"; + expected = true; + } + + SECTION ( "7" ) { + inp = "a* + #E"; + expected = false; + } + + INFO ( inp ); + INFO ( expected ); + regexp::UnboundedRegExp < > reU = factory::StringDataFactory::fromString ( inp ); /* NOTE(review): only the UnboundedRegExp overload is exercised here; the FormalRegExp overload and its Formal visitor have no coverage in this section */ + CHECK ( regexp::properties::LanguageIsEpsilon::languageIsEpsilon ( reU ) == expected ); + } } -- GitLab