From 15d89fdc328e3b96d3ec22008a34e1ecfbbf63de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Pecka?= <peckato1@fit.cvut.cz> Date: Mon, 14 Apr 2014 13:19:13 +0200 Subject: [PATCH] libaregexptree: New api for derivations and integrals --- aconversions/src/re2fa/Brzozowski.cpp | 4 +- .../re2rg/re2rrg/BrzozowskiDerivationRRG.cpp | 5 +-- aderivation/src/aderivation.cpp | 10 +---- aintegral/src/aintegral.cpp | 10 +---- libaregexptree/src/RegExpDerivation.cpp | 29 ++++++++------- libaregexptree/src/RegExpDerivation.h | 24 +++++++----- libaregexptree/src/RegExpIntegral.cpp | 37 ++++++++++--------- libaregexptree/src/RegExpIntegral.h | 23 ++++++++---- 8 files changed, 70 insertions(+), 72 deletions(-) diff --git a/aconversions/src/re2fa/Brzozowski.cpp b/aconversions/src/re2fa/Brzozowski.cpp index 3c2048ace6..90e268a227 100644 --- a/aconversions/src/re2fa/Brzozowski.cpp +++ b/aconversions/src/re2fa/Brzozowski.cpp @@ -51,8 +51,7 @@ FSM Brzozowski::convert( void ) for( const auto & a : alphabet ) { - RegExpElement* dSymbol = new RegExpSymbol( a.getSymbol( ) ); - RegExp derived = deriv.derivation( list<RegExpElement*>( 1, dSymbol ) ); + RegExp derived = deriv.derivation( a ); derived = opt.optimize( derived ); // no "trash state" (though algorithm probably considers it) @@ -64,7 +63,6 @@ FSM Brzozowski::convert( void ) m_transitions.insert( Transition( regexp, Symbol( a.getSymbol( ) ), derived ) ); } - delete dSymbol; } } diff --git a/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp b/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp index a0798d9c29..eff5b812bf 100644 --- a/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp +++ b/aconversions/src/re2rg/re2rrg/BrzozowskiDerivationRRG.cpp @@ -53,8 +53,7 @@ RightRegularGrammar BrzozowskiDerivationRRG::convert( void ) for( const auto & a : alphabet ) { - RegExpElement* dSymbol = new RegExpSymbol( a.getSymbol( ) ); - RegExp derived = deriv.derivation( list<RegExpElement*>( 1, dSymbol ) ); + RegExp derived = deriv.derivation( a ); derived = opt.optimize( derived ); if( ! derived.isEmpty( ) ) @@ -64,8 +63,6 @@ RightRegularGrammar BrzozowskiDerivationRRG::convert( void ) m_transitions.insert( Transition( regexp, Symbol( a.getSymbol( ) ), derived ) ); } - - delete dSymbol; } } diff --git a/aderivation/src/aderivation.cpp b/aderivation/src/aderivation.cpp index 018c8da654..aafb5aeee0 100644 --- a/aderivation/src/aderivation.cpp +++ b/aderivation/src/aderivation.cpp @@ -27,21 +27,15 @@ int main(int argc, char** argv) SaxInterface::parseMemory(input, tokens); RegExp re = RegExpParser::parse(tokens); - list<RegExpElement*> dString; + list<Symbol> dString; for( int i = 1; i < argc ; i++ ) { string symbol( argv[ i ] ); // cout << "'" << symbol << "'" << endl; - if( symbol == "" ) - dString.push_back( new RegExpEpsilon( ) ); - else - dString.push_back( new RegExpSymbol( symbol ) ); + dString.push_back( Symbol( symbol ) ); } RegExpDerivation d( re ); d.derivation( dString ).toXML( cout ); - - for( const auto & symbol : dString ) - delete symbol; } catch( AlibException & e ) { diff --git a/aintegral/src/aintegral.cpp b/aintegral/src/aintegral.cpp index c5599a0a94..cb6b62fd6c 100644 --- a/aintegral/src/aintegral.cpp +++ b/aintegral/src/aintegral.cpp @@ -27,21 +27,15 @@ int main(int argc, char** argv) SaxInterface::parseMemory(input, tokens); RegExp re = RegExpParser::parse(tokens); - list<RegExpElement*> dString; + list<Symbol> dString; for( int i = 1; i < argc ; i++ ) { string symbol( argv[ i ] ); // cout << "'" << symbol << "'" << endl; - if( symbol == "" ) - dString.push_back( new RegExpEpsilon( ) ); - else - dString.push_back( new RegExpSymbol( symbol ) ); + dString.push_back( Symbol( symbol ) ); } RegExpIntegral i( re ); i.integral( dString ).toXML( cout ); - - for( const auto & symbol : dString ) - delete symbol; } catch( AlibException & e ) { diff --git a/libaregexptree/src/RegExpDerivation.cpp b/libaregexptree/src/RegExpDerivation.cpp index f52f5cb62c..444e864cef 100644 --- a/libaregexptree/src/RegExpDerivation.cpp +++ b/libaregexptree/src/RegExpDerivation.cpp @@ -16,7 +16,7 @@ RegExpDerivation::RegExpDerivation( const RegExp & re ) : m_re( re ) } -RegExp RegExpDerivation::derivation ( const list<RegExpElement*> & dString ) const +RegExp RegExpDerivation::derivation ( const list<Symbol> & dString ) const { // TODO: change dString to WORD object when implemented. @@ -24,12 +24,7 @@ RegExp RegExpDerivation::derivation ( const list<RegExpElement*> & dString ) con for( const auto & dSymbol : dString ) { - if( dynamic_cast<RegExpEpsilon*>( dSymbol ) ) - continue; - else if( dynamic_cast<RegExpSymbol*>( dSymbol ) ) - derivedRegExp = derivation( oldRegExp, * dynamic_cast<RegExpSymbol*>( dSymbol ) ); - else - throw AlibException( "RegExpDerivation::derivation - invalid/unknown RegExpElement passed in dString." ); + derivedRegExp = derivation( oldRegExp, dSymbol ); delete oldRegExp; oldRegExp = derivedRegExp; @@ -40,7 +35,13 @@ RegExp RegExpDerivation::derivation ( const list<RegExpElement*> & dString ) con return ret; } -RegExpElement * RegExpDerivation::derivation( const RegExpElement * node, const RegExpSymbol & dSymbol ) const +RegExp RegExpDerivation::derivation ( const Symbol & symbol ) const +{ + RegExp ret = derivation( m_re.getRegExp( ), symbol ); + return ret; +} + +RegExpElement * RegExpDerivation::derivation( const RegExpElement * node, const Symbol & dSymbol ) const { const Alternation * alternation = dynamic_cast<const Alternation*>( node ); const Concatenation * concatenation = dynamic_cast<const Concatenation*>( node ); @@ -65,7 +66,7 @@ RegExpElement * RegExpDerivation::derivation( const RegExpElement * node, const throw AlibException( "RegExpDerivation::derivation() - unknown RegExpElement node" ); } -RegExpElement * RegExpDerivation::derivation( const Alternation * element, const RegExpSymbol & dSymbol ) const +RegExpElement * RegExpDerivation::derivation( const Alternation * element, const Symbol & dSymbol ) const { Alternation* ret = new Alternation( ); @@ -75,7 +76,7 @@ RegExpElement * RegExpDerivation::derivation( const Alternation * element, const return ret; } -RegExpElement * RegExpDerivation::derivation( const Concatenation * element, const RegExpSymbol & dSymbol ) const +RegExpElement * RegExpDerivation::derivation( const Concatenation * element, const Symbol & dSymbol ) const { Alternation* alt = new Alternation( ); @@ -99,7 +100,7 @@ RegExpElement * RegExpDerivation::derivation( const Concatenation * element, con return alt; } -RegExpElement * RegExpDerivation::derivation( const Iteration * element, const RegExpSymbol & dSymbol ) const +RegExpElement * RegExpDerivation::derivation( const Iteration * element, const Symbol & dSymbol ) const { Concatenation* ret = new Concatenation( ); @@ -109,7 +110,7 @@ RegExpElement * RegExpDerivation::derivation( const Iteration * element, const R return ret; } -RegExpElement * RegExpDerivation::derivation( const RegExpSymbol * element, const RegExpSymbol & dSymbol ) const +RegExpElement * RegExpDerivation::derivation( const RegExpSymbol * element, const Symbol & dSymbol ) const { if( dSymbol == element->getSymbol( ) ) return new RegExpEpsilon( ); @@ -117,12 +118,12 @@ RegExpElement * RegExpDerivation::derivation( const RegExpSymbol * element, cons return new RegExpEmpty( ); } -RegExpElement * RegExpDerivation::derivation( const RegExpEpsilon * element, const RegExpSymbol & dSymbol ) const +RegExpElement * RegExpDerivation::derivation( const RegExpEpsilon * element, const Symbol & dSymbol ) const { return new RegExpEmpty( ); } -RegExpElement * RegExpDerivation::derivation( const RegExpEmpty * element, const RegExpSymbol & dSymbol ) const +RegExpElement * RegExpDerivation::derivation( const RegExpEmpty * element, const Symbol & dSymbol ) const { return new RegExpEmpty( ); } diff --git a/libaregexptree/src/RegExpDerivation.h b/libaregexptree/src/RegExpDerivation.h index c9e0cf9504..a4fcafe252 100644 --- a/libaregexptree/src/RegExpDerivation.h +++ b/libaregexptree/src/RegExpDerivation.h @@ -34,19 +34,25 @@ public: /** * returns derivation of regexp over word dString * @param dString list of RegExpElements representing word. Derivation will be over this word. - * @see RegExpElement * @return Derivation of regexp over dString */ - regexp::RegExp derivation( const std::list<regexp::RegExpElement*> & dString ) const; + regexp::RegExp derivation( const std::list<alphabet::Symbol> & dString ) const; + + /** + * returns derivation of regexp over one-symbol word + * @param symbol one-symbol word + * @return Derivation of regexp over word + */ + regexp::RegExp derivation( const alphabet::Symbol & symbol ) const; private: - regexp::RegExpElement * derivation( const regexp::RegExpElement * element, const regexp::RegExpSymbol & dSymbol ) const; - regexp::RegExpElement * derivation( const regexp::Alternation * element, const regexp::RegExpSymbol & dSymbol ) const; - regexp::RegExpElement * derivation( const regexp::Concatenation * element, const regexp::RegExpSymbol & dSymbol ) const; - regexp::RegExpElement * derivation( const regexp::Iteration * element, const regexp::RegExpSymbol & dSymbol ) const; - regexp::RegExpElement * derivation( const regexp::RegExpSymbol * element, const regexp::RegExpSymbol & dSymbol ) const; - regexp::RegExpElement * derivation( const regexp::RegExpEpsilon * element, const regexp::RegExpSymbol & dSymbol ) const; - regexp::RegExpElement * derivation( const regexp::RegExpEmpty * element, const regexp::RegExpSymbol & dSymbol ) const; + regexp::RegExpElement * derivation( const regexp::RegExpElement * element, const alphabet::Symbol & dSymbol ) const; + regexp::RegExpElement * derivation( const regexp::Alternation * element, const alphabet::Symbol & dSymbol ) const; + regexp::RegExpElement * derivation( const regexp::Concatenation * element, const alphabet::Symbol & dSymbol ) const; + regexp::RegExpElement * derivation( const regexp::Iteration * element, const alphabet::Symbol & dSymbol ) const; + regexp::RegExpElement * derivation( const regexp::RegExpSymbol * element, const alphabet::Symbol & dSymbol ) const; + regexp::RegExpElement * derivation( const regexp::RegExpEpsilon * element, const alphabet::Symbol & dSymbol ) const; + regexp::RegExpElement * derivation( const regexp::RegExpEmpty * element, const alphabet::Symbol & dSymbol ) const; /** * stores original regexp diff --git a/libaregexptree/src/RegExpIntegral.cpp b/libaregexptree/src/RegExpIntegral.cpp index ee5a569682..e8365e661c 100644 --- a/libaregexptree/src/RegExpIntegral.cpp +++ b/libaregexptree/src/RegExpIntegral.cpp @@ -15,7 +15,7 @@ RegExpIntegral::RegExpIntegral( const RegExp & re ) : m_re( re ) } -RegExp RegExpIntegral::integral( const list<RegExpElement*> & dString ) const +RegExp RegExpIntegral::integral( const list<Symbol> & dString ) const { // TODO: change dString to WORD object when implemented. @@ -25,12 +25,7 @@ RegExp RegExpIntegral::integral( const list<RegExpElement*> & dString ) const { const auto & dSymbol = * it; - if( dynamic_cast<RegExpEpsilon*>( dSymbol ) ) // integral V d\eps = V - continue; - else if( dynamic_cast<RegExpSymbol*>( dSymbol ) ) - integralRegExp = integral( oldRegExp, * dynamic_cast<RegExpSymbol*>( dSymbol ) ); - else - throw AlibException( "RegExpIntegral::integral - invalid/unknown RegExpElement passed in dString." ); + integralRegExp = integral( oldRegExp, dSymbol ); delete oldRegExp; oldRegExp = integralRegExp; @@ -41,7 +36,13 @@ RegExp RegExpIntegral::integral( const list<RegExpElement*> & dString ) const return ret; } -RegExpElement * RegExpIntegral::integral( const RegExpElement * node, const RegExpSymbol & dSymbol ) const +RegExp RegExpIntegral::integral ( const Symbol & symbol ) const +{ + RegExp ret = integral( m_re.getRegExp( ), symbol ); + return ret; +} + +RegExpElement * RegExpIntegral::integral( const RegExpElement * node, const Symbol & dSymbol ) const { const Alternation * alternation = dynamic_cast<const Alternation*>( node ); const Concatenation * concatenation = dynamic_cast<const Concatenation*>( node ); @@ -66,7 +67,7 @@ RegExpElement * RegExpIntegral::integral( const RegExpElement * node, const RegE throw AlibException( "RegExpDerivation::derivation() - unknown RegExpElement node" ); } -RegExpElement * RegExpIntegral::integral( const Alternation * node, const RegExpSymbol & dSymbol ) const +RegExpElement * RegExpIntegral::integral( const Alternation * node, const Symbol & dSymbol ) const { Alternation * alt = new Alternation( ); @@ -76,10 +77,10 @@ RegExpElement * RegExpIntegral::integral( const Alternation * node, const RegExp return alt; } -RegExpElement * RegExpIntegral::integral( const Concatenation * node, const RegExpSymbol & dSymbol ) const +RegExpElement * RegExpIntegral::integral( const Concatenation * node, const Symbol & dSymbol ) const { Concatenation * concat = new Concatenation( ); - concat->getElements( ).push_back( dSymbol.clone( ) ); + concat->getElements( ).push_back( new RegExpSymbol( dSymbol.getSymbol( ) ) ); for( const auto & child : node->getElements( ) ) concat->getElements( ).push_back( child->clone( ) ); @@ -87,28 +88,28 @@ RegExpElement * RegExpIntegral::integral( const Concatenation * node, const RegE return concat; } -RegExpElement * RegExpIntegral::integral( const Iteration * node, const RegExpSymbol & dSymbol ) const +RegExpElement * RegExpIntegral::integral( const Iteration * node, const Symbol & dSymbol ) const { Concatenation * concat = new Concatenation( ); - concat->getElements( ).push_back( dSymbol.clone( ) ); + concat->getElements( ).push_back( new RegExpSymbol( dSymbol.getSymbol( ) ) ); concat->getElements( ).push_back( node->getElement( )->clone( ) ); return concat; } -RegExpElement * RegExpIntegral::integral( const RegExpSymbol * node, const RegExpSymbol & dSymbol ) const +RegExpElement * RegExpIntegral::integral( const RegExpSymbol * node, const Symbol & dSymbol ) const { Concatenation * concat = new Concatenation( ); - concat->getElements( ).push_back( dSymbol.clone( ) ); + concat->getElements( ).push_back( new RegExpSymbol( dSymbol.getSymbol( ) ) ); concat->getElements( ).push_back( node->clone( ) ); return concat; } -RegExpElement * RegExpIntegral::integral( const RegExpEpsilon * node, const RegExpSymbol & dSymbol ) const +RegExpElement * RegExpIntegral::integral( const RegExpEpsilon * node, const Symbol & dSymbol ) const { - return dSymbol.clone( ); + return new RegExpSymbol( dSymbol.getSymbol( ) ); } -RegExpElement * RegExpIntegral::integral( const RegExpEmpty * node, const RegExpSymbol & dSymbol ) const +RegExpElement * RegExpIntegral::integral( const RegExpEmpty * node, const Symbol & dSymbol ) const { return new RegExpEmpty( ); } diff --git a/libaregexptree/src/RegExpIntegral.h b/libaregexptree/src/RegExpIntegral.h index 15b27a4c80..36c55a50ec 100644 --- a/libaregexptree/src/RegExpIntegral.h +++ b/libaregexptree/src/RegExpIntegral.h @@ -30,16 +30,23 @@ public: * @see RegExpElement * @return Integral of regexp over dString */ - regexp::RegExp integral( const std::list<regexp::RegExpElement*> & dString ) const; + regexp::RegExp integral( const std::list<alphabet::Symbol> & dString ) const; + + /** + * returns integral of regexp over one-symbol word + * @param symbol one-symbol word + * @return integral of regexp over word + */ + regexp::RegExp integral( const alphabet::Symbol & symbol ) const; private: - regexp::RegExpElement * integral( const regexp::RegExpElement * node, const regexp::RegExpSymbol & dSymbol ) const; - regexp::RegExpElement * integral( const regexp::Alternation * node, const regexp::RegExpSymbol & dSymbol ) const; - regexp::RegExpElement * integral( const regexp::Concatenation * node, const regexp::RegExpSymbol & dSymbol ) const; - regexp::RegExpElement * integral( const regexp::Iteration * node, const regexp::RegExpSymbol & dSymbol ) const; - regexp::RegExpElement * integral( const regexp::RegExpSymbol * node, const regexp::RegExpSymbol & dSymbol ) const; - regexp::RegExpElement * integral( const regexp::RegExpEpsilon * node, const regexp::RegExpSymbol & dSymbol ) const; - regexp::RegExpElement * integral( const regexp::RegExpEmpty * node, const regexp::RegExpSymbol & dSymbol ) const; + regexp::RegExpElement * integral( const regexp::RegExpElement * node, const alphabet::Symbol & dSymbol ) const; + regexp::RegExpElement * integral( const regexp::Alternation * node, const alphabet::Symbol & dSymbol ) const; + regexp::RegExpElement * integral( const regexp::Concatenation * node, const alphabet::Symbol & dSymbol ) const; + regexp::RegExpElement * integral( const regexp::Iteration * node, const alphabet::Symbol & dSymbol ) const; + regexp::RegExpElement * integral( const regexp::RegExpSymbol * node, const alphabet::Symbol & dSymbol ) const; + regexp::RegExpElement * integral( const regexp::RegExpEpsilon * node, const alphabet::Symbol & dSymbol ) const; + regexp::RegExpElement * integral( const regexp::RegExpEmpty * node, const alphabet::Symbol & dSymbol ) const; /** * stores original regular expression -- GitLab