From 6bf87b61207ebe5dbf0167991dffc56ea1893997 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Tr=C3=A1vn=C3=AD=C4=8Dek?= <Jan.Travnicek@fit.cvut.cz> Date: Tue, 20 Jul 2021 21:30:33 +0200 Subject: [PATCH] algo: pattern matching of extended tree patterns + tests --- .../src/arbology/exact/ExactPatternMatch.cpp | 2 + .../src/arbology/exact/ExactPatternMatch.h | 94 +++++++++++++++++-- .../exact/ExactPatternMatchingAutomaton.cpp | 2 + .../exact/ExactPatternMatchingAutomaton.h | 26 ++++- .../src/arbology/exact/KnuthMorrisPratt.cpp | 2 + .../src/arbology/exact/KnuthMorrisPratt.h | 11 ++- .../src/tree/exact/ForwardOccurrenceTest.h | 11 +++ tests/cppaql/arbologyTest.cpp | 53 ++++++++++- 8 files changed, 185 insertions(+), 16 deletions(-) diff --git a/alib2algo/src/arbology/exact/ExactPatternMatch.cpp b/alib2algo/src/arbology/exact/ExactPatternMatch.cpp index cee5db63f9..fe50a6f679 100644 --- a/alib2algo/src/arbology/exact/ExactPatternMatch.cpp +++ b/alib2algo/src/arbology/exact/ExactPatternMatch.cpp @@ -4,10 +4,12 @@ namespace { auto ExactPatternMatchUnrankedPattern = registration::AbstractRegister < arbology::exact::ExactPatternMatch, ext::set < unsigned >, const tree::UnrankedTree < > &, const tree::UnrankedPattern < > & > ( arbology::exact::ExactPatternMatch::match ); +auto ExactPatternMatchUnrankedExtendedPattern = registration::AbstractRegister < arbology::exact::ExactPatternMatch, ext::set < unsigned >, const tree::UnrankedTree < > &, const tree::UnrankedExtendedPattern < > & > ( arbology::exact::ExactPatternMatch::match ); auto ExactPatternMatchUnorderedRankedPattern = registration::AbstractRegister < arbology::exact::ExactPatternMatch, ext::set < unsigned >, const tree::UnorderedRankedTree < > &, const tree::UnorderedRankedPattern < > & > ( arbology::exact::ExactPatternMatch::match ); auto ExactPatternMatchUnorderedUnrankedRankedPattern = registration::AbstractRegister < arbology::exact::ExactPatternMatch, ext::set < unsigned >, const tree::UnorderedUnrankedTree < > &, const tree::UnorderedUnrankedPattern < > & > ( arbology::exact::ExactPatternMatch::match ); auto ExactPatternMatchRankedPattern = registration::AbstractRegister < arbology::exact::ExactPatternMatch, ext::set < unsigned >, const tree::RankedTree < > &, const tree::RankedPattern < > & > ( arbology::exact::ExactPatternMatch::match ); auto ExactPatternMatchRankedNonlinearPattern = registration::AbstractRegister < arbology::exact::ExactPatternMatch, ext::set < unsigned >, const tree::RankedTree < > &, const tree::RankedNonlinearPattern < > & > ( arbology::exact::ExactPatternMatch::match ); +auto ExactPatternMatchRankedExtendedPattern = registration::AbstractRegister < arbology::exact::ExactPatternMatch, ext::set < unsigned >, const tree::RankedTree < > &, const tree::RankedExtendedPattern < > & > ( arbology::exact::ExactPatternMatch::match ); auto ExactPatternMatchPrefixRankedPattern = registration::AbstractRegister < arbology::exact::ExactPatternMatch, ext::set < unsigned >, const tree::PrefixRankedTree < > &, const tree::PrefixRankedPattern < > & > ( arbology::exact::ExactPatternMatch::match ); auto ExactPatternMatchPrefixRankedNonlinearPattern = registration::AbstractRegister < arbology::exact::ExactPatternMatch, ext::set < unsigned >, const tree::PrefixRankedTree < > &, const tree::PrefixRankedNonlinearPattern < > & > ( arbology::exact::ExactPatternMatch::match ); auto ExactPatternMatchPrefixRankedBarPattern = registration::AbstractRegister < arbology::exact::ExactPatternMatch, ext::set < unsigned >, const tree::PrefixRankedBarTree < > &, const tree::PrefixRankedBarPattern < > & > ( arbology::exact::ExactPatternMatch::match ); diff --git a/alib2algo/src/arbology/exact/ExactPatternMatch.h b/alib2algo/src/arbology/exact/ExactPatternMatch.h index 86215a9e3a..2de8da0679 100644 --- a/alib2algo/src/arbology/exact/ExactPatternMatch.h +++ b/alib2algo/src/arbology/exact/ExactPatternMatch.h @@ -11,6 +11,7 @@ #include <tree/ranked/RankedTree.h> #include <tree/ranked/RankedPattern.h> #include <tree/ranked/RankedNonlinearPattern.h> +#include <tree/ranked/RankedExtendedPattern.h> #include <tree/ranked/PrefixRankedTree.h> #include <tree/ranked/PrefixRankedPattern.h> #include <tree/ranked/PrefixRankedNonlinearPattern.h> @@ -19,6 +20,7 @@ #include <tree/ranked/PrefixRankedBarNonlinearPattern.h> #include <tree/unranked/UnrankedTree.h> #include <tree/unranked/UnrankedPattern.h> +#include <tree/unranked/UnrankedExtendedPattern.h> #include <tree/unranked/UnorderedUnrankedTree.h> #include <tree/unranked/UnorderedUnrankedPattern.h> @@ -41,6 +43,8 @@ public: template < class SymbolType > static ext::set < unsigned > match ( const tree::UnrankedTree < SymbolType > & subject, const tree::UnrankedPattern < SymbolType > & pattern ); + template < class SymbolType > + static ext::set < unsigned > match ( const tree::UnrankedTree < SymbolType > & subject, const tree::UnrankedExtendedPattern < SymbolType > & pattern ); template < class SymbolType > static ext::set < unsigned > match ( const tree::UnorderedRankedTree < SymbolType > & subject, const tree::UnorderedRankedPattern < SymbolType > & pattern ); @@ -49,6 +53,8 @@ public: static ext::set < unsigned > match ( const tree::RankedTree < SymbolType > & subject, const tree::RankedPattern < SymbolType > & pattern ); template < class SymbolType > static ext::set < unsigned > match ( const tree::RankedTree < SymbolType > & subject, const tree::RankedNonlinearPattern < SymbolType > & pattern ); + template < class SymbolType > + static ext::set < unsigned > match ( const tree::RankedTree < SymbolType > & subject, const tree::RankedExtendedPattern < SymbolType > & pattern ); template < class SymbolType > static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedPattern < SymbolType > & pattern ); @@ -64,17 +70,23 @@ private: template < class SymbolType > static bool matchHelper ( const ext::tree < SymbolType > & subject, const ext::tree < SymbolType > & pattern, const SymbolType & subtreeVariable, const SymbolType & subtreeGap ); template < class SymbolType > - static bool matchHelper ( const ext::tree < common::ranked_symbol < SymbolType > > & subject, const ext::tree < common::ranked_symbol < SymbolType > > & pattern, const common::ranked_symbol < SymbolType > & subtreeVariable ); + static bool matchHelper ( const ext::tree < SymbolType > & subject, const ext::tree < SymbolType > & pattern, const SymbolType & subtreeVariable, const SymbolType & subtreeGap, const SymbolType & nodeWildcard ); + template < class SymbolType > + static bool matchHelper ( const ext::tree < common::ranked_symbol < SymbolType > > & subject, const ext::tree < common::ranked_symbol < SymbolType > > & pattern, const common::ranked_symbol < SymbolType > & subtreeVariable, const ext::set < common::ranked_symbol < SymbolType > > & nodeWildcards ); template < class SymbolType > static bool matchHelper ( const ext::tree < common::ranked_symbol < SymbolType > > & subject, const ext::tree < common::ranked_symbol < SymbolType > > & pattern, const common::ranked_symbol < SymbolType > & subtreeVariable, const ext::set < common::ranked_symbol < SymbolType > > & nonlinearVariables, const ext::tree < common::ranked_symbol < unsigned > > & repeats, ext::map < common::ranked_symbol < SymbolType >, unsigned > & variablesSetting ); template < class SymbolType > static bool matchHelper ( typename ext::vector < ext::tree < SymbolType > >::const_iterator subjectIter, typename ext::vector < ext::tree < SymbolType > >::const_iterator subjectEnd, typename ext::vector < ext::tree < SymbolType > >::const_iterator patternIter, typename ext::vector < ext::tree < SymbolType > >::const_iterator patternEnd, const SymbolType & subtreeVariable, const SymbolType & subtreeGap ); + template < class SymbolType > + static bool matchHelper ( typename ext::vector < ext::tree < SymbolType > >::const_iterator subjectIter, typename ext::vector < ext::tree < SymbolType > >::const_iterator subjectEnd, typename ext::vector < ext::tree < SymbolType > >::const_iterator patternIter, typename ext::vector < ext::tree < SymbolType > >::const_iterator patternEnd, const SymbolType & subtreeVariable, const SymbolType & subtreeGap, const SymbolType & nodeWildcard ); template < class SymbolType > - static void matchInternal ( unsigned & index, ext::set < unsigned > & occ, const ext::tree < SymbolType > & subject, const ext::tree < SymbolType > & pattern, const SymbolType & subtreeVariable ); + static void matchInternal ( unsigned & index, ext::set < unsigned > & occ, const ext::tree < SymbolType > & subject, const ext::tree < SymbolType > & pattern, const SymbolType & subtreeVariable, const ext::set < SymbolType > & nodeWildcards ); template < class SymbolType > static void matchInternal ( unsigned & index, ext::set < unsigned > & occ, const ext::tree < SymbolType > & subject, const ext::tree < SymbolType > & pattern, const SymbolType & subtreeVariable, const SymbolType & subtreeGap ); + template < class SymbolType > + static void matchInternal ( unsigned & index, ext::set < unsigned > & occ, const ext::tree < SymbolType > & subject, const ext::tree < SymbolType > & pattern, const SymbolType & subtreeVariable, const SymbolType & subtreeGap, const SymbolType & nodeWildcard ); template < class SymbolType, class RepeatsType > static void matchInternal ( unsigned & index, ext::set < unsigned > & occ, const ext::tree < SymbolType > & subject, const ext::tree < SymbolType > & pattern, const SymbolType & subtreeVariable, const ext::set < SymbolType > & nonlinearVariables, const ext::tree < RepeatsType > & repeats ); @@ -129,14 +141,50 @@ bool ExactPatternMatch::matchHelper ( const ext::tree < SymbolType > & subject, } template < class SymbolType > -bool ExactPatternMatch::matchHelper ( const ext::tree < common::ranked_symbol < SymbolType > > & subject, const ext::tree < common::ranked_symbol < SymbolType > > & pattern, const common::ranked_symbol < SymbolType > & subtreeVariable ) { +bool ExactPatternMatch::matchHelper ( typename ext::vector < ext::tree < SymbolType > >::const_iterator subjectIter, typename ext::vector < ext::tree < SymbolType > >::const_iterator subjectEnd, typename ext::vector < ext::tree < SymbolType > >::const_iterator patternIter, typename ext::vector < ext::tree < SymbolType > >::const_iterator patternEnd, const SymbolType & subtreeVariable, const SymbolType & subtreeGap, const SymbolType & nodeWildcard ) { + if ( patternIter == patternEnd ) + return subjectIter == subjectEnd; + + if ( patternIter->getData ( ) == subtreeGap ) { + if ( matchHelper ( subjectIter, subjectEnd, std::next ( patternIter ), patternEnd, subtreeVariable, subtreeGap, nodeWildcard ) ) + return true; + + while ( subjectIter != subjectEnd ) { + ++ subjectIter; + if ( matchHelper ( subjectIter, subjectEnd, patternIter, patternEnd, subtreeVariable, subtreeGap, nodeWildcard ) ) + return true; + } + + return false; + } else if ( subjectIter != subjectEnd ) { + if ( matchHelper ( * subjectIter, * patternIter, subtreeVariable, subtreeGap, nodeWildcard ) ) + return matchHelper ( std::next ( subjectIter ), subjectEnd, std::next ( patternIter ), patternEnd, subtreeVariable, subtreeGap, nodeWildcard ); + + return false; + } else { + return false; + } +} + +template < class SymbolType > +bool ExactPatternMatch::matchHelper ( const ext::tree < SymbolType > & subject, const ext::tree < SymbolType > & pattern, const SymbolType & subtreeVariable, const SymbolType & subtreeGap, const SymbolType & nodeWildcard ) { if ( pattern.getData ( ) == subtreeVariable ) return true; - if ( subject.getData ( ) != pattern.getData ( ) ) return false; + if ( subject.getData ( ) != pattern.getData ( ) && nodeWildcard != pattern.getData ( ) ) return false; + + return matchHelper ( subject.getChildren ( ).begin ( ), subject.getChildren ( ).end ( ), pattern.getChildren ( ).begin ( ), pattern.getChildren ( ).end ( ), subtreeVariable, subtreeGap, nodeWildcard ); +} + +template < class SymbolType > +bool ExactPatternMatch::matchHelper ( const ext::tree < common::ranked_symbol < SymbolType > > & subject, const ext::tree < common::ranked_symbol < SymbolType > > & pattern, const common::ranked_symbol < SymbolType > & subtreeVariable, const ext::set < common::ranked_symbol < SymbolType > > & nodeWildcards ) { + if ( pattern.getData ( ) == subtreeVariable ) return true; + + if ( subject.getData ( ) != pattern.getData ( ) && ( ! nodeWildcards.contains ( pattern.getData ( ) ) || pattern.getData ( ).getRank ( ) != subject.getData ( ).getRank ( ) ) ) + return false; // ranked symbols are the same; test for number of children is not needed for ( const ext::tuple < const ext::tree < common::ranked_symbol < SymbolType > > &, const ext::tree < common::ranked_symbol < SymbolType > > & > & childs : ext::make_tuple_foreach ( subject.getChildren ( ), pattern.getChildren ( ) ) ) - if ( !matchHelper ( std::get < 0 > ( childs ), std::get < 1 > ( childs ), subtreeVariable ) ) return false; + if ( !matchHelper ( std::get < 0 > ( childs ), std::get < 1 > ( childs ), subtreeVariable, nodeWildcards ) ) return false; return true; } @@ -165,13 +213,13 @@ bool ExactPatternMatch::matchHelper ( const ext::tree < common::ranked_symbol < } template < class SymbolType > -void ExactPatternMatch::matchInternal ( unsigned & index, ext::set < unsigned > & occ, const ext::tree < SymbolType > & subject, const ext::tree < SymbolType > & pattern, const SymbolType & subtreeVariable ) { - if ( matchHelper ( subject, pattern, subtreeVariable ) ) occ.insert ( index ); +void ExactPatternMatch::matchInternal ( unsigned & index, ext::set < unsigned > & occ, const ext::tree < SymbolType > & subject, const ext::tree < SymbolType > & pattern, const SymbolType & subtreeVariable, const ext::set < SymbolType > & nodeWildcards ) { + if ( matchHelper ( subject, pattern, subtreeVariable, nodeWildcards ) ) occ.insert ( index ); index++; for ( const ext::tree < SymbolType > & child : subject.getChildren ( ) ) - matchInternal ( index, occ, child, pattern, subtreeVariable ); + matchInternal ( index, occ, child, pattern, subtreeVariable, nodeWildcards ); } template < class SymbolType > @@ -184,6 +232,16 @@ void ExactPatternMatch::matchInternal ( unsigned & index, ext::set < unsigned > matchInternal ( index, occ, child, pattern, subtreeVariable, subtreeGap ); } +template < class SymbolType > +void ExactPatternMatch::matchInternal ( unsigned & index, ext::set < unsigned > & occ, const ext::tree < SymbolType > & subject, const ext::tree < SymbolType > & pattern, const SymbolType & subtreeVariable, const SymbolType & subtreeGap, const SymbolType & nodeWildcard ) { + if ( matchHelper ( subject, pattern, subtreeVariable, subtreeGap, nodeWildcard ) ) occ.insert ( index ); + + index++; + + for ( const ext::tree < SymbolType > & child : subject.getChildren ( ) ) + matchInternal ( index, occ, child, pattern, subtreeVariable, subtreeGap, nodeWildcard ); +} + template < class SymbolType, class RepeatsType > void ExactPatternMatch::matchInternal ( unsigned & index, ext::set < unsigned > & occ, const ext::tree < SymbolType > & subject, const ext::tree < SymbolType > & pattern, const SymbolType & subtreeVariable, const ext::set < SymbolType > & nonlinearVariables, const ext::tree < RepeatsType > & repeats ) { ext::map < SymbolType, unsigned > variablesSetting; @@ -307,6 +365,15 @@ ext::set < unsigned > ExactPatternMatch::match ( const tree::UnrankedTree < Symb return occ; } +template < class SymbolType > +ext::set < unsigned > ExactPatternMatch::match ( const tree::UnrankedTree < SymbolType > & subject, const tree::UnrankedExtendedPattern < SymbolType > & pattern ) { + unsigned i = 0; + ext::set < unsigned > occ; + + matchInternal ( i, occ, subject.getContent ( ), pattern.getContent ( ), pattern.getSubtreeWildcard ( ), pattern.getSubtreeGap ( ), pattern.getNodeWildcard ( ) ); + return occ; +} + template < class SymbolType > ext::set < unsigned > ExactPatternMatch::match ( const tree::UnorderedRankedTree < SymbolType > & subject, const tree::UnorderedRankedPattern < SymbolType > & pattern ) { unsigned i = 0; @@ -321,7 +388,7 @@ ext::set < unsigned > ExactPatternMatch::match ( const tree::RankedTree < Symbol unsigned i = 0; ext::set < unsigned > occ; - matchInternal ( i, occ, subject.getContent ( ), pattern.getContent ( ), pattern.getSubtreeWildcard ( ) ); + matchInternal ( i, occ, subject.getContent ( ), pattern.getContent ( ), pattern.getSubtreeWildcard ( ), { } ); return occ; } @@ -336,6 +403,15 @@ ext::set < unsigned > ExactPatternMatch::match ( const tree::RankedTree < Symbol return occ; } +template < class SymbolType > +ext::set < unsigned > ExactPatternMatch::match ( const tree::RankedTree < SymbolType > & subject, const tree::RankedExtendedPattern < SymbolType > & pattern ) { + unsigned i = 0; + ext::set < unsigned > occ; + + matchInternal ( i, occ, subject.getContent ( ), pattern.getContent ( ), pattern.getSubtreeWildcard ( ), pattern.getNodeWildcards ( ) ); + return occ; +} + template < class SymbolType > ext::set < unsigned > ExactPatternMatch::match ( const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedPattern < SymbolType > & pattern ) { ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); diff --git a/alib2algo/src/arbology/exact/ExactPatternMatchingAutomaton.cpp b/alib2algo/src/arbology/exact/ExactPatternMatchingAutomaton.cpp index cefd4ff796..3ba2da0a38 100644 --- a/alib2algo/src/arbology/exact/ExactPatternMatchingAutomaton.cpp +++ b/alib2algo/src/arbology/exact/ExactPatternMatchingAutomaton.cpp @@ -23,4 +23,6 @@ auto ExactPatternMatchingAutomatonUnrankedTree = registration::AbstractRegister auto ExactPatternMatchingAutomatonUnrankedPattern = registration::AbstractRegister < arbology::exact::ExactPatternMatchingAutomaton, automaton::NondeterministicZAutomaton < DefaultSymbolType, unsigned >, const tree::UnrankedPattern < > & > ( arbology::exact::ExactPatternMatchingAutomaton::construct ); +auto ExactPatternMatchingAutomatonUnrankedExtendedPattern = registration::AbstractRegister < arbology::exact::ExactPatternMatchingAutomaton, automaton::NondeterministicZAutomaton < DefaultSymbolType, unsigned >, const tree::UnrankedExtendedPattern < > & > ( arbology::exact::ExactPatternMatchingAutomaton::construct ); + } /* namespace */ diff --git a/alib2algo/src/arbology/exact/ExactPatternMatchingAutomaton.h b/alib2algo/src/arbology/exact/ExactPatternMatchingAutomaton.h index 5a1a97fd00..6fbce462e8 100644 --- a/alib2algo/src/arbology/exact/ExactPatternMatchingAutomaton.h +++ b/alib2algo/src/arbology/exact/ExactPatternMatchingAutomaton.h @@ -14,6 +14,7 @@ #include <tree/ranked/PrefixRankedBarPattern.h> #include <tree/unranked/UnrankedPattern.h> +#include <tree/unranked/UnrankedExtendedPattern.h> #include <automaton/PDA/InputDrivenNPDA.h> #include <automaton/PDA/VisiblyPushdownNPDA.h> @@ -65,6 +66,9 @@ public: template < class SymbolType > static automaton::NondeterministicZAutomaton < SymbolType, unsigned > construct ( const tree::UnrankedPattern < SymbolType > & pattern ); + template < class SymbolType > + static automaton::NondeterministicZAutomaton < SymbolType, unsigned > construct ( const tree::UnrankedExtendedPattern < SymbolType > & pattern ); + }; template < class SymbolType > @@ -326,10 +330,21 @@ automaton::NondeterministicZAutomaton < SymbolType, unsigned > ExactPatternMatch } template < class SymbolType > -unsigned constructRecursivePattern ( const ext::tree < SymbolType > & node, automaton::NondeterministicZAutomaton < SymbolType, unsigned > & res, const SymbolType & subtreeWildcard, const SymbolType & subtreeGap, unsigned & nextState ) { +automaton::NondeterministicZAutomaton < SymbolType, unsigned > ExactPatternMatchingAutomaton::construct ( const tree::UnrankedPattern < SymbolType > & pattern ) { + return ExactPatternMatchingAutomaton::construct ( tree::UnrankedExtendedPattern ( pattern ) ); +} + +template < class SymbolType > +unsigned constructRecursivePattern ( const ext::tree < SymbolType > & node, automaton::NondeterministicZAutomaton < SymbolType, unsigned > & res, const SymbolType & subtreeWildcard, const SymbolType & subtreeGap, const SymbolType & nodeWildcard, unsigned & nextState ) { unsigned state = nextState ++; res.addState ( state ); - res.addTransition ( node.getData ( ), { }, state ); + if ( node.getData ( ) == nodeWildcard ) { + for ( const SymbolType & symbol : res.getInputAlphabet ( ) ) { + res.addTransition ( symbol, { }, state ); + } + } else { + res.addTransition ( node.getData ( ), { }, state ); + } for ( const ext::tree < SymbolType > & child : node.getChildren ( ) ) { res.addState ( nextState ); @@ -341,7 +356,7 @@ unsigned constructRecursivePattern ( const ext::tree < SymbolType > & node, auto res.addTransition ( state, { }, target ); } else { - unsigned result = constructRecursivePattern ( child, res, subtreeWildcard, subtreeGap, nextState ); + unsigned result = constructRecursivePattern ( child, res, subtreeWildcard, subtreeGap, nodeWildcard, nextState ); res.addTransition ( state, { result }, target ); } state = target; @@ -351,11 +366,12 @@ unsigned constructRecursivePattern ( const ext::tree < SymbolType > & node, auto } template < class SymbolType > -automaton::NondeterministicZAutomaton < SymbolType, unsigned > ExactPatternMatchingAutomaton::construct ( const tree::UnrankedPattern < SymbolType > & pattern ) { +automaton::NondeterministicZAutomaton < SymbolType, unsigned > ExactPatternMatchingAutomaton::construct ( const tree::UnrankedExtendedPattern < SymbolType > & pattern ) { ext::set < SymbolType > alphabet = pattern.getAlphabet ( ); alphabet.erase ( pattern.getSubtreeWildcard ( ) ); alphabet.erase ( pattern.getSubtreeGap ( ) ); + alphabet.erase ( pattern.getNodeWildcard ( ) ); automaton::NondeterministicZAutomaton < SymbolType, unsigned > res; res.setInputAlphabet ( alphabet ); @@ -369,7 +385,7 @@ automaton::NondeterministicZAutomaton < SymbolType, unsigned > ExactPatternMatch unsigned nextState = 1; - res.addFinalState ( constructRecursivePattern ( pattern.getContent ( ), res, pattern.getSubtreeWildcard ( ), pattern.getSubtreeGap ( ), nextState ) ); + res.addFinalState ( constructRecursivePattern ( pattern.getContent ( ), res, pattern.getSubtreeWildcard ( ), pattern.getSubtreeGap ( ), pattern.getNodeWildcard ( ), nextState ) ); return res; } diff --git a/alib2algo/src/arbology/exact/KnuthMorrisPratt.cpp b/alib2algo/src/arbology/exact/KnuthMorrisPratt.cpp index f8f89a96eb..39c8cc5ccd 100644 --- a/alib2algo/src/arbology/exact/KnuthMorrisPratt.cpp +++ b/alib2algo/src/arbology/exact/KnuthMorrisPratt.cpp @@ -13,6 +13,8 @@ auto KnuthMorrisPrattPrefixRankedTreePrefixRankedTree = registration::AbstractRe auto KnuthMorrisPrattPrefixRankedTreePrefixRankedPattern = registration::AbstractRegister < arbology::exact::KnuthMorrisPratt, ext::set < unsigned >, const tree::PrefixRankedTree < > &, const tree::PrefixRankedPattern < > & > ( arbology::exact::KnuthMorrisPratt::match ); +auto KnuthMorrisPrattPrefixRankedTreePrefixRankedExtendedPattern = registration::AbstractRegister < arbology::exact::KnuthMorrisPratt, ext::set < unsigned >, const tree::PrefixRankedTree < > &, const tree::PrefixRankedExtendedPattern < > & > ( arbology::exact::KnuthMorrisPratt::match ); + auto KnuthMorrisPrattPrefixRankedTreePrefixRankedNonlinearPattern = registration::AbstractRegister < arbology::exact::KnuthMorrisPratt, ext::set < unsigned >, const tree::PrefixRankedTree < > &, const tree::PrefixRankedNonlinearPattern < > & > ( arbology::exact::KnuthMorrisPratt::match ); } /* namespace */ diff --git a/alib2algo/src/arbology/exact/KnuthMorrisPratt.h b/alib2algo/src/arbology/exact/KnuthMorrisPratt.h index ef9ad948bc..75db9d77e5 100644 --- a/alib2algo/src/arbology/exact/KnuthMorrisPratt.h +++ b/alib2algo/src/arbology/exact/KnuthMorrisPratt.h @@ -6,6 +6,7 @@ #include <alib/vector> #include <tree/properties/BorderArray.h> +#include <tree/properties/BorderArrayNaive.h> #include <tree/properties/SubtreeJumpTable.h> #include <tree/properties/ExactSubtreeRepeatsNaive.h> #include <tree/exact/ForwardOccurrenceTest.h> @@ -15,6 +16,7 @@ #include <tree/ranked/PrefixRankedBarNonlinearPattern.h> #include <tree/ranked/PrefixRankedTree.h> #include <tree/ranked/PrefixRankedPattern.h> +#include <tree/ranked/PrefixRankedExtendedPattern.h> #include <tree/ranked/PrefixRankedNonlinearPattern.h> namespace arbology { @@ -43,6 +45,8 @@ public: template < class SymbolType > static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedPattern < SymbolType > & pattern ); template < class SymbolType > + static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedExtendedPattern < SymbolType > & pattern ); + template < class SymbolType > static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedNonlinearPattern < SymbolType > & pattern ); }; @@ -146,8 +150,13 @@ ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree < S template < class SymbolType > ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedPattern < SymbolType > & pattern ) { + return match ( subject, tree::PrefixRankedExtendedPattern < SymbolType > ( pattern ) ); +} + +template < class SymbolType > +ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedExtendedPattern < SymbolType > & pattern ) { ext::set < unsigned > occ; - ext::vector < size_t > construct = tree::properties::BorderArray::construct ( pattern ); + ext::vector < size_t > construct = tree::properties::BorderArrayNaive::construct ( pattern ); //measurements::start("Algorithm", measurements::Type::MAIN); diff --git a/alib2algo/src/tree/exact/ForwardOccurrenceTest.h b/alib2algo/src/tree/exact/ForwardOccurrenceTest.h index 5fdfee0dd5..a28a503d5d 100644 --- a/alib2algo/src/tree/exact/ForwardOccurrenceTest.h +++ b/alib2algo/src/tree/exact/ForwardOccurrenceTest.h @@ -4,6 +4,7 @@ #include <tree/ranked/PrefixRankedTree.h> #include <tree/ranked/PrefixRankedPattern.h> +#include <tree/ranked/PrefixRankedExtendedPattern.h> #include <tree/ranked/PrefixRankedNonlinearPattern.h> #include <tree/ranked/PrefixRankedBarTree.h> #include <tree/ranked/PrefixRankedBarPattern.h> @@ -27,6 +28,8 @@ public: template < class SymbolType > static size_t occurrence ( const PrefixRankedTree < SymbolType > & subject, const ext::vector < int > & subjectSubtreeJumpTable, const PrefixRankedPattern < SymbolType > & pattern, size_t subjectPosition ); template < class SymbolType > + static size_t occurrence ( const PrefixRankedTree < SymbolType > & subject, const ext::vector < int > & subjectSubtreeJumpTable, const PrefixRankedExtendedPattern < SymbolType > & pattern, size_t subjectPosition ); + template < class SymbolType > static size_t occurrence ( const PrefixRankedTree < SymbolType > & subject, const ext::vector < int > & subjectSubtreeJumpTable, const tree::PrefixRankedTree < unsigned > & repeats, const PrefixRankedNonlinearPattern < SymbolType > & pattern, size_t subjectPosition ); }; @@ -102,6 +105,11 @@ size_t ForwardOccurrenceTest::occurrence ( const PrefixRankedTree < SymbolType > template < class SymbolType > size_t ForwardOccurrenceTest::occurrence ( const PrefixRankedTree < SymbolType > & subject, const ext::vector < int > & subjectSubtreeJumpTable, const PrefixRankedPattern < SymbolType > & pattern, size_t subjectPosition ) { + return occurrence ( subject, subjectSubtreeJumpTable, tree::PrefixRankedExtendedPattern < SymbolType > ( pattern ), subjectPosition ); +} + +template < class SymbolType > +size_t ForwardOccurrenceTest::occurrence ( const PrefixRankedTree < SymbolType > & subject, const ext::vector < int > & subjectSubtreeJumpTable, const PrefixRankedExtendedPattern < SymbolType > & pattern, size_t subjectPosition ) { // offset to the subject unsigned offset = subjectPosition; size_t j = 0; @@ -110,6 +118,9 @@ size_t ForwardOccurrenceTest::occurrence ( const PrefixRankedTree < SymbolType > if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) // match of symbol offset = offset + 1; + else if ( pattern.getContent ( )[j].getRank ( ) == subject.getContent ( )[offset].getRank ( ) && pattern.getNodeWildcards ( ).contains ( pattern.getContent ( )[j] ) ) + // match node wildcard + offset = offset + 1; else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) // match of variable with subtree offset = subjectSubtreeJumpTable[offset]; diff --git a/tests/cppaql/arbologyTest.cpp b/tests/cppaql/arbologyTest.cpp index 2f66a279ed..bc08362199 100644 --- a/tests/cppaql/arbologyTest.cpp +++ b/tests/cppaql/arbologyTest.cpp @@ -8,11 +8,13 @@ enum class EGenerateType { PATTERN, SUBTREE, NONLINEAR_PATTERN, + EXTENDED_PATTERN, NONLINEAR_PATTERN_SINGLE_VAR, UNORDERED_PATTERN, UNORDERED_SUBTREE, - UNRANKED_PATTERN, + UNRANKED_PATTERN, + UNRANKED_EXTENDED_PATTERN, UNRANKED_SUBJECT, SUBJECT, }; @@ -25,6 +27,8 @@ std::ostream& operator << ( std::ostream& os, const EGenerateType& type ) { return ( os << "SUBTREE" ); case EGenerateType::NONLINEAR_PATTERN: return ( os << "NONLINEAR_PATTERN" ); + case EGenerateType::EXTENDED_PATTERN: + return ( os << "EXTENDED_PATTERN" ); case EGenerateType::NONLINEAR_PATTERN_SINGLE_VAR: return ( os << "NONLINEAR_PATTERN_SINGLE_VAR"); case EGenerateType::SUBJECT: @@ -36,6 +40,8 @@ std::ostream& operator << ( std::ostream& os, const EGenerateType& type ) { case EGenerateType::UNRANKED_PATTERN: return ( os << "UNRANKED_PATTERN" ); + case EGenerateType::UNRANKED_EXTENDED_PATTERN: + return ( os << "UNRANKED_EXTENDED_PATTERN" ); case EGenerateType::UNRANKED_SUBJECT: return ( os << "UNRANKED_SUBJECT" ); default: @@ -48,6 +54,7 @@ const size_t ALPHABET_SIZE = 3; const size_t SUBJECT_HEIGHT = 25; const size_t PATTERN_HEIGHT = 2; const size_t RANDOM_ITERATIONS = 20; +const size_t NODE_WILDCARD_PROBABILITY = 10; static std::string qGen ( const EGenerateType & type, int height, int nodes, int alphSize, const std::string & output ) { std::ostringstream oss; @@ -64,6 +71,9 @@ static std::string qGen ( const EGenerateType & type, int height, int nodes, int case EGenerateType::PATTERN: oss << "tree::generate::RandomRankedPatternFactory"; break; + case EGenerateType::EXTENDED_PATTERN: + oss << "tree::generate::RandomRankedExtendedPatternFactory"; + break; case EGenerateType::NONLINEAR_PATTERN: case EGenerateType::NONLINEAR_PATTERN_SINGLE_VAR: oss << "tree::generate::RandomRankedNonlinearPatternFactory"; @@ -77,6 +87,9 @@ static std::string qGen ( const EGenerateType & type, int height, int nodes, int case EGenerateType::UNRANKED_PATTERN: oss << "tree::generate::RandomUnrankedPatternFactory"; break; + case EGenerateType::UNRANKED_EXTENDED_PATTERN: + oss << "tree::generate::RandomUnrankedExtendedPatternFactory"; + break; } oss << " (int)" << height; @@ -86,6 +99,8 @@ static std::string qGen ( const EGenerateType & type, int height, int nodes, int if ( type == EGenerateType::NONLINEAR_PATTERN || type == EGenerateType::NONLINEAR_PATTERN_SINGLE_VAR ) oss << " (bool)" << ( type == EGenerateType::NONLINEAR_PATTERN_SINGLE_VAR ); oss << " (int)" << 2; /* rank */ + if ( type == EGenerateType::EXTENDED_PATTERN || type == EGenerateType::UNRANKED_EXTENDED_PATTERN ) + oss << " (double)" << NODE_WILDCARD_PROBABILITY; oss << "> $" << output; return oss.str ( ); } @@ -294,6 +309,42 @@ TEST_CASE ( "Arbology tests | unranked pattern", "[integration]" ) { // -------------------------------------------------------------------------------------------------------------------- +TEST_CASE ( "Arbology tests | unranked extended pattern", "[integration]" ) { + auto definition = GENERATE ( as < std::tuple < std::string, std::string, size_t > > ( ), + std::make_tuple ( "Exact Pattern Matching Automaton (Pattern Tree)", + "automaton::run::Occurrences <(arbology::exact::ExactPatternMatchingAutomaton <(tree::GeneralAlphabet::add $pattern <(tree::GeneralAlphabet::get $subject)) | automaton::simplify::ToArcFactored - | automaton::determinize::Determinize - ) $subject", 1000 ) ); + + auto exact = "arbology::exact::ExactPatternMatch $subject $pattern"; + auto pattern = EGenerateType::UNRANKED_EXTENDED_PATTERN; + + SECTION ( "Random tests" ) { + for ( size_t i = 0; i < RANDOM_ITERATIONS; i++ ) { + CAPTURE ( std::get < 0 > ( definition ), std::get < 1 > ( definition ), pattern, std::get < 2 > ( definition ) ); + runRandom ( exact, std::get < 1 > ( definition ), EGenerateType::UNRANKED_SUBJECT, pattern, std::get < 2 > ( definition ) ); + } + } +} + +// -------------------------------------------------------------------------------------------------------------------- + +TEST_CASE ( "Arbology tests | ranked extended pattern", "[integration]" ) { + auto definition = GENERATE ( as < std::tuple < std::string, std::string, size_t > > ( ), + std::make_tuple ( "Exact Knuth Morris Pratt (Pattern PrefixRankedExtended)", + "arbology::exact::KnuthMorrisPratt (PrefixRankedTree)$subject (PrefixRankedExtendedPattern)$pattern", 1000 ) ); + + auto exact = "arbology::exact::ExactPatternMatch $subject $pattern"; + auto pattern = EGenerateType::EXTENDED_PATTERN; + + SECTION ( "Random tests" ) { + for ( size_t i = 0; i < RANDOM_ITERATIONS; i++ ) { + CAPTURE ( std::get < 0 > ( definition ), std::get < 1 > ( definition ), pattern, std::get < 2 > ( definition ) ); + runRandom ( exact, std::get < 1 > ( definition ), EGenerateType::SUBJECT, pattern, std::get < 2 > ( definition ) ); + } + } +} + +// -------------------------------------------------------------------------------------------------------------------- + TEST_CASE ( "Arbology tests | pattern ends ", "[integration]" ) { auto definition = GENERATE ( as < std::tuple < std::string, std::string, size_t > > ( ), std::make_tuple ( "Exact Pattern Matching Using Compressed Bit Vectors (PrefixRanked)", -- GitLab