Commit c96791be, authored and committed by Ing. Jan Trávníček

algo: unranked tree pattern matching using ZAutomata

parent 90e1d874
......@@ -19,4 +19,8 @@ auto ExactPatternMatchingAutomatonUnorderedRankedTree = registration::AbstractRe
auto ExactPatternMatchingAutomatonUnorderedRankedPattern = registration::AbstractRegister < arbology::exact::ExactPatternMatchingAutomaton, automaton::UnorderedNFTA < DefaultSymbolType, unsigned >, const tree::UnorderedRankedPattern < > & > ( arbology::exact::ExactPatternMatchingAutomaton::construct );
// Registrations of the ExactPatternMatchingAutomaton::construct overloads taking an unranked tree and an unranked pattern.
// NOTE(review): the template arguments appear to be < algorithm class, result type, parameter types... > - confirm against registration::AbstractRegister.
auto ExactPatternMatchingAutomatonUnrankedTree = registration::AbstractRegister < arbology::exact::ExactPatternMatchingAutomaton, automaton::NondeterministicZAutomaton < DefaultSymbolType, unsigned >, const tree::UnrankedTree < > & > ( arbology::exact::ExactPatternMatchingAutomaton::construct );
auto ExactPatternMatchingAutomatonUnrankedPattern = registration::AbstractRegister < arbology::exact::ExactPatternMatchingAutomaton, automaton::NondeterministicZAutomaton < DefaultSymbolType, unsigned >, const tree::UnrankedPattern < > & > ( arbology::exact::ExactPatternMatchingAutomaton::construct );
} /* namespace */
......@@ -13,11 +13,14 @@
#include <tree/ranked/PrefixRankedBarTree.h>
#include <tree/ranked/PrefixRankedBarPattern.h>
#include <tree/unranked/UnrankedPattern.h>
#include <automaton/PDA/InputDrivenNPDA.h>
#include <automaton/PDA/VisiblyPushdownNPDA.h>
#include <automaton/PDA/NPDA.h>
#include <automaton/TA/NFTA.h>
#include <automaton/TA/UnorderedNFTA.h>
#include <automaton/TA/NondeterministicZAutomaton.h>
#include <common/ranked_symbol.hpp>
#include <alphabet/BottomOfTheStackSymbol.h>
......@@ -56,6 +59,12 @@ public:
template < class SymbolType >
static automaton::UnorderedNFTA < SymbolType, unsigned > construct ( const tree::UnorderedRankedPattern < SymbolType > & pattern );
/**
 * Constructs a nondeterministic Z-automaton from an unranked tree used as the pattern.
 *
 * \tparam SymbolType the type of symbols labelling the tree nodes
 *
 * \param pattern the unranked tree to build the automaton for
 *
 * \return the constructed automaton with states numbered by unsigned integers
 */
template < class SymbolType >
static automaton::NondeterministicZAutomaton < SymbolType, unsigned > construct ( const tree::UnrankedTree < SymbolType > & pattern );
/**
 * Constructs a nondeterministic Z-automaton from an unranked pattern, i.e. a tree that may contain
 * subtree wildcard and subtree gap symbols.
 *
 * \tparam SymbolType the type of symbols labelling the pattern nodes
 *
 * \param pattern the unranked pattern to build the automaton for
 *
 * \return the constructed automaton with states numbered by unsigned integers
 */
template < class SymbolType >
static automaton::NondeterministicZAutomaton < SymbolType, unsigned > construct ( const tree::UnrankedPattern < SymbolType > & pattern );
};
template < class SymbolType >
......@@ -311,6 +320,67 @@ automaton::UnorderedNFTA < SymbolType, unsigned > ExactPatternMatchingAutomaton:
return res;
}
/**
 * Constructs the automaton for an unranked tree used as the pattern.
 *
 * A plain tree is handled by the subtree matching construction, so the call is
 * forwarded unchanged to ExactSubtreeMatchingAutomaton::construct.
 *
 * \param pattern the unranked tree to build the automaton for
 *
 * \return the automaton returned by ExactSubtreeMatchingAutomaton::construct
 */
template < class SymbolType >
automaton::NondeterministicZAutomaton < SymbolType, unsigned > ExactPatternMatchingAutomaton::construct ( const tree::UnrankedTree < SymbolType > & pattern ) {
return ExactSubtreeMatchingAutomaton::construct ( pattern );
}
/**
 * Recursively adds to \p res the states and transitions for the pattern subtree rooted at \p node.
 *
 * A node labelled with the subtree wildcard adds nothing and is represented by state 0; the caller
 * is expected to have registered state 0 and its transitions beforehand. Any other node gets a fresh
 * state that is entered on the node's label with an empty list of children. Each child of the node
 * then moves the current state to another fresh state:
 *  - a subtree gap child adds a loop on the current state over state 0 and a transition with an empty
 *    child list to the fresh state,
 *  - any other child adds a transition over the state returned by the recursive call for that child.
 *
 * \param node the root of the pattern subtree to process
 * \param res the automaton under construction; modified in place
 * \param subtreeWildcard the symbol denoting the subtree wildcard
 * \param subtreeGap the symbol denoting the subtree gap
 * \param nextState the lowest state number not used yet; incremented for every state created
 *
 * \return the state reached after the node and all of its children were processed (0 for a wildcard node)
 */
template < class SymbolType >
unsigned constructRecursivePattern ( const ext::tree < SymbolType > & node, automaton::NondeterministicZAutomaton < SymbolType, unsigned > & res, const SymbolType & subtreeWildcard, const SymbolType & subtreeGap, unsigned & nextState ) {
	using Children = ext::vector < ext::variant < SymbolType, unsigned > >;

	if ( node.getData ( ) == subtreeWildcard )
		return 0;

	unsigned state = nextState ++;
	res.addState ( state );
	res.addTransition ( node.getData ( ), Children ( ), state );

	for ( const ext::tree < SymbolType > & child : node.getChildren ( ) ) {
		// Allocate the successor state before descending into the child, so the recursion
		// continues numbering after it. The state is registered exactly once here.
		unsigned target = nextState ++;
		res.addState ( target );

		if ( child.getData ( ) == subtreeGap ) {
			res.addTransition ( state, Children ( 1, 0u ), state );
			res.addTransition ( state, Children ( ), target );
		} else {
			res.addTransition ( state, Children ( 1, constructRecursivePattern ( child, res, subtreeWildcard, subtreeGap, nextState ) ), target );
		}

		state = target;
	}

	return state;
}
/**
 * Constructs the automaton for an unranked pattern.
 *
 * The input alphabet of the result is the alphabet of the pattern without the subtree wildcard and
 * subtree gap symbols. State 0 is entered on every input symbol with an empty list of children and
 * loops on itself over a child in state 0. The remaining states are created by
 * constructRecursivePattern, numbered from 1, and the state it returns for the pattern root is made final.
 *
 * \param pattern the unranked pattern to build the automaton for
 *
 * \return the constructed automaton
 */
template < class SymbolType >
automaton::NondeterministicZAutomaton < SymbolType, unsigned > ExactPatternMatchingAutomaton::construct ( const tree::UnrankedPattern < SymbolType > & pattern ) {
	using Children = ext::vector < ext::variant < SymbolType, unsigned > >;

	ext::set < SymbolType > alphabet = pattern.getAlphabet ( );
	alphabet.erase ( pattern.getSubtreeWildcard ( ) );
	alphabet.erase ( pattern.getSubtreeGap ( ) );

	automaton::NondeterministicZAutomaton < SymbolType, unsigned > res;
	res.setInputAlphabet ( alphabet );

	res.addState ( 0 );
	for ( const SymbolType & symbol : res.getInputAlphabet ( ) )
		res.addTransition ( symbol, Children ( ), 0 );

	// The loop on state 0 does not depend on the input symbol, so it is added once instead of once
	// per symbol. It is skipped for an empty alphabet to keep the result identical to the
	// per-symbol registration.
	if ( ! res.getInputAlphabet ( ).empty ( ) )
		res.addTransition ( 0u, Children ( 1, 0u ), 0 );

	unsigned nextState = 1;
	res.addFinalState ( constructRecursivePattern ( pattern.getContent ( ), res, pattern.getSubtreeWildcard ( ), pattern.getSubtreeGap ( ), nextState ) );
	return res;
}
} /* namespace exact */
} /* namespace arbology */
......
......@@ -11,4 +11,6 @@ auto ExactSubtreeMatchingAutomatonRankedTree = registration::AbstractRegister <
auto ExactSubtreeMatchingAutomatonUnorderedRankedTree = registration::AbstractRegister < arbology::exact::ExactSubtreeMatchingAutomaton, automaton::UnorderedNFTA < DefaultSymbolType, unsigned >, const tree::UnorderedRankedTree < > & > ( arbology::exact::ExactSubtreeMatchingAutomaton::construct );
// Registration of the ExactSubtreeMatchingAutomaton::construct overload taking an unranked tree.
// NOTE(review): the template arguments appear to be < algorithm class, result type, parameter types... > - confirm against registration::AbstractRegister.
auto ExactSubtreeMatchingAutomatonUnrankedTree = registration::AbstractRegister < arbology::exact::ExactSubtreeMatchingAutomaton, automaton::NondeterministicZAutomaton < DefaultSymbolType, unsigned >, const tree::UnrankedTree < > & > ( arbology::exact::ExactSubtreeMatchingAutomaton::construct );
} /* namespace */
......@@ -4,10 +4,12 @@
#include <tree/ranked/PrefixRankedTree.h>
#include <tree/ranked/RankedTree.h>
#include <tree/ranked/UnorderedRankedTree.h>
#include <tree/unranked/UnrankedTree.h>
#include <automaton/PDA/InputDrivenNPDA.h>
#include <automaton/TA/NFTA.h>
#include <automaton/TA/UnorderedNFTA.h>
#include <automaton/TA/NondeterministicZAutomaton.h>
#include <alphabet/BottomOfTheStackSymbol.h>
......@@ -33,6 +35,8 @@ public:
template < class SymbolType >
static automaton::UnorderedNFTA < SymbolType, unsigned > construct ( const tree::UnorderedRankedTree < SymbolType > & pattern );
/**
 * Constructs a nondeterministic Z-automaton from an unranked tree.
 *
 * \tparam SymbolType the type of symbols labelling the tree nodes
 *
 * \param pattern the unranked tree to build the automaton for
 *
 * \return the constructed automaton with states numbered by unsigned integers
 */
template < class SymbolType >
static automaton::NondeterministicZAutomaton < SymbolType, unsigned > construct ( const tree::UnrankedTree < SymbolType > & pattern );
};
template < class SymbolType >
......@@ -139,6 +143,29 @@ automaton::UnorderedNFTA < SymbolType, unsigned > ExactSubtreeMatchingAutomaton:
return res;
}
/**
 * Adds to \p res one state and one transition for every node of the subtree rooted at \p node.
 *
 * Children are processed before their parent, so every child state receives a lower number than
 * the state of the node itself. The transition of a node reads the node's label together with the
 * sequence of states assigned to its children.
 *
 * \param node the root of the subtree to process
 * \param res the automaton under construction; modified in place
 * \param nextState the lowest state number not used yet; incremented for every state created
 *
 * \return the state assigned to \p node
 */
template < class SymbolType >
unsigned constructRecursive ( const ext::tree < SymbolType > & node, automaton::NondeterministicZAutomaton < SymbolType, unsigned > & res, unsigned & nextState ) {
	ext::vector < ext::variant < SymbolType, unsigned > > childStates;

	for ( const ext::tree < SymbolType > & subtree : node.getChildren ( ) ) {
		childStates.push_back ( constructRecursive ( subtree, res, nextState ) );
	}

	// the node itself is numbered only after all of its descendants
	unsigned current = nextState;
	++ nextState;

	res.addState ( current );
	res.addTransition ( node.getData ( ), childStates, current );

	return current;
}
/**
 * Constructs the automaton for an unranked tree.
 *
 * The input alphabet is taken over from the tree, the states are numbered from 0 by
 * constructRecursive, and the state assigned to the root of the tree is the only state made final here.
 *
 * \param pattern the unranked tree to build the automaton for
 *
 * \return the constructed automaton
 */
template < class SymbolType >
automaton::NondeterministicZAutomaton < SymbolType, unsigned > ExactSubtreeMatchingAutomaton::construct ( const tree::UnrankedTree < SymbolType > & pattern ) {
	automaton::NondeterministicZAutomaton < SymbolType, unsigned > result;
	result.setInputAlphabet ( pattern.getAlphabet ( ) );

	unsigned stateCounter = 0;
	unsigned rootState = constructRecursive ( pattern.getContent ( ), result, stateCounter );
	result.addFinalState ( rootState );

	return result;
}
} /* namespace exact */
} /* namespace arbology */
......
......@@ -11,7 +11,9 @@ enum class EGenerateType {
NONLINEAR_PATTERN_SINGLE_VAR,
UNORDERED_PATTERN,
UNORDERED_SUBTREE,
UNRANKED_PATTERN,
UNRANKED_SUBJECT,
SUBJECT,
};
......@@ -31,6 +33,11 @@ std::ostream& operator << ( std::ostream& os, const EGenerateType& type ) {
return ( os << "UNORDERED_PATTERN" );
case EGenerateType::UNORDERED_SUBTREE:
return ( os << "UNORDERED_SUBTREE" );
case EGenerateType::UNRANKED_PATTERN:
return ( os << "UNRANKED_PATTERN" );
case EGenerateType::UNRANKED_SUBJECT:
return ( os << "UNRANKED_SUBJECT" );
default:
return ( os << "Unhandled EGenerateType" );
}
......@@ -51,6 +58,9 @@ static std::string qGen ( const EGenerateType & type, int height, int nodes, int
case EGenerateType::SUBTREE:
oss << "tree::generate::RandomRankedTreeFactory";
break;
case EGenerateType::UNRANKED_SUBJECT:
oss << "tree::generate::RandomUnrankedTreeFactory";
break;
case EGenerateType::PATTERN:
oss << "tree::generate::RandomRankedPatternFactory";
break;
......@@ -64,6 +74,9 @@ static std::string qGen ( const EGenerateType & type, int height, int nodes, int
case EGenerateType::UNORDERED_SUBTREE:
oss << "(UnorderedRankedTree) tree::generate::RandomRankedTreeFactory";
break;
case EGenerateType::UNRANKED_PATTERN:
oss << "tree::generate::RandomUnrankedPatternFactory";
break;
}
oss << " (int)" << height;
......@@ -104,10 +117,10 @@ void runTest ( const std::string & exactPipeline, const std::string &pipeline, c
TimeoutAqlTest ( 10s, qs );
}
void runRandom ( const std::string & exactPipeline, const std::string &pipeline, const EGenerateType &patternType, const size_t& subjSize ) {
void runRandom ( const std::string & exactPipeline, const std::string &pipeline, const EGenerateType &subjectType, const EGenerateType &patternType, const size_t& subjSize ) {
ext::vector < std::string > qs = {
qGen ( patternType, PATTERN_HEIGHT, PATTERN_SIZE, ALPHABET_SIZE, "pattern" ),
qGen ( EGenerateType::SUBJECT, SUBJECT_HEIGHT, subjSize, ALPHABET_SIZE, "subject" ),
qGen ( subjectType, SUBJECT_HEIGHT, subjSize, ALPHABET_SIZE, "subject" ),
"execute " + exactPipeline + " > $res1",
"execute " + pipeline + " > $res2",
"quit compare::PrimitiveCompare <(stats::SizeStat $res1) <(stats::SizeStat $res2)",
......@@ -150,7 +163,7 @@ TEST_CASE ( "Arbology tests | nonlinear pattern", "[integration]" ) {
SECTION ( "Random tests" ) {
for ( size_t i = 0; i < RANDOM_ITERATIONS; i++ ) {
CAPTURE ( std::get < 0 > ( definition ), std::get < 1 > ( definition ), pattern, std::get < 2 > ( definition ) );
runRandom ( exact, std::get < 1 > ( definition ), pattern, std::get < 2 > ( definition ) );
runRandom ( exact, std::get < 1 > ( definition ), EGenerateType::SUBJECT, pattern, std::get < 2 > ( definition ) );
}
}
}
......@@ -176,7 +189,7 @@ TEST_CASE ( "Arbology tests | nonlinear pattern ends", "[integration]" ) {
SECTION ( "Random tests" ) {
for ( size_t i = 0; i < RANDOM_ITERATIONS; i++ ) {
CAPTURE ( std::get < 0 > ( definition ), std::get < 1 > ( definition ), pattern, std::get < 2 > ( definition ) );
runRandom ( exact, std::get < 1 > ( definition ), pattern, std::get < 2 > ( definition ) );
runRandom ( exact, std::get < 1 > ( definition ), EGenerateType::SUBJECT, pattern, std::get < 2 > ( definition ) );
}
}
}
......@@ -229,7 +242,7 @@ TEST_CASE ( "Arbology tests | pattern", "[integration]" ) {
SECTION ( "Random tests" ) {
for ( size_t i = 0; i < RANDOM_ITERATIONS; i++ ) {
CAPTURE ( std::get < 0 > ( definition ), std::get < 1 > ( definition ), pattern, std::get < 2 > ( definition ) );
runRandom ( exact, std::get < 1 > ( definition ), pattern, std::get < 2 > ( definition ) );
runRandom ( exact, std::get < 1 > ( definition ), EGenerateType::SUBJECT, pattern, std::get < 2 > ( definition ) );
}
}
}
......@@ -254,7 +267,25 @@ TEST_CASE ( "Arbology tests | unordered pattern", "[integration]" ) {
SECTION ( "Random tests" ) {
for ( size_t i = 0; i < RANDOM_ITERATIONS; i++ ) {
CAPTURE ( std::get < 0 > ( definition ), std::get < 1 > ( definition ), pattern, std::get < 2 > ( definition ) );
runRandom ( exact, std::get < 1 > ( definition ), pattern, std::get < 2 > ( definition ) );
runRandom ( exact, std::get < 1 > ( definition ), EGenerateType::SUBJECT, pattern, std::get < 2 > ( definition ) );
}
}
}
// --------------------------------------------------------------------------------------------------------------------
// Integration test of the unranked pattern matching automaton: the result of the automaton based
// pipeline is compared with the result of the reference ExactPatternMatch query on random inputs.
TEST_CASE ( "Arbology tests | unranked pattern", "[integration]" ) {
// Each definition is ( description, tested pipeline, subject size handed to runRandom ).
auto definition = GENERATE ( as < std::tuple < std::string, std::string, size_t > > ( ),
std::make_tuple ( "Exact Pattern Matching Automaton (Pattern Tree)",
"automaton::run::Occurrences <(arbology::exact::ExactPatternMatchingAutomaton <(tree::GeneralAlphabet::add $pattern <(tree::GeneralAlphabet::get $subject)) | automaton::simplify::ToArcFactored - | automaton::determinize::Determinize - ) $subject", 1000 ) );
// Reference query the tested pipeline is compared against.
auto exact = "arbology::exact::ExactPatternMatch $subject $pattern";
// Kind of the randomly generated pattern.
auto pattern = EGenerateType::UNRANKED_PATTERN;
SECTION ( "Random tests" ) {
for ( size_t i = 0; i < RANDOM_ITERATIONS; i++ ) {
// Record the inputs of this iteration so that a failure report identifies the definition.
CAPTURE ( std::get < 0 > ( definition ), std::get < 1 > ( definition ), pattern, std::get < 2 > ( definition ) );
// The subject is generated as an unranked tree as well, to match the unranked pattern.
runRandom ( exact, std::get < 1 > ( definition ), EGenerateType::UNRANKED_SUBJECT, pattern, std::get < 2 > ( definition ) );
}
}
}
......@@ -286,7 +317,7 @@ TEST_CASE ( "Arbology tests | pattern ends ", "[integration]" ) {
SECTION ( "Random tests" ) {
for ( size_t i = 0; i < RANDOM_ITERATIONS; i++ ) {
CAPTURE ( std::get < 0 > ( definition ), std::get < 1 > ( definition ), pattern, std::get < 2 > ( definition ) );
runRandom ( exact, std::get < 1 > ( definition ), pattern, std::get < 2 > ( definition ) );
runRandom ( exact, std::get < 1 > ( definition ), EGenerateType::SUBJECT, pattern, std::get < 2 > ( definition ) );
}
}
}
......@@ -316,7 +347,7 @@ TEST_CASE ( "Arbology tests | subtree", "[integration]" ) {
SECTION ( "Random tests" ) {
for ( size_t i = 0; i < RANDOM_ITERATIONS; i++ ) {
CAPTURE ( std::get < 0 > ( definition ), std::get < 1 > ( definition ), pattern, std::get < 2 > ( definition ) );
runRandom ( exact, std::get < 1 > ( definition ), pattern, std::get < 2 > ( definition ) );
runRandom ( exact, std::get < 1 > ( definition ), EGenerateType::SUBJECT, pattern, std::get < 2 > ( definition ) );
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment