Skip to content
Snippets Groups Projects
Commit ec97398e authored by Jan Trávníček's avatar Jan Trávníček
Browse files

query full and linear index for trees

parent 7ccb6b85
No related branches found
No related tags found
No related merge requests found
Pipeline #
/*
* FullAndLinearIndexPatterns.cpp
*
* Created on: 2. 1. 2017
* Author: Jan Travnicek
*/
#include "FullAndLinearIndexPatterns.h"
#include <tree/ranked/PrefixRankedPattern.h>
namespace arbology {
namespace query {
/*
 * Wrapper overload: pulls the data object out of the tree wrapper and hands it,
 * together with the index, to dispatch; whatever dispatch returns is returned unchanged.
 */
std::set < unsigned > FullAndLinearIndexPatterns::query ( const indexes::arbology::FullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > > & fullAndLinearIndex, const tree::RankedTreeWrapper & tree ) {
	const auto & wrappedPattern = tree.getData ( );

	return dispatch ( fullAndLinearIndex, wrappedPattern );
}
// Namespace-scope registrator object, constructed with FullAndLinearIndexPatterns::query
// for the result type std::set < unsigned > and the pattern type tree::PrefixRankedPattern < >.
// NOTE(review): presumably this is what lets dispatch ( ) route a wrapped PrefixRankedPattern
// to the templated query overload - confirm against RegistratorWrapper in core/multipleDispatch.hpp.
auto fullAndLinearIndexPatternsPrefixRankedPattern = FullAndLinearIndexPatterns::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedPattern < > > ( FullAndLinearIndexPatterns::query );
} /* namespace query */
} /* namespace arbology */
/*
* FullAndLinearIndexPatterns.h
*
* Created on: 2. 1. 2017
* Author: Jan Travnicek
*/
#ifndef FULL_AND_LINEAR_INDEX_PATTERNS_H_
#define FULL_AND_LINEAR_INDEX_PATTERNS_H_
#include <indexes/arbology/FullAndLinearIndex.h>
#include <tree/RankedTreeWrapper.h>
#include <tree/ranked/PrefixRankedTree.h>
#include <core/multipleDispatch.hpp>
#include <global/GlobalData.h>
#include <stringology/query/PositionHeapFactors.h>
namespace arbology {
namespace query {
/**
 * Query a full and linear index by a tree pattern.
 *
 * The class offers a wrapper overload of query taking a tree::RankedTreeWrapper and a
 * templated overload taking a tree::PrefixRankedPattern. The two private helpers work
 * on occurrences represented as pairs ( begin, end ) of positions.
 */
class FullAndLinearIndexPatterns : public std::SingleDispatchFirstStaticParam < FullAndLinearIndexPatterns, std::set < unsigned >, const indexes::arbology::FullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > > &, const tree::RankedTreeBase & > {
	/**
	 * Finds all occurrences of a linear fragment using the position heap.
	 *
	 * @param stringIndex position heap to query
	 * @param string fragment to search for
	 * @return one pair per position reported by PositionHeapFactors::query; first is the reported position, second is that position plus string.size ( )
	 */
	template < class SymbolType, class RankType >
	static std::vector < std::pair < unsigned, unsigned > > FindOccurrences ( const indexes::stringology::PositionHeap < std::ranked_symbol < SymbolType, RankType > > & stringIndex, const std::vector < std::ranked_symbol < SymbolType, RankType > > & string ) {
		std::vector < std::pair < unsigned, unsigned > > res;

		for ( unsigned occurrence : stringology::query::PositionHeapFactors::query ( stringIndex, string::LinearString < std::ranked_symbol < SymbolType, RankType > > ( string ) ) ) {
			res.push_back ( std::make_pair ( occurrence, occurrence + string.size ( ) ) );
		}

		return res;
	}

	/**
	 * Joins partial occurrences with occurrences of the next fragment.
	 *
	 * A fragment occurrence is kept only if it begins exactly where some previous
	 * occurrence ends; the result then spans from the begin of the previous occurrence
	 * to the end of the fragment occurrence. Results follow the order of subOcc.
	 *
	 * @param prevOcc occurrences found so far
	 * @param subOcc occurrences of the next fragment
	 * @param rev scratch table indexed by position; it is read at every subOcc begin and written at every prevOcc end, so it must be large enough for both. Every entry is expected to hold ( unsigned ) -1 on entry; the entries written here are reset to that value before returning
	 * @return the joined occurrences
	 */
	static std::vector < std::pair < unsigned, unsigned > > MergeOccurrences ( const std::vector < std::pair < unsigned, unsigned > > & prevOcc, const std::vector < std::pair < unsigned, unsigned > > & subOcc, std::vector < unsigned > & rev ) {
		std::vector < std::pair < unsigned, unsigned > > res;

		// map end position -> begin position of the occurrences found so far
		for ( const std::pair < unsigned, unsigned > & occurrence : prevOcc ) {
			rev [ occurrence.second ] = occurrence.first;
		}

		for ( const std::pair < unsigned, unsigned > & subOccurrence : subOcc ) {
			if ( rev [ subOccurrence.first ] != static_cast < unsigned > ( -1 ) )
				res.push_back ( std::make_pair ( rev [ subOccurrence.first ], subOccurrence.second ) );
		}

		// undo only the entries touched above so the table can be reused by the next call
		for ( const std::pair < unsigned, unsigned > & occurrence : prevOcc ) {
			rev [ occurrence.second ] = static_cast < unsigned > ( -1 );
		}

		return res;
	}

public:
	/**
	 * Query a full and linear index by a wrapped tree pattern.
	 *
	 * @param fullAndLinearIndex the index to query
	 * @param pattern the wrapped pattern to query by
	 * @return set of positions of the pattern occurrences
	 */
	static std::set < unsigned > query ( const indexes::arbology::FullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > > & fullAndLinearIndex, const tree::RankedTreeWrapper & pattern );

	/**
	 * Query a full and linear index by a prefix ranked pattern.
	 *
	 * @param fullAndLinearIndex the index to query
	 * @param pattern the pattern to query by
	 * @return set of positions of the pattern occurrences
	 */
	template < class SymbolType, class RankType >
	static std::set < unsigned > query ( const indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > & fullAndLinearIndex, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern );
};
/*
 * Templated query for prefix ranked patterns.
 *
 * The pattern content is cut into fragments at every subtree wildcard. Occurrences of
 * the first fragment are looked up in the string index; then, for every following
 * fragment, the end of each partial occurrence is advanced through the jump table and,
 * unless the fragment is empty, the partial occurrences are merged with the occurrences
 * of that fragment. The begin positions of what remains are returned.
 */
template < class SymbolType, class RankType >
std::set < unsigned > FullAndLinearIndexPatterns::query ( const indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > & fullAndLinearIndex, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ) {
	typedef std::ranked_symbol < SymbolType, RankType > Symbol;
	typedef std::vector < std::pair < unsigned, unsigned > > Occurrences;

	// scratch table for MergeOccurrences, one slot per position of the indexed string
	std::vector < unsigned > rev ( fullAndLinearIndex.getString ( ).size ( ), static_cast < unsigned > ( -1 ) );

	// fragments of the pattern between subtree wildcards; starts with a single empty fragment
	std::vector < std::vector < Symbol > > parts ( 1 );

	for ( const Symbol & current : pattern.getContent ( ) ) {
		if ( pattern.getSubtreeWildcard ( ) == current )
			parts.emplace_back ( );
		else
			parts.back ( ).push_back ( current );
	}

	const auto & stringIndex = fullAndLinearIndex.getStringIndex ( );
	const auto & jumps = fullAndLinearIndex.getJumps ( );

	Occurrences matches = FindOccurrences ( stringIndex, parts [ 0 ] );

	for ( unsigned partNo = 1; partNo < parts.size ( ); ++ partNo ) {
		// a wildcard sits between fragment partNo - 1 and fragment partNo: move every end through the jump table
		for ( std::size_t k = 0; k < matches.size ( ); ++ k )
			matches [ k ].second = jumps [ matches [ k ].second ];

		if ( parts [ partNo ].empty ( ) )
			continue;

		const Occurrences partMatches = FindOccurrences ( stringIndex, parts [ partNo ] );
		matches = MergeOccurrences ( matches, partMatches, rev );
	}

	std::set < unsigned > starts;

	for ( Occurrences::const_iterator it = matches.begin ( ); it != matches.end ( ); ++ it )
		starts.insert ( it->first );

	return starts;
}
} /* namespace query */
} /* namespace arbology */
#endif /* FULL_AND_LINEAR_INDEX_PATTERNS_H_ */
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <stringology/query/BitParallelismFactors.h> #include <stringology/query/BitParallelismFactors.h>
#include <stringology/query/CompressedBitParallelismFactors.h> #include <stringology/query/CompressedBitParallelismFactors.h>
#include <arbology/query/CompressedBitParallelismPatterns.h> #include <arbology/query/CompressedBitParallelismPatterns.h>
#include <arbology/query/FullAndLinearIndexPatterns.h>
   
int main ( int argc, char * argv[] ) { int main ( int argc, char * argv[] ) {
try { try {
...@@ -36,6 +37,7 @@ int main ( int argc, char * argv[] ) { ...@@ -36,6 +37,7 @@ int main ( int argc, char * argv[] ) {
allowed.push_back ( "bitParallelismFactors" ); allowed.push_back ( "bitParallelismFactors" );
allowed.push_back ( "compressedBitParallelismFactors" ); allowed.push_back ( "compressedBitParallelismFactors" );
allowed.push_back ( "compressedBitParallelismPatterns" ); allowed.push_back ( "compressedBitParallelismPatterns" );
allowed.push_back ( "fullAndLinearIndexPatterns" );
TCLAP::ValuesConstraint < std::string > allowedVals ( allowed ); TCLAP::ValuesConstraint < std::string > allowedVals ( allowed );
   
TCLAP::ValueArg < std::string > query ( "q", "query", "Query index", false, "exactFactorMatch", & allowedVals ); TCLAP::ValueArg < std::string > query ( "q", "query", "Query index", false, "exactFactorMatch", & allowedVals );
...@@ -140,6 +142,19 @@ int main ( int argc, char * argv[] ) { ...@@ -140,6 +142,19 @@ int main ( int argc, char * argv[] ) {
measurements::end ( ); measurements::end ( );
measurements::start ( "Output write", measurements::Type::AUXILIARY ); measurements::start ( "Output write", measurements::Type::AUXILIARY );
   
alib::XmlDataFactory::toStdout ( res );
} else if ( query.getValue ( ) == "fullAndLinearIndexPatterns" ) {
indexes::arbology::FullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > > fullAndLinearIndex = alib::XmlDataFactory::fromTokens < indexes::arbology::FullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > > > ( sax::FromXMLParserHelper::parseInput ( indexInput ) );
tree::RankedTreeWrapper pattern = alib::XmlDataFactory::fromTokens < tree::RankedTreeWrapper > ( std::move ( sax::FromXMLParserHelper::parseInput(true, patternInput).front ( ) ) );
measurements::end ( );
measurements::start ( "Algorithm", measurements::Type::MAIN );
std::set < unsigned > res = arbology::query::FullAndLinearIndexPatterns::query ( fullAndLinearIndex, pattern );
measurements::end ( );
measurements::start ( "Output write", measurements::Type::AUXILIARY );
alib::XmlDataFactory::toStdout ( res ); alib::XmlDataFactory::toStdout ( res );
} else { } else {
throw exception::CommonException ( "Invalid algorithm" ); throw exception::CommonException ( "Invalid algorithm" );
......
...@@ -410,6 +410,8 @@ function runTestNonlinearPatternEnds { ...@@ -410,6 +410,8 @@ function runTestNonlinearPatternEnds {
clearResults clearResults
} }
   
runTestPattern "Exact Pattern Matching Using Full And Linear Index (PrefixRanked)" "./aarbology2 -a fullAndLinearIndex -s <(./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) | ./aquery2 -q fullAndLinearIndexPatterns -i - -p <( ./acast2 -t PrefixRankedPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size"
runTestPatternEnds "Exact Pattern Matching Using Compressed Bit Vectors (PrefixRanked)" "./aarbology2 -a compressedBitParallelIndex -s <(./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) | ./aquery2 -q compressedBitParallelismPatterns -i - -p <( ./acast2 -t PrefixRankedPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size" runTestPatternEnds "Exact Pattern Matching Using Compressed Bit Vectors (PrefixRanked)" "./aarbology2 -a compressedBitParallelIndex -s <(./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) | ./aquery2 -q compressedBitParallelismPatterns -i - -p <( ./acast2 -t PrefixRankedPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size"
   
runTestPatternEnds "Exact Pattern Matching Automaton (PrefixRanked)" "./aarbology2 -a exactPatternMatchingAutomaton -p <(./acast2 -t PrefixRankedPattern -i <(./aaccess2 --tree alphabet -o add -i \"\$PATTERN_FILE\" -a <( ./aaccess2 --tree alphabet -o get -i \"\$SUBJECT_FILE\" ) ) ) | ./adeterminize2 | ./arun2 -t occurrences -a - -i <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" | ./acast2 -t LinearString ) | ./astat2 -p size" runTestPatternEnds "Exact Pattern Matching Automaton (PrefixRanked)" "./aarbology2 -a exactPatternMatchingAutomaton -p <(./acast2 -t PrefixRankedPattern -i <(./aaccess2 --tree alphabet -o add -i \"\$PATTERN_FILE\" -a <( ./aaccess2 --tree alphabet -o get -i \"\$SUBJECT_FILE\" ) ) ) | ./adeterminize2 | ./arun2 -t occurrences -a - -i <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" | ./acast2 -t LinearString ) | ./astat2 -p size"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment