From 55b7c2c6bde6849fe85e60a36264f1be937ecf64 Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Fri, 14 Apr 2017 20:26:51 +0200 Subject: [PATCH] full and linear index ranked bar notation --- .../FullAndLinearIndexConstruction.cpp | 1 + .../indexing/FullAndLinearIndexConstruction.h | 9 +++++ .../query/FullAndLinearIndexPatterns.cpp | 1 + .../query/FullAndLinearIndexPatterns.h | 39 ++++++++++++++++++- tests.aarbology.sh | 2 + 5 files changed, 51 insertions(+), 1 deletion(-) diff --git a/alib2algo/src/arbology/indexing/FullAndLinearIndexConstruction.cpp b/alib2algo/src/arbology/indexing/FullAndLinearIndexConstruction.cpp index 9203d13fb3..e29d4d88be 100644 --- a/alib2algo/src/arbology/indexing/FullAndLinearIndexConstruction.cpp +++ b/alib2algo/src/arbology/indexing/FullAndLinearIndexConstruction.cpp @@ -16,6 +16,7 @@ indexes::arbology::FullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, } auto fullAndLinearIndexConstructionPrefixRankedTree = FullAndLinearIndexConstruction::RegistratorWrapper < indexes::arbology::FullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > >, tree::PrefixRankedTree < > > ( FullAndLinearIndexConstruction::construct ); +/* Same registration as the PrefixRankedTree line above, with tree::PrefixRankedBarTree < > as the input type; presumably this exposes construct for bar-notation trees through the dispatcher -- confirm against RegistratorWrapper. */ auto fullAndLinearIndexConstructionPrefixRankedBarTree = FullAndLinearIndexConstruction::RegistratorWrapper < indexes::arbology::FullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > >, tree::PrefixRankedBarTree < > > ( FullAndLinearIndexConstruction::construct ); } /* namespace indexing */ diff --git a/alib2algo/src/arbology/indexing/FullAndLinearIndexConstruction.h b/alib2algo/src/arbology/indexing/FullAndLinearIndexConstruction.h index ff9bcddfea..ddf6bc72d1 100644 --- a/alib2algo/src/arbology/indexing/FullAndLinearIndexConstruction.h +++ b/alib2algo/src/arbology/indexing/FullAndLinearIndexConstruction.h @@ -11,6 +11,7 @@ #include <indexes/arbology/FullAndLinearIndex.h> #include <tree/RankedTreeWrapper.h> #include 
<tree/ranked/PrefixRankedTree.h> +#include <tree/ranked/PrefixRankedBarTree.h> #include <core/multipleDispatch.hpp> #include <tree/properties/SubtreeJumpTable.h> #include <stringology/indexing/PositionHeapNaive.h> @@ -35,6 +36,9 @@ public: template < class SymbolType, class RankType > static indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > construct ( const tree::PrefixRankedTree < SymbolType, RankType > & tree ); + + /* Overload of construct that accepts a tree::PrefixRankedBarTree and returns a FullAndLinearIndex over ranked symbols with the same SymbolType and RankType. */ template < class SymbolType, class RankType > + static indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > construct ( const tree::PrefixRankedBarTree < SymbolType, RankType > & tree ); }; template < class SymbolType, class RankType > @@ -42,6 +46,11 @@ indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankTyp return indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > ( stringology::indexing::PositionHeapNaive::construct ( string::LinearString < std::ranked_symbol < SymbolType, RankType > > ( w ) ), tree::properties::SubtreeJumpTable::compute ( w ) ); } +/* Builds the index for the bar-notation tree w from two pieces: PositionHeapNaive::construct applied to w converted to a string::LinearString, and SubtreeJumpTable::compute ( w ). The return expression is the same as in the definition shown just above as context. */ template < class SymbolType, class RankType > +indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > FullAndLinearIndexConstruction::construct ( const tree::PrefixRankedBarTree < SymbolType, RankType > & w ) { + return indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > ( stringology::indexing::PositionHeapNaive::construct ( string::LinearString < std::ranked_symbol < SymbolType, RankType > > ( w ) ), tree::properties::SubtreeJumpTable::compute ( w ) ); +} + } /* namespace indexing */ } /* namespace arbology */ diff --git a/alib2algo/src/arbology/query/FullAndLinearIndexPatterns.cpp b/alib2algo/src/arbology/query/FullAndLinearIndexPatterns.cpp index 7364576279..dde3553975 100644 --- a/alib2algo/src/arbology/query/FullAndLinearIndexPatterns.cpp +++ b/alib2algo/src/arbology/query/FullAndLinearIndexPatterns.cpp @@ 
-18,6 +18,7 @@ std::set < unsigned > FullAndLinearIndexPatterns::query ( const indexes::arbolog } auto fullAndLinearIndexPatternsPrefixRankedPattern = FullAndLinearIndexPatterns::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedPattern < > > ( FullAndLinearIndexPatterns::query ); +/* Same registration as the PrefixRankedPattern line above, with tree::PrefixRankedBarPattern < > as the pattern type; presumably this exposes query for bar-notation patterns through the dispatcher -- confirm against RegistratorWrapper. */ auto fullAndLinearIndexPatternsPrefixRankedBarPattern = FullAndLinearIndexPatterns::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarPattern < > > ( FullAndLinearIndexPatterns::query ); } /* namespace query */ diff --git a/alib2algo/src/arbology/query/FullAndLinearIndexPatterns.h b/alib2algo/src/arbology/query/FullAndLinearIndexPatterns.h index 58a507d2b5..fb6e605437 100644 --- a/alib2algo/src/arbology/query/FullAndLinearIndexPatterns.h +++ b/alib2algo/src/arbology/query/FullAndLinearIndexPatterns.h @@ -10,7 +10,8 @@ #include <indexes/arbology/FullAndLinearIndex.h> #include <tree/RankedTreeWrapper.h> -#include <tree/ranked/PrefixRankedTree.h> +#include <tree/ranked/PrefixRankedPattern.h> +#include <tree/ranked/PrefixRankedBarPattern.h> #include <core/multipleDispatch.hpp> #include <global/GlobalData.h> @@ -64,6 +65,9 @@ public: template < class SymbolType, class RankType > static std::set < unsigned > query ( const indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > & compressedBitParallelTreeIndex, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ); + + /* Overload of query for patterns given as tree::PrefixRankedBarPattern. NOTE(review): the index parameter is named compressedBitParallelTreeIndex here, as in the declaration above, while the definition of this overload names it fullAndLinearIndex -- consider aligning the names. */ template < class SymbolType, class RankType > + static std::set < unsigned > query ( const indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > & compressedBitParallelTreeIndex, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ); }; template < class SymbolType, class RankType > @@ -98,6 +102,39 @@ std::set < unsigned > FullAndLinearIndexPatterns::query ( const indexes::arbolog return res; } +/* Returns the set of first components of the occurrences of pattern found through fullAndLinearIndex (presumably the match start positions -- confirm against FindOccurrences). The pattern content is cut into parts at every subtree wildcard; the first part is looked up with FindOccurrences, and for each further part every occurrence's second component is sent through the jump table before the occurrences are merged with those of that part. */ template < class SymbolType, class RankType > +std::set < unsigned > FullAndLinearIndexPatterns::query ( const 
indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > & fullAndLinearIndex, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ) { + /* Scratch vector handed to MergeOccurrences; sized to getString ( ).size ( ) with every entry set to ( unsigned ) -1. */ std::vector < unsigned > rev ( fullAndLinearIndex.getString ( ).size ( ), ( unsigned ) -1 ); + + std::vector < std::vector < std::ranked_symbol < SymbolType, RankType > > > treePatternParts; + /* Start with one empty part; a new part is opened at every wildcard. */ treePatternParts.push_back ( std::vector < std::ranked_symbol < SymbolType, RankType > > ( ) ); + for ( typename std::vector < std::ranked_symbol < SymbolType, RankType > >::const_iterator symbolIter = pattern.getContent ( ).begin ( ); symbolIter != pattern.getContent ( ).end ( ); ++symbolIter ) { + if ( pattern.getSubtreeWildcard ( ) == * symbolIter ) { + treePatternParts.push_back ( std::vector < std::ranked_symbol < SymbolType, RankType > > ( ) ); + /* Skips one more symbol on top of the loop's own increment -- presumably the bar paired with the wildcard in bar notation; confirm. NOTE(review): there is no end check here, so this relies on the wildcard never being the last symbol of the content. */ ++ symbolIter; + } else { + treePatternParts.back ( ).push_back ( * symbolIter ); + } + } + + /* Occurrences of the part that precedes the first wildcard. */ std::vector < std::pair < unsigned, unsigned > > prevOcc = FindOccurrences ( fullAndLinearIndex.getStringIndex ( ) , treePatternParts [ 0 ] ); + + for ( unsigned i = 1; i < treePatternParts.size ( ); ++ i ) { + /* Replace every occurrence's second component by its jump table entry (presumably the position just past the subtree matched by the wildcard -- confirm). */ for ( std::pair < unsigned, unsigned > & occurrence : prevOcc ) + occurrence.second = fullAndLinearIndex.getJumps ( ) [ occurrence.second ]; + + /* An empty part is not looked up; the jumped occurrences are kept as they are. */ if ( ! 
treePatternParts [ i ].empty ( ) ) + prevOcc = MergeOccurrences ( prevOcc, FindOccurrences ( fullAndLinearIndex.getStringIndex ( ), treePatternParts [ i ] ), rev ); + } + + /* Only the first component of each remaining occurrence is reported. */ std::set < unsigned > res; + for ( const std::pair < unsigned, unsigned > & occurrence : prevOcc ) { + res.insert ( occurrence.first ); + } + + return res; +} + } /* namespace query */ } /* namespace arbology */ diff --git a/tests.aarbology.sh b/tests.aarbology.sh index 7bb7ce56e4..f3918ca479 100755 --- a/tests.aarbology.sh +++ b/tests.aarbology.sh @@ -410,6 +410,8 @@ function runTestNonlinearPatternEnds { clearResults } +runTestPattern "Exact Pattern Matching Using Full And Linear Index (PrefixRankedBar)" "./aarbology2 -a fullAndLinearIndex -s <(./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) | ./aquery2 -q fullAndLinearIndexPatterns -i - -p <( ./acast2 -t PrefixRankedBarPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size" + runTestPattern "Exact Pattern Matching Using Full And Linear Index (PrefixRanked)" "./aarbology2 -a fullAndLinearIndex -s <(./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) | ./aquery2 -q fullAndLinearIndexPatterns -i - -p <( ./acast2 -t PrefixRankedPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size" runTestPattern "Exact Pattern Matching Using Compressed Bit Vectors (PrefixRankedBar)" "./aarbology2 -a compressedBitParallelIndex -s <(./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) | ./aquery2 -q compressedBitParallelismPatterns -i - -p <( ./acast2 -t PrefixRankedBarPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size" -- GitLab