diff --git a/alib2algo/src/arbology/indexing/FullAndLinearIndexConstruction.cpp b/alib2algo/src/arbology/indexing/FullAndLinearIndexConstruction.cpp
index 9203d13fb3b2ad1822423cb846dadd26b73f2d47..e29d4d88befb9f1cf31b90edf3419afc9a505127 100644
--- a/alib2algo/src/arbology/indexing/FullAndLinearIndexConstruction.cpp
+++ b/alib2algo/src/arbology/indexing/FullAndLinearIndexConstruction.cpp
@@ -16,6 +16,7 @@ indexes::arbology::FullAndLinearIndex < std::ranked_symbol < DefaultSymbolType,
 }
 
 auto fullAndLinearIndexConstructionPrefixRankedTree = FullAndLinearIndexConstruction::RegistratorWrapper < indexes::arbology::FullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > >, tree::PrefixRankedTree < > > ( FullAndLinearIndexConstruction::construct );
+auto fullAndLinearIndexConstructionPrefixRankedBarTree = FullAndLinearIndexConstruction::RegistratorWrapper < indexes::arbology::FullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > >, tree::PrefixRankedBarTree < > > ( FullAndLinearIndexConstruction::construct );
 
 } /* namespace indexing */
 
diff --git a/alib2algo/src/arbology/indexing/FullAndLinearIndexConstruction.h b/alib2algo/src/arbology/indexing/FullAndLinearIndexConstruction.h
index ff9bcddfea65e6754295cf5d549c759a8b5d3d54..ddf6bc72d173683e2525a3afb3ae553666a060bf 100644
--- a/alib2algo/src/arbology/indexing/FullAndLinearIndexConstruction.h
+++ b/alib2algo/src/arbology/indexing/FullAndLinearIndexConstruction.h
@@ -11,6 +11,7 @@
 #include <indexes/arbology/FullAndLinearIndex.h>
 #include <tree/RankedTreeWrapper.h>
 #include <tree/ranked/PrefixRankedTree.h>
+#include <tree/ranked/PrefixRankedBarTree.h>
 #include <core/multipleDispatch.hpp>
 #include <tree/properties/SubtreeJumpTable.h>
 #include <stringology/indexing/PositionHeapNaive.h>
@@ -35,6 +36,15 @@ public:
 
 	template < class SymbolType, class RankType >
 	static indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > construct ( const tree::PrefixRankedTree < SymbolType, RankType > & tree );
+
+	/**
+	 * Constructs the full and linear index of a tree given as a PrefixRankedBarTree.
+	 * The index combines the result of PositionHeapNaive::construct on the tree's linear string with the tree's SubtreeJumpTable.
+	 * @param tree subject tree to index
+	 * @return full and linear index of the tree
+	 */
+	template < class SymbolType, class RankType >
+	static indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > construct ( const tree::PrefixRankedBarTree < SymbolType, RankType > & tree );
 };
 
 template < class SymbolType, class RankType >
@@ -42,6 +52,11 @@ indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankTyp
 	return indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > ( stringology::indexing::PositionHeapNaive::construct ( string::LinearString < std::ranked_symbol < SymbolType, RankType > > ( w ) ), tree::properties::SubtreeJumpTable::compute ( w ) );
 }
 
+template < class SymbolType, class RankType >
+indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > FullAndLinearIndexConstruction::construct ( const tree::PrefixRankedBarTree < SymbolType, RankType > & w ) {
+	return indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > ( stringology::indexing::PositionHeapNaive::construct ( string::LinearString < std::ranked_symbol < SymbolType, RankType > > ( w ) ), tree::properties::SubtreeJumpTable::compute ( w ) );
+}
+
 } /* namespace indexing */
 
 } /* namespace arbology */
diff --git a/alib2algo/src/arbology/query/FullAndLinearIndexPatterns.cpp b/alib2algo/src/arbology/query/FullAndLinearIndexPatterns.cpp
index 736457627943350fd455062d8d695b2e4af5b4d6..dde35539758eee43d1822ca8660b4823ce9a8995 100644
--- a/alib2algo/src/arbology/query/FullAndLinearIndexPatterns.cpp
+++ b/alib2algo/src/arbology/query/FullAndLinearIndexPatterns.cpp
@@ -18,6 +18,7 @@ std::set < unsigned > FullAndLinearIndexPatterns::query ( const indexes::arbolog
 }
 
 auto fullAndLinearIndexPatternsPrefixRankedPattern = FullAndLinearIndexPatterns::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedPattern < > > ( FullAndLinearIndexPatterns::query );
+auto fullAndLinearIndexPatternsPrefixRankedBarPattern = FullAndLinearIndexPatterns::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarPattern < > > ( FullAndLinearIndexPatterns::query );
 
 } /* namespace query */
 
diff --git a/alib2algo/src/arbology/query/FullAndLinearIndexPatterns.h b/alib2algo/src/arbology/query/FullAndLinearIndexPatterns.h
index 58a507d2b574de553848fb7d7dbaaa86aaa56a19..fb6e6054379cff5eedf9d1b2a9db3e215f720d13 100644
--- a/alib2algo/src/arbology/query/FullAndLinearIndexPatterns.h
+++ b/alib2algo/src/arbology/query/FullAndLinearIndexPatterns.h
@@ -10,7 +10,8 @@
 
 #include <indexes/arbology/FullAndLinearIndex.h>
 #include <tree/RankedTreeWrapper.h>
-#include <tree/ranked/PrefixRankedTree.h>
+#include <tree/ranked/PrefixRankedPattern.h>
+#include <tree/ranked/PrefixRankedBarPattern.h>
 #include <core/multipleDispatch.hpp>
 #include <global/GlobalData.h>
 
@@ -64,6 +65,15 @@ public:
 
 	template < class SymbolType, class RankType >
 	static std::set < unsigned > query ( const indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > & compressedBitParallelTreeIndex, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern );
+
+	/**
+	 * Finds occurrences of a pattern given as a PrefixRankedBarPattern using a full and linear index.
+	 * @param fullAndLinearIndex index to search in
+	 * @param pattern pattern to look for; its content may contain subtree wildcards
+	 * @return set of the first components of the matched occurrence pairs (presumably start positions in the indexed string - confirm)
+	 */
+	template < class SymbolType, class RankType >
+	static std::set < unsigned > query ( const indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > & fullAndLinearIndex, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern );
 };
 
 template < class SymbolType, class RankType >
@@ -98,6 +108,46 @@ std::set < unsigned > FullAndLinearIndexPatterns::query ( const indexes::arbolog
 	return res;
 }
 
+template < class SymbolType, class RankType >
+std::set < unsigned > FullAndLinearIndexPatterns::query ( const indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > & fullAndLinearIndex, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ) {
+	std::vector < unsigned > rev ( fullAndLinearIndex.getString ( ).size ( ), ( unsigned ) -1 );
+
+	// Split the pattern content into the runs of symbols that lie between subtree wildcards.
+	std::vector < std::vector < std::ranked_symbol < SymbolType, RankType > > > treePatternParts;
+	treePatternParts.push_back ( std::vector < std::ranked_symbol < SymbolType, RankType > > ( ) );
+	for ( typename std::vector < std::ranked_symbol < SymbolType, RankType > >::const_iterator symbolIter = pattern.getContent ( ).begin ( ); symbolIter != pattern.getContent ( ).end ( ); ++symbolIter ) {
+		if ( pattern.getSubtreeWildcard ( ) == * symbolIter ) {
+			treePatternParts.push_back ( std::vector < std::ranked_symbol < SymbolType, RankType > > ( ) );
+
+			// Also skip the symbol right after the wildcard (presumably the bar that closes it - confirm).
+			// Stop if the content ends here so that the loop increment cannot step past end ( ).
+			if ( ++ symbolIter == pattern.getContent ( ).end ( ) )
+				break;
+		} else {
+			treePatternParts.back ( ).push_back ( * symbolIter );
+		}
+	}
+
+	std::vector < std::pair < unsigned, unsigned > > prevOcc = FindOccurrences ( fullAndLinearIndex.getStringIndex ( ) , treePatternParts [ 0 ] );
+
+	for ( unsigned i = 1; i < treePatternParts.size ( ); ++ i ) {
+		// A wildcard separates part i - 1 from part i: move the second component of each occurrence through the jump table.
+		for ( std::pair < unsigned, unsigned > & occurrence : prevOcc )
+			occurrence.second = fullAndLinearIndex.getJumps ( ) [ occurrence.second ];
+
+		// An empty part (e.g. two adjacent wildcards) adds no further constraint, so nothing is merged for it.
+		if ( ! treePatternParts [ i ].empty ( ) )
+			prevOcc = MergeOccurrences ( prevOcc, FindOccurrences ( fullAndLinearIndex.getStringIndex ( ), treePatternParts [ i ] ), rev );
+	}
+
+	std::set < unsigned > res;
+	for ( const std::pair < unsigned, unsigned > & occurrence : prevOcc ) {
+		res.insert ( occurrence.first );
+	}
+
+	return res;
+}
+
 } /* namespace query */
 
 } /* namespace arbology */
diff --git a/tests.aarbology.sh b/tests.aarbology.sh
index 7bb7ce56e4f6504e5632604aa063ce0bb68a3316..f3918ca479e207d16868f1c9a8e58195b66cc26b 100755
--- a/tests.aarbology.sh
+++ b/tests.aarbology.sh
@@ -410,6 +410,8 @@ function runTestNonlinearPatternEnds {
 	clearResults
 }
 
+runTestPattern "Exact Pattern Matching Using Full And Linear Index (PrefixRankedBar)" "./aarbology2 -a fullAndLinearIndex -s <(./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) | ./aquery2 -q fullAndLinearIndexPatterns -i - -p <( ./acast2 -t PrefixRankedBarPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size"
+
 runTestPattern "Exact Pattern Matching Using Full And Linear Index (PrefixRanked)" "./aarbology2 -a fullAndLinearIndex -s <(./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) | ./aquery2 -q fullAndLinearIndexPatterns -i - -p <( ./acast2 -t PrefixRankedPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size"
 runTestPattern "Exact Pattern Matching Using Compressed Bit Vectors (PrefixRankedBar)" "./aarbology2 -a compressedBitParallelIndex -s <(./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) | ./aquery2 -q compressedBitParallelismPatterns -i - -p <( ./acast2 -t PrefixRankedBarPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size"
 