diff --git a/alib2algo/src/arbology/query/FullAndLinearIndexPatterns.cpp b/alib2algo/src/arbology/query/FullAndLinearIndexPatterns.cpp new file mode 100644 index 0000000000000000000000000000000000000000..736457627943350fd455062d8d695b2e4af5b4d6 --- /dev/null +++ b/alib2algo/src/arbology/query/FullAndLinearIndexPatterns.cpp @@ -0,0 +1,24 @@ +/* + * FullAndLinearIndexPatterns.cpp + * + * Created on: 2. 1. 2017 + * Author: Jan Travnicek + */ + +#include "FullAndLinearIndexPatterns.h" + +#include <tree/ranked/PrefixRankedPattern.h> + +namespace arbology { + +namespace query { +/* Entry point for wrapped trees: passes the index and tree.getData ( ) on to dispatch. NOTE(review): dispatch is not defined in this patch; presumably it comes from std::SingleDispatchFirstStaticParam and picks the overload registered below -- confirm. */ +std::set < unsigned > FullAndLinearIndexPatterns::query ( const indexes::arbology::FullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > > & fullAndLinearIndex, const tree::RankedTreeWrapper & tree ) { + return dispatch ( fullAndLinearIndex, tree.getData ( ) ); +} +/* Hands the tree::PrefixRankedPattern < > instantiation of query to RegistratorWrapper; presumably this is what makes dispatch reach it -- confirm. */ +auto fullAndLinearIndexPatternsPrefixRankedPattern = FullAndLinearIndexPatterns::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedPattern < > > ( FullAndLinearIndexPatterns::query ); + +} /* namespace query */ + +} /* namespace arbology */ diff --git a/alib2algo/src/arbology/query/FullAndLinearIndexPatterns.h b/alib2algo/src/arbology/query/FullAndLinearIndexPatterns.h new file mode 100644 index 0000000000000000000000000000000000000000..58a507d2b574de553848fb7d7dbaaa86aaa56a19 --- /dev/null +++ b/alib2algo/src/arbology/query/FullAndLinearIndexPatterns.h @@ -0,0 +1,105 @@ +/* + * FullAndLinearIndexPatterns.h + * + * Created on: 2. 1. 
2017 + * Author: Jan Travnicek + */ + +#ifndef FULL_AND_LINEAR_INDEX_PATTERNS_H_ +#define FULL_AND_LINEAR_INDEX_PATTERNS_H_ + +#include <indexes/arbology/FullAndLinearIndex.h> +#include <tree/RankedTreeWrapper.h> +#include <tree/ranked/PrefixRankedTree.h> +#include <core/multipleDispatch.hpp> +#include <global/GlobalData.h> + +#include <stringology/query/PositionHeapFactors.h> + +namespace arbology { + +namespace query { + +/** + * Query a full and linear index for a given tree pattern. + * The pattern content is split at subtree wildcards; the first part and every further non-empty part are looked up in getStringIndex ( ) and consecutive parts are chained through getJumps ( ). + */ + +class FullAndLinearIndexPatterns : public std::SingleDispatchFirstStaticParam < FullAndLinearIndexPatterns, std::set < unsigned >, const indexes::arbology::FullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > > &, const tree::RankedTreeBase & > { + template < class SymbolType, class RankType > + static std::vector < std::pair < unsigned, unsigned > > FindOccurrences ( const indexes::stringology::PositionHeap < std::ranked_symbol < SymbolType, RankType > > & stringIndex, const std::vector < std::ranked_symbol < SymbolType, RankType > > & string ) { /* every position reported by PositionHeapFactors::query for string becomes the pair ( position, position + string.size ( ) ) */ + std::vector < std::pair < unsigned, unsigned > > res; + for ( unsigned occurrence : stringology::query::PositionHeapFactors::query ( stringIndex, string::LinearString < std::ranked_symbol < SymbolType, RankType > > ( string ) ) ) { + res.push_back ( std::make_pair ( occurrence, occurrence + string.size ( ) ) ); + } + return res; + } + /* For each subOcc entry whose first component equals the second component of a prevOcc entry, emits ( first of that prevOcc entry, second of the subOcc entry ). rev is caller-provided scratch indexed by those components; it is expected to hold ( unsigned ) -1 everywhere on entry and is reset to that before returning. If several prevOcc entries share a second component, only the one written last is used. NOTE(review): rev is indexed without a bounds check, so every second component of prevOcc and every first component of subOcc must be a valid index into rev. */ + static std::vector < std::pair < unsigned, unsigned > > MergeOccurrences ( const std::vector < std::pair < unsigned, unsigned > > & prevOcc, const std::vector < std::pair < unsigned, unsigned > > & subOcc, std::vector < unsigned > & rev ) { + std::vector < std::pair < unsigned, unsigned > > res; + /* record, per second component, the first component of the prevOcc entry */ + for ( const std::pair < unsigned, unsigned > & occurrence : prevOcc ) { + rev [ occurrence.second ] = occurrence.first; + } + /* a subOcc entry is kept only if its first component hits a recorded slot */ + for ( const std::pair < unsigned, unsigned > & subOccurrence : subOcc ) { + if ( rev [ subOccurrence.first ] != ( unsigned ) -1 ) + res.push_back ( 
std::make_pair ( rev [ subOccurrence.first ], subOccurrence.second ) ); + } + /* undo the writes so rev can be reused by the next call */ + for ( const std::pair < unsigned, unsigned > & occurrence : prevOcc ) { + rev [ occurrence.second ] = ( unsigned ) -1; + } + + return res; + } +public: + /** + * Query a full and linear index + * @param fullAndLinearIndex index to query + * @param pattern tree pattern to query by + * @return positions ( as reported by the string index query ) at which the found occurrences of the pattern start + */ + static std::set < unsigned > query ( const indexes::arbology::FullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > > & fullAndLinearIndex, const tree::RankedTreeWrapper & pattern ); + /* Overload doing the actual work for prefix ranked patterns. NOTE(review): the first parameter is called compressedBitParallelTreeIndex in this declaration but fullAndLinearIndex in the definition. */ + template < class SymbolType, class RankType > + static std::set < unsigned > query ( const indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > & compressedBitParallelTreeIndex, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ); +}; +/* Matches the parts of pattern between subtree wildcards one after another against the index and returns the first component of every occurrence that survives all parts. */ +template < class SymbolType, class RankType > +std::set < unsigned > FullAndLinearIndexPatterns::query ( const indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > & fullAndLinearIndex, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ) { + std::vector < unsigned > rev ( fullAndLinearIndex.getString ( ).size ( ), ( unsigned ) -1 ); /* scratch for MergeOccurrences: one slot per element of getString ( ), all ( unsigned ) -1 */ + /* split pattern.getContent ( ) at every symbol equal to pattern.getSubtreeWildcard ( ); the wildcard symbols themselves are not stored, so n wildcards give n + 1 parts, possibly empty */ + std::vector < std::vector < std::ranked_symbol < SymbolType, RankType > > > treePatternParts; + treePatternParts.push_back ( std::vector < std::ranked_symbol < SymbolType, RankType > > ( ) ); + for ( const std::ranked_symbol < SymbolType, RankType > & symbol : pattern.getContent ( ) ) { + if ( pattern.getSubtreeWildcard ( ) == symbol ) { + treePatternParts.push_back ( std::vector < std::ranked_symbol < SymbolType, RankType > > ( ) ); + } else { + treePatternParts.back ( ).push_back ( symbol ); + } + } + /* occurrences of the part in front of the first wildcard */ + std::vector < std::pair < unsigned, unsigned > > prevOcc = FindOccurrences ( fullAndLinearIndex.getStringIndex ( ) , treePatternParts [ 0 ] ); + /* every further part is preceded by exactly one wildcard */ + for ( unsigned i = 1; i < treePatternParts.size ( ); ++ i ) { + for ( std::pair < 
unsigned, unsigned > & occurrence : prevOcc ) + occurrence.second = fullAndLinearIndex.getJumps ( ) [ occurrence.second ]; /* move the end of each partial match across the wildcard. NOTE(review): presumably getJumps ( ) [ p ] is the position right after the subtree that starts at p -- confirm against FullAndLinearIndex */ + /* an empty part ( two adjacent wildcards or a trailing one ) has nothing to look up; the jump alone is enough */ + if ( ! treePatternParts [ i ].empty ( ) ) + prevOcc = MergeOccurrences ( prevOcc, FindOccurrences ( fullAndLinearIndex.getStringIndex ( ), treePatternParts [ i ] ), rev ); + } + /* only the first component of each remaining pair is reported; the set drops duplicates and orders them */ + std::set < unsigned > res; + for ( const std::pair < unsigned, unsigned > & occurrence : prevOcc ) { + res.insert ( occurrence.first ); + } + + return res; +} + +} /* namespace query */ + +} /* namespace arbology */ + +#endif /* FULL_AND_LINEAR_INDEX_PATTERNS_H_ */ diff --git a/aquery2/src/aquery.cpp b/aquery2/src/aquery.cpp index b5ec252f630917c719ec1614777c8c5f3a4d1ea2..e825f1a46ea1e05f1418ddc06fc831fa2efead27 100644 --- a/aquery2/src/aquery.cpp +++ b/aquery2/src/aquery.cpp @@ -21,6 +21,7 @@ #include <stringology/query/BitParallelismFactors.h> #include <stringology/query/CompressedBitParallelismFactors.h> #include <arbology/query/CompressedBitParallelismPatterns.h> +#include <arbology/query/FullAndLinearIndexPatterns.h> int main ( int argc, char * argv[] ) { try { @@ -36,6 +37,7 @@ int main ( int argc, char * argv[] ) { allowed.push_back ( "bitParallelismFactors" ); allowed.push_back ( "compressedBitParallelismFactors" ); allowed.push_back ( "compressedBitParallelismPatterns" ); + allowed.push_back ( "fullAndLinearIndexPatterns" ); TCLAP::ValuesConstraint < std::string > allowedVals ( allowed ); TCLAP::ValueArg < std::string > query ( "q", "query", "Query index", false, "exactFactorMatch", & allowedVals ); @@ -140,6 +142,19 @@ int main ( int argc, char * argv[] ) { measurements::end ( ); measurements::start ( "Output write", measurements::Type::AUXILIARY ); + alib::XmlDataFactory::toStdout ( res ); + } else if ( query.getValue ( ) == "fullAndLinearIndexPatterns" ) { + indexes::arbology::FullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > > fullAndLinearIndex = alib::XmlDataFactory::fromTokens < indexes::arbology::FullAndLinearIndex < 
std::ranked_symbol < DefaultSymbolType, DefaultRankType > > > ( sax::FromXMLParserHelper::parseInput ( indexInput ) ); + tree::RankedTreeWrapper pattern = alib::XmlDataFactory::fromTokens < tree::RankedTreeWrapper > ( std::move ( sax::FromXMLParserHelper::parseInput(true, patternInput).front ( ) ) ); + /* index and pattern are parsed; the query below is measured on its own as the MAIN phase */ + measurements::end ( ); + measurements::start ( "Algorithm", measurements::Type::MAIN ); + + std::set < unsigned > res = arbology::query::FullAndLinearIndexPatterns::query ( fullAndLinearIndex, pattern ); + + measurements::end ( ); + measurements::start ( "Output write", measurements::Type::AUXILIARY ); + alib::XmlDataFactory::toStdout ( res ); } else { throw exception::CommonException ( "Invalid algorithm" ); diff --git a/tests.aarbology.sh b/tests.aarbology.sh index 4188340f3c1356297d5c62d9dc5a7a6d2e695bad..75bb84263f9065d1a0c96d5bd34970a542f23831 100755 --- a/tests.aarbology.sh +++ b/tests.aarbology.sh @@ -410,6 +410,8 @@ function runTestNonlinearPatternEnds { clearResults } +runTestPattern "Exact Pattern Matching Using Full And Linear Index (PrefixRanked)" "./aarbology2 -a fullAndLinearIndex -s <(./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) | ./aquery2 -q fullAndLinearIndexPatterns -i - -p <( ./acast2 -t PrefixRankedPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size" + runTestPatternEnds "Exact Pattern Matching Using Compressed Bit Vectors (PrefixRanked)" "./aarbology2 -a compressedBitParallelIndex -s <(./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) | ./aquery2 -q compressedBitParallelismPatterns -i - -p <( ./acast2 -t PrefixRankedPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size" runTestPatternEnds "Exact Pattern Matching Automaton (PrefixRanked)" "./aarbology2 -a exactPatternMatchingAutomaton -p <(./acast2 -t PrefixRankedPattern -i <(./aaccess2 --tree alphabet -o add -i \"\$PATTERN_FILE\" -a <( ./aaccess2 --tree alphabet -o get -i \"\$SUBJECT_FILE\" ) ) ) | ./adeterminize2 | ./arun2 -t occurrences -a - -i <( ./acast2 -t PrefixRankedTree -i 
\"\$SUBJECT_FILE\" | ./acast2 -t LinearString ) | ./astat2 -p size"