From 20ad7965a1c1644842c2c7487104d12611d4bed0 Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Tue, 22 Aug 2017 16:11:49 +0200 Subject: [PATCH] add Nonlinear Compressed Bit Parallel Index and query --- aarbology2/src/aarbology.cpp | 14 + ...CompressedBitParallelIndexConstruction.cpp | 23 ++ ...arCompressedBitParallelIndexConstruction.h | 64 ++++ .../NonlinearFullAndLinearIndexConstruction.h | 6 +- ...linearCompressedBitParallelismPatterns.cpp | 25 ++ ...onlinearCompressedBitParallelismPatterns.h | 121 ++++++++ ...onlinearCompressedBitParallelTreeIndex.cpp | 15 + .../NonlinearCompressedBitParallelTreeIndex.h | 277 ++++++++++++++++++ .../ranked/PrefixRankedBarNonlinearPattern.h | 2 +- aquery2/src/aquery.cpp | 15 + tests.aarbology.sh | 2 + 11 files changed, 561 insertions(+), 3 deletions(-) create mode 100644 alib2algo/src/arbology/indexing/NonlinearCompressedBitParallelIndexConstruction.cpp create mode 100644 alib2algo/src/arbology/indexing/NonlinearCompressedBitParallelIndexConstruction.h create mode 100644 alib2algo/src/arbology/query/NonlinearCompressedBitParallelismPatterns.cpp create mode 100644 alib2algo/src/arbology/query/NonlinearCompressedBitParallelismPatterns.h create mode 100644 alib2data/src/indexes/arbology/NonlinearCompressedBitParallelTreeIndex.cpp create mode 100644 alib2data/src/indexes/arbology/NonlinearCompressedBitParallelTreeIndex.h diff --git a/aarbology2/src/aarbology.cpp b/aarbology2/src/aarbology.cpp index 5ff33174f8..fc4183bc02 100644 --- a/aarbology2/src/aarbology.cpp +++ b/aarbology2/src/aarbology.cpp @@ -34,6 +34,7 @@ #include <tree/NormalizeTreeLabels.h> #include <arbology/transform/BeginToEndIndex.h> #include <arbology/indexing/CompressedBitParallelIndexConstruction.h> +#include <arbology/indexing/NonlinearCompressedBitParallelIndexConstruction.h> #include <arbology/indexing/FullAndLinearIndexConstruction.h> #include <arbology/indexing/NonlinearFullAndLinearIndexConstruction.h> @@ -57,6 +58,7 @@ int main ( 
int argc, char * argv[] ) { allowed.push_back ( "exactTreePatternAutomaton" ); allowed.push_back ( "exactNonlinearTreePatternAutomaton" ); allowed.push_back ( "compressedBitParallelIndex" ); + allowed.push_back ( "nonlinearCompressedBitParallelIndex" ); allowed.push_back ( "fullAndLinearIndex" ); allowed.push_back ( "nonlinearFullAndLinearIndex" ); @@ -301,6 +303,18 @@ int main ( int argc, char * argv[] ) { measurements::start ( "Output write", measurements::Type::AUXILIARY ); alib::XmlDataFactory::toStdout ( compressedBitParallelIndex ); + } else if ( algorithm.getValue ( ) == "nonlinearCompressedBitParallelIndex" ) { + tree::RankedTreeWrapper subject = alib::XmlDataFactory::fromTokens ( std::move ( sax::FromXMLParserHelper::parseInput(true, subjectInput).front ( ) ) ); + + measurements::end ( ); + measurements::start ( "Algorithm", measurements::Type::MAIN ); + + indexes::arbology::NonlinearCompressedBitParallelTreeIndex < common::ranked_symbol < DefaultSymbolType, DefaultRankType > > nonlinearCompressedBitParallelIndex = arbology::indexing::NonlinearCompressedBitParallelIndexConstruction::construct ( subject ); + + measurements::end ( ); + measurements::start ( "Output write", measurements::Type::AUXILIARY ); + + alib::XmlDataFactory::toStdout ( nonlinearCompressedBitParallelIndex ); } else if ( algorithm.getValue ( ) == "fullAndLinearIndex" ) { tree::RankedTreeWrapper subject = alib::XmlDataFactory::fromTokens ( std::move ( sax::FromXMLParserHelper::parseInput(true, subjectInput).front ( ) ) ); diff --git a/alib2algo/src/arbology/indexing/NonlinearCompressedBitParallelIndexConstruction.cpp b/alib2algo/src/arbology/indexing/NonlinearCompressedBitParallelIndexConstruction.cpp new file mode 100644 index 0000000000..1af36e1608 --- /dev/null +++ b/alib2algo/src/arbology/indexing/NonlinearCompressedBitParallelIndexConstruction.cpp @@ -0,0 +1,23 @@ +/* + * NonlinearCompressedBitParallelIndexConstruction.cpp + * + * Created on: 22. 8. 
2017 + * Author: Jan Travnicek + */ + +#include "NonlinearCompressedBitParallelIndexConstruction.h" +#include <registration/AlgoRegistration.hpp> + +namespace arbology { + +namespace indexing { + +indexes::arbology::NonlinearCompressedBitParallelTreeIndex < common::ranked_symbol < DefaultSymbolType, DefaultRankType > > NonlinearCompressedBitParallelIndexConstruction::construct ( const tree::RankedTreeWrapper & tree ) { + return dispatch ( tree.getData ( ) ); +} + +auto nonlinearcompressedBitParallelIndexConstructionPrefixRankedBarTree = registration::OverloadRegister < NonlinearCompressedBitParallelIndexConstruction, indexes::arbology::NonlinearCompressedBitParallelTreeIndex < common::ranked_symbol < DefaultSymbolType, DefaultRankType > >, tree::PrefixRankedBarTree < > > ( NonlinearCompressedBitParallelIndexConstruction::construct ); + +} /* namespace indexing */ + +} /* namespace arbology */ diff --git a/alib2algo/src/arbology/indexing/NonlinearCompressedBitParallelIndexConstruction.h b/alib2algo/src/arbology/indexing/NonlinearCompressedBitParallelIndexConstruction.h new file mode 100644 index 0000000000..5de3233204 --- /dev/null +++ b/alib2algo/src/arbology/indexing/NonlinearCompressedBitParallelIndexConstruction.h @@ -0,0 +1,64 @@ +/* + * NonlinearCompressedBitParallelIndexConstruction.h + * + * Created on: 22. 8. 2017 + * Author: Jan Travnicek + */ + +#ifndef ARBOLOGY_NONLINEAR_COMPRESSED_BIT_PARALLEL_INDEX_CONSTRUCTION_H_ +#define ARBOLOGY_NONLINEAR_COMPRESSED_BIT_PARALLEL_INDEX_CONSTRUCTION_H_ + +#include <indexes/arbology/NonlinearCompressedBitParallelTreeIndex.h> +#include <tree/RankedTreeWrapper.h> +#include <tree/ranked/PrefixRankedBarTree.h> +#include <core/multipleDispatch.hpp> +#include <exception/CommonException.h> +#include <tree/properties/SubtreeJumpTable.h> +#include <tree/properties/ExactSubtreeRepeatsNaive.h> + +namespace arbology { + +namespace indexing { + +/** + * Constructs a nonlinear compressed bit parallel index for given tree. 
+ * + */ + +class NonlinearCompressedBitParallelIndexConstruction : public alib::SingleDispatch < NonlinearCompressedBitParallelIndexConstruction, indexes::arbology::NonlinearCompressedBitParallelTreeIndex < common::ranked_symbol < DefaultSymbolType, DefaultRankType > >, const tree::RankedTreeBase & > { +public: + /** + * Creates nonlinear compressed bit parallel index for trees + * @param tree tree to construct the index for + * @return the index + */ + static indexes::arbology::NonlinearCompressedBitParallelTreeIndex < common::ranked_symbol < DefaultSymbolType, DefaultRankType > > construct ( const tree::RankedTreeWrapper & tree ); + + template < class SymbolType, class RankType > + static indexes::arbology::NonlinearCompressedBitParallelTreeIndex < common::ranked_symbol < SymbolType, RankType > > construct ( const tree::PrefixRankedBarTree < SymbolType, RankType > & tree ); +}; + +template < class SymbolType, class RankType > +indexes::arbology::NonlinearCompressedBitParallelTreeIndex < common::ranked_symbol < SymbolType, RankType > > NonlinearCompressedBitParallelIndexConstruction::construct ( const tree::PrefixRankedBarTree < SymbolType, RankType > & w ) { + ext::map < common::ranked_symbol < SymbolType, RankType >, common::SparseBoolVector > res; + + for ( const common::ranked_symbol < SymbolType, RankType > & symbol : w.getAlphabet ( ) ) + res [ symbol ].resize ( w.getContent ( ).size ( ) ); + + for ( unsigned i = 0; i < w.getContent ( ).size ( ); ++i ) + res [ w.getContent ( ) [ i ] ] [ i ] = true; + + ext::vector < common::ranked_symbol < unsigned, RankType > > content = tree::properties::ExactSubtreeRepeatsNaive::repeats ( w ).getContent ( ); + + ext::vector < unsigned > repeats; + for ( const common::ranked_symbol < unsigned, RankType > & symbol : content ) + repeats.push_back ( symbol.getSymbol ( ) ); + + return indexes::arbology::NonlinearCompressedBitParallelTreeIndex < common::ranked_symbol < SymbolType, RankType > > ( w.getAlphabet ( ), res, 
tree::properties::SubtreeJumpTable::compute ( w ), repeats ); +} + +} /* namespace indexing */ + +} /* namespace arbology */ + +#endif /* ARBOLOGY_NONLINEAR_COMPRESSED_BIT_PARALLEL_INDEX_CONSTRUCTION_H_ */ diff --git a/alib2algo/src/arbology/indexing/NonlinearFullAndLinearIndexConstruction.h b/alib2algo/src/arbology/indexing/NonlinearFullAndLinearIndexConstruction.h index b2fda17a68..5ccd247613 100644 --- a/alib2algo/src/arbology/indexing/NonlinearFullAndLinearIndexConstruction.h +++ b/alib2algo/src/arbology/indexing/NonlinearFullAndLinearIndexConstruction.h @@ -43,8 +43,9 @@ public: template < class SymbolType, class RankType > indexes::arbology::NonlinearFullAndLinearIndex < common::ranked_symbol < SymbolType, RankType > > NonlinearFullAndLinearIndexConstruction::construct ( const tree::PrefixRankedTree < SymbolType, RankType > & w ) { - ext::vector < unsigned > repeats; ext::vector < common::ranked_symbol < unsigned, RankType > > content = tree::properties::ExactSubtreeRepeatsNaive::repeats ( w ).getContent ( ); + + ext::vector < unsigned > repeats; for ( const common::ranked_symbol < unsigned, RankType > & symbol : content ) repeats.push_back ( symbol.getSymbol ( ) ); @@ -53,8 +54,9 @@ indexes::arbology::NonlinearFullAndLinearIndex < common::ranked_symbol < SymbolT template < class SymbolType, class RankType > indexes::arbology::NonlinearFullAndLinearIndex < common::ranked_symbol < SymbolType, RankType > > NonlinearFullAndLinearIndexConstruction::construct ( const tree::PrefixRankedBarTree < SymbolType, RankType > & w ) { - ext::vector < unsigned > repeats; ext::vector < common::ranked_symbol < unsigned, RankType > > content = tree::properties::ExactSubtreeRepeatsNaive::repeats ( w ).getContent ( ); + + ext::vector < unsigned > repeats; for ( const common::ranked_symbol < unsigned, RankType > & symbol : content ) repeats.push_back ( symbol.getSymbol ( ) ); diff --git a/alib2algo/src/arbology/query/NonlinearCompressedBitParallelismPatterns.cpp 
b/alib2algo/src/arbology/query/NonlinearCompressedBitParallelismPatterns.cpp new file mode 100644 index 0000000000..22ffc4d6b4 --- /dev/null +++ b/alib2algo/src/arbology/query/NonlinearCompressedBitParallelismPatterns.cpp @@ -0,0 +1,25 @@ +/* + * NonlinearCompressedBitParallelismPatterns.cpp + * + * Created on: 2. 1. 2017 + * Author: Jan Travnicek + */ + +#include "NonlinearCompressedBitParallelismPatterns.h" + +#include <tree/ranked/PrefixRankedPattern.h> +#include <registration/AlgoRegistration.hpp> + +namespace arbology { + +namespace query { + +ext::set < unsigned > NonlinearCompressedBitParallelismPatterns::query ( const indexes::arbology::NonlinearCompressedBitParallelTreeIndex < common::ranked_symbol < DefaultSymbolType, DefaultRankType > > & nonlinearCompressedBitParallelIndex, const tree::RankedTreeWrapper & tree ) { + return dispatch ( nonlinearCompressedBitParallelIndex, tree.getData ( ) ); +} + +auto NonlinearCompressedBitParallelismPatternsPrefixRankedBarPattern = registration::OverloadRegister < NonlinearCompressedBitParallelismPatterns, ext::set < unsigned >, tree::PrefixRankedBarNonlinearPattern < > > ( NonlinearCompressedBitParallelismPatterns::query ); + +} /* namespace query */ + +} /* namespace arbology */ diff --git a/alib2algo/src/arbology/query/NonlinearCompressedBitParallelismPatterns.h b/alib2algo/src/arbology/query/NonlinearCompressedBitParallelismPatterns.h new file mode 100644 index 0000000000..d4bda33f39 --- /dev/null +++ b/alib2algo/src/arbology/query/NonlinearCompressedBitParallelismPatterns.h @@ -0,0 +1,121 @@ +/* + * NonlinearCompressedBitParallelismPatterns.h + * + * Created on: 2. 1. 
2017 + * Author: Jan Travnicek + */ + +#ifndef NONLINEAR_COMPRESSED_BIT_PARALLELISM_PATTERNS_H_ +#define NONLINEAR_COMPRESSED_BIT_PARALLELISM_PATTERNS_H_ + +#include <indexes/arbology/NonlinearCompressedBitParallelTreeIndex.h> +#include <tree/RankedTreeWrapper.h> +#include <tree/ranked/PrefixRankedBarNonlinearPattern.h> +#include <core/multipleDispatch.hpp> +#include <global/GlobalData.h> + +namespace arbology { + +namespace query { + +/** + * Queries a nonlinear compressed bit parallel tree index with a nonlinear tree pattern. + * + */ + +class NonlinearCompressedBitParallelismPatterns : public alib::SingleDispatchFirstStaticParam < NonlinearCompressedBitParallelismPatterns, ext::set < unsigned >, const indexes::arbology::NonlinearCompressedBitParallelTreeIndex < common::ranked_symbol < DefaultSymbolType, DefaultRankType > > &, const tree::RankedTreeBase & > { + +public: + /** + * Query a nonlinear compressed bit parallel tree index + * @param nonlinearCompressedBitParallelIndex index to query + * @param pattern pattern to query by + * @return occurrences of the pattern + */ + static ext::set < unsigned > query ( const indexes::arbology::NonlinearCompressedBitParallelTreeIndex < common::ranked_symbol < DefaultSymbolType, DefaultRankType > > & nonlinearCompressedBitParallelIndex, const tree::RankedTreeWrapper & pattern ); + + template < class SymbolType, class RankType > + static ext::set < unsigned > query ( const indexes::arbology::NonlinearCompressedBitParallelTreeIndex < common::ranked_symbol < SymbolType, RankType > > & nonlinearCompressedBitParallelTreeIndex, const tree::PrefixRankedBarNonlinearPattern < SymbolType, RankType > & pattern ); +}; + +template < class SymbolType, class RankType > +bool include ( unsigned i, const ext::vector < unsigned > & repeats, const ext::vector < int > & jumps, const tree::PrefixRankedBarNonlinearPattern < SymbolType, RankType > & pattern ) { + ext::map < common::ranked_symbol < SymbolType, RankType >, unsigned > variablesSetting; + + // clear the current state of variable to subtree repeat + variablesSetting.clear(); + 
// index to the pattern + int j = pattern.getContent ( ).size ( ) - 1; + + while ( j >= 0 ) { + if ( pattern.getContent ( )[j] == pattern.getVariablesBar ( ) ) { + i = jumps[i]; + j = j - 2; + + // check nonlinear variable + if ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j + 1 ] ) ) { + auto setting = variablesSetting.find ( pattern.getContent ( )[ j + 1 ] ); + + if ( setting != variablesSetting.end ( ) && repeats [ i + 1 ] != setting->second ) + break; + + variablesSetting.insert ( std::make_pair ( pattern.getContent ( )[ j + 1 ], repeats [ i + 1 ] ) ); + } + } else { + // match of symbol + i = i - 1; + j = j - 1; + } + } + + return j == -1; + +} + +template < class SymbolType, class RankType > +ext::set < unsigned > NonlinearCompressedBitParallelismPatterns::query ( const indexes::arbology::NonlinearCompressedBitParallelTreeIndex < common::ranked_symbol < SymbolType, RankType > > & nonlinearCompressedBitParallelIndex, const tree::PrefixRankedBarNonlinearPattern < SymbolType, RankType > & pattern ) { + auto symbolIter = pattern.getContent ( ).begin ( ); + + typename ext::map < common::ranked_symbol < SymbolType, RankType >, common::SparseBoolVector >::const_iterator symbolVectorIter = nonlinearCompressedBitParallelIndex.getData ( ).find ( * symbolIter ); + + if ( symbolVectorIter == nonlinearCompressedBitParallelIndex.getData ( ).end ( ) ) + return { }; + + common::SparseBoolVector indexVector = symbolVectorIter->second; + + for ( ++ symbolIter; symbolIter != pattern.getContent ( ).end ( ); ++ symbolIter ) { + if ( * symbolIter == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( * symbolIter ) ) { + common::SparseBoolVector newVector; + newVector.resize ( indexVector.size ( ) ); + + for ( unsigned i : ( indexVector << 1 ) ) + newVector [ nonlinearCompressedBitParallelIndex.getJumps ( ) [ i ] - 1 ] = true; + + indexVector = newVector; + + ++ symbolIter; + } else { + symbolVectorIter = 
nonlinearCompressedBitParallelIndex.getData ( ).find ( * symbolIter ); + + if ( symbolVectorIter == nonlinearCompressedBitParallelIndex.getData ( ).end ( ) ) + return { }; + + indexVector = ( indexVector << 1 ) & symbolVectorIter->second; + } + } + + ext::set < unsigned > res; + + for ( unsigned i : indexVector ) + if ( include ( i, nonlinearCompressedBitParallelIndex.getRepeats ( ), nonlinearCompressedBitParallelIndex.getJumps ( ), pattern ) ) + res.insert ( i + 1 ); + + return res; +} + +} /* namespace query */ + +} /* namespace arbology */ + +#endif /* NONLINEAR_COMPRESSED_BIT_PARALLELISM_PATTERNS_H_ */ diff --git a/alib2data/src/indexes/arbology/NonlinearCompressedBitParallelTreeIndex.cpp b/alib2data/src/indexes/arbology/NonlinearCompressedBitParallelTreeIndex.cpp new file mode 100644 index 0000000000..9ae75396ac --- /dev/null +++ b/alib2data/src/indexes/arbology/NonlinearCompressedBitParallelTreeIndex.cpp @@ -0,0 +1,15 @@ +/* + * NonlinearCompressedBitParallelTreeIndex.cpp + * + * Created on: 22. 8. 2017 + * Author: Jan Travnicek + */ + +#include "NonlinearCompressedBitParallelTreeIndex.h" +#include <registration/TypeRegistration.hpp> + +namespace alib { + +auto arbologyNonlinearCompressedBitParallelIndexType = registration::TypeRegister < alib::Object, indexes::arbology::NonlinearCompressedBitParallelTreeIndex < > > ( ); + +} /* namespace alib */ diff --git a/alib2data/src/indexes/arbology/NonlinearCompressedBitParallelTreeIndex.h b/alib2data/src/indexes/arbology/NonlinearCompressedBitParallelTreeIndex.h new file mode 100644 index 0000000000..c05066f071 --- /dev/null +++ b/alib2data/src/indexes/arbology/NonlinearCompressedBitParallelTreeIndex.h @@ -0,0 +1,277 @@ +/* + * NonlinearCompressedBitParallelTreeIndex.h + * + * Created on: 22. 8. 
2017 + * Author: Jan Travnicek + */ + +#ifndef ARBOLOGY_NONLINEAR_COMPRESSED_BIT_PARALLEL_INDEX_H_ +#define ARBOLOGY_NONLINEAR_COMPRESSED_BIT_PARALLEL_INDEX_H_ + +#include <string> +#include <iostream> +#include <sstream> + +#include <common/DefaultSymbolType.h> + +#include <core/components.hpp> +#include <exception/CommonException.h> + +#include <object/UniqueObject.h> +#include <object/ObjectBase.h> + +#include <sax/FromXMLParserHelper.h> +#include <core/xmlApi.hpp> + +#include <container/ObjectsSet.h> +#include <container/ObjectsMap.h> +#include <container/ObjectsVector.h> +#include <common/SparseBoolVector.hpp> +#include <primitive/Bool.h> + +#include <alphabet/common/SymbolNormalize.h> + +namespace indexes { + +namespace arbology { + +class GeneralAlphabet; + +/** + * Nonlinear compressed bit parallel tree index. Stores an alphabet, one sparse bit vector + * per symbol, a subtree jump table and a vector of subtree repeats. + */ +template < class SymbolType = DefaultSymbolType > +class NonlinearCompressedBitParallelTreeIndex final : public alib::ObjectBase, public alib::Components < NonlinearCompressedBitParallelTreeIndex < SymbolType >, SymbolType, ext::tuple < GeneralAlphabet >, ext::tuple < > > { +protected: + ext::map < SymbolType, common::SparseBoolVector > m_vectors; + ext::vector < int > m_jumpTable; + ext::vector < unsigned > m_repeats; + +public: + /** + * @return newly allocated copy of this index + */ + virtual ObjectBase * clone ( ) const; + + /** + * @return newly allocated index move-constructed from this one + */ + virtual ObjectBase * plunder ( ) &&; + + explicit NonlinearCompressedBitParallelTreeIndex ( ext::set < SymbolType > alphabet, ext::map < SymbolType, common::SparseBoolVector > vectors, ext::vector < int > jumpTable, ext::vector < unsigned > repeats ); + + /** + * @return map from symbol to its sparse bit vector + */ + const ext::map < SymbolType, common::SparseBoolVector > & getData ( ) const; + + /** + * @return subtree jump table + */ + const ext::vector < int > & getJumps ( ) const; + + const ext::vector < unsigned > & 
getRepeats ( ) const; + + ext::vector < SymbolType > getString ( ) const; + + const ext::set < SymbolType > & getAlphabet ( ) const { + return this->template accessComponent < GeneralAlphabet > ( ).get ( ); + } + + /** + * Stores data as the sparse bit vector of the given symbol + * @param symbol symbol whose vector is set ( data is the vector to store ) + */ + void setNonlinearCompressedBitVectorForSymbol ( SymbolType symbol, common::SparseBoolVector data ); + + /** + * Removes symbol from the alphabet of the index + * @param symbol symbol to remove from the alphabet + */ + bool removeSymbolFromAlphabet ( const SymbolType & symbol ) { + return this->template accessComponent < GeneralAlphabet > ( ).remove ( symbol ); + } + + /** + * Prints a textual representation of the index to the output stream. + * @param out output stream to print the index to + * XML tokens are produced by compose ( ) instead. + */ + virtual void operator >>( std::ostream & out ) const; + + virtual int compare ( const ObjectBase & other ) const { + if ( ext::type_index ( typeid ( * this ) ) == ext::type_index ( typeid ( other ) ) ) return this->compare ( ( decltype ( * this ) )other ); + + return ext::type_index ( typeid ( * this ) ) - ext::type_index ( typeid ( other ) ); + } + + virtual int compare ( const NonlinearCompressedBitParallelTreeIndex & other ) const; + + virtual explicit operator std::string ( ) const; + + static const std::string & getXmlTagName ( ) { + static std::string xmlTagName = "NonlinearCompressedBitParallelTreeIndex"; + + return xmlTagName; + } + + static NonlinearCompressedBitParallelTreeIndex parse ( ext::deque < sax::Token >::iterator & input ); + + void compose ( ext::deque < sax::Token > & out ) const; + + virtual alib::ObjectBase * inc ( ) &&; + + typedef NonlinearCompressedBitParallelTreeIndex < > normalized_type; + + virtual ObjectBase * normalize ( ) && { + if ( typeid ( NonlinearCompressedBitParallelTreeIndex < > ) == typeid ( NonlinearCompressedBitParallelTreeIndex < SymbolType > ) ) + return this; 
+ + ext::set < DefaultSymbolType > alphabet = alphabet::SymbolNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < GeneralAlphabet > ( ).get ( ) ) ); + ext::map < DefaultSymbolType, common::SparseBoolVector > vectors; + for ( std::pair < SymbolType, common::SparseBoolVector > && vector : ext::make_moveable_map ( m_vectors ) ) + vectors.insert ( std::make_pair ( alphabet::SymbolNormalize::normalizeSymbol ( std::move ( vector.first ) ), std::move ( vector.second ) ) ); + + return new NonlinearCompressedBitParallelTreeIndex < > ( std::move ( alphabet ), std::move ( vectors ), std::move ( m_jumpTable ), std::move ( m_repeats ) ); + } +}; + +} /* namespace arbology */ + +} /* namespace indexes */ + +namespace indexes { + +namespace arbology { + +template < class SymbolType > +NonlinearCompressedBitParallelTreeIndex < SymbolType >::NonlinearCompressedBitParallelTreeIndex ( ext::set < SymbolType > alphabet, ext::map < SymbolType, common::SparseBoolVector > vectors, ext::vector < int > jumpTable, ext::vector < unsigned > repeats ) : alib::Components < NonlinearCompressedBitParallelTreeIndex, SymbolType, ext::tuple < GeneralAlphabet >, ext::tuple < > > ( ext::make_tuple ( std::move ( alphabet ) ), ext::tuple < > ( ) ), m_vectors ( std::move ( vectors ) ), m_jumpTable ( std::move ( jumpTable ) ), m_repeats ( std::move ( repeats ) ) { +} + +template < class SymbolType > +alib::ObjectBase * NonlinearCompressedBitParallelTreeIndex < SymbolType >::clone ( ) const { + return new NonlinearCompressedBitParallelTreeIndex ( * this ); +} + +template < class SymbolType > +alib::ObjectBase * NonlinearCompressedBitParallelTreeIndex < SymbolType >::plunder ( ) && { + return new NonlinearCompressedBitParallelTreeIndex ( std::move ( * this ) ); +} + +template < class SymbolType > +const ext::map < SymbolType, common::SparseBoolVector > & NonlinearCompressedBitParallelTreeIndex < SymbolType >::getData ( ) const { + return m_vectors; +} + +template < class SymbolType > 
+const ext::vector < int > & NonlinearCompressedBitParallelTreeIndex < SymbolType >::getJumps ( ) const { + return m_jumpTable; +} + +template < class SymbolType > +const ext::vector < unsigned > & NonlinearCompressedBitParallelTreeIndex < SymbolType >::getRepeats ( ) const { + return m_repeats; +} + +template < class SymbolType > +ext::vector < SymbolType > NonlinearCompressedBitParallelTreeIndex < SymbolType >::getString ( ) const { + ext::vector < SymbolType > res; + + unsigned index = 0; + + do { + for ( const std::pair < const SymbolType, common::SparseBoolVector > & nonlinearcompressedBitVector : m_vectors ) + if ( nonlinearcompressedBitVector.second.size ( ) > index && nonlinearcompressedBitVector.second [ index ] ) { + res.push_back ( nonlinearcompressedBitVector.first ); + continue; + } + + } while ( res.size ( ) == index ++ + 1 ); + + return res; +} + +template < class SymbolType > +void NonlinearCompressedBitParallelTreeIndex < SymbolType >::setNonlinearCompressedBitVectorForSymbol ( SymbolType symbol, common::SparseBoolVector data ) { + this->m_vectors [ symbol ] = std::move ( data ); +} + +template < class SymbolType > +void NonlinearCompressedBitParallelTreeIndex < SymbolType >::operator >>( std::ostream & out ) const { + out << "(NonlinearCompressedBitParallelTreeIndex " << this->m_vectors << ", " << m_jumpTable << ")"; +} + +template < class SymbolType > +int NonlinearCompressedBitParallelTreeIndex < SymbolType >::compare ( const NonlinearCompressedBitParallelTreeIndex & other ) const { + auto first = ext::tie ( getData ( ), getAlphabet ( ), getJumps ( ), getRepeats ( ) ); /* this index: vectors, alphabet, jumps, repeats */ + auto second = ext::tie ( other.getData ( ), other.getAlphabet ( ), other.getJumps ( ), other.getRepeats ( ) ); /* every component must come from other, including repeats */ + + static ext::compare < decltype ( first ) > comp; + + return comp ( first, second ); +} + +template < class SymbolType > +NonlinearCompressedBitParallelTreeIndex < SymbolType >::operator std::string ( ) const { + std::stringstream ss; + ss << * this; + return ss.str ( ); 
+} + +template < class SymbolType > +NonlinearCompressedBitParallelTreeIndex < SymbolType > NonlinearCompressedBitParallelTreeIndex < SymbolType >::parse ( ext::deque < sax::Token >::iterator & input ) { + sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, NonlinearCompressedBitParallelTreeIndex::getXmlTagName ( ) ); + ext::set < SymbolType > alphabet = alib::xmlApi < ext::set < SymbolType > >::parse ( input ); + ext::map < SymbolType, common::SparseBoolVector > data = alib::xmlApi < ext::map < SymbolType, common::SparseBoolVector > >::parse ( input ); + ext::vector < int > jumps = alib::xmlApi < ext::vector < int > >::parse ( input ); + ext::vector < unsigned > repeats = alib::xmlApi < ext::vector < unsigned > >::parse ( input ); + + NonlinearCompressedBitParallelTreeIndex < SymbolType > res ( std::move ( alphabet ), std::move ( data ), std::move ( jumps ), std::move ( repeats ) ); + + sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, NonlinearCompressedBitParallelTreeIndex::getXmlTagName ( ) ); + return res; +} + +template < class SymbolType > +void NonlinearCompressedBitParallelTreeIndex < SymbolType >::compose ( ext::deque < sax::Token > & out ) const { + out.emplace_back ( NonlinearCompressedBitParallelTreeIndex::getXmlTagName ( ), sax::Token::TokenType::START_ELEMENT ); + alib::xmlApi < ext::set < SymbolType > >::compose ( out, getAlphabet ( ) ); + alib::xmlApi < ext::map < SymbolType, common::SparseBoolVector > >::compose ( out, getData ( ) ); + alib::xmlApi < ext::vector < int > >::compose ( out, getJumps ( ) ); + alib::xmlApi < ext::vector < unsigned > >::compose ( out, getRepeats ( ) ); + out.emplace_back ( NonlinearCompressedBitParallelTreeIndex::getXmlTagName ( ), sax::Token::TokenType::END_ELEMENT ); +} + +template < class SymbolType > +alib::ObjectBase * NonlinearCompressedBitParallelTreeIndex < SymbolType >::inc ( ) && { + return new alib::UniqueObject ( alib::Object ( std::move ( * this ) 
), primitive::Integer ( 0 ) ); +} + +} /* namespace arbology */ + +} /* namespace indexes */ + +namespace alib { + +template < class SymbolType > +class ComponentConstraint < indexes::arbology::NonlinearCompressedBitParallelTreeIndex < SymbolType >, SymbolType, indexes::arbology::GeneralAlphabet > { +public: + static bool used ( const indexes::arbology::NonlinearCompressedBitParallelTreeIndex < SymbolType > & index, const SymbolType & symbol ) { + const ext::map < SymbolType, common::SparseBoolVector > & content = index.getData ( ); + + return content.find ( symbol ) != content.end ( ); + } + + static bool available ( const indexes::arbology::NonlinearCompressedBitParallelTreeIndex < SymbolType > &, const SymbolType & ) { + return true; + } + + static void valid ( const indexes::arbology::NonlinearCompressedBitParallelTreeIndex < SymbolType > &, const SymbolType & ) { + } + +}; + +} /* namespace alib */ + +#endif /* ARBOLOGY_NONLINEAR_COMPRESSED_BIT_PARALLEL_INDEX_H_ */ diff --git a/alib2data/src/tree/ranked/PrefixRankedBarNonlinearPattern.h b/alib2data/src/tree/ranked/PrefixRankedBarNonlinearPattern.h index 7873ce71ea..585533d7f0 100644 --- a/alib2data/src/tree/ranked/PrefixRankedBarNonlinearPattern.h +++ b/alib2data/src/tree/ranked/PrefixRankedBarNonlinearPattern.h @@ -161,7 +161,7 @@ PrefixRankedBarNonlinearPattern < SymbolType, RankType >::PrefixRankedBarNonline } template < class SymbolType, class RankType > -PrefixRankedBarNonlinearPattern < SymbolType, RankType >::PrefixRankedBarNonlinearPattern ( ext::set < common::ranked_symbol < SymbolType, RankType > > bars, common::ranked_symbol < SymbolType, RankType > variablesBar, common::ranked_symbol < SymbolType, RankType > subtreeWildcard, ext::set < common::ranked_symbol < SymbolType, RankType > > nonlinearVariables, ext::vector < common::ranked_symbol < SymbolType, RankType > > data ) : PrefixRankedBarNonlinearPattern ( bars, variablesBar, subtreeWildcard, nonlinearVariables, ext::set < common::ranked_symbol < 
SymbolType, RankType > > ( data.begin ( ), data.end ( ) ) + bars + ext::set < common::ranked_symbol < SymbolType, RankType > > { variablesBar, subtreeWildcard }, data ) { +PrefixRankedBarNonlinearPattern < SymbolType, RankType >::PrefixRankedBarNonlinearPattern ( ext::set < common::ranked_symbol < SymbolType, RankType > > bars, common::ranked_symbol < SymbolType, RankType > variablesBar, common::ranked_symbol < SymbolType, RankType > subtreeWildcard, ext::set < common::ranked_symbol < SymbolType, RankType > > nonlinearVariables, ext::vector < common::ranked_symbol < SymbolType, RankType > > data ) : PrefixRankedBarNonlinearPattern ( bars, variablesBar, subtreeWildcard, nonlinearVariables, ext::set < common::ranked_symbol < SymbolType, RankType > > ( data.begin ( ), data.end ( ) ) + bars + ext::set < common::ranked_symbol < SymbolType, RankType > > { variablesBar, subtreeWildcard } + nonlinearVariables, data ) { } template < class SymbolType, class RankType > diff --git a/aquery2/src/aquery.cpp b/aquery2/src/aquery.cpp index 158ac92135..b95b2a1e18 100644 --- a/aquery2/src/aquery.cpp +++ b/aquery2/src/aquery.cpp @@ -21,6 +21,7 @@ #include <stringology/query/BitParallelismFactors.h> #include <stringology/query/CompressedBitParallelismFactors.h> #include <arbology/query/CompressedBitParallelismPatterns.h> +#include <arbology/query/NonlinearCompressedBitParallelismPatterns.h> #include <arbology/query/FullAndLinearIndexPatterns.h> #include <arbology/query/NonlinearFullAndLinearIndexPatterns.h> @@ -38,6 +39,7 @@ int main ( int argc, char * argv[] ) { allowed.push_back ( "bitParallelismFactors" ); allowed.push_back ( "compressedBitParallelismFactors" ); allowed.push_back ( "compressedBitParallelismPatterns" ); + allowed.push_back ( "nonlinearCompressedBitParallelismPatterns" ); allowed.push_back ( "fullAndLinearIndexPatterns" ); allowed.push_back ( "nonlinearFullAndLinearIndexPatterns" ); TCLAP::ValuesConstraint < std::string > allowedVals ( allowed ); @@ -144,6 +146,19 @@ 
int main ( int argc, char * argv[] ) { measurements::end ( ); measurements::start ( "Output write", measurements::Type::AUXILIARY ); + alib::XmlDataFactory::toStdout ( res ); + } else if ( query.getValue ( ) == "nonlinearCompressedBitParallelismPatterns" ) { + indexes::arbology::NonlinearCompressedBitParallelTreeIndex < common::ranked_symbol < DefaultSymbolType, DefaultRankType > > nonlinearCompressedBitParallelTreeIndex = alib::XmlDataFactory::fromTokens ( sax::FromXMLParserHelper::parseInput ( indexInput ) ); + tree::RankedTreeWrapper pattern = alib::XmlDataFactory::fromTokens ( std::move ( sax::FromXMLParserHelper::parseInput(true, patternInput).front ( ) ) ); + + measurements::end ( ); + measurements::start ( "Algorithm", measurements::Type::MAIN ); + + ext::set < unsigned > res = arbology::query::NonlinearCompressedBitParallelismPatterns::query ( nonlinearCompressedBitParallelTreeIndex, pattern ); + + measurements::end ( ); + measurements::start ( "Output write", measurements::Type::AUXILIARY ); + alib::XmlDataFactory::toStdout ( res ); } else if ( query.getValue ( ) == "fullAndLinearIndexPatterns" ) { indexes::arbology::FullAndLinearIndex < common::ranked_symbol < DefaultSymbolType, DefaultRankType > > fullAndLinearIndex = alib::XmlDataFactory::fromTokens ( sax::FromXMLParserHelper::parseInput ( indexInput ) ); diff --git a/tests.aarbology.sh b/tests.aarbology.sh index e18aa76100..54638e2655 100755 --- a/tests.aarbology.sh +++ b/tests.aarbology.sh @@ -410,6 +410,8 @@ function runTestNonlinearPatternEnds { clearResults } +runTestNonlinearPattern "Exact Nonlinear Pattern Matching Using Compressed Bit Vectors (PrefixRankedBar)" "./aarbology2 -a nonlinearCompressedBitParallelIndex -s <(./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) | ./aquery2 -q nonlinearCompressedBitParallelismPatterns -i - -p <( ./acast2 -t PrefixRankedBarNonlinearPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size" + runTestNonlinearPattern "Exact Nonlinear Pattern Matching Using Full 
And Linear Index (PrefixRanked)" "./aarbology2 -a nonlinearFullAndLinearIndex -s <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) | ./aquery2 -q nonlinearFullAndLinearIndexPatterns -i - -p <( ./acast2 -t PrefixRankedNonlinearPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size" runTestNonlinearPattern "Exact Nonlinear Pattern Matching Using Full And Linear Index (PrefixRankedBar)" "./aarbology2 -a nonlinearFullAndLinearIndex -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) | ./aquery2 -q nonlinearFullAndLinearIndexPatterns -i - -p <( ./acast2 -t PrefixRankedBarNonlinearPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size" -- GitLab