diff --git a/aarbology2/src/aarbology.cpp b/aarbology2/src/aarbology.cpp index bbf729c66810c7c49ff8c7ecbcee8c2e837f7fb2..d3c492cdeb302204180bf1f635b57e2844873c99 100644 --- a/aarbology2/src/aarbology.cpp +++ b/aarbology2/src/aarbology.cpp @@ -33,6 +33,7 @@ #include <arbology/transform/BeginToEndIndex.h> #include <arbology/indexing/CompressedBitParallelIndexConstruction.h> #include <arbology/indexing/FullAndLinearIndexConstruction.h> +#include <arbology/indexing/NonlinearFullAndLinearIndexConstruction.h> int main ( int argc, char * argv[] ) { try { @@ -53,10 +54,11 @@ int main ( int argc, char * argv[] ) { allowed.push_back ( "exactSubtreeAutomaton" ); allowed.push_back ( "exactTreePatternAutomaton" ); allowed.push_back ( "exactNonlinearTreePatternAutomaton" ); - allowed.push_back ( "exactSubtreeRepeatsNaive" ); allowed.push_back ( "compressedBitParallelIndex" ); allowed.push_back ( "fullAndLinearIndex" ); + allowed.push_back ( "nonlinearFullAndLinearIndex" ); + allowed.push_back ( "exactSubtreeRepeatsNaive" ); allowed.push_back ( "badCharacterShiftTable" ); TCLAP::ValuesConstraint < std::string > allowedVals ( allowed ); @@ -283,6 +285,18 @@ int main ( int argc, char * argv[] ) { measurements::start ( "Output write", measurements::Type::AUXILIARY ); alib::XmlDataFactory::toStdout ( fullAndLinearIndex ); + } else if ( algorithm.getValue ( ) == "nonlinearFullAndLinearIndex" ) { + tree::RankedTreeWrapper subject = alib::XmlDataFactory::fromTokens ( std::move ( sax::FromXMLParserHelper::parseInput(true, subjectInput).front ( ) ) ); + + measurements::end ( ); + measurements::start ( "Algorithm", measurements::Type::MAIN ); + + indexes::arbology::NonlinearFullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > > nonlinearFullAndLinearIndex = arbology::indexing::NonlinearFullAndLinearIndexConstruction::construct ( subject ); + + measurements::end ( ); + measurements::start ( "Output write", measurements::Type::AUXILIARY ); + + 
alib::XmlDataFactory::toStdout ( nonlinearFullAndLinearIndex ); } else if ( algorithm.getValue ( ) == "badCharacterShiftTable" ) { tree::RankedTreeWrapper pattern = alib::XmlDataFactory::fromTokens ( std::move ( sax::FromXMLParserHelper::parseInput(true, patternInput).front ( ) ) ); diff --git a/alib2algo/src/arbology/indexing/NonlinearFullAndLinearIndexConstruction.cpp b/alib2algo/src/arbology/indexing/NonlinearFullAndLinearIndexConstruction.cpp new file mode 100644 index 0000000000000000000000000000000000000000..63fbe9709630eb34a3206c4910cbda221d8a5e36 --- /dev/null +++ b/alib2algo/src/arbology/indexing/NonlinearFullAndLinearIndexConstruction.cpp @@ -0,0 +1,23 @@ +/* + * NonlinearFullAndLinearIndexConstruction.cpp + * + * Created on: 13. Apr 2017 + * Author: Jan Travnicek + */ + +#include "NonlinearFullAndLinearIndexConstruction.h" + +namespace arbology { + +namespace indexing { +/* Entry point for wrapped trees: hands the data held by the wrapper over to dispatch ( ). */ +indexes::arbology::NonlinearFullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > > NonlinearFullAndLinearIndexConstruction::construct ( const tree::RankedTreeWrapper & tree ) { + return dispatch ( tree.getData ( ) ); +} +/* RegistratorWrapper instances for the PrefixRankedTree and PrefixRankedBarTree overloads of construct. */ +auto nonlinearFullAndLinearIndexConstructionPrefixRankedTree = NonlinearFullAndLinearIndexConstruction::RegistratorWrapper < indexes::arbology::NonlinearFullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > >, tree::PrefixRankedTree < > > ( NonlinearFullAndLinearIndexConstruction::construct ); +auto nonlinearFullAndLinearIndexConstructionPrefixRankedBarTree = NonlinearFullAndLinearIndexConstruction::RegistratorWrapper < indexes::arbology::NonlinearFullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > >, tree::PrefixRankedBarTree < > > ( NonlinearFullAndLinearIndexConstruction::construct ); + +} /* namespace indexing */ + +} /* namespace arbology */ diff --git a/alib2algo/src/arbology/indexing/NonlinearFullAndLinearIndexConstruction.h 
b/alib2algo/src/arbology/indexing/NonlinearFullAndLinearIndexConstruction.h new file mode 100644 index 0000000000000000000000000000000000000000..7c9a5e49860fa93b1927071ec8e08563f391922a --- /dev/null +++ b/alib2algo/src/arbology/indexing/NonlinearFullAndLinearIndexConstruction.h @@ -0,0 +1,68 @@ +/* + * NonlinearFullAndLinearIndexConstruction.h + * + * Created on: 13. Apr 2017 + * Author: Jan Travnicek + */ + +#ifndef ARBOLOGY_NONLINEAR_FULL_AND_LINEAR_INDEX_CONSTRUCTION_H_ +#define ARBOLOGY_NONLINEAR_FULL_AND_LINEAR_INDEX_CONSTRUCTION_H_ + +#include <indexes/arbology/NonlinearFullAndLinearIndex.h> +#include <tree/RankedTreeWrapper.h> +#include <tree/ranked/PrefixRankedTree.h> +#include <core/multipleDispatch.hpp> +#include <tree/properties/SubtreeJumpTable.h> +#include <tree/properties/ExactSubtreeRepeatsNaive.h> +#include <stringology/indexing/PositionHeapNaive.h> + +namespace arbology { + +namespace indexing { + +/** + * Constructs a nonlinear full and linear index for given tree. + * The index bundles a position heap of the linearised tree, its subtree jump table and its subtree repeats. + */ + +class NonlinearFullAndLinearIndexConstruction : public std::SingleDispatch < NonlinearFullAndLinearIndexConstruction, indexes::arbology::NonlinearFullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > >, const tree::RankedTreeBase & > { +public: + /** + * Creates nonlinear full and linear index for trees + * @param tree tree to construct the index for + * @return the index + */ + static indexes::arbology::NonlinearFullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > > construct ( const tree::RankedTreeWrapper & tree ); + /* Overload for trees in prefix ranked notation. */ + template < class SymbolType, class RankType > + static indexes::arbology::NonlinearFullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > construct ( const tree::PrefixRankedTree < SymbolType, RankType > & tree ); + /* Overload for trees in prefix ranked bar notation. */ + template < class SymbolType, class RankType > + static indexes::arbology::NonlinearFullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > construct ( const 
tree::PrefixRankedBarTree < SymbolType, RankType > & tree ); +}; + +template < class SymbolType, class RankType > +indexes::arbology::NonlinearFullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > NonlinearFullAndLinearIndexConstruction::construct ( const tree::PrefixRankedTree < SymbolType, RankType > & w ) { + std::vector < unsigned > repeats; + std::vector < std::ranked_symbol < unsigned, RankType > > content = tree::properties::ExactSubtreeRepeatsNaive::repeats ( w ).getContent ( ); + for ( const std::ranked_symbol < unsigned, RankType > & symbol : content ) + repeats.push_back ( symbol.getSymbol ( ) ); + /* repeats is not needed afterwards, hand it over without copying */ + return indexes::arbology::NonlinearFullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > ( stringology::indexing::PositionHeapNaive::construct ( string::LinearString < std::ranked_symbol < SymbolType, RankType > > ( w ) ), tree::properties::SubtreeJumpTable::compute ( w ), std::move ( repeats ) ); +} + +template < class SymbolType, class RankType > +indexes::arbology::NonlinearFullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > NonlinearFullAndLinearIndexConstruction::construct ( const tree::PrefixRankedBarTree < SymbolType, RankType > & w ) { + std::vector < unsigned > repeats; + std::vector < std::ranked_symbol < unsigned, RankType > > content = tree::properties::ExactSubtreeRepeatsNaive::repeats ( w ).getContent ( ); + for ( const std::ranked_symbol < unsigned, RankType > & symbol : content ) + repeats.push_back ( symbol.getSymbol ( ) ); + /* repeats is not needed afterwards, hand it over without copying */ + return indexes::arbology::NonlinearFullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > ( stringology::indexing::PositionHeapNaive::construct ( string::LinearString < std::ranked_symbol < SymbolType, RankType > > ( w ) ), tree::properties::SubtreeJumpTable::compute ( w ), std::move ( repeats ) ); +} + +} /* namespace indexing */ + +} /* namespace arbology */ + +#endif /* ARBOLOGY_NONLINEAR_FULL_AND_LINEAR_INDEX_CONSTRUCTION_H_ */ diff --git 
a/alib2algo/src/arbology/query/NonlinearFullAndLinearIndexPatterns.cpp b/alib2algo/src/arbology/query/NonlinearFullAndLinearIndexPatterns.cpp new file mode 100644 index 0000000000000000000000000000000000000000..957011a4c6956f8f807b797e43c299726e6f23fa --- /dev/null +++ b/alib2algo/src/arbology/query/NonlinearFullAndLinearIndexPatterns.cpp @@ -0,0 +1,25 @@ +/* + * NonlinearFullAndLinearIndexPatterns.cpp + * + * Created on: 2. 1. 2017 + * Author: Jan Travnicek + */ + +#include "NonlinearFullAndLinearIndexPatterns.h" + +#include <tree/ranked/PrefixRankedPattern.h> + +namespace arbology { + +namespace query { +/* Entry point for wrapped patterns: hands the index and the data held by the wrapper over to dispatch ( ). */ +std::set < unsigned > NonlinearFullAndLinearIndexPatterns::query ( const indexes::arbology::NonlinearFullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > > & nonlinearFullAndLinearIndex, const tree::RankedTreeWrapper & tree ) { + return dispatch ( nonlinearFullAndLinearIndex, tree.getData ( ) ); +} +/* RegistratorWrapper instances for the PrefixRankedNonlinearPattern and PrefixRankedBarNonlinearPattern overloads of query. */ +auto nonlinearFullAndLinearIndexPatternsPrefixRankedPattern = NonlinearFullAndLinearIndexPatterns::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedNonlinearPattern < > > ( NonlinearFullAndLinearIndexPatterns::query ); +auto nonlinearFullAndLinearIndexPatternsPrefixRankedBarPattern = NonlinearFullAndLinearIndexPatterns::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarNonlinearPattern < > > ( NonlinearFullAndLinearIndexPatterns::query ); + +} /* namespace query */ + +} /* namespace arbology */ diff --git a/alib2algo/src/arbology/query/NonlinearFullAndLinearIndexPatterns.h b/alib2algo/src/arbology/query/NonlinearFullAndLinearIndexPatterns.h new file mode 100644 index 0000000000000000000000000000000000000000..5edfa755d96ced54e2ca9b8689d91f118b1eb8b0 --- /dev/null +++ b/alib2algo/src/arbology/query/NonlinearFullAndLinearIndexPatterns.h @@ -0,0 +1,172 @@ +/* + * NonlinearFullAndLinearIndexPatterns.h + * + * Created on: 2. 1. 
2017 + * Author: Jan Travnicek + */ + +#ifndef NONLINEAR_FULL_AND_LINEAR_INDEX_PATTERNS_H_ +#define NONLINEAR_FULL_AND_LINEAR_INDEX_PATTERNS_H_ + +#include <indexes/arbology/NonlinearFullAndLinearIndex.h> +#include <tree/RankedTreeWrapper.h> +#include <tree/ranked/PrefixRankedNonlinearPattern.h> +#include <tree/ranked/PrefixRankedBarNonlinearPattern.h> +#include <core/multipleDispatch.hpp> +#include <global/GlobalData.h> + +#include <stringology/query/PositionHeapFactors.h> + +namespace arbology { + +namespace query { + +/** + * Queries a nonlinear full and linear index with a nonlinear tree pattern. + * Candidates are kept as ( first, second ) pairs: first is the occurrence of the leading pattern part, second the position in the indexed string reached so far. + */ + +class NonlinearFullAndLinearIndexPatterns : public std::SingleDispatchFirstStaticParam < NonlinearFullAndLinearIndexPatterns, std::set < unsigned >, const indexes::arbology::NonlinearFullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > > &, const tree::RankedTreeBase & > { + template < class SymbolType, class RankType > + static std::vector < std::pair < unsigned, unsigned > > FindOccurrences ( const indexes::stringology::PositionHeap < std::ranked_symbol < SymbolType, RankType > > & stringIndex, const std::vector < std::ranked_symbol < SymbolType, RankType > > & string ) { /* pairs each factor occurrence of string with occurrence + string.size ( ) */ + std::vector < std::pair < unsigned, unsigned > > res; + for ( unsigned occurrence : stringology::query::PositionHeapFactors::query ( stringIndex, string::LinearString < std::ranked_symbol < SymbolType, RankType > > ( string ) ) ) { + res.push_back ( std::make_pair ( occurrence, occurrence + string.size ( ) ) ); + } + return res; + } + /* Chains prevOcc with subOcc: yields ( prev.first, sub.second ) whenever sub.first equals prev.second. rev is scratch space indexed by position; touched entries are reset before returning. */ + static std::vector < std::pair < unsigned, unsigned > > MergeOccurrences ( const std::vector < std::pair < unsigned, unsigned > > & prevOcc, const std::vector < std::pair < unsigned, unsigned > > & subOcc, std::vector < unsigned > & rev ) { + std::vector < std::pair < unsigned, unsigned > > res; + /* index the previous occurrences by the position they have reached */ + for ( const std::pair < unsigned, unsigned > & occurrence : prevOcc ) { + rev [ occurrence.second ] = occurrence.first; + } + /* keep sub-occurrences that start where a previous occurrence with a valid first component ended */ + for ( const std::pair < 
unsigned, unsigned > & subOccurrence : subOcc ) { + if ( rev [ subOccurrence.first ] != ( unsigned ) -1 ) + res.push_back ( std::make_pair ( rev [ subOccurrence.first ], subOccurrence.second ) ); + } + /* put the touched rev entries back to the ( unsigned ) -1 marker */ + for ( const std::pair < unsigned, unsigned > & occurrence : prevOcc ) { + rev [ occurrence.second ] = ( unsigned ) -1; + } + + return res; + } +public: + /** + * Query a nonlinear full and linear index + * @param fullAndLinearIndex index to query + * @param pattern pattern to query by + * @return first components of the occurrence pairs that matched the whole pattern + */ + static std::set < unsigned > query ( const indexes::arbology::NonlinearFullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > > & fullAndLinearIndex, const tree::RankedTreeWrapper & pattern ); + /* Overload for patterns in prefix ranked notation. */ + template < class SymbolType, class RankType > + static std::set < unsigned > query ( const indexes::arbology::NonlinearFullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > & compressedBitParallelTreeIndex, const tree::PrefixRankedNonlinearPattern < SymbolType, RankType > & pattern ); + /* Overload for patterns in prefix ranked bar notation. */ + template < class SymbolType, class RankType > + static std::set < unsigned > query ( const indexes::arbology::NonlinearFullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > & compressedBitParallelTreeIndex, const tree::PrefixRankedBarNonlinearPattern < SymbolType, RankType > & pattern ); +}; + +template < class SymbolType, class RankType > +std::set < unsigned > NonlinearFullAndLinearIndexPatterns::query ( const indexes::arbology::NonlinearFullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > & fullAndLinearIndex, const tree::PrefixRankedNonlinearPattern < SymbolType, RankType > & pattern ) { + std::map < std::pair < unsigned, std::ranked_symbol < SymbolType, RankType > >, unsigned > nonlinearVariablesMap; + std::vector < unsigned > rev ( fullAndLinearIndex.getString ( ).size ( ), ( unsigned ) -1 ); + /* split the pattern into parts: even indices hold runs of ordinary symbols ( possibly empty ), odd indices hold exactly one wildcard or nonlinear variable */ + std::vector < std::vector < std::ranked_symbol < SymbolType, RankType > > > treePatternParts; + treePatternParts.push_back ( std::vector < 
std::ranked_symbol < SymbolType, RankType > > ( ) ); + for ( const std::ranked_symbol < SymbolType, RankType > & symbol : pattern.getContent ( ) ) { + if ( pattern.getSubtreeWildcard ( ) == symbol || pattern.getNonlinearVariables ( ).count ( symbol ) ) { + treePatternParts.push_back ( std::vector < std::ranked_symbol < SymbolType, RankType > > ( ) ); + treePatternParts.back ( ).push_back ( symbol ); + treePatternParts.push_back ( std::vector < std::ranked_symbol < SymbolType, RankType > > ( ) ); + } else { + treePatternParts.back ( ).push_back ( symbol ); + } + } + /* candidates start out as the occurrences of the leading part */ + std::vector < std::pair < unsigned, unsigned > > prevOcc = FindOccurrences ( fullAndLinearIndex.getStringIndex ( ) , treePatternParts [ 0 ] ); + /* at the loop head i addresses a wildcard / variable part; the ordinary part behind it is handled in the same iteration */ + for ( unsigned i = 1; i < treePatternParts.size ( ); ++ i ) { + for ( std::pair < unsigned, unsigned > & occurrence : prevOcc ) { + if ( pattern.getNonlinearVariables ( ).count ( treePatternParts [ i ].back ( ) ) ) { + auto variableSettingIter = nonlinearVariablesMap.find ( std::make_pair ( occurrence.first, treePatternParts [ i ].back ( ) ) ); + if ( variableSettingIter == nonlinearVariablesMap.end ( ) ) + nonlinearVariablesMap.insert ( std::make_pair ( std::make_pair ( occurrence.first, treePatternParts [ i ].back ( ) ), fullAndLinearIndex.getRepeats ( ) [ occurrence.second ] ) ); + else if ( variableSettingIter->second != fullAndLinearIndex.getRepeats ( ) [ occurrence.second ] ) + occurrence.first = ( unsigned ) -1; /* repeats value differs from the one recorded for this variable: invalidate the candidate */ + } + occurrence.second = fullAndLinearIndex.getJumps ( ) [ occurrence.second ]; /* follow the jump table from the current position */ + } + /* step on to the ordinary part that follows */ + ++ i; + + if ( ! 
treePatternParts [ i ].empty ( ) ) + prevOcc = MergeOccurrences ( prevOcc, FindOccurrences ( fullAndLinearIndex.getStringIndex ( ), treePatternParts [ i ] ), rev ); + } + /* candidates invalidated above are left out of the result */ + std::set < unsigned > res; + for ( const std::pair < unsigned, unsigned > & occurrence : prevOcc ) { + if ( occurrence.first != ( unsigned ) -1 ) + res.insert ( occurrence.first ); + } + + return res; +} +/* PrefixRankedBarNonlinearPattern variant: same procedure, except that symbols equal to pattern.getVariablesBar ( ) are left out of the parts. */ +template < class SymbolType, class RankType > +std::set < unsigned > NonlinearFullAndLinearIndexPatterns::query ( const indexes::arbology::NonlinearFullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > & fullAndLinearIndex, const tree::PrefixRankedBarNonlinearPattern < SymbolType, RankType > & pattern ) { + std::map < std::pair < unsigned, std::ranked_symbol < SymbolType, RankType > >, unsigned > nonlinearVariablesMap; + std::vector < unsigned > rev ( fullAndLinearIndex.getString ( ).size ( ), ( unsigned ) -1 ); + /* split the pattern into parts: even indices hold runs of ordinary symbols ( possibly empty ), odd indices hold exactly one wildcard or nonlinear variable */ + std::vector < std::vector < std::ranked_symbol < SymbolType, RankType > > > treePatternParts; + treePatternParts.push_back ( std::vector < std::ranked_symbol < SymbolType, RankType > > ( ) ); + for ( const std::ranked_symbol < SymbolType, RankType > & symbol : pattern.getContent ( ) ) { + if ( symbol == pattern.getVariablesBar ( ) ) + continue; + + if ( pattern.getSubtreeWildcard ( ) == symbol || pattern.getNonlinearVariables ( ).count ( symbol ) ) { + treePatternParts.push_back ( std::vector < std::ranked_symbol < SymbolType, RankType > > ( ) ); + treePatternParts.back ( ).push_back ( symbol ); + treePatternParts.push_back ( std::vector < std::ranked_symbol < SymbolType, RankType > > ( ) ); + } else { + treePatternParts.back ( ).push_back ( symbol ); + } + } + /* candidates start out as the occurrences of the leading part */ + std::vector < std::pair < unsigned, unsigned > > prevOcc = FindOccurrences ( fullAndLinearIndex.getStringIndex ( ) , treePatternParts [ 0 ] ); + /* at the loop head i addresses a wildcard / variable part; the ordinary part behind it is handled in the same iteration */ + for ( unsigned i = 1; i < treePatternParts.size ( ); ++ i ) { + for ( std::pair < unsigned, unsigned > & occurrence : prevOcc ) { + if ( 
pattern.getNonlinearVariables ( ).count ( treePatternParts [ i ].back ( ) ) ) { + auto variableSettingIter = nonlinearVariablesMap.find ( std::make_pair ( occurrence.first, treePatternParts [ i ].back ( ) ) ); + if ( variableSettingIter == nonlinearVariablesMap.end ( ) ) + nonlinearVariablesMap.insert ( std::make_pair ( std::make_pair ( occurrence.first, treePatternParts [ i ].back ( ) ), fullAndLinearIndex.getRepeats ( ) [ occurrence.second ] ) ); + else if ( variableSettingIter->second != fullAndLinearIndex.getRepeats ( ) [ occurrence.second ] ) + occurrence.first = ( unsigned ) -1; /* repeats value differs from the one recorded for this variable: invalidate the candidate */ + } + occurrence.second = fullAndLinearIndex.getJumps ( ) [ occurrence.second ]; /* follow the jump table from the current position */ + } + /* step on to the ordinary part that follows */ + ++ i; + + if ( ! treePatternParts [ i ].empty ( ) ) + prevOcc = MergeOccurrences ( prevOcc, FindOccurrences ( fullAndLinearIndex.getStringIndex ( ), treePatternParts [ i ] ), rev ); + } + /* candidates invalidated above are left out of the result */ + std::set < unsigned > res; + for ( const std::pair < unsigned, unsigned > & occurrence : prevOcc ) { + if ( occurrence.first != ( unsigned ) -1 ) + res.insert ( occurrence.first ); + } + + return res; +} + +} /* namespace query */ + +} /* namespace arbology */ + +#endif /* NONLINEAR_FULL_AND_LINEAR_INDEX_PATTERNS_H_ */ diff --git a/alib2data/src/indexes/arbology/NonlinearFullAndLinearIndex.cpp b/alib2data/src/indexes/arbology/NonlinearFullAndLinearIndex.cpp new file mode 100644 index 0000000000000000000000000000000000000000..83aef508f4e6ea0902dcaabbc29b2a3a9b39e0a0 --- /dev/null +++ b/alib2data/src/indexes/arbology/NonlinearFullAndLinearIndex.cpp @@ -0,0 +1,14 @@ +/* + * NonlinearFullAndLinearIndex.cpp + * + * Created on: Apr 13, 2017 + * Author: Jan Travnicek + */ + +#include "NonlinearFullAndLinearIndex.h" + +namespace alib { + +auto arbologyNonlinearFullAndLinearIndexParserRegister = xmlApi < alib::Object >::ParserRegister < indexes::arbology::NonlinearFullAndLinearIndex < > > ( ); + +} /* namespace alib */ diff --git a/alib2data/src/indexes/arbology/NonlinearFullAndLinearIndex.h 
b/alib2data/src/indexes/arbology/NonlinearFullAndLinearIndex.h new file mode 100644 index 0000000000000000000000000000000000000000..9629fe3d4fec045dfdf09695ff3eb32936e9758f --- /dev/null +++ b/alib2data/src/indexes/arbology/NonlinearFullAndLinearIndex.h @@ -0,0 +1,233 @@ +/* + * NonlinearFullAndLinearIndex.h + * + * Created on: Apr 13, 2017 + * Author: Jan Travnicek + */ + +#ifndef ARBOLOGY_NONLINEAR_FULL_AND_LINEAR_INDEX_H_ +#define ARBOLOGY_NONLINEAR_FULL_AND_LINEAR_INDEX_H_ + +#include <string> +#include <iostream> +#include <sstream> + +#include <common/DefaultSymbolType.h> + +#include <object/UniqueObject.h> +#include <object/ObjectBase.h> + +#include <sax/FromXMLParserHelper.h> +#include <core/xmlApi.hpp> + +#include <container/ObjectsVector.h> + +#include <indexes/stringology/PositionHeap.h> + +#include <alphabet/common/SymbolNormalize.h> + +namespace indexes { + +namespace arbology { + +class GeneralAlphabet; + +/** + * Represents a nonlinear full and linear index: a position heap over the indexed string, + * a subtree jump table and a vector of repeats. 
+ */ +template < class SymbolType = DefaultSymbolType > +class NonlinearFullAndLinearIndex : public alib::ObjectBase { +protected: + indexes::stringology::PositionHeap < SymbolType > m_StringIndex; + std::vector < int > m_JumpTable; + std::vector < unsigned > m_Repeats; + +public: + /** + * @return heap-allocated copy of this index + */ + virtual ObjectBase * clone ( ) const; + + /** + * @return heap-allocated index move-constructed from this one + */ + virtual ObjectBase * plunder ( ) &&; + /* Takes the three components by value and moves them into the members. */ + explicit NonlinearFullAndLinearIndex ( indexes::stringology::PositionHeap < SymbolType > stringIndex, std::vector < int > jumpTable, std::vector < unsigned > repeats ); + + /** + * @return the position heap serving as the string index + */ + const indexes::stringology::PositionHeap < SymbolType > & getStringIndex ( ) const; + + /** + * @return subtree jump table + */ + const std::vector < int > & getJumps ( ) const; + + /** + * @return repeats + */ + const std::vector < unsigned > & getRepeats ( ) const; + /* String held by the position heap. */ + const std::vector < SymbolType > & getString ( ) const; + /* Alphabet held by the position heap. */ + const std::set < SymbolType > & getAlphabet ( ) const { + return m_StringIndex.getAlphabet ( ); + } + + /** + * Replaces the string index + * @param stringIndex position heap to store + */ + void setStringIndex ( indexes::stringology::PositionHeap < SymbolType > stringIndex ); + + /** + * Removes symbol from the alphabet of the string index + * @param symbol removed symbol from the alphabet + */ + bool removeSymbolFromAlphabet ( const SymbolType & symbol ) { + return m_StringIndex.removeSymbolFromAlphabet ( symbol ); + } + + /** + * Prints textual representation of the index to the output stream. 
+ * @param out output stream to which print the index + * The string index, the jump table and the repeats are printed. + */ + virtual void operator >>( std::ostream & out ) const; + + virtual int compare ( const ObjectBase & other ) const { + if ( std::type_index ( typeid ( * this ) ) == std::type_index ( typeid ( other ) ) ) return this->compare ( ( decltype ( * this ) )other ); + + return std::type_index ( typeid ( * this ) ) - std::type_index ( typeid ( other ) ); + } + /* Compares string index, jump table and repeats, in that order. */ + virtual int compare ( const NonlinearFullAndLinearIndex & other ) const; + + virtual explicit operator std::string ( ) const; + + static const std::string & getXmlTagName ( ) { + static std::string xmlTagName = "NonlinearFullAndLinearIndex"; + + return xmlTagName; + } + /* Reads the index from XML tokens: string index, jump table, repeats. */ + static NonlinearFullAndLinearIndex parse ( std::deque < sax::Token >::iterator & input ); + /* Writes the index as XML tokens in the order parse ( ) reads them. */ + void compose ( std::deque < sax::Token > & out ) const; + + virtual alib::ObjectBase * inc ( ) &&; + + virtual ObjectBase * normalize ( ) && { + if ( typeid ( NonlinearFullAndLinearIndex < > ) == typeid ( NonlinearFullAndLinearIndex < SymbolType > ) ) + return this; + + indexes::stringology::PositionHeap < DefaultSymbolType > stringIndex = std::move ( m_StringIndex ).normalizeRaw ( ); + + return new NonlinearFullAndLinearIndex < > ( std::move ( stringIndex ), std::move ( m_JumpTable ), std::move ( m_Repeats ) ); + } +}; + +} /* namespace arbology */ + +} /* namespace indexes */ + +namespace indexes { + +namespace arbology { + +template < class SymbolType > +NonlinearFullAndLinearIndex < SymbolType >::NonlinearFullAndLinearIndex ( indexes::stringology::PositionHeap < SymbolType > stringIndex, std::vector < int > jumpTable, std::vector < unsigned > repeats ) : m_StringIndex ( std::move ( stringIndex ) ), m_JumpTable ( std::move ( jumpTable ) ), m_Repeats ( std::move ( repeats ) ) { +} + +template < class SymbolType > +alib::ObjectBase * NonlinearFullAndLinearIndex < SymbolType >::clone ( ) const { + return new NonlinearFullAndLinearIndex ( * this ); +} + +template < class SymbolType > 
+alib::ObjectBase * NonlinearFullAndLinearIndex < SymbolType >::plunder ( ) && { + return new NonlinearFullAndLinearIndex ( std::move ( * this ) ); +} + +template < class SymbolType > +const indexes::stringology::PositionHeap < SymbolType > & NonlinearFullAndLinearIndex < SymbolType >::getStringIndex ( ) const { + return m_StringIndex; +} + +template < class SymbolType > +const std::vector < int > & NonlinearFullAndLinearIndex < SymbolType >::getJumps ( ) const { + return m_JumpTable; +} + +template < class SymbolType > +const std::vector < unsigned > & NonlinearFullAndLinearIndex < SymbolType >::getRepeats ( ) const { + return m_Repeats; +} + +template < class SymbolType > +const std::vector < SymbolType > & NonlinearFullAndLinearIndex < SymbolType >::getString ( ) const { + return m_StringIndex.getString ( ); +} + +template < class SymbolType > +void NonlinearFullAndLinearIndex < SymbolType >::setStringIndex ( indexes::stringology::PositionHeap < SymbolType > stringIndex ) { + this->m_StringIndex = std::move ( stringIndex ); +} + +template < class SymbolType > +void NonlinearFullAndLinearIndex < SymbolType >::operator >>( std::ostream & out ) const { + out << "(NonlinearFullAndLinearIndex " << this->m_StringIndex << ", " << m_JumpTable << ", " << m_Repeats << ")"; +} + +template < class SymbolType > +int NonlinearFullAndLinearIndex < SymbolType >::compare ( const NonlinearFullAndLinearIndex & other ) const { + auto first = std::tie ( getStringIndex ( ), getJumps ( ), getRepeats ( ) ); + auto second = std::tie ( other.getStringIndex ( ), other.getJumps ( ), other.getRepeats ( ) ); + + static std::compare < decltype ( first ) > comp; + + return comp ( first, second ); +} + +template < class SymbolType > +NonlinearFullAndLinearIndex < SymbolType >::operator std::string ( ) const { + std::stringstream ss; + ss << * this; + return ss.str ( ); +} + +template < class SymbolType > +NonlinearFullAndLinearIndex < SymbolType > NonlinearFullAndLinearIndex < SymbolType >::parse ( std::deque < 
sax::Token >::iterator & input ) { + sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, NonlinearFullAndLinearIndex::getXmlTagName ( ) ); + indexes::stringology::PositionHeap < SymbolType > stringIndex = alib::xmlApi < indexes::stringology::PositionHeap < SymbolType > >::parse ( input ); + std::vector < int > jumps = alib::xmlApi < std::vector < int > >::parse ( input ); + std::vector < unsigned > repeats = alib::xmlApi < std::vector < unsigned > >::parse ( input ); + + NonlinearFullAndLinearIndex < SymbolType > res ( std::move ( stringIndex ), std::move ( jumps ), std::move ( repeats ) ); + + sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, NonlinearFullAndLinearIndex::getXmlTagName ( ) ); + return res; +} + +template < class SymbolType > +void NonlinearFullAndLinearIndex < SymbolType >::compose ( std::deque < sax::Token > & out ) const { + out.emplace_back ( NonlinearFullAndLinearIndex::getXmlTagName ( ), sax::Token::TokenType::START_ELEMENT ); + alib::xmlApi < indexes::stringology::PositionHeap < SymbolType > >::compose ( out, getStringIndex ( ) ); + alib::xmlApi < std::vector < int > >::compose ( out, getJumps ( ) ); + alib::xmlApi < std::vector < unsigned > >::compose ( out, getRepeats ( ) ); + out.emplace_back ( NonlinearFullAndLinearIndex::getXmlTagName ( ), sax::Token::TokenType::END_ELEMENT ); +} + +template < class SymbolType > +alib::ObjectBase * NonlinearFullAndLinearIndex < SymbolType >::inc ( ) && { + return new alib::UniqueObject ( alib::Object ( std::move ( * this ) ), primitive::Integer ( 0 ) ); +} + +} /* namespace arbology */ + +} /* namespace indexes */ + +#endif /* ARBOLOGY_NONLINEAR_FULL_AND_LINEAR_INDEX_H_ */ diff --git a/aquery2/src/aquery.cpp b/aquery2/src/aquery.cpp index a400703c4ee37fa2a6eeb289541d5e3f0202465b..2c82ab8ddcc9ba9e3c11d6884fc35b8a914603e2 100644 --- a/aquery2/src/aquery.cpp +++ b/aquery2/src/aquery.cpp @@ -22,6 +22,7 @@ #include 
<stringology/query/CompressedBitParallelismFactors.h> #include <arbology/query/CompressedBitParallelismPatterns.h> #include <arbology/query/FullAndLinearIndexPatterns.h> +#include <arbology/query/NonlinearFullAndLinearIndexPatterns.h> int main ( int argc, char * argv[] ) { try { @@ -38,6 +39,7 @@ int main ( int argc, char * argv[] ) { allowed.push_back ( "compressedBitParallelismFactors" ); allowed.push_back ( "compressedBitParallelismPatterns" ); allowed.push_back ( "fullAndLinearIndexPatterns" ); + allowed.push_back ( "nonlinearFullAndLinearIndexPatterns" ); TCLAP::ValuesConstraint < std::string > allowedVals ( allowed ); TCLAP::ValueArg < std::string > query ( "q", "query", "Query index", false, "exactFactorMatch", & allowedVals ); @@ -155,6 +157,19 @@ int main ( int argc, char * argv[] ) { measurements::end ( ); measurements::start ( "Output write", measurements::Type::AUXILIARY ); + alib::XmlDataFactory::toStdout ( res ); + } else if ( query.getValue ( ) == "nonlinearFullAndLinearIndexPatterns" ) { + indexes::arbology::NonlinearFullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > > nonlinearFullAndLinearIndex = alib::XmlDataFactory::fromTokens ( sax::FromXMLParserHelper::parseInput ( indexInput ) ); + tree::RankedTreeWrapper pattern = alib::XmlDataFactory::fromTokens ( std::move ( sax::FromXMLParserHelper::parseInput(true, patternInput).front ( ) ) ); + + measurements::end ( ); + measurements::start ( "Algorithm", measurements::Type::MAIN ); + + std::set < unsigned > res = arbology::query::NonlinearFullAndLinearIndexPatterns::query ( nonlinearFullAndLinearIndex, pattern ); + + measurements::end ( ); + measurements::start ( "Output write", measurements::Type::AUXILIARY ); + alib::XmlDataFactory::toStdout ( res ); } else { throw exception::CommonException ( "Invalid algorithm" ); diff --git a/tests.aarbology.sh b/tests.aarbology.sh index f3918ca479e207d16868f1c9a8e58195b66cc26b..6edcb0927be8b80bb669822daba0d531f0bc45e2 100755 --- 
a/tests.aarbology.sh +++ b/tests.aarbology.sh @@ -410,6 +410,10 @@ function runTestNonlinearPatternEnds { clearResults } +runTestNonlinearPattern "Exact Nonlinear Pattern Matching Using Full And Linear Index (PrefixRanked)" "./aarbology2 -a nonlinearFullAndLinearIndex -s <( ./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) | ./aquery2 -q nonlinearFullAndLinearIndexPatterns -i - -p <( ./acast2 -t PrefixRankedNonlinearPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size" + +runTestNonlinearPattern "Exact Nonlinear Pattern Matching Using Full And Linear Index (PrefixRankedBar)" "./aarbology2 -a nonlinearFullAndLinearIndex -s <( ./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) | ./aquery2 -q nonlinearFullAndLinearIndexPatterns -i - -p <( ./acast2 -t PrefixRankedBarNonlinearPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size" + runTestPattern "Exact Pattern Matching Using Full And Linear Index (PrefixRankedBar)" "./aarbology2 -a fullAndLinearIndex -s <(./acast2 -t PrefixRankedBarTree -i \"\$SUBJECT_FILE\" ) | ./aquery2 -q fullAndLinearIndexPatterns -i - -p <( ./acast2 -t PrefixRankedBarPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size" runTestPattern "Exact Pattern Matching Using Full And Linear Index (PrefixRanked)" "./aarbology2 -a fullAndLinearIndex -s <(./acast2 -t PrefixRankedTree -i \"\$SUBJECT_FILE\" ) | ./aquery2 -q fullAndLinearIndexPatterns -i - -p <( ./acast2 -t PrefixRankedPattern -i \"\$PATTERN_FILE\" ) | ./astat2 -p size"