diff --git a/aarbology2/src/aarbology.cpp b/aarbology2/src/aarbology.cpp index de0dbf57faac0df35b6b2ca70fce68d88ff483d1..ee3adfd8769589d95878d7569798e282c16ff97d 100644 --- a/aarbology2/src/aarbology.cpp +++ b/aarbology2/src/aarbology.cpp @@ -32,6 +32,7 @@ #include <tree/properties/ExactSubtreeRepeatsNaive.h> #include <arbology/transform/BeginToEndIndex.h> #include <arbology/indexing/CompressedBitParallelIndexConstruction.h> +#include <arbology/indexing/FullAndLinearIndexConstruction.h> int main ( int argc, char * argv[] ) { try { @@ -54,6 +55,7 @@ int main ( int argc, char * argv[] ) { allowed.push_back ( "exactNonlinearTreePatternAutomaton" ); allowed.push_back ( "exactSubtreeRepeatsNaive" ); allowed.push_back ( "compressedBitParallelIndex" ); + allowed.push_back ( "fullAndLinearIndex" ); TCLAP::ValuesConstraint < std::string > allowedVals ( allowed ); TCLAP::ValueArg < std::string > algorithm ( "a", "algorithm", "Execute algorithm", false, "exactSubtreeMatch", & allowedVals ); @@ -267,6 +269,18 @@ int main ( int argc, char * argv[] ) { measurements::start ( "Output write", measurements::Type::AUXILIARY ); alib::XmlDataFactory::toStdout ( compressedBitParallelIndex ); + } else if ( algorithm.getValue ( ) == "fullAndLinearIndex" ) { + tree::RankedTreeWrapper subject = alib::XmlDataFactory::fromTokens < tree::RankedTreeWrapper > ( std::move ( sax::FromXMLParserHelper::parseInput(true, subjectInput).front ( ) ) ); + + measurements::end ( ); + measurements::start ( "Algorithm", measurements::Type::MAIN ); + + indexes::arbology::FullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > > fullAndLinearIndex = arbology::indexing::FullAndLinearIndexConstruction::construct ( subject ); + + measurements::end ( ); + measurements::start ( "Output write", measurements::Type::AUXILIARY ); + + alib::XmlDataFactory::toStdout ( fullAndLinearIndex ); } else { throw exception::CommonException ( "Invalid algorithm" ); } diff --git 
a/alib2algo/src/arbology/indexing/FullAndLinearIndexConstruction.cpp b/alib2algo/src/arbology/indexing/FullAndLinearIndexConstruction.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9203d13fb3b2ad1822423cb846dadd26b73f2d47 --- /dev/null +++ b/alib2algo/src/arbology/indexing/FullAndLinearIndexConstruction.cpp @@ -0,0 +1,22 @@ +/* + * FullAndLinearIndexConstruction.cpp + * + * Created on: 6. 2. 2017 + * Author: Jan Travnicek + */ + +#include "FullAndLinearIndexConstruction.h" + +namespace arbology { + +namespace indexing { + +indexes::arbology::FullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > > FullAndLinearIndexConstruction::construct ( const tree::RankedTreeWrapper & tree ) { + return dispatch ( tree.getData ( ) ); /* NOTE(review): dispatch presumably forwards to the overload registered for the concrete type of the wrapped tree - confirm against SingleDispatch */ +} + +auto fullAndLinearIndexConstructionPrefixRankedTree = FullAndLinearIndexConstruction::RegistratorWrapper < indexes::arbology::FullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > >, tree::PrefixRankedTree < > > ( FullAndLinearIndexConstruction::construct ); /* NOTE(review): appears to register the PrefixRankedTree overload of construct with the dispatcher - confirm */ + +} /* namespace indexing */ + +} /* namespace arbology */ diff --git a/alib2algo/src/arbology/indexing/FullAndLinearIndexConstruction.h b/alib2algo/src/arbology/indexing/FullAndLinearIndexConstruction.h new file mode 100644 index 0000000000000000000000000000000000000000..ff9bcddfea65e6754295cf5d549c759a8b5d3d54 --- /dev/null +++ b/alib2algo/src/arbology/indexing/FullAndLinearIndexConstruction.h @@ -0,0 +1,49 @@ +/* + * FullAndLinearIndexConstruction.h + * + * Created on: 6. 2. 
2017 + * Author: Jan Travnicek + */ + +#ifndef ARBOLOGY_FULL_AND_LINEAR_INDEX_CONSTRUCTION_H_ +#define ARBOLOGY_FULL_AND_LINEAR_INDEX_CONSTRUCTION_H_ + +#include <indexes/arbology/FullAndLinearIndex.h> +#include <tree/RankedTreeWrapper.h> +#include <tree/ranked/PrefixRankedTree.h> +#include <core/multipleDispatch.hpp> +#include <tree/properties/SubtreeJumpTable.h> +#include <stringology/indexing/PositionHeapNaive.h> + +namespace arbology { + +namespace indexing { + +/** + * Constructs a full and linear index for given tree. + * The index pairs a position heap built over the linear string form of the tree with the subtree jump table of the tree. + */ + +class FullAndLinearIndexConstruction : public std::SingleDispatch < FullAndLinearIndexConstruction, indexes::arbology::FullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > >, const tree::RankedTreeBase & > { +public: + /** + * Creates full and linear index for trees + * @param tree tree to construct the index for + * @return the index + */ + static indexes::arbology::FullAndLinearIndex < std::ranked_symbol < DefaultSymbolType, DefaultRankType > > construct ( const tree::RankedTreeWrapper & tree ); + + template < class SymbolType, class RankType > + static indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > construct ( const tree::PrefixRankedTree < SymbolType, RankType > & tree ); /* overload taking a prefix ranked tree directly; builds and returns the index */ +}; + +template < class SymbolType, class RankType > +indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > FullAndLinearIndexConstruction::construct ( const tree::PrefixRankedTree < SymbolType, RankType > & w ) { + return indexes::arbology::FullAndLinearIndex < std::ranked_symbol < SymbolType, RankType > > ( stringology::indexing::PositionHeapNaive::construct ( string::LinearString < std::ranked_symbol < SymbolType, RankType > > ( w ) ), tree::properties::SubtreeJumpTable::compute ( w ) ); /* position heap over the LinearString made from w, plus the subtree jump table computed from w */ +} + +} /* namespace indexing */ + +} /* namespace arbology */ + +#endif /* ARBOLOGY_FULL_AND_LINEAR_INDEX_CONSTRUCTION_H_ */ diff --git 
a/alib2data/src/indexes/arbology/FullAndLinearIndex.cpp b/alib2data/src/indexes/arbology/FullAndLinearIndex.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ec593e1bcc18c30709083a3e0c282986604ef246 --- /dev/null +++ b/alib2data/src/indexes/arbology/FullAndLinearIndex.cpp @@ -0,0 +1,14 @@ +/* + * FullAndLinearIndex.cpp + * + * Created on: Jan 8, 2017 + * Author: Jan Travnicek + */ + +#include "FullAndLinearIndex.h" + +namespace alib { + +auto arbologyFullAndLinearIndexParserRegister = xmlApi < alib::Object >::ParserRegister < indexes::arbology::FullAndLinearIndex < > > ( ); + +} /* namespace alib */ diff --git a/alib2data/src/indexes/arbology/FullAndLinearIndex.h b/alib2data/src/indexes/arbology/FullAndLinearIndex.h new file mode 100644 index 0000000000000000000000000000000000000000..ef6e121bf37d14e948931c3889242df4be754380 --- /dev/null +++ b/alib2data/src/indexes/arbology/FullAndLinearIndex.h @@ -0,0 +1,209 @@ +/* + * FullAndLinearIndex.h + * + * Created on: Jan 8, 2017 + * Author: Jan Travnicek + */ + +#ifndef ARBOLOGY_FULL_AND_LINEAR_INDEX_H_ +#define ARBOLOGY_FULL_AND_LINEAR_INDEX_H_ + +#include <string> +#include <iostream> +#include <sstream> + +#include <common/DefaultSymbolType.h> + +#include <object/UniqueObject.h> +#include <object/ObjectBase.h> + +#include <sax/FromXMLParserHelper.h> +#include <core/xmlApi.hpp> + +#include <container/ObjectsVector.h> + +#include <indexes/stringology/PositionHeap.h> + +namespace indexes { + +namespace arbology { + +class GeneralAlphabet; + +/** + * Represents a full and linear index of a tree. The index is stored + * as a position heap string index together with a subtree jump table. 
+ */ +template < class SymbolType = DefaultSymbolType > +class FullAndLinearIndex : public alib::ObjectBase { +protected: + indexes::stringology::PositionHeap < SymbolType > m_StringIndex; + std::vector < int > m_JumpTable; + +public: + /** + * @return newly allocated copy of this index + */ + virtual ObjectBase * clone ( ) const; + + /** + * @return newly allocated index move-constructed from this one + */ + virtual ObjectBase * plunder ( ) &&; + + explicit FullAndLinearIndex ( indexes::stringology::PositionHeap < SymbolType > stringIndex, std::vector < int > jumpTable ); + + /** + * @return position heap serving as the string index + */ + const indexes::stringology::PositionHeap < SymbolType > & getStringIndex ( ) const; + + /** + * @return subtree jump table + */ + const std::vector < int > & getJumps ( ) const; + + const std::vector < SymbolType > & getString ( ) const; + + const std::set < SymbolType > & getAlphabet ( ) const { + return m_StringIndex.getAlphabet ( ); + } + + /** + * Replaces the string index + * @param stringIndex position heap to store as the new string index + */ + void setStringIndex ( indexes::stringology::PositionHeap < SymbolType > stringIndex ); + + /** + * Removes symbol from the alphabet of the underlying string index + * @param symbol removed symbol from the alphabet + */ + bool removeSymbolFromAlphabet ( const SymbolType & symbol ) { + return m_StringIndex.removeSymbolFromAlphabet ( symbol ); + } + + /** + * Prints textual representation of the index to the output stream. 
+ * @param out output stream to print the index to + * Output has the form (FullAndLinearIndex stringIndex, jumpTable) + */ + virtual void operator >>( std::ostream & out ) const; + + virtual int compare ( const ObjectBase & other ) const { + if ( std::type_index ( typeid ( * this ) ) == std::type_index ( typeid ( other ) ) ) return this->compare ( ( decltype ( * this ) )other ); + + return std::type_index ( typeid ( * this ) ) - std::type_index ( typeid ( other ) ); + } + + virtual int compare ( const FullAndLinearIndex & other ) const; + + virtual explicit operator std::string ( ) const; + + static const std::string & getXmlTagName ( ) { + static std::string xmlTagName = "FullAndLinearIndex"; + + return xmlTagName; + } + + static FullAndLinearIndex parse ( std::deque < sax::Token >::iterator & input ); + + void compose ( std::deque < sax::Token > & out ) const; + + virtual alib::ObjectBase * inc ( ) &&; +}; + +} /* namespace arbology */ + +} /* namespace indexes */ + +namespace indexes { + +namespace arbology { + +template < class SymbolType > +FullAndLinearIndex < SymbolType >::FullAndLinearIndex ( indexes::stringology::PositionHeap < SymbolType > stringIndex, std::vector < int > jumpTable ) : m_StringIndex ( std::move ( stringIndex ) ), m_JumpTable ( std::move ( jumpTable ) ) { /* both by-value arguments are moved, not copied, into the members */ +} + +template < class SymbolType > +alib::ObjectBase * FullAndLinearIndex < SymbolType >::clone ( ) const { + return new FullAndLinearIndex ( * this ); +} + +template < class SymbolType > +alib::ObjectBase * FullAndLinearIndex < SymbolType >::plunder ( ) && { + return new FullAndLinearIndex ( std::move ( * this ) ); +} + +template < class SymbolType > +const indexes::stringology::PositionHeap < SymbolType > & FullAndLinearIndex < SymbolType >::getStringIndex ( ) const { + return m_StringIndex; +} + +template < class SymbolType > +const std::vector < int > & FullAndLinearIndex < SymbolType >::getJumps ( ) const { + return m_JumpTable; +} + +template < class SymbolType > +const std::vector < SymbolType > & FullAndLinearIndex < SymbolType 
>::getString ( ) const { + return m_StringIndex.getString ( ); +} + +template < class SymbolType > +void FullAndLinearIndex < SymbolType >::setStringIndex ( indexes::stringology::PositionHeap < SymbolType > stringIndex ) { + this->m_StringIndex = std::move ( stringIndex ); +} + +template < class SymbolType > +void FullAndLinearIndex < SymbolType >::operator >>( std::ostream & out ) const { + out << "(FullAndLinearIndex " << this->m_StringIndex << ", " << m_JumpTable << ")"; +} + +template < class SymbolType > +int FullAndLinearIndex < SymbolType >::compare ( const FullAndLinearIndex & other ) const { + auto first = std::tie ( getStringIndex ( ), getJumps ( ) ); + auto second = std::tie ( other.getStringIndex ( ), other.getJumps ( ) ); + + static std::compare < decltype ( first ) > comp; + + return comp ( first, second ); +} + +template < class SymbolType > +FullAndLinearIndex < SymbolType >::operator std::string ( ) const { + std::stringstream ss; + ss << * this; + return ss.str ( ); +} + +template < class SymbolType > +FullAndLinearIndex < SymbolType > FullAndLinearIndex < SymbolType >::parse ( std::deque < sax::Token >::iterator & input ) { + sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, FullAndLinearIndex::getXmlTagName ( ) ); + indexes::stringology::PositionHeap < SymbolType > stringIndex = alib::xmlApi < indexes::stringology::PositionHeap < SymbolType > >::parse ( input ); + std::vector < int > jumps = alib::xmlApi < std::vector < int > >::parse ( input ); + + FullAndLinearIndex < SymbolType > res ( std::move ( stringIndex ), std::move ( jumps ) ); + + sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, FullAndLinearIndex::getXmlTagName ( ) ); + return res; +} + +template < class SymbolType > +void FullAndLinearIndex < SymbolType >::compose ( std::deque < sax::Token > & out ) const { + out.emplace_back ( FullAndLinearIndex::getXmlTagName ( ), sax::Token::TokenType::START_ELEMENT ); + alib::xmlApi 
< indexes::stringology::PositionHeap < SymbolType > >::compose ( out, getStringIndex ( ) ); + alib::xmlApi < std::vector < int > >::compose ( out, getJumps ( ) ); + out.emplace_back ( FullAndLinearIndex::getXmlTagName ( ), sax::Token::TokenType::END_ELEMENT ); +} + +template < class SymbolType > +alib::ObjectBase * FullAndLinearIndex < SymbolType >::inc ( ) && { + return new alib::UniqueObject ( alib::Object ( std::move ( * this ) ), primitive::Integer ( 0 ) ); +} + +} /* namespace arbology */ + +} /* namespace indexes */ + +#endif /* ARBOLOGY_FULL_AND_LINEAR_INDEX_H_ */