From 8072b2464d659fc4932fd590150c5377f081f03f Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Mon, 10 Apr 2017 09:43:41 +0200 Subject: [PATCH] normalize indexes --- .../arbology/CompressedBitParallelTreeIndex.h | 15 ++++- .../src/indexes/arbology/FullAndLinearIndex.h | 8 +++ .../src/indexes/common/IndexesNormalize.h | 58 +++++++++++++++++++ .../indexes/stringology/BitParallelIndex.h | 15 ++++- .../stringology/CompressedBitParallelIndex.h | 15 ++++- .../src/indexes/stringology/PositionHeap.h | 11 ++++ .../src/indexes/stringology/SuffixArray.h | 9 +++ .../src/indexes/stringology/SuffixTrie.h | 10 ++++ 8 files changed, 135 insertions(+), 6 deletions(-) create mode 100644 alib2data/src/indexes/common/IndexesNormalize.h diff --git a/alib2data/src/indexes/arbology/CompressedBitParallelTreeIndex.h b/alib2data/src/indexes/arbology/CompressedBitParallelTreeIndex.h index 8005354788..214f5aa312 100644 --- a/alib2data/src/indexes/arbology/CompressedBitParallelTreeIndex.h +++ b/alib2data/src/indexes/arbology/CompressedBitParallelTreeIndex.h @@ -29,6 +29,8 @@ #include <common/SparseBoolVector.hpp> #include <primitive/Bool.h> +#include <alphabet/common/SymbolNormalize.h> + namespace indexes { namespace arbology { @@ -68,7 +70,7 @@ public: */ const std::vector < int > & getJumps ( ) const; - const std::vector < SymbolType > & getString ( ) const; + std::vector < SymbolType > getString ( ) const; const std::set < SymbolType > & getAlphabet ( ) const { return this->template accessComponent < GeneralAlphabet > ( ).get ( ); @@ -116,6 +118,15 @@ public: void compose ( std::deque < sax::Token > & out ) const; virtual alib::ObjectBase * inc ( ) &&; + /** Rvalue-only: builds a new CompressedBitParallelTreeIndex < > from this object's members. The alphabet goes through SymbolNormalize::normalizeAlphabet, every key of m_vectors goes through SymbolNormalize::normalizeSymbol (values are handed over with std::move), and m_jumpTable is moved in as is. */ + virtual ObjectBase * normalize ( ) && { + std::set < DefaultSymbolType > alphabet = alphabet::SymbolNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < GeneralAlphabet > ( ).get ( ) ) ); + std::map < DefaultSymbolType, common::SparseBoolVector > vectors; + for ( std::pair < SymbolType, 
common::SparseBoolVector > && vector : std::make_moveable_map ( m_vectors ) ) + vectors.insert ( std::make_pair ( alphabet::SymbolNormalize::normalizeSymbol ( std::move ( vector.first ) ), std::move ( vector.second ) ) ); + + return new CompressedBitParallelTreeIndex < > ( std::move ( alphabet ), std::move ( vectors ), std::move ( m_jumpTable ) ); + } }; } /* namespace arbology */ @@ -151,7 +162,7 @@ const std::vector < int > & CompressedBitParallelTreeIndex < SymbolType >::getJu } template < class SymbolType > -const std::vector < SymbolType > & CompressedBitParallelTreeIndex < SymbolType >::getString ( ) const { +std::vector < SymbolType > CompressedBitParallelTreeIndex < SymbolType >::getString ( ) const { std::vector < SymbolType > res; unsigned index = 0; diff --git a/alib2data/src/indexes/arbology/FullAndLinearIndex.h b/alib2data/src/indexes/arbology/FullAndLinearIndex.h index ef6e121bf3..d1baf0bf26 100644 --- a/alib2data/src/indexes/arbology/FullAndLinearIndex.h +++ b/alib2data/src/indexes/arbology/FullAndLinearIndex.h @@ -24,6 +24,8 @@ #include <indexes/stringology/PositionHeap.h> +#include <alphabet/common/SymbolNormalize.h> + namespace indexes { namespace arbology { @@ -111,6 +113,12 @@ public: void compose ( std::deque < sax::Token > & out ) const; virtual alib::ObjectBase * inc ( ) &&; + /** Rvalue-only: calls normalize ( ) on the moved m_StringIndex and builds a new FullAndLinearIndex < > from the result plus the moved m_JumpTable. NOTE(review): manage_move presumably takes over the pointer returned by PositionHeap::normalize ( ) and moves the object out of it - confirm. */ + virtual ObjectBase * normalize ( ) && { + indexes::stringology::PositionHeap < DefaultSymbolType > stringIndex = manage_move ( std::move ( m_StringIndex ).normalize ( ) ); + + return new FullAndLinearIndex < > ( std::move ( stringIndex ), std::move ( m_JumpTable ) ); + } }; } /* namespace arbology */ diff --git a/alib2data/src/indexes/common/IndexesNormalize.h b/alib2data/src/indexes/common/IndexesNormalize.h new file mode 100644 index 0000000000..0b3aaaa381 --- /dev/null +++ b/alib2data/src/indexes/common/IndexesNormalize.h @@ -0,0 +1,58 @@ +/* + * IndexesNormalize.h + * + * Created on: Apr 7, 2017 + * Author: Jan Travnicek + */ + +#ifndef INDEXES_NORMALIZE_H_ 
+#define INDEXES_NORMALIZE_H_ + +#include <vector> +#include <tuple> +#include <set> +#include <variant> +#include <trie> + +#include <object/AnyObject.h> + +#include <alphabet/common/SymbolNormalize.h> + +namespace indexes { + +/** + * Helpers that re-key a std::trie from an arbitrary SymbolType to DefaultSymbolType: every child key is passed through alphabet::SymbolNormalize::normalizeSymbol, subtrees are converted recursively and node data is moved over as is. + */ +class IndexesNormalize { + template < class SymbolType, class ValueType > + static std::trie < DefaultSymbolType, ValueType > normalizeTrieInner ( std::trie < SymbolType, ValueType > && node ) { + std::map < DefaultSymbolType, std::trie < DefaultSymbolType, ValueType > > children; + + for ( std::pair < SymbolType, std::trie < SymbolType, ValueType > > && child : std::make_moveable_map ( node.getChildren ( ) ) ) { + children.insert ( std::make_pair ( alphabet::SymbolNormalize::normalizeSymbol ( std::move ( child.first ) ), normalizeTrieInner ( std::move ( child.second ) ) ) ); + } + + return std::trie < DefaultSymbolType, ValueType > ( std::move ( node.getData ( ) ), std::move ( children ) ); + } + /* Public overloads, one per trie value type ( unsigned, std::variant < void, unsigned > ); both forward to normalizeTrieInner. */ +public: + template < class SymbolType > + static std::trie < DefaultSymbolType, unsigned > normalizeTrie ( std::trie < SymbolType, unsigned > && trie ); + + template < class SymbolType > + static std::trie < DefaultSymbolType, std::variant < void, unsigned > > normalizeTrie ( std::trie < SymbolType, std::variant < void, unsigned > > && trie ); +}; + +template < class SymbolType > +std::trie < DefaultSymbolType, unsigned > IndexesNormalize::normalizeTrie ( std::trie < SymbolType, unsigned > && trie ) { + return normalizeTrieInner ( std::move ( trie ) ) ; +} + +template < class SymbolType > +std::trie < DefaultSymbolType, std::variant < void, unsigned > > IndexesNormalize::normalizeTrie ( std::trie < SymbolType, std::variant < void, unsigned > > && trie ) { + return normalizeTrieInner ( std::move ( trie ) ) ; +} + +} /* namespace indexes */ + +#endif /* INDEXES_NORMALIZE_H_ */ diff --git a/alib2data/src/indexes/stringology/BitParallelIndex.h 
b/alib2data/src/indexes/stringology/BitParallelIndex.h index 100a2c9962..8b984af866 100644 --- a/alib2data/src/indexes/stringology/BitParallelIndex.h +++ b/alib2data/src/indexes/stringology/BitParallelIndex.h @@ -28,6 +28,8 @@ #include <container/ObjectsVector.h> #include <primitive/Bool.h> +#include <alphabet/common/SymbolNormalize.h> + namespace indexes { namespace stringology { @@ -61,7 +63,7 @@ public: */ const std::map < SymbolType, std::vector < bool > > & getData ( ) const; - const std::vector < SymbolType > & getString ( ) const; + std::vector < SymbolType > getString ( ) const; const std::set < SymbolType > & getAlphabet ( ) const { return this->template accessComponent < GeneralAlphabet > ( ).get ( ); @@ -109,6 +111,15 @@ public: void compose ( std::deque < sax::Token > & out ) const; virtual alib::ObjectBase * inc ( ) &&; + /** Rvalue-only: builds a new BitParallelIndex < > from this object's members. The alphabet goes through SymbolNormalize::normalizeAlphabet and every key of m_vectors goes through SymbolNormalize::normalizeSymbol; the bit vectors are handed over with std::move. */ + virtual ObjectBase * normalize ( ) && { + std::set < DefaultSymbolType > alphabet = alphabet::SymbolNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < GeneralAlphabet > ( ).get ( ) ) ); + std::map < DefaultSymbolType, std::vector < bool > > vectors; + for ( std::pair < SymbolType, std::vector < bool > > && vector : std::make_moveable_map ( m_vectors ) ) + vectors.insert ( std::make_pair ( alphabet::SymbolNormalize::normalizeSymbol ( std::move ( vector.first ) ), std::move ( vector.second ) ) ); + + return new BitParallelIndex < > ( std::move ( alphabet ), std::move ( vectors ) ); + } }; } /* namespace stringology */ @@ -139,7 +150,7 @@ const std::map < SymbolType, std::vector < bool > > & BitParallelIndex < SymbolT } template < class SymbolType > -const std::vector < SymbolType > & BitParallelIndex < SymbolType >::getString ( ) const { +std::vector < SymbolType > BitParallelIndex < SymbolType >::getString ( ) const { std::vector < SymbolType > res; unsigned index = 0; diff --git a/alib2data/src/indexes/stringology/CompressedBitParallelIndex.h b/alib2data/src/indexes/stringology/CompressedBitParallelIndex.h 
index 54ceee7133..f2e75d5ef5 100644 --- a/alib2data/src/indexes/stringology/CompressedBitParallelIndex.h +++ b/alib2data/src/indexes/stringology/CompressedBitParallelIndex.h @@ -28,6 +28,8 @@ #include <common/SparseBoolVector.hpp> #include <primitive/Bool.h> +#include <alphabet/common/SymbolNormalize.h> + namespace indexes { namespace stringology { @@ -61,7 +63,7 @@ public: */ const std::map < SymbolType, common::SparseBoolVector > & getData ( ) const; - const std::vector < SymbolType > & getString ( ) const; + std::vector < SymbolType > getString ( ) const; const std::set < SymbolType > & getAlphabet ( ) const { return this->template accessComponent < GeneralAlphabet > ( ).get ( ); @@ -109,6 +111,15 @@ public: void compose ( std::deque < sax::Token > & out ) const; virtual alib::ObjectBase * inc ( ) &&; + /** Rvalue-only: builds a new CompressedBitParallelIndex < > from this object's members. The alphabet goes through SymbolNormalize::normalizeAlphabet and every key of m_vectors goes through SymbolNormalize::normalizeSymbol; the sparse bit vectors are handed over with std::move. */ + virtual ObjectBase * normalize ( ) && { + std::set < DefaultSymbolType > alphabet = alphabet::SymbolNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < GeneralAlphabet > ( ).get ( ) ) ); + std::map < DefaultSymbolType, common::SparseBoolVector > vectors; + for ( std::pair < SymbolType, common::SparseBoolVector > && vector : std::make_moveable_map ( m_vectors ) ) + vectors.insert ( std::make_pair ( alphabet::SymbolNormalize::normalizeSymbol ( std::move ( vector.first ) ), std::move ( vector.second ) ) ); + + return new CompressedBitParallelIndex < > ( std::move ( alphabet ), std::move ( vectors ) ); + } }; } /* namespace stringology */ @@ -139,7 +150,7 @@ const std::map < SymbolType, common::SparseBoolVector > & CompressedBitParallelI } template < class SymbolType > -const std::vector < SymbolType > & CompressedBitParallelIndex < SymbolType >::getString ( ) const { +std::vector < SymbolType > CompressedBitParallelIndex < SymbolType >::getString ( ) const { std::vector < SymbolType > res; unsigned index = 0; diff --git a/alib2data/src/indexes/stringology/PositionHeap.h b/alib2data/src/indexes/stringology/PositionHeap.h index 
058710b7b4..b72c23fb1d 100644 --- a/alib2data/src/indexes/stringology/PositionHeap.h +++ b/alib2data/src/indexes/stringology/PositionHeap.h @@ -33,6 +33,9 @@ #include <primitive/Unsigned.h> +#include <alphabet/common/SymbolNormalize.h> +#include <indexes/common/IndexesNormalize.h> + namespace indexes { namespace stringology { @@ -118,6 +121,14 @@ public: void compose ( std::deque < sax::Token > & out ) const; virtual alib::ObjectBase * inc ( ) &&; + /** Rvalue-only: builds a new PositionHeap < > from this object's members. The alphabet goes through SymbolNormalize::normalizeAlphabet, m_trie through IndexesNormalize::normalizeTrie and m_string through SymbolNormalize::normalizeSymbols. */ + virtual PositionHeap < > * normalize ( ) && { + std::set < DefaultSymbolType > alphabet = alphabet::SymbolNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < GeneralAlphabet > ( ).get ( ) ) ); + std::trie < DefaultSymbolType, unsigned > trie = IndexesNormalize::normalizeTrie ( std::move ( m_trie ) ); + std::vector < DefaultSymbolType > string = alphabet::SymbolNormalize::normalizeSymbols ( std::move ( m_string ) ); + + return new PositionHeap < > ( std::move ( alphabet ), std::move ( trie ), std::move ( string ) ); + } }; } /* namespace stringology */ diff --git a/alib2data/src/indexes/stringology/SuffixArray.h b/alib2data/src/indexes/stringology/SuffixArray.h index 58d74706c8..e9aefeadc5 100644 --- a/alib2data/src/indexes/stringology/SuffixArray.h +++ b/alib2data/src/indexes/stringology/SuffixArray.h @@ -31,6 +31,8 @@ #include <container/ObjectsSet.h> #include <container/ObjectsVector.h> +#include <alphabet/common/SymbolNormalize.h> + namespace indexes { namespace stringology { @@ -113,6 +115,13 @@ public: void compose ( std::deque < sax::Token > & out ) const; virtual alib::ObjectBase * inc ( ) &&; + /** Rvalue-only: builds a new SuffixArray < > from this object's members. The alphabet goes through SymbolNormalize::normalizeAlphabet and m_string through SymbolNormalize::normalizeSymbols; m_data is moved in as is. The result is spelled SuffixArray < > because the bare name inside the class template would denote SuffixArray < SymbolType >, which does not match the normalized arguments. */ + virtual ObjectBase * normalize ( ) && { + std::set < DefaultSymbolType > alphabet = alphabet::SymbolNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < GeneralAlphabet > ( ).get ( ) ) ); + std::vector < DefaultSymbolType > string = alphabet::SymbolNormalize::normalizeSymbols ( std::move ( m_string ) ); + + return new SuffixArray < > ( std::move ( alphabet ), std::move ( 
m_data ), std::move ( string ) ); + } }; } /* namespace stringology */ diff --git a/alib2data/src/indexes/stringology/SuffixTrie.h b/alib2data/src/indexes/stringology/SuffixTrie.h index ed2123bc0a..a029a10281 100644 --- a/alib2data/src/indexes/stringology/SuffixTrie.h +++ b/alib2data/src/indexes/stringology/SuffixTrie.h @@ -35,6 +35,9 @@ #include <primitive/Unsigned.h> +#include <alphabet/common/SymbolNormalize.h> +#include <indexes/common/IndexesNormalize.h> + namespace indexes { namespace stringology { @@ -118,6 +121,13 @@ public: void compose ( std::deque < sax::Token > & out ) const; virtual alib::ObjectBase * inc ( ) &&; + /** Rvalue-only: builds a new SuffixTrie < > from this object's members. The alphabet goes through SymbolNormalize::normalizeAlphabet and m_trie through IndexesNormalize::normalizeTrie. */ + virtual SuffixTrie < > * normalize ( ) && { + std::set < DefaultSymbolType > alphabet = alphabet::SymbolNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < GeneralAlphabet > ( ).get ( ) ) ); + std::trie < DefaultSymbolType, std::variant < void, unsigned > > trie = IndexesNormalize::normalizeTrie ( std::move ( m_trie ) ); + + return new SuffixTrie < > ( std::move ( alphabet ), std::move ( trie ) ); + } }; } /* namespace stringology */ -- GitLab