diff --git a/alib2algo/src/tree/properties/ExactSubtreeRepeats.cpp b/alib2algo/src/tree/properties/ExactSubtreeRepeats.cpp
new file mode 100644
--- /dev/null
+++ b/alib2algo/src/tree/properties/ExactSubtreeRepeats.cpp
@@ -0,0 +1,27 @@
+/*
+ * ExactSubtreeRepeats.cpp
+ *
+ * Created on: 5. 5. 2017
+ * Author: Aleksandr Shatrovskii
+ */
+
+#include "ExactSubtreeRepeats.h"
+#include <tree/Tree.h>
+
+namespace tree {
+
+namespace properties {
+
+/**
+ * Type-erased entry point: dispatches on the data held by the given tree.
+ */
+tree::Tree ExactSubtreeRepeats::repeats ( const tree::Tree & tree ) {
+	return dispatch ( tree.getData ( ) );
+}
+
+// Registers the PostfixRankedTree overload of repeats ( ) with the dispatcher.
+auto ExactRepeatsPostfixRankedTree = ExactSubtreeRepeats::RegistratorWrapper < tree::PostfixRankedTree < unsigned, DefaultRankType >, tree::PostfixRankedTree < > > ( ExactSubtreeRepeats::repeats );
+
+} /* namespace properties */
+
+} /* namespace tree */
diff --git a/alib2algo/src/tree/properties/ExactSubtreeRepeats.h b/alib2algo/src/tree/properties/ExactSubtreeRepeats.h
new file mode 100644
--- /dev/null
+++ b/alib2algo/src/tree/properties/ExactSubtreeRepeats.h
@@ -0,0 +1,485 @@
+/*
+ * ExactSubtreeRepeats.h
+ *
+ * Created on: 5. 5. 2017
+ * Author: Aleksandr Shatrovskii
+ */
+
+#ifndef _ARBOLOGY_SUBTREE_REPEATS_H_
+#define _ARBOLOGY_SUBTREE_REPEATS_H_
+
+#include <alphabet/SymbolFeatures.h>
+#include <core/multipleDispatch.hpp>
+#include <tree/TreeFeatures.h>
+
+#include <alphabet/RankedSymbol.h>
+#include <deque>
+#include <iostream>
+#include <list>
+#include <map>
+#include <primitive/Unsigned.h>
+#include <queue>
+#include <stack>
+#include <tree>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "SubtreeJumpTable.h"
+
+#include <global/GlobalData.h>
+#include <tree/ranked/PostfixRankedTree.h>
+
+namespace tree {
+
+namespace properties {
+
+/**
+ * Dynamic computation of subtree repeats
+ */
+class ExactSubtreeRepeats : public std::SingleDispatch < ExactSubtreeRepeats, tree::Tree, const tree::TreeBase & > {
+
+	/**
+	 * Group of equal factors of the postfix notation: ( start positions, common length, arity balance ).
+	 * partition ( ) reports a group as a repeat once its arity balance is 0.
+	 */
+	using Triplet = std::tuple < std::deque < unsigned >, unsigned, int >;
+
+	/**
+	 * Arrays derived from the postfix notation plus the mutable state shared by assignLevel ( ) and partition ( ).
+	 */
+	class ExactSubtreeRepeatsAux {
+		template < class SymbolType, class RankType >
+		void buildMu ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols );
+		template < class SymbolType, class RankType >
+		void buildP ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols );
+		template < class SymbolType, class RankType >
+		void buildH ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols );
+		template < class SymbolType, class RankType >
+		void buildFC ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols );
+
+	public:
+		/**
+		 * Builds all auxiliary arrays; symbols must be non-empty ( treeSize - 1 and H.back ( ) are evaluated ).
+		 */
+		template < class SymbolType, class RankType >
+		ExactSubtreeRepeatsAux ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols );
+
+		std::vector < unsigned > mu; // dense number of the ( symbol, rank ) pair at each position
+		std::vector < unsigned > P; // parent position of every node except the last one
+		std::vector < unsigned > H; // height of the subtree rooted at each position ( leaves are 0 )
+		std::vector < bool > FC; // true when the node is the first child of its parent
+		std::vector < unsigned > T; // id of the latest repeat found starting at each position, 0 if none
+		std::vector < unsigned > TL; // length of that repeat
+		std::vector < std::queue < Triplet > > LA; // indexed by parent height: groups waiting for partition ( )
+		std::list < Triplet > found_repeats; // every reported repeat, in order of discovery
+		unsigned alphabetSize; // number of distinct ( symbol, rank ) pairs
+		unsigned treeSize; // number of nodes
+		unsigned sc; // number of repeat ids handed out so far
+	};
+
+	template < class SymbolType, class RankType >
+	static void repeatsPostfixRanked ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols, std::vector < std::ranked_symbol < unsigned, RankType > > & res );
+	static void assignLevel ( Triplet triplet, ExactSubtreeRepeats::ExactSubtreeRepeatsAux & aux );
+	template < class SymbolType, class RankType >
+	static void partition ( Triplet triplet, const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols, ExactSubtreeRepeats::ExactSubtreeRepeatsAux & aux );
+
+public:
+	/**
+	 * Compute a same shaped tree with nodes containing unique subtree ids.
+	 * @return Tree of repeats
+	 */
+	static tree::Tree repeats ( const tree::Tree & pattern );
+
+	/**
+	 * Compute a same shaped tree with nodes containing unique subtree ids.
+	 * @return Tree of repeats
+	 */
+	template < class SymbolType, class RankType >
+	static tree::PostfixRankedTree < unsigned, RankType > repeats ( const tree::PostfixRankedTree < SymbolType, RankType > & tree );
+};
+
+template < class SymbolType, class RankType >
+ExactSubtreeRepeats::ExactSubtreeRepeatsAux::ExactSubtreeRepeatsAux ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols ) {
+	this->sc = 0;
+	this->treeSize = symbols.size ( );
+
+	buildMu ( symbols );
+	buildP ( symbols );
+	buildH ( symbols );
+	buildFC ( symbols );
+
+	this->T = std::vector < unsigned > ( symbols.size ( ) );
+	this->TL = std::vector < unsigned > ( symbols.size ( ) );
+	// One queue per height; H.back ( ) is the height of the last node, the only one buildP leaves without a parent.
+	this->LA = std::vector < std::queue < Triplet > > ( this->H.back ( ) + 1 );
+
+	if ( common::GlobalData::verbose ) {
+		std::clog << "Alphabet size set to " << alphabetSize << std::endl;
+		std::clog << "Tree size set to " << this->treeSize << std::endl;
+		std::clog << "Auxiliary structures computed ! " << std::endl;
+	}
+}
+
+template < class SymbolType, class RankType >
+void ExactSubtreeRepeats::ExactSubtreeRepeatsAux::buildMu ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols ) {
+	// Number every distinct ( symbol, rank ) pair densely, in order of first occurrence.
+	std::map < std::pair < SymbolType, RankType >, unsigned > mu_map;
+	this->alphabetSize = 0;
+	this->mu.reserve ( symbols.size ( ) );
+
+	for ( const std::ranked_symbol < SymbolType, RankType > & symbol : symbols ) {
+		// insert ( ) leaves an existing entry untouched and reports whether the key was new.
+		auto inserted = mu_map.insert ( std::make_pair ( std::make_pair ( symbol.getSymbol ( ), symbol.getRank ( ) ), this->alphabetSize ) );
+
+		if ( inserted.second )
+			this->alphabetSize += 1;
+
+		this->mu.push_back ( inserted.first->second );
+	}
+
+	// Test mu_map
+	if ( common::GlobalData::verbose ) {
+		for ( const auto & entry : mu_map )
+			std::clog << "map: " << entry.first << " -> " << entry.second << std::endl;
+
+		std::clog << "mu : ";
+
+		for ( unsigned number : mu )
+			std::clog << number << " ";
+
+		std::clog << std::endl;
+	}
+}
+
+template < class SymbolType, class RankType >
+void ExactSubtreeRepeats::ExactSubtreeRepeatsAux::buildP ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols ) {
+	// Build parent array; the last node never gets a parent, hence treeSize - 1 entries.
+	this->P = std::vector < unsigned > ( this->treeSize - 1 );
+	std::stack < unsigned > pending; // roots of finished subtrees still waiting for their parent
+
+	for ( unsigned i = 0; i < symbols.size ( ); ++i ) {
+		const unsigned rank = static_cast < unsigned > ( symbols[i].getRank ( ) );
+
+		// The children of node i are the topmost `rank` pending roots.
+		for ( unsigned j = 0; j < rank; ++j ) {
+			this->P[pending.top ( )] = i;
+			pending.pop ( );
+		}
+
+		pending.push ( i );
+	}
+
+	// Test parents
+	if ( common::GlobalData::verbose ) {
+		std::clog << " P : ";
+
+		for ( unsigned parent : P )
+			std::clog << parent << " ";
+
+		std::clog << std::endl;
+	}
+}
+
+template < class SymbolType, class RankType >
+void ExactSubtreeRepeats::ExactSubtreeRepeatsAux::buildH ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols ) {
+	// Build height array
+	this->H = std::vector < unsigned > ( this->treeSize );
+	std::stack < unsigned > heights; // heights of finished subtrees still waiting for their parent
+
+	for ( unsigned i = 0; i < symbols.size ( ); ++i ) {
+		const unsigned rank = static_cast < unsigned > ( symbols[i].getRank ( ) );
+		unsigned height = 0; // leaves keep height 0
+
+		if ( rank != 0 ) {
+			// One more than the tallest child; the children are the topmost `rank` entries.
+			for ( unsigned j = 0; j < rank; ++j ) {
+				if ( heights.top ( ) > height )
+					height = heights.top ( );
+
+				heights.pop ( );
+			}
+
+			height += 1;
+		}
+
+		this->H[i] = height;
+		heights.push ( height );
+	}
+
+	// Test heights
+	if ( common::GlobalData::verbose ) {
+		std::clog << " H : ";
+
+		for ( unsigned height : H )
+			std::clog << height << " ";
+
+		std::clog << std::endl;
+	}
+}
+
+template < class SymbolType, class RankType >
+void ExactSubtreeRepeats::ExactSubtreeRepeatsAux::buildFC ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols ) {
+	// Build First child array; the last node is nobody's child, hence treeSize - 1 entries ( all false at first ).
+	this->FC = std::vector < bool > ( this->treeSize - 1 );
+	std::stack < unsigned > pending; // roots of finished subtrees still waiting for their parent
+
+	for ( unsigned i = 0; i < symbols.size ( ); ++i ) {
+		const unsigned rank = static_cast < unsigned > ( symbols[i].getRank ( ) );
+
+		// Children come off the stack last-to-first, so only the final pop is the first child.
+		for ( unsigned j = 0; j < rank; ++j ) {
+			this->FC[pending.top ( )] = ( j + 1 == rank );
+			pending.pop ( );
+		}
+
+		pending.push ( i );
+	}
+
+	// Test First child
+	if ( common::GlobalData::verbose ) {
+		std::clog << "FC : ";
+
+		for ( bool first : FC )
+			std::clog << first << " ";
+
+		std::clog << std::endl;
+	}
+}
+
+/**
+ * Takes the occurrences of a reported repeat whose root is a first child, groups them by the height of the
+ * parent node and queues every group in aux.LA so that partition ( ) can later extend it towards the parent.
+ *
+ * Defined inline: this non-template member lives in a header and would otherwise be defined once per
+ * translation unit that includes it.
+ *
+ * NOTE(review): the scratch arrays are sized by treeSize on every call - consider hoisting them into aux.
+ */
+inline void ExactSubtreeRepeats::assignLevel ( Triplet triplet, ExactSubtreeRepeats::ExactSubtreeRepeatsAux & aux ) {
+	const std::deque < unsigned > & S = std::get < 0 > ( triplet );
+	const unsigned l = std::get < 1 > ( triplet );
+
+	std::queue < unsigned > Q4; // heights that received an occurrence, in order of first appearance
+	std::vector < std::deque < unsigned > > An ( aux.treeSize );
+	std::vector < bool > Bn ( aux.treeSize );
+
+	for ( unsigned i : S ) {
+		const unsigned root = i + l - 1;
+
+		// The last node has no P / FC entry; any other root counts only when it is a first child.
+		if ( root >= aux.treeSize - 1 || !aux.FC[root] )
+			continue;
+
+		const unsigned k = aux.H[aux.P[root]];
+		An[k].push_back ( i );
+
+		if ( !Bn[k] ) {
+			Bn[k] = true;
+			Q4.push ( k );
+		}
+	}
+
+	while ( !Q4.empty ( ) ) {
+		const unsigned k = Q4.front ( );
+		Q4.pop ( );
+		// An is discarded on return, so its groups can be moved out instead of copied and cleared.
+		aux.LA[k].push ( Triplet ( std::move ( An[k] ), l, 0 ) );
+	}
+}
+
+/**
+ * Extends every occurrence of the group by what follows it in the postfix notation - either the whole repeat
+ * already known to start there or the single symbol found there - and splits the group by that extension.
+ * Groups whose arity balance becomes 0 are reported as repeats, all other groups are extended recursively.
+ *
+ * NOTE(review): the scratch arrays are sized by the tree / alphabet on every call - consider hoisting them into aux.
+ */
+template < class SymbolType, class RankType >
+void ExactSubtreeRepeats::partition ( Triplet triplet, const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols, ExactSubtreeRepeats::ExactSubtreeRepeatsAux & aux ) {
+	const std::deque < unsigned > & S = std::get < 0 > ( triplet );
+	const unsigned l = std::get < 1 > ( triplet );
+	const int ac = std::get < 2 > ( triplet );
+
+	std::queue < unsigned > Q1; // repeat ids that extended some occurrence, in order of first appearance
+	std::queue < unsigned > Q2; // symbol numbers that extended some occurrence, in order of first appearance
+	std::vector < bool > Bn ( symbols.size ( ) );
+	std::vector < bool > Bs ( aux.alphabetSize );
+	std::vector < Triplet > En ( symbols.size ( ) );
+	std::vector < Triplet > Es ( aux.alphabetSize );
+
+	for ( unsigned i : S ) {
+		const unsigned r = i + l;
+
+		if ( aux.T[r] != 0 ) {
+			// A repeat is already known to start at r: append it as a whole.
+			Triplet & group = En[aux.T[r]];
+			std::get < 0 > ( group ).push_back ( i );
+
+			if ( !Bn[aux.T[r]] ) {
+				Bn[aux.T[r]] = true;
+				std::get < 1 > ( group ) = l + aux.TL[r];
+				std::get < 2 > ( group ) = ac - 1;
+				Q1.push ( aux.T[r] );
+			}
+		} else {
+			// Otherwise append just the symbol at r.
+			const unsigned v = aux.mu[r];
+			Triplet & group = Es[v];
+			std::get < 0 > ( group ).push_back ( i );
+
+			if ( !Bs[v] ) {
+				Bs[v] = true;
+				std::get < 1 > ( group ) = l + 1;
+				std::get < 2 > ( group ) = ac + static_cast < unsigned > ( symbols[r].getRank ( ) ) - 1;
+				Q2.push ( v );
+			}
+		}
+	}
+
+	// Groups extended by a known repeat go first, then those extended by a symbol, each in queue order.
+	std::queue < Triplet > Q3;
+
+	for ( ; !Q1.empty ( ); Q1.pop ( ) )
+		Q3.push ( std::move ( En[Q1.front ( )] ) );
+
+	for ( ; !Q2.empty ( ); Q2.pop ( ) )
+		Q3.push ( std::move ( Es[Q2.front ( )] ) );
+
+	while ( !Q3.empty ( ) ) {
+		Triplet current = std::move ( Q3.front ( ) );
+		Q3.pop ( );
+
+		if ( std::get < 2 > ( current ) != 0 ) {
+			ExactSubtreeRepeats::partition ( std::move ( current ), symbols, aux );
+			continue;
+		}
+
+		const std::deque < unsigned > & starts = std::get < 0 > ( current );
+		const unsigned length = std::get < 1 > ( current );
+
+		if ( common::GlobalData::verbose )
+			std::clog << " ! Repeat : " << starts << " " << length << std::endl;
+
+		aux.found_repeats.push_back ( current );
+		aux.sc += 1;
+
+		for ( unsigned j : starts ) {
+			aux.T[j] = aux.sc;
+			aux.TL[j] = length;
+		}
+
+		ExactSubtreeRepeats::assignLevel ( std::move ( current ), aux );
+	}
+}
+
+/**
+ * Appends to res one symbol per node of the input: the label is the id of the repeat ( class of equal
+ * subtrees ) rooted at that node, the rank is copied from the input. An empty input appends nothing.
+ */
+template < class SymbolType, class RankType >
+void ExactSubtreeRepeats::repeatsPostfixRanked ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols, std::vector < std::ranked_symbol < unsigned, RankType > > & res ) {
+	// The auxiliary arrays are sized treeSize - 1 and read H.back ( ), neither of which is valid for 0 nodes.
+	if ( symbols.empty ( ) )
+		return;
+
+	ExactSubtreeRepeats::ExactSubtreeRepeatsAux aux ( symbols );
+
+	std::vector < std::deque < unsigned > > As ( aux.alphabetSize ); // leaf positions per symbol number
+	std::vector < bool > Bs ( aux.alphabetSize ); // symbol number currently queued in Q5
+	std::vector < unsigned > Cs ( aux.alphabetSize ); // repeat id given to each leaf symbol, 0 if none yet
+	std::queue < unsigned > Q5;
+
+	for ( unsigned i = 0; i < aux.treeSize; ++i ) {
+		if ( static_cast < unsigned > ( symbols[i].getRank ( ) ) != 0 ) {
+			aux.T[i] = 0;
+			aux.TL[i] = 0;
+			continue;
+		}
+
+		// Every leaf is a repeat of length 1; equal leaves share one id.
+		const unsigned k = aux.mu[i];
+
+		if ( !Bs[k] ) {
+			Bs[k] = true;
+			Q5.push ( k );
+		}
+
+		As[k].push_back ( i );
+
+		if ( Cs[k] == 0 ) {
+			aux.sc += 1;
+			Cs[k] = aux.sc;
+		}
+
+		aux.T[i] = Cs[k];
+		aux.TL[i] = 1;
+	}
+
+	// Check As contents
+	if ( common::GlobalData::verbose ) {
+		std::clog << "One node repeats (As): ";
+
+		for ( unsigned i = 0; i < As.size ( ); ++i )
+			if ( !As[i].empty ( ) )
+				std::clog << i << ":" << As[i] << " ";
+
+		std::clog << std::endl;
+	}
+
+	while ( !Q5.empty ( ) ) {
+		const unsigned k = Q5.front ( );
+		Q5.pop ( );
+		Bs[k] = false;
+
+		if ( common::GlobalData::verbose )
+			std::clog << " ! Repeat : " << As[k] << " " << 1 << std::endl;
+
+		Triplet leaves ( As[k], 1u, 0 );
+		aux.found_repeats.push_back ( leaves );
+		ExactSubtreeRepeats::assignLevel ( std::move ( leaves ), aux );
+	}
+
+	for ( unsigned i = 1; i <= aux.H.back ( ); i++ )
+		while ( !aux.LA[i].empty ( ) ) {
+			ExactSubtreeRepeats::partition ( aux.LA[i].front ( ), symbols, aux );
+			aux.LA[i].pop ( );
+		}
+
+	// prepare result: ids are 1-based positions in found_repeats, written at the last position of each occurrence
+	std::vector < unsigned > post_repeats ( aux.treeSize );
+	unsigned curr_repeat = aux.found_repeats.size ( );
+
+	for ( auto it = aux.found_repeats.rbegin ( ); it != aux.found_repeats.rend ( ); ++it, --curr_repeat )
+		for ( unsigned s : std::get < 0 > ( * it ) )
+			post_repeats[s + std::get < 1 > ( * it ) - 1] = curr_repeat;
+
+	if ( common::GlobalData::verbose ) {
+		std::clog << "Repeat postfix string : ";
+
+		for ( unsigned id : post_repeats )
+			std::clog << id << " ";
+
+		std::clog << std::endl;
+	}
+
+	res.reserve ( res.size ( ) + aux.treeSize );
+
+	for ( unsigned i = 0; i < aux.treeSize; i++ )
+		res.push_back ( std::ranked_symbol < unsigned, RankType > ( post_repeats[i], symbols[i].getRank ( ) ) );
+}
+
+template < class SymbolType, class RankType >
+tree::PostfixRankedTree < unsigned, RankType > ExactSubtreeRepeats::repeats ( const tree::PostfixRankedTree < SymbolType, RankType > & tree ) {
+	std::vector < std::ranked_symbol < unsigned, RankType > > res;
+
+	repeatsPostfixRanked ( tree.getContent ( ), res );
+
+	return tree::PostfixRankedTree < unsigned, RankType > ( res );
+}
+
+} /* namespace properties */
+
+} /* namespace tree */
+
+#endif /* _ARBOLOGY_SUBTREE_REPEATS_H_ */