diff --git a/alib2algo/src/tree/properties/ExactSubtreeRepeats.h b/alib2algo/src/tree/properties/ExactSubtreeRepeats.h index 8de0d9fd9267919461344bb5e9c9c61d460582ca..3f82debc7c2ad2eeae3a32ebcdb444cc2eb1d6e6 100644 --- a/alib2algo/src/tree/properties/ExactSubtreeRepeats.h +++ b/alib2algo/src/tree/properties/ExactSubtreeRepeats.h @@ -36,13 +36,40 @@ namespace properties { */ class ExactSubtreeRepeats : public std::SingleDispatch < ExactSubtreeRepeats, tree::Tree, const tree::TreeBase & > { + /** + * A nested class to hold and efficiently pass auxiliary arrays + */ class ExactSubtreeRepeatsAux { + + /** + * Constructs array mu + * mu is a mapping from a ranked symbol to a number + * @param symbols The tree in postfix notation + */ template < class SymbolType, class RankType > void buildMu ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols ); + + /** + * Constructs parent array P + * P[i] stores the index of the parent node for node i + * @param symbols The tree in postfix notation + */ template < class SymbolType, class RankType > void buildP ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols ); + + /** + * Constructs array H + * H[i] stores height of the node i + * @param symbols The tree in postfix notation + */ template < class SymbolType, class RankType > void buildH ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols ); + + /** + * Constructs array FC + * FC[i] is true if node i is the first child of its parent + * @param symbols The tree in postfix notation + */ template < class SymbolType, class RankType > void buildFC ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols ); @@ -52,18 +79,39 @@ class ExactSubtreeRepeats : public std::SingleDispatch < ExactSubtreeRepeats, tr std::vector < unsigned > P; std::vector < unsigned > H; std::vector < bool > FC; - std::vector < unsigned > T; - std::vector < unsigned > TL; - std::vector < std::queue < std::tuple < 
std::deque < unsigned >, unsigned, int > > > LA; - std::list < std::tuple < std::deque < unsigned >, unsigned, int > > found_repeats; - unsigned alphabetSize; - unsigned treeSize; - unsigned sc; + std::vector < unsigned > T; /**< Stores ID of the last s-repeat found at this node. */ + std::vector < unsigned > TL; /**< Complements array T. For every i, TL[i] stores the length of the s-repeat at T[i]. */ + std::vector < std::queue < std::tuple < std::deque < unsigned >, unsigned, int > > > LA; /**< Level array. At index i stores repeats scheduled for processing at height i. */ + std::list < std::tuple < std::deque < unsigned >, unsigned, int > > found_repeats; /**< Stores found repeats to be accessed later. */ + unsigned alphabetSize; /**< Number of unique ranked symbols. */ + unsigned treeSize; /**< Number of nodes in a tree. */ + unsigned sc; /**< Tracks IDs of subtree repeats. */ }; + /** + * Starting point of the algorithm + * + * @param symbols The tree in postfix notation + * @param res Array reference to store the result + */ template < class SymbolType, class RankType > static void repeatsPostfixRanked ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols, std::vector < std::ranked_symbol < unsigned, RankType > > & res ); + + /** + * Assigns repeat triplet to the next height + * Checks whether some of the nodes in triplet->S can define subtrees on higher levels of the tree. 
+ * @param triplet Triplet to process + * @param aux Reference to auxiliary structures + */ static void assignLevel ( std::tuple < std::deque < unsigned >, unsigned, int > triplet, ExactSubtreeRepeats::ExactSubtreeRepeatsAux & aux ); + + /** + * Tries to expand triplet (S, l, ac) until it represents a subtree + * + * @param triplet Triplet to process + * @param symbols The tree in postfix notation + * @param aux Reference to auxiliary structures + */ template < class SymbolType, class RankType > static void partition ( std::tuple < std::deque < unsigned >, unsigned, int > triplet, const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols, ExactSubtreeRepeats::ExactSubtreeRepeatsAux & aux ); @@ -104,7 +152,7 @@ ExactSubtreeRepeats::ExactSubtreeRepeatsAux::ExactSubtreeRepeatsAux ( const std: template < class SymbolType, class RankType > void ExactSubtreeRepeats::ExactSubtreeRepeatsAux::buildMu ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols ) { - // Build mapping mu_map((Symb, Rank) -> Number) + // Build mapping mu_map((Symb, Rank) -> Number) and construct array mu from it std::map < std::pair < SymbolType, RankType >, unsigned > mu_map; this->alphabetSize = 0; @@ -136,7 +184,7 @@ void ExactSubtreeRepeats::ExactSubtreeRepeatsAux::buildMu ( const std::vector < template < class SymbolType, class RankType > void ExactSubtreeRepeats::ExactSubtreeRepeatsAux::buildP ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols ) { - // Build parrent array + // Build parent array P = std::vector < unsigned > ( this->treeSize - 1 ); std::stack < unsigned > RP; @@ -263,6 +311,10 @@ void ExactSubtreeRepeats::assignLevel ( std::tuple < std::deque < unsigned >, un aux.LA[k].push ( std::make_tuple ( An[k], l, 0 ) ); Bn[k] = false; + /* This is line 15 (in paper) in the Assign-Level algorithm + * It has no effect here, as I made "An" local and the function ends right after the while loop. 
+ * I leave it here to be consistent with the algorithm. + */ while ( !An[k].empty ( ) ) An[k].pop_front ( ); } @@ -316,6 +368,11 @@ void ExactSubtreeRepeats::partition ( std::tuple < std::deque < unsigned >, unsi unsigned k = Q1.front ( ); Q1.pop ( ); Q3.push ( En[k] ); + + /* The next two lines can be safely removed (Partition(), lines 23-24 in the paper) + * The variables are local and won't be used in the function again. + * Leaving them here to show every step of the algorithm. + */ En[k] = std::tuple < std::deque < unsigned >, unsigned, int > ( ); Bn[k] = false; } @@ -324,6 +381,11 @@ void ExactSubtreeRepeats::partition ( std::tuple < std::deque < unsigned >, unsi unsigned k = Q2.front ( ); Q2.pop ( ); Q3.push ( Es[k] ); + + /* The next two lines can be safely removed (Partition(), lines 28-29 in the paper) + * The variables are local and won't be used in the function again. + * Leaving them here to show every step of the algorithm. + */ Es[k] = std::tuple < std::deque < unsigned >, unsigned, int > ( ); Bs[k] = false; } @@ -416,7 +478,12 @@ void ExactSubtreeRepeats::repeatsPostfixRanked ( const std::vector < std::ranked aux.LA[i].pop ( ); } - // prepare result + /* Prepare result : + * we have collected the triplets at this point and + * need to build a postfix representation of a tree from them. + * S-repeat IDs will be used as node labels for the root (index_from_S + l) + * of each subtree. + */ std::vector < unsigned > post_repeats ( aux.treeSize ); unsigned curr_repeat = aux.found_repeats.size ( );