From 53e3ece92e01224865a971e63a7b4f03a9d95853 Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Fri, 13 Jan 2017 10:55:16 +0100 Subject: [PATCH] template subtree repeats naive algorithm --- .../src/arbology/exact/BoyerMooreHorspool.h | 4 +- .../ExactNonlinearTreePatternAutomaton.cpp | 20 +-- .../ExactNonlinearTreePatternAutomaton.h | 3 +- .../src/arbology/exact/ExactPatternMatch.h | 24 ++-- .../exact/ReversedBoyerMooreHorspool.h | 8 +- .../properties/ExactSubtreeRepeatsNaive.cpp | 98 +------------- .../properties/ExactSubtreeRepeatsNaive.h | 122 +++++++++++++++++- alib2data/src/alphabet/ranked_symbol.hpp | 2 +- 8 files changed, 147 insertions(+), 134 deletions(-) diff --git a/alib2algo/src/arbology/exact/BoyerMooreHorspool.h b/alib2algo/src/arbology/exact/BoyerMooreHorspool.h index 67ad272db2..9f4cd18746 100644 --- a/alib2algo/src/arbology/exact/BoyerMooreHorspool.h +++ b/alib2algo/src/arbology/exact/BoyerMooreHorspool.h @@ -101,9 +101,9 @@ std::set < unsigned > BoyerMooreHorspool::match ( const tree::PrefixRankedBarTre std::set < unsigned > occ; std::map < std::ranked_symbol < SymbolType, RankType >, size_t > bcs = tree::properties::BadCharacterShiftTable::bcs ( pattern ); //NOTE: the subjects alphabet must be a subset or equal to the pattern std::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); - std::map < std::ranked_symbol < SymbolType, RankType >, SymbolType > variablesSetting; + std::map < std::ranked_symbol < SymbolType, RankType >, unsigned > variablesSetting; - tree::PrefixRankedBarTree < > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); + tree::PrefixRankedBarTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); // index to the subject unsigned i = 0; diff --git a/alib2algo/src/arbology/exact/ExactNonlinearTreePatternAutomaton.cpp b/alib2algo/src/arbology/exact/ExactNonlinearTreePatternAutomaton.cpp 
index ce4eab5e36..a8125ff4e7 100644 --- a/alib2algo/src/arbology/exact/ExactNonlinearTreePatternAutomaton.cpp +++ b/alib2algo/src/arbology/exact/ExactNonlinearTreePatternAutomaton.cpp @@ -24,9 +24,9 @@ automaton::Automaton ExactNonlinearTreePatternAutomaton::construct ( const tree: return dispatch ( tree.getData ( ), subtreeWildcard, nonlinearVariables ); } -void ExactNonlinearTreePatternAutomaton::constructTail ( automaton::InputDrivenNPDA < > & res, const tree::PrefixRankedTree < > & tree, const DefaultSymbolType & subtreeWildcard, const DefaultSymbolType & currentNonlinearVariable, const std::set < DefaultSymbolType > & nonlinearVariables, const std::ranked_symbol < > & subtreeSettings, unsigned subtreeId, std::vector < std::ranked_symbol < > >::const_iterator rankedSymbolsIter, int i, std::vector < std::ranked_symbol < > >::const_iterator subtreeRepeatsIter ) { +void ExactNonlinearTreePatternAutomaton::constructTail ( automaton::InputDrivenNPDA < > & res, const tree::PrefixRankedTree < > & tree, const DefaultSymbolType & subtreeWildcard, const DefaultSymbolType & currentNonlinearVariable, const std::set < DefaultSymbolType > & nonlinearVariables, const std::ranked_symbol < unsigned, DefaultRankType > & subtreeSettings, unsigned subtreeId, std::vector < std::ranked_symbol < > >::const_iterator rankedSymbolsIter, int i, std::vector < std::ranked_symbol < unsigned, DefaultRankType > >::const_iterator subtreeRepeatsIter ) { std::deque < std::pair < size_t, int > > subtreeJumps; - std::deque < std::ranked_symbol < > > subtreeRepeatsStack; + std::deque < std::ranked_symbol < unsigned, DefaultRankType > > subtreeRepeatsStack; for (++ rankedSymbolsIter, ++ subtreeRepeatsIter, ++i; rankedSymbolsIter != tree.getContent ( ).end ( ); ++ rankedSymbolsIter, ++ subtreeRepeatsIter, ++i ) { DefaultSymbolType symbol ( alphabet::RankedSymbol < > { * rankedSymbolsIter } ); @@ -62,13 +62,7 @@ automaton::InputDrivenNPDA < > ExactNonlinearTreePatternAutomaton::constructInte 
DefaultSymbolType S = DefaultSymbolType ( 'S' ); automaton::InputDrivenNPDA < > res ( DefaultStateType ( 0 ), S ); - tree::PrefixRankedTree < > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( tree ); - std::map < DefaultSymbolType, unsigned > repeatsToIds; - int maxId = 0; - - for ( const std::ranked_symbol < > & repeat : repeats.getContent ( ) ) - if ( !repeatsToIds.count ( repeat.getSymbol ( ) ) ) - repeatsToIds.insert ( std::make_pair ( repeat.getSymbol ( ), maxId++ ) ); + tree::PrefixRankedTree < unsigned, DefaultRankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( tree ); for ( const std::ranked_symbol < > & rankedSymbol : tree.getAlphabet ( ) ) { DefaultSymbolType symbol ( alphabet::RankedSymbol < > { rankedSymbol } ); @@ -86,10 +80,10 @@ automaton::InputDrivenNPDA < > ExactNonlinearTreePatternAutomaton::constructInte int i = 1; std::deque < std::pair < size_t, int > > subtreeJumps; - std::deque < std::ranked_symbol < > > subtreeRepeatsStack; + std::deque < std::ranked_symbol < unsigned, DefaultRankType > > subtreeRepeatsStack; std::vector < std::ranked_symbol < > >::const_iterator rankedSymbolsIter; - std::vector < std::ranked_symbol < > >::const_iterator subtreeRepeatsIter; + std::vector < std::ranked_symbol < unsigned, DefaultRankType > >::const_iterator subtreeRepeatsIter; for ( rankedSymbolsIter = tree.getContent ( ).begin(), subtreeRepeatsIter = repeats.getContent ( ).begin ( ); rankedSymbolsIter != tree.getContent ( ).end ( ); ++ rankedSymbolsIter, ++ subtreeRepeatsIter, ++ i ) { DefaultSymbolType symbol ( alphabet::RankedSymbol < > { * rankedSymbolsIter } ); subtreeJumps.push_back ( std::make_pair ( ( size_t ) rankedSymbolsIter->getRank ( ), i - 1 ) ); @@ -111,8 +105,8 @@ automaton::InputDrivenNPDA < > ExactNonlinearTreePatternAutomaton::constructInte if ( nonlinearVariable != currentNonlinearVariable ) res.addTransition ( jumpSource, nonlinearVariable, currentState ); else { - std::ranked_symbol < > 
subtreeSettings = subtreeRepeatsStack.back ( ); - unsigned subtreeId = repeatsToIds.find ( subtreeSettings.getSymbol ( ) )->second; + const std::ranked_symbol < unsigned, DefaultRankType > & subtreeSettings = subtreeRepeatsStack.back ( ); + unsigned subtreeId = subtreeSettings.getSymbol ( ); DefaultStateType targetState = DefaultStateType ( i, subtreeId ); res.addState ( targetState ); diff --git a/alib2algo/src/arbology/exact/ExactNonlinearTreePatternAutomaton.h b/alib2algo/src/arbology/exact/ExactNonlinearTreePatternAutomaton.h index d8689c35e4..43fa3879ef 100644 --- a/alib2algo/src/arbology/exact/ExactNonlinearTreePatternAutomaton.h +++ b/alib2algo/src/arbology/exact/ExactNonlinearTreePatternAutomaton.h @@ -11,6 +11,7 @@ #include <automaton/AutomatonFeatures.h> #include <tree/TreeFeatures.h> #include <core/multipleDispatch.hpp> +#include <alphabet/Symbol.h> #include <alphabet/RankedSymbol.h> #include <vector> @@ -22,7 +23,7 @@ namespace exact { class ExactNonlinearTreePatternAutomaton : public std::SingleDispatch < ExactNonlinearTreePatternAutomaton, automaton::Automaton, const tree::TreeBase &, const DefaultSymbolType &, const std::set < DefaultSymbolType > & > { static automaton::InputDrivenNPDA < > constructInternal ( const tree::PrefixRankedTree < > & tree, const DefaultSymbolType & subtreeWildcard, const DefaultSymbolType & currentNonlinearVariable, const std::set < DefaultSymbolType > & nonlinearVariables ); - static void constructTail ( automaton::InputDrivenNPDA < > & res, const tree::PrefixRankedTree < > & tree, const DefaultSymbolType & subtreeWildcard, const DefaultSymbolType & currentNonlinearVariable, const std::set < DefaultSymbolType > & nonlinearVariables, const std::ranked_symbol < > & subtreeSettings, unsigned subtreeId, std::vector < std::ranked_symbol < > >::const_iterator rankedSymbolsIter, int i, std::vector < std::ranked_symbol < > >::const_iterator subtreeRepeatsIter ); + static void constructTail ( automaton::InputDrivenNPDA < > & res, 
const tree::PrefixRankedTree < > & tree, const DefaultSymbolType & subtreeWildcard, const DefaultSymbolType & currentNonlinearVariable, const std::set < DefaultSymbolType > & nonlinearVariables, const std::ranked_symbol < unsigned, DefaultRankType > & subtreeSettings, unsigned subtreeId, std::vector < std::ranked_symbol < > >::const_iterator rankedSymbolsIter, int i, std::vector < std::ranked_symbol < unsigned, DefaultRankType > >::const_iterator subtreeRepeatsIter ); public: /** diff --git a/alib2algo/src/arbology/exact/ExactPatternMatch.h b/alib2algo/src/arbology/exact/ExactPatternMatch.h index fb48d9e246..01a6fd3373 100644 --- a/alib2algo/src/arbology/exact/ExactPatternMatch.h +++ b/alib2algo/src/arbology/exact/ExactPatternMatch.h @@ -70,14 +70,14 @@ private: template < class SymbolType, class RankType > static bool matchHelper ( const std::tree < std::ranked_symbol < SymbolType, RankType > > & subject, const std::tree < std::ranked_symbol < SymbolType, RankType > > & pattern, const std::ranked_symbol < SymbolType, RankType > & subtreeVariable ); template < class SymbolType, class RankType > - static bool matchHelper ( const std::tree < std::ranked_symbol < SymbolType, RankType > > & subject, const std::tree < std::ranked_symbol < SymbolType, RankType > > & pattern, const std::ranked_symbol < SymbolType, RankType > & subtreeVariable, const std::set < std::ranked_symbol < SymbolType, RankType > > & nonlinearVariables, const std::tree < std::ranked_symbol < SymbolType, RankType > > & repeats, std::map < std::ranked_symbol < SymbolType, RankType >, SymbolType > & variablesSetting ); + static bool matchHelper ( const std::tree < std::ranked_symbol < SymbolType, RankType > > & subject, const std::tree < std::ranked_symbol < SymbolType, RankType > > & pattern, const std::ranked_symbol < SymbolType, RankType > & subtreeVariable, const std::set < std::ranked_symbol < SymbolType, RankType > > & nonlinearVariables, const std::tree < std::ranked_symbol < unsigned, RankType 
> > & repeats, std::map < std::ranked_symbol < SymbolType, RankType >, unsigned > & variablesSetting ); template < class SymbolType > static void matchInternal ( unsigned & index, std::set < unsigned > & occ, const std::tree < SymbolType > & subject, const std::tree < SymbolType > & pattern, const SymbolType & subtreeVariable ); template < class SymbolType, class RankType > static void matchInternal ( unsigned & index, std::set < unsigned > & occ, const std::tree < std::ranked_symbol < SymbolType, RankType > > & subject, const std::tree < std::ranked_symbol < SymbolType, RankType > > & pattern, const std::ranked_symbol < SymbolType, RankType > & subtreeVariable ); template < class SymbolType, class RankType > - static void matchInternal ( unsigned & index, std::set < unsigned > & occ, const std::tree < std::ranked_symbol < SymbolType, RankType > > & subject, const std::tree < std::ranked_symbol < SymbolType, RankType > > & pattern, const std::ranked_symbol < SymbolType, RankType > & subtreeVariable, const std::set < std::ranked_symbol < SymbolType, RankType > > & nonlinearVariables, const std::tree < std::ranked_symbol < SymbolType, RankType > > & subjectRepeats ); + static void matchInternal ( unsigned & index, std::set < unsigned > & occ, const std::tree < std::ranked_symbol < SymbolType, RankType > > & subject, const std::tree < std::ranked_symbol < SymbolType, RankType > > & pattern, const std::ranked_symbol < SymbolType, RankType > & subtreeVariable, const std::set < std::ranked_symbol < SymbolType, RankType > > & nonlinearVariables, const std::tree < std::ranked_symbol < unsigned, RankType > > & subjectRepeats ); }; @@ -109,7 +109,7 @@ bool ExactPatternMatch::matchHelper ( const std::tree < std::ranked_symbol < Sym } template < class SymbolType, class RankType > -bool ExactPatternMatch::matchHelper ( const std::tree < std::ranked_symbol < SymbolType, RankType > > & subject, const std::tree < std::ranked_symbol < SymbolType, RankType > > & pattern, const 
std::ranked_symbol < SymbolType, RankType > & subtreeVariable, const std::set < std::ranked_symbol < SymbolType, RankType > > & nonlinearVariables, const std::tree < std::ranked_symbol < SymbolType, RankType > > & repeats, std::map < std::ranked_symbol < SymbolType, RankType >, SymbolType > & variablesSetting ) { +bool ExactPatternMatch::matchHelper ( const std::tree < std::ranked_symbol < SymbolType, RankType > > & subject, const std::tree < std::ranked_symbol < SymbolType, RankType > > & pattern, const std::ranked_symbol < SymbolType, RankType > & subtreeVariable, const std::set < std::ranked_symbol < SymbolType, RankType > > & nonlinearVariables, const std::tree < std::ranked_symbol < unsigned, RankType > > & repeats, std::map < std::ranked_symbol < SymbolType, RankType >, unsigned > & variablesSetting ) { if ( pattern.getData ( ) == subtreeVariable ) return true; if ( nonlinearVariables.count ( pattern.getData ( ) ) ) { @@ -125,7 +125,7 @@ bool ExactPatternMatch::matchHelper ( const std::tree < std::ranked_symbol < Sym if ( subject.getData ( ) != pattern.getData ( ) ) return false; // ranked symbols are the same; test for number of children is not needed - for ( const std::tuple < const std::tree < std::ranked_symbol < SymbolType, RankType > >, const std::tree < std::ranked_symbol < SymbolType, RankType > >, const std::tree < std::ranked_symbol < SymbolType, RankType > > > & childs : std::make_tuple_foreach ( subject.getChildren ( ), pattern.getChildren ( ), repeats.getChildren ( ) ) ) + for ( const std::tuple < const std::tree < std::ranked_symbol < SymbolType, RankType > >, const std::tree < std::ranked_symbol < SymbolType, RankType > >, const std::tree < std::ranked_symbol < unsigned, RankType > > > & childs : std::make_tuple_foreach ( subject.getChildren ( ), pattern.getChildren ( ), repeats.getChildren ( ) ) ) if ( !matchHelper ( std::get < 0 > ( childs ), std::get < 1 > ( childs ), subtreeVariable, nonlinearVariables, std::get < 2 > ( childs ), 
variablesSetting ) ) return false; return true; @@ -152,14 +152,14 @@ void ExactPatternMatch::matchInternal ( unsigned & index, std::set < unsigned > } template < class SymbolType, class RankType > -void ExactPatternMatch::matchInternal ( unsigned & index, std::set < unsigned > & occ, const std::tree < std::ranked_symbol < SymbolType, RankType > > & subject, const std::tree < std::ranked_symbol < SymbolType, RankType > > & pattern, const std::ranked_symbol < SymbolType, RankType > & subtreeVariable, const std::set < std::ranked_symbol < SymbolType, RankType > > & nonlinearVariables, const std::tree < std::ranked_symbol < SymbolType, RankType > > & repeats ) { - std::map < std::ranked_symbol < SymbolType, RankType >, SymbolType > variablesSetting; +void ExactPatternMatch::matchInternal ( unsigned & index, std::set < unsigned > & occ, const std::tree < std::ranked_symbol < SymbolType, RankType > > & subject, const std::tree < std::ranked_symbol < SymbolType, RankType > > & pattern, const std::ranked_symbol < SymbolType, RankType > & subtreeVariable, const std::set < std::ranked_symbol < SymbolType, RankType > > & nonlinearVariables, const std::tree < std::ranked_symbol < unsigned, RankType > > & repeats ) { + std::map < std::ranked_symbol < SymbolType, RankType >, unsigned > variablesSetting; if ( matchHelper ( subject, pattern, subtreeVariable, nonlinearVariables, repeats, variablesSetting ) ) occ.insert ( index ); index++; - for ( const std::tuple < const std::tree < std::ranked_symbol < SymbolType, RankType > >, const std::tree < std::ranked_symbol < SymbolType, RankType > > > & childs : std::make_tuple_foreach ( subject.getChildren ( ), repeats.getChildren ( ) ) ) + for ( const std::tuple < const std::tree < std::ranked_symbol < SymbolType, RankType > >, const std::tree < std::ranked_symbol < unsigned, RankType > > > & childs : std::make_tuple_foreach ( subject.getChildren ( ), repeats.getChildren ( ) ) ) matchInternal ( index, occ, std::get < 0 > ( childs ), 
pattern, subtreeVariable, nonlinearVariables, std::get < 1 > ( childs ) ); } @@ -186,7 +186,7 @@ std::set < unsigned > ExactPatternMatch::match ( const tree::RankedTree < Symbol unsigned i = 0; std::set < unsigned > occ; - tree::RankedTree < > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); + tree::RankedTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); matchInternal ( i, occ, subject.getContent ( ), pattern.getContent ( ), pattern.getSubtreeWildcard ( ), pattern.getNonlinearVariables ( ), repeats.getContent ( ) ); return occ; @@ -221,9 +221,9 @@ std::set < unsigned > ExactPatternMatch::match ( const tree::PrefixRankedTree < template < class SymbolType, class RankType > std::set < unsigned > ExactPatternMatch::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedNonlinearPattern < SymbolType, RankType > & pattern ) { std::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); - std::map < std::ranked_symbol < SymbolType, RankType >, SymbolType > variablesSetting; + std::map < std::ranked_symbol < SymbolType, RankType >, unsigned > variablesSetting; - tree::PrefixRankedTree < > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); + tree::PrefixRankedTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); std::set < unsigned > occ; @@ -287,9 +287,9 @@ std::set < unsigned > ExactPatternMatch::match ( const tree::PrefixRankedBarTree template < class SymbolType, class RankType > std::set < unsigned > ExactPatternMatch::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarNonlinearPattern < SymbolType, RankType > & pattern ) { std::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); - std::map < std::ranked_symbol < SymbolType, RankType >, 
SymbolType > variablesSetting; + std::map < std::ranked_symbol < SymbolType, RankType >, unsigned > variablesSetting; - tree::PrefixRankedBarTree < > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); + tree::PrefixRankedBarTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); std::set < unsigned > occ; diff --git a/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.h b/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.h index 35c3492793..bc5808d6b5 100644 --- a/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.h +++ b/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.h @@ -110,9 +110,9 @@ std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRank std::set < unsigned > occ; std::map < std::ranked_symbol < SymbolType, RankType >, size_t > bcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern std::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); - std::map < std::ranked_symbol < SymbolType, RankType >, SymbolType > variablesSetting; + std::map < std::ranked_symbol < SymbolType, RankType >, unsigned > variablesSetting; - tree::PrefixRankedBarTree < > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); + tree::PrefixRankedBarTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); // index to the subject int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1; @@ -213,9 +213,9 @@ std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRank std::set < unsigned > occ; std::map < std::ranked_symbol < SymbolType, RankType >, size_t > bcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern std::vector < int > 
subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); - std::map < std::ranked_symbol < SymbolType, RankType >, SymbolType > variablesSetting; + std::map < std::ranked_symbol < SymbolType, RankType >, unsigned > variablesSetting; - tree::PrefixRankedTree < > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); + tree::PrefixRankedTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); // index to the subject int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1; diff --git a/alib2algo/src/tree/properties/ExactSubtreeRepeatsNaive.cpp b/alib2algo/src/tree/properties/ExactSubtreeRepeatsNaive.cpp index aadee252e2..89c35c81c5 100644 --- a/alib2algo/src/tree/properties/ExactSubtreeRepeatsNaive.cpp +++ b/alib2algo/src/tree/properties/ExactSubtreeRepeatsNaive.cpp @@ -6,13 +6,7 @@ */ #include "ExactSubtreeRepeatsNaive.h" -#include "SubtreeJumpTable.h" - -#include <tree/ranked/RankedTree.h> -#include <tree/ranked/PrefixRankedTree.h> -#include <tree/ranked/PrefixRankedBarTree.h> #include <tree/Tree.h> -#include <global/GlobalData.h> namespace tree { @@ -22,95 +16,9 @@ tree::Tree ExactSubtreeRepeatsNaive::repeats ( const tree::Tree & tree ) { return dispatch ( tree.getData ( ) ); } -std::tree < std::ranked_symbol < > > ExactSubtreeRepeatsNaive::repeats ( const std::tree < std::ranked_symbol < > > & node, std::map < std::pair < std::ranked_symbol < >, std::vector < std::ranked_symbol < > > >, int > & data, int & minId ) { - std::vector < std::tree < std::ranked_symbol < > > > children; - std::pair < std::ranked_symbol < >, std::vector < std::ranked_symbol < > > > childRepeatsKey ( node.getData ( ), std::vector < std::ranked_symbol < > > ( ) ); - - for ( const std::tree < std::ranked_symbol < > > & child : node.getChildren() ) { - children.push_back ( repeats ( child, data, minId ) ); - childRepeatsKey.second.push_back ( children.back ( ).getData ( ) 
); - } - - int & uniqueRepeatId = data[childRepeatsKey]; - - if ( uniqueRepeatId == 0 ) uniqueRepeatId = minId++; - - return std::tree < std::ranked_symbol < > > ( std::ranked_symbol < > ( DefaultSymbolType ( uniqueRepeatId ), node.getData ( ).getRank ( ) ), std::move ( children ) ); -} - -tree::RankedTree < > ExactSubtreeRepeatsNaive::repeats ( const tree::RankedTree < > & tree ) { - int minId = 1; - std::map < std::pair < std::ranked_symbol < >, std::vector < std::ranked_symbol < > > >, int > data; - - return tree::RankedTree < > ( repeats ( tree.getContent ( ), data, minId ) ); -} - -auto ExactRepeatsNaiveRankedTree = ExactSubtreeRepeatsNaive::RegistratorWrapper < tree::RankedTree < >, tree::RankedTree < > > ( ExactSubtreeRepeatsNaive::repeats ); - -std::ranked_symbol < > ExactSubtreeRepeatsNaive::repeatsPrefixRanked ( const std::vector < std::ranked_symbol < > > & symbols, std::vector < std::ranked_symbol < > > & res, std::map < std::pair < std::ranked_symbol < >, std::vector < std::ranked_symbol < > > >, int > & data, int & minId, int & index ) { - int begin = index; - std::pair < std::ranked_symbol < >, std::vector < std::ranked_symbol < > > > childRepeatsKey ( symbols[begin], std::vector < std::ranked_symbol < > > ( ) ); - - res.push_back ( std::ranked_symbol < > ( DefaultSymbolType ( 0 ), symbols[begin].getRank ( ) ) ); - - index++; - - for ( unsigned i = 0; i < ( unsigned ) symbols[begin].getRank ( ); ++i ) - childRepeatsKey.second.push_back ( repeatsPrefixRanked ( symbols, res, data, minId, index ) ); - - int & uniqueRepeatId = data[childRepeatsKey]; - - if ( uniqueRepeatId == 0 ) uniqueRepeatId = minId++; - - res[begin] = std::ranked_symbol < > ( DefaultSymbolType ( uniqueRepeatId ), symbols[begin].getRank ( ) ); - return res[begin]; -} - -tree::PrefixRankedTree < > ExactSubtreeRepeatsNaive::repeats ( const tree::PrefixRankedTree < > & tree ) { - int minId = 1; - int index = 0; - std::vector < std::ranked_symbol < > > res; - std::map < std::pair < 
std::ranked_symbol < >, std::vector < std::ranked_symbol < > > >, int > data; - - repeatsPrefixRanked ( tree.getContent ( ), res, data, minId, index ); - return tree::PrefixRankedTree < > ( res ); -} - -auto ExactRepeatsNaivePrefixRankedTree = ExactSubtreeRepeatsNaive::RegistratorWrapper < tree::PrefixRankedTree < >, tree::PrefixRankedTree < > > ( ExactSubtreeRepeatsNaive::repeats ); - -std::ranked_symbol < > ExactSubtreeRepeatsNaive::repeatsPrefixRankedBar ( const std::vector < std::ranked_symbol < > > & symbols, std::vector < std::ranked_symbol < > > & res, std::map < std::pair < std::ranked_symbol < >, std::vector < std::ranked_symbol < > > >, int > & data, int & minId, int & index ) { - int begin = index; - std::pair < std::ranked_symbol < >, std::vector < std::ranked_symbol < > > > childRepeatsKey ( symbols[begin], std::vector < std::ranked_symbol < > > ( ) ); - - res.push_back ( std::ranked_symbol < > ( DefaultSymbolType ( 0 ), symbols[begin].getRank ( ) ) ); - - index++; - - for ( unsigned i = 0; i < ( unsigned ) symbols[begin].getRank ( ); ++i ) - childRepeatsKey.second.push_back ( repeatsPrefixRankedBar ( symbols, res, data, minId, index ) ); - - int & uniqueRepeatId = data[childRepeatsKey]; - - if ( uniqueRepeatId == 0 ) uniqueRepeatId = minId++; - - res[begin] = std::ranked_symbol < > ( DefaultSymbolType ( uniqueRepeatId ), symbols[begin].getRank ( ) ); - res.push_back ( symbols[index] ); - index++; - - return res[begin]; -} - -tree::PrefixRankedBarTree < > ExactSubtreeRepeatsNaive::repeats ( const tree::PrefixRankedBarTree < > & tree ) { - int minId = 1; - int index = 0; - std::vector < std::ranked_symbol < > > res; - std::map < std::pair < std::ranked_symbol < >, std::vector < std::ranked_symbol < > > >, int > data; - - repeatsPrefixRankedBar ( tree.getContent ( ), res, data, minId, index ); - return tree::PrefixRankedBarTree < > ( tree.getBars ( ), res ); -} - -auto ExactRepeatsNaivePrefixRankedBarTree = ExactSubtreeRepeatsNaive::RegistratorWrapper < 
tree::PrefixRankedBarTree < >, tree::PrefixRankedBarTree < > > ( ExactSubtreeRepeatsNaive::repeats ); +auto ExactRepeatsNaiveRankedTree = ExactSubtreeRepeatsNaive::RegistratorWrapper < tree::RankedTree < unsigned, DefaultRankType >, tree::RankedTree < > > ( ExactSubtreeRepeatsNaive::repeats ); +auto ExactRepeatsNaivePrefixRankedTree = ExactSubtreeRepeatsNaive::RegistratorWrapper < tree::PrefixRankedTree < unsigned, DefaultRankType >, tree::PrefixRankedTree < > > ( ExactSubtreeRepeatsNaive::repeats ); +auto ExactRepeatsNaivePrefixRankedBarTree = ExactSubtreeRepeatsNaive::RegistratorWrapper < tree::PrefixRankedBarTree < unsigned, DefaultRankType >, tree::PrefixRankedBarTree < > > ( ExactSubtreeRepeatsNaive::repeats ); } /* namespace properties */ diff --git a/alib2algo/src/tree/properties/ExactSubtreeRepeatsNaive.h b/alib2algo/src/tree/properties/ExactSubtreeRepeatsNaive.h index 2378926916..994a32c05d 100644 --- a/alib2algo/src/tree/properties/ExactSubtreeRepeatsNaive.h +++ b/alib2algo/src/tree/properties/ExactSubtreeRepeatsNaive.h @@ -16,6 +16,14 @@ #include <vector> #include <tree> #include <alphabet/RankedSymbol.h> +#include <primitive/Unsigned.h> + +#include "SubtreeJumpTable.h" + +#include <tree/ranked/RankedTree.h> +#include <tree/ranked/PrefixRankedTree.h> +#include <tree/ranked/PrefixRankedBarTree.h> +#include <global/GlobalData.h> namespace tree { @@ -25,9 +33,12 @@ namespace properties { * Simple computation of subtree repeats */ class ExactSubtreeRepeatsNaive : public std::SingleDispatch < ExactSubtreeRepeatsNaive, tree::Tree, const tree::TreeBase & > { - static std::tree < std::ranked_symbol < > > repeats ( const std::tree < std::ranked_symbol < > > & node, std::map < std::pair < std::ranked_symbol < >, std::vector < std::ranked_symbol < > > >, int > & data, int & minId ); - static std::ranked_symbol < > repeatsPrefixRanked ( const std::vector < std::ranked_symbol < > > & symbols, std::vector < std::ranked_symbol < > > & res, std::map < std::pair < 
std::ranked_symbol < >, std::vector < std::ranked_symbol < > > >, int > & data, int & minId, int & index ); - static std::ranked_symbol < > repeatsPrefixRankedBar ( const std::vector < std::ranked_symbol < > > & symbols, std::vector < std::ranked_symbol < > > & res, std::map < std::pair < std::ranked_symbol < >, std::vector < std::ranked_symbol < > > >, int > & data, int & minId, int & index ); + template < class SymbolType, class RankType > + static std::tree < std::ranked_symbol < unsigned, RankType > > repeats ( const std::tree < std::ranked_symbol < SymbolType, RankType > > & node, std::map < std::pair < std::ranked_symbol < SymbolType, RankType >, std::vector < std::ranked_symbol < unsigned, RankType > > >, unsigned > & data, unsigned & minId ); + template < class SymbolType, class RankType > + static std::ranked_symbol < unsigned, RankType > repeatsPrefixRanked ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols, std::vector < std::ranked_symbol < unsigned, RankType > > & res, std::map < std::pair < std::ranked_symbol < SymbolType, RankType >, std::vector < std::ranked_symbol < unsigned, RankType > > >, unsigned > & data, unsigned & minId, int & index ); + template < class SymbolType, class RankType > + static std::ranked_symbol < unsigned, RankType > repeatsPrefixRankedBar ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols, std::vector < std::ranked_symbol < unsigned, RankType > > & res, std::map < std::pair < std::ranked_symbol <SymbolType, RankType >, std::vector < std::ranked_symbol < unsigned, RankType > > >, unsigned > & data, unsigned & minId, unsigned barId, int & index ); public: /** @@ -40,12 +51,111 @@ public: * Compute a same shaped tree with nodes containing unique subtree ids. 
* @return Tree of repeats */ - static tree::RankedTree < > repeats ( const tree::RankedTree < > & tree ); - static tree::PrefixRankedTree < > repeats ( const tree::PrefixRankedTree < > & tree ); - static tree::PrefixRankedBarTree < > repeats ( const tree::PrefixRankedBarTree < > & tree ); + template < class SymbolType, class RankType > + static tree::RankedTree < unsigned, RankType > repeats ( const tree::RankedTree < SymbolType, RankType > & tree ); + template < class SymbolType, class RankType > + static tree::PrefixRankedTree < unsigned, RankType > repeats ( const tree::PrefixRankedTree < SymbolType, RankType > & tree ); + template < class SymbolType, class RankType > + static tree::PrefixRankedBarTree < unsigned, RankType > repeats ( const tree::PrefixRankedBarTree < SymbolType, RankType > & tree ); }; +/** Recursive helper for the RankedTree overload. Converts the subtree rooted at node, children first. The lookup key pairs the symbol of node with the root symbols of its already converted children; data maps every distinct key to one id, so equal keys share an id. minId is the next unused id and is advanced whenever a new key is seen. Returns the converted subtree, whose root keeps the rank of node. */ template < class SymbolType, class RankType > +std::tree < std::ranked_symbol < unsigned, RankType > > ExactSubtreeRepeatsNaive::repeats ( const std::tree < std::ranked_symbol < SymbolType, RankType > > & node, std::map < std::pair < std::ranked_symbol < SymbolType, RankType >, std::vector < std::ranked_symbol < unsigned, RankType > > >, unsigned > & data, unsigned & minId ) { + std::vector < std::tree < std::ranked_symbol < unsigned, RankType > > > children; + std::pair < std::ranked_symbol < SymbolType, RankType >, std::vector < std::ranked_symbol < unsigned, RankType > > > childRepeatsKey ( node.getData ( ), std::vector < std::ranked_symbol < unsigned, RankType > > ( ) ); + + for ( const std::tree < std::ranked_symbol < SymbolType, RankType > > & child : node.getChildren() ) { + children.push_back ( repeats ( child, data, minId ) ); + childRepeatsKey.second.push_back ( children.back ( ).getData ( ) ); + } + + /* operator[] value-initializes a missing entry to 0, so 0 marks a key not seen before and real ids must start at 1 */ unsigned & uniqueRepeatId = data[childRepeatsKey]; + + if ( uniqueRepeatId == 0 ) uniqueRepeatId = minId++; + + return std::tree < std::ranked_symbol < unsigned, RankType > > ( std::ranked_symbol < unsigned, RankType > ( uniqueRepeatId, node.getData (
).getRank ( ) ), std::move ( children ) ); +} + +/** Builds the repeats tree for a RankedTree. Ids start at 1 because 0 is the value a freshly inserted table entry holds. The id table is local to this call. */ template < class SymbolType, class RankType > +tree::RankedTree < unsigned, RankType > ExactSubtreeRepeatsNaive::repeats ( const tree::RankedTree < SymbolType, RankType > & tree ) { + unsigned minId = 1; + std::map < std::pair < std::ranked_symbol < SymbolType, RankType >, std::vector < std::ranked_symbol < unsigned, RankType > > >, unsigned > data; + + return tree::RankedTree < unsigned, RankType > ( repeats ( tree.getContent ( ), data, minId ) ); +} + +/** Helper for the prefix notation overload. Converts the subtree that starts at symbols[index]: appends a placeholder to res, advances index, recurses once per unit of the root rank to gather the repeat symbols of the children, then looks up or assigns the id for the pair (root symbol, child repeat symbols) and overwrites the placeholder. When it returns, index has been advanced past every symbol it consumed. No bounds checks are made; assumes symbols is a well formed prefix notation (confirm against callers). Returns the repeat symbol stored at res[begin]. */ template < class SymbolType, class RankType > +std::ranked_symbol < unsigned, RankType > ExactSubtreeRepeatsNaive::repeatsPrefixRanked ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols, std::vector < std::ranked_symbol < unsigned, RankType > > & res, std::map < std::pair < std::ranked_symbol < SymbolType, RankType >, std::vector < std::ranked_symbol < unsigned, RankType > > >, unsigned > & data, unsigned & minId, int & index ) { + int begin = index; + std::pair < std::ranked_symbol < SymbolType, RankType >, std::vector < std::ranked_symbol < unsigned, RankType > > > childRepeatsKey ( symbols[begin], std::vector < std::ranked_symbol < unsigned, RankType > > ( ) ); + + res.push_back ( std::ranked_symbol < unsigned, RankType > ( 0, symbols[begin].getRank ( ) ) ); + + index++; + + for ( unsigned i = 0; i < ( unsigned ) symbols[begin].getRank ( ); ++i ) + childRepeatsKey.second.push_back ( repeatsPrefixRanked ( symbols, res, data, minId, index ) ); + + unsigned & uniqueRepeatId = data[childRepeatsKey]; + + if ( uniqueRepeatId == 0 ) uniqueRepeatId = minId++; + + res[begin] = std::ranked_symbol < unsigned, RankType > ( uniqueRepeatId, symbols[begin].getRank ( ) ); + return res[begin]; +} + +/** Builds the repeats tree for a PrefixRankedTree by running repeatsPrefixRanked from position 0 with a fresh id table and ids starting at 1. */ template < class SymbolType, class RankType > +tree::PrefixRankedTree < unsigned, RankType > ExactSubtreeRepeatsNaive::repeats ( const tree::PrefixRankedTree < SymbolType, RankType > & tree ) { + unsigned minId = 1; + int index = 0; + std::vector <
std::ranked_symbol < unsigned, RankType > > res; + std::map < std::pair < std::ranked_symbol < SymbolType, RankType >, std::vector < std::ranked_symbol < unsigned, RankType > > >, unsigned > data; + + repeatsPrefixRanked ( tree.getContent ( ), res, data, minId, index ); + return tree::PrefixRankedTree < unsigned, RankType > ( res ); +} + +/** Helper for the prefix bar notation overload. Proceeds like repeatsPrefixRanked, but after the children it also emits one symbol for symbols[index] (presumably the bar closing this subtree) and steps index over it. That symbol gets id barId and the rank found in the input. No bounds checks are made; assumes well formed input (confirm against callers). Returns the repeat symbol stored at res[begin]. */ template < class SymbolType, class RankType > +std::ranked_symbol < unsigned, RankType > ExactSubtreeRepeatsNaive::repeatsPrefixRankedBar ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols, std::vector < std::ranked_symbol < unsigned, RankType > > & res, std::map < std::pair < std::ranked_symbol < SymbolType, RankType >, std::vector < std::ranked_symbol < unsigned, RankType > > >, unsigned > & data, unsigned & minId, unsigned barId, int & index ) { + int begin = index; + std::pair < std::ranked_symbol < SymbolType, RankType >, std::vector < std::ranked_symbol < unsigned, RankType > > > childRepeatsKey ( symbols[begin], std::vector < std::ranked_symbol < unsigned, RankType > > ( ) ); + + res.push_back ( std::ranked_symbol < unsigned, RankType > ( 0, symbols[begin].getRank ( ) ) ); + + index++; + + for ( unsigned i = 0; i < ( unsigned ) symbols[begin].getRank ( ); ++i ) + childRepeatsKey.second.push_back ( repeatsPrefixRankedBar ( symbols, res, data, minId, barId, index ) ); + + unsigned & uniqueRepeatId = data[childRepeatsKey]; + + if ( uniqueRepeatId == 0 ) uniqueRepeatId = minId++; + + res[begin] = std::ranked_symbol < unsigned, RankType > ( uniqueRepeatId, symbols[begin].getRank ( ) ); + /* use barId, not a literal, so the emitted id is the same one the public overload puts into the bar set */ res.push_back ( std::ranked_symbol < unsigned, RankType > ( barId, symbols[index].getRank ( ) ) ); + index++; + + return res[begin]; +} + +/** Builds the repeats tree for a PrefixRankedBarTree. Every bar receives id barId (0) and subtree ids start at barId + 1. The output bar set holds one symbol (barId, rank) per bar of the input tree. */ template < class SymbolType, class RankType > +tree::PrefixRankedBarTree < unsigned, RankType > ExactSubtreeRepeatsNaive::repeats ( const tree::PrefixRankedBarTree < SymbolType, RankType > & tree ) { + unsigned barId = 0; + unsigned minId = barId + 1; + int index = 0; + std::vector <
std::ranked_symbol < unsigned, RankType > > res; + std::map < std::pair < std::ranked_symbol < SymbolType, RankType >, std::vector < std::ranked_symbol < unsigned, RankType > > >, unsigned > data; + + repeatsPrefixRankedBar ( tree.getContent ( ), res, data, minId, barId, index ); + + std::set < std::ranked_symbol < unsigned, RankType > > bars; + for ( const std::ranked_symbol < SymbolType, RankType > & bar : tree.getBars ( ) ) + bars.insert ( std::ranked_symbol < unsigned, RankType > ( barId, bar.getRank ( ) ) ); + + return tree::PrefixRankedBarTree < unsigned, RankType > ( bars, res ); +} + } /* namespace properties */ } /* namespace tree */ diff --git a/alib2data/src/alphabet/ranked_symbol.hpp b/alib2data/src/alphabet/ranked_symbol.hpp index 5ade3c9f38..5f8f2ebff7 100644 --- a/alib2data/src/alphabet/ranked_symbol.hpp +++ b/alib2data/src/alphabet/ranked_symbol.hpp @@ -121,7 +121,7 @@ int ranked_symbol < SymbolType, RankType >::compare(const ranked_symbol& other) template < class SymbolType, class RankType > ranked_symbol < SymbolType, RankType >::operator std::string () const { - return (std::string) m_symbol + "_" + (std::string) m_rank; + return std::to_string ( m_symbol ) + "_" + std::to_string ( m_rank ); } } /* namespace std */ -- GitLab