From 7b945968e02a7156e7d01af3896a473e6787e9c9 Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Tue, 6 Dec 2016 10:24:33 +0100 Subject: [PATCH] template some arbology algorithms --- .../src/arbology/exact/BoyerMooreHorspool.cpp | 114 --------- .../src/arbology/exact/BoyerMooreHorspool.h | 125 ++++++++- ...neUsingBadCharacterShiftAndBorderArray.cpp | 106 -------- ...ZoneUsingBadCharacterShiftAndBorderArray.h | 128 +++++++++- .../exact/ReversedBoyerMooreHorspool.cpp | 219 ---------------- .../exact/ReversedBoyerMooreHorspool.h | 239 +++++++++++++++++- .../properties/ExactSubtreeRepeatsNaive.cpp | 93 +++++++ .../properties/ExactSubtreeRepeatsNaive.h | 118 +-------- 8 files changed, 576 insertions(+), 566 deletions(-) diff --git a/alib2algo/src/arbology/exact/BoyerMooreHorspool.cpp b/alib2algo/src/arbology/exact/BoyerMooreHorspool.cpp index 2df5634ce4..d31d1e892b 100644 --- a/alib2algo/src/arbology/exact/BoyerMooreHorspool.cpp +++ b/alib2algo/src/arbology/exact/BoyerMooreHorspool.cpp @@ -6,17 +6,6 @@ */ #include "BoyerMooreHorspool.h" -#include <tree/properties/BadCharacterShiftTable.h> -#include <tree/properties/SubtreeJumpTable.h> -#include <tree/properties/ExactSubtreeRepeatsNaive.h> - -#include <tree/Tree.h> -#include <tree/ranked/PrefixRankedBarTree.h> -#include <tree/ranked/PrefixRankedBarPattern.h> -#include <tree/ranked/PrefixRankedBarNonlinearPattern.h> -#include <alphabet/RankedSymbol.h> - -#include <map> namespace arbology { @@ -26,111 +15,8 @@ std::set < unsigned > BoyerMooreHorspool::match ( const tree::Tree & subject, co return dispatch ( subject.getData ( ), pattern.getData ( ) ); } -std::set < unsigned > BoyerMooreHorspool::match ( const tree::PrefixRankedBarTree < > & subject, const tree::PrefixRankedBarTree < > & pattern ) { - return match ( subject, tree::PrefixRankedBarPattern < > ( pattern ) ); -} - auto BoyerMooreHorspoolPrefixRankedBarTreePrefixRankedBarTree = BoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree < >, tree::PrefixRankedBarTree < > > ( BoyerMooreHorspool::match ); - -std::set < unsigned > BoyerMooreHorspool::match ( const tree::PrefixRankedBarTree < > & subject, const tree::PrefixRankedBarPattern < > & pattern ) { - std::set < unsigned > occ; - std::map < std::ranked_symbol < >, size_t > bcs = tree::properties::BadCharacterShiftTable::bcs ( pattern ); //NOTE: the subjects alphabet must be a subset or equal to the pattern - std::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); - - // index to the subject - unsigned i = 0; - - // main loop of the algorithm over all possible indexes where the pattern can start - while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) { - - // index to the pattern - int j = pattern.getContent ( ).size ( ) - 1; - - // offset to the subject - int offset = i + j; - - while ( ( j >= 0 ) && ( offset >= 0 ) ) { - if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { - // match of symbol - offset = offset - 1; - j = j - 1; - } else if ( ( pattern.getContent ( )[j] == pattern.getVariablesBar ( ) ) && ( pattern.getBars ( ).count ( subject.getContent ( )[offset] )) ) { - // match of variable with subtree - offset = subjectSubtreeJumpTable[offset]; - j = j - 2; - } else { - break; - } - } - - // match was found - if ( j == -1 ) occ.insert ( offset ); - - // shift heuristics - i += bcs[subject.getContent ( )[i + pattern.getContent ( ).size ( ) - 1]]; - } - - return occ; -} - auto BoyerMooreHorspoolPrefixRankedBarTreePrefixRankedBarPattern = BoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree < >, tree::PrefixRankedBarPattern < > > ( BoyerMooreHorspool::match ); - -std::set < unsigned > BoyerMooreHorspool::match ( const tree::PrefixRankedBarTree < > & subject, const tree::PrefixRankedBarNonlinearPattern < > & pattern ) { - std::set < unsigned > occ; - std::map < std::ranked_symbol < >, size_t > bcs = tree::properties::BadCharacterShiftTable::bcs ( pattern ); //NOTE: the subjects alphabet must be a subset or equal to the pattern - std::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); - std::map < std::ranked_symbol < >, alphabet::Symbol > variablesSetting; - - tree::PrefixRankedBarTree < > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); - - // index to the subject - unsigned i = 0; - - // main loop of the algorithm over all possible indexes where the pattern can start - while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) { - // clear the current state of variable to subtree repeat - variablesSetting.clear(); - - // index to the pattern - int j = pattern.getContent ( ).size ( ) - 1; - - // offset to the subject - int offset = i + j; - - while ( ( j >= 0 ) && ( offset >= 0 ) ) { - if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { - // match of symbol - offset = offset - 1; - j = j - 1; - } else if ( ( pattern.getContent ( )[j] == pattern.getVariablesBar ( ) ) && ( pattern.getBars ( ).count ( subject.getContent ( )[offset] )) ) { - // else match of variable with subtree - offset = subjectSubtreeJumpTable[offset]; - j = j - 2; - - // check nonlinear variable - if ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j + 1 ] ) ) { - auto setting = variablesSetting.find ( pattern.getContent ( )[ j + 1 ] ); - - if ( setting != variablesSetting.end ( ) && repeats.getContent ( )[ offset + 1 ].getSymbol ( ) != setting->second ) - break; - - variablesSetting.insert ( std::make_pair ( pattern.getContent ( )[ j + 1 ], repeats.getContent( )[ offset + 1 ].getSymbol ( ) ) ); - } - } else { - break; - } - } - - // match was found - if ( j == -1 ) occ.insert ( offset ); - - // shift heuristics - i += bcs[subject.getContent ( )[i + pattern.getContent ( ).size ( ) - 1]]; - } - - return occ; -} - auto BoyerMooreHorspoolPrefixRankedBarTreePrefixRankedBarNonlinearPattern = BoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree < >, tree::PrefixRankedBarNonlinearPattern < > > ( BoyerMooreHorspool::match ); } /* namespace exact */ diff --git a/alib2algo/src/arbology/exact/BoyerMooreHorspool.h b/alib2algo/src/arbology/exact/BoyerMooreHorspool.h index be6136aead..67ad272db2 100644 --- a/alib2algo/src/arbology/exact/BoyerMooreHorspool.h +++ b/alib2algo/src/arbology/exact/BoyerMooreHorspool.h @@ -9,9 +9,21 @@ #define _ARBOLOGY_BOYER_MOORE_HORSPOOL_H_ #include <set> +#include <map> + #include <core/multipleDispatch.hpp> #include <tree/TreeFeatures.h> +#include <tree/properties/BadCharacterShiftTable.h> +#include <tree/properties/SubtreeJumpTable.h> +#include <tree/properties/ExactSubtreeRepeatsNaive.h> + +#include <tree/Tree.h> +#include <tree/ranked/PrefixRankedBarTree.h> +#include <tree/ranked/PrefixRankedBarPattern.h> +#include <tree/ranked/PrefixRankedBarNonlinearPattern.h> +#include <alphabet/RankedSymbol.h> + namespace arbology { namespace exact { @@ -28,12 +40,119 @@ public: */ static std::set < unsigned > match ( const tree::Tree & subject, const tree::Tree & pattern ); - static std::set < unsigned > match ( const tree::PrefixRankedBarTree < > & subject, const tree::PrefixRankedBarTree < > & pattern ); - static std::set < unsigned > match ( const tree::PrefixRankedBarTree < > & subject, const tree::PrefixRankedBarPattern < > & pattern ); - static std::set < unsigned > match ( const tree::PrefixRankedBarTree < > & subject, const tree::PrefixRankedBarNonlinearPattern < > & pattern ); + template < class SymbolType, class RankType > + static std::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern ); + template < class SymbolType, class RankType > + static std::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ); + template < class SymbolType, class RankType > + static std::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarNonlinearPattern < SymbolType, RankType > & pattern ); }; +template < class SymbolType, class RankType > +std::set < unsigned > BoyerMooreHorspool::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern ) { + return match ( subject, tree::PrefixRankedBarPattern < SymbolType, RankType > ( pattern ) ); +} + +template < class SymbolType, class RankType > +std::set < unsigned > BoyerMooreHorspool::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ) { + std::set < unsigned > occ; + std::map < std::ranked_symbol < SymbolType, RankType >, size_t > bcs = tree::properties::BadCharacterShiftTable::bcs ( pattern ); //NOTE: the subjects alphabet must be a subset or equal to the pattern + std::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); + + // index to the subject + unsigned i = 0; + + // main loop of the algorithm over all possible indexes where the pattern can start + while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) { + + // index to the pattern + int j = pattern.getContent ( ).size ( ) - 1; + + // offset to the subject + int offset = i + j; + + while ( ( j >= 0 ) && ( offset >= 0 ) ) { + if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { + // match of symbol + offset = offset - 1; + j = j - 1; + } else if ( ( pattern.getContent ( )[j] == pattern.getVariablesBar ( ) ) && ( pattern.getBars ( ).count ( subject.getContent ( )[offset] )) ) { + // match of variable with subtree + offset = subjectSubtreeJumpTable[offset]; + j = j - 2; + } else { + break; + } + } + + // match was found + if ( j == -1 ) occ.insert ( offset ); + + // shift heuristics + i += bcs[subject.getContent ( )[i + pattern.getContent ( ).size ( ) - 1]]; + } + + return occ; +} + +template < class SymbolType, class RankType > +std::set < unsigned > BoyerMooreHorspool::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarNonlinearPattern < SymbolType, RankType > & pattern ) { + std::set < unsigned > occ; + std::map < std::ranked_symbol < SymbolType, RankType >, size_t > bcs = tree::properties::BadCharacterShiftTable::bcs ( pattern ); //NOTE: the subjects alphabet must be a subset or equal to the pattern + std::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); + std::map < std::ranked_symbol < SymbolType, RankType >, SymbolType > variablesSetting; + + tree::PrefixRankedBarTree < > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); + + // index to the subject + unsigned i = 0; + + // main loop of the algorithm over all possible indexes where the pattern can start + while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) { + // clear the current state of variable to subtree repeat + variablesSetting.clear(); + + // index to the pattern + int j = pattern.getContent ( ).size ( ) - 1; + + // offset to the subject + int offset = i + j; + + while ( ( j >= 0 ) && ( offset >= 0 ) ) { + if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { + // match of symbol + offset = offset - 1; + j = j - 1; + } else if ( ( pattern.getContent ( )[j] == pattern.getVariablesBar ( ) ) && ( pattern.getBars ( ).count ( subject.getContent ( )[offset] )) ) { + // else match of variable with subtree + offset = subjectSubtreeJumpTable[offset]; + j = j - 2; + + // check nonlinear variable + if ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j + 1 ] ) ) { + auto setting = variablesSetting.find ( pattern.getContent ( )[ j + 1 ] ); + + if ( setting != variablesSetting.end ( ) && repeats.getContent ( )[ offset + 1 ].getSymbol ( ) != setting->second ) + break; + + variablesSetting.insert ( std::make_pair ( pattern.getContent ( )[ j + 1 ], repeats.getContent( )[ offset + 1 ].getSymbol ( ) ) ); + } + } else { + break; + } + } + + // match was found + if ( j == -1 ) occ.insert ( offset ); + + // shift heuristics + i += bcs[subject.getContent ( )[i + pattern.getContent ( ).size ( ) - 1]]; + } + + return occ; +} + } /* namespace exact */ } /* namespace arbology */ diff --git a/alib2algo/src/arbology/exact/DeadZoneUsingBadCharacterShiftAndBorderArray.cpp b/alib2algo/src/arbology/exact/DeadZoneUsingBadCharacterShiftAndBorderArray.cpp index 436381fbdf..8ef1994b84 100644 --- a/alib2algo/src/arbology/exact/DeadZoneUsingBadCharacterShiftAndBorderArray.cpp +++ b/alib2algo/src/arbology/exact/DeadZoneUsingBadCharacterShiftAndBorderArray.cpp @@ -6,18 +6,6 @@ */ #include "DeadZoneUsingBadCharacterShiftAndBorderArray.h" -#include <tree/properties/ReversedBadCharacterShiftTable.h> -#include <tree/properties/BorderArrayNaive.h> -#include <tree/properties/SubtreeJumpTable.h> - -#include <tree/Tree.h> -#include <tree/ranked/PrefixRankedBarTree.h> -#include <tree/ranked/PrefixRankedBarPattern.h> -#include <tree/ranked/PrefixRankedTree.h> -#include <tree/ranked/PrefixRankedPattern.h> -#include <alphabet/RankedSymbol.h> - -#include <map> namespace arbology { @@ -27,103 +15,9 @@ std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( cons return dispatch ( subject.getData ( ), pattern.getData ( ) ); } -std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedBarTree < > & subject, const tree::PrefixRankedBarTree < > & pattern ) { - return match ( subject, tree::PrefixRankedBarPattern < > ( pattern ) ); -} - auto DeadZoneUsingBadCharacterShiftAndBorderArrayPrefixRankedBarTreePrefixRankedBarTree = DeadZoneUsingBadCharacterShiftAndBorderArray::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree < >, tree::PrefixRankedBarTree < > > ( DeadZoneUsingBadCharacterShiftAndBorderArray::match ); - -std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedBarTree < > & subject, const tree::PrefixRankedBarPattern < > & pattern ) { - std::set < unsigned > occ; - std::map < std::ranked_symbol < >, size_t > bbcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern - std::vector < size_t > fba = tree::properties::BorderArrayNaive::ba ( pattern ); - std::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); - - match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, 0, subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1 ); - return occ; -} - -void DeadZoneUsingBadCharacterShiftAndBorderArray::match_rec ( std::set < unsigned > & occ, const tree::PrefixRankedBarTree < > & subject, const tree::PrefixRankedBarPattern < > & pattern, std::vector < size_t > & fba, std::map < std::ranked_symbol < >, size_t > & bbcs, std::vector < int > & subjectSubtreeJumpTable, int low, int high ) { - if ( low >= high ) return; - - int i = ( low + high ) / 2; - - // index to the pattern - unsigned j = 0; - - // offset to the subject - unsigned offset = i; - - while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { - if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { - // match of symbol - offset++; - j++; - } else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) ) { - // match of variable with subtree - offset = subjectSubtreeJumpTable[offset]; - j += 2; - } else { - break; - } - } - - // match was found - if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i ); - - match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, low, i - bbcs[subject.getContent ( )[i]] + 1 ); - match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, i + j - fba[j], high ); -} - auto DeadZoneUsingBadCharacterShiftAndBorderArrayPrefixRankedBarTreePrefixRankedBarPattern = DeadZoneUsingBadCharacterShiftAndBorderArray::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree < >, tree::PrefixRankedBarPattern < > > ( DeadZoneUsingBadCharacterShiftAndBorderArray::match ); - -std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedTree < > & subject, const tree::PrefixRankedTree < > & pattern ) { - return match ( subject, tree::PrefixRankedPattern < > ( pattern ) ); -} - auto DeadZoneUsingBadCharacterShiftAndBorderArrayPrefixRankedTreePrefixRankedTree = DeadZoneUsingBadCharacterShiftAndBorderArray::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedTree < >, tree::PrefixRankedTree < > > ( DeadZoneUsingBadCharacterShiftAndBorderArray::match ); - -std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedTree < > & subject, const tree::PrefixRankedPattern < > & pattern ) { - std::set < unsigned > occ; - std::map < std::ranked_symbol < >, size_t > bbcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern - std::vector < size_t > fba = tree::properties::BorderArrayNaive::ba ( pattern ); - std::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); - - match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, 0, subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1 ); - return occ; -} - -void DeadZoneUsingBadCharacterShiftAndBorderArray::match_rec ( std::set < unsigned > & occ, const tree::PrefixRankedTree < > & subject, const tree::PrefixRankedPattern < > & pattern, std::vector < size_t > & fba, std::map < std::ranked_symbol < >, size_t > & bbcs, std::vector < int > & subjectSubtreeJumpTable, int low, int high ) { - if ( low >= high ) return; - - int i = ( low + high ) / 2; - - // index to the pattern - unsigned j = 0; - - // offset to the subject - unsigned offset = i; - - while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { - if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) - // match of symbol - offset++; - else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) - // match of variable with subtree - offset = subjectSubtreeJumpTable[offset]; - else - break; - - j++; - } - - // match was found - if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i ); - - match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, low, i - bbcs[subject.getContent ( )[i]] + 1 ); - match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, i + j - fba[j], high ); -} - auto DeadZoneUsingBadCharacterShiftAndBorderArrayPrefixRankedTreePrefixRankedPattern = DeadZoneUsingBadCharacterShiftAndBorderArray::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedTree < >, tree::PrefixRankedPattern < > > ( DeadZoneUsingBadCharacterShiftAndBorderArray::match ); } /* namespace exact */ diff --git a/alib2algo/src/arbology/exact/DeadZoneUsingBadCharacterShiftAndBorderArray.h b/alib2algo/src/arbology/exact/DeadZoneUsingBadCharacterShiftAndBorderArray.h index 16c2498691..51f1a64787 100644 --- a/alib2algo/src/arbology/exact/DeadZoneUsingBadCharacterShiftAndBorderArray.h +++ b/alib2algo/src/arbology/exact/DeadZoneUsingBadCharacterShiftAndBorderArray.h @@ -14,6 +14,19 @@ #include <tree/TreeFeatures.h> #include <alphabet/RankedSymbol.h> +#include <tree/properties/ReversedBadCharacterShiftTable.h> +#include <tree/properties/BorderArrayNaive.h> +#include <tree/properties/SubtreeJumpTable.h> + +#include <tree/Tree.h> +#include <tree/ranked/PrefixRankedBarTree.h> +#include <tree/ranked/PrefixRankedBarPattern.h> +#include <tree/ranked/PrefixRankedTree.h> +#include <tree/ranked/PrefixRankedPattern.h> +#include <alphabet/RankedSymbol.h> + +#include <map> + namespace arbology { namespace exact { @@ -29,16 +42,119 @@ public: */ static std::set < unsigned > match ( const tree::Tree & subject, const tree::Tree & pattern ); - static std::set < unsigned > match ( const tree::PrefixRankedBarTree < > & subject, const tree::PrefixRankedBarTree < > & pattern ); - static std::set < unsigned > match ( const tree::PrefixRankedBarTree < > & subject, const tree::PrefixRankedBarPattern < > & pattern ); - static void match_rec ( std::set < unsigned > & occ, const tree::PrefixRankedBarTree < > & subject, const tree::PrefixRankedBarPattern < > & pattern, std::vector < size_t > & fba, std::map < std::ranked_symbol < >, size_t > & bbcs, std::vector < int > & subjectSubtreeJumpTable, int low, int high ); + template < class SymbolType, class RankType > + static std::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern ); + template < class SymbolType, class RankType > + static std::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ); + template < class SymbolType, class RankType > + static void match_rec ( std::set < unsigned > & occ, const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern, std::vector < size_t > & fba, std::map < std::ranked_symbol < SymbolType, RankType >, size_t > & bbcs, std::vector < int > & subjectSubtreeJumpTable, int low, int high ); - static std::set < unsigned > match ( const tree::PrefixRankedTree < > & subject, const tree::PrefixRankedTree < > & pattern ); - static std::set < unsigned > match ( const tree::PrefixRankedTree < > & subject, const tree::PrefixRankedPattern < > & pattern ); - static void match_rec ( std::set < unsigned > & occ, const tree::PrefixRankedTree < > & subject, const tree::PrefixRankedPattern < > & pattern, std::vector < size_t > & fba, std::map < std::ranked_symbol < >, size_t > & bbcs, std::vector < int > & subjectSubtreeJumpTable, int low, int high ); + template < class SymbolType, class RankType > + static std::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern ); + template < class SymbolType, class RankType > + static std::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ); + template < class SymbolType, class RankType > + static void match_rec ( std::set < unsigned > & occ, const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern, std::vector < size_t > & fba, std::map < std::ranked_symbol < SymbolType, RankType >, size_t > & bbcs, std::vector < int > & subjectSubtreeJumpTable, int low, int high ); }; +template < class SymbolType, class RankType > +std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern ) { + return match ( subject, tree::PrefixRankedBarPattern < SymbolType, RankType > ( pattern ) ); +} + +template < class SymbolType, class RankType > +std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ) { + std::set < unsigned > occ; + std::map < std::ranked_symbol < SymbolType, RankType >, size_t > bbcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern + std::vector < size_t > fba = tree::properties::BorderArrayNaive::ba ( pattern ); + std::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); + + match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, 0, subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1 ); + return occ; +} + +template < class SymbolType, class RankType > +void DeadZoneUsingBadCharacterShiftAndBorderArray::match_rec ( std::set < unsigned > & occ, const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern, std::vector < size_t > & fba, std::map < std::ranked_symbol < SymbolType, RankType >, size_t > & bbcs, std::vector < int > & subjectSubtreeJumpTable, int low, int high ) { + if ( low >= high ) return; + + int i = ( low + high ) / 2; + + // index to the pattern + unsigned j = 0; + + // offset to the subject + unsigned offset = i; + + while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { + if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { + // match of symbol + offset++; + j++; + } else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) ) { + // match of variable with subtree + offset = subjectSubtreeJumpTable[offset]; + j += 2; + } else { + break; + } + } + + // match was found + if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i ); + + match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, low, i - bbcs[subject.getContent ( )[i]] + 1 ); + match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, i + j - fba[j], high ); +} + +template < class SymbolType, class RankType > +std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern ) { + return match ( subject, tree::PrefixRankedPattern < SymbolType, RankType > ( pattern ) ); +} + +template < class SymbolType, class RankType > +std::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ) { + std::set < unsigned > occ; + std::map < std::ranked_symbol < SymbolType, RankType >, size_t > bbcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern + std::vector < size_t > fba = tree::properties::BorderArrayNaive::ba ( pattern ); + std::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); + + match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, 0, subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1 ); + return occ; +} + +template < class SymbolType, class RankType > +void DeadZoneUsingBadCharacterShiftAndBorderArray::match_rec ( std::set < unsigned > & occ, const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern, std::vector < size_t > & fba, std::map < std::ranked_symbol < SymbolType, RankType >, size_t > & bbcs, std::vector < int > & subjectSubtreeJumpTable, int low, int high ) { + if ( low >= high ) return; + + int i = ( low + high ) / 2; + + // index to the pattern + unsigned j = 0; + + // offset to the subject + unsigned offset = i; + + while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { + if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) + // match of symbol + offset++; + else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) + // match of variable with subtree + offset = subjectSubtreeJumpTable[offset]; + else + break; + + j++; + } + + // match was found + if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i ); + + match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, low, i - bbcs[subject.getContent ( )[i]] + 1 ); + match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, i + j - fba[j], high ); +} + } /* namespace exact */ } /* namespace arbology */ diff --git a/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.cpp b/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.cpp index 135aaede2d..8535cf8561 100644 --- a/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.cpp +++ b/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.cpp @@ -6,20 +6,6 @@ */ #include "ReversedBoyerMooreHorspool.h" -#include <tree/properties/ReversedBadCharacterShiftTable.h> -#include <tree/properties/SubtreeJumpTable.h> -#include <tree/properties/ExactSubtreeRepeatsNaive.h> - -#include <tree/Tree.h> -#include <tree/ranked/PrefixRankedTree.h> -#include <tree/ranked/PrefixRankedBarTree.h> -#include <tree/ranked/PrefixRankedPattern.h> -#include <tree/ranked/PrefixRankedBarPattern.h> -#include <tree/ranked/PrefixRankedNonlinearPattern.h> -#include <tree/ranked/PrefixRankedBarNonlinearPattern.h> -#include <alphabet/RankedSymbol.h> - -#include <map> namespace arbology { @@ -29,216 +15,11 @@ std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::Tree & sub return dispatch ( subject.getData ( ), pattern.getData ( ) ); } -std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedBarTree < > & subject, const tree::PrefixRankedBarTree < > & pattern ) { - return match ( subject, tree::PrefixRankedBarPattern < > ( pattern ) ); -} - auto ReversedBoyerMooreHorspoolPrefixRankedBarTreePrefixRankedBarTree = ReversedBoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree < >, tree::PrefixRankedBarTree < > > ( ReversedBoyerMooreHorspool::match ); - -std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedBarTree < > & subject, const tree::PrefixRankedBarPattern < > & pattern ) { - std::set < unsigned > occ; - std::map < std::ranked_symbol < >, size_t > bcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern - std::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); - - // index to the subject - int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1; - - // main loop of the algorithm over all possible indexes where the pattern can start - while ( i >= 0 ) { - - // index to the pattern - unsigned j = 0; - - // offset to the subject - unsigned offset = i; - - while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { - if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { - // match of symbol - offset = offset + 1; - j = j + 1; - } else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) ) { - // match of variable with subtree - offset = subjectSubtreeJumpTable[offset]; - j = j + 2; - } else { - break; - } - } - - // match was found - if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i ); - - // shift heuristics - i -= bcs[subject.getContent ( )[i]]; - } - - return occ; -} - auto ReversedBoyerMooreHorspoolPrefixRankedBarTreePrefixRankedBarPattern = ReversedBoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree < >, tree::PrefixRankedBarPattern < > > ( ReversedBoyerMooreHorspool::match ); - -std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedBarTree < > & subject, const tree::PrefixRankedBarNonlinearPattern < > & pattern ) { - std::set < unsigned > occ; - std::map < std::ranked_symbol < >, size_t > bcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern - std::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); - std::map < std::ranked_symbol < >, alphabet::Symbol > variablesSetting; - - tree::PrefixRankedBarTree < > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); - - // index to the subject - int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1; - - // main loop of the algorithm over all possible indexes where the pattern can start - while ( i >= 0 ) { - // clear the current state of variable to subtree repeat - variablesSetting.clear(); - - // index to the pattern - unsigned j = 0; - - // offset to the subject - unsigned offset = i; - - while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { - if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { - // match of symbol - offset = offset + 1; - j = j + 1; - } else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[j] ) ) && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) ) { - // check nonlinear variable - if ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) { - auto setting = variablesSetting.find ( pattern.getContent ( )[ j ] ); - - if ( setting != variablesSetting.end ( ) && repeats.getContent ( )[ offset ].getSymbol ( ) != setting->second ) - break; - - variablesSetting.insert ( std::make_pair ( pattern.getContent ( )[ j ], repeats.getContent( )[ offset ].getSymbol ( ) ) ); - } - - // match of variable with subtree - offset = subjectSubtreeJumpTable[offset]; - j = j + 2; - } else { - break; - } - } - - // match was found - if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i ); - - // shift heuristics - i -= bcs[subject.getContent ( )[i]]; - } - - return occ; -} - auto ReversedBoyerMooreHorspoolPrefixRankedBarTreePrefixRankedBarNonlinearPattern = ReversedBoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree < >, tree::PrefixRankedBarNonlinearPattern < > > ( ReversedBoyerMooreHorspool::match ); - -std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedTree < > & subject, const tree::PrefixRankedTree < > & pattern ) { - return match ( subject, tree::PrefixRankedPattern < > ( pattern ) ); -} - auto ReversedBoyerMooreHorspoolPrefixRankedBarTreePrefixRankedTree = ReversedBoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedTree < >, tree::PrefixRankedTree < > > ( ReversedBoyerMooreHorspool::match ); - -std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedTree < > & subject, const tree::PrefixRankedPattern < > & pattern ) { - std::set < unsigned > occ; - std::map < std::ranked_symbol < >, size_t > bcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern - std::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); - - // index to the subject - int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1; - - // main loop of the algorithm over all possible indexes where the pattern can start - while ( i >= 0 ) { - - // index to the pattern - unsigned j = 0; - - // offset to the subject - unsigned offset = i; - - while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { - if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) - // match of symbol - offset = offset + 1; - else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) - // match of variable with subtree - offset = subjectSubtreeJumpTable[offset]; - else - break; - - j = j + 1; - } - - // match was found - if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i ); - - // shift heristics - i -= bcs[subject.getContent ( )[i]]; - } - - return occ; -} - auto ReversedBoyerMooreHorspoolPrefixRankedBarTreePrefixRankedPattern = ReversedBoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedTree < >, tree::PrefixRankedPattern < > > ( ReversedBoyerMooreHorspool::match ); - -std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedTree < > & subject, const tree::PrefixRankedNonlinearPattern < > & pattern ) { - std::set < unsigned > occ; - std::map < std::ranked_symbol < >, size_t > bcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern - std::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); - std::map < std::ranked_symbol < >, alphabet::Symbol > variablesSetting; - - tree::PrefixRankedTree < > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); - - // index to the subject - int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1; - - // main loop of the algorithm over all possible indexes where the pattern can start - while ( i >= 0 ) { - // clear the current state of variable to subtree repeat - variablesSetting.clear(); - - // index to the pattern - unsigned j = 0; - - // offset to the subject - unsigned offset = i; - - while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { - if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) - // match of symbol - offset = offset + 1; - else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) { - // check nonlinear variable - if ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) { - auto setting = variablesSetting.find ( pattern.getContent ( )[ j ] ); - - if ( setting != variablesSetting.end ( ) && repeats.getContent ( )[ offset ].getSymbol ( ) != setting->second ) - break; - - variablesSetting.insert ( std::make_pair ( pattern.getContent ( )[ j ], repeats.getContent( )[ offset ].getSymbol ( ) ) ); - } - - // match of variable with subtree - offset = subjectSubtreeJumpTable[offset]; - } else - break; - - j = j + 1; - } - - // match was found - if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i ); - - // shift heristics - i -= bcs[subject.getContent ( )[i]]; - } - - return occ; -} - auto ReversedBoyerMooreHorspoolPrefixRankedBarTreePrefixRankedNonlinearPattern = ReversedBoyerMooreHorspool::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedTree < >, tree::PrefixRankedNonlinearPattern < > > ( ReversedBoyerMooreHorspool::match ); } /* namespace exact */ diff --git a/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.h b/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.h index 2d2f0d3c6e..35c3492793 100644 --- a/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.h +++ b/alib2algo/src/arbology/exact/ReversedBoyerMooreHorspool.h @@ -12,6 +12,21 @@ #include <core/multipleDispatch.hpp> #include <tree/TreeFeatures.h> +#include <tree/properties/ReversedBadCharacterShiftTable.h> +#include <tree/properties/SubtreeJumpTable.h> +#include <tree/properties/ExactSubtreeRepeatsNaive.h> + +#include <tree/Tree.h> +#include <tree/ranked/PrefixRankedTree.h> +#include <tree/ranked/PrefixRankedBarTree.h> +#include <tree/ranked/PrefixRankedPattern.h> +#include <tree/ranked/PrefixRankedBarPattern.h> +#include <tree/ranked/PrefixRankedNonlinearPattern.h> +#include <tree/ranked/PrefixRankedBarNonlinearPattern.h> +#include <alphabet/RankedSymbol.h> + +#include <map> + namespace arbology { namespace exact { @@ -28,15 +43,227 @@ public: */ static std::set < unsigned > match ( const tree::Tree & subject, const tree::Tree & pattern ); - static std::set < unsigned > match ( const tree::PrefixRankedBarTree < > & subject, const tree::PrefixRankedBarTree < > & pattern ); - static std::set < unsigned > match ( const tree::PrefixRankedBarTree < > & subject, const tree::PrefixRankedBarPattern < > & pattern ); - static std::set < unsigned > match ( const tree::PrefixRankedBarTree < > & subject, const tree::PrefixRankedBarNonlinearPattern < > & pattern ); - static std::set < unsigned > match ( const tree::PrefixRankedTree < > & subject, const tree::PrefixRankedTree < > & pattern ); - static std::set < unsigned > match ( const tree::PrefixRankedTree < > & subject, const tree::PrefixRankedPattern < > & pattern ); - static std::set < unsigned > match ( const tree::PrefixRankedTree < > & subject, const tree::PrefixRankedNonlinearPattern < > & pattern ); + template < class SymbolType, class RankType > + static std::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern ); + template < class SymbolType, class RankType > + static std::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ); + template < class SymbolType, class RankType > + static std::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarNonlinearPattern < SymbolType, RankType > & pattern ); + template < class SymbolType, class RankType > + static std::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern ); + template < class SymbolType, class RankType > + static std::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ); + template < class SymbolType, class RankType > + static std::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedNonlinearPattern < SymbolType, RankType > & pattern ); }; +template < class SymbolType, class RankType > +std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern ) { + return match ( subject, tree::PrefixRankedBarPattern < SymbolType, RankType > ( pattern ) ); +} + +template < class SymbolType, class RankType > +std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ) { + std::set < unsigned > occ; + std::map < std::ranked_symbol < SymbolType, RankType >, size_t > bcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern + std::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); + + // index to the subject + int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1; + + // main loop of the algorithm over all possible indexes where the pattern can start + while ( i >= 0 ) { + + // index to the pattern + unsigned j = 0; + + // offset to the subject + unsigned offset = i; + + while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { + if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { + // match of symbol + offset = offset + 1; + j = j + 1; + } else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) ) { + // match of variable with subtree + offset = subjectSubtreeJumpTable[offset]; + j = j + 2; + } else { + break; + } + } + + // match was found + if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i ); + + // shift heuristics + i -= bcs[subject.getContent ( )[i]]; + } + + return occ; +} + +template < class SymbolType, class RankType > +std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarNonlinearPattern < SymbolType, RankType > & pattern ) { + std::set < unsigned > occ; + std::map < std::ranked_symbol < SymbolType, RankType >, size_t > bcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern + std::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); + std::map < std::ranked_symbol < SymbolType, RankType >, SymbolType > variablesSetting; + + tree::PrefixRankedBarTree < > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); + + // index to the subject + int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1; + + // main loop of the algorithm over all possible indexes where the pattern can start + while ( i >= 0 ) { + // clear the current state of variable to subtree repeat + variablesSetting.clear(); + + // index to the pattern + unsigned j = 0; + + // offset to the subject + unsigned offset = i; + + while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { + if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) { + // match of symbol + offset = offset + 1; + j = j + 1; + } else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[j] ) ) && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) ) { + // check nonlinear variable + if ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) { + auto setting = variablesSetting.find ( pattern.getContent ( )[ j ] ); + + if ( setting != variablesSetting.end ( ) && repeats.getContent ( )[ offset ].getSymbol ( ) != setting->second ) + break; + + variablesSetting.insert ( std::make_pair ( pattern.getContent ( )[ j ], repeats.getContent( )[ offset ].getSymbol ( ) ) ); + } + + // match of variable with subtree + offset = subjectSubtreeJumpTable[offset]; + j = j + 2; + } else { + break; + } + } + + // match was found + if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i ); + + // shift heuristics + i -= bcs[subject.getContent ( )[i]]; + } + + return occ; +} + +template < class SymbolType, class RankType > +std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern ) { + return match ( subject, tree::PrefixRankedPattern < SymbolType, RankType > ( pattern ) ); +} + +template < class SymbolType, class RankType > +std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ) { + std::set < unsigned > occ; + std::map < std::ranked_symbol < SymbolType, RankType >, size_t > bcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern + std::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); + + // index to the subject + int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1; + + // main loop of the algorithm over all possible indexes where the pattern can start + while ( i >= 0 ) { + + // index to the pattern + unsigned j = 0; + + // offset to the subject + unsigned offset = i; + + while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { + if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) + // match of symbol + offset = offset + 1; + else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) + // match of variable with subtree + offset = subjectSubtreeJumpTable[offset]; + else + break; + + j = j + 1; + } + + // match was found + if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i ); + + // shift heristics + i -= bcs[subject.getContent ( )[i]]; + } + + return occ; +} + +template < class SymbolType, class RankType > +std::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedNonlinearPattern < SymbolType, RankType > & pattern ) { + std::set < unsigned > occ; + std::map < std::ranked_symbol < SymbolType, RankType >, size_t > bcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern + std::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject ); + std::map < std::ranked_symbol < SymbolType, RankType >, SymbolType > variablesSetting; + + tree::PrefixRankedTree < > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject ); + + // index to the subject + int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1; + + // main loop of the algorithm over all possible indexes where the pattern can start + while ( i >= 0 ) { + // clear the current state of variable to subtree repeat + variablesSetting.clear(); + + // index to the pattern + unsigned j = 0; + + // offset to the subject + unsigned offset = i; + + while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) { + if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) + // match of symbol + offset = offset + 1; + else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) { + // check nonlinear variable + if ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) { + auto setting = variablesSetting.find ( pattern.getContent ( )[ j ] ); + + if ( setting != variablesSetting.end ( ) && repeats.getContent ( )[ offset ].getSymbol ( ) != setting->second ) + break; + + variablesSetting.insert ( std::make_pair ( pattern.getContent ( )[ j ], repeats.getContent( )[ offset ].getSymbol ( ) ) ); + } + + // match of variable with subtree + offset = subjectSubtreeJumpTable[offset]; + } else + break; + + j = j + 1; + } + + // match was found + if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i ); + + // shift heristics + i -= bcs[subject.getContent ( )[i]]; + } + + return occ; +} + } /* namespace exact */ } /* namespace arbology */ diff --git a/alib2algo/src/tree/properties/ExactSubtreeRepeatsNaive.cpp b/alib2algo/src/tree/properties/ExactSubtreeRepeatsNaive.cpp index 51ff2af299..9c14e08469 100644 --- a/alib2algo/src/tree/properties/ExactSubtreeRepeatsNaive.cpp +++ b/alib2algo/src/tree/properties/ExactSubtreeRepeatsNaive.cpp @@ -6,6 +6,13 @@ */ #include "ExactSubtreeRepeatsNaive.h" +#include "SubtreeJumpTable.h" + +#include <tree/ranked/RankedTree.h> +#include <tree/ranked/PrefixRankedTree.h> +#include <tree/ranked/PrefixRankedBarTree.h> +#include <tree/Tree.h> +#include <global/GlobalData.h> namespace tree { @@ -15,8 +22,94 @@ tree::Tree ExactSubtreeRepeatsNaive::repeats ( const tree::Tree & tree ) { return dispatch ( tree.getData ( ) ); } +std::tree < std::ranked_symbol < > > ExactSubtreeRepeatsNaive::repeats ( const std::tree < std::ranked_symbol < > > & node, std::map < std::pair < std::ranked_symbol < >, std::vector < std::ranked_symbol < > > >, int > & data, int & minId ) { + std::vector < std::tree < std::ranked_symbol < > > > children; + std::pair < std::ranked_symbol < >, std::vector < std::ranked_symbol < > > > childRepeatsKey ( node.getData ( ), std::vector < std::ranked_symbol < > > ( ) ); + + for ( const std::tree < std::ranked_symbol < > > & child : node.getChildren() ) { + children.push_back ( repeats ( child, data, minId ) ); + childRepeatsKey.second.push_back ( children.back ( ).getData ( ) ); + } + + int & uniqueRepeatId = data[childRepeatsKey]; + + if ( uniqueRepeatId == 0 ) uniqueRepeatId = minId++; + + return std::tree < std::ranked_symbol < > > ( std::ranked_symbol < > ( alphabet::symbolFrom ( uniqueRepeatId ), node.getData ( ).getRank ( ) ), std::move ( children ) ); +} + +tree::RankedTree < > ExactSubtreeRepeatsNaive::repeats ( const tree::RankedTree < > & tree ) { + int minId = 1; + std::map < std::pair < std::ranked_symbol < >, std::vector < std::ranked_symbol < > > >, int > data; + + return tree::RankedTree < > ( repeats ( tree.getContent ( ), data, minId ) ); +} + auto ExactRepeatsNaiveRankedTree = ExactSubtreeRepeatsNaive::RegistratorWrapper < tree::RankedTree < >, tree::RankedTree < > > ( ExactSubtreeRepeatsNaive::repeats ); + +std::ranked_symbol < > ExactSubtreeRepeatsNaive::repeatsPrefixRanked ( const std::vector < std::ranked_symbol < > > & symbols, std::vector < std::ranked_symbol < > > & res, std::map < std::pair < std::ranked_symbol < >, std::vector < std::ranked_symbol < > > >, int > & data, int & minId, int & index ) { + int begin = index; + std::pair < std::ranked_symbol < >, std::vector < std::ranked_symbol < > > > childRepeatsKey ( symbols[begin], std::vector < std::ranked_symbol < > > ( ) ); + + res.push_back ( std::ranked_symbol < > ( alphabet::symbolFrom ( 0 ), symbols[begin].getRank ( ) ) ); + + index++; + + for ( unsigned i = 0; i < ( unsigned ) symbols[begin].getRank ( ); ++i ) + childRepeatsKey.second.push_back ( repeatsPrefixRanked ( symbols, res, data, minId, index ) ); + + int & uniqueRepeatId = data[childRepeatsKey]; + + if ( uniqueRepeatId == 0 ) uniqueRepeatId = minId++; + + res[begin] = std::ranked_symbol < > ( alphabet::symbolFrom ( uniqueRepeatId ), symbols[begin].getRank ( ) ); + return res[begin]; +} + +tree::PrefixRankedTree < > ExactSubtreeRepeatsNaive::repeats ( const tree::PrefixRankedTree < > & tree ) { + int minId = 1; + int index = 0; + std::vector < std::ranked_symbol < > > res; + std::map < std::pair < std::ranked_symbol < >, std::vector < std::ranked_symbol < > > >, int > data; + + repeatsPrefixRanked ( tree.getContent ( ), res, data, minId, index ); + return tree::PrefixRankedTree < > ( res ); +} + auto ExactRepeatsNaivePrefixRankedTree = ExactSubtreeRepeatsNaive::RegistratorWrapper < tree::PrefixRankedTree < >, tree::PrefixRankedTree < > > ( ExactSubtreeRepeatsNaive::repeats ); + +std::ranked_symbol < > ExactSubtreeRepeatsNaive::repeatsPrefixRankedBar ( const std::vector < std::ranked_symbol < > > & symbols, std::vector < std::ranked_symbol < > > & res, std::map < std::pair < std::ranked_symbol < >, std::vector < std::ranked_symbol < > > >, int > & data, int & minId, int & index ) { + int begin = index; + std::pair < std::ranked_symbol < >, std::vector < std::ranked_symbol < > > > childRepeatsKey ( symbols[begin], std::vector < std::ranked_symbol < > > ( ) ); + + res.push_back ( std::ranked_symbol < > ( alphabet::symbolFrom ( 0 ), symbols[begin].getRank ( ) ) ); + + index++; + + for ( unsigned i = 0; i < ( unsigned ) symbols[begin].getRank ( ); ++i ) + childRepeatsKey.second.push_back ( repeatsPrefixRankedBar ( symbols, res, data, minId, index ) ); + + int & uniqueRepeatId = data[childRepeatsKey]; + + if ( uniqueRepeatId == 0 ) uniqueRepeatId = minId++; + + res[begin] = std::ranked_symbol < > ( alphabet::symbolFrom ( uniqueRepeatId ), symbols[begin].getRank ( ) ); + res.push_back ( symbols[index] ); + index++; + + return res[begin]; +} + +tree::PrefixRankedBarTree < > ExactSubtreeRepeatsNaive::repeats ( const tree::PrefixRankedBarTree < > & tree ) { + int minId = 1; + int index = 0; + std::vector < std::ranked_symbol < > > res; + std::map < std::pair < std::ranked_symbol < >, std::vector < std::ranked_symbol < > > >, int > data; + + repeatsPrefixRankedBar ( tree.getContent ( ), res, data, minId, index ); + return tree::PrefixRankedBarTree < > ( tree.getBars ( ), res ); +} + auto ExactRepeatsNaivePrefixRankedBarTree = ExactSubtreeRepeatsNaive::RegistratorWrapper < tree::PrefixRankedBarTree < >, tree::PrefixRankedBarTree < > > ( ExactSubtreeRepeatsNaive::repeats ); } /* namespace properties */ diff --git a/alib2algo/src/tree/properties/ExactSubtreeRepeatsNaive.h b/alib2algo/src/tree/properties/ExactSubtreeRepeatsNaive.h index 3f81f6e3da..2378926916 100644 --- a/alib2algo/src/tree/properties/ExactSubtreeRepeatsNaive.h +++ b/alib2algo/src/tree/properties/ExactSubtreeRepeatsNaive.h @@ -17,14 +17,6 @@ #include <tree> #include <alphabet/RankedSymbol.h> -#include "SubtreeJumpTable.h" - -#include <tree/ranked/RankedTree.h> -#include <tree/ranked/PrefixRankedTree.h> -#include <tree/ranked/PrefixRankedBarTree.h> -#include <tree/Tree.h> -#include <global/GlobalData.h> - namespace tree { namespace properties { @@ -33,14 +25,9 @@ namespace properties { * Simple computation of subtree repeats */ class ExactSubtreeRepeatsNaive : public std::SingleDispatch < ExactSubtreeRepeatsNaive, tree::Tree, const tree::TreeBase & > { - template < class SymbolType, class RankType > - static std::tree < std::ranked_symbol < SymbolType, RankType > > repeats ( const std::tree < std::ranked_symbol < SymbolType, RankType > > & node, std::map < std::pair < std::ranked_symbol < SymbolType, RankType >, std::vector < std::ranked_symbol < SymbolType, RankType > > >, int > & data, int & minId ); - - template < class SymbolType, class RankType > - static std::ranked_symbol < SymbolType, RankType > repeatsPrefixRanked ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols, std::vector < std::ranked_symbol < SymbolType, RankType > > & res, std::map < std::pair < std::ranked_symbol < SymbolType, RankType >, std::vector < std::ranked_symbol < SymbolType, RankType > > >, int > & data, int & minId, int & index ); - - template < class SymbolType, class RankType > - static std::ranked_symbol < SymbolType, RankType > repeatsPrefixRankedBar ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols, std::vector < std::ranked_symbol < SymbolType, RankType > > & res, std::map < std::pair < std::ranked_symbol < SymbolType, RankType >, std::vector < std::ranked_symbol < SymbolType, RankType > > >, int > & data, int & minId, int & index ); + static std::tree < std::ranked_symbol < > > repeats ( const std::tree < std::ranked_symbol < > > & node, std::map < std::pair < std::ranked_symbol < >, std::vector < std::ranked_symbol < > > >, int > & data, int & minId ); + static std::ranked_symbol < > repeatsPrefixRanked ( const std::vector < std::ranked_symbol < > > & symbols, std::vector < std::ranked_symbol < > > & res, std::map < std::pair < std::ranked_symbol < >, std::vector < std::ranked_symbol < > > >, int > & data, int & minId, int & index ); + static std::ranked_symbol < > repeatsPrefixRankedBar ( const std::vector < std::ranked_symbol < > > & symbols, std::vector < std::ranked_symbol < > > & res, std::map < std::pair < std::ranked_symbol < >, std::vector < std::ranked_symbol < > > >, int > & data, int & minId, int & index ); public: /** @@ -53,105 +40,12 @@ public: * Compute a same shaped tree with nodes containing unique subtree ids. * @return Tree of repeats */ - template < class SymbolType, class RankType > - static tree::RankedTree < SymbolType, RankType > repeats ( const tree::RankedTree < SymbolType, RankType > & tree ); - template < class SymbolType, class RankType > - static tree::PrefixRankedTree < SymbolType, RankType > repeats ( const tree::PrefixRankedTree < SymbolType, RankType > & tree ); - template < class SymbolType, class RankType > - static tree::PrefixRankedBarTree < SymbolType, RankType > repeats ( const tree::PrefixRankedBarTree < SymbolType, RankType > & tree ); + static tree::RankedTree < > repeats ( const tree::RankedTree < > & tree ); + static tree::PrefixRankedTree < > repeats ( const tree::PrefixRankedTree < > & tree ); + static tree::PrefixRankedBarTree < > repeats ( const tree::PrefixRankedBarTree < > & tree ); }; -template < class SymbolType, class RankType > -std::tree < std::ranked_symbol < SymbolType, RankType > > ExactSubtreeRepeatsNaive::repeats ( const std::tree < std::ranked_symbol < SymbolType, RankType > > & node, std::map < std::pair < std::ranked_symbol < SymbolType, RankType >, std::vector < std::ranked_symbol < SymbolType, RankType > > >, int > & data, int & minId ) { - std::vector < std::tree < std::ranked_symbol < SymbolType, RankType > > > children; - std::pair < std::ranked_symbol < SymbolType, RankType >, std::vector < std::ranked_symbol < SymbolType, RankType > > > childRepeatsKey ( node.getData ( ), std::vector < std::ranked_symbol < SymbolType, RankType > > ( ) ); - - for ( const std::tree < std::ranked_symbol < SymbolType, RankType > > & child : node.getChildren() ) { - children.push_back ( repeats ( child, data, minId ) ); - childRepeatsKey.second.push_back ( children.back ( ).getData ( ) ); - } - - int & uniqueRepeatId = data[childRepeatsKey]; - - if ( uniqueRepeatId == 0 ) uniqueRepeatId = minId++; - - return std::tree < std::ranked_symbol < SymbolType, RankType > > ( std::ranked_symbol < SymbolType, RankType > ( alphabet::symbolFrom ( uniqueRepeatId ), node.getData ( ).getRank ( ) ), std::move ( children ) ); -} - -template < class SymbolType, class RankType > -tree::RankedTree < SymbolType, RankType > ExactSubtreeRepeatsNaive::repeats ( const tree::RankedTree < SymbolType, RankType > & tree ) { - int minId = 1; - std::map < std::pair < std::ranked_symbol < SymbolType, RankType >, std::vector < std::ranked_symbol < SymbolType, RankType > > >, int > data; - - return tree::RankedTree < SymbolType, RankType > ( repeats ( tree.getContent ( ), data, minId ) ); -} - -template < class SymbolType, class RankType > -std::ranked_symbol < SymbolType, RankType > ExactSubtreeRepeatsNaive::repeatsPrefixRanked ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols, std::vector < std::ranked_symbol < SymbolType, RankType > > & res, std::map < std::pair < std::ranked_symbol < SymbolType, RankType >, std::vector < std::ranked_symbol < SymbolType, RankType > > >, int > & data, int & minId, int & index ) { - int begin = index; - std::pair < std::ranked_symbol < SymbolType, RankType >, std::vector < std::ranked_symbol < SymbolType, RankType > > > childRepeatsKey ( symbols[begin], std::vector < std::ranked_symbol < SymbolType, RankType > > ( ) ); - - res.push_back ( std::ranked_symbol < SymbolType, RankType > ( alphabet::symbolFrom ( 0 ), symbols[begin].getRank ( ) ) ); - - index++; - - for ( unsigned i = 0; i < ( unsigned ) symbols[begin].getRank ( ); ++i ) - childRepeatsKey.second.push_back ( repeatsPrefixRanked ( symbols, res, data, minId, index ) ); - - int & uniqueRepeatId = data[childRepeatsKey]; - - if ( uniqueRepeatId == 0 ) uniqueRepeatId = minId++; - - res[begin] = std::ranked_symbol < SymbolType, RankType > ( alphabet::symbolFrom ( uniqueRepeatId ), symbols[begin].getRank ( ) ); - return res[begin]; -} - -template < class SymbolType, class RankType > -tree::PrefixRankedTree < SymbolType, RankType > ExactSubtreeRepeatsNaive::repeats ( const tree::PrefixRankedTree < SymbolType, RankType > & tree ) { - int minId = 1; - int index = 0; - std::vector < std::ranked_symbol < SymbolType, RankType > > res; - std::map < std::pair < std::ranked_symbol < SymbolType, RankType >, std::vector < std::ranked_symbol < SymbolType, RankType > > >, int > data; - - repeatsPrefixRanked ( tree.getContent ( ), res, data, minId, index ); - return tree::PrefixRankedTree < SymbolType, RankType > ( res ); -} - -template < class SymbolType, class RankType > -std::ranked_symbol < SymbolType, RankType > ExactSubtreeRepeatsNaive::repeatsPrefixRankedBar ( const std::vector < std::ranked_symbol < SymbolType, RankType > > & symbols, std::vector < std::ranked_symbol < SymbolType, RankType > > & res, std::map < std::pair < std::ranked_symbol < SymbolType, RankType >, std::vector < std::ranked_symbol < SymbolType, RankType > > >, int > & data, int & minId, int & index ) { - int begin = index; - std::pair < std::ranked_symbol < SymbolType, RankType >, std::vector < std::ranked_symbol < SymbolType, RankType > > > childRepeatsKey ( symbols[begin], std::vector < std::ranked_symbol < SymbolType, RankType > > ( ) ); - - res.push_back ( std::ranked_symbol < SymbolType, RankType > ( alphabet::symbolFrom ( 0 ), symbols[begin].getRank ( ) ) ); - - index++; - - for ( unsigned i = 0; i < ( unsigned ) symbols[begin].getRank ( ); ++i ) - childRepeatsKey.second.push_back ( repeatsPrefixRankedBar ( symbols, res, data, minId, index ) ); - - int & uniqueRepeatId = data[childRepeatsKey]; - - if ( uniqueRepeatId == 0 ) uniqueRepeatId = minId++; - - res[begin] = std::ranked_symbol < SymbolType, RankType > ( alphabet::symbolFrom ( uniqueRepeatId ), symbols[begin].getRank ( ) ); - res.push_back ( symbols[index] ); - index++; - - return res[begin]; -} - -template < class SymbolType, class RankType > -tree::PrefixRankedBarTree < SymbolType, RankType > ExactSubtreeRepeatsNaive::repeats ( const tree::PrefixRankedBarTree < SymbolType, RankType > & tree ) { - int minId = 1; - int index = 0; - std::vector < std::ranked_symbol < SymbolType, RankType > > res; - std::map < std::pair < std::ranked_symbol < SymbolType, RankType >, std::vector < std::ranked_symbol < SymbolType, RankType > > >, int > data; - - repeatsPrefixRankedBar ( tree.getContent ( ), res, data, minId, index ); - return tree::PrefixRankedBarTree < SymbolType, RankType > ( tree.getBars ( ), res ); -} - } /* namespace properties */ } /* namespace tree */ -- GitLab