-
Jan Trávníček authoredJan Trávníček authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
ReversedBoyerMooreHorspool.h 10.82 KiB
/*
* ReversedBoyerMooreHorspool.h
*
* Created on: 5. 11. 2014
* Author: Jan Travnicek
*/
#ifndef _ARBOLOGY_REVERSED_BOYER_MOORE_HORSPOOL_H_
#define _ARBOLOGY_REVERSED_BOYER_MOORE_HORSPOOL_H_
#include <alib/set>
#include <alib/map>
#include <common/ranked_symbol.hpp>
#include <tree/properties/ReversedBadCharacterShiftTable.h>
#include <tree/properties/SubtreeJumpTable.h>
#include <tree/properties/ExactSubtreeRepeatsNaive.h>
#include <tree/ranked/PrefixRankedTree.h>
#include <tree/ranked/PrefixRankedBarTree.h>
#include <tree/ranked/PrefixRankedPattern.h>
#include <tree/ranked/PrefixRankedBarPattern.h>
#include <tree/ranked/PrefixRankedNonlinearPattern.h>
#include <tree/ranked/PrefixRankedBarNonlinearPattern.h>
namespace arbology {
namespace exact {
/**
* Implementation of BMH for MI(E+\eps)-EVY course 2014
* To get rid of zeros in BCS table we ignore last haystack character
*/
class ReversedBoyerMooreHorspool {
public:
/**
* Search for pattern in linear string.
* @return set set of occurences
*/
template < class SymbolType, class RankType >
static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern );
template < class SymbolType, class RankType >
static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern );
template < class SymbolType, class RankType >
static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarNonlinearPattern < SymbolType, RankType > & pattern );
template < class SymbolType, class RankType >
static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern );
template < class SymbolType, class RankType >
static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern );
template < class SymbolType, class RankType >
static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedNonlinearPattern < SymbolType, RankType > & pattern );
};
template < class SymbolType, class RankType >
ext::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern ) {
return match ( subject, tree::PrefixRankedBarPattern < SymbolType, RankType > ( pattern ) );
}
template < class SymbolType, class RankType >
ext::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ) {
ext::set < unsigned > occ;
ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > bcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
// index to the subject
int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1;
// main loop of the algorithm over all possible indexes where the pattern can start
while ( i >= 0 ) {
// index to the pattern
unsigned j = 0;
// offset to the subject
unsigned offset = i;
while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) {
// match of symbol
offset = offset + 1;
j = j + 1;
} else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) /* && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) */ ) {
// match of variable with subtree
offset = subjectSubtreeJumpTable[offset];
j = j + 2;
} else {
break;
}
}
// match was found
if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i );
// shift heuristics
i -= bcs[subject.getContent ( )[i]];
}
return occ;
}
template < class SymbolType, class RankType >
ext::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarNonlinearPattern < SymbolType, RankType > & pattern ) {
ext::set < unsigned > occ;
ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > bcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
ext::map < common::ranked_symbol < SymbolType, RankType >, unsigned > variablesSetting;
tree::PrefixRankedBarTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject );
// index to the subject
int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1;
// main loop of the algorithm over all possible indexes where the pattern can start
while ( i >= 0 ) {
// clear the current state of variable to subtree repeat
variablesSetting.clear();
// index to the pattern
unsigned j = 0;
// offset to the subject
unsigned offset = i;
while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) {
// match of symbol
offset = offset + 1;
j = j + 1;
} else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[j] ) ) /* && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) */ ) {
// check nonlinear variable
if ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) {
auto setting = variablesSetting.find ( pattern.getContent ( )[ j ] );
if ( setting != variablesSetting.end ( ) && repeats.getContent ( )[ offset ].getSymbol ( ) != setting->second )
break;
variablesSetting.insert ( std::make_pair ( pattern.getContent ( )[ j ], repeats.getContent( )[ offset ].getSymbol ( ) ) );
}
// match of variable with subtree
offset = subjectSubtreeJumpTable[offset];
j = j + 2;
} else {
break;
}
}
// match was found
if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i );
// shift heuristics
i -= bcs[subject.getContent ( )[i]];
}
return occ;
}
template < class SymbolType, class RankType >
ext::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern ) {
return match ( subject, tree::PrefixRankedPattern < SymbolType, RankType > ( pattern ) );
}
template < class SymbolType, class RankType >
ext::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ) {
ext::set < unsigned > occ;
ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > bcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
// index to the subject
int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1;
// main loop of the algorithm over all possible indexes where the pattern can start
while ( i >= 0 ) {
// index to the pattern
unsigned j = 0;
// offset to the subject
unsigned offset = i;
while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] )
// match of symbol
offset = offset + 1;
else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) )
// match of variable with subtree
offset = subjectSubtreeJumpTable[offset];
else
break;
j = j + 1;
}
// match was found
if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i );
// shift heristics
i -= bcs[subject.getContent ( )[i]];
}
return occ;
}
template < class SymbolType, class RankType >
ext::set < unsigned > ReversedBoyerMooreHorspool::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedNonlinearPattern < SymbolType, RankType > & pattern ) {
ext::set < unsigned > occ;
ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > bcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
ext::map < common::ranked_symbol < SymbolType, RankType >, unsigned > variablesSetting;
tree::PrefixRankedTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject );
// index to the subject
int i = ( int ) subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1;
// main loop of the algorithm over all possible indexes where the pattern can start
while ( i >= 0 ) {
// clear the current state of variable to subtree repeat
variablesSetting.clear();
// index to the pattern
unsigned j = 0;
// offset to the subject
unsigned offset = i;
while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] )
// match of symbol
offset = offset + 1;
else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) {
// check nonlinear variable
if ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[ j ] ) ) {
auto setting = variablesSetting.find ( pattern.getContent ( )[ j ] );
if ( setting != variablesSetting.end ( ) && repeats.getContent ( )[ offset ].getSymbol ( ) != setting->second )
break;
variablesSetting.insert ( std::make_pair ( pattern.getContent ( )[ j ], repeats.getContent( )[ offset ].getSymbol ( ) ) );
}
// match of variable with subtree
offset = subjectSubtreeJumpTable[offset];
} else
break;
j = j + 1;
}
// match was found
if ( j == pattern.getContent ( ).size ( ) ) occ.insert ( i );
// shift heristics
i -= bcs[subject.getContent ( )[i]];
}
return occ;
}
} /* namespace exact */
} /* namespace arbology */
#endif /* _ARBOLOGY_REVERSED_BOYER_MOORE_HORSPOOL_H_ */