Skip to content
Snippets Groups Projects
DeadZoneUsingBadCharacterShiftAndBorderArray.h 7.78 KiB
Newer Older
/*
 * DeadZoneUsingBadCharacterShiftAndBorderArray.h
 *
 *  Created on: 5. 11. 2014
 *      Author: Jan Travnicek
 */

#ifndef _DEAD_ZONE_USING_BAD_CHARACTER_SHIFT_AND_BORDER_ARRAY_H_
#define _DEAD_ZONE_USING_BAD_CHARACTER_SHIFT_AND_BORDER_ARRAY_H_

#include <alib/set>
#include <alib/vector>
#include <alib/map>
Jan Trávníček's avatar
Jan Trávníček committed

#include <common/ranked_symbol.hpp>
#include <tree/properties/ReversedBadCharacterShiftTable.h>
#include <tree/properties/BorderArrayNaive.h>
#include <tree/properties/SubtreeJumpTable.h>

#include <tree/ranked/PrefixRankedBarTree.h>
#include <tree/ranked/PrefixRankedBarPattern.h>
#include <tree/ranked/PrefixRankedTree.h>
#include <tree/ranked/PrefixRankedPattern.h>

namespace arbology {

namespace exact {

/**
 * Implementation of DeadZone matching using bad character shift as shifting method on one direction and border array on the other
 */
class DeadZoneUsingBadCharacterShiftAndBorderArray {
public:
	/**
	 * Search for pattern in linear string.
	 * @return set set of occurences
	 */
	template < class SymbolType, class RankType >
	static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern );
	template < class SymbolType, class RankType >
	static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern );
	template < class SymbolType, class RankType >
	static void match_rec ( ext::set < unsigned > & occ, const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern, ext::vector < size_t > & fba, ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > & bbcs, ext::vector < int > & subjectSubtreeJumpTable, int low, int high );
	template < class SymbolType, class RankType >
	static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern );
	template < class SymbolType, class RankType >
	static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern );
	template < class SymbolType, class RankType >
	static void match_rec ( ext::set < unsigned > & occ, const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern, ext::vector < size_t > & fba, ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > & bbcs, ext::vector < int > & subjectSubtreeJumpTable, int low, int high );
template < class SymbolType, class RankType >
ext::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern ) {
	return match ( subject, tree::PrefixRankedBarPattern < SymbolType, RankType > ( pattern ) );
}

template < class SymbolType, class RankType >
ext::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ) {
	ext::set < unsigned > occ;
	ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > bbcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
	ext::vector < size_t > fba = tree::properties::BorderArrayNaive::ba ( pattern );
	ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );

	match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, 0, subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1 );
	return occ;
}

template < class SymbolType, class RankType >
void DeadZoneUsingBadCharacterShiftAndBorderArray::match_rec ( ext::set < unsigned > & occ, const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern, ext::vector < size_t > & fba, ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > & bbcs, ext::vector < int > & subjectSubtreeJumpTable, int low, int high ) {
	if ( low >= high ) return;

	int i = ( low + high ) / 2;

	 // index to the pattern
	unsigned j = 0;

	 // offset to the subject
	unsigned offset = i;

	while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
		if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) {
			 // match of symbol
			offset++;
			j++;
		} else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) /* && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) */ ) {
			 // match of variable with subtree
			offset = subjectSubtreeJumpTable[offset];
			j += 2;
		} else {
			break;
		}
	}

	 // match was found
	if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );

	match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, low, i - bbcs[subject.getContent ( )[i]] + 1 );
	match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, i + j - fba[j], high );
}

template < class SymbolType, class RankType >
ext::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern ) {
	return match ( subject, tree::PrefixRankedPattern < SymbolType, RankType > ( pattern ) );
}

template < class SymbolType, class RankType >
ext::set < unsigned > DeadZoneUsingBadCharacterShiftAndBorderArray::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ) {
	ext::set < unsigned > occ;
	ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > bbcs = tree::properties::ReversedBadCharacterShiftTable::bcs ( pattern ); // NOTE: the subjects alphabet must be a subset or equal to the pattern
	ext::vector < size_t > fba = tree::properties::BorderArrayNaive::ba ( pattern );
	ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );

	match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, 0, subject.getContent ( ).size ( ) - pattern.getContent ( ).size ( ) + 1 );
	return occ;
}

template < class SymbolType, class RankType >
void DeadZoneUsingBadCharacterShiftAndBorderArray::match_rec ( ext::set < unsigned > & occ, const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern, ext::vector < size_t > & fba, ext::map < common::ranked_symbol < SymbolType, RankType >, size_t > & bbcs, ext::vector < int > & subjectSubtreeJumpTable, int low, int high ) {
	if ( low >= high ) return;

	int i = ( low + high ) / 2;

	 // index to the pattern
	unsigned j = 0;

	 // offset to the subject
	unsigned offset = i;

	while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
		if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] )
			 // match of symbol
			offset++;
		else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) )
			 // match of variable with subtree
			offset = subjectSubtreeJumpTable[offset];
		else
			break;

		j++;
	}

	 // match was found
	if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );

	match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, low, i - bbcs[subject.getContent ( )[i]] + 1 );
	match_rec ( occ, subject, pattern, fba, bbcs, subjectSubtreeJumpTable, i + j - fba[j], high );
}

} /* namespace exact */

} /* namespace arbology */

#endif /* _DEAD_ZONE_USING_BAD_CHARACTER_SHIFT_AND_BORDER_ARRAY_H_ */