Skip to content
Snippets Groups Projects
KnuthMorrisPratt.h 5.04 KiB
Newer Older
/*
 * KnuthMorrisPratt.h
 *
 *  Created on: 5. 11. 2014
 *      Author: Jan Travnicek
 */

#ifndef _ARBOLOGY_KNUTH_MORRIS_PRATT_H_
#define _ARBOLOGY_KNUTH_MORRIS_PRATT_H_

#include <set>
#include <vector>

#include <tree/properties/BorderArrayNaive.h>
#include <tree/properties/SubtreeJumpTable.h>

#include <tree/ranked/PrefixRankedBarTree.h>
#include <tree/ranked/PrefixRankedBarPattern.h>
#include <tree/ranked/PrefixRankedTree.h>
#include <tree/ranked/PrefixRankedPattern.h>

namespace arbology {

namespace exact {

/**
 * Implementation of BMH for MI(E+\eps)-EVY course 2014
 * To get rid of zeros in BCS table we ignore last haystack character
 */
class KnuthMorrisPratt {
public:
	/**
	 * Search for pattern in linear string.
	 * @return set set of occurences
	 */
	template < class SymbolType, class RankType >
	static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern );
	template < class SymbolType, class RankType >
	static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern );
	template < class SymbolType, class RankType >
	static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern );
	template < class SymbolType, class RankType >
	static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern );
template < class SymbolType, class RankType >
ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarTree < SymbolType, RankType > & pattern ) {
	return match ( subject, tree::PrefixRankedBarPattern < SymbolType, RankType > ( pattern ) );
}

template < class SymbolType, class RankType >
ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ) {
	ext::set < unsigned > occ;
	ext::vector < size_t > ba = tree::properties::BorderArrayNaive::ba ( pattern );
	ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );

	 // index to the subject
	unsigned i = 0;

	 // main loop of the algorithm over all possible indexes where the pattern can start
	while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) {

		 // index to the pattern
		unsigned j = 0;

		 // offset to the subject
		unsigned offset = i;

		while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
			if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) {
				 // match of symbol
				offset++;
				j++;
			} else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) && ( ! pattern.getBars ( ).count ( subject.getContent ( )[offset] ) ) ) {
				 // match of variable with subtree
				offset = subjectSubtreeJumpTable[offset];
				j += 2;
			} else {
				break;
			}
		}

		 // match was found
		if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );

		 // shift heuristics
		i += j - ba[j];
	}

	return occ;
}

template < class SymbolType, class RankType >
ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedTree < SymbolType, RankType > & pattern ) {
	return match ( subject, tree::PrefixRankedPattern < SymbolType, RankType > ( pattern ) );
}

template < class SymbolType, class RankType >
ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ) {
	ext::set < unsigned > occ;
	ext::vector < size_t > ba = tree::properties::BorderArrayNaive::ba ( pattern );
	ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );

	 // index to the subject
	unsigned i = 0;

	 // main loop of the algorithm over all possible indexes where the pattern can start
	while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) {

		 // index to the pattern
		unsigned j = 0;

		 // offset to the subject
		unsigned offset = i;

		while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
			if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) {
				 // match of symbol
				offset++;
				j++;
			} else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) ) {
				 // match of variable with subtree
				offset = subjectSubtreeJumpTable[offset];
				j++;
			} else {
				break;
			}
		}

		 // match was found
		if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );

		 // shift heristics
		i += j - ba[j];
	}

	return occ;
}

} /* namespace exact */

} /* namespace arbology */

#endif /* _ARBOLOGY_KNUTH_MORRIS_PRATT_H_ */