Skip to content
Snippets Groups Projects
KnuthMorrisPratt.h 8.25 KiB
Newer Older
#include <alib/measure>
#include <alib/set>
#include <alib/vector>
#include <tree/properties/BorderArray.h>
#include <tree/properties/SubtreeJumpTable.h>
#include <tree/properties/ExactSubtreeRepeatsNaive.h>
#include <tree/exact/ForwardOccurrenceTest.h>

#include <tree/ranked/PrefixRankedBarTree.h>
#include <tree/ranked/PrefixRankedBarPattern.h>
#include <tree/ranked/PrefixRankedBarNonlinearPattern.h>
#include <tree/ranked/PrefixRankedTree.h>
#include <tree/ranked/PrefixRankedPattern.h>
#include <tree/ranked/PrefixRankedNonlinearPattern.h>

namespace arbology {

namespace exact {

/**
 * Implementation of BMH for MI(E+\eps)-EVY course 2014
 * To get rid of zeros in BCS table we ignore last haystack character
 */
class KnuthMorrisPratt {
public:
	/**
	 * Search for pattern in linear string.
	 * @return set set of occurences
	 */
	template < class SymbolType >
	static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType > & subject, const tree::PrefixRankedBarTree < SymbolType > & pattern );
	template < class SymbolType >
	static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType > & subject, const tree::PrefixRankedBarPattern < SymbolType > & pattern );
	template < class SymbolType >
	static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType > & subject, const tree::PrefixRankedBarNonlinearPattern < SymbolType > & pattern );

	template < class SymbolType >
	static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedTree < SymbolType > & pattern );
	template < class SymbolType >
	static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedPattern < SymbolType > & pattern );
	template < class SymbolType >
	static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedNonlinearPattern < SymbolType > & pattern );
template < class SymbolType >
ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedBarTree < SymbolType > & subject, const tree::PrefixRankedBarTree < SymbolType > & pattern ) {
	return match ( subject, tree::PrefixRankedBarPattern < SymbolType > ( pattern ) );
template < class SymbolType >
ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedBarTree < SymbolType > & subject, const tree::PrefixRankedBarPattern < SymbolType > & pattern ) {
	ext::set < unsigned > occ;
	ext::vector < size_t > construct = tree::properties::BorderArray::construct ( pattern );

	//measurements::start("Algorithm", measurements::Type::MAIN);

	ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );

	unsigned Spos = pattern.getContent ( ).size ( );
	for ( unsigned i = 0; i < pattern.getContent ( ).size ( ); ++ i ) {
		if ( pattern.getContent ( ) [ i ] == pattern.getSubtreeWildcard ( ) ) {
			Spos = i;
			break;
		}
	}

	 // index to the subject
	unsigned i = 0;

	 // main loop of the algorithm over all possible indexes where the pattern can start
	while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) {
		 // index to the pattern
		unsigned j = tree::exact::ForwardOccurrenceTest::occurrence ( subject, subjectSubtreeJumpTable, pattern, i );

		 // match was found
		if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );

		if ( j == 0 ) {
			i += 1;
		} else {
			unsigned shift = j - construct [ j ];
			 // shift heuristics
			i += shift;
			j = std::min ( Spos, j ) - shift;
		}
	//measurements::end();

	return occ;
}

template < class SymbolType >
ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedBarTree < SymbolType > & subject, const tree::PrefixRankedBarNonlinearPattern < SymbolType > & pattern ) {
	ext::set < unsigned > occ;
	ext::vector < size_t > construct = tree::properties::BorderArray::construct ( pattern );

	//measurements::start("Algorithm", measurements::Type::MAIN);

	ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
	tree::PrefixRankedBarTree < unsigned > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject );
	unsigned Spos = pattern.getContent ( ).size ( );
	for ( unsigned i = 0; i < pattern.getContent ( ).size ( ); ++ i ) {
		if ( pattern.getContent ( ) [ i ] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).contains ( pattern.getContent ( ) [ i ] ) ) {
			Spos = i;
			break;
		}
	}

	 // index to the subject
	unsigned i = 0;

	 // main loop of the algorithm over all possible indexes where the pattern can start
	while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) {
		 // index to the pattern
		unsigned j = tree::exact::ForwardOccurrenceTest::occurrence ( subject, subjectSubtreeJumpTable, repeats, pattern, i );

		 // match was found
		if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );

		if ( j == 0 ) {
			i += 1;
		} else {
			unsigned shift = j - construct [ j ];
			 // shift heuristics
			i += shift;
			j = std::min ( Spos, j ) - shift;
		}
template < class SymbolType >
ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedTree < SymbolType > & pattern ) {
	return match ( subject, tree::PrefixRankedPattern < SymbolType > ( pattern ) );
template < class SymbolType >
ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedPattern < SymbolType > & pattern ) {
	ext::set < unsigned > occ;
	ext::vector < size_t > construct = tree::properties::BorderArray::construct ( pattern );

	//measurements::start("Algorithm", measurements::Type::MAIN);

	ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );

	unsigned Spos = pattern.getContent ( ).size ( );
	for ( unsigned i = 0; i < pattern.getContent ( ).size ( ); ++ i ) {
		if ( pattern.getContent ( ) [ i ] == pattern.getSubtreeWildcard ( ) ) {
			Spos = i;
			break;
		}
	}

	 // index to the subject
	unsigned i = 0;

	 // main loop of the algorithm over all possible indexes where the pattern can start
	while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) {
		 // index to the pattern
		unsigned j = tree::exact::ForwardOccurrenceTest::occurrence ( subject, subjectSubtreeJumpTable, pattern, i );

		 // match was found
		if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );

		if ( j == 0 ) {
			i += 1;
		} else {
			unsigned shift = j - construct [ j ];
			 // shift heuristics
			i += shift;
			j = std::min ( Spos, j ) - shift;
		}
template < class SymbolType >
ext::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree < SymbolType > & subject, const tree::PrefixRankedNonlinearPattern < SymbolType > & pattern ) {
	ext::set < unsigned > occ;
	ext::vector < size_t > construct = tree::properties::BorderArray::construct ( pattern );

	//measurements::start("Algorithm", measurements::Type::MAIN);

	ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
	tree::PrefixRankedTree < unsigned > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject );
	unsigned Spos = pattern.getContent ( ).size ( );
	for ( unsigned i = 0; i < pattern.getContent ( ).size ( ); ++ i ) {
		if ( pattern.getContent ( ) [ i ] == pattern.getSubtreeWildcard ( ) || pattern.getNonlinearVariables ( ).contains ( pattern.getContent ( ) [ i ] ) ) {
			Spos = i;
			break;
		}
	}

	 // index to the subject
	unsigned i = 0;

	 // main loop of the algorithm over all possible indexes where the pattern can start
	while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) {
		 // index to the pattern
		unsigned j = tree::exact::ForwardOccurrenceTest::occurrence ( subject, subjectSubtreeJumpTable, repeats, pattern, i );

		 // match was found
		if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );

		if ( j == 0 ) {
			i += 1;
		} else {
			unsigned shift = j - construct [ j ];
			 // shift heuristics
			i += shift;
			j = std::min ( Spos, j ) - shift;
		}
	//measurements::end();

} /* namespace exact */

} /* namespace arbology */