Skip to content
Snippets Groups Projects
KnuthMorrisPratt.cpp 4.52 KiB
Newer Older
  • Learn to ignore specific revisions
  • /*
     * KnuthMorrisPratt.cpp
     *
     *  Created on: 5. 11. 2014
     *      Author: Jan Travnicek
     */
    
    #include "KnuthMorrisPratt.h"
    #include "BorderArrayNaive.h"
    #include "SubtreeJumpTable.h"
    
    #include <exception/AlibException.h>
    #include <tree/Tree.h>
    #include <tree/ranked/PrefixRankedBarTree.h>
    #include <tree/ranked/PrefixRankedBarPattern.h>
    
    #include <tree/ranked/PrefixRankedTree.h>
    #include <tree/ranked/PrefixRankedPattern.h>
    
    #include <alphabet/RankedSymbol.h>
    
    #include <map>
    
    namespace arbology {
    
    namespace exact {
    
    std::set < unsigned > KnuthMorrisPratt::match ( const tree::Tree & subject, const tree::Tree & pattern ) {
    	return getInstance ( ).dispatch ( subject.getData ( ), pattern.getData ( ) );
    }
    
    std::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarTree & pattern ) {
    	return match ( subject, tree::PrefixRankedBarPattern ( pattern ) );
    }
    
    auto KnuthMorrisPrattPrefixRankedBarTreePrefixRankedBarTree = KnuthMorrisPratt::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree, tree::PrefixRankedBarTree > ( KnuthMorrisPratt::getInstance ( ), KnuthMorrisPratt::match );
    
    std::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedBarTree & subject, const tree::PrefixRankedBarPattern & pattern ) {
    	std::set < unsigned > occ;
    	std::vector < size_t > ba = BorderArrayNaive::ba ( pattern );
    	std::vector < int > subjectSubtreeJumpTable = SubtreeJumpTable::compute ( subject );
    
    	 // index to the subject
    	unsigned i = 0;
    
    	 // main loop of the algorithm over all possible indexes where the pattern can start
    	while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) {
    
    		 // index to the pattern
    		unsigned j = 0;
    
    		 // offset to the subject
    		unsigned offset = i;
    
    		while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
    			if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) {
    				 // match of symbol
    				offset++;
    				j++;
    			} else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) && ( subject.getContent ( )[offset].getSymbol ( ) != pattern.getBarSymbol ( ) ) ) {
    				 // match of variable with subtree
    				offset = subjectSubtreeJumpTable[offset];
    				j += 2;
    			} else {
    				break;
    			}
    		}
    
    		 // match was found
    		if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );
    
    		 // shift heristics
    		i += j - ba[j];
    	}
    
    	return occ;
    }
    
    auto KnuthMorrisPrattPrefixRankedBarTreePrefixRankedBarPattern = KnuthMorrisPratt::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedBarTree, tree::PrefixRankedBarPattern > ( KnuthMorrisPratt::getInstance ( ), KnuthMorrisPratt::match );
    
    
    std::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedTree & pattern ) {
    	return match ( subject, tree::PrefixRankedPattern ( pattern ) );
    }
    
    auto KnuthMorrisPrattPrefixRankedTreePrefixRankedTree = KnuthMorrisPratt::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedTree, tree::PrefixRankedTree > ( KnuthMorrisPratt::getInstance ( ), KnuthMorrisPratt::match );
    
    std::set < unsigned > KnuthMorrisPratt::match ( const tree::PrefixRankedTree & subject, const tree::PrefixRankedPattern & pattern ) {
    	std::set < unsigned > occ;
    	std::vector < size_t > ba = BorderArrayNaive::ba ( pattern );
    	std::vector < int > subjectSubtreeJumpTable = SubtreeJumpTable::compute ( subject );
    
    	 // index to the subject
    	unsigned i = 0;
    
    	 // main loop of the algorithm over all possible indexes where the pattern can start
    	while ( i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ) ) {
    
    		 // index to the pattern
    		unsigned j = 0;
    
    		 // offset to the subject
    		unsigned offset = i;
    
    		while ( ( j < pattern.getContent ( ).size ( ) ) && ( offset < subject.getContent ( ).size ( ) ) ) {
    			if ( subject.getContent ( )[offset] == pattern.getContent ( )[j] ) {
    				 // match of symbol
    				offset++;
    				j++;
    			} else if ( ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) ) {
    				 // match of variable with subtree
    				offset = subjectSubtreeJumpTable[offset];
    				j++;
    			} else {
    				break;
    			}
    		}
    
    		 // match was found
    		if ( j >= pattern.getContent ( ).size ( ) ) occ.insert ( i );
    
    		 // shift heristics
    		i += j - ba[j];
    	}
    
    	return occ;
    }
    
    auto KnuthMorrisPrattPrefixRankedTreePrefixRankedPattern = KnuthMorrisPratt::RegistratorWrapper < std::set < unsigned >, tree::PrefixRankedTree, tree::PrefixRankedPattern > ( KnuthMorrisPratt::getInstance ( ), KnuthMorrisPratt::match );
    
    
    } /* namespace exact */
    
    } /* namespace arbology */