Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
ExactPatternMatch.h 16.29 KiB
/*
 * ExactPatternMatch.h
 *
 *  Created on: 9. 2. 2014
 *      Author: Jan Travnicek
 */

#ifndef _EXACT_PATTERN_MATCH_H_
#define _EXACT_PATTERN_MATCH_H_

#include <alib/set>
#include <alib/tree>
#include <alib/deque>
#include <alib/foreach>

#include <alphabet/RankedSymbol.h>

#include <tree/ranked/RankedTree.h>
#include <tree/ranked/RankedPattern.h>
#include <tree/ranked/RankedNonlinearPattern.h>
#include <tree/ranked/PrefixRankedTree.h>
#include <tree/ranked/PrefixRankedPattern.h>
#include <tree/ranked/PrefixRankedNonlinearPattern.h>
#include <tree/ranked/PrefixRankedBarTree.h>
#include <tree/ranked/PrefixRankedBarPattern.h>
#include <tree/ranked/PrefixRankedBarNonlinearPattern.h>
#include <tree/unranked/UnrankedTree.h>
#include <tree/unranked/UnrankedPattern.h>

#include <tree/properties/SubtreeJumpTable.h>
#include <tree/properties/ExactSubtreeRepeatsNaive.h>

namespace arbology {

namespace exact {

class ExactPatternMatch {
public:
	/**
	 * Performs conversion.
	 * @return left regular grammar equivalent to source automaton.
	 */
	template < class SymbolType >
	static ext::set < unsigned > match ( const tree::UnrankedTree < SymbolType > & subject, const tree::UnrankedPattern < SymbolType > & pattern );

	template < class SymbolType, class RankType >
	static ext::set < unsigned > match ( const tree::RankedTree < SymbolType, RankType > & subject, const tree::RankedPattern < SymbolType, RankType > & pattern );
	template < class SymbolType, class RankType >
	static ext::set < unsigned > match ( const tree::RankedTree < SymbolType, RankType > & subject, const tree::RankedNonlinearPattern < SymbolType, RankType > & pattern );

	template < class SymbolType, class RankType >
	static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern );
	template < class SymbolType, class RankType >
	static ext::set < unsigned > match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedNonlinearPattern < SymbolType, RankType > & pattern );

	template < class SymbolType, class RankType >
	static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern );
	template < class SymbolType, class RankType >
	static ext::set < unsigned > match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarNonlinearPattern < SymbolType, RankType > & pattern );

private:
	template < class SymbolType >
	static bool matchHelper ( const ext::tree < SymbolType > & subject, const ext::tree < SymbolType > & pattern, const SymbolType & subtreeVariable );
	template < class SymbolType, class RankType >
	static bool matchHelper ( const ext::tree < common::ranked_symbol < SymbolType, RankType > > & subject, const ext::tree < common::ranked_symbol < SymbolType, RankType > > & pattern, const common::ranked_symbol < SymbolType, RankType > & subtreeVariable );
	template < class SymbolType, class RankType >
	static bool matchHelper ( const ext::tree < common::ranked_symbol < SymbolType, RankType > > & subject, const ext::tree < common::ranked_symbol < SymbolType, RankType > > & pattern, const common::ranked_symbol < SymbolType, RankType > & subtreeVariable, const ext::set < common::ranked_symbol < SymbolType, RankType > > & nonlinearVariables, const ext::tree < common::ranked_symbol < unsigned, RankType > > & repeats, ext::map < common::ranked_symbol < SymbolType, RankType >, unsigned > & variablesSetting );

	template < class SymbolType >
	static void matchInternal ( unsigned & index, ext::set < unsigned > & occ, const ext::tree < SymbolType > & subject, const ext::tree < SymbolType > & pattern, const SymbolType & subtreeVariable );
	template < class SymbolType, class RankType >
	static void matchInternal ( unsigned & index, ext::set < unsigned > & occ, const ext::tree < common::ranked_symbol < SymbolType, RankType > > & subject, const ext::tree < common::ranked_symbol < SymbolType, RankType > > & pattern, const common::ranked_symbol < SymbolType, RankType > & subtreeVariable );
	template < class SymbolType, class RankType >
	static void matchInternal ( unsigned & index, ext::set < unsigned > & occ, const ext::tree < common::ranked_symbol < SymbolType, RankType > > & subject, const ext::tree < common::ranked_symbol < SymbolType, RankType > > & pattern, const common::ranked_symbol < SymbolType, RankType > & subtreeVariable, const ext::set < common::ranked_symbol < SymbolType, RankType > > & nonlinearVariables, const ext::tree < common::ranked_symbol < unsigned, RankType > > & subjectRepeats );

};

template < class SymbolType >
bool ExactPatternMatch::matchHelper ( const ext::tree < SymbolType > & subject, const ext::tree < SymbolType > & pattern, const SymbolType & subtreeVariable ) {
	if ( pattern.getData ( ) == subtreeVariable ) return true;

	if ( subject.getData ( ) != pattern.getData ( ) ) return false;

	if ( subject.getChildren ( ).size ( ) != pattern.getChildren ( ).size ( ) ) return false;

	for ( const ext::tuple < const ext::tree < SymbolType >, const ext::tree < SymbolType > > & childs : ext::make_tuple_foreach ( subject.getChildren ( ), pattern.getChildren ( ) ) )
		if ( !matchHelper ( std::get < 0 > ( childs ), std::get < 1 > ( childs ), subtreeVariable ) ) return false;

	return true;
}

template < class SymbolType, class RankType >
bool ExactPatternMatch::matchHelper ( const ext::tree < common::ranked_symbol < SymbolType, RankType > > & subject, const ext::tree < common::ranked_symbol < SymbolType, RankType > > & pattern, const common::ranked_symbol < SymbolType, RankType > & subtreeVariable ) {
	if ( pattern.getData ( ) == subtreeVariable ) return true;

	if ( subject.getData ( ) != pattern.getData ( ) ) return false;

	 // ranked symbols are the same; test for number of children is not needed
	for ( const ext::tuple < const ext::tree < common::ranked_symbol < SymbolType, RankType > >, const ext::tree < common::ranked_symbol < SymbolType, RankType > > > & childs : ext::make_tuple_foreach ( subject.getChildren ( ), pattern.getChildren ( ) ) )
		if ( !matchHelper ( std::get < 0 > ( childs ), std::get < 1 > ( childs ), subtreeVariable ) ) return false;

	return true;
}

template < class SymbolType, class RankType >
bool ExactPatternMatch::matchHelper ( const ext::tree < common::ranked_symbol < SymbolType, RankType > > & subject, const ext::tree < common::ranked_symbol < SymbolType, RankType > > & pattern, const common::ranked_symbol < SymbolType, RankType > & subtreeVariable, const ext::set < common::ranked_symbol < SymbolType, RankType > > & nonlinearVariables, const ext::tree < common::ranked_symbol < unsigned, RankType > > & repeats, ext::map < common::ranked_symbol < SymbolType, RankType >, unsigned > & variablesSetting ) {
	if ( pattern.getData ( ) == subtreeVariable ) return true;

	if ( nonlinearVariables.count ( pattern.getData ( ) ) ) {
		auto setting = variablesSetting.find ( pattern.getData ( ) );

		if ( setting != variablesSetting.end ( ) ) return repeats.getData ( ).getSymbol ( ) == setting->second;

		variablesSetting.insert ( std::make_pair ( pattern.getData ( ), repeats.getData ( ).getSymbol ( ) ) );

		return true;
	}

	if ( subject.getData ( ) != pattern.getData ( ) ) return false;

	 // ranked symbols are the same; test for number of children is not needed
	for ( const ext::tuple < const ext::tree < common::ranked_symbol < SymbolType, RankType > >, const ext::tree < common::ranked_symbol < SymbolType, RankType > >, const ext::tree < common::ranked_symbol < unsigned, RankType > > > & childs : ext::make_tuple_foreach ( subject.getChildren ( ), pattern.getChildren ( ), repeats.getChildren ( ) ) )
		if ( !matchHelper ( std::get < 0 > ( childs ), std::get < 1 > ( childs ), subtreeVariable, nonlinearVariables, std::get < 2 > ( childs ), variablesSetting ) ) return false;

	return true;
}

template < class SymbolType >
void ExactPatternMatch::matchInternal ( unsigned & index, ext::set < unsigned > & occ, const ext::tree < SymbolType > & subject, const ext::tree < SymbolType > & pattern, const SymbolType & subtreeVariable ) {
	if ( matchHelper ( subject, pattern, subtreeVariable ) ) occ.insert ( index );

	index++;

	for ( const ext::tree < SymbolType > & child : subject.getChildren ( ) )
		matchInternal ( index, occ, child, pattern, subtreeVariable );
}

template < class SymbolType, class RankType >
void ExactPatternMatch::matchInternal ( unsigned & index, ext::set < unsigned > & occ, const ext::tree < common::ranked_symbol < SymbolType, RankType > > & subject, const ext::tree < common::ranked_symbol < SymbolType, RankType > > & pattern, const common::ranked_symbol < SymbolType, RankType > & subtreeVariable ) {
	if ( matchHelper ( subject, pattern, subtreeVariable ) ) occ.insert ( index );

	index++;

	for ( const ext::tree < common::ranked_symbol < SymbolType, RankType > > & child : subject.getChildren ( ) )
		matchInternal ( index, occ, child, pattern, subtreeVariable );
}

template < class SymbolType, class RankType >
void ExactPatternMatch::matchInternal ( unsigned & index, ext::set < unsigned > & occ, const ext::tree < common::ranked_symbol < SymbolType, RankType > > & subject, const ext::tree < common::ranked_symbol < SymbolType, RankType > > & pattern, const common::ranked_symbol < SymbolType, RankType > & subtreeVariable, const ext::set < common::ranked_symbol < SymbolType, RankType > > & nonlinearVariables, const ext::tree < common::ranked_symbol < unsigned, RankType > > & repeats ) {
	ext::map < common::ranked_symbol < SymbolType, RankType >, unsigned > variablesSetting;

	if ( matchHelper ( subject, pattern, subtreeVariable, nonlinearVariables, repeats, variablesSetting ) ) occ.insert ( index );

	index++;

	for ( const ext::tuple < const ext::tree < common::ranked_symbol < SymbolType, RankType > >, const ext::tree < common::ranked_symbol < unsigned, RankType > > > & childs : ext::make_tuple_foreach ( subject.getChildren ( ), repeats.getChildren ( ) ) )
		matchInternal ( index, occ, std::get < 0 > ( childs ), pattern, subtreeVariable, nonlinearVariables, std::get < 1 > ( childs ) );
}

template < class SymbolType >
ext::set < unsigned > ExactPatternMatch::match ( const tree::UnrankedTree < SymbolType > & subject, const tree::UnrankedPattern < SymbolType > & pattern ) {
	unsigned i = 0;
	ext::set < unsigned > occ;

	matchInternal ( i, occ, subject.getContent ( ), pattern.getContent ( ), pattern.getSubtreeWildcard ( ) );
	return occ;
}

template < class SymbolType, class RankType >
ext::set < unsigned > ExactPatternMatch::match ( const tree::RankedTree < SymbolType, RankType > & subject, const tree::RankedPattern < SymbolType, RankType > & pattern ) {
	unsigned i = 0;
	ext::set < unsigned > occ;

	matchInternal ( i, occ, subject.getContent ( ), pattern.getContent ( ), pattern.getSubtreeWildcard ( ) );
	return occ;
}

template < class SymbolType, class RankType >
ext::set < unsigned > ExactPatternMatch::match ( const tree::RankedTree < SymbolType, RankType > & subject, const tree::RankedNonlinearPattern < SymbolType, RankType > & pattern ) {
	unsigned i = 0;
	ext::set < unsigned > occ;

	tree::RankedTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject );

	matchInternal ( i, occ, subject.getContent ( ), pattern.getContent ( ), pattern.getSubtreeWildcard ( ), pattern.getNonlinearVariables ( ), repeats.getContent ( ) );
	return occ;
}

template < class SymbolType, class RankType >
ext::set < unsigned > ExactPatternMatch::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedPattern < SymbolType, RankType > & pattern ) {
	ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );

	ext::set < unsigned > occ;

	for ( unsigned i = 0; i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ); i++ ) {
		unsigned j = 0;
		unsigned offset = i;

		for ( ; j < pattern.getContent ( ).size ( ); j++ ) {
			if ( pattern.getContent ( )[j] == subject.getContent ( )[offset] )
				offset++;
			else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) )
				offset = subjectSubtreeJumpTable[offset];
			else
				break;
		}

		if ( j == pattern.getContent ( ).size ( ) )
			occ.insert ( i );
	}

	return occ;
}

template < class SymbolType, class RankType >
ext::set < unsigned > ExactPatternMatch::match ( const tree::PrefixRankedTree < SymbolType, RankType > & subject, const tree::PrefixRankedNonlinearPattern < SymbolType, RankType > & pattern ) {
	ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
	ext::map < common::ranked_symbol < SymbolType, RankType >, unsigned > variablesSetting;

	tree::PrefixRankedTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject );

	ext::set < unsigned > occ;

	for ( unsigned i = 0; i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ); i++ ) {
		variablesSetting.clear();
		unsigned j = 0;
		unsigned offset = i;

		for ( ; j < pattern.getContent ( ).size ( ); j++ ) {
			if ( pattern.getContent ( )[j] == subject.getContent ( )[offset] )
				offset++;
			else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) )
				offset = subjectSubtreeJumpTable[offset];
			else if ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[j] ) ) {
				auto setting = variablesSetting.find ( pattern.getContent ( )[j] );

				if ( setting != variablesSetting.end ( ) && repeats.getContent ( )[offset].getSymbol ( ) != setting->second)
					break;

				variablesSetting.insert ( std::make_pair ( pattern.getContent ( )[j], repeats.getContent( )[offset].getSymbol ( ) ) );
				offset = subjectSubtreeJumpTable[offset];
			} else
				break;
		}

		if ( j == pattern.getContent ( ).size ( ) )
			occ.insert ( i );
	}

	return occ;
}

template < class SymbolType, class RankType >
ext::set < unsigned > ExactPatternMatch::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarPattern < SymbolType, RankType > & pattern ) {
	ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );

	ext::set < unsigned > occ;

	for ( unsigned i = 0; i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ); i++ ) {
		unsigned j = 0;
		unsigned offset = i;

		for ( ; j < pattern.getContent ( ).size ( ); j++ ) {
			if ( pattern.getContent ( )[j] == subject.getContent ( )[offset] ) {
				offset++;
			} else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) {
				j++;
				offset = subjectSubtreeJumpTable[offset];
			} else {
				break;
			}
		}

		if ( j == pattern.getContent ( ).size ( ) )
			occ.insert ( i );
	}

	return occ;
}
template < class SymbolType, class RankType >
ext::set < unsigned > ExactPatternMatch::match ( const tree::PrefixRankedBarTree < SymbolType, RankType > & subject, const tree::PrefixRankedBarNonlinearPattern < SymbolType, RankType > & pattern ) {
	ext::vector < int > subjectSubtreeJumpTable = tree::properties::SubtreeJumpTable::compute ( subject );
	ext::map < common::ranked_symbol < SymbolType, RankType >, unsigned > variablesSetting;

	tree::PrefixRankedBarTree < unsigned, RankType > repeats = tree::properties::ExactSubtreeRepeatsNaive::repeats ( subject );

	ext::set < unsigned > occ;

	for ( unsigned i = 0; i + pattern.getContent ( ).size ( ) <= subject.getContent ( ).size ( ); i++ ) {
		variablesSetting.clear();
		unsigned j = 0;
		unsigned offset = i;

		for ( ; j < pattern.getContent ( ).size ( ); j++ ) {
			if ( pattern.getContent ( )[j] == subject.getContent ( )[offset] ) {
				offset++;
			} else if ( pattern.getContent ( )[j] == pattern.getSubtreeWildcard ( ) ) {
				j++;
				offset = subjectSubtreeJumpTable[offset];
			} else if ( pattern.getNonlinearVariables ( ).count ( pattern.getContent ( )[j] ) ) {
				auto setting = variablesSetting.find ( pattern.getContent ( )[j] );

				if ( setting != variablesSetting.end ( ) && repeats.getContent ( )[offset].getSymbol ( ) != setting->second)
					break;

				variablesSetting.insert ( std::make_pair ( pattern.getContent ( )[j], repeats.getContent( )[offset].getSymbol ( ) ) );

				j++;
				offset = subjectSubtreeJumpTable[offset];
			} else {
				break;
			}
		}

		if ( j == pattern.getContent ( ).size ( ) )
			occ.insert ( i );
	}

	return occ;
}

} /* namespace exact */

} /* namespace arbology */

#endif /* _EXACT_PATTERN_MATCH_H_ */