// aarbology.cpp
/*
 * aarbology.cpp
 *
 *  Created on: 26. 3. 2014
 *	  Author: Jan Travnicek
 */

#include <tclap/CmdLine.h>
#include <global/GlobalData.h>
#include <measure>

#include <exception/CommonException.h>
#include <lexer/Lexer.h>
#include <parser/Parser.h>

#include <factory/XmlDataFactory.hpp>

int main ( int argc, char * argv[] ) {
	try {
		common::GlobalData::argc = argc;
		common::GlobalData::argv = argv;

		TCLAP::CmdLine cmd ( "Arbology algorithm access binary", ' ', "0.01" );

		std::vector < std::string > allowed;
		allowed.push_back ( "exactSubtreeMatch" );
		allowed.push_back ( "exactPatternMatch" );
		allowed.push_back ( "boyerMooreHorspool" );
		allowed.push_back ( "reversedBoyerMooreHorspool" );
		allowed.push_back ( "knuthMorrisPratt" );
		allowed.push_back ( "deadZoneUsingBadCharacterShiftAndBorderArray" );
		allowed.push_back ( "exactSubtreeMatchingAutomaton" );
		allowed.push_back ( "exactPatternMatchingAutomaton" );
		allowed.push_back ( "exactSubtreeAutomaton" );
		allowed.push_back ( "exactTreePatternAutomaton" );
		allowed.push_back ( "exactNonlinearTreePatternAutomaton" );
		allowed.push_back ( "compressedBitParallelIndex" );
		allowed.push_back ( "nonlinearCompressedBitParallelIndex" );
		allowed.push_back ( "fullAndLinearIndex" );
		allowed.push_back ( "nonlinearFullAndLinearIndex" );

		allowed.push_back ( "exactSubtreeRepeatsNaive" );
		allowed.push_back ( "exactSubtreeRepeats" );
		allowed.push_back ( "normalizeTreeLabels" );
		allowed.push_back ( "badCharacterShiftTable" );
		TCLAP::ValuesConstraint < std::string > allowedVals ( allowed );

		TCLAP::ValueArg < std::string > algorithm ( "a", "algorithm", "Execute algorithm", false, "exactSubtreeMatch", & allowedVals );
		cmd.add ( algorithm );

		TCLAP::MultiArg < std::string > subjectInput ( "s", "subject", "Subject tree from file", false, "file" );
		cmd.add ( subjectInput );

		TCLAP::MultiArg < std::string > patternInput ( "p", "pattern", "Pattern tree from file", false, "file" );
		cmd.add ( patternInput );

		TCLAP::ValueArg < std::string > subtreeWildcardInput ( "w", "subtree_wildcard", "Wildcard to be used in algorithms needing it", false, "-", "file" );
		cmd.add ( subtreeWildcardInput );

		TCLAP::ValueArg < std::string > nonlinearVariablesInput ( "n", "nonlinear_variables", "Set of nonlinear variables to be used in algorithms needing it", false, "-", "file" );
		cmd.add ( nonlinearVariablesInput );

		TCLAP::ValueArg < std::string > variablesBarInput ( "b", "variables_bar", "Variables bar symbol of subtree wildcard and nonlinear variables", false, "-", "file" );
		cmd.add ( variablesBarInput );

		TCLAP::SwitchArg ends ( "e", "ends", "Return occurrences as end indexes when applicable", false );
		cmd.add ( ends );

		TCLAP::SwitchArg measure ( "m", "measure", "Measure times", false );
		cmd.add ( measure );

		TCLAP::SwitchArg verbose ( "v", "verbose", "Be verbose", false );
		cmd.add ( verbose );

		cmd.parse ( argc, argv );

		if(verbose.isSet())
			common::GlobalData::verbose = true;
		if(measure.isSet())
			common::GlobalData::measure = true;

		cli::Environment environment;
		environment.setBinding ( "stdout", "-" );

		measurements::start ( "Overal", measurements::Type::OVERALL );
		measurements::start ( "Input read", measurements::Type::AUXILIARY );

		if ( algorithm.getValue ( ) == "exactSubtreeMatch"
		  || algorithm.getValue ( ) == "exactPatternMatch"
		  || algorithm.getValue ( ) == "boyerMooreHorspool"
		  || algorithm.getValue ( ) == "reversedBoyerMooreHorspool"
		  || algorithm.getValue ( ) == "knuthMorrisPratt"
		  || algorithm.getValue ( ) == "deadZoneUsingBadCharacterShiftAndBorderArray"
		  || algorithm.getValue ( ) == "exactSubtreeAutomaton"
		  || algorithm.getValue ( ) == "exactTreePatternAutomaton"
		  || algorithm.getValue ( ) == "exactNonlinearTreePatternAutomaton"
		  || algorithm.getValue ( ) == "exactSubtreeRepeatsNaive"
		  || algorithm.getValue ( ) == "normalizeTreeLabels"
		  || algorithm.getValue ( ) == "exactSubtreeRepeats"
		  || algorithm.getValue ( ) == "compressedBitParallelIndex"
		  || algorithm.getValue ( ) == "nonlinearCompressedBitParallelIndex"
		  || algorithm.getValue ( ) == "fullAndLinearIndex"
		  || algorithm.getValue ( ) == "nonlinearFullAndLinearIndex" ) {
			std::string input;
			if ( subjectInput.getValue ( ).size ( ) == 0 )
				input = "-";
			else if ( subjectInput.getValue ( ).size ( ) == 1 )
				input = * subjectInput.getValue ( ).begin ( );
			else
				throw exception::CommonException("Multiple parameters when single required.");

			environment.setBinding ( "inputSubject", input );
			cli::Parser parser ( cli::Lexer ( "execute <#inputSubject > $subject" ) );
			parser.parse ( )->run ( environment );
		}

		if ( algorithm.getValue ( ) == "exactSubtreeMatch"
		  || algorithm.getValue ( ) == "exactPatternMatch"
		  || algorithm.getValue ( ) == "boyerMooreHorspool"
		  || algorithm.getValue ( ) == "reversedBoyerMooreHorspool"
		  || algorithm.getValue ( ) == "knuthMorrisPratt"
		  || algorithm.getValue ( ) == "deadZoneUsingBadCharacterShiftAndBorderArray"
		  || algorithm.getValue ( ) == "exactSubtreeMatchingAutomaton"
		  || algorithm.getValue ( ) == "exactPatternMatchingAutomaton"
		  || algorithm.getValue ( ) == "badCharacterShiftTable" ) {
			std::string input;
			if ( patternInput.getValue ( ).size ( ) == 0 )
				input = "-";
			else if ( patternInput.getValue ( ).size ( ) == 1 )
				input = * patternInput.getValue ( ).begin ( );
			else
				throw exception::CommonException("Multiple parameters when single required.");

			environment.setBinding ( "inputPattern", input );
			cli::Parser parser ( cli::Lexer ( "execute <#inputPattern > $pattern" ) );
			parser.parse ( )->run ( environment );
		}

		if ( algorithm.getValue ( ) == "exactTreePatternAutomaton"
		  || algorithm.getValue ( ) == "exactNonlinearTreePatternAutomaton" ) {
			std::string input;
			if ( ! subtreeWildcardInput.isSet ( ) )
				input = "-";
			else
				input = subtreeWildcardInput.getValue ( );

			environment.setBinding ( "inputSubtreeWildcard", input );
			cli::Parser parser ( cli::Lexer ( "execute <:ranked_symbol #inputSubtreeWildcard > $subtreeWildcard" ) );
			parser.parse ( )->run ( environment );
		}

		if ( algorithm.getValue ( ) == "exactNonlinearTreePatternAutomaton" ) {
			std::string input;
			if ( ! nonlinearVariablesInput.isSet ( ) )
				input = "-";
			else
				input = nonlinearVariablesInput.getValue ( );

			environment.setBinding ( "inputNonlinearVariables", input );
			cli::Parser parser ( cli::Lexer ( "execute <{:ranked_symbol} #inputNonlinearVariables > $nonlinearVariables" ) );
			parser.parse ( )->run ( environment );
		}

		bool isBarNotation = false;

		if ( algorithm.getValue ( ) == "exactTreePatternAutomaton"
		  || algorithm.getValue ( ) == "exactNonlinearTreePatternAutomaton" ) {
			std::string inputType = environment.getVariable ( "subject" )->getReturnType ( );
			isBarNotation = inputType.find ( "Bar" ) != std::string::npos;

			if ( isBarNotation ) {
				std::string input;
				if ( ! variablesBarInput.isSet ( ) )
					input = "-";
				else
					input = variablesBarInput.getValue ( );

				environment.setBinding ( "inputVariablesBar", input );
				cli::Parser parser ( cli::Lexer ( "execute <:ranked_symbol #inputVariablesBar > $variablesBar" ) );
				parser.parse ( )->run ( environment );
			}
		}

		measurements::end ( );
		measurements::start ( "Algorithm", measurements::Type::MAIN );

		std::string cliCommand;
		if ( algorithm.getValue ( ) == "exactSubtreeMatch" ) {
			cliCommand = "execute arbology::exact::ExactSubtreeMatch $subject $pattern > $output";
		} else if ( algorithm.getValue ( ) == "exactPatternMatch" ) {
			cliCommand = "execute arbology::exact::ExactPatternMatch $subject $pattern > $output";
		} else if ( algorithm.getValue ( ) == "boyerMooreHorspool" ) {
			cliCommand = "execute arbology::exact::BoyerMooreHorspool $subject $pattern > $output";
		} else if ( algorithm.getValue ( ) == "reversedBoyerMooreHorspool" ) {
			cliCommand = "execute arbology::exact::ReversedBoyerMooreHorspool $subject $pattern > $output";
		} else if ( algorithm.getValue ( ) == "knuthMorrisPratt" ) {
			cliCommand = "execute arbology::exact::KnuthMorrisPratt $subject $pattern > $output";
		} else if ( algorithm.getValue ( ) == "deadZoneUsingBadCharacterShiftAndBorderArray" ) {
			cliCommand = "execute arbology::exact::DeadZoneUsingBadCharacterShiftAndBorderArray $subject $pattern > $output";
		} else if ( algorithm.getValue ( ) == "exactSubtreeMatchingAutomaton" ) {
			cliCommand = "execute arbology::exact::ExactSubtreeMatchingAutomaton $pattern > $output";
		} else if ( algorithm.getValue ( ) == "exactPatternMatchingAutomaton" ) {
			cliCommand = "execute arbology::exact::ExactPatternMatchingAutomaton $pattern > $output";
		} else if ( algorithm.getValue ( ) == "exactSubtreeAutomaton" ) {
			cliCommand = "execute arbology::exact::ExactSubtreeAutomaton $subject > $output";
		} else if ( algorithm.getValue ( ) == "exactTreePatternAutomaton" ) {
			if ( isBarNotation )
				cliCommand = "execute arbology::exact::ExactTreePatternAutomaton $subject $subtreeWildcard $variablesBar > $output";
			else
				cliCommand = "execute arbology::exact::ExactTreePatternAutomaton $subject $subtreeWildcard > $output";
		} else if ( algorithm.getValue ( ) == "exactNonlinearTreePatternAutomaton" ) {
			if ( isBarNotation )
				cliCommand = "execute arbology::exact::ExactNonlinearTreePatternAutomaton $subject $subtreeWildcard $nonlinearVariables $variablesBar > $output";
			else
				cliCommand = "execute arbology::exact::ExactNonlinearTreePatternAutomaton $subject $subtreeWildcard $nonlinearVariables > $output";
		} else if ( algorithm.getValue ( ) == "exactSubtreeRepeatsNaive" ) {
			cliCommand = "execute tree::properties::ExactSubtreeRepeatsNaive $subject > $output";
		} else if ( algorithm.getValue ( ) == "normalizeTreeLabels" ) {
			cliCommand = "execute tree::NormalizeTreeLabels $subject > $output";
		} else if ( algorithm.getValue ( ) == "exactSubtreeRepeats" ) {
			cliCommand = "execute tree::properties::ExactSubtreeRepeats $subject > $output";
		} else if ( algorithm.getValue ( ) == "compressedBitParallelIndex" ) {
			cliCommand = "execute arbology::indexing::CompressedBitParallelIndexConstruction $subject > $output";
		} else if ( algorithm.getValue ( ) == "nonlinearCompressedBitParallelIndex" ) {
			cliCommand = "execute arbology::indexing::NonlinearCompressedBitParallelIndexConstruction $subject > $output";
		} else if ( algorithm.getValue ( ) == "fullAndLinearIndex" ) {
			cliCommand = "execute arbology::indexing::FullAndLinearIndexConstruction $subject > $output";
		} else if ( algorithm.getValue ( ) == "nonlinearFullAndLinearIndex" ) {
			cliCommand = "execute arbology::indexing::NonlinearFullAndLinearIndexConstruction $subject > $output";
		} else if ( algorithm.getValue ( ) == "badCharacterShiftTable" ) {
			cliCommand = "execute tree::properties::BadCharacterShiftTable $pattern > $output";
		} else {
			throw exception::CommonException ( "Invalid algorithm" );
		}

		cli::Parser parser = cli::Parser ( cli::Lexer ( cliCommand ) );
		parser.parse ( )->run ( environment );

		if( ends.isSet ( ) ) {
			parser = cli::Parser ( cli::Lexer ( "execute arbology::transform::BeginToEndIndex $subject $output > $output" ) );
			parser.parse ( )->run ( environment );
		}

		measurements::end ( );
		measurements::start ( "Output write", measurements::Type::AUXILIARY );

		parser = cli::Parser ( cli::Lexer ( "execute $output >#stdout" ) );
		parser.parse ( )->run ( environment );

		measurements::end ( );
		measurements::end ( );

		if ( measure.getValue ( ) ) common::Streams::measure << measurements::results ( ) << std::endl;

		return 0;
	} catch ( const exception::CommonException & exception ) {
		factory::XmlDataFactory::toStdout ( exception );
		return 1;
	} catch ( const TCLAP::ArgException & exception ) {
		common::Streams::out << exception.error ( ) << std::endl;
		return 2;
	} catch ( const std::exception & exception ) {
		common::Streams::err << "Exception caught: " << exception.what ( ) << std::endl;
		return 3;
	} catch ( ... ) {
		common::Streams::err << "Unknown exception caught." << std::endl;
		return 127;
	}
}