diff --git a/alib2algo/src/stringology/query/SuffixTrieFactors.cpp b/alib2algo/src/stringology/query/SuffixTrieFactors.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..51e67dba6b2a66c554f5cbedf539e627907445dd
--- /dev/null
+++ b/alib2algo/src/stringology/query/SuffixTrieFactors.cpp
@@ -0,0 +1,30 @@
+/*
+ * SuffixTrieFactors.cpp
+ *
+ * Created on: 2. 1. 2017
+ * Author: Jan Travnicek
+ */
+
+#include "SuffixTrieFactors.h"
+
+#include <string/LinearString.h>
+
+namespace stringology {
+
+namespace query {
+
+/*
+ * Entry point for a string of any kind: passes the trie and string.getData ( )
+ * to dispatch ( ), which is presumably what selects the overload registered
+ * below for the concrete string type.
+ */
+std::set < unsigned > SuffixTrieFactors::query ( const indexes::SuffixTrie < DefaultSymbolType, unsigned > & suffixTrie, const string::String & string ) {
+	return dispatch ( suffixTrie, string.getData ( ) );
+}
+
+/* Registration object for the LinearString < > overload of query. */
+auto SuffixTrieFactorsLinearString = SuffixTrieFactors::RegistratorWrapper < std::set < unsigned >, string::LinearString < > > ( SuffixTrieFactors::query );
+
+} /* namespace query */
+
+} /* namespace stringology */
diff --git a/alib2algo/src/stringology/query/SuffixTrieFactors.h b/alib2algo/src/stringology/query/SuffixTrieFactors.h
new file mode 100644
index 0000000000000000000000000000000000000000..45dc07edebd5cb9373a8e3ce298e646692aaf4de
--- /dev/null
+++ b/alib2algo/src/stringology/query/SuffixTrieFactors.h
@@ -0,0 +1,95 @@
+/*
+ * SuffixTrieFactors.h
+ *
+ * Created on: 2. 1. 2017
+ * Author: Jan Travnicek
+ */
+
+#ifndef SUFFIX_TRIE_FACTORS_H_
+#define SUFFIX_TRIE_FACTORS_H_
+
+#include <set>
+
+#include <indexes/SuffixTrie.h>
+#include <string/String.h>
+#include <string/LinearString.h>
+#include <core/multipleDispatch.hpp>
+
+namespace stringology {
+
+namespace query {
+
+/**
+ * Query suffix trie for given string.
+ *
+ * The trie is descended from its root along the symbols of the string. The
+ * result is the set of values stored at the node reached and in the subtree
+ * below it, or an empty set when some symbol has no matching child.
+ *
+ * Source: ??
+ */
+
+class SuffixTrieFactors : public std::SingleDispatchFirstStaticParam < SuffixTrieFactors, std::set < unsigned >, const indexes::SuffixTrie < DefaultSymbolType, unsigned > &, const string::StringBase & > {
+	/**
+	 * Insert into res the value of every node in the given subtree that holds one.
+	 *
+	 * @param trie root of the subtree to traverse
+	 * @param res set the values are added to
+	 */
+	template < class SymbolType, class ValueType >
+	static void accumulateResult ( const std::trie < SymbolType, std::variant < void, ValueType > > & trie, std::set < ValueType > & res ) {
+		if ( trie.getData ( ).template is < ValueType > ( ) )
+			res.insert ( trie.getData ( ).template get < ValueType > ( ) );
+
+		// Let auto name the element type. A hand-written std::pair < SymbolType, ... > differs from a
+		// map-style element (whose key is const) and would bind to a converted copy of the child subtree.
+		for ( const auto & child : trie.getChildren ( ) ) {
+			accumulateResult ( child.second, res );
+		}
+	}
+
+public:
+	/**
+	 * Query a suffix trie by a string of any kind.
+	 *
+	 * @param suffixTrie suffix trie to query
+	 * @param string string to query by
+	 * @return occurrences of factors
+	 */
+	static std::set < unsigned > query ( const indexes::SuffixTrie < DefaultSymbolType, unsigned > & suffixTrie, const string::String & string );
+
+	/**
+	 * Query a suffix trie by a linear string.
+	 *
+	 * @param suffixTrie suffix trie to query
+	 * @param string string to query by
+	 * @return values found at and below the node reached by reading string from the root, empty if there is no such node
+	 */
+	template < class SymbolType, class ValueType >
+	static std::set < ValueType > query ( const indexes::SuffixTrie < SymbolType, ValueType > & suffixTrie, const string::LinearString < SymbolType > & string );
+
+};
+
+template < class SymbolType, class ValueType >
+std::set < ValueType > SuffixTrieFactors::query ( const indexes::SuffixTrie < SymbolType, ValueType > & suffixTrie, const string::LinearString < SymbolType > & string ) {
+	const std::trie < SymbolType, std::variant < void, ValueType > > * node = & suffixTrie.getRoot ( );
+
+	// Follow one child per symbol; a missing child ends the query with an empty result.
+	for ( const SymbolType & symbol : string.getContent ( ) ) {
+		auto iter = node->getChildren ( ).find ( symbol );
+		if ( iter == node->getChildren ( ).end ( ) ) {
+			return {};
+		}
+		node = & iter->second;
+	}
+
+	std::set < ValueType > res;
+	accumulateResult ( * node, res );
+	return res;
+}
+
+} /* namespace query */
+
+} /* namespace stringology */
+
+#endif /* SUFFIX_TRIE_FACTORS_H_ */
diff --git a/aquery2/makefile b/aquery2/makefile
new file mode
100644
index 0000000000000000000000000000000000000000..a28dfa12ae91c8621d28bcbd5cb8c16573d8031d
--- /dev/null
+++ b/aquery2/makefile
@@ -0,0 +1,166 @@
+SHELL:=/bin/bash
+USE_RAMDISK ?= 0
+-include makefile.conf
+
+# NEW_LINE holds a line break; it is exported so that recipes can emit one through the shell variable.
+define NEW_LINE
+
+
+endef
+
+export NEW_LINE
+
+CXX_FLAGS := -Wall -pedantic -Wextra -Werror -Wshadow -Wpointer-arith -Wcast-qual -Wdelete-non-virtual-dtor -Wredundant-decls
+
+LDFLAGS_DEBUG:=-Wl,-no-as-needed $(addprefix -L, $(addsuffix lib-debug, $(LINK_PATHS))) -rdynamic $(addprefix -l, $(LINK_LIBRARIES)) -Wl,-rpath,.
+
+LDFLAGS_RELEASE:=-Wl,-no-as-needed $(addprefix -L, $(addsuffix lib-release, $(LINK_PATHS))) -rdynamic $(addprefix -l, $(LINK_LIBRARIES)) -Wl,-rpath,.
+
+# One object per source found under src/; the pattern is quoted so the shell cannot expand it before find sees it.
+OBJECTS_DEBUG:=$(patsubst src/%.cpp, obj-debug/%.o, $(shell find src/ -name "*.cpp"))
+
+OBJECTS_RELEASE:=$(patsubst src/%.cpp, obj-release/%.o, $(shell find src/ -name "*.cpp"))
+
+# Targets that do not name files.
+.PHONY: all debug release clean clean-debug clean-release doc
+
+all:
+	@echo "What to do master?"
+
+FORCE:
+
+# -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+# make subdir makefile
+# Prepares the object directory (a symlink into /tmp when it is not writable and USE_RAMDISK is 1)
+# and writes the makefile used inside it. In the echoed text $(X) is expanded right here, \$$(X) is
+# written out as $(X) for the generated makefile, and $${NEW_LINE} becomes a line break.
+
+%/makefile: makefile makefile.conf
+	if [ ! -w $(dir $@) ] && [ $(USE_RAMDISK) -eq 1 ]; then\
+		ln -s /tmp/`date +'%s%N'`-$(dir $@) $(subst /, , $(dir $@)) 2>/dev/null;\
+	fi;\
+	if [ -L $(subst /, , $(dir $@)) ]; then\
+		mkdir -p `readlink $(subst /, , $(dir $@))`;\
+	else\
+		mkdir -p $(dir $@);\
+	fi
+	echo "\
+	SHELL:=/bin/bash$${NEW_LINE}\
+	SRCDIR:=$${NEW_LINE}\
+	$${NEW_LINE}\
+	define NEW_LINE$${NEW_LINE}\
+	$${NEW_LINE}\
+	$${NEW_LINE}\
+	endef$${NEW_LINE}\
+	$${NEW_LINE}\
+	export NEW_LINE$${NEW_LINE}\
+	$${NEW_LINE}\
+	CXXFLAGS:= -pipe -std=c++11 \$$(CXX_OTHER_FLAGS) -c $(CXX_FLAGS) -fPIC \$$(addprefix -I, \$$(realpath $(INCLUDE_PATHS)))$${NEW_LINE}\
+	$${NEW_LINE}\
+	SOURCES:= \$$(shell find \$$(SOURCES_BASE_DIR)/\$$(SRCDIR) -maxdepth 1 -type f -name \"*.cpp\")$${NEW_LINE}\
+	DEPENDENCIES:= \$$(patsubst \$$(SOURCES_BASE_DIR)/\$$(SRCDIR)%.cpp, \$$(OBJECTS_BASE_DIR)/\$$(SRCDIR)%.d, \$$(SOURCES))$${NEW_LINE}\
+	OBJECTS:= \$$(patsubst %.d, %.o, \$$(DEPENDENCIES))$${NEW_LINE}\
+	SOURCES_DIRS:= \$$(shell find \$$(SOURCES_BASE_DIR)/\$$(SRCDIR) -maxdepth 1 -mindepth 1 -type d)$${NEW_LINE}\
+	OBJECTS_DIRS:= \$$(patsubst \$$(SOURCES_BASE_DIR)/\$$(SRCDIR)%, %/, \$$(SOURCES_DIRS))$${NEW_LINE}\
+	OBJECTS_DIRS_MAKEFILES:= \$$(patsubst %, %makefile, \$$(OBJECTS_DIRS))$${NEW_LINE}\
+	$${NEW_LINE}\
+	.PHONY: all$${NEW_LINE}\
+	.PRECIOUS: \$$(DEPENDENCIES) \$$(OBJECTS_DIRS_MAKEFILES)$${NEW_LINE}\
+	$${NEW_LINE}\
+	all: \$$(OBJECTS_DIRS) \$$(OBJECTS)$${NEW_LINE}\
+	$${NEW_LINE}\
+	%.d: makefile$${NEW_LINE}\
+		@echo \"\\$${NEW_LINE}\
+		\$$(shell sha1sum <<< \"\$$@\" | sed \"s/ *-//g\") = \\$$\$$(shell (\\$$\$$(CXX) -M \\$$\$$(CXXFLAGS) \$$(patsubst \$$(OBJECTS_BASE_DIR)/\$$(SRCDIR)%.d,\$$(SOURCES_BASE_DIR)/\$$(SRCDIR)%.cpp, \$$@) 2>/dev/null || echo \\\"\$$(patsubst \$$(OBJECTS_BASE_DIR)/\$$(SRCDIR)%.d,\$$(SOURCES_BASE_DIR)/\$$(SRCDIR)%.cpp, \$$@) FORCE\\\") | sed \\\"s/.*://g;s/\\\\\\\\\\\\\\\\//g\\\")\$$\$${NEW_LINE}\\$${NEW_LINE}\
+		\$$(patsubst %.d,%.o, \$$@): \\$$\$$(\$$(shell sha1sum <<< \"\$$@\" | sed \"s/ *-//g\")) makefile\$$\$${NEW_LINE}\\$${NEW_LINE}\
+			\\$$\$$(CXX) \\$$\$$(CXXFLAGS) \\$$\$$< -o \$$(patsubst %.d,%.o, \$$@)\$$\$${NEW_LINE}\\$${NEW_LINE}\
+		\" > \$$@$${NEW_LINE}\
+	$${NEW_LINE}\
+	%/makefile: makefile$${NEW_LINE}\
+		mkdir -p \$$(dir \$$@)$${NEW_LINE}\
+		cp makefile \$$@$${NEW_LINE}\
+	$${NEW_LINE}\
+	%/: FORCE | %/makefile$${NEW_LINE}\
+		@accesstime=\`stat -c %Y \$$@\` && \\$${NEW_LINE}\
+		\$$(MAKE) -C \$$@ SRCDIR=\$$(SRCDIR)\$$(notdir \$$(patsubst %/, %, \$$@))/ OBJECTS_BASE_DIR=\$$(OBJECTS_BASE_DIR) SOURCES_BASE_DIR=\$$(SOURCES_BASE_DIR) CXX_OTHER_FLAGS=\"\$$(CXX_OTHER_FLAGS)\" && \\$${NEW_LINE}\
+		accesstime2=\`stat -c %Y \$$@\` && \\$${NEW_LINE}\
+		if [ "\$$\$$accesstime" -ne "\$$\$$accesstime2" ]; then \\$${NEW_LINE}\
+			touch .; \\$${NEW_LINE}\
+		fi$${NEW_LINE}\
+	$${NEW_LINE}\
+	FORCE:$${NEW_LINE}\
+	$${NEW_LINE}\
+	-include \$$(DEPENDENCIES)" > $@
+
+# -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+# final lib/bin construction
+
+bin-debug/$(EXECUTABLE): obj-debug/ $(OBJECTS_DEBUG)
+	if [ ! -w $(dir $@) ] && [ $(USE_RAMDISK) -eq 1 ]; then\
+		ln -s /tmp/`date +'%s%N'`-$(dir $@) $(subst /, , $(dir $@)) 2>/dev/null;\
+	fi;\
+	if [ -L $(subst /, , $(dir $@)) ]; then\
+		mkdir -p `readlink $(subst /, , $(dir $@))`;\
+	else\
+		mkdir -p $(dir $@);\
+	fi
+	$(CXX) $(OBJECTS_DEBUG) -o $@ $(LDFLAGS_DEBUG)
+
+bin-release/$(EXECUTABLE): obj-release/ $(OBJECTS_RELEASE)
+	if [ ! -w $(dir $@) ] && [ $(USE_RAMDISK) -eq 1 ]; then\
+		ln -s /tmp/`date +'%s%N'`-$(dir $@) $(subst /, , $(dir $@)) 2>/dev/null;\
+	fi;\
+	if [ -L $(subst /, , $(dir $@)) ]; then\
+		mkdir -p `readlink $(subst /, , $(dir $@))`;\
+	else\
+		mkdir -p $(dir $@);\
+	fi
+	$(CXX) $(OBJECTS_RELEASE) -o $@ $(LDFLAGS_RELEASE)
+
+# -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+# subdir make calls
+
+obj-debug/: FORCE | obj-debug/makefile
+	$(MAKE) -C $@ OBJECTS_BASE_DIR=$(realpath obj-debug) SOURCES_BASE_DIR=$(realpath src) CXX_OTHER_FLAGS="-g -O0 -DDEBUG"
+
+obj-release/: FORCE | obj-release/makefile
+	$(MAKE) -C $@ OBJECTS_BASE_DIR=$(realpath obj-release) SOURCES_BASE_DIR=$(realpath src) CXX_OTHER_FLAGS="-O3 -DNDEBUG -DRELEASE"
+
+# -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+# objects dependencies
+
+$(OBJECTS_DEBUG): obj-debug/
+
+$(OBJECTS_RELEASE): obj-release/
+
+# -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+# main targets
+
+debug: bin-debug/$(EXECUTABLE)
+
+release: bin-release/$(EXECUTABLE)
+
+clean: clean-debug clean-release
+	$(RM) -r doc
+
+clean-debug:
+	if [ -L obj-debug ]; then\
+		$(RM) -r `readlink obj-debug`;\
+	fi
+	if [ -L bin-debug ]; then\
+		$(RM) -r `readlink bin-debug`;\
+	fi
+	$(RM) -r *.o *.d bin-debug obj-debug
+
+# This makefile produces bin-release, so that is the link whose target has to be removed.
+clean-release:
+	if [ -L obj-release ]; then\
+		$(RM) -r `readlink obj-release`;\
+	fi
+	if [ -L bin-release ]; then\
+		$(RM) -r `readlink bin-release`;\
+	fi
+	$(RM) -r *.o *.d bin-release obj-release
+
+doc:
+	doxygen
diff --git a/aquery2/makefile.conf b/aquery2/makefile.conf
new file
mode 100644
index 0000000000000000000000000000000000000000..e1369ed88f749c67a86efd451d5df6407bef4377
--- /dev/null
+++ b/aquery2/makefile.conf
@@ -0,0 +1,4 @@
+EXECUTABLE:=aquery2
+LINK_PATHS=../alib2elgo/ ../alib2algo_experimental/ ../alib2algo/ ../alib2data_experimental/ ../alib2data/ ../alib2common/ ../alib2std/
+LINK_LIBRARIES=alib2elgo alib2algo_experimental alib2algo alib2data_experimental alib2data alib2common alib2std xml2
+INCLUDE_PATHS=\$$(SOURCES_BASE_DIR)/../../alib2elgo/src/ \$$(SOURCES_BASE_DIR)/../../alib2algo_experimental/src/ \$$(SOURCES_BASE_DIR)/../../alib2algo/src/ \$$(SOURCES_BASE_DIR)/../../alib2data_experimental/src/ \$$(SOURCES_BASE_DIR)/../../alib2data/src/ \$$(SOURCES_BASE_DIR)/../../alib2common/src/ \$$(SOURCES_BASE_DIR)/../../alib2std/src/ /usr/include/libxml2/
diff --git a/aquery2/src/aquery.cpp b/aquery2/src/aquery.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e2ab0ff683a28a63875e5fa72210de69f0f364b3
--- /dev/null
+++ b/aquery2/src/aquery.cpp
@@ -0,0 +1,104 @@
+/*
+ * aquery.cpp
+ *
+ * Created on: 26. 3. 2014
+ * Author: Jan Travnicek
+ */
+
+#include <tclap/CmdLine.h>
+#include <global/GlobalData.h>
+#include <measure>
+#include <iostream>
+#include <set>
+#include <string>
+#include <vector>
+#include <sax/FromXMLParserHelper.h>
+
+#include <factory/XmlDataFactory.hpp>
+#include <exception/CommonException.h>
+#include <string/String.h>
+
+#include <stringology/query/SuffixTrieFactors.h>
+
+/**
+ * Command line front end for querying an index.
+ *
+ * Reads an index and the first pattern given, runs the selected query and writes the result to standard output.
+ *
+ * @return 0 on success, 1 for a CommonException, 2 for a command line error,
+ *         3 for any other std::exception, 127 for an unknown exception
+ */
+int main ( int argc, char * argv[] ) {
+	try {
+		common::GlobalData::argc = argc;
+		common::GlobalData::argv = argv;
+
+		TCLAP::CmdLine cmd ( "Stringology algorithm access binary", ' ', "0.01" );
+
+		std::vector < std::string > allowed;
+		allowed.push_back ( "suffixTrieFactors" );
+		TCLAP::ValuesConstraint < std::string > allowedVals ( allowed );
+
+		// The default has to be one of the allowed values, otherwise running without -q can only end in "Invalid algorithm".
+		TCLAP::ValueArg < std::string > query ( "q", "query", "Query index", false, "suffixTrieFactors", & allowedVals );
+		cmd.add ( query );
+
+		TCLAP::ValueArg <std::string > indexInput ( "i", "index", "Index to query", false, "-", "file");
+		cmd.add( indexInput );
+
+		TCLAP::MultiArg < std::string > patternInput ( "p", "pattern", "Pattern object from file", false, "file" );
+		cmd.add ( patternInput );
+
+		TCLAP::SwitchArg measure ( "m", "measure", "Measure times", false );
+		cmd.add ( measure );
+
+		TCLAP::SwitchArg verbose ( "v", "verbose", "Be verbose", false );
+		cmd.add ( verbose );
+
+		cmd.parse ( argc, argv );
+
+		if(verbose.isSet())
+			common::GlobalData::verbose = true;
+		if(measure.isSet())
+			common::GlobalData::measure = true;
+
+		measurements::start ( "Overal", measurements::Type::OVERALL );
+		measurements::start ( "Input read", measurements::Type::AUXILIARY );
+
+		if ( query.getValue ( ) == "suffixTrieFactors" ) {
+			indexes::SuffixTrie < DefaultSymbolType, unsigned > suffixTrie = alib::XmlDataFactory::fromTokens < indexes::SuffixTrie < DefaultSymbolType, unsigned > > ( sax::FromXMLParserHelper::parseInput ( indexInput ) );
+			string::String pattern = alib::XmlDataFactory::fromTokens < string::String > ( std::move ( sax::FromXMLParserHelper::parseInput(true, patternInput).front ( ) ) );
+
+			measurements::end ( );
+			measurements::start ( "Algorithm", measurements::Type::MAIN );
+
+			std::set < unsigned > res = stringology::query::SuffixTrieFactors::query ( suffixTrie, pattern );
+
+			measurements::end ( );
+			measurements::start ( "Output write", measurements::Type::AUXILIARY );
+
+			alib::XmlDataFactory::toStdout ( res );
+		} else {
+			throw exception::CommonException ( "Invalid algorithm" );
+		}
+
+		measurements::end ( );
+		measurements::end ( );
+
+		if ( measure.getValue ( ) ) std::cmeasure << measurements::results ( ) << std::endl;
+
+		return 0;
+	} catch ( const exception::CommonException & exception ) {
+		alib::XmlDataFactory::toStdout ( exception );
+		return 1;
+	} catch ( const TCLAP::ArgException & exception ) {
+		std::cout << exception.error ( ) << std::endl;
+		return 2;
+	} catch ( const std::exception & exception ) {
+		std::cerr << "Exception caught: " << exception.what ( ) << std::endl;
+		return 3;
+	} catch ( ... ) {
+		std::cerr << "Unknown exception caught." << std::endl;
+		return 127;
+	}
+}
diff --git a/makefile b/makefile
index 4fef7f1c75406ad6d2d4d186ed8f0da98aec5bdc..49d901ddfa2cdea33f5821274e04ba1afbdf4fbe 100644
--- a/makefile
+++ b/makefile
@@ -51,6 +51,7 @@ SUBDIRS_BINS = aecho2 \
 	astat2 \
 	aaccess2 \
 	astringology2 \
+	aquery2 \
 	atrim2 \
 	tniceprint \