-
Jan Trávníček authoredJan Trávníček authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
userGuide.tex 46.64 KiB
%%% DOCUMENTCLASS
%%%-------------------------------------------------------------------------------
\documentclass[
a4paper, % Stock and paper size.
11pt, % Type size.
% article,
% oneside,
onecolumn, % Only one column of text on a page.
% openright, % Each chapter will start on a recto page.
% openleft, % Each chapter will start on a verso page.
openany, % A chapter may start on either a recto or verso page.
]{memoir}
%%% PACKAGES
%%%------------------------------------------------------------------------------
\usepackage[utf8]{inputenc} % If utf8 encoding
% \usepackage[lantin1]{inputenc} % If not utf8 encoding, then this is probably the way to go
\usepackage[T1]{fontenc} %
\usepackage[english]{babel} % English please
\usepackage[final]{microtype} % Less badboxes
% \usepackage{kpfonts} %Font
\usepackage{amsmath,amssymb,mathtools} % Math
% \usepackage{tikz} % Figures
\usepackage{graphicx} % Include figures
%%% PAGE LAYOUT
%%%------------------------------------------------------------------------------
\setlrmarginsandblock{0.15\paperwidth}{*}{1} % Left and right margin
\setulmarginsandblock{0.2\paperwidth}{*}{1} % Upper and lower margin
\checkandfixthelayout
%%% SECTIONAL DIVISIONS
%%%------------------------------------------------------------------------------
\maxsecnumdepth{subsection} % Subsections (and higher) are numbered
\setsecnumdepth{subsection}
\makeatletter %
\makechapterstyle{standard}{
\setlength{\beforechapskip}{0\baselineskip}
\setlength{\midchapskip}{1\baselineskip}
\setlength{\afterchapskip}{8\baselineskip}
\renewcommand{\chapterheadstart}{\vspace*{\beforechapskip}}
\renewcommand{\chapnamefont}{\centering\normalfont\Large}
\renewcommand{\printchaptername}{\chapnamefont \@chapapp}
\renewcommand{\chapternamenum}{\space}
\renewcommand{\chapnumfont}{\normalfont\Large}
\renewcommand{\printchapternum}{\chapnumfont \thechapter}
\renewcommand{\afterchapternum}{\par\nobreak\vskip \midchapskip}
\renewcommand{\printchapternonum}{\vspace*{\midchapskip}\vspace*{5mm}}
\renewcommand{\chaptitlefont}{\centering\bfseries\LARGE}
\renewcommand{\printchaptertitle}[1]{\chaptitlefont ##1}
\renewcommand{\afterchaptertitle}{\par\nobreak\vskip \afterchapskip}
}
\makeatother
\chapterstyle{standard}
\setsecheadstyle{\normalfont\large\bfseries}
\setsubsecheadstyle{\normalfont\normalsize\bfseries}
\setparaheadstyle{\normalfont\normalsize\bfseries}
\setparaindent{0pt}\setafterparaskip{0pt}
%%% FLOATS AND CAPTIONS
%%%------------------------------------------------------------------------------
\makeatletter % You do not need to write [htpb] all the time
\renewcommand\fps@figure{htbp} %
\renewcommand\fps@table{htbp} %
\makeatother %
\captiondelim{\space } % A space between caption name and text
\captionnamefont{\small\bfseries} % Font of the caption name
\captiontitlefont{\small\normalfont} % Font of the caption text
\changecaptionwidth % Change the width of the caption
\captionwidth{1\textwidth} %
%%% ABSTRACT
%%%------------------------------------------------------------------------------
\renewcommand{\abstractnamefont}{\normalfont\small\bfseries} % Font of abstract title
\setlength{\absleftindent}{0.1\textwidth} % Width of abstract
\setlength{\absrightindent}{\absleftindent}
%%% HEADER AND FOOTER
%%%------------------------------------------------------------------------------
\makepagestyle{standard} % Make standard pagestyle
\makeatletter % Define standard pagestyle
\makeevenfoot{standard}{}{}{} %
\makeoddfoot{standard}{}{}{} %
\makeevenhead{standard}{\bfseries\thepage\normalfont\qquad\small\leftmark}{}{}
\makeoddhead{standard}{}{}{\small\rightmark\qquad\bfseries\thepage}
% \makeheadrule{standard}{\textwidth}{\normalrulethickness}
\makeatother %
\makeatletter
\makepsmarks{standard}{
\createmark{chapter}{both}{shownumber}{\@chapapp\ }{ \quad }
\createmark{section}{right}{shownumber}{}{ \quad }
\createplainmark{toc}{both}{\contentsname}
\createplainmark{lof}{both}{\listfigurename}
\createplainmark{lot}{both}{\listtablename}
\createplainmark{bib}{both}{\bibname}
\createplainmark{index}{both}{\indexname}
\createplainmark{glossary}{both}{\glossaryname}
}
\makeatother %
\makepagestyle{chap} % Make new chapter pagestyle
\makeatletter
\makeevenfoot{chap}{}{\small\bfseries\thepage}{} % Define new chapter pagestyle
\makeoddfoot{chap}{}{\small\bfseries\thepage}{} %
\makeevenhead{chap}{}{}{} %
\makeoddhead{chap}{}{}{} %
% \makeheadrule{chap}{\textwidth}{\normalrulethickness}
\makeatother
\nouppercaseheads
\pagestyle{standard} % Choosing pagestyle and chapter pagestyle
\aliaspagestyle{chapter}{chap} %
%%% NEW COMMANDS
%%%------------------------------------------------------------------------------
\newcommand{\p}{\partial} %Partial
% Or what ever you want
%%% TABLE OF CONTENTS
%%%------------------------------------------------------------------------------
\maxtocdepth{subsection} % Only parts, chapters and sections in the table of contents
\settocdepth{subsection}
\AtEndDocument{\addtocontents{toc}{\par}} % Add a \par to the end of the TOC
%%% INTERNAL HYPERLINKS
%%%-------------------------------------------------------------------------------
\usepackage{hyperref} % Internal hyperlinks
\hypersetup{
pdfborder={0 0 0}, % No borders around internal hyperlinks
pdfauthor={I am the Author} % author
}
\usepackage{memhfixc} %
\usepackage{lipsum} % Just to put in some text
\usepackage{xcolor}
\usepackage{listings}
\lstset{
tabsize=4,
language=C++,
captionpos=b,
tabsize=3,
frame=lines,
numbers=left,
numberstyle=\tiny,
numbersep=5pt,
breaklines=true,
showstringspaces=false,
basicstyle=\footnotesize,
keywordstyle=\color[rgb]{0,0,1},
commentstyle=\color{Darkgreen},
stringstyle=\color{red},
inputencoding=utf8
}
%%% THE DOCUMENT
%%% Where all the important stuff is included!
%%%-------------------------------------------------------------------------------
\author{J. Travnicek}
\title{The algorithms library toolkit}
\begin{document}
\frontmatter
\maketitle
\begin{abstract}
The algorithms library toolkit is a collection of datatypes and algorithms. The datatypes cover various kinds of automata, grammars, regexps, trees, tree regular expressions, strings, and some indexing structures. Algorithms cover manipulation and conversion algorithms of automata, grammar, regexp, tree regular expressions, some basic string and tree index creation algorithms, and tree and string matching algorithms. The implementation is in c++14 standard and heavily using templates. The toolkit comes with a command line interface and with still evolving graphical interface.
\end{abstract}
\clearpage
\tableofcontents*
\clearpage
\chapter{Introduction}
The Algorithms Library Toolkit is a opensource project aiming to provide an implementation of algorithms from areas including automata theory, stringology, arbology and others. The project started with bachelor theses of students of Faculty of Information Technology of Czech Technical University in Prague. The original idea behind the library, formely called Automata Library, is to provided another source material for students -- show implemetation of algorithms that manipulate automata. Soon it got extended with algorithms that use automata -- mainly from the area of stringology and arbology. Later, the library was used to base implementation of some state-of-the-art algorthms including efficient determinisation of subclass pushdown automata and some tree searching and indexing algorithms.
The library is desinged to be mostly about algorithms which manipulate data and are as such stateles. Data manipulated by algorithms can vary from automata through regexps to string and others. The design allows simple extension of functionality. If a new algorithm manipulates already existing datatype, it immediately fits into the ecosystem. Of course new datatype can be introduced as well. Datatypes and algorithms are designed to be maximaly independent on each other, which allows easier maitanence.
Besides using algorithms to manipulate data, the datatypes allow casting when appropriate, i.e. DFA to NFA and similar.
The user interaction with the library is through binaries using pipes and filters philosophy, through builtin interactive command line interface, or through graphical interface.
The binaries are oldest approach of user interface suported by the library and as they are designed as filters, they use some common communication format -- xml. The binaries and command line interface providing binary are compatible, meaning the command line interface providing binary can itself be used as one of the filters. Also following the pipes and filters philosophy, each binary is accumulating some related operations and combining more binaries together allows to implement more complicated algorithms.
The command line interface binary is newer in design and it allows interaction with all algorithms through interactove command line interface. The syntax of the language similar to shell known from unix. Internally it does not use any common communication format. Such approach allows speeding up the interpretation of the algorithm description. Even though the command line interface tries to hide some implementation details, user can feel the fact that the imlementation language of the library is c++ in some cases. For example, the command line interface is aware of datatypes and it is also unable to handle templates (used to design datatypes and algorithms) othervise than statically. This however does not influence the overall functionality.
The command line interface binary can at start react to changes of algorithms provided by the library -- it detects newly registered algorithms automatically. The command line interface is planed to be extended to support procedural like language which may cause drop of backward compatibility.
A limited graphical interface allowing to desing complex algorithm is also provided. The limitation is in the number of available algorithms, which is a static subset of all algorithms. The graphical interface is planed to be extended to support all registered algorithm similarly to the command line interface.
The toolkit is extensible and as long as the added datatype or algorithm connects itself to already exitent code via cast or conversion algorithm, the extension can benefit from features already implemented.
\mainmatter
\chapter{Structure of the code}
The library consists of modules compiled to libraries and some accessors represented by binaries. Modules provide the functionality - either some core code, datatypes, or algorithms. Accessors are usually programmed as a single file with main function. The accessor binary allows interaction with implemented algorithms from bash, or other shell like environment. One more complex accessor binary \emph{aql} exists. This aql accessor itself provides command line interface to all algorithms.
\section{Structure and overall description of modules}
Each module can be compiled separately. It shares a common makefile, which is parametrized with a configuration. Source files of the module are placed into \emph{src} directory. Modules are tested with appended unit tests. Unit tests are placed in a separate directory \emph{test-src}.
When compiled the modules object files are, depending on the target type, placed into \emph{obj-debug} or \emph{obj-release} directory. Resulting shared library is placed into \emph{lib-debug} or \emph{lib-release}. Similarly object files of unit tests are placed into \emph{obj-test-debug} or \emph{obj-test-release}. An executable binary representing unit tests can be located in \emph{tet-bin-debug} or \emph{test-bin-release} directory.
Compilation of each module requires its dependencies to be compiled as well. One can deside whether to compile only the module itself, only unit tests, or both. When both the code and tests are compiled, the tests are also executed. The respective targets are \emph{build-code-debug} (\emph{build-code-releae}), \emph{build-test-debug} (\emph{build-test-release}), or simply \emph{debug} (\emph{release}).
Compilation may be tuned with some parameters in configuration file. Library name can be specified with LIBRARY parameter, the name of the test binary with TESTBIN. Dependencies split into those of the module and some additional ones required by unit tests. Of those the compilation environment also distinguish other linked modules (LINK\_LIBRARIES) and system libraries which require to be linked to the shared library of the module (SYSTEM\_LIBRARIES). Include paths can be also specified for system libraries with SYSTEM\_INCLUDE\_PATHS. Prefix TEST\_ is used for both types of linked libraries and specification of includes for dependencies of unit tests.
\section{Existing modules}
The algorithm library toolkit consists of three types of modules: core, feature, experimental.
The core modules include: alib2std (c++ standard library extensions), alib2measure (support for measurements), alib2abstraction (storage of registered datatypes and algorithms, alib2common (base datatypes), alib2xml (xml export and import of basic datatypes), and alib2cli (command line interface implementation).
The feature modules include: alib2data (automata, grammar, regexps, and other datatypes), alib2aux (helper algorithms), alib2str (parsing and composing a string representation of some datatypes), alib2raw (parsing and composing of raw representation of some datatypes), alib2algo (most of the algorithms).
The experimental modules available mainly for testing purposes: alib2data\_experimental (experimental datatypes), alib2algo\_experimental (experimental algorithms), alib2elgo (more efficient implementation of some algorithms), alib2graph\_data (graph datatypes), alib2graph\_algo (graph algorithms), and alib2dummy (playground library).
\subsection{Standard library extensiton}
The module alib2std provides extensions to the c++ standard library. The extensition are placed in namespace \emph{ext}, not to colide with the same classes, functions ... in namespace \emph{std} and also because the standard disallowes placing anything new into the std namespace exept specilisations of templated types.
This module exists to simplify some common operations with standard library containers, to serve as a place for backported code from newer standards then used currently. To name a few exaples now consider three-way comparison of standard library containers, print of containers to standard stream with overloaded operator, implementation of copy on write shared pointer, and some standard library containers modified to store array of pointers instead of array of values, while providing almost the same interface.
To use these extended features use prepared includes. For example to use extensitions of standard vector include \emph{alib2/vector}.
\subsection{alib2measure}
The measure module contains datastructures needed for measurements. Execution of code can be measured usign this module when include \emph{measure} is used. Mmasurements are represented by frames where each frame can contain subframes. Each frame rememberes an ammount of time spend within the frame (not including the subframes), difference in memory usage caused by dynamic allocations and deallocations, and value general purpose counters.
\subsection{alib2abstraction}
Abstraction is one of the most fundamental concepts of the library. Implementation of it is generalized to a separate module to allow reuse if needed. This module greatly co-operates with alib2cli module.
Every algorithm, cast operation, and datatype implemented in the algorithm library toolkit is so called registered to the internal structures of the abstraction module. The registration allows later execution of each algorithm from the command line iterface.
The registration is achieved through construction of global variables placed to unnamed namespaces or via a function call.
The abstraction itself is universaly represented by classes \emph{OperationAbstraction} and \emph{ValueProvider}. OperationAbstraction classes can be interliked together in representation of algorithm (or any other entity) and parameter.
\subsection{alib2common}
Basic datastructures including representation of primitive datatypes, standard containers, exceptions, and common Object wrapper of any datatype in the library's type hierarchy is implemented in this module.
Some core functionality like visitor pattern support classes, components classes, and stack trace printing handler of segmentation fault is implemented here as well.
\subsection{alib2xml}
Xml is one of the most common communication format nowadays. This module provides base for xml parsing and composing also with actual parsing and composing callbacks for basic datatypes from alib2common module.
This module also provides some registration facility similar to the ones in the abstraction module. Xml composing and parsing (including parsing of raw set of given type) is registered here.
\subsection{alib2data}
Nontrivial datatypes are implemented in this module. The state of implementation is historically like so. The module is a subject of change in later versions of the library, so that each group of datatypes is separated into standalone module. So far everything is in one module.
Datatypes present include some support types like labels and alphabet types, some main datatypes including variants of strings, trees, automata, grammars, regeps, regular tree expressions, and some string or tree indexes.
This module also contains code extending the xml module, hence all datatypes from the module are registered within the xml registration structures.
\subsection{alib2aux}
Auxiliary or helper functions of comparing automata, grammars, or string, and conversion of automata to dot, gastex, latex, or tikz are located in this module. These helper functions are registered as algorithms into the abstraction module.
\subsection{alib2str}
Similar to the alib2xml, datatypes can be converted to and from strings. Such string representation is currently designed for finite automata (DFA, NFA, EpsilonNFA and NFA with multiple initial states), grammars (Regular and Context-Free), regular expressions, strings, and trees.
There is also a registration facility present in the module in order to allow interfacing command line interface.
\subsection{alib2raw}
Some datatypes like strings and trees correspond to some basic file formats. String correspons to any file, tree to xml. This module allows interaction between mentioned datatypes and files of the given format - either reading the file and creating the representation of data or creating file based on some provided data.
\subsection{alib2algo}
Main module where algorithms are implemented. Right now most of algorithms are present in this module. Algorithms include automata, grammar, regexp operation and conversions. String and tree matching and indexing. Some random data generators are implemented for trees and automata.
Most of algorithms are also registered so that command line interface can call those. Those not registered ones are either support algorithms, where it does not make much sence or algorithms accepting something not yet supported on the command line interface level (like nontrivial function callback).
\subsection{alib2cli}
The module responsible for parsing of the command line commands. The module implements simple LL1 grammar-based parser (and lexer) as recursive decent parser producing an internal representtion of the command as abstract syntax tree. The representation can be converted to graph consisting of abstractions over algorithm, file reads and writes, etc.
The language is limited. It supports some introspection into registered algorithm, their overloads, known datatypes, and casts. Execution of colon of commands is line-by-line. Each line can be quite complex, but there is no procedural extension, except variables.
The language is described in more details later.
\chapter{Concepts used in the implementation}
\section{Components}
Many data structures are defined as a n-tuple where the components are of different types and have some defined constraints. The components concept is present to aid with design of data structures having this exact definition. A components concept is still in a stage of proof of concept, it does not support different types than sets and values. When extended, it will support maps, vectors, and trees to allow complete definition of most of data structures.
\subsection{Component as a base of datastructure}
To compose datatype using components requires to derive your datatype from a templated class \emph{code::Components}. The \emph{code::Components} class uses the \emph{Curiously recurring template pattern} in its first template parameter. Next parameters come three at a time. The first of the triplet is defining the internal datatype of the component, the second is the component behavior scheme, and the third is a name (or names) of the component.
\begin{lstlisting}
template < class SymbolType, class StateType >
class DFA final : public AutomatonBase, public core::Components < DFA < SymbolType, StateType >, ext::set < SymbolType >, component::Set, InputAlphabet, ext::set < StateType >, component::Set, std::tuple < States, FinalStates >, StateType, component::Value, InitialState >;
\end{lstlisting}
In this example the deterministic finite automaton is constructed from set of symbols represented by InputAlphabet component, two sets of states represented by States and FinalStates components, and a state represented by InitialState component.
The internal type of the component must provide common interface required by the component behavior scheme. So far supported schemes are \emph{component::Set} and \emph{component::Value}. The component set scheme expects the internal datatype to implement interface of a set from the standard library. The componet value scheme expects the internal datatype to behave like primitive type.
The name of the component can be specified in two ways. Either it can be specified by class name, where typically the class used here is incomplete. Or more names can be given at once to signal there are more components having the same structure and behavior in the datatype definition. If more names are provided, the resulting components are instanciated for each name in a set and they are therefore independent instances.
\subsection{Access to component content}
Depending on the behavior scheme of the component, each component provides some simplified inteface to the underlying data type. The common operations of all components are \emph{get} and \emph{set}, to get the data and set the data as a whole.
The \emph{component::Set} scheme additionally supports \emph{empty}, \emph{remove}, and \emph{add}, where these are equivalent to standard set operations \emph{empty}, \emph{erase}, and \emph{insert}.
\subsection{Constraint specification}
To maintain consistency of datatypes constructed from components, some constraints need to be specified. These constraints are specified by additional code inside a class specialized with the concrete data type and component name. Given the component behavior scheme the constraints may differ.
The \emph{component::Value} behavior scheme requires existence of available and valid static methods inside the \emph{ElementContraint} class. The method available represents check that the value to be set is available in other related components. The method valid represents additional check for validity of the setted object, if needed the method is supposed to throw an exception.
\begin{lstlisting}
template<class SymbolType, class StateType >
class ElementConstraint< automaton::DFA<SymbolType, StateType>, StateType, automaton::InitialState > {
public:
static bool available ( const automaton::DFA<SymbolType, StateType> & automaton, const StateType & state ) {
return automaton.getStates ( ).count ( state );
}
static void valid ( const automaton::DFA<SymbolType, StateType> &, const StateType & ) {
}
};
\end{lstlisting}
The \emph{component::Set} behavior scheme requires existence of used, available, and valid static methods inside the \emph{SetContraint} class. The methods available and valid are the same as in case of the \emph{component::Value} behaviour sheme. The additional method used is representing check whether removed object from the set component is used in other related components.
\begin{lstlisting}
template<class SymbolType, class StateType >
class SetConstraint< automaton::DFA<SymbolType, StateType>, StateType, automaton::FinalStates > {
public:
static bool used ( const automaton::DFA<SymbolType, StateType> &, const StateType & ) {
return false;
}
static bool available ( const automaton::DFA<SymbolType, StateType> & automaton, const StateType & state ) {
return automaton.getStates ( ).count ( state );
}
static void valid ( const automaton::DFA<SymbolType, StateType> &, const StateType & ) {
}
};
\end{lstlisting}
\section{Abstraction}
As mentioned the alib2abstraction module provides a facility to register algorithms, casts, datatypes, etc... to its internal structures for later execution on demand from command line interface. Here in this section more detailed description of use and internal behavior of the abstraction concept is provided.
First the documentation focuses on the overview of the concept, next the registration of algorithms, later casts and variables printing and other posibilities.
\subsection{Abstraction concept overview}
To provide an on demand execution of algorithm based on the algorithm name and algorithm parameters, a lookup within available algorithm must be possible. The c++ language does not provide any form of introspection that would allow detection of existence of methods within a class. Detection of number of parameters, their types, qualifications of a given method is not available in the c++ language as well.
The abstraction concept is designed to address this limitation of c++ language with registering avaiable algorithms, casts, etc. internally, via some registration calls, in order to retrieve registered callable.
There are two approaches to registration in the abstraction module. Variables of registraction class type, when constructed, carry on information about the registered algorithm to inside of the abstraction module via a call of registration function. Such an approach was chosen to easily hook some code before the execution of main function, so that all registrations are done beforehand at a load time of shared library. Registration function also be called directly from any context to avoid the creation of global variable. Hence any algorithm can be registered at any time before or during the execution of main function.
\subsection{Registration of algorithms}
The abstraction is in general mostly about algorithms and their execution. The algorithm to execute is specified by its name and parameters (also with category which is however unused now). Then list of overloads of the algorithm is selected from registered ones, the set is filtered based on number of actual parameters, parameter types, and the best candidate is selected and returned for execution. Efectively the overload resolution implemeted within the abstraction concept is capable multiple dispatch.
Such a selection should also take into account casts between actual parameters and formal parameters of individual overloads to establish the quality of overloads. Qualifiers such as const, ... and parameter passing by r-value reference, l-value reference, or by value also limits viable overloads to investigate further.
The abstraction is not fully implemented yet but it tries to be as close to c++ language behavior as posible. Current implementation does not fully implement the selection algorithm but in final implementation the selection should mimic the c++ function overload selection scheme. So far if the selection finds two or more viable overloads, error is reported, even though best of those overloads should be selected.
In the registration the algorithm name is provided via template parameter. Hence the algorithm name is derived from an existing type. The type can be either a class, in that case the algorithm name is the class name and the algorithm itself is categorized by possible namespaces. The class can be itself templated, in which case, the templates are also registered as part of the name. During the execution of the algorithm these templates must be specified allong with the algorithm name in order to sucessfuly find it.
\subsubsection{Registration of algorithms via constructors}
The registration of algorithms is performed via \emph{registration::AbstractRegister} class. The class is templated with types Algorithm, ReturnType, and by means of variadic template also ParameterTypes. The Algorithm type will serve as a source of identifier naming the algorithm. ReturnType and ParameterTypes are exact types of returned value and parameters including const qualifications, reference specifiers, etc. Constructor of the class accepts a function pointer, optionaly algorithm category (unused yet), and optionaly strings representing parameter names (up to the number of parameters specified by templates). Parameter names default to arg0 to argn.
The function pointer must exactly match the requirements set up by ReturnType and ParameterTypes.
\begin{lstlisting}
auto RegExpEmptyFormalRegExp = registration::AbstractRegister < regexp::properties::RegExpEmpty, bool, const regexp::FormalRegExp < > & > ( regexp::properties::RegExpEmpty::languageIsEmpty );
\end{lstlisting}
Already registered algorithm, cast, printer of datatype can be re-registered as an algorithm. Such a registration is available via \emph{registration::WrapperRegister} class. The class is templated with types Algorithm and ParameterTypes with the same meaning as in case of AbstractRegister. The class constructor accepts a function pointer to a delegate function capable of looking up the actual registered algorthm, cast, ... Also paramter names can be specified as well.
\begin{lstlisting}
auto xmlParse = registration::WrapperRegister < xml::Parse, ext::deque < sax::Token > && > ( xml::Parse::abstractionFromTokens, "arg0" );
\end{lstlisting}
Last option is register a method. The class allowing this kind of registration is \emph{registration::MethodRegister}. It is parametrized again with types Algorithm, ReturnType, additionally ObjectType, and again ParameterTypes. The ObjectType is a decayed type of object on which to call the registered method. Constructor of the method registration class accepts pointer to method, string representing the method name, and optionaly names of parameters.
\begin{lstlisting}
auto fooBar = registration::MethodRegister < Foo, int, Foo, int > ( & Foo::bar, "bar" );
\end{lstlisting}
The registration::AbstractRegister and registration::MethodRegister also register the exact return type for a normalization automatically.
\subsubsection{Registration of algorithms via function call}
The basic registration is of a callback to the algorithm. The registration is equivalent to use of registration::AbstractRegister class. The call is to a function \emph{abstraction::AlgorithmRegistry::registerAlgorithm} which is parametrized with following types: Algorithm, ReturnType and ParameterTypes. As parameters the registration accepts the callback of the algorithm, the category and parameter names. The same requirements for the callback as specified for the registration via constructor apply. The parameter names are accepted in a form of standard c++ array of strings of size equal to the number of parameters.
\begin{lstlisting}
abstraction::AlgorithmRegistry::registerAlgorithm < Divide > ( Divide::divide, abstraction::AlgorithmCategories::AlgorithmCategory::DEFAULT, std::array < std::string, 2 > ( "numerator", "denominator" ) );
\end{lstlisting}
A wrapper of alredy registered abstraction can be registered as algorithm via function call too. The function is \emph{abstraction::AlgorithmRegistry::registerWrapper} and it is templated by Algorithm and ParameterTypes. The call accepts a callback to the same delegate function as in the case of registration via constructor. Parameter names must be specified by a string array of size equal to the number of parameters (ParameterTypes template).
\begin{lstlisting}
abstraction::AlgorithmRegistry::registerWrapper < xml::Compose, const Type \& > ( xml::Compose::abstractionFromType, std::array < std::string, 1 > { { "arg0" } } );
\end{lstlisting}
A method can be registered via \emph{abstraction::AlgorithmRegistry::registerMethod} function. The template parameters are Algorithm, ObjectType, ReturnType, and ParameterTypes. The call parameters are a method pointer, a method name, and parameter names. Note that method effectively needs extra parameter which is the object call the method on.
\begin{lstlisting}
abstraction::AlgorithmRegistry::registerMethod < ComponentName > ( getMethod, "get", emptyNames );
\end{lstlisting}
Registration of functions does not register for normalization.
\subsection{Registration of casts}
Full featured selection of algorithm to call, as implemented in c++ overload resolution algorithm, uses information about casts. Abstraction can keep track of casts available for such a purpose. The cast can also be executed manually when algorithm overload resolution would report multiple call candidates.
The cast is in general a conversion for one type to another. Again there are two possible registration approaches either via creation of a global variable or via a function call. The cast can later be referenced by means of the string representing the target type.
\subsubsection{Registration of casts via constructor}
Registration of cast can be execute via creation of \emph{registraion::CastRegister} class. Template parameters To and From of the class specify the two types participating in the cast. The class can be constructed either using the default constructor or with a constructor accepting a so called cast function as a pointer to function. The cast function must take a single argument of constant reference type identified by From template parameter and return the type specified by To template parameter.
Both constructors actually use the registration via function call internally again and both approaches do register the cast target type for normalization.
The default constructor requires an expression using standard c-like cast From and To the participating types to be valid.
\subsubsection{Registration of casts via function call}
Same information about types participating in the cast must be provided to the registration via function call. There are again overloads allowing either cast by c-like syntax or via a casting function. All overloads need to be templated with the To and From types.
The casting function must produce the To type of the cast. There are however two overloads of registration of cast function. One for parameter being a const reference to From type and second for From type being a value. This gives in total three overloads of registrator functions.
Since the target type is the source of a name used to access the cast operation and the target type name may not be appropriate to use, all three overloads also allow specification of target and source type names.
\subsection{Component registration}
All datatypes are construced using unified representation of its internals. These datatype internals can also be registered as an algorithm. The registration is possible either by variable construction. The type of the variable is \emph{ComponentRegister} and it is parametrized with the datatype which components are to be registered. Internally the datatype's function \emph{registerComponent} is called. The call to this function can serve as an equivalent to function call registration.
\subsection{Normalization registration}
For the purpose of normalisation of results of any operation, the datatype can be registered for normalisation. The need for normalisation was discussed in section~\ref{section:normalisation}.
\subsubsection{Registration via constructor}
The normalisation registration is achieved by constructing \emph{NormalizationRegister} class. The class conditionally calls registration funcion if the registration is actually needed. The need of registration is computed by examining the result of normalisation function for the datatype parameter. If they differ, normalisation is needed.
\subsubsection{Registration via function call}
The registration function call \emph{NormalizeRegistry::registerNormalize} can be used directly. The function is templated with the normalised datatype. Optinally additional argument specifying the name of the datatype to normalise can be provided. The name however is not used from the cli directly, therefore this option has very limited external use.
\subsection{Container construction registration}
Immediate values of primitive types are internally supported, however containers need some additional care builtin to registration framework. Containers are specified by their type and value. So far construction of sets is implemented. Such registration is possible again via constructor or via function call.
\subsubsection{Registration via constructor}
Using class \emph{SetRegister} templated with the resulting set template parameter. The constructor internally calls the registration function.
\subsubsection{Registration via function call}
Static function \emph{ContainerRegistry::registerSet} templated with the resulting set template parameter internally registeres set of given type to be possible to construct.
\subsection{Value printing registration}
In order for the results of algorithms to be easily printed later in the command line interface, the datatype needs to be registered for printing. An overload of \emph{operator <<} allowing the datatype to be printed to the output stream needs to be present for successful registration.
\subsubsection{Registration via constructor}
A variable of type \emph{ValuePrinterRegister} can be used to register a type provided by template parameter for printing. Internally the constructor calls registration function.
\subsubsection{Registration via function call}
A function \emph{ValuePrinterRegistry::registerValuePrinter} parametrized with type to print can be called directly to register the type for printing as well.
\section{Normalisation}
\label{section:normalisation}
Use of templates in the implementation requires the compiler to instantiate the templated code before it can be used in the executable. This mainly affects algorithms since they are processing templated datatypes. Internal use of algorithms implemented in the library can benefit from templates, the compiler can instantiate the algorithm template while preparing the executable. However, to be able to use templated algorithms in statically prepared environment, templates need to instantiated for some normalized types.
All datatypes can be normalized by means of specialisation of a templated class \emph{core::normalize}. The class must provide a public static method eval accepting a templated datatype it normalizes and it provides the same datatype with predefined, fixed template types.
\begin{lstlisting}
template < class SymbolType, class StateType >
struct normalize < automaton::CompactNFA < SymbolType, StateType > > {
static automaton::CompactNFA < > eval ( automaton::CompactNFA < SymbolType, StateType > && value ) {
ext::set < DefaultSymbolType > alphabet = alphabet::SymbolNormalize::normalizeAlphabet ( std::move ( value ).getInputAlphabet ( ) );
ext::set < DefaultStateType > states = automaton::AutomatonNormalize::normalizeStates ( std::move ( value ).getStates ( ) );
DefaultStateType initialState = automaton::AutomatonNormalize::normalizeState ( std::move ( value ).getInitialState ( ) );
ext::set < DefaultStateType > finalStates = automaton::AutomatonNormalize::normalizeStates ( std::move ( value ).getFinalStates ( ) );
automaton::CompactNFA < > res ( std::move ( states ), std::move ( alphabet ), std::move ( initialState ), std::move ( finalStates ) );
for ( std::pair < ext::pair < StateType, ext::vector < SymbolType > >, ext::set < StateType > > && transition : ext::make\_mover ( std::move ( value ).getTransitions ( ) ) ) {
DefaultStateType from = automaton::AutomatonNormalize::normalizeState ( std::move ( transition.first.first ) );
ext::vector < DefaultSymbolType > input = alphabet::SymbolNormalize::normalizeSymbols ( std::move ( transition.first.second ) );
ext::set < DefaultStateType > targets = automaton::AutomatonNormalize::normalizeStates ( std::move ( transition.second ) );
res.addTransitions ( std::move ( from ), std::move ( input ), std::move ( targets ) );
}
return res;
}
};
\end{lstlisting}
Many normalization helpers are prepared for symbols, ranked symbols, states, their sets, etc.
The normalization class is used after any operation registered via abstraction that produces other than normalized type.
The library in its compiled version has all algorithms prepared for the normalized types by default.
\chapter{The query language}
This chapter will mostly focus on describing current status of the command line interface so called aql. The command line interface was creted to replace the interface represented by multiple binaries. The command line interface uses the abstraction module and registered algorithms. The algorithms are available to be called with appripriate arguments. The language of the command line interface is similar to bash, featuring subqueries and pipes.
\section{Language description}
The language is described by syntax similar to EBNF. The grammar's initial symbol is parse. The terminal symbols are in uppercase, nonterminals in lowercase. Terminal symbols starting with KW\_ prefix represent nonreserved keywords of value what is after the underscore. Integer is a sequence of digits, identifier is letter followed by letters and digits and string is double quoted character sequence. End is a special terminal symbol representing the end of the input.
\begin{lstlisting}
arg
: HASH_SIGN ( INTEGER | IDENTIFIER )
| IDENTIFIER
;
optional_arg
: arg
|
;
template_arg
: AT_SIGN arg
;
in_redirect_file
: ( LEFT_BRACKET arg RIGHT_BRACKET )? ( COLON_SIGN ( INTEGER | IDENTIFIER ) )? template_arg* arg
;
in_redirect
: LEFT_PAREN statement_list RIGHT_PAREN
| in_redirect_file
;
common
: DOLAR_SIGN arg
| IN_REDIRECT in_redirect
| STRING
| INTEGER
| HASH_SIGN ( INTEGER | IDENTIFIER )
| LEFT_BRACE ( COLON_SIGN ( INTEGER | IDENTIFIER ) )? ( CARET_SIGN? param ) * RIGHT_BRACE
;
param
: common
| DASH_SIGN
| IDENTIFIER
| LEFT_PAREN arg RIGHT_PAREN CARET_SIGN? param
;
statement
: common ( );
| IDENTIFIER template_arg ( COLON_SIGN ( INTEGER | IDENTIFIER ) )? ( CARET_SIGN? param )*
| LEFT_PAREN arg RIGHT_PAREN CARET_SIGN? move_arg statement
;
statement_list
: statement ( PIPE_SIGN statement )*
;
out_redirect_file
: ( LEFT_BRACKET arg RIGHT_BRACKET )? arg
;
out_redirect
: DOLAR_SIGN arg
|
| out_redirect_file
;
result
| out_redirect
|
;
introspect_cast_from_to
: ( COLON_SIGN ( KW_FROM | KW_TO ) )*
;
introspect_command
: KW_ALGORITHMS optional_arg END
| KW_OVERLOADS arg template_arg* END
| KW_DATATYPES optional_arg END
| KW_CASTS introspect_cast_from_to optional_arg END
;
parse
: EXECUTE statement_list result END
| QUIT END
| HELP optional_arg END
| INTROSPECT introspect_command END
| SET ( INTEGER | IDENTIFIER ) ( INTEGER | IDENTIFIER | STRING ) END
| LOAD ( INTEGER | IDENTIFIER | STRING ) END
;
\end{lstlisting}
\subsection{Introspection}
The query language supports listing of registered algorithms, their overloads, datatypes, and casts. The introspection command is executed by introspect keyword followed by the introspected entity, i.e. algorithms, overloads, datatypes, and casts.
Algorithms introspection may use namespace names to narrow the query. The namespace name must end with fourdot. The result of the introspection is list of all algorithms (filtered by namespace if specified) one per line.
Overloads introspection requires exact algorithm name. The result is a list of signatures of available overloads, one per line. Each signature consists of the return type followed by parameter types.
Datatypes introspection allows to print datatypes that can be exported or imported as xml file.
Casts introspection prints all available casts. Each cast is represented by to type and from type. The query may be limited to casts from or to specific type
Some examples of introspection commands follow.
\begin{lstlisting}[language=bash]
introspect algorithms
introspect algorithms automaton::
introspect overloads automaton::determinize::Determinize
introspect datatypes
introspect casts
introspect casts :from int
introspect casts :to int
\end{lstlisting}
\subsection{Execute}
In\_redirect\_file is parametrized by some clauses, the argument inside the brackets is a specification of the file type, the argument after the colon is the specification of the conteint of the file, and arguments after at sign give template paramters is needed.
The file type specification are xml, string, raw, and file. The xml file type contains xml representation of some datatype. The string file type represent string representation of an automaton, grammar, regexp, or some other type (available to be listed via introspection. The raw file type is representation of unranked tree i.e. xml or linear string i.e. sequence of chars. The file file type reads the content of the file as standard string.
The string and raw types require the content specification to be set with type argument introduced by colon. The template parameter is extra information not used now.
The use of in\_redirect symbol is overloaded and allows creation of subquery.
Out\_redirect\_file knows the type of the value printed, hence only the file type specification is required. The syntax is again an identifier in brackets. Based on the specified file type the agrument to out\_redirect\_file can be limited.
Use of out\_redirect symbol is overloaded and allows to set a variable.
\section{Builtin commands}
\chapter{Summary on registered commands}
\appendix
\chapter{Examples}
\section{Interactive}
\begin{lstlisting}[language=bash]
execute cli::builtin::ReadFile ../examples2/automaton/DFA.txt | string::Parse @ automaton::Automaton ^ -
\end{lstlisting}
\section{Command line execution}
\begin{lstlisting}
./aql2 -p input=../examples2/automaton/DFA.txt -q 'execute cli::builtin::ReadFile #input# > $in' -q 'execute string::Parse @ automaton::Automaton ^ $in'
\end{lstlisting}
\noindent
\begin{lstlisting}
./aql2 -q 'execute automaton::simplify::efficient::EpsilonRemoverIncoming <#stdin | automaton::determinize::Determinize - | automaton::simplify::Trim - | automaton::simplify::Minimize - | automaton::simplify::Normalize - > #stdout'
\end{lstlisting}
\noindent
\begin{lstlisting}
echo 'a + a' | ./aql2 -q 'execute cli::builtin::ReadFile \#stdin | string::Parse @ regexp::RegExp ^ - | regexp::convert::ToAutomatonGlushkov (UnboundedRegExp) -'
\end{lstlisting}
\backmatter
%%% BIBLIOGRAPHY
%%% -------------------------------------------------------------
% \bibliographystyle{utphysics}
% \bibliography{ref}
\end{document}