From 2af48bbe9e6bb36f2db30ac4d0990c17ac9784e1 Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Mon, 1 May 2017 23:29:56 +0200
Subject: [PATCH] BNDM matching from experimental

---
 .../matching/BNDMMatcherConstruction.cpp      |  24 ++
 .../matching/BNDMMatcherConstruction.h        |  58 +++++
 .../matching/WideBNDMMatcherConstruction.cpp  |  24 ++
 .../matching/WideBNDMMatcherConstruction.h    |  56 ++++
 .../src/stringology/query/BNDMOccurrences.cpp |  24 ++
 .../src/stringology/query/BNDMOccurrences.h   | 101 ++++++++
 .../stringology/query/WideBNDMOccurrences.cpp |  24 ++
 .../stringology/query/WideBNDMOccurrences.h   |  94 +++++++
 ...ckwardNondeterministicDAWGMatchingTest.cpp |  58 +++++
 ...BackwardNondeterministicDAWGMatchingTest.h |  18 ++
 .../src/indexes/stringology/BNDMMatcher.cpp   |  14 +
 .../src/indexes/stringology/BNDMMatcher.h     | 243 ++++++++++++++++++
 alib2std/src/extensions/vector.hpp            |  29 ++-
 astringology2/src/astringology.cpp            |  24 +-
 tests.astringology.sh                         |   1 +
 15 files changed, 780 insertions(+), 12 deletions(-)
 create mode 100644 alib2algo/src/stringology/matching/BNDMMatcherConstruction.cpp
 create mode 100644 alib2algo/src/stringology/matching/BNDMMatcherConstruction.h
 create mode 100644 alib2algo/src/stringology/matching/WideBNDMMatcherConstruction.cpp
 create mode 100644 alib2algo/src/stringology/matching/WideBNDMMatcherConstruction.h
 create mode 100644 alib2algo/src/stringology/query/BNDMOccurrences.cpp
 create mode 100644 alib2algo/src/stringology/query/BNDMOccurrences.h
 create mode 100644 alib2algo/src/stringology/query/WideBNDMOccurrences.cpp
 create mode 100644 alib2algo/src/stringology/query/WideBNDMOccurrences.h
 create mode 100644 alib2algo/test-src/stringology/matching/BackwardNondeterministicDAWGMatchingTest.cpp
 create mode 100644 alib2algo/test-src/stringology/matching/BackwardNondeterministicDAWGMatchingTest.h
 create mode 100644 alib2data/src/indexes/stringology/BNDMMatcher.cpp
 create mode 100644 alib2data/src/indexes/stringology/BNDMMatcher.h

diff --git a/alib2algo/src/stringology/matching/BNDMMatcherConstruction.cpp b/alib2algo/src/stringology/matching/BNDMMatcherConstruction.cpp
new file mode 100644
index 0000000000..01c752df57
--- /dev/null
+++ b/alib2algo/src/stringology/matching/BNDMMatcherConstruction.cpp
@@ -0,0 +1,24 @@
+/*
+ * BNDMMatcherConstruction.cpp
+ *
+ *  Created on: 6. 2. 2017
+ *      Author: Jan Travnicek
+ */
+
+#include "BNDMMatcherConstruction.h"
+
+#include <string/LinearString.h>
+
+namespace stringology {
+
+namespace matching {
+// Type-erased entry point: forwards the data held by the string wrapper to dispatch ( ) inherited from std::SingleDispatch.
+indexes::stringology::BNDMMatcher < DefaultSymbolType > BNDMMatcherConstruction::construct ( const string::String & string ) {
+	return dispatch ( string.getData ( ) );
+}
+// Namespace-scope registrator object; constructing it presumably hooks the LinearString < > overload of construct into the dispatcher.
+auto BNDMIndexConstructionLinearString = BNDMMatcherConstruction::RegistratorWrapper < indexes::stringology::BNDMMatcher < >, string::LinearString < > > ( BNDMMatcherConstruction::construct );
+
+} /* namespace matching */
+
+} /* namespace stringology */
diff --git a/alib2algo/src/stringology/matching/BNDMMatcherConstruction.h b/alib2algo/src/stringology/matching/BNDMMatcherConstruction.h
new file mode 100644
index 0000000000..974ff4910b
--- /dev/null
+++ b/alib2algo/src/stringology/matching/BNDMMatcherConstruction.h
@@ -0,0 +1,58 @@
+/*
+ * BNDMMatcherConstruction.h
+ *
+ *  Created on: 6. 2. 2017
+ *      Author: Jan Travnicek
+ */
+
+#ifndef BNDM_MATCHER_CONSTRUCTION_H_
+#define BNDM_MATCHER_CONSTRUCTION_H_
+
+#include <indexes/stringology/BNDMMatcher.h>
+#include <string/String.h>
+#include <string/LinearString.h>
+#include <core/multipleDispatch.hpp>
+#include <exception/CommonException.h>
+
+namespace stringology {
+
+namespace matching {
+
+/**
+ * Constructs a BNDM matcher (a bitmask per alphabet symbol plus the pattern itself) for a given pattern string.
+ * Only the first BitmaskBitCount symbols of the pattern are encoded in the bitmasks.
+ */
+
+class BNDMMatcherConstruction : public std::SingleDispatch < BNDMMatcherConstruction, indexes::stringology::BNDMMatcher < >, const string::StringBase & > {
+public:
+	/**
+	 * Builds the matcher for a type-erased string by dispatching on the string's data
+	 * @param string pattern to construct the matcher for
+	 * @return BNDM matcher with the default symbol type and bitmask width
+	 */
+	static indexes::stringology::BNDMMatcher < > construct ( const string::String & string );
+	 // Typed overload; BitmaskBitCount is the width of every per-symbol bitmask (64 unless given explicitly).
+	template < class SymbolType, size_t BitmaskBitCount = 64 >
+	static indexes::stringology::BNDMMatcher < SymbolType, BitmaskBitCount > construct ( const string::LinearString < SymbolType > & string );
+
+};
+// Builds the BNDM matcher of pattern w: one bitmask per alphabet symbol, covering at most the first BitmaskBitCount symbols.
+template < class SymbolType, size_t BitmaskBitCount >
+indexes::stringology::BNDMMatcher < SymbolType, BitmaskBitCount > BNDMMatcherConstruction::construct ( const string::LinearString < SymbolType > & w ) {
+	const auto & content = w.getContent ( );
+	const size_t maskedPrefixLength = std::min ( content.size ( ), BitmaskBitCount );
+	 // every alphabet symbol gets a mask, all zeros unless the symbol occurs in the masked prefix
+	std::map < SymbolType, std::bitset < BitmaskBitCount > > masks;
+	for ( const SymbolType & symbol : w.getAlphabet ( ) )
+		masks.emplace ( symbol, std::bitset < BitmaskBitCount > ( ) );
+	 // the symbol at prefix position i owns bit ( maskedPrefixLength - 1 - i ), so the first symbol sets the top used bit
+	for ( size_t i = 0; i < maskedPrefixLength; ++i )
+		masks [ content [ i ] ].set ( maskedPrefixLength - i - 1 );
+	return indexes::stringology::BNDMMatcher < SymbolType, BitmaskBitCount > ( w.getAlphabet ( ), masks, content );
+}
+
+} /* namespace matching */
+
+} /* namespace stringology */
+
+#endif /* BNDM_MATCHER_CONSTRUCTION_H_ */
diff --git a/alib2algo/src/stringology/matching/WideBNDMMatcherConstruction.cpp b/alib2algo/src/stringology/matching/WideBNDMMatcherConstruction.cpp
new file mode 100644
index 0000000000..1357d72957
--- /dev/null
+++ b/alib2algo/src/stringology/matching/WideBNDMMatcherConstruction.cpp
@@ -0,0 +1,24 @@
+/*
+ * WideBNDMMatcherConstruction.cpp
+ *
+ *  Created on: 6. 2. 2017
+ *      Author: Jan Travnicek
+ */
+
+#include "WideBNDMMatcherConstruction.h"
+
+#include <string/LinearString.h>
+
+namespace stringology {
+
+namespace matching {
+// Type-erased entry point: forwards the data held by the string wrapper to dispatch ( ) inherited from std::SingleDispatch.
+indexes::stringology::BitParallelIndex < DefaultSymbolType > WideBNDMMatcherConstruction::construct ( const string::String & string ) {
+	return dispatch ( string.getData ( ) );
+}
+// Namespace-scope registrator object; constructing it presumably hooks the LinearString < > overload of construct into the dispatcher.
+auto WideBNDMIndexConstructionLinearString = WideBNDMMatcherConstruction::RegistratorWrapper < indexes::stringology::BitParallelIndex < DefaultSymbolType >, string::LinearString < > > ( WideBNDMMatcherConstruction::construct );
+
+} /* namespace matching */
+
+} /* namespace stringology */
diff --git a/alib2algo/src/stringology/matching/WideBNDMMatcherConstruction.h b/alib2algo/src/stringology/matching/WideBNDMMatcherConstruction.h
new file mode 100644
index 0000000000..f5ccf345af
--- /dev/null
+++ b/alib2algo/src/stringology/matching/WideBNDMMatcherConstruction.h
@@ -0,0 +1,56 @@
+/*
+ * WideBNDMMatcherConstruction.h
+ *
+ *  Created on: 6. 2. 2017
+ *      Author: Jan Travnicek
+ */
+
+#ifndef WIDE_BNDM_MATCHER_CONSTRUCTION_H_
+#define WIDE_BNDM_MATCHER_CONSTRUCTION_H_
+
+#include <indexes/stringology/BitParallelIndex.h>
+#include <string/String.h>
+#include <string/LinearString.h>
+#include <core/multipleDispatch.hpp>
+#include <exception/CommonException.h>
+
+namespace stringology {
+
+namespace matching {
+
+/**
+ * Constructs a bit parallel index of a pattern string for the wide BNDM search.
+ * Every alphabet symbol gets a bit vector as long as the whole pattern, so the pattern length is not limited.
+ */
+
+class WideBNDMMatcherConstruction : public std::SingleDispatch < WideBNDMMatcherConstruction, indexes::stringology::BitParallelIndex < DefaultSymbolType >, const string::StringBase & > {
+public:
+	/**
+	 * Builds the index for a type-erased string by dispatching on the string's data
+	 * @param string pattern to construct the index for
+	 * @return bit parallel index over the default symbol type
+	 */
+	static indexes::stringology::BitParallelIndex < DefaultSymbolType > construct ( const string::String & string );
+	 // Typed overload doing the actual construction for a linear string.
+	template < class SymbolType >
+	static indexes::stringology::BitParallelIndex < SymbolType > construct ( const string::LinearString < SymbolType > & string );
+
+};
+// Builds the wide BNDM index of pattern w: per alphabet symbol a bit vector of pattern length where the symbol at position i sets bit ( length - 1 - i ).
+template < class SymbolType >
+indexes::stringology::BitParallelIndex < SymbolType > WideBNDMMatcherConstruction::construct ( const string::LinearString < SymbolType > & w ) {
+	const auto & content = w.getContent ( );
+	const size_t length = content.size ( );
+	std::map < SymbolType, std::vector < bool > > masks;
+	for ( const SymbolType & symbol : w.getAlphabet ( ) )
+		masks.emplace ( symbol, std::vector < bool > ( length ) );
+	for ( size_t i = 0; i < length; ++i )
+		masks [ content [ i ] ] [ length - i - 1 ] = true;
+	return indexes::stringology::BitParallelIndex < SymbolType > ( w.getAlphabet ( ), masks );
+}
+
+} /* namespace matching */
+
+} /* namespace stringology */
+
+#endif /* WIDE_BNDM_MATCHER_CONSTRUCTION_H_ */
diff --git a/alib2algo/src/stringology/query/BNDMOccurrences.cpp b/alib2algo/src/stringology/query/BNDMOccurrences.cpp
new file mode 100644
index 0000000000..d184d80aa4
--- /dev/null
+++ b/alib2algo/src/stringology/query/BNDMOccurrences.cpp
@@ -0,0 +1,24 @@
+/*
+ * BNDMOccurrences.cpp
+ *
+ *  Created on: 2. 1. 2017
+ *      Author: Jan Travnicek
+ */
+
+#include "BNDMOccurrences.h"
+
+#include <string/LinearString.h>
+
+namespace stringology {
+
+namespace query {
+// Type-erased entry point: keeps the matcher as is and forwards the subject's data to dispatch ( ).
+std::set < unsigned > BNDMOccurrences::query ( const indexes::stringology::BNDMMatcher < > & pattern, const string::String & subject ) {
+	return dispatch ( pattern, subject.getData ( ) );
+}
+// Namespace-scope registrator object; constructing it presumably hooks the LinearString < > overload of query into the dispatcher.
+auto bndmOccurrencesLinearString = BNDMOccurrences::RegistratorWrapper < std::set < unsigned >, string::LinearString < > > ( BNDMOccurrences::query );
+
+} /* namespace query */
+
+} /* namespace stringology */
diff --git a/alib2algo/src/stringology/query/BNDMOccurrences.h b/alib2algo/src/stringology/query/BNDMOccurrences.h
new file mode 100644
index 0000000000..77252196b8
--- /dev/null
+++ b/alib2algo/src/stringology/query/BNDMOccurrences.h
@@ -0,0 +1,101 @@
+/*
+ * BNDMOccurrences.h
+ *
+ *  Created on: 2. 1. 2017
+ *	  Author: Jan Travnicek
+ */
+
+#ifndef BNDM_OCCURRENCES_H_
+#define BNDM_OCCURRENCES_H_
+
+#include <indexes/stringology/BNDMMatcher.h>
+#include <string/String.h>
+#include <string/LinearString.h>
+#include <core/multipleDispatch.hpp>
+#include <global/GlobalData.h>
+
+#include <foreach>
+
+namespace stringology {
+
+namespace query {
+
+/**
+ * Searches a subject string for the pattern held by a BNDM matcher.
+ * Based on backward nondeterministic dawg matching.
+ */
+
+class BNDMOccurrences : public std::SingleDispatchFirstStaticParam < BNDMOccurrences, std::set < unsigned >, const indexes::stringology::BNDMMatcher < > &, const string::StringBase & > {
+
+public:
+	/**
+	 * Type-erased query; the subject's data is handed to dispatch
+	 * @param pattern matcher built for the searched pattern
+	 * @param subject string to search in
+	 * @return positions in subject at which occurrences of the pattern start
+	 */
+	static std::set < unsigned > query ( const indexes::stringology::BNDMMatcher < DefaultSymbolType > & pattern, const string::String & subject );
+
+	template < class SymbolType, size_t BitmaskBitCount >
+	static std::set < unsigned > query ( const indexes::stringology::BNDMMatcher < SymbolType, BitmaskBitCount > & pattern, const string::LinearString < SymbolType > & subject );
+
+};
+// Finds all occurrences of the matcher's pattern in subject by backward nondeterministic DAWG matching.
+// The bitmasks cover the first min ( BitmaskBitCount, |pattern| ) symbols; the rest of a longer pattern is verified naively.
+// An empty pattern occurs at every position 0 .. |subject|; a pattern longer than the subject yields no occurrence.
+template < class SymbolType, size_t BitmaskBitCount >
+std::set < unsigned > BNDMOccurrences::query ( const indexes::stringology::BNDMMatcher < SymbolType, BitmaskBitCount > & pattern, const string::LinearString < SymbolType > & subject ) {
+	std::set < unsigned > occ;
+	const size_t patternLength = pattern.getString ( ).size ( );
+	const size_t subjectLength = subject.getContent ( ).size ( );
+	const size_t bitmaskLength = std::min ( BitmaskBitCount, patternLength );
+	size_t posInSubject = 0;
+
+	 // Empty pattern: a zero-width window would never advance, so report every position directly
+	if ( patternLength == 0 ) {
+		for ( size_t position = 0; position <= subjectLength; ++position ) occ.insert ( position );
+		return occ;
+	}
+
+	std::bitset < BitmaskBitCount > currentBitmask;
+	 // Compare by addition: subjectLength - patternLength would wrap around for a pattern longer than the subject
+	while ( posInSubject + patternLength <= subjectLength ) {
+		size_t posInPattern = bitmaskLength;
+		size_t lastPosOfFactor = bitmaskLength;
+		 // Set the bitmask to all ones
+		currentBitmask.set ( );
+
+		while ( posInPattern > 0 && currentBitmask.any ( ) ) {
+			auto symbolVectorIter = pattern.getData ( ).find ( subject.getContent ( ).at ( posInSubject + posInPattern - 1 ) );
+			if ( symbolVectorIter == pattern.getData ( ).end ( ) )
+				break;
+			currentBitmask &= symbolVectorIter->second;
+			posInPattern--;
+			 // Test whether the most significant bit is set, i.e. the symbols read so far form a prefix of the pattern
+			if ( currentBitmask.test ( bitmaskLength - 1 ) ) {
+				 // and we didn't process all symbols of the window: remember the prefix as the next safe shift
+				if ( posInPattern > 0 )
+					lastPosOfFactor = posInPattern;
+				else {
+					size_t k = bitmaskLength;
+					 // out of bitset fallback to naive checking of occurrence here
+					while ( k < patternLength && pattern.getString ( ).at ( k ) == subject.getContent ( ).at ( posInSubject + k ) ) k++;
+					if ( k == patternLength )
+						 // Yay, there is match!!!
+						occ.insert ( posInSubject );
+				}
+			}
+			currentBitmask <<= 1;
+		}
+
+		posInSubject += lastPosOfFactor;
+	}
+
+	return occ;
+}
+
+} /* namespace query */
+
+} /* namespace stringology */
+
+#endif /* BNDM_OCCURRENCES_H_ */
diff --git a/alib2algo/src/stringology/query/WideBNDMOccurrences.cpp b/alib2algo/src/stringology/query/WideBNDMOccurrences.cpp
new file mode 100644
index 0000000000..e028f64898
--- /dev/null
+++ b/alib2algo/src/stringology/query/WideBNDMOccurrences.cpp
@@ -0,0 +1,24 @@
+/*
+ * WideBNDMOccurrences.cpp
+ *
+ *  Created on: 2. 1. 2017
+ *      Author: Jan Travnicek
+ */
+
+#include "WideBNDMOccurrences.h"
+
+#include <string/LinearString.h>
+
+namespace stringology {
+
+namespace query {
+// Type-erased entry point: keeps the index as is and forwards the subject's data to dispatch ( ).
+std::set < unsigned > WideBNDMOccurrences::query ( const indexes::stringology::BitParallelIndex < DefaultSymbolType > & pattern, const string::String & subject ) {
+	return dispatch ( pattern, subject.getData ( ) );
+}
+// Namespace-scope registrator object; constructing it presumably hooks the LinearString < > overload of query into the dispatcher.
+auto wideBNDMOccurrencesLinearString = WideBNDMOccurrences::RegistratorWrapper < std::set < unsigned >, string::LinearString < > > ( WideBNDMOccurrences::query );
+
+} /* namespace query */
+
+} /* namespace stringology */
diff --git a/alib2algo/src/stringology/query/WideBNDMOccurrences.h b/alib2algo/src/stringology/query/WideBNDMOccurrences.h
new file mode 100644
index 0000000000..94f632d9cc
--- /dev/null
+++ b/alib2algo/src/stringology/query/WideBNDMOccurrences.h
@@ -0,0 +1,94 @@
+/*
+ * WideBNDMOccurrences.h
+ *
+ *  Created on: 2. 1. 2017
+ *	  Author: Jan Travnicek
+ */
+
+#ifndef WIDE_BNDM_OCCURRENCES_H_
+#define WIDE_BNDM_OCCURRENCES_H_
+
+#include <indexes/stringology/BitParallelIndex.h>
+#include <string/String.h>
+#include <string/LinearString.h>
+#include <core/multipleDispatch.hpp>
+#include <global/GlobalData.h>
+
+#include <foreach>
+
+namespace stringology {
+
+namespace query {
+
+/**
+ * Searches a subject string for the pattern encoded in a bit parallel index.
+ * Based on backward nondeterministic dawg matching.
+ */
+
+class WideBNDMOccurrences : public std::SingleDispatchFirstStaticParam < WideBNDMOccurrences, std::set < unsigned >, const indexes::stringology::BitParallelIndex < DefaultSymbolType > &, const string::StringBase & > {
+
+public:
+	/**
+	 * Type-erased query; the subject's data is handed to dispatch
+	 * @param pattern bit parallel index built for the searched pattern
+	 * @param subject string to search in
+	 * @return positions in subject at which occurrences of the pattern start
+	 */
+	static std::set < unsigned > query ( const indexes::stringology::BitParallelIndex < DefaultSymbolType > & pattern, const string::String & subject );
+
+	template < class SymbolType >
+	static std::set < unsigned > query ( const indexes::stringology::BitParallelIndex < SymbolType > & pattern, const string::LinearString < SymbolType > & subject );
+
+};
+// Finds all occurrences of the indexed pattern in subject by backward nondeterministic DAWG matching on bit vectors.
+// An empty pattern occurs at every position 0 .. |subject|; a pattern longer than the subject yields no occurrence.
+template < class SymbolType >
+std::set < unsigned > WideBNDMOccurrences::query ( const indexes::stringology::BitParallelIndex < SymbolType > & pattern, const string::LinearString < SymbolType > & subject ) {
+	std::set < unsigned > occ;
+	const size_t subjectLength = subject.getContent ( ).size ( );
+	 // Pattern length = length of the stored bit vectors; an index without any vector is treated as an empty pattern
+	const size_t patternLength = pattern.getData ( ).empty ( ) ? 0 : pattern.getData ( ).begin ( )->second.size ( );
+	size_t posInSubject = 0;
+
+	 // Empty pattern: a zero-width window would never advance, so report every position directly
+	if ( patternLength == 0 ) {
+		for ( size_t position = 0; position <= subjectLength; ++position ) occ.insert ( position );
+		return occ;
+	}
+
+	std::vector < bool > currentBitmask;
+	currentBitmask.resize ( patternLength );
+	 // Compare by addition: subjectLength - patternLength would wrap around for a pattern longer than the subject
+	while ( posInSubject + patternLength <= subjectLength ) {
+		size_t posInPattern = patternLength;
+		size_t lastPosOfFactor = patternLength;
+		 // Set the bitmask to all ones
+		std::fill ( currentBitmask );
+
+		while ( posInPattern > 0 && std::any ( currentBitmask ) ) {
+			auto symbolVectorIter = pattern.getData ( ).find ( subject.getContent ( ).at ( posInSubject + posInPattern - 1 ) );
+			if ( symbolVectorIter == pattern.getData ( ).end ( ) )
+				break;
+			currentBitmask &= symbolVectorIter->second;
+			posInPattern--;
+			 // Test whether the most significant bit is set, i.e. the symbols read so far form a prefix of the pattern
+			if ( currentBitmask [ patternLength - 1 ] ) {
+				 // and we didn't process all symbols of the pattern: remember the prefix as the next safe shift
+				if ( posInPattern > 0 )
+					lastPosOfFactor = posInPattern;
+				else /* posInPattern == 0 */
+					 // Yay, there is match!!!
+					occ.insert ( posInSubject );
+			}
+			currentBitmask <<= 1;
+		}
+		posInSubject += lastPosOfFactor;
+	}
+	return occ;
+}
+
+} /* namespace query */
+
+} /* namespace stringology */
+
+#endif /* WIDE_BNDM_OCCURRENCES_H_ */
diff --git a/alib2algo/test-src/stringology/matching/BackwardNondeterministicDAWGMatchingTest.cpp b/alib2algo/test-src/stringology/matching/BackwardNondeterministicDAWGMatchingTest.cpp
new file mode 100644
index 0000000000..e07b3baaf8
--- /dev/null
+++ b/alib2algo/test-src/stringology/matching/BackwardNondeterministicDAWGMatchingTest.cpp
@@ -0,0 +1,58 @@
+#include "BackwardNondeterministicDAWGMatchingTest.h"
+
+#include <string/String.h>
+#include <stringology/matching/WideBNDMMatcherConstruction.h>
+#include <stringology/query/WideBNDMOccurrences.h>
+#include <stringology/matching/BNDMMatcherConstruction.h>
+#include <stringology/query/BNDMOccurrences.h>
+#include <stringology/exact/ExactFactorMatch.h>
+
+#include <string/generate/RandomStringFactory.h>
+#include <string/generate/RandomSubstringFactory.h>
+
+#include <primitive/Character.h>
+// Register the suite both in the registry named "stringology" and in the default registry.
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( BackwardNondeterministicDAWGMatchingTest, "stringology" );
+CPPUNIT_TEST_SUITE_REGISTRATION ( BackwardNondeterministicDAWGMatchingTest );
+// Nothing to prepare: the test builds all of its data locally.
+void BackwardNondeterministicDAWGMatchingTest::setUp ( ) {
+}
+// Nothing to release: setUp allocates nothing.
+void BackwardNondeterministicDAWGMatchingTest::tearDown ( ) {
+}
+// Runs both BNDM variants on fixed subject/pattern pairs, then compares the bitset variant with ExactFactorMatch on a long random input.
+void BackwardNondeterministicDAWGMatchingTest::testBNDM ( ) {
+	struct Case { std::string subject; std::string pattern; std::set < unsigned > expected; };
+
+	std::vector < Case > cases {
+		{ "a", "a", { 0 } },
+		{ "a", "b", { } },
+		{ "alfalfalfa", "alfalfalfa", { 0 } },
+		{ "alfalfalfa", "blfalfalfa", { } },
+		{ "alfalfalfa", "alfalfalfb", { } },
+		{ "alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa", "alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa", { 0 } },
+		{ "alfalfalfaalfalfalfaabfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa", "alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfa", { } },
+		{ "alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfaa", "alfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfalfalfaalfaa", { 0 } },
+		{ "atggccttgcc", "gcc", { 3, 8 } },
+		{ "aaaaaaaaaa", "a", { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 } },
+	};
+
+	for ( Case & c : cases ) {
+		indexes::stringology::BitParallelIndex < char > wideIndex = stringology::matching::WideBNDMMatcherConstruction::construct ( string::LinearString < char > ( c.pattern ) );
+		indexes::stringology::BNDMMatcher < char > bitsetMatcher = stringology::matching::BNDMMatcherConstruction::construct ( string::LinearString < char > ( c.pattern ) );
+		std::set < unsigned > wideOccs = stringology::query::WideBNDMOccurrences::query ( wideIndex, string::LinearString < char > ( c.subject ) );
+		std::set < unsigned > bitsetOccs = stringology::query::BNDMOccurrences::query ( bitsetMatcher, string::LinearString < char > ( c.subject ) );
+		CPPUNIT_ASSERT ( wideOccs == c.expected );
+		CPPUNIT_ASSERT ( bitsetOccs == c.expected );
+		std::cout << c.subject << ' ' << c.pattern << ' ' << wideOccs << std::endl;
+	}
+
+	 // pattern far longer than the bitmask, so the naive fallback of the bitset variant is exercised
+	auto longSubject = string::generate::RandomStringFactory::generateLinearString (64 * 64 * 64, 512, false, true);
+	auto longPattern = string::generate::RandomSubstringFactory::generateSubstring(64 * 32 * 32, longSubject );
+	indexes::stringology::BNDMMatcher < > longMatcher = stringology::matching::BNDMMatcherConstruction::construct ( longPattern );
+	std::set < unsigned > found = stringology::query::BNDMOccurrences::query ( longMatcher, longSubject );
+	std::set < unsigned > reference = stringology::exact::ExactFactorMatch::match ( longSubject, longPattern );
+	std::cout << "long: " << found << std::endl;
+	CPPUNIT_ASSERT ( found == reference);
+}
diff --git a/alib2algo/test-src/stringology/matching/BackwardNondeterministicDAWGMatchingTest.h b/alib2algo/test-src/stringology/matching/BackwardNondeterministicDAWGMatchingTest.h
new file mode 100644
index 0000000000..9d50ac034b
--- /dev/null
+++ b/alib2algo/test-src/stringology/matching/BackwardNondeterministicDAWGMatchingTest.h
@@ -0,0 +1,18 @@
+#ifndef BACKWARD_NONDETERMINISTIC_DAWG_MATCHING_TEST_H_
+#define BACKWARD_NONDETERMINISTIC_DAWG_MATCHING_TEST_H_
+
+#include <cppunit/extensions/HelperMacros.h>
+// CppUnit fixture whose suite consists of the single test case testBNDM.
+class BackwardNondeterministicDAWGMatchingTest : public CppUnit::TestFixture {
+	CPPUNIT_TEST_SUITE ( BackwardNondeterministicDAWGMatchingTest );
+	CPPUNIT_TEST ( testBNDM );
+	CPPUNIT_TEST_SUITE_END ( );
+
+public:
+	void setUp ( );
+	void tearDown ( );
+
+	void testBNDM ( );
+};
+
+#endif // BACKWARD_NONDETERMINISTIC_DAWG_MATCHING_TEST_H_
diff --git a/alib2data/src/indexes/stringology/BNDMMatcher.cpp b/alib2data/src/indexes/stringology/BNDMMatcher.cpp
new file mode 100644
index 0000000000..091929d0e4
--- /dev/null
+++ b/alib2data/src/indexes/stringology/BNDMMatcher.cpp
@@ -0,0 +1,14 @@
+/*
+ * BNDMMatcher.cpp
+ *
+ *  Created on: Jan 8, 2017
+ *      Author: Jan Travnicek
+ */
+
+#include "BNDMMatcher.h"
+
+namespace alib {
+// Namespace-scope registrator object; presumably makes BNDMMatcher < > known to the XML parser of alib::Object.
+auto bndmMatcherParserRegister = xmlApi < alib::Object >::ParserRegister < indexes::stringology::BNDMMatcher < > > ( );
+
+} /* namespace alib */
diff --git a/alib2data/src/indexes/stringology/BNDMMatcher.h b/alib2data/src/indexes/stringology/BNDMMatcher.h
new file mode 100644
index 0000000000..ea82fa48a5
--- /dev/null
+++ b/alib2data/src/indexes/stringology/BNDMMatcher.h
@@ -0,0 +1,243 @@
+/*
+ * BNDMMatcher.h
+ *
+ *  Created on: Jan 8, 2017
+ *      Author: Jan Travnicek
+ */
+
+#ifndef BNDM_MATCHER_H_
+#define BNDM_MATCHER_H_
+
+#include <string>
+#include <iostream>
+#include <sstream>
+
+#include <common/DefaultSymbolType.h>
+
+#include <core/components.hpp>
+#include <exception/CommonException.h>
+
+#include <object/UniqueObject.h>
+#include <object/ObjectBase.h>
+
+#include <sax/FromXMLParserHelper.h>
+#include <core/xmlApi.hpp>
+
+#include <container/ObjectsSet.h>
+#include <container/ObjectsMap.h>
+#include <container/ObjectsVector.h>
+#include <container/ObjectsBitset.h>
+#include <primitive/Bool.h>
+#include <bitset>
+
+#include <alphabet/common/SymbolNormalize.h>
+
+namespace indexes {
+
+namespace stringology {
+
+class GeneralAlphabet;
+
+/**
+ * Matcher for backward nondeterministic DAWG matching (BNDM): a bitmask per symbol covering the first
+ * BitmaskBitCount symbols of the pattern, plus the pattern itself for checking the part beyond the bitmask.
+ */
+template < class SymbolType = DefaultSymbolType, size_t BitmaskBitCount = 64 >
+class BNDMMatcher : public alib::ObjectBase, public std::Components < BNDMMatcher < SymbolType, BitmaskBitCount >, SymbolType, std::tuple < GeneralAlphabet >, std::tuple < > > {
+protected:
+	std::map < SymbolType, std::bitset < BitmaskBitCount > > m_vectors;
+	std::vector < SymbolType > m_string;
+
+public:
+	/**
+	 * @return heap-allocated copy of this matcher
+	 */
+	virtual ObjectBase * clone ( ) const;
+
+	/**
+	 * @return heap-allocated matcher move-constructed from this one
+	 */
+	virtual ObjectBase * plunder ( ) &&;
+
+	explicit BNDMMatcher ( std::set < SymbolType > alphabet, std::map < SymbolType, std::bitset < BitmaskBitCount > > vectors, std::vector < SymbolType > string );
+
+	/**
+	 * @return bitmasks of the pattern, keyed by symbol
+	 */
+	const std::map < SymbolType, std::bitset < BitmaskBitCount > > & getData ( ) const;
+
+	const std::vector < SymbolType > & getString ( ) const;
+
+	const std::set < SymbolType > & getAlphabet ( ) const {
+		return this->template accessComponent < GeneralAlphabet > ( ).get ( );
+	}
+
+	/**
+	 * @param symbol symbol whose bitmask is set (its entry is created when missing)
+	 * @param data the new bitmask of the symbol
+	 */
+	void setBitVectorForSymbol ( SymbolType symbol, std::bitset < BitmaskBitCount > data );
+
+	/**
+	 * Removes a symbol from the alphabet component of the matcher
+	 * @param symbol removed symbol from the alphabet
+	 */
+	bool removeSymbolFromAlphabet ( const SymbolType & symbol ) {
+		return this->template accessComponent < GeneralAlphabet > ( ).remove ( symbol );
+	}
+
+	/**
+	 * Prints a textual representation of the matcher to the output stream.
+	 * Only the per-symbol bitmasks are printed.
+	 * @param out output stream to print to
+	 */
+	virtual void operator >>( std::ostream & out ) const;
+
+	virtual int compare ( const ObjectBase & other ) const {
+		if ( std::type_index ( typeid ( * this ) ) == std::type_index ( typeid ( other ) ) ) return this->compare ( ( decltype ( * this ) )other );
+
+		return std::type_index ( typeid ( * this ) ) - std::type_index ( typeid ( other ) );
+	}
+
+	virtual int compare ( const BNDMMatcher & other ) const;
+
+	virtual explicit operator std::string ( ) const;
+
+	static const std::string & getXmlTagName() {
+		static std::string xmlTagName = "BNDMMatcher";
+
+		return xmlTagName;
+	}
+
+	static BNDMMatcher parse ( std::deque < sax::Token >::iterator & input );
+
+	void compose ( std::deque < sax::Token > & out ) const;
+
+	virtual alib::ObjectBase * inc ( ) &&;
+
+	virtual ObjectBase * normalize ( ) && {
+		if ( typeid ( BNDMMatcher < DefaultSymbolType, BitmaskBitCount > ) == typeid ( BNDMMatcher < SymbolType, BitmaskBitCount > ) )
+			return this;
+		 // not over DefaultSymbolType yet: rebuild the matcher with normalized symbols, keeping the bitmask width
+		std::set < DefaultSymbolType > alphabet = alphabet::SymbolNormalize::normalizeAlphabet ( std::move ( this->template accessComponent < GeneralAlphabet > ( ).get ( ) ) );
+
+		std::map < DefaultSymbolType, std::bitset < BitmaskBitCount > > vectors;
+		for ( std::pair < SymbolType, std::bitset < BitmaskBitCount > > && vector : std::make_moveable_map ( m_vectors ) )
+			vectors.insert ( std::make_pair ( alphabet::SymbolNormalize::normalizeSymbol ( std::move ( vector.first ) ), std::move ( vector.second ) ) );
+
+		std::vector < DefaultSymbolType > string = alphabet::SymbolNormalize::normalizeSymbols ( std::move ( m_string ) );
+
+		return new BNDMMatcher < DefaultSymbolType, BitmaskBitCount > ( std::move ( alphabet ), std::move ( vectors ), std::move ( string ) );
+	}
+};
+
+} /* namespace stringology */
+
+} /* namespace indexes */
+
+namespace indexes {
+
+namespace stringology {
+// Moves the alphabet into the GeneralAlphabet component and stores the per-symbol bitmasks and the pattern.
+template < class SymbolType, size_t BitmaskBitCount >
+BNDMMatcher < SymbolType, BitmaskBitCount >::BNDMMatcher ( std::set < SymbolType > alphabet, std::map < SymbolType, std::bitset < BitmaskBitCount > > vectors, std::vector < SymbolType > string ) : std::Components < BNDMMatcher, SymbolType, std::tuple < GeneralAlphabet >, std::tuple < > > ( std::make_tuple ( std::move ( alphabet ) ), std::tuple < > ( ) ), m_vectors ( std::move ( vectors ) ), m_string ( std::move ( string ) ) {
+}
+// Returns a new heap-allocated matcher copy-constructed from this one.
+template < class SymbolType, size_t BitmaskBitCount >
+alib::ObjectBase * BNDMMatcher < SymbolType, BitmaskBitCount >::clone ( ) const {
+	return new BNDMMatcher ( * this );
+}
+// Returns a new heap-allocated matcher move-constructed from this one; callable on rvalues only.
+template < class SymbolType, size_t BitmaskBitCount >
+alib::ObjectBase * BNDMMatcher < SymbolType, BitmaskBitCount >::plunder ( ) && {
+	return new BNDMMatcher ( std::move ( * this ) );
+}
+// Read-only access to the per-symbol bitmasks.
+template < class SymbolType, size_t BitmaskBitCount >
+const std::map < SymbolType, std::bitset < BitmaskBitCount > > & BNDMMatcher < SymbolType, BitmaskBitCount >::getData ( ) const {
+	return m_vectors;
+}
+// Read-only access to the stored pattern symbols.
+template < class SymbolType, size_t BitmaskBitCount >
+const std::vector < SymbolType > & BNDMMatcher < SymbolType, BitmaskBitCount >::getString ( ) const {
+	return m_string;
+}
+// Replaces the bitmask stored for symbol; operator [ ] creates the entry when the symbol has none yet.
+template < class SymbolType, size_t BitmaskBitCount >
+void BNDMMatcher < SymbolType, BitmaskBitCount >::setBitVectorForSymbol ( SymbolType symbol, std::bitset < BitmaskBitCount > data ) {
+	this->m_vectors [ symbol ] = std::move ( data );
+}
+// Writes "(BNDMMatcher <bitmasks>)" to out; the alphabet and the pattern are not printed.
+template < class SymbolType, size_t BitmaskBitCount >
+void BNDMMatcher < SymbolType, BitmaskBitCount >::operator >>( std::ostream & out ) const {
+	out << "(BNDMMatcher " << this->m_vectors << ")";
+}
+// Orders two matchers of the same type by their bitmasks, alphabet and pattern; 0 means equal.
+template < class SymbolType, size_t BitmaskBitCount >
+int BNDMMatcher < SymbolType, BitmaskBitCount >::compare ( const BNDMMatcher & other ) const {
+	 // The pattern has to take part: the bitmasks encode only its first BitmaskBitCount symbols,
+	 // so matchers of longer patterns sharing that prefix would otherwise compare equal.
+	auto first = std::tie ( getData ( ), getAlphabet ( ), getString ( ) );
+	auto second = std::tie ( other.getData ( ), other.getAlphabet ( ), other.getString ( ) );
+	static std::compare < decltype ( first ) > comp;
+	return comp ( first, second );
+}
+// Converts the matcher to a string by streaming it into a std::stringstream.
+template < class SymbolType, size_t BitmaskBitCount >
+BNDMMatcher < SymbolType, BitmaskBitCount >::operator std::string ( ) const {
+	std::stringstream ss;
+	ss << * this;
+	return ss.str ( );
+}
+// Reads a matcher from XML tokens: start tag, then alphabet, bitmask map and pattern in this order, then end tag.
+template < class SymbolType, size_t BitmaskBitCount >
+BNDMMatcher < SymbolType, BitmaskBitCount > BNDMMatcher < SymbolType, BitmaskBitCount >::parse ( std::deque < sax::Token >::iterator & input ) {
+	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, BNDMMatcher::getXmlTagName() );
+	std::set < SymbolType > alphabet = alib::xmlApi < std::set < SymbolType > >::parse ( input );
+	std::map < SymbolType, std::bitset < BitmaskBitCount > > data = alib::xmlApi < std::map < SymbolType, std::bitset < BitmaskBitCount > > >::parse ( input );
+	std::vector < SymbolType > string = alib::xmlApi < std::vector < SymbolType > >::parse ( input );
+	BNDMMatcher < SymbolType, BitmaskBitCount > res ( std::move ( alphabet ), std::move ( data ), std::move ( string ) );
+	 // the end tag is consumed only after all three parts have been read
+	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, BNDMMatcher::getXmlTagName() );
+	return res;
+}
+// Appends the XML tokens of the matcher to out: start tag, alphabet, bitmask map, pattern, end tag (the order parse expects).
+template < class SymbolType, size_t BitmaskBitCount >
+void BNDMMatcher < SymbolType, BitmaskBitCount >::compose ( std::deque < sax::Token > & out ) const {
+	out.emplace_back ( BNDMMatcher::getXmlTagName(), sax::Token::TokenType::START_ELEMENT );
+	alib::xmlApi < std::set < SymbolType > >::compose ( out, getAlphabet ( ) );
+	alib::xmlApi < std::map < SymbolType, std::bitset < BitmaskBitCount > > >::compose ( out, getData ( ) );
+	alib::xmlApi < std::vector < SymbolType > >::compose ( out, getString ( ) );
+	out.emplace_back ( BNDMMatcher::getXmlTagName(), sax::Token::TokenType::END_ELEMENT );
+}
+// Moves this matcher into an alib::Object and returns it wrapped in a new alib::UniqueObject together with primitive::Integer ( 0 ).
+template < class SymbolType, size_t BitmaskBitCount >
+alib::ObjectBase* BNDMMatcher < SymbolType, BitmaskBitCount >::inc() && {
+	return new alib::UniqueObject(alib::Object(std::move(*this)), primitive::Integer(0));
+}
+
+} /* namespace stringology */
+
+} /* namespace indexes */
+
+namespace std {
+// Constraints on the GeneralAlphabet component of BNDMMatcher (presumably consulted by std::Components when symbols are added or removed).
+template < class SymbolType, size_t BitmaskBitCount >
+class ComponentConstraint < indexes::stringology::BNDMMatcher < SymbolType, BitmaskBitCount >, SymbolType, indexes::stringology::GeneralAlphabet > {
+public:
+	 // a symbol counts as used when the matcher stores a bitmask for it
+	static bool used ( const indexes::stringology::BNDMMatcher < SymbolType, BitmaskBitCount > & index, const SymbolType & symbol ) {
+		return index.getData ( ).count ( symbol ) != 0;
+	}
+	 // every symbol is reported as available
+	static bool available ( const indexes::stringology::BNDMMatcher < SymbolType, BitmaskBitCount > &, const SymbolType & ) {
+		return true;
+	}
+	 // no validity check is performed
+	static void valid ( const indexes::stringology::BNDMMatcher < SymbolType, BitmaskBitCount > &, const SymbolType & ) {
+	}
+};
+
+} /* namespace std */
+
+#endif /* BNDM_MATCHER_H_ */
diff --git a/alib2std/src/extensions/vector.hpp b/alib2std/src/extensions/vector.hpp
index 52c4af268c..09bb51c568 100644
--- a/alib2std/src/extensions/vector.hpp
+++ b/alib2std/src/extensions/vector.hpp
@@ -218,9 +218,8 @@ vector < bool, Ts ... > & operator >>= ( vector < bool, Ts ... > & A, size_t dis
 		auto itAReverse = A.end ( ) - 1;
 
 		// upper part of the last word in the vector can contain some garbage so it needs to be cleared
-		vectorBoolInternalType maskTopWord = getMask ( sizeWithin );
-		if ( maskTopWord != 0 )
-			* ( itAReverse._M_p ) &= maskTopWord;
+		if ( sizeWithin != 0 )
+			* ( itAReverse._M_p ) &= getMask ( sizeWithin );
 
 		// simulate behavior of reverse iterator
 		while ( itAReverse >= A.begin ( ) ) {
@@ -254,10 +253,26 @@ bool any ( const vector < bool, Ts ... > & v ) {
 
 	if ( sizeWithin == 0 )
 		return * itV._M_p != 0;
-	else {
-		vectorBoolInternalType maskTopWord = getMask ( sizeWithin );
-		return ( * itV._M_p & maskTopWord ) != 0;
-	}
+	else
+		return ( * itV._M_p & getMask ( sizeWithin ) ) != 0;
+}
+
+template < class ... Ts >
+void fill ( const vector < bool, Ts ... > & v ) {
+	// Sets every storage word holding bits of v to all ones via the iterator's internal word pointer _M_p (implementation-specific).
+	// NOTE(review): v is taken by const reference, yet its storage is modified here.
+	const auto onePastLast = v.end ( );
+	for ( auto wordIt = v.begin ( ); wordIt < onePastLast; ++ wordIt._M_p )
+		* wordIt._M_p = ~ vectorBoolInternalType { };
+}
+
+template < class ... Ts >
+void clear ( const vector < bool, Ts ... > & v ) {
+	// Zeroes every storage word holding bits of v via the iterator's internal word pointer _M_p (implementation-specific).
+	// NOTE(review): v is taken by const reference, yet its storage is modified here.
+	const auto onePastLast = v.end ( );
+	for ( auto wordIt = v.begin ( ); wordIt < onePastLast; ++ wordIt._M_p )
+		* wordIt._M_p = 0;
+}
 
 } /* namespace std */
diff --git a/astringology2/src/astringology.cpp b/astringology2/src/astringology.cpp
index 9d2f3fa84e..4c770213af 100644
--- a/astringology2/src/astringology.cpp
+++ b/astringology2/src/astringology.cpp
@@ -24,7 +24,7 @@
 #include <stringology/exact/BoyerMoore.h>
 #include <stringology/exact/ReversedBoyerMooreHorspool.h>
 #include <stringology/exact/DeadZoneUsingBadCharacterShift.h>
-#include <stringology/exact/BackwardNondeterministicDAWGMatching.hpp>
+#include <stringology/query/BNDMOccurrences.h>
 #include <stringology/exact/BackwardOracleMatching.h>
 #include <stringology/exact/BackwardDAWGMatching.h>
 #include <stringology/exact/ExactMatchingAutomaton.h>
@@ -39,6 +39,7 @@
 #include <stringology/indexing/PositionHeapNaive.h>
 #include <stringology/indexing/SuffixArrayNaive.h>
 #include <stringology/indexing/BitParallelIndexConstruction.h>
+#include <stringology/matching/BNDMMatcherConstruction.h>
 #include <stringology/indexing/CompressedBitParallelIndexConstruction.h>
 
 int main ( int argc, char * argv[] ) {
@@ -61,12 +62,13 @@ int main ( int argc, char * argv[] ) {
 		allowed.push_back ( "boyerMoore" );
 		allowed.push_back ( "reversedBoyerMooreHorspool" );
 		allowed.push_back ( "deadZoneUsingBadCharacterShift" );
-		allowed.push_back ( "backwardNondeterministicDAWGMatching" );
+		allowed.push_back ( "bndmOccurrences" );
 		allowed.push_back ( "backwardOracleMatching" );
 		allowed.push_back ( "backwardDAWGMatching" );
 		allowed.push_back ( "suffixTrie" );
 		allowed.push_back ( "positionHeap" );
 		allowed.push_back ( "bitParallelIndex" );
+		allowed.push_back ( "bndmMatcher" );
 		allowed.push_back ( "compressedBitParallelIndex" );
 		allowed.push_back ( "suffixArray" );
 
@@ -165,14 +167,14 @@ int main ( int argc, char * argv[] ) {
 			measurements::start ( "Output write", measurements::Type::AUXILIARY );
 
 			alib::XmlDataFactory::toStdout ( res );
-		} else if ( algorithm.getValue ( ) == "backwardNondeterministicDAWGMatching" ) {
+		} else if ( algorithm.getValue ( ) == "bndmOccurrences" ) {
 			string::String subject = alib::XmlDataFactory::fromTokens ( std::move ( sax::FromXMLParserHelper::parseInput(true, subjectInput).front ( ) ) );
-			string::String pattern = alib::XmlDataFactory::fromTokens ( std::move ( sax::FromXMLParserHelper::parseInput(true, patternInput).front ( ) ) );
+			indexes::stringology::BNDMMatcher < DefaultSymbolType > pattern = alib::XmlDataFactory::fromTokens ( std::move ( sax::FromXMLParserHelper::parseInput(true, patternInput).front ( ) ) );
 
 			measurements::end ( );
 			measurements::start ( "Algorithm", measurements::Type::MAIN );
 
-			std::set < unsigned > res = stringology::exact::BackwardNondeterministicDAWGMatching::match ( subject, pattern );
+			std::set < unsigned > res = stringology::query::BNDMOccurrences::query ( pattern, subject );
 
 			measurements::end ( );
 			measurements::start ( "Output write", measurements::Type::AUXILIARY );
@@ -341,6 +343,18 @@ int main ( int argc, char * argv[] ) {
 			measurements::start ( "Output write", measurements::Type::AUXILIARY );
 
 			alib::XmlDataFactory::toStdout ( bitParallelIndex );
+		} else if ( algorithm.getValue ( ) == "bndmMatcher" ) {
+			string::String pattern = alib::XmlDataFactory::fromTokens ( std::move ( sax::FromXMLParserHelper::parseInput(true, patternInput).front ( ) ) );
+
+			measurements::end ( );
+			measurements::start ( "Algorithm", measurements::Type::MAIN );
+
+			indexes::stringology::BNDMMatcher < DefaultSymbolType > bndmMatcher = stringology::matching::BNDMMatcherConstruction::construct ( pattern );
+
+			measurements::end ( );
+			measurements::start ( "Output write", measurements::Type::AUXILIARY );
+
+			alib::XmlDataFactory::toStdout ( bndmMatcher );
 		} else if ( algorithm.getValue ( ) == "compressedBitParallelIndex" ) {
 			string::String subject = alib::XmlDataFactory::fromTokens ( std::move ( sax::FromXMLParserHelper::parseInput(true, subjectInput).front ( ) ) );
 
diff --git a/tests.astringology.sh b/tests.astringology.sh
index 33c521d1fe..711615434a 100755
--- a/tests.astringology.sh
+++ b/tests.astringology.sh
@@ -212,6 +212,7 @@ function runTest {
 	clearResults
 }
 
+runTest "BNDM Matcher" "./astringology2 -a bndmMatcher -p \"\$PATTERN_FILE\" | ./astringology2 -a bndmOccurrences -p - -s \"\$SUBJECT_FILE\" | ./astat2 -p size"
 runTest "Exact Boyer Moore" "./astringology2 -a boyerMoore -s \"\$SUBJECT_FILE\" -p <(./aaccess2 --string alphabet -o add -i \"\$PATTERN_FILE\" -a <(./aaccess2 --string alphabet -o get -i \"\$SUBJECT_FILE\")) | ./astat2 -p size"
 runTest "Compressed Bit Parallelism Factors" "./astringology2 -a compressedBitParallelIndex -s \"\$SUBJECT_FILE\" | ./aquery2 -q compressedBitParallelismFactors -p \"\$PATTERN_FILE\" | ./astat2 -p size"
 runTest "Bit Parallelism Factors" "./astringology2 -a bitParallelIndex -s \"\$SUBJECT_FILE\" | ./aquery2 -q bitParallelismFactors -p \"\$PATTERN_FILE\" | ./astat2 -p size"
-- 
GitLab