diff --git a/alib2algo/src/string/compress/LZ77.cpp b/alib2algo/src/string/compress/LZ77.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9a75bfc58fac8b6682a787ee485a97bee5ba580a
--- /dev/null
+++ b/alib2algo/src/string/compress/LZ77.cpp
@@ -0,0 +1,27 @@
+/*
+ * LZ77.cpp
+ *
+ * Created on: 1. 3. 2017
+ * Author: Jan Parma
+ */
+
+#include "LZ77.h"
+
+namespace string {
+
+namespace compress {
+
+/**
+ * Type-erased entry point: unwraps the string held by the string::String wrapper and forwards it,
+ * together with both buffer sizes, to the dispatcher inherited from std::SingleDispatch.
+ */
+std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > LZ77::compress ( const string::String & string, unsigned int searchBufferSize, unsigned int lookaheadBufferSize ) {
+	return dispatch ( string.getData ( ), searchBufferSize, lookaheadBufferSize );
+}
+
+// NOTE(review): presumably registers the LinearString overload with the dispatcher; the mechanism lives in core/multipleDispatch.hpp.
+auto LZ77Compress = LZ77::RegistratorWrapper < std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > >, string::LinearString < > > ( LZ77::compress );
+
+} /* namespace compress */
+
+} /* namespace string */
diff --git a/alib2algo/src/string/compress/LZ77.h b/alib2algo/src/string/compress/LZ77.h
new file mode 100644
index 0000000000000000000000000000000000000000..26970c048d8311415a05ee323637f609c79f8e30
--- /dev/null
+++ b/alib2algo/src/string/compress/LZ77.h
@@ -0,0 +1,140 @@
+/*
+ * LZ77.h
+ *
+ * Created on: 1. 3. 2017
+ * Author: Jan Parma
+ */
+
+#ifndef _LZ77_H_
+#define _LZ77_H_
+
+#include <core/multipleDispatch.hpp>
+#include <string/StringFeatures.h>
+#include <tuple>
+#include <vector>
+
+#include <string/LinearString.h>
+#include <string/String.h>
+
+#include <exception/CommonException.h>
+
+namespace string {
+
+namespace compress {
+
+/**
+ * LZ77 compression of strings.
+ *
+ * The input is encoded as a sequence of ( offset, length, symbol ) triples: offset is the distance
+ * from the current position back to the start of the longest match found in the search buffer
+ * ( 0 when nothing matched ), length is the length of that match and symbol is the input symbol
+ * that follows the match.
+ */
+class LZ77 : public std::SingleDispatch < LZ77, std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > >, const string::StringBase &, unsigned int, unsigned int > {
+public:
+	/**
+	 * Compresses a string held in the generic string::String wrapper.
+	 *
+	 * @param string string to compress
+	 * @param searchBufferSize number of positions before the current one in which matches are searched
+	 * @param lookaheadBufferSize maximal length of a single match
+	 * @return result of the implementation the call is dispatched to
+	 */
+	static std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > compress ( const string::String & string, unsigned int searchBufferSize, unsigned int lookaheadBufferSize );
+
+	/**
+	 * Compresses a linear string.
+	 *
+	 * @param string string to compress
+	 * @param searchBufferSize number of positions before the current one in which matches are searched
+	 * @param lookaheadBufferSize maximal length of a single match
+	 * @return the input encoded as ( offset, length, symbol ) triples; empty for an empty string
+	 * @throws exception::CommonException if searchBufferSize or lookaheadBufferSize is 0
+	 */
+	template < class SymbolType >
+	static std::vector < std::tuple < unsigned int, unsigned int, SymbolType > > compress ( const string::LinearString < SymbolType > & string, unsigned int searchBufferSize, unsigned int lookaheadBufferSize );
+
+private:
+	/**
+	 * Counts how many leading symbols of the range [ first1, last1 ] equal the symbols starting at first2.
+	 *
+	 * @param string string both positions refer to
+	 * @param first1 index of the first symbol of the lookahead range
+	 * @param last1 index of the last symbol of the lookahead range ( inclusive )
+	 * @param first2 index at which the candidate match starts; compress ( ) only passes first2 < first1
+	 * @return length of the common prefix, at most last1 - first1 + 1
+	 */
+	template < class SymbolType >
+	static size_t equal ( const string::LinearString < SymbolType > & string, size_t first1, size_t last1, size_t first2 );
+};
+
+// Greedy encoder: at every position the longest match inside the search buffer is emitted.
+template < class SymbolType >
+std::vector < std::tuple < unsigned int, unsigned int, SymbolType > > LZ77::compress ( const string::LinearString < SymbolType > & string, unsigned int searchBufferSize, unsigned int lookaheadBufferSize ) {
+
+	if ( searchBufferSize == 0 )
+		throw exception::CommonException ( "LZ77: search buffer size must be greater than 0" );
+
+	if ( lookaheadBufferSize == 0 )
+		throw exception::CommonException ( "LZ77: lookahead buffer size must be greater than 0" );
+
+	const auto & content = string.getContent ( );
+	const size_t length = content.size ( );
+
+	std::vector < std::tuple < unsigned int, unsigned int, SymbolType > > output;
+
+	size_t pointer = 0;
+
+	while ( pointer < length ) {
+		// The last input symbol always has to be left for the literal of a triple, so a match may
+		// extend at most up to the symbol in front of it.
+		const size_t remaining = length - pointer - 1;
+		const size_t lookahead = lookaheadBufferSize < remaining ? lookaheadBufferSize : remaining;
+
+		// Oldest position still inside the search buffer. Evaluated in size_t so that neither a
+		// position near the start nor a very large searchBufferSize can underflow or overflow.
+		const size_t windowBegin = pointer > searchBufferSize ? pointer - searchBufferSize : 0;
+
+		size_t maxMatch = 0;
+		size_t sbPointer = pointer; // no match yet, which yields offset 0
+
+		// Candidates are tried from the nearest one and replaced only by a strictly longer match;
+		// once the whole lookahead buffer is matched no farther candidate can win.
+		for ( size_t j = pointer; j > windowBegin && maxMatch < lookahead; ) {
+			-- j;
+
+			const size_t match = equal ( string, pointer, pointer + lookahead - 1, j );
+
+			if ( maxMatch < match ) {
+				maxMatch = match;
+				sbPointer = j;
+			}
+		}
+
+		output.emplace_back ( static_cast < unsigned int > ( pointer - sbPointer ), static_cast < unsigned int > ( maxMatch ), content[pointer + maxMatch] );
+
+		pointer += maxMatch + 1;
+	}
+
+	return output;
+}
+
+// Walks both positions forward in lock-step until the symbols differ or first1 passes last1.
+template < class SymbolType >
+size_t LZ77::equal ( const string::LinearString < SymbolType > & string, size_t first1, size_t last1, size_t first2 ) {
+	const auto & content = string.getContent ( );
+
+	size_t steps = 0;
+
+	for ( ; first1 <= last1; ++ first1, ++ first2, ++ steps )
+		if ( content[first1] != content[first2] )
+			break;
+
+	return steps;
+}
+
+} /* namespace compress */
+
+} /* namespace string */
+
+#endif /* _LZ77_H_ */
diff --git a/alib2algo/src/string/compress/LZ77Decompress.cpp b/alib2algo/src/string/compress/LZ77Decompress.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7de478f01f219c70fb2e78a68bbcebc90dcc5f88
--- /dev/null
+++ b/alib2algo/src/string/compress/LZ77Decompress.cpp
@@ -0,0 +1,22 @@
+/*
+ * LZ77Decompress.cpp
+ *
+ * Created on: 1. 3. 
2017
+ * Author: Jan Parma
+ */
+
+#include "LZ77Decompress.h"
+
+namespace string {
+
+namespace compress {
+
+string::String LZ77Decompress::decompress ( const std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > & input ) {
+	return dispatch ( input );
+}
+
+auto LZ77Decompress = LZ77Decompress::RegistratorWrapper < string::LinearString < >, std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > > ( LZ77Decompress::decompress );
+
+} /* namespace compress */
+
+} /* namespace string */
diff --git a/alib2algo/src/string/compress/LZ77Decompress.h b/alib2algo/src/string/compress/LZ77Decompress.h
new file mode 100644
index 0000000000000000000000000000000000000000..598c12a108a1aed356c0bc6b1c495dbbc7a303dc
--- /dev/null
+++ b/alib2algo/src/string/compress/LZ77Decompress.h
@@ -0,0 +1,92 @@
+/*
+ * LZ77Decompress.h
+ *
+ * Created on: 1. 3. 2017
+ * Author: Jan Parma
+ */
+
+#ifndef _LZ77DECOMPRESS_H_
+#define _LZ77DECOMPRESS_H_
+
+// NOTE(review): neither macro is referenced anywhere in this patch; kept for compatibility.
+#define SEARCH_BUFFER_LENGTH 6
+#define LOOKAHEAD_BUFFER_LENGTH 4
+
+#include <core/multipleDispatch.hpp>
+#include <string/StringFeatures.h>
+#include <set>
+#include <tuple>
+#include <vector>
+
+#include <string/LinearString.h>
+#include <string/String.h>
+
+#include <exception/CommonException.h>
+
+namespace string {
+
+namespace compress {
+
+/**
+ * Decompression of a sequence of LZ77 ( offset, length, symbol ) triples.
+ *
+ * Every triple stands for length symbols copied from offset positions before the current end of
+ * the output, followed by the literal symbol.
+ */
+class LZ77Decompress : public std::SingleDispatch < LZ77Decompress, string::String, const std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > & > {
+public:
+	/**
+	 * Decompresses triples over DefaultSymbolType through the dispatcher.
+	 *
+	 * @param input triples to decode
+	 * @return result of the implementation the call is dispatched to
+	 */
+	static string::String decompress ( const std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > & input );
+
+	/**
+	 * Decompresses triples into a linear string.
+	 *
+	 * @param input triples to decode
+	 * @return the decoded string; nothing is appended for empty input
+	 * @throws exception::CommonException if a triple with non-zero length has offset 0 or an
+	 *         offset larger than the number of symbols decoded so far
+	 */
+	template < class SymbolType >
+	static string::LinearString < SymbolType > decompress ( const std::vector < std::tuple < unsigned int, unsigned int, SymbolType > > & input );
+};
+
+template < class SymbolType >
+string::LinearString < SymbolType > LZ77Decompress::decompress ( const std::vector < std::tuple < unsigned int, unsigned int, SymbolType > > & input ) {
+
+	string::LinearString < SymbolType > output;
+
+	for ( const std::tuple < unsigned int, unsigned int, SymbolType > & triple : input ) {
+		const unsigned int offset = std::get < 0 > ( triple );
+		const unsigned int length = std::get < 1 > ( triple );
+
+		// A back-reference must start inside the already decoded output; anything else would
+		// index outside of the content.
+		if ( length > 0 && ( offset == 0 || offset > output.getContent ( ).size ( ) ) )
+			throw exception::CommonException ( "LZ77Decompress: back-reference points outside of the decompressed data" );
+
+		// Copied one symbol at a time on purpose: when length > offset the source range overlaps
+		// the symbols being appended.
+		for ( unsigned int j = 0; j < length; ++ j ) {
+			const SymbolType symbol = output.getContent ( )[output.getContent ( ).size ( ) - offset];
+
+			output.extendAlphabet ( std::set < SymbolType > { symbol } );
+			output.appendSymbol ( symbol );
+		}
+
+		output.extendAlphabet ( std::set < SymbolType > { std::get < 2 > ( triple ) } );
+		output.appendSymbol ( std::get < 2 > ( triple ) );
+	}
+
+	return output;
+}
+
+} /* namespace compress */
+
+} /* namespace string */
+
+#endif /* _LZ77DECOMPRESS_H_ */
diff --git a/alib2algo/test-src/string/compress/LZ77DecompressTest.cpp b/alib2algo/test-src/string/compress/LZ77DecompressTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f721ff262fbc61764d494548ce8d65612c4e0a95
--- /dev/null
+++ b/alib2algo/test-src/string/compress/LZ77DecompressTest.cpp
@@ -0,0 +1,71 @@
+#include "LZ77DecompressTest.h"
+
+#include <string/String.h>
+#include <string/compress/LZ77.h>
+#include <string/compress/LZ77Decompress.h>
+
+#define CPPUNIT_IMPLY( x, y ) CPPUNIT_ASSERT ( !( x ) || ( y ) )
+
+#include <iostream>
+#include <tuple>
+
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( LZ77DecompressTest, "stringology" );
+CPPUNIT_TEST_SUITE_REGISTRATION ( LZ77DecompressTest );
+
+void LZ77DecompressTest::setUp ( ) {
+}
+
+void LZ77DecompressTest::tearDown ( ) {
+}
+
+void LZ77DecompressTest::testLZ77Decompress1 ( ) {
+
+	std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > input;
+
+	input.push_back(std::make_tuple(0, 0, DefaultSymbolType ( 'a' )));
+	input.push_back(std::make_tuple(1, 1, DefaultSymbolType ( 'b' )));
+	input.push_back(std::make_tuple(3, 2, DefaultSymbolType ( 'c' )));
+	input.push_back(std::make_tuple(3, 2, DefaultSymbolType ( 'a' )));
+	input.push_back(std::make_tuple(1, 3, DefaultSymbolType ( 'b' )));
+	input.push_back(std::make_tuple(2, 3, DefaultSymbolType ( 'b' )));
+
+	string::String res = string::compress::LZ77Decompress::decompress ( input );
+
+	std::string string = "aabaacaaaaaababab";
+	string::String expectedString = string::stringFrom ( string );
+
+	CPPUNIT_ASSERT ( res == expectedString );
+
+}
+
+void LZ77DecompressTest::testLZ77Decompress2 ( ) {
+
+	std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > input;
+
+	input.push_back(std::make_tuple(0, 0, DefaultSymbolType ( 'a' )));
+	input.push_back(std::make_tuple(1, 1, DefaultSymbolType ( 'c' )));
+	input.push_back(std::make_tuple(3, 4, DefaultSymbolType ( 'b' )));
+	input.push_back(std::make_tuple(3, 3, DefaultSymbolType ( 'a' )));
+	input.push_back(std::make_tuple(1, 2, DefaultSymbolType ( 'c' )));
+
+	string::String res = string::compress::LZ77Decompress::decompress ( input );
+
+	std::string string = "aacaacabcabaaac";
+	string::String expectedString = string::stringFrom ( string );
+
+	CPPUNIT_ASSERT ( res == expectedString );
+
+}
+
+void LZ77DecompressTest::testLZ77Decompress3 ( ) {
+
+	std::string string = "";
+	string::String input = string::stringFrom ( string );
+
+	std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > out = string::compress::LZ77::compress ( input, 6, 4 );
+
+	string::String res = string::compress::LZ77Decompress::decompress ( out );
+
+	CPPUNIT_ASSERT ( input == res );
+}
diff --git a/alib2algo/test-src/string/compress/LZ77DecompressTest.h b/alib2algo/test-src/string/compress/LZ77DecompressTest.h
new file mode 100644
index 0000000000000000000000000000000000000000..f4a3eecfe66078db99ff6d343524bb2c0c0d1023
--- /dev/null
+++ 
b/alib2algo/test-src/string/compress/LZ77DecompressTest.h
@@ -0,0 +1,23 @@
+#ifndef LZ77DECOMPRESS_TEST
+#define LZ77DECOMPRESS_TEST
+
+#include <cppunit/extensions/HelperMacros.h>
+
+// CppUnit fixture declaring the three LZ77Decompress test cases; their bodies live in LZ77DecompressTest.cpp.
+class LZ77DecompressTest : public CppUnit::TestFixture {
+	CPPUNIT_TEST_SUITE ( LZ77DecompressTest );
+	CPPUNIT_TEST ( testLZ77Decompress1 );
+	CPPUNIT_TEST ( testLZ77Decompress2 );
+	CPPUNIT_TEST ( testLZ77Decompress3 );
+	CPPUNIT_TEST_SUITE_END ( );
+
+public:
+	void setUp ( );
+	void tearDown ( );
+
+	void testLZ77Decompress1 ( );
+	void testLZ77Decompress2 ( );
+	void testLZ77Decompress3 ( );
+};
+
+#endif // LZ77DECOMPRESS_TEST
\ No newline at end of file
diff --git a/alib2algo/test-src/string/compress/LZ77Test.cpp b/alib2algo/test-src/string/compress/LZ77Test.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..88bec4d7f0a8c7a191c11582a99f64c52edecb96
--- /dev/null
+++ b/alib2algo/test-src/string/compress/LZ77Test.cpp
@@ -0,0 +1,56 @@
+#include "LZ77Test.h"
+
+#include <string/String.h>
+#include <string/compress/LZ77.h>
+
+#define CPPUNIT_IMPLY( x, y ) CPPUNIT_ASSERT ( !( x ) || ( y ) )
+
+#include <iostream>
+#include <tuple>
+
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( LZ77Test, "stringology" );
+CPPUNIT_TEST_SUITE_REGISTRATION ( LZ77Test );
+
+void LZ77Test::setUp ( ) {
+}
+
+void LZ77Test::tearDown ( ) {
+}
+
+// Compresses a text with runs of repeated symbols ( search buffer 6, lookahead buffer 4 ).
+void LZ77Test::testLZ77_1 ( ) {
+	const std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > expectedOutput {
+		std::make_tuple ( 0, 0, DefaultSymbolType ( 'a' ) ),
+		std::make_tuple ( 1, 1, DefaultSymbolType ( 'b' ) ),
+		std::make_tuple ( 3, 2, DefaultSymbolType ( 'c' ) ),
+		std::make_tuple ( 3, 2, DefaultSymbolType ( 'a' ) ),
+		std::make_tuple ( 1, 3, DefaultSymbolType ( 'b' ) ),
+		std::make_tuple ( 2, 3, DefaultSymbolType ( 'b' ) )
+	};
+
+	std::string text = "aabaacaaaaaababab";
+	string::String input = string::stringFrom ( text );
+
+	const auto res = string::compress::LZ77::compress ( input, 6, 4 );
+
+	CPPUNIT_ASSERT ( res == expectedOutput );
+}
+
+// Same buffer sizes on a text over three symbols; the third triple matches the whole lookahead buffer.
+void LZ77Test::testLZ77_2 ( ) {
+	const std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > expectedOutput {
+		std::make_tuple ( 0, 0, DefaultSymbolType ( 'a' ) ),
+		std::make_tuple ( 1, 1, DefaultSymbolType ( 'c' ) ),
+		std::make_tuple ( 3, 4, DefaultSymbolType ( 'b' ) ),
+		std::make_tuple ( 3, 3, DefaultSymbolType ( 'a' ) ),
+		std::make_tuple ( 1, 2, DefaultSymbolType ( 'c' ) )
+	};
+
+	std::string text = "aacaacabcabaaac";
+	string::String input = string::stringFrom ( text );
+
+	const auto res = string::compress::LZ77::compress ( input, 6, 4 );
+
+	CPPUNIT_ASSERT ( res == expectedOutput );
+}
diff --git a/alib2algo/test-src/string/compress/LZ77Test.h b/alib2algo/test-src/string/compress/LZ77Test.h
new file mode 100644
index 0000000000000000000000000000000000000000..5e30daa043f98c98ff1e093429120a341c576436
--- /dev/null
+++ b/alib2algo/test-src/string/compress/LZ77Test.h
@@ -0,0 +1,21 @@
+#ifndef LZ77_TEST
+#define LZ77_TEST
+
+#include <cppunit/extensions/HelperMacros.h>
+
+// CppUnit fixture declaring the two LZ77 compression test cases; their bodies live in LZ77Test.cpp.
+class LZ77Test : public CppUnit::TestFixture {
+	CPPUNIT_TEST_SUITE ( LZ77Test );
+	CPPUNIT_TEST ( testLZ77_1 );
+	CPPUNIT_TEST ( testLZ77_2 );
+	CPPUNIT_TEST_SUITE_END ( );
+
+public:
+	void setUp ( );
+	void tearDown ( );
+
+	void testLZ77_1 ( );
+	void testLZ77_2 ( );
+};
+
+#endif // LZ77_TEST
\ No newline at end of file