Skip to content
Snippets Groups Projects
Commit 80a6c2a3 authored by Jan Trávníček's avatar Jan Trávníček
Browse files

LZ77 compression and decompression

parent 1115e58c
No related branches found
No related tags found
1 merge request: !49 "First compression algorithms"
/*
* LZ77.cpp
*
* Created on: 1. 3. 2017
* Author: Jan Parma
*/
#include "LZ77.h"
namespace string {
namespace compress {
// Type-erased entry point: forwards the data held by the string wrapper, together with both
// buffer sizes, to dispatch ( ).
// NOTE(review): dispatch ( ) presumably selects the overload registered for the concrete string type — confirm in core/multipleDispatch.hpp.
std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > LZ77::compress ( const string::String & string, unsigned int searchBufferSize, unsigned int lookaheadBufferSize ) {
return dispatch ( string.getData ( ), searchBufferSize, lookaheadBufferSize );
}
// Binds the LinearString < > overload of LZ77::compress through RegistratorWrapper.
// NOTE(review): presumably this is what makes the overload reachable from dispatch ( ) — confirm.
auto LZ77Compress = LZ77::RegistratorWrapper < std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > >, string::LinearString < > > ( LZ77::compress );
} /* namespace compress */
} /* namespace string */
/*
* LZ77.h
*
* Created on: 1. 3. 2017
* Author: Jan Parma
*/
#ifndef _LZ77_H_
#define _LZ77_H_
#include <core/multipleDispatch.hpp>
#include <string/StringFeatures.h>
#include <tuple>
#include <vector>
#include <string/LinearString.h>
#include <string/String.h>
#include <exception/CommonException.h>
namespace string {
namespace compress {
// LZ77 compression of strings.
// The result is a vector of triples; as built by the LinearString overload, a triple holds
// ( distance back to the start of the match or 0 when nothing matched, length of the match,
// symbol that follows the match ).
class LZ77 : public std::SingleDispatch < LZ77, std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > >, const string::StringBase &, unsigned int, unsigned int > {
public:
// Entry point for the type-erased string wrapper; forwards to dispatch ( ).
static std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > compress ( const string::String & string, unsigned int searchBufferSize, unsigned int lookaheadBufferSize );
// Compresses a linear string.
// searchBufferSize: how many already processed symbols are searched for a match.
// lookaheadBufferSize: upper bound on the length of a single match.
// Throws exception::CommonException when either size is 0.
template < class SymbolType >
static std::vector < std::tuple < unsigned int, unsigned int, SymbolType > > compress ( const string::LinearString < SymbolType > & string, unsigned int searchBufferSize, unsigned int lookaheadBufferSize );
private:
// Counts how many consecutive symbols starting at first1 (up to and including index last1)
// are equal to the symbols starting at first2.
template < class SymbolType >
static int equal ( const string::LinearString < SymbolType > & string, unsigned int first1, unsigned int last1, unsigned int first2 );
};
// Main method that handles compression.
//
// Encodes the string as a sequence of triples ( offset, length, symbol ):
//  - offset: distance from the current position back to the start of the longest match found
//            in the search buffer (0 when nothing matched),
//  - length: number of matched symbols (at most lookaheadBufferSize),
//  - symbol: the symbol that immediately follows the match.
//
// searchBufferSize    number of already processed symbols that are searched for a match
// lookaheadBufferSize upper bound on the length of a single match
//
// Throws exception::CommonException when either buffer size is 0.
// Offsets and lengths are stored as unsigned int, so inputs longer than UINT_MAX symbols are not supported.
template < class SymbolType >
std::vector < std::tuple < unsigned int, unsigned int, SymbolType > > LZ77::compress ( const string::LinearString < SymbolType > & string, unsigned int searchBufferSize, unsigned int lookaheadBufferSize ) {
	if ( searchBufferSize == 0 )
		throw exception::CommonException ( "LZ77: search buffer size must be greater than 0" );

	if ( lookaheadBufferSize == 0 )
		throw exception::CommonException ( "LZ77: lookahead buffer size must be greater than 0" );

	const auto & content = string.getContent ( );
	const size_t length = content.size ( );

	std::vector < std::tuple < unsigned int, unsigned int, SymbolType > > output;
	size_t pointer = 0;

	while ( pointer < length ) {
		// The last remaining symbol is always reserved for the literal of the triple,
		// so a match may cover at most ( remaining symbols - 1 ).
		const size_t remaining = length - pointer - 1;
		const size_t lookahead = lookaheadBufferSize < remaining ? lookaheadBufferSize : remaining;

		// Left edge of the search buffer. Computed in unsigned arithmetic: converting the
		// buffer size to a signed int turned values above INT_MAX negative and thereby
		// silently disabled the search.
		const size_t windowStart = pointer > searchBufferSize ? pointer - searchBufferSize : 0;

		unsigned int maxMatch = 0;
		size_t sbPointer = 0;

		if ( lookahead > 0 ) {
			// Walk from the nearest candidate to the farthest one; the strict comparison below
			// keeps the nearest candidate when several give the same match length.
			for ( size_t candidate = pointer; candidate-- > windowStart; ) {
				const unsigned int match = static_cast < unsigned int > ( equal ( string, static_cast < unsigned int > ( pointer ), static_cast < unsigned int > ( pointer + lookahead - 1 ), static_cast < unsigned int > ( candidate ) ) );

				if ( maxMatch < match ) {
					maxMatch = match;
					sbPointer = candidate;
				}
			}
		}

		const unsigned int offset = maxMatch == 0 ? 0 : static_cast < unsigned int > ( pointer - sbPointer );
		output.emplace_back ( offset, maxMatch, content[pointer + maxMatch] );

		// Skip the matched symbols and the literal that was just emitted.
		pointer += maxMatch + 1;
	}

	return output;
}
// Returns the length of the common run of two substrings of the given string: the first one
// starts at index first1 and ends at index last1 (inclusive), the second one starts at index first2.
// Comparison stops at the first differing pair of symbols or once last1 has been compared.
template < class SymbolType >
int LZ77::equal ( const string::LinearString < SymbolType > & string, unsigned int first1, unsigned int last1, unsigned int first2 ) {
	int matched = 0;

	for ( unsigned int lhs = first1, rhs = first2; lhs <= last1; ++lhs, ++rhs ) {
		if ( string.getContent ( )[lhs] != string.getContent ( )[rhs] )
			break;

		++matched;
	}

	return matched;
}
} /* namespace compress */
} /* namespace string */
#endif /* _LZ77_H_ */
/*
* LZ77Decompress.cpp
*
* Created on: 1. 3. 2017
* Author: Jan Parma
*/
#include "LZ77Decompress.h"
namespace string {
namespace compress {
// Entry point working with the type-erased string wrapper: forwards the triples to dispatch ( )
// and returns its result as string::String.
// NOTE(review): dispatch ( ) presumably selects the overload registered for DefaultSymbolType triples — confirm in core/multipleDispatch.hpp.
string::String LZ77Decompress::decompress ( const std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > & input ) {
return dispatch ( input );
}
// Binds the DefaultSymbolType overload of LZ77Decompress::decompress through RegistratorWrapper.
// NOTE(review): presumably this is what makes the overload reachable from dispatch ( ) — confirm.
auto LZ77Decompress = LZ77Decompress::RegistratorWrapper < string::LinearString < >, std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > > ( LZ77Decompress::decompress );
} /* namespace compress */
} /* namespace string */
/*
* LZ77Decompress.h
*
* Created on: 1. 3. 2017
* Author: Jan Parma
*/
#ifndef _LZ77DECOMPRESS_H_
#define _LZ77DECOMPRESS_H_
// NOTE(review): neither macro is referenced in the visible part of this header; they look like
// leftover default buffer sizes — confirm whether any includer relies on them before removing.
#define SEARCH_BUFFER_LENGTH 6
#define LOOKAHEAD_BUFFER_LENGTH 4
#include <core/multipleDispatch.hpp>
#include <string/StringFeatures.h>
#include <set>
#include <stdexcept>
#include <tuple>
#include <vector>
#include <string/LinearString.h>
#include <string/String.h>
namespace string {
namespace compress {
// LZ77 decompression: rebuilds a string from a vector of triples.
// As consumed by the template overload, a triple holds ( distance back from the end of the
// output produced so far, number of symbols to copy from there, symbol appended after the copy ).
class LZ77Decompress : public std::SingleDispatch < LZ77Decompress, string::String, const std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > & > {
public:
// Entry point working with the type-erased string wrapper; forwards to dispatch ( ).
static string::String decompress ( const std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > & input );
// Rebuilds the linear string described by the triples; every appended symbol is also added
// to the alphabet of the result.
template < class SymbolType >
static string::LinearString < SymbolType > decompress ( const std::vector < std::tuple < unsigned int, unsigned int, SymbolType > > & input );
};
// Main method that handles decompression.
//
// Rebuilds the string from triples ( offset, length, symbol ): for every triple, `length`
// symbols are copied one by one from the position `offset` symbols before the current end of
// the output (the copied range may overlap the symbols being appended), and then `symbol`
// is appended. Each appended symbol is also added to the alphabet of the output.
//
// Throws std::invalid_argument when a triple with a non-zero length refers outside of the
// output produced so far (offset 0 or offset larger than the current output length); such a
// triple would otherwise index the output out of bounds.
template < class SymbolType >
string::LinearString < SymbolType > LZ77Decompress::decompress ( const std::vector < std::tuple < unsigned int, unsigned int, SymbolType > > & input ) {
	string::LinearString < SymbolType > output;

	for ( const std::tuple < unsigned int, unsigned int, SymbolType > & triple : input ) {
		const unsigned int offset = std::get < 0 > ( triple );
		const unsigned int length = std::get < 1 > ( triple );
		const SymbolType & literal = std::get < 2 > ( triple );

		// The output only grows while copying, so checking the first source index is sufficient.
		if ( length > 0 && ( offset == 0 || offset > output.getContent ( ).size ( ) ) )
			throw std::invalid_argument ( "LZ77Decompress: back-reference points outside of the decompressed data" );

		// A zero-length triple simply skips this loop and contributes only its literal.
		for ( unsigned int copied = 0; copied < length; ++copied ) {
			// Copy the symbol out first: appending may invalidate references into the content.
			const SymbolType symbol = output.getContent ( )[output.getContent ( ).size ( ) - offset];
			output.extendAlphabet ( std::set < SymbolType > { symbol } );
			output.appendSymbol ( symbol );
		}

		output.extendAlphabet ( std::set < SymbolType > { literal } );
		output.appendSymbol ( literal );
	}

	return output;
}
} /* namespace compress */
} /* namespace string */
#endif /* _LZ77DECOMPRESS_H_ */
#include "LZ77DecompressTest.h"
#include <string/String.h>
#include <string/compress/LZ77.h>
#include <string/compress/LZ77Decompress.h>
#define CPPUNIT_IMPLY( x, y ) CPPUNIT_ASSERT ( !( x ) || ( y ) )
#include <iostream>
#include <tuple>
CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( LZ77DecompressTest, "stringology" );
CPPUNIT_TEST_SUITE_REGISTRATION ( LZ77DecompressTest );
// Fixture hook; intentionally empty, the tests build all their data locally.
void LZ77DecompressTest::setUp ( ) {
}
// Fixture hook; intentionally empty, there is nothing to release.
void LZ77DecompressTest::tearDown ( ) {
}
void LZ77DecompressTest::testLZ77Decompress1 ( ) {
std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > input;
input.push_back(std::make_tuple(0, 0, DefaultSymbolType ( 'a' )));
input.push_back(std::make_tuple(1, 1, DefaultSymbolType ( 'b' )));
input.push_back(std::make_tuple(3, 2, DefaultSymbolType ( 'c' )));
input.push_back(std::make_tuple(3, 2, DefaultSymbolType ( 'a' )));
input.push_back(std::make_tuple(1, 3, DefaultSymbolType ( 'b' )));
input.push_back(std::make_tuple(2, 3, DefaultSymbolType ( 'b' )));
string::String res = string::compress::LZ77Decompress::decompress ( input );
std::string string = "aabaacaaaaaababab";
string::String expectedString = string::stringFrom ( string );
CPPUNIT_ASSERT ( res == expectedString );
}
void LZ77DecompressTest::testLZ77Decompress2 ( ) {
std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > input;
input.push_back(std::make_tuple(0, 0, DefaultSymbolType ( 'a' )));
input.push_back(std::make_tuple(1, 1, DefaultSymbolType ( 'c' )));
input.push_back(std::make_tuple(3, 4, DefaultSymbolType ( 'b' )));
input.push_back(std::make_tuple(3, 3, DefaultSymbolType ( 'a' )));
input.push_back(std::make_tuple(1, 2, DefaultSymbolType ( 'c' )));
string::String res = string::compress::LZ77Decompress::decompress ( input );
std::string string = "aacaacabcabaaac";
string::String expectedString = string::stringFrom ( string );
CPPUNIT_ASSERT ( res == expectedString );
}
void LZ77DecompressTest::testLZ77Decompress3 ( ) {
std::string string = "";
string::String input = string::stringFrom ( string );
std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > out = string::compress::LZ77::compress ( input, 6, 4 );
string::String res = string::compress::LZ77Decompress::decompress ( out );
CPPUNIT_ASSERT ( input == res );
}
#ifndef LZ77DECOMPRESS_TEST
#define LZ77DECOMPRESS_TEST
#include <cppunit/extensions/HelperMacros.h>
// CppUnit fixture for string::compress::LZ77Decompress.
class LZ77DecompressTest : public CppUnit::TestFixture {
// Test cases listed in the suite declaration.
CPPUNIT_TEST_SUITE ( LZ77DecompressTest );
CPPUNIT_TEST ( testLZ77Decompress1 );
CPPUNIT_TEST ( testLZ77Decompress2 );
CPPUNIT_TEST ( testLZ77Decompress3 );
CPPUNIT_TEST_SUITE_END ( );
public:
// Fixture hooks; both are empty in the implementation.
void setUp ( );
void tearDown ( );
// Decompression of hand-written triples compared against a known plain text.
void testLZ77Decompress1 ( );
void testLZ77Decompress2 ( );
// Compress/decompress round trip on the empty string.
void testLZ77Decompress3 ( );
};
#endif // LZ77DECOMPRESS_TEST
\ No newline at end of file
#include "LZ77Test.h"
#include <string/String.h>
#include <string/compress/LZ77.h>
#define CPPUNIT_IMPLY( x, y ) CPPUNIT_ASSERT ( !( x ) || ( y ) )
#include <iostream>
#include <tuple>
CPPUNIT_TEST_SUITE_NAMED_REGISTRATION ( LZ77Test, "stringology" );
CPPUNIT_TEST_SUITE_REGISTRATION ( LZ77Test );
// Fixture hook; intentionally empty, the tests build all their data locally.
void LZ77Test::setUp ( ) {
}
// Fixture hook; intentionally empty, there is nothing to release.
void LZ77Test::tearDown ( ) {
}
void LZ77Test::testLZ77_1 ( ) {
std::string testStr = "aabaacaaaaaababab";
string::String string = string::stringFrom ( testStr );
std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > res = string::compress::LZ77::compress ( string, 6, 4 );
std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > expectedOutput;
expectedOutput.push_back(std::make_tuple(0, 0, DefaultSymbolType ( 'a' )));
expectedOutput.push_back(std::make_tuple(1, 1, DefaultSymbolType ( 'b' )));
expectedOutput.push_back(std::make_tuple(3, 2, DefaultSymbolType ( 'c' )));
expectedOutput.push_back(std::make_tuple(3, 2, DefaultSymbolType ( 'a' )));
expectedOutput.push_back(std::make_tuple(1, 3, DefaultSymbolType ( 'b' )));
expectedOutput.push_back(std::make_tuple(2, 3, DefaultSymbolType ( 'b' )));
CPPUNIT_ASSERT ( res == expectedOutput );
}
void LZ77Test::testLZ77_2 ( ) {
std::string testStr = "aacaacabcabaaac";
string::String string = string::stringFrom ( testStr );
std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > res = string::compress::LZ77::compress ( string, 6, 4 );
std::vector < std::tuple < unsigned int, unsigned int, DefaultSymbolType > > expectedOutput;
expectedOutput.push_back(std::make_tuple(0, 0, DefaultSymbolType ( 'a' )));
expectedOutput.push_back(std::make_tuple(1, 1, DefaultSymbolType ( 'c' )));
expectedOutput.push_back(std::make_tuple(3, 4, DefaultSymbolType ( 'b' )));
expectedOutput.push_back(std::make_tuple(3, 3, DefaultSymbolType ( 'a' )));
expectedOutput.push_back(std::make_tuple(1, 2, DefaultSymbolType ( 'c' )));
CPPUNIT_ASSERT ( res == expectedOutput );
}
#ifndef LZ77_TEST
#define LZ77_TEST
#include <cppunit/extensions/HelperMacros.h>
// CppUnit fixture for string::compress::LZ77.
class LZ77Test : public CppUnit::TestFixture {
// Test cases listed in the suite declaration.
CPPUNIT_TEST_SUITE ( LZ77Test );
CPPUNIT_TEST ( testLZ77_1 );
CPPUNIT_TEST ( testLZ77_2 );
CPPUNIT_TEST_SUITE_END ( );
public:
// Fixture hooks; both are empty in the implementation.
void setUp ( );
void tearDown ( );
// Compression of a fixed plain text compared against the expected triples.
void testLZ77_1 ( );
void testLZ77_2 ( );
};
#endif // LZ77_TEST
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment