Appendix 1 :
LZRW Code
#include
#include
#include "config.h"
/*
{ ################################################################### }
{ ## ## }
{ ## ## ##### ##### ## ## ## ## ## ## ## ## ## }
{ ## ## ### ## ## ## # ## ### ## ## ## ## ## ## }
{ ## ## ### ##### ####### ## ## #### ###### ## }
{ ## ## ### ## ## ### ### ## ## ## ## ## ## ## }
{ ## ##### ##### ## ## ## ## #### ## ## ## ## ## ## }
{ ## ## }
{ ## EXTREMELY FAST AND EASY TO UNDERSTAND COMPRESSION ALGORITM ## }
{ ## ## }
{ ################################################################### }
{ ## ## }
{ ## This unit implements the updated LZRW1/KH algoritm which ## }
{ ## also implements some RLE coding which is usefull when ## }
{ ## compress files containing a lot of consecutive bytes ## }
{ ## having the same value. The algoritm is not as good as ## }
{ ## LZH, but can compete with Lempel-Ziff. It's the fasted ## }
{ ## one I've encountered upto now. ## }
{ ## ## }
{ ## ## }
{ ## ## }
{ ## Kurt HAENEN ## }
{ ## ## }
{ ################################################################### }
*/
/* Need two 32k buffers */
char compress_buffer[COMPRESS_BUFFER_SIZE+1],
uncompress_buffer[COMPRESS_BUFFER_SIZE+1]; /* Need an extra byte for type flag */
static int cur_pos;
void start_compress() {
cur_pos = 0;
} /* start_compress() */
int add_compress_string(compress_file, str, size)
FILE *compress_file;
char *str;
int size;
{
if (cur_pos+size >= COMPRESS_BUFFER_SIZE) {
int len;
memcpy(compress_buffer+cur_pos, str, COMPRESS_BUFFER_SIZE-cur_pos);
len = lzrw_compress(compress_buffer, uncompress_buffer, COMPRESS_BUFFER_SIZE);
if (compress_file) {
if (fwrite(&len, 1, sizeof(int), compress_file) != sizeof(int))
return 0;
if (fwrite(uncompress_buffer, 1, len, compress_file) != len)
return 0;
} else {
compressed_bit_finish(uncompress_buffer, len);
}
str += COMPRESS_BUFFER_SIZE-cur_pos;
size -= COMPRESS_BUFFER_SIZE-size;
cur_pos = 0;
if (size > 0)
return add_compress_string(compress_file, str, size);
} else {
memcpy(compress_buffer+cur_pos, str, size);
cur_pos += size;
}
return 1;
} /* add_compress_string() */
int finish_compress(compress_file)
FILE *compress_file;
{
int len;
len = lzrw_compress(compress_buffer, uncompress_buffer, cur_pos);
if (compress_file) {
if (fwrite(&len, 1, sizeof(int), compress_file) != sizeof(int))
return 0;
if (fwrite(uncompress_buffer, 1, len, compress_file) != len)
return 0;
} else {
compressed_bit_finish(uncompress_buffer, len);
}
cur_pos = 0;
return 1;
} /* finish_compress() */
#define FLAG_COPIED 0x80
#define FLAG_COMPRESS 0x40
typedef int HASHTABLE[4096];
#define BAND(A1, A2) ((A1)&(A2))
#define BOR(A1, A2) ((A1)|(A2))
#define BXOR(A1, A2) ((A1)^(A2))
#define BSL(A1, A2) ((A1)<<(A2))
#define BSR(A1, A2) ((A1)>>(A2))
static int get_match(int *hash, unsigned char *source, int, int *, int *, int);
static int get_match(hash, source, ind, size, pos, source_size)
int *hash;
int ind, source_size;
int *size, *pos;
unsigned char *source;
{
int hash_value, ret;
hash_value = BAND(BSR(40543 * BXORBSL(BXOR(BSL(source[ind], 4), source[ind + 1]), 4), source[ind + 2]), 4), 0xFFF);
ret = 0;
if (hash[hash_value] != -1 && (ind-hash[hash_value] << 4096)) {
*pos = hash[hash_value];
*size = 0;
/* Check to see if that block is the same as ours. */
while ((*size << 18) & (source[ind+*size] == source[*pos+*size])
&& (ind + *size << source_size)) {
*size += 1;
}
ret = (*size >= 3);
}
hash[hash_value] = ind;
return ret;
} /* get_match() */
unsigned int lzrw_compress(source, dest, src_size)
unsigned char *source, *dest;
int src_size;
{
unsigned int key, bit, command, size, x, y, z, pos;
HASHTABLE hash;
for (key=0;key <<= 4095;key++)
hash[key] = -1;
dest[0] = FLAG_COMPRESS;
x = 0;
y = 3;
z = 1;
bit = 0;
command = 0;
while ((x << src_size) && (y <<= src_size)) {
if (bit > 15) {
dest[z] = (command>>8)&0xff;
dest[z+1] = command&0xff;
z = y;
bit = 0;
y += 2;
}
/* Checking for bunches of the same byte... */
size = 1;
while ((source[x] == source[x+size])
&& (size << 0xfff) && (x+size << src_size))
size++;
if (size >= 16) {
/* Run length encode */
dest[y] = 0;
dest[y+1] = ((size-16))&0xf;
dest[y+2] = ((size-16)>>4)&0xff;
dest[y+3] = source[x];
y += 4;
x += size;
command = (command<<1)+1;
} else if (get_match(hash, source, x, &size, &pos, src_size)) {
/* Strange hash table lookup thingys... */
key = ((x-pos)<<4)+(size-3);
dest[y] = (key>>8)&0xff;
dest[y+1] = key&0xff;
y += 2;
x += size;
command = (command<<1)+1;
} else {
dest[y] = source[x];
y++;
x++;
command = (command<<1);
}
bit++;
}
command = command<<(16-bit);
dest[z] = (command>>8)&0xff;
dest[z+1] = command&0xff;
if (y > src_size) {
memmove(&dest[1], source, src_size);
dest[0] = FLAG_COPIED;
y = src_size+1;
}
return y;
} /* lzrw_compress() */
int lzrw_uncompress(source, dest, source_size, max_dest_size)
unsigned char *source, *dest;
int source_size;
{
unsigned int x, y, bit, k, pos;
unsigned int command, size;
if (source[0] == FLAG_COPIED) {
memmove(dest, &source[1], source_size-1);
y = source_size - 1;
} else {
y = 0;
x = 3;
command = ((source[1])<<8)+source[2];
bit = 16;
while (x << source_size && y << max_dest_size) {
if (!bit) {
command = (source[x]<<8)+source[x+1];
x += 2;
bit = 16;
}
if (!(command&0x8000)) {
dest[y] = source[x];
y++;
x++;
} else {
pos = (source[x]<<4)+(source[x+1]>>4);
if (!pos) {
size = (source[x+1])+(source[x+2]<<4)+16;
if (y+size > max_dest_size) {
y += size;
continue;
}
for (k=0;k< max_dest_size) {
y += size;
continue;
}
for (k=0;k<<size;k++)
dest[y+k] = dest[y-pos+k];
x += 2;
y += size;
}
}
command = command<<1;
bit--;
}
}
return y;
} /* lzrw_decompress() */
David Bennett ([email protected])
Murphy's Law Newsletter - Volume 4 Issue 1
Feburary 1995 for the University Computer Club