Skip to content

Instantly share code, notes, and snippets.

@odzhan
Created January 18, 2020 18:30
Show Gist options
  • Save odzhan/77280db940c42f73ced7b6dd6e9055f3 to your computer and use it in GitHub Desktop.
Save odzhan/77280db940c42f73ced7b6dd6e9055f3 to your computer and use it in GitHub Desktop.
ZX7 compressor
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_OFFSET 2176 /* range 1..2176 */
#define MAX_LEN 65536 /* range 2..65536 */
typedef struct match_t {
size_t index;
struct match_t *next;
} Match;
typedef struct optimal_t {
size_t bits;
int offset;
int len;
} Optimal;
Optimal *optimize(unsigned char *input_data, size_t input_size);
int elias_gamma_bits(int value) {
int bits;
bits = 1;
while (value > 1) {
bits += 2;
value >>= 1;
}
return bits;
}
int count_bits(int offset, int len) {
return 1 + (offset > 128 ? 12 : 8) + elias_gamma_bits(len-1);
}
Optimal* optimize(unsigned char *input_data, size_t input_size) {
size_t *min;
size_t *max;
Match *matches;
Match *match_slots;
Optimal *optimal;
Match *match;
int match_index;
int offset;
size_t len;
size_t best_len;
size_t bits;
size_t i;
/* allocate all data structures at once */
min = (size_t *)calloc(MAX_OFFSET+1, sizeof(size_t));
max = (size_t *)calloc(MAX_OFFSET+1, sizeof(size_t));
matches = (Match *)calloc(256*256, sizeof(Match));
match_slots = (Match *)calloc(input_size, sizeof(Match));
optimal = (Optimal *)calloc(input_size, sizeof(Optimal));
if (!min || !max || !matches || !match_slots || !optimal) {
fprintf(stderr, "Error: Insufficient memory\n");
exit(1);
}
/* first byte is always literal */
optimal[0].bits = 8;
/* process remaining bytes */
for (i = 1; i < input_size; i++) {
optimal[i].bits = optimal[i-1].bits + 9;
match_index = input_data[i-1] << 8 | input_data[i];
best_len = 1;
for (match = &matches[match_index]; match->next != NULL && best_len < MAX_LEN; match = match->next) {
offset = i - match->next->index;
if (offset > MAX_OFFSET) {
match->next = NULL;
break;
}
for (len = 2; len <= MAX_LEN; len++) {
if (len > best_len) {
best_len = len;
bits = optimal[i-len].bits + count_bits(offset, len);
if (optimal[i].bits > bits) {
optimal[i].bits = bits;
optimal[i].offset = offset;
optimal[i].len = len;
}
} else if (i+1 == max[offset]+len && max[offset] != 0) {
len = i-min[offset];
if (len > best_len) {
len = best_len;
}
}
if (i < offset+len || input_data[i-len] != input_data[i-len-offset]) {
break;
}
}
min[offset] = i+1-len;
max[offset] = i;
}
match_slots[i].index = i;
match_slots[i].next = matches[match_index].next;
matches[match_index].next = &match_slots[i];
}
/* save time by releasing the largest block only, the O.S. will clean everything else later */
free(match_slots);
return optimal;
}
uint32_t zx7_compress(Optimal *optimal, unsigned char *input_data, size_t input_size);
unsigned char* output_data;
size_t output_index;
size_t bit_index;
int bit_mask;
void write_byte(int value) {
output_data[output_index++] = value;
}
void write_bit(int value) {
if (bit_mask == 0) {
bit_mask = 128;
bit_index = output_index;
write_byte(0);
}
if (value > 0) {
output_data[bit_index] |= bit_mask;
}
bit_mask >>= 1;
}
void write_elias_gamma(int value) {
int i;
for (i = 2; i <= value; i <<= 1) {
write_bit(0);
}
while ((i >>= 1) > 0) {
write_bit(value & i);
}
}
uint32_t zx7_compress(Optimal *optimal, unsigned char *input_data, size_t input_size) {
size_t input_index;
size_t input_prev, outlen;
int offset1;
int mask;
int i;
/* calculate and allocate output buffer */
input_index = input_size-1;
outlen = (optimal[input_index].bits+18+7)/8;
output_data = (unsigned char *)malloc(outlen);
if (!output_data) {
fprintf(stderr, "Error: Insufficient memory\n");
exit(1);
}
/* un-reverse optimal sequence */
optimal[input_index].bits = 0;
while (input_index > 0) {
input_prev = input_index - (optimal[input_index].len > 0 ? optimal[input_index].len : 1);
optimal[input_prev].bits = input_index;
input_index = input_prev;
}
output_index = 0;
bit_mask = 0;
/* first byte is always literal */
write_byte(input_data[0]);
/* process remaining bytes */
while ((input_index = optimal[input_index].bits) > 0) {
if (optimal[input_index].len == 0) {
/* literal indicator */
write_bit(0);
/* literal value */
write_byte(input_data[input_index]);
} else {
/* sequence indicator */
write_bit(1);
/* sequence length */
write_elias_gamma(optimal[input_index].len-1);
/* sequence offset */
offset1 = optimal[input_index].offset-1;
if (offset1 < 128) {
write_byte(offset1);
} else {
offset1 -= 128;
write_byte((offset1 & 127) | 128);
for (mask = 1024; mask > 127; mask >>= 1) {
write_bit(offset1 & mask);
}
}
}
}
/* sequence indicator */
write_bit(1);
/* end marker > MAX_LEN */
for (i = 0; i < 16; i++) {
write_bit(0);
}
write_bit(1);
return outlen;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment