Created
January 18, 2020 18:30
-
-
Save odzhan/77280db940c42f73ced7b6dd6e9055f3 to your computer and use it in GitHub Desktop.
ZX7 compressor
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#define MAX_OFFSET 2176 /* range 1..2176 */ | |
#define MAX_LEN 65536 /* range 2..65536 */ | |
typedef struct match_t { | |
size_t index; | |
struct match_t *next; | |
} Match; | |
typedef struct optimal_t { | |
size_t bits; | |
int offset; | |
int len; | |
} Optimal; | |
Optimal *optimize(unsigned char *input_data, size_t input_size); | |
int elias_gamma_bits(int value) { | |
int bits; | |
bits = 1; | |
while (value > 1) { | |
bits += 2; | |
value >>= 1; | |
} | |
return bits; | |
} | |
int count_bits(int offset, int len) { | |
return 1 + (offset > 128 ? 12 : 8) + elias_gamma_bits(len-1); | |
} | |
Optimal* optimize(unsigned char *input_data, size_t input_size) { | |
size_t *min; | |
size_t *max; | |
Match *matches; | |
Match *match_slots; | |
Optimal *optimal; | |
Match *match; | |
int match_index; | |
int offset; | |
size_t len; | |
size_t best_len; | |
size_t bits; | |
size_t i; | |
/* allocate all data structures at once */ | |
min = (size_t *)calloc(MAX_OFFSET+1, sizeof(size_t)); | |
max = (size_t *)calloc(MAX_OFFSET+1, sizeof(size_t)); | |
matches = (Match *)calloc(256*256, sizeof(Match)); | |
match_slots = (Match *)calloc(input_size, sizeof(Match)); | |
optimal = (Optimal *)calloc(input_size, sizeof(Optimal)); | |
if (!min || !max || !matches || !match_slots || !optimal) { | |
fprintf(stderr, "Error: Insufficient memory\n"); | |
exit(1); | |
} | |
/* first byte is always literal */ | |
optimal[0].bits = 8; | |
/* process remaining bytes */ | |
for (i = 1; i < input_size; i++) { | |
optimal[i].bits = optimal[i-1].bits + 9; | |
match_index = input_data[i-1] << 8 | input_data[i]; | |
best_len = 1; | |
for (match = &matches[match_index]; match->next != NULL && best_len < MAX_LEN; match = match->next) { | |
offset = i - match->next->index; | |
if (offset > MAX_OFFSET) { | |
match->next = NULL; | |
break; | |
} | |
for (len = 2; len <= MAX_LEN; len++) { | |
if (len > best_len) { | |
best_len = len; | |
bits = optimal[i-len].bits + count_bits(offset, len); | |
if (optimal[i].bits > bits) { | |
optimal[i].bits = bits; | |
optimal[i].offset = offset; | |
optimal[i].len = len; | |
} | |
} else if (i+1 == max[offset]+len && max[offset] != 0) { | |
len = i-min[offset]; | |
if (len > best_len) { | |
len = best_len; | |
} | |
} | |
if (i < offset+len || input_data[i-len] != input_data[i-len-offset]) { | |
break; | |
} | |
} | |
min[offset] = i+1-len; | |
max[offset] = i; | |
} | |
match_slots[i].index = i; | |
match_slots[i].next = matches[match_index].next; | |
matches[match_index].next = &match_slots[i]; | |
} | |
/* save time by releasing the largest block only, the O.S. will clean everything else later */ | |
free(match_slots); | |
return optimal; | |
} | |
uint32_t zx7_compress(Optimal *optimal, unsigned char *input_data, size_t input_size); | |
unsigned char* output_data; | |
size_t output_index; | |
size_t bit_index; | |
int bit_mask; | |
void write_byte(int value) { | |
output_data[output_index++] = value; | |
} | |
void write_bit(int value) { | |
if (bit_mask == 0) { | |
bit_mask = 128; | |
bit_index = output_index; | |
write_byte(0); | |
} | |
if (value > 0) { | |
output_data[bit_index] |= bit_mask; | |
} | |
bit_mask >>= 1; | |
} | |
void write_elias_gamma(int value) { | |
int i; | |
for (i = 2; i <= value; i <<= 1) { | |
write_bit(0); | |
} | |
while ((i >>= 1) > 0) { | |
write_bit(value & i); | |
} | |
} | |
uint32_t zx7_compress(Optimal *optimal, unsigned char *input_data, size_t input_size) { | |
size_t input_index; | |
size_t input_prev, outlen; | |
int offset1; | |
int mask; | |
int i; | |
/* calculate and allocate output buffer */ | |
input_index = input_size-1; | |
outlen = (optimal[input_index].bits+18+7)/8; | |
output_data = (unsigned char *)malloc(outlen); | |
if (!output_data) { | |
fprintf(stderr, "Error: Insufficient memory\n"); | |
exit(1); | |
} | |
/* un-reverse optimal sequence */ | |
optimal[input_index].bits = 0; | |
while (input_index > 0) { | |
input_prev = input_index - (optimal[input_index].len > 0 ? optimal[input_index].len : 1); | |
optimal[input_prev].bits = input_index; | |
input_index = input_prev; | |
} | |
output_index = 0; | |
bit_mask = 0; | |
/* first byte is always literal */ | |
write_byte(input_data[0]); | |
/* process remaining bytes */ | |
while ((input_index = optimal[input_index].bits) > 0) { | |
if (optimal[input_index].len == 0) { | |
/* literal indicator */ | |
write_bit(0); | |
/* literal value */ | |
write_byte(input_data[input_index]); | |
} else { | |
/* sequence indicator */ | |
write_bit(1); | |
/* sequence length */ | |
write_elias_gamma(optimal[input_index].len-1); | |
/* sequence offset */ | |
offset1 = optimal[input_index].offset-1; | |
if (offset1 < 128) { | |
write_byte(offset1); | |
} else { | |
offset1 -= 128; | |
write_byte((offset1 & 127) | 128); | |
for (mask = 1024; mask > 127; mask >>= 1) { | |
write_bit(offset1 & mask); | |
} | |
} | |
} | |
} | |
/* sequence indicator */ | |
write_bit(1); | |
/* end marker > MAX_LEN */ | |
for (i = 0; i < 16; i++) { | |
write_bit(0); | |
} | |
write_bit(1); | |
return outlen; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment