Last active
April 17, 2018 23:28
-
-
Save vurtun/2f756222c677e03adae7e88974c9b2ef to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <assert.h> | |
#include <limits.h> | |
#include <sys/time.h> | |
#include <unistd.h> | |
#include <time.h> | |
/* =============================================================== | |
* UTIL | |
* ===============================================================*/ | |
#define cast(t,p) ((t)(p)) | |
#define flag(n) ((1u)<<(n)) | |
#define min(a,b) ((a)<(b)?(a):(b)) | |
#define max(a,b) ((a)>(b)?(a):(b)) | |
#define cntof(a) ((int)(sizeof(a)/sizeof((a)[0]))) | |
#define BITS_PER_BYTE 8 | |
#define BITS_PER_LONG (sizeof(long)*BITS_PER_BYTE) | |
#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) | |
#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) | |
#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(unsigned long)) | |
#define BITMAP_LAST_WORD_MASK(nbits)(((nbits) % BITS_PER_LONG) ? (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL) | |
typedef unsigned char uchar; | |
typedef unsigned short ushort; | |
struct file_contents { | |
unsigned valid:1; | |
int size; | |
uchar *data; | |
}; | |
static struct file_contents | |
read_file_content(const char *file) | |
{ | |
FILE *fp = 0; | |
struct file_contents res; | |
memset(&res, 0, sizeof(res)); | |
fp = fopen(file, "rb"); | |
if (!fp) return res; | |
res.valid = 1; | |
fseek(fp, 0, SEEK_END); | |
res.size = (int)ftell(fp); | |
fseek(fp, 0, SEEK_SET); | |
res.data = calloc(1,(size_t)res.size); | |
res.size = (int)fread(res.data, 1,(size_t)res.size, fp); | |
fclose(fp); | |
return res; | |
} | |
static long | |
timestamp(void) | |
{ | |
struct timeval tv; | |
if (gettimeofday(&tv, NULL) < 0) return 0; | |
return (long)((long)tv.tv_sec * 1000 + (long)tv.tv_usec/1000); | |
} | |
static void | |
bitmap_shift_left(unsigned long *dst, const unsigned long *src, | |
unsigned int shift, unsigned int nbits) | |
{ | |
int k; | |
unsigned int lim = BITS_TO_LONGS(nbits); | |
unsigned int off = shift/BITS_PER_LONG; | |
unsigned int rem = shift % BITS_PER_LONG; | |
for (k = lim - off - 1; k >= 0; --k) { | |
unsigned long upper, lower; | |
if (rem && k > 0) | |
lower = src[k - 1] >> (BITS_PER_LONG - rem); | |
else lower = 0; | |
upper = src[k] << rem; | |
dst[k + off] = lower | upper; | |
} | |
if (off) | |
memset(dst, 0, off*sizeof(unsigned long)); | |
} | |
static void | |
bitmap_shift_right(unsigned long *dst, const unsigned long *src, | |
unsigned shift, unsigned nbits) | |
{ | |
unsigned k, lim = BITS_TO_LONGS(nbits); | |
unsigned off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; | |
unsigned long mask = BITMAP_LAST_WORD_MASK(nbits); | |
for (k = 0; off + k < lim; ++k) { | |
unsigned long upper, lower; | |
if (!rem || off + k + 1 >= lim) | |
upper = 0; | |
else { | |
upper = src[off + k + 1]; | |
if (off + k + 1 == lim - 1) | |
upper &= mask; | |
upper <<= (BITS_PER_LONG - rem); | |
} | |
lower = src[off + k]; | |
if (off + k == lim - 1) | |
lower &= mask; | |
lower >>= rem; | |
dst[k] = lower | upper; | |
} | |
if (off) | |
memset(&dst[lim - off], 0, off*sizeof(unsigned long)); | |
} | |
static char* | |
bin(char *b, const uchar *c, int cnt, unsigned int bitcnt) | |
{ | |
int i; | |
unsigned long tmp[256]; | |
assert(sizeof(tmp) > bitcnt/BITS_PER_BYTE); | |
memcpy(tmp, c, bitcnt/BITS_PER_BYTE); | |
for (i = 0; i < cnt; ++i) { | |
b[i] = (tmp[0] & 0x01) ? '1': '0'; | |
bitmap_shift_right(tmp, tmp, 1, bitcnt); | |
} b[i] = '\0'; | |
return b; | |
} | |
/* =============================================================== | |
* PROFILER | |
* ===============================================================*/ | |
#define PROFILE_MAP(STAT)\ | |
STAT(COMPRESS)\ | |
STAT(UNCOMPRESS) | |
enum profile_type { | |
#define STAT(a) PROFILE_##a, | |
PROFILE_MAP(STAT) | |
#undef STAT | |
PROFILE_COUNT | |
}; | |
static const char *stat_group_name[] = { | |
#define STAT(a) #a, | |
PROFILE_MAP(STAT) | |
#undef STAT | |
}; | |
enum profile_flag { | |
PROFILE_FLAG_COMPRESS = flag(PROFILE_COMPRESS), | |
PROFILE_FLAG_UNCOMPRESS = flag(PROFILE_UNCOMPRESS) | |
}; | |
struct stat_group { | |
long time, begin, end; | |
int compressed; | |
int uncompressed; | |
}; | |
struct profiler { | |
unsigned flags; | |
enum profile_type cur; | |
struct stat_group grps[PROFILE_COUNT]; | |
}; | |
static double | |
percent(int num, int den) | |
{ | |
double res = 0.0f; | |
if (den != 0) | |
res = (double)num/(double)den; | |
return 100.0*res; | |
} | |
static void | |
profiler_begin(struct profiler *p, | |
enum profile_type type, int size) | |
{ | |
struct stat_group *sg; | |
sg = p->grps + type; | |
p->cur = type; | |
p->flags |= flag(type); | |
switch (type) { | |
case PROFILE_COMPRESS: | |
sg->uncompressed = size; | |
break; | |
case PROFILE_UNCOMPRESS: | |
sg->compressed = size; | |
break; | |
} sg->begin = timestamp(); | |
} | |
static void | |
profiler_end(struct profiler *p, int size) | |
{ | |
struct stat_group *sg; | |
sg = p->grps + p->cur; | |
sg->end = timestamp(); | |
sg->time = sg->end - sg->begin; | |
switch (p->cur) { | |
case PROFILE_COMPRESS: | |
sg->compressed = size; | |
break; | |
case PROFILE_UNCOMPRESS: | |
sg->uncompressed = size; | |
break; | |
} | |
} | |
static void | |
profiler_print(FILE *fp, const struct profiler *p) | |
{ | |
int i = 0; | |
fprintf(fp, "Profiler:\n"); | |
for (i = 0; i < PROFILE_COUNT; ++i) { | |
const struct stat_group *grp = p->grps + i; | |
if (!(p->flags & flag(i))) continue; | |
fprintf(fp, "\t%s:\n", stat_group_name[i]); | |
fprintf(fp, "\t\ttime: %lu ms\n", grp->time); | |
fprintf(fp, "\t\tsize: %.0f -> %.0f (%.02f%%)\n", | |
(double)grp->uncompressed, | |
(double)grp->compressed, | |
percent(grp->compressed,grp->uncompressed)); | |
} | |
} | |
/* =============================================================== | |
* HUFFMAN | |
* ===============================================================*/ | |
#define HUFFMAN_BIT_COUNT 256 | |
#define MAX_HUFFMAN_CODE_SIZE (HUFFMAN_BIT_COUNT/BITS_PER_LONG) | |
struct huff_symbol; | |
struct huff_node { | |
uchar codepoint; | |
unsigned short weight; | |
const struct huff_symbol *sym; | |
struct huff_node *parent; | |
struct huff_node *left; | |
struct huff_node *right; | |
struct huff_node *next; | |
}; | |
struct huff_symbol { | |
int cnt; | |
unsigned short weight; | |
uchar codepoint; | |
}; | |
struct huff_table_entry { | |
unsigned long code[MAX_HUFFMAN_CODE_SIZE]; | |
uchar bitcnt; | |
uchar symbol; | |
}; | |
static int | |
huf_compressed_size(int size) | |
{ | |
return max(size*2, size + 512*3+3); | |
} | |
static int | |
huf_symbol_compare(const void *p1, const void *p2) | |
{ | |
const struct huff_symbol *a = p1; | |
const struct huff_symbol *b = p2; | |
return (a->weight - b->weight); | |
} | |
static int | |
huf_compress(struct profiler *p, | |
uchar *out, const uchar *in, int size) | |
{ | |
int i = 0; | |
int node_count = 0; | |
int node_index = 0; | |
int leaf_index = 1; | |
uchar *marker = 0; | |
uchar *o = out; | |
struct huff_node *head = 0; | |
struct huff_node *tail = 0; | |
struct huff_node *root = 0; | |
#define MAX_HUFF_TABLE_COUNT 256 | |
#define MAX_HUFF_NODE_COUNT (MAX_HUFF_TABLE_COUNT*2) | |
struct huff_node nodes[MAX_HUFF_NODE_COUNT]; | |
struct huff_symbol syms[MAX_HUFF_TABLE_COUNT]; | |
struct huff_table_entry htbl[MAX_HUFF_TABLE_COUNT]; | |
/* I.) initialize table */ | |
for (i = 0; i < cntof(htbl); ++i) { | |
memset(&htbl[i].code, 0, sizeof(htbl[i].code)); | |
htbl[i].symbol = (uchar)i; | |
htbl[i].bitcnt = 0; | |
syms[i].codepoint = (uchar)i; | |
syms[i].weight = 0; | |
syms[i].cnt = 0; | |
} | |
/* initialize symbols */ | |
for (i = 0; i < size; ++i) | |
syms[in[i]].cnt++; | |
for (i = 0; i < cntof(syms); ++i) { | |
double weight = 0; | |
weight = (double)syms[i].cnt/(double)size; | |
syms[i].weight = (unsigned short)(weight*SHRT_MAX); | |
} qsort(syms, cntof(syms), sizeof(struct huff_symbol), huf_symbol_compare); | |
node_count = cntof(nodes); | |
for (i = 0; i < cntof(syms); ++i) | |
if (syms[i].cnt) break; | |
/* insert sorted symbols directly into first node heap */ | |
node_count = cntof(syms) - i; | |
for (node_index = 1; node_index <= node_count; ++node_index) { | |
struct huff_node *n = nodes + node_index; | |
n->sym = syms + i + node_index - 1; | |
n->codepoint = n->sym->codepoint; | |
n->weight = n->sym->weight; | |
n->parent = 0; | |
n->left = 0; | |
n->right = 0; | |
n->next = 0; | |
} | |
/* II.) generate tree */ | |
while (leaf_index <= node_count || tail) { | |
struct huff_node *first = 0; | |
struct huff_node *second = 0; | |
struct huff_node *parent = 0; | |
/* get first node with lowest weight */ | |
struct huff_node *leaf = 0; | |
struct huff_node *inner = 0; | |
if (tail) inner = tail; | |
if (leaf_index <= node_count) | |
leaf = nodes + leaf_index; | |
first = (!inner) ? leaf: (!leaf) ? inner: (inner->weight < leaf->weight) ? inner: leaf; | |
if (first == inner) { | |
tail = tail->next; | |
if (first == head) | |
head = 0; | |
} else leaf_index++; | |
leaf = 0; inner = 0; | |
if (tail) inner = tail; | |
if (leaf_index <= node_count) | |
leaf = nodes + leaf_index; | |
second = (!inner) ? leaf: (!leaf) ? inner: (inner->weight < leaf->weight) ? inner: leaf; | |
if (!second) { | |
/* reached root element */ | |
assert(!first->sym); | |
nodes[0] = *first; | |
root = first; | |
first->parent = 0; | |
break; | |
} else if (second == inner) { | |
tail = tail->next; | |
if (second == head) | |
head = 0; | |
} else leaf_index++; | |
/* create parent node from both nodes */ | |
parent = nodes + node_index++; | |
parent->left = first; | |
parent->right = second; | |
parent->weight = first->weight + second->weight; | |
parent->parent = 0; | |
parent->sym = 0; | |
parent->next = 0; | |
/* insert parent node at end of queue */ | |
if (!head) { | |
head = parent; | |
tail = parent; | |
} else { | |
head->next = parent; | |
head = parent; | |
} | |
/* set node as parent */ | |
first->parent = parent; | |
second->parent = parent; | |
} | |
/* III.) generate table from tree */ | |
{int si = 0; | |
for (si = 0; si < node_count; ++si) { | |
const struct huff_node *node = nodes + si + 1; | |
const struct huff_symbol *sym = node->sym; | |
/* init table entry */ | |
struct huff_table_entry *entry = htbl + sym->codepoint; | |
entry->symbol = sym->codepoint; | |
/* walk from leaf to root and get bitcode and its length*/ | |
{const struct huff_node *prev = node; | |
const struct huff_node *iter = node->parent; | |
while (iter) { | |
bitmap_shift_left(entry->code, entry->code, 1, HUFFMAN_BIT_COUNT); | |
if (iter->left == prev) | |
entry->code[0] |= 0x01u; | |
else entry->code[0] &= ~0x01u; | |
entry->bitcnt++; | |
prev = iter; | |
iter = iter->parent; | |
}} | |
}} | |
/* write out number of nodes */ | |
node_index-=1; | |
memcpy(out, &node_index, 2); | |
out += 2; | |
marker = out; | |
out++; | |
/* III.) Save tree to memory */ | |
{int qhead = 0, qtail = 1; | |
struct huff_node *que[MAX_HUFF_NODE_COUNT+1]; | |
que[qtail] = root; | |
while (qhead < qtail) { | |
struct huff_node *n = que[++qhead]; | |
*out++ = n->codepoint; | |
node_index--; | |
assert(node_index >= 0); | |
if (n->left) { | |
que[++qtail] = n->left; | |
assert((uchar)(qtail - qhead) == (qtail - qhead)); | |
*out++ = (uchar)(qtail - qhead); | |
} else *out++ = 0; | |
if (n->right) { | |
que[++qtail] = n->right; | |
assert((uchar)(qtail - qhead) == (qtail - qhead)); | |
*out++ = (uchar)(qtail - qhead); | |
} else *out++ = 0; | |
}} | |
/* IV.) Compression */ | |
{int si, bitoff = 0; | |
for (si = 0; si < size; ++si) { | |
unsigned long mask[MAX_HUFFMAN_CODE_SIZE]; | |
unsigned char sym = *((const unsigned char*)in + si); | |
struct huff_table_entry *entry = &htbl[sym]; | |
if (bitoff + entry->bitcnt < BITS_PER_BYTE) { | |
/* symbol still fits into byte */ | |
bitmap_shift_left(mask, entry->code, bitoff, HUFFMAN_BIT_COUNT); | |
*out |= ((uchar*)mask)[0]; | |
bitoff += entry->bitcnt; | |
} else { | |
/* symbol across byte boundaries */ | |
int remain = entry->bitcnt; | |
if (bitoff) { | |
bitmap_shift_left(mask, entry->code, bitoff, HUFFMAN_BIT_COUNT); | |
remain = entry->bitcnt - (BITS_PER_BYTE - bitoff); | |
*out |= ((uchar*)mask)[0]; | |
out++; | |
bitmap_shift_right(mask, entry->code, BITS_PER_BYTE-bitoff, HUFFMAN_BIT_COUNT); | |
} else memcpy(mask, entry->code, sizeof(entry->code)); | |
bitoff = remain % BITS_PER_BYTE; | |
memcpy(out, mask, (size_t)(remain / BITS_PER_BYTE)); | |
out += remain / BITS_PER_BYTE; | |
if (bitoff) *out = ((uchar*)mask)[remain/BITS_PER_BYTE]; | |
} | |
} *marker = (uchar)bitoff;} | |
*out = 0; | |
return (int)(out-o); | |
} | |
static int | |
huf_decompress(struct profiler *p, | |
uchar *out, const uchar *in, int size) | |
{ | |
const uchar *e = in + size, *o = out; | |
unsigned short ncnt = 0; | |
const uchar *tree = 0; | |
const uchar *iter = 0; | |
uchar end_bits = 0; | |
size_t bitoff = 0; | |
memcpy(&ncnt, in, 2); | |
end_bits = *(in+2); | |
tree = in + 3; | |
in = tree + 3*ncnt; | |
iter = tree; | |
while (in <= e) { | |
int bit = *in & (1 << bitoff); | |
retry: | |
if (bit) { | |
if (!iter[1]) { | |
*out++ = iter[0]; | |
iter = tree; | |
goto retry; | |
} else iter = iter + 3*iter[1]; | |
} else { | |
if (!iter[2]) { | |
*out++ = iter[0]; | |
iter = tree; | |
goto retry; | |
} else iter = iter + 3*iter[2]; | |
} | |
bitoff++; | |
in += bitoff / BITS_PER_BYTE; | |
bitoff = bitoff % BITS_PER_BYTE; | |
if (in == e && bitoff == end_bits+1) | |
break; | |
} return (int)(out-o); | |
} | |
/* =============================================================== | |
* RLE | |
* ===============================================================*/ | |
static int | |
rle_compressed_size(int size) | |
{ | |
return size*2; | |
} | |
static int | |
rle_compress(struct profiler *p, uchar *out, const uchar *in, int size) | |
{ | |
uchar lit[128], litcnt = 0, i; | |
const uchar *e = in + size; | |
uchar *o = out; | |
while (in <= e) { | |
int cnt = 1; | |
while (in+cnt < e && in[cnt] == *in && cnt < cntof(lit)-1) | |
cnt++; | |
if (cnt > 1 || litcnt >= cntof(lit) || in == e) { | |
/* flush literals */ | |
*o++ = (uchar)litcnt | flag(7); | |
for (i = 0; i < litcnt; ++i) | |
*o++ = (uchar)lit[i]; | |
o -= litcnt == 0; | |
if (in == e) break; | |
/* write duplication */ | |
*o++ = (uchar)cnt; | |
*o++ = (uchar)*in; | |
o -= (cnt < 2)*2; | |
litcnt = 0; | |
} else lit[litcnt++] = *in; | |
in += cnt; | |
} assert(in == e); | |
return (int)(o - out); | |
} | |
static int | |
rle_decompress(struct profiler *p, | |
uchar *out, const uchar *in, int size) | |
{ | |
int i = 0; | |
const uchar *e = in + size, *o = out; | |
while (in < e) { | |
uchar len = (uchar)*in++; | |
if (len & flag(7)) { | |
/* literal */ | |
len = len & ~flag(7); | |
assert(len); | |
while (len--) *out++ = *in++; | |
} else { | |
/* duplication */ | |
uchar sym = *in++; | |
assert(len && len < 128); | |
for (i = 0; i < len; ++i) | |
*out++ = sym; | |
} | |
} assert(in == e); | |
return (int)(out-o); | |
} | |
/* =============================================================== | |
* LZ | |
* ===============================================================*/ | |
static int | |
lz_compressed_size(int size) | |
{ | |
return size*2; | |
} | |
static int | |
lz_compress(struct profiler *p, uchar *out, const uchar *base_in, int size) | |
{ | |
#define MAX_WINDOW_SIZE 255 | |
#define MAX_LOOP_BACK_COUNT 255 | |
#define MAX_LITERAL_COUNT 255 | |
uchar litcnt = 0, i; | |
uchar lit[MAX_LITERAL_COUNT]; | |
const uchar *in = base_in; | |
const uchar *e = in + size; | |
uchar *o = out; | |
while (in <= e) { | |
int output_run = 0; | |
int best_run = 0, best_dist = 0; | |
const uchar *win_begin = 0; | |
int look_back = min((int)(in-base_in), MAX_LOOP_BACK_COUNT); | |
for (win_begin = in - look_back; win_begin < in; ++win_begin) { | |
/* find matching string duplication O(n^2) */ | |
int win_size = min((int)(e-win_begin), MAX_WINDOW_SIZE); | |
const uchar *win_end = win_begin + win_size; | |
const uchar *test = in; | |
const uchar *win = win_begin; | |
int test_run = 0; | |
while ((win < win_end) && | |
(*test++ == *win++)) | |
test_run++; | |
/* remember best match */ | |
if (test_run > best_run) { | |
best_run = test_run; | |
best_dist = cast(int,in - win_begin); | |
} | |
} | |
output_run = (litcnt) ? (best_run > 4): (best_run > 2); | |
if (output_run || litcnt >= cntof(lit) || in == e) { | |
if (litcnt) { | |
/* flush literals */ | |
assert((uchar)litcnt == litcnt); | |
*o++ = (uchar)litcnt; | |
*o++ = (uchar)0; | |
for (i = 0; i < litcnt; ++i) | |
*o++ = (uchar)lit[i]; | |
litcnt = 0; | |
} | |
if (in >= e) break; | |
if (output_run) { | |
/* write string duplication */ | |
assert((uchar)best_run == best_run); | |
assert((uchar)best_dist == best_dist); | |
*o++ = (uchar)best_run; | |
*o++ = (uchar)best_dist; | |
in += best_run; | |
} | |
} else lit[litcnt++] = *in++; | |
} return (int)(o - out); | |
} | |
static int | |
lz_decompress(struct profiler *p, | |
uchar *out, const uchar *in, int size) | |
{ | |
int i = 0; | |
const uchar *e = in + size, *o = out; | |
while (in < e) { | |
int cnt = *in++; | |
int off = *in++; | |
const uchar *src = 0; | |
src = (!off) ? in: out - off; | |
for (i = 0; i < cnt; ++i) | |
*out++ = *src++; | |
in += (!off) * cnt; | |
} return (int)(out-o); | |
} | |
/* =============================================================== | |
* LZW | |
* ===============================================================*/ | |
struct lzw { | |
int bitcnt; | |
int bits, off; | |
}; | |
static int | |
lzw_compressed_size(int size) | |
{ | |
return size*2; | |
} | |
static uchar* | |
lzw_write(uchar *out, struct lzw *s, int code) | |
{ | |
s->bits |= code << s->off; | |
s->off += s->bitcnt; | |
while (s->off >= 8) { | |
*out++ = (uchar)(s->bits & 0xFF); | |
s->bits >>= 8; | |
s->off -= 8; | |
} return out; | |
} | |
static int | |
lzw_compress(struct profiler *p, uchar *out, const uchar *in, int size) | |
{ | |
#define LZW_HASH_SIZE 5003 | |
int htbl[LZW_HASH_SIZE]; /* {{sym,entry},...} */ | |
short codetbl[LZW_HASH_SIZE]; /* {entry,...} */ | |
short free_ent = 0x102; /* 0x100 reset dict, 0x101 eof */ | |
int maxcode = 511; | |
uchar *o = out; | |
int ent = *in++; | |
struct lzw st; | |
memset(&st, 0, sizeof(st)); | |
st.bitcnt=9; | |
o = lzw_write(o, &st, 0x100); | |
memset(htbl, 0xFF, sizeof(htbl)); | |
rn: while (--size) { | |
int c = *in++; | |
int code = (c << 12) + ent; /* {sym,entry} */ | |
int key = (c << 4) ^ ent; /* hash */ | |
while (htbl[key] >= 0) { | |
if (htbl[key] == code) { | |
ent = codetbl[key]; | |
goto rn; | |
} | |
++key; | |
key = key >= LZW_HASH_SIZE ? key - LZW_HASH_SIZE : key; | |
} o = lzw_write(o, &st, ent); | |
ent = c; | |
if (free_ent < 4096) { | |
if(free_ent > maxcode) { | |
++st.bitcnt; | |
if (st.bitcnt == 12) | |
maxcode = 4096; | |
else maxcode = (1 << st.bitcnt)-1; | |
} | |
codetbl[key] = free_ent++; | |
htbl[key] = code; | |
} else { | |
o = lzw_write(o, &st, 0x100); | |
memset(htbl, 0xFF, sizeof(htbl)); | |
free_ent = 0x102; | |
maxcode = 511; | |
st.bitcnt = 9; | |
} | |
} | |
o = lzw_write(o, &st, ent); | |
o = lzw_write(o, &st, 0x101); | |
o = lzw_write(o, &st, 0); | |
return (int)(o-out); | |
} | |
static const uchar* | |
lzw_read(const uchar *in, struct lzw *s, int *code) | |
{ | |
if (s->off < s->bitcnt) { | |
s->bits |= ((*in++) << s->off); | |
s->bits |= ((*in++) << (s->off+8)); | |
s->off += 16; | |
} | |
*code = s->bits & ((1 << s->bitcnt)-1); | |
s->bits >>= s->bitcnt; | |
s->off -= s->bitcnt; | |
return in; | |
} | |
static int | |
lzw_decompress(struct profiler *p, uchar *out, const uchar *in, int size) | |
{ | |
const uchar *e = in + size, *o = out; | |
int dict[LZW_HASH_SIZE]; | |
short free_ent = 0x102; | |
int maxcode = 511; | |
int next = 0, last = 0; | |
struct lzw st; | |
memset(&st, 0, sizeof(st)); | |
st.bitcnt=9; | |
in = lzw_read(in, &st, &last); | |
assert(last == 0x100); /* skip */ | |
in = lzw_read(in, &st, &last); | |
while (in <= e) { | |
int code, c, ent; | |
if (last <= 0xFF) { | |
*out++ = (uchar)last; | |
} else { | |
/* unpack dict-entry */ | |
int idx = last, cnt = 1; | |
assert(free_ent > idx); | |
while (idx > 0x101) { | |
code = dict[idx]; | |
c = (code >> 12); | |
*out++ = (uchar)c; | |
idx = code & 0xFFF; | |
cnt++; | |
} *out++ = (uchar)idx; | |
/* reverse output */ | |
{uchar *s = out - cnt; | |
uchar *d = out - 1; | |
cnt >>= 1; | |
while (cnt--) { | |
uchar t = *s; /* swap */ | |
*s++ = *d; *d-- = t; | |
}} | |
} | |
/* grow bit width */ | |
if (free_ent > maxcode) { | |
++st.bitcnt; | |
if(st.bitcnt == 12) | |
maxcode = 4096; | |
else maxcode = (1<<st.bitcnt)-1; | |
} | |
/* read next entry */ | |
in = lzw_read(in, &st, &next); | |
if (next == 0x100) { | |
maxcode = 511; | |
st.bitcnt = 9; | |
free_ent = 0x102; | |
in = lzw_read(in, &st, &last); | |
continue; | |
} else if (next == 0x101) { | |
break; | |
} | |
/* find first symbol */ | |
ent = (next < free_ent) ? next: last; | |
while (ent > 0x101) { | |
code = dict[ent]; | |
ent = code & 0xFFF; | |
} | |
/* add new entry into dict */ | |
code = (ent << 12) + last; | |
assert(free_ent < LZW_HASH_SIZE); | |
dict[free_ent++] = code; | |
last = next; | |
} return (int)(out-o); | |
} | |
static int | |
sdefl_compressed_size(int size) | |
{ | |
return size*2; | |
} | |
/* =============================================================== | |
* SDEFL | |
* =============================================================== | |
* public domain - no warranty implied; use at your own risk | |
* References: | |
https://bitbucket.org/rmitton/tigr/src/be3832bee7fb2f274fe5823e38f8ec7fa94e0ce9/src/tigr_inflate.c?at=default&fileviewer=file-view-default | |
https://github.com/github/putty/blob/49fb598b0e78d09d6a2a42679ee0649df482090e/sshzlib.c | |
https://www.ietf.org/rfc/rfc1951.txt | |
*/ | |
#define SDEFL_DICT_BITS 12 | |
#define SDEFL_DICT_SIZE (1 << SDEFL_DICT_BITS) | |
#define SDEFL_MAX_OFF (1 << 15) | |
#define SDEFL_MIN_MATCH 3 | |
#define SDEFL_MAX_MATCH 258 | |
//typedef unsigned char uchar; | |
struct sdefl {int bits, off;}; | |
#define sinfl_rev16(n) ((sdefl_mirror[(n)&0xff] << 8) | sdefl_mirror[((n)>>8)&0xff]) | |
static const uchar sdefl_mirror[256] = { | |
#define R2(n) n, n + 128, n + 64, n + 192 | |
#define R4(n) R2(n), R2(n + 32), R2(n + 16), R2(n + 48) | |
#define R6(n) R4(n), R4(n + 8), R4(n + 4), R4(n + 12) | |
R6(0), R6(2), R6(1), R6(3), | |
}; | |
static int | |
sdefl_nxpow2i(int n) | |
{ | |
n--; | |
n |= n >> 1; | |
n |= n >> 2; | |
n |= n >> 4; | |
n |= n >> 8; | |
n |= n >> 16; | |
return ++n; | |
} | |
static int | |
sdefl_ilog2(int n) | |
{ | |
#define lt(n) n,n,n,n, n,n,n,n, n,n,n,n ,n,n,n,n | |
static const char tbl[256] = {-1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,lt(4),lt(5), | |
lt(5),lt(6),lt(6),lt(6),lt(6),lt(7),lt(7),lt(7),lt(7),lt(7),lt(7),lt(7),lt(7) | |
}; int tt, t; | |
if ((tt = (n >> 16))) | |
return (t = (tt >> 8)) ? 24+tbl[t]: 16+tbl[tt]; | |
else return (t = (n >> 8)) ? 8+tbl[t]: tbl[n]; | |
#undef lt | |
} | |
static int | |
sdefl_hash(const uchar *s) | |
{ | |
int a = s[0], b = s[1], c = s[2]; | |
int v = (a << 16) | (b << 8) | c; | |
return ((v >> (3*8 - SDEFL_DICT_BITS)) - v) & (SDEFL_DICT_SIZE-1); | |
} | |
static uchar* | |
sdefl_write(uchar *dst, struct sdefl *s, int code, int bitcnt) | |
{ | |
s->bits |= (code << s->off); | |
s->off += bitcnt; | |
while (s->off >= 8) { | |
*dst++ = (uchar)(s->bits & 0xFF); | |
s->bits >>= 8; | |
s->off -= 8; | |
} return dst; | |
} | |
static uchar* | |
sdefl_match(uchar *dst, struct sdefl *s, int dist, int len) | |
{ | |
static const short lxmin[] = {0,11,19,35,67,131}; | |
static const short dxmax[] = {0,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576}; | |
static const short lmin[] = {11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227}; | |
static const short dmin[] = {1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257, | |
385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577}; | |
/* length encoding */ | |
int lc = len; | |
int lx = sdefl_ilog2(len - 3) - 2; | |
if (!(lx = (lx < 0) ? 0: lx)) lc += 254; | |
else if (len >= 258) lx = 0, lc = 285; | |
else lc = ((lx-1) << 2) + 265 + ((len - lxmin[lx]) >> lx); | |
if (lc <= 279) | |
dst = sdefl_write(dst, s, sdefl_mirror[(lc - 256) << 1], 7); | |
else dst = sdefl_write(dst, s, sdefl_mirror[0xc0 - 280 + lc], 8); | |
if (lx) dst = sdefl_write(dst, s, len - lmin[lc - 265], lx); | |
/* distance encoding */ | |
{int dc = dist - 1; | |
int dx = sdefl_ilog2(sdefl_nxpow2i(dist) >> 2); | |
if ((dx = (dx < 0) ? 0: dx)) | |
dc = ((dx + 1) << 1) + (dist > dxmax[dx]); | |
dst = sdefl_write(dst, s, sdefl_mirror[dc << 3], 5); | |
if (dx) dst = sdefl_write(dst, s, dist - dmin[dc], dx);} | |
return dst; | |
} | |
static uchar* | |
sdefl_lit(uchar *dst, struct sdefl *s, int c) | |
{ | |
if (c <= 143) | |
return sdefl_write(dst, s, sdefl_mirror[0x30+c], 8); | |
else return sdefl_write(dst, s, 1 + 2 * sdefl_mirror[0x90 - 144 + c], 9); | |
} | |
static int | |
sdeflate(struct profiler *p, uchar *out, const uchar *in, int size) | |
{ | |
struct sdefl st; | |
uchar *dst = out; | |
const uchar *e = in + size; | |
const uchar *dict[SDEFL_DICT_SIZE]; | |
memset(dict,0,sizeof(dict)); | |
memset(&st,0,sizeof(st)); | |
dst = sdefl_write(dst,&st,0x01,1); /* block */ | |
dst = sdefl_write(dst,&st,0x01,2); /* static huffman */ | |
while (in < e) { | |
const int h = sdefl_hash(in); | |
const uchar **ent = &dict[h & (SDEFL_DICT_SIZE-1)]; | |
const uchar *sub = *ent; *ent = in; | |
if (sub && (in > sub) && ((in-sub) < SDEFL_MAX_OFF) && | |
!memcmp(in,sub,SDEFL_MIN_MATCH)) { | |
/* match */ | |
const uchar *s = sub + SDEFL_MIN_MATCH; | |
int len, dist = (int)(in - sub); | |
in += (len = SDEFL_MIN_MATCH); | |
while ((*s == *in) && (len < SDEFL_MAX_MATCH)) | |
len++, s++, in++; | |
dst = sdefl_match(dst, &st, dist, len); | |
} else dst = sdefl_lit(dst, &st, *in++); | |
} | |
/* zlib partial flush */ | |
dst = sdefl_write(dst, &st, 0, 7); | |
dst = sdefl_write(dst, &st, 2, 10); | |
dst = sdefl_write(dst, &st, 2, 3); | |
return (int)(dst - out); | |
} | |
struct sinfl { | |
int bits, bitoff; | |
unsigned lits[288]; | |
unsigned dsts[32]; | |
unsigned lens[19]; | |
int tlit, tdist, tlen; | |
}; | |
static int | |
sinfl_read(const uchar **src, const uchar *end, | |
struct sinfl *s, int n) | |
{ | |
const uchar *in = *src; | |
int v = s->bits & ((1 << n)-1); | |
s->bits >>= n; | |
s->bitoff -= n; | |
while (s->bitoff < 16 && in < end) { | |
s->bits |= (*in++) << s->bitoff; | |
s->bitoff += 8; | |
} *src = in; | |
return v; | |
} | |
static int | |
sinfl_build(unsigned *tree, uchar *lens, int symcnt) | |
{ | |
int n, cnt[16], first[16], codes[16]; | |
memset(cnt, 0, sizeof(cnt)); | |
cnt[0] = first[0] = codes[0] = 0; | |
for (n = 0; n < symcnt; ++n) cnt[lens[n]]++; | |
for (n = 1; n <= 15; n++) { | |
codes[n] = (codes[n-1] + cnt[n-1]) << 1; | |
first[n] = first[n-1] + cnt[n-1]; | |
} | |
for (n = 0; n < symcnt; n++) { | |
int slot, code, len = lens[n]; | |
if (!len) continue; | |
code = codes[len]++; | |
slot = first[len]++; | |
tree[slot] = (unsigned)((code << (32-len)) | (n << 4) | len); | |
} return first[15]; | |
} | |
static int | |
sinfl_decode(const uchar **in, const uchar *end, | |
struct sinfl *s, unsigned *tree, int max) | |
{ | |
/* bsearch next prefix code */ | |
unsigned key, lo = 0, hi = (unsigned)max; | |
unsigned search = (unsigned)(sinfl_rev16(s->bits) << 16) | 0xffff; | |
while (lo < hi) { | |
unsigned guess = (lo + hi) / 2; | |
if (search < tree[guess]) hi = guess; | |
else lo = guess + 1; | |
} | |
/* pull out and check key */ | |
key = tree[lo-1]; | |
assert(((search^key) >> (32-(key&0xf))) == 0); | |
sinfl_read(in, end, s, key & 0x0f); | |
return (key >> 4) & 0x0fff; | |
} | |
static int | |
sinflate(struct profiler *p, uchar *out, const uchar *in, int size) | |
{ | |
static const char order[] = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; | |
static const short dbase[30+2] = {1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, | |
257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577}; | |
static const uchar dbits[30+2] = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9, | |
10,10,11,11,12,12,13,13,0,0}; | |
static const short lbase[29+2] = {3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35, | |
43,51,59,67,83,99,115,131,163,195,227,258,0,0}; | |
static const uchar lbits[29+2] = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4, | |
4,4,4,5,5,5,5,0,0,0}; | |
enum sinfl_states {hdr,stored,fixed,dyn,blk}; | |
enum sinfl_states state = hdr; | |
const uchar *e = in + size, *o = out; | |
struct sinfl s; | |
int last = 0; | |
memset(&s, 0, sizeof(s)); | |
sinfl_read(&in,e,&s,0); /* buffer input */ | |
while (in < e || s.bitoff) { | |
switch (state) { | |
case hdr: { | |
int type = 0; /* block header */ | |
last = sinfl_read(&in,e,&s,1); | |
type = sinfl_read(&in,e,&s,2); | |
switch (type) {default: return (int)(out-o); | |
case 0x00: state = stored; break; | |
case 0x01: state = fixed; break; | |
case 0x02: state = dyn; break;} | |
} break; | |
case stored: { | |
int len; /* uncompressed block */ | |
sinfl_read(&in,e,&s,s.bitoff & 7); | |
len = sinfl_read(&in,e,&s,16); | |
memcpy(out, in, (size_t)len); | |
in += len, out += len; | |
sinfl_read(&in,e,&s,16); | |
state = hdr; | |
} break; | |
case fixed: { | |
/* fixed huffman codes */ | |
int n; uchar lens[288+32]; | |
for (n = 0; n <= 143; n++) lens[n] = 8; | |
for (n = 144; n <= 255; n++) lens[n] = 9; | |
for (n = 256; n <= 279; n++) lens[n] = 7; | |
for (n = 280; n <= 287; n++) lens[n] = 8; | |
for (n = 0; n < 32; n++) lens[288+n] = 5; | |
/* build trees */ | |
s.tlit = sinfl_build(s.lits, lens, 288); | |
s.tdist = sinfl_build(s.dsts, lens + 288, 32); | |
state = blk; | |
} break; | |
case dyn: { | |
/* dynamic huffman codes */ | |
int n, i, nlit, ndist, nlen; | |
uchar nlens[19] = {0}, lens[288+32]; | |
nlit = 257 + sinfl_read(&in,e,&s,5); | |
ndist = 1 + sinfl_read(&in,e,&s,5); | |
nlen = 4 + sinfl_read(&in,e,&s,4); | |
for (n = 0; n < nlen; n++) | |
nlens[order[n]] = (uchar)sinfl_read(&in,e,&s,3); | |
s.tlen = sinfl_build(s.lens, nlens, 19); | |
/* decode code lengths */ | |
for (n = 0; n < nlit + ndist;) { | |
int sym = sinfl_decode(&in, e, &s, s.lens, s.tlen); | |
switch (sym) {default: lens[n++] = (uchar)sym; break; | |
case 16: for (i=3+sinfl_read(&in,e,&s,2);i;i--,n++) lens[n]=lens[n-1]; break; | |
case 17: for (i=3+sinfl_read(&in,e,&s,3);i;i--,n++) lens[n]=0; break; | |
case 18: for (i=11+sinfl_read(&in,e,&s,7);i;i--,n++) lens[n]=0; break;} | |
} | |
/* build lit/dist trees */ | |
s.tlit = sinfl_build(s.lits, lens, nlit); | |
s.tdist = sinfl_build(s.dsts, lens+nlit, ndist); | |
state = blk; | |
} break; | |
case blk: { | |
/* decompress block */ | |
int sym = sinfl_decode(&in, e, &s, s.lits, s.tlit); | |
if (sym > 256) {sym -= 257; /* match symbol */ | |
{int len = sinfl_read(&in, e, &s, lbits[sym]) + lbase[sym]; | |
int dsym = sinfl_decode(&in, e, &s, s.dsts, s.tdist); | |
int offs = sinfl_read(&in, e, &s, dbits[dsym]) + dbase[dsym]; | |
if (offs > (int)(out-o)) return (int)(out-o); | |
while (len--) *out = *(out-offs), out++;} | |
} else if (sym == 256) { | |
if (last) return (int)(out-o); | |
state = hdr; | |
} else *out++ = (uchar)sym; | |
} break;} | |
} return (int)(out-o); | |
} | |
/* =============================================================== | |
* APP | |
* ===============================================================*/ | |
enum compressor_type { | |
COMPRESSOR_RLE, | |
COMPRESSOR_LZ, | |
COMPRESSOR_LZW, | |
COMPRESSOR_HUF, | |
COMPRESSOR_DEFL, | |
COMPRESSOR_CNT | |
}; | |
static const | |
struct compressor { | |
enum compressor_type type; | |
const char *str; | |
int(*compressed_size)(int size); | |
int(*compress)(struct profiler *p, uchar*, const uchar*, int); | |
int(*decompress)(struct profiler *p, uchar*, const uchar*, int); | |
} comprs[] = { | |
{COMPRESSOR_RLE, "rle", rle_compressed_size, rle_compress, rle_decompress}, | |
{COMPRESSOR_LZ, "lz", lz_compressed_size, lz_compress, lz_decompress}, | |
{COMPRESSOR_LZW, "lzw", lzw_compressed_size, lzw_compress, lzw_decompress}, | |
{COMPRESSOR_HUF, "huf", huf_compressed_size, huf_compress, huf_decompress}, | |
{COMPRESSOR_DEFL, "defl", sdefl_compressed_size, sdeflate, sinflate} | |
}; | |
enum operation_type { | |
OP_COMPRESS, | |
OP_DECOMPRESS, | |
OP_TEST, | |
OP_CNT | |
}; | |
static const | |
struct operation { | |
enum operation_type type; | |
const char *str; | |
} ops[] = { | |
{OP_COMPRESS, "compress"}, | |
{OP_DECOMPRESS, "decompress"}, | |
{OP_TEST, "test"} | |
}; | |
static void | |
usage(const char *app) | |
{ | |
fprintf(stderr, "usage:\n"); | |
fprintf(stderr, "\t%s compress [rle|lz|lzw|huf|defl] [in] [out]\n", app); | |
fprintf(stderr, "\t%s decompress [rle|lz|lzw|huf|defl] [in] [out]\n", app); | |
fprintf(stderr, "\t%s test [rle|lz|lzw|huf|defl] [in]\n", app); | |
exit(-1); | |
} | |
int main(int argc, char **argv) | |
{ | |
int i = 0; | |
uchar *out = 0; | |
int size = 0; | |
struct profiler p; | |
const struct compressor *compr = 0; | |
const struct operation *op = 0; | |
/* check input arguments */ | |
struct file_contents fc; | |
memset(&p, 0, sizeof(p)); | |
if (argc < 4) { | |
fprintf(stderr, "Not enough arguments\n"); | |
usage(argv[0]); | |
} | |
/* select operation */ | |
for (i = 0; i < cntof(ops); ++i) { | |
const struct operation *o = ops + i; | |
if (!strcmp(o->str, argv[1])) | |
{op = o; break;}; | |
} | |
if (!op) { | |
fprintf(stderr, "Unkown command: %s\n", argv[1]); | |
usage(argv[0]); | |
} | |
/* select compressor */ | |
for (i = 0; i < cntof(comprs); ++i) { | |
const struct compressor *c = comprs + i; | |
if (!strcmp(c->str, argv[2])) | |
{compr = c; break;}; | |
} | |
if (!compr) { | |
fprintf(stderr, "Unkown compressor: %s\n", argv[2]); | |
usage(argv[0]); | |
} | |
/* read input file */ | |
fc = read_file_content(argv[3]); | |
if (!fc.valid) { | |
fprintf(stderr, "Failed to read file : %s\n", argv[3]); | |
usage(argv[0]); | |
} | |
/* execute operation */ | |
switch (op->type) { | |
case OP_COMPRESS: { | |
size = compr->compressed_size(fc.size); | |
out = calloc(1,(size_t)size+4); | |
memcpy(out,&fc.size,4); | |
profiler_begin(&p, PROFILE_COMPRESS, fc.size); | |
size = compr->compress(&p, out+4, fc.data, fc.size); | |
profiler_end(&p, size); | |
size += 4; | |
} break; | |
case OP_DECOMPRESS: { | |
if (fc.size < 4) { | |
printf("Invalid file %s size to small", argv[3]); | |
usage(argv[0]); | |
} | |
memcpy(&size, fc.data, 4); | |
out = calloc(1,(size_t)size); | |
profiler_begin(&p, PROFILE_UNCOMPRESS, fc.size-4); | |
compr->decompress(&p, out, fc.data+4, fc.size-4); | |
profiler_end(&p, size); | |
} break; | |
case OP_TEST: { | |
/* compress */ | |
int final_size; | |
size = compr->compressed_size(fc.size); | |
out = calloc(1,(size_t)size); | |
profiler_begin(&p, PROFILE_COMPRESS, fc.size); | |
size = compr->compress(&p, out, fc.data, fc.size); | |
profiler_end(&p, size); | |
/* decompress */ | |
{void *old = calloc(1, (size_t)fc.size); | |
profiler_begin(&p, PROFILE_UNCOMPRESS, size); | |
final_size = compr->decompress(&p, old, out, size); | |
profiler_end(&p, fc.size); | |
/* validate */ | |
if (final_size == fc.size && !memcmp(old, fc.data, (size_t)fc.size)) | |
fprintf(stderr, "Success\n"); | |
else fprintf(stderr, "Failure\n");} | |
profiler_print(stdout, &p); | |
return 0; | |
}} | |
if (argc < 5) { | |
fprintf(stderr, "Not enough arguments\n"); | |
usage(argv[0]); | |
} | |
/* write output file */ | |
{FILE *fp = fopen(argv[4], "wb"); | |
if (!fp) { | |
fprintf(stderr, "failed to write to file :%s\n", argv[4]); | |
usage(argv[0]); | |
} fwrite(out, (size_t)size, 1, fp); | |
fclose(fp);} | |
profiler_print(stdout, &p); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment