Skip to content

Instantly share code, notes, and snippets.

@vurtun
Last active April 17, 2018 23:28
Show Gist options
  • Save vurtun/2f756222c677e03adae7e88974c9b2ef to your computer and use it in GitHub Desktop.
Save vurtun/2f756222c677e03adae7e88974c9b2ef to your computer and use it in GitHub Desktop.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <limits.h>
#include <sys/time.h>
#include <unistd.h>
#include <time.h>
/* ===============================================================
* UTIL
* ===============================================================*/
#define cast(t,p) ((t)(p))
#define flag(n) ((1u)<<(n))
#define min(a,b) ((a)<(b)?(a):(b))
#define max(a,b) ((a)>(b)?(a):(b))
#define cntof(a) ((int)(sizeof(a)/sizeof((a)[0])))
#define BITS_PER_BYTE 8
#define BITS_PER_LONG (sizeof(long)*BITS_PER_BYTE)
#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(unsigned long))
#define BITMAP_LAST_WORD_MASK(nbits)(((nbits) % BITS_PER_LONG) ? (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL)
typedef unsigned char uchar;
typedef unsigned short ushort;
struct file_contents {
unsigned valid:1;
int size;
uchar *data;
};
static struct file_contents
read_file_content(const char *file)
{
FILE *fp = 0;
struct file_contents res;
memset(&res, 0, sizeof(res));
fp = fopen(file, "rb");
if (!fp) return res;
res.valid = 1;
fseek(fp, 0, SEEK_END);
res.size = (int)ftell(fp);
fseek(fp, 0, SEEK_SET);
res.data = calloc(1,(size_t)res.size);
res.size = (int)fread(res.data, 1,(size_t)res.size, fp);
fclose(fp);
return res;
}
static long
timestamp(void)
{
struct timeval tv;
if (gettimeofday(&tv, NULL) < 0) return 0;
return (long)((long)tv.tv_sec * 1000 + (long)tv.tv_usec/1000);
}
static void
bitmap_shift_left(unsigned long *dst, const unsigned long *src,
unsigned int shift, unsigned int nbits)
{
int k;
unsigned int lim = BITS_TO_LONGS(nbits);
unsigned int off = shift/BITS_PER_LONG;
unsigned int rem = shift % BITS_PER_LONG;
for (k = lim - off - 1; k >= 0; --k) {
unsigned long upper, lower;
if (rem && k > 0)
lower = src[k - 1] >> (BITS_PER_LONG - rem);
else lower = 0;
upper = src[k] << rem;
dst[k + off] = lower | upper;
}
if (off)
memset(dst, 0, off*sizeof(unsigned long));
}
static void
bitmap_shift_right(unsigned long *dst, const unsigned long *src,
unsigned shift, unsigned nbits)
{
unsigned k, lim = BITS_TO_LONGS(nbits);
unsigned off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG;
unsigned long mask = BITMAP_LAST_WORD_MASK(nbits);
for (k = 0; off + k < lim; ++k) {
unsigned long upper, lower;
if (!rem || off + k + 1 >= lim)
upper = 0;
else {
upper = src[off + k + 1];
if (off + k + 1 == lim - 1)
upper &= mask;
upper <<= (BITS_PER_LONG - rem);
}
lower = src[off + k];
if (off + k == lim - 1)
lower &= mask;
lower >>= rem;
dst[k] = lower | upper;
}
if (off)
memset(&dst[lim - off], 0, off*sizeof(unsigned long));
}
static char*
bin(char *b, const uchar *c, int cnt, unsigned int bitcnt)
{
int i;
unsigned long tmp[256];
assert(sizeof(tmp) > bitcnt/BITS_PER_BYTE);
memcpy(tmp, c, bitcnt/BITS_PER_BYTE);
for (i = 0; i < cnt; ++i) {
b[i] = (tmp[0] & 0x01) ? '1': '0';
bitmap_shift_right(tmp, tmp, 1, bitcnt);
} b[i] = '\0';
return b;
}
/* ===============================================================
* PROFILER
* ===============================================================*/
#define PROFILE_MAP(STAT)\
STAT(COMPRESS)\
STAT(UNCOMPRESS)
enum profile_type {
#define STAT(a) PROFILE_##a,
PROFILE_MAP(STAT)
#undef STAT
PROFILE_COUNT
};
static const char *stat_group_name[] = {
#define STAT(a) #a,
PROFILE_MAP(STAT)
#undef STAT
};
enum profile_flag {
PROFILE_FLAG_COMPRESS = flag(PROFILE_COMPRESS),
PROFILE_FLAG_UNCOMPRESS = flag(PROFILE_UNCOMPRESS)
};
struct stat_group {
long time, begin, end;
int compressed;
int uncompressed;
};
struct profiler {
unsigned flags;
enum profile_type cur;
struct stat_group grps[PROFILE_COUNT];
};
static double
percent(int num, int den)
{
double res = 0.0f;
if (den != 0)
res = (double)num/(double)den;
return 100.0*res;
}
static void
profiler_begin(struct profiler *p,
enum profile_type type, int size)
{
struct stat_group *sg;
sg = p->grps + type;
p->cur = type;
p->flags |= flag(type);
switch (type) {
case PROFILE_COMPRESS:
sg->uncompressed = size;
break;
case PROFILE_UNCOMPRESS:
sg->compressed = size;
break;
} sg->begin = timestamp();
}
static void
profiler_end(struct profiler *p, int size)
{
struct stat_group *sg;
sg = p->grps + p->cur;
sg->end = timestamp();
sg->time = sg->end - sg->begin;
switch (p->cur) {
case PROFILE_COMPRESS:
sg->compressed = size;
break;
case PROFILE_UNCOMPRESS:
sg->uncompressed = size;
break;
}
}
static void
profiler_print(FILE *fp, const struct profiler *p)
{
int i = 0;
fprintf(fp, "Profiler:\n");
for (i = 0; i < PROFILE_COUNT; ++i) {
const struct stat_group *grp = p->grps + i;
if (!(p->flags & flag(i))) continue;
fprintf(fp, "\t%s:\n", stat_group_name[i]);
fprintf(fp, "\t\ttime: %lu ms\n", grp->time);
fprintf(fp, "\t\tsize: %.0f -> %.0f (%.02f%%)\n",
(double)grp->uncompressed,
(double)grp->compressed,
percent(grp->compressed,grp->uncompressed));
}
}
/* ===============================================================
* HUFFMAN
* ===============================================================*/
#define HUFFMAN_BIT_COUNT 256
#define MAX_HUFFMAN_CODE_SIZE (HUFFMAN_BIT_COUNT/BITS_PER_LONG)
struct huff_symbol;
struct huff_node {
uchar codepoint;
unsigned short weight;
const struct huff_symbol *sym;
struct huff_node *parent;
struct huff_node *left;
struct huff_node *right;
struct huff_node *next;
};
struct huff_symbol {
int cnt;
unsigned short weight;
uchar codepoint;
};
struct huff_table_entry {
unsigned long code[MAX_HUFFMAN_CODE_SIZE];
uchar bitcnt;
uchar symbol;
};
static int
huf_compressed_size(int size)
{
return max(size*2, size + 512*3+3);
}
static int
huf_symbol_compare(const void *p1, const void *p2)
{
const struct huff_symbol *a = p1;
const struct huff_symbol *b = p2;
return (a->weight - b->weight);
}
static int
huf_compress(struct profiler *p,
uchar *out, const uchar *in, int size)
{
int i = 0;
int node_count = 0;
int node_index = 0;
int leaf_index = 1;
uchar *marker = 0;
uchar *o = out;
struct huff_node *head = 0;
struct huff_node *tail = 0;
struct huff_node *root = 0;
#define MAX_HUFF_TABLE_COUNT 256
#define MAX_HUFF_NODE_COUNT (MAX_HUFF_TABLE_COUNT*2)
struct huff_node nodes[MAX_HUFF_NODE_COUNT];
struct huff_symbol syms[MAX_HUFF_TABLE_COUNT];
struct huff_table_entry htbl[MAX_HUFF_TABLE_COUNT];
/* I.) initialize table */
for (i = 0; i < cntof(htbl); ++i) {
memset(&htbl[i].code, 0, sizeof(htbl[i].code));
htbl[i].symbol = (uchar)i;
htbl[i].bitcnt = 0;
syms[i].codepoint = (uchar)i;
syms[i].weight = 0;
syms[i].cnt = 0;
}
/* initialize symbols */
for (i = 0; i < size; ++i)
syms[in[i]].cnt++;
for (i = 0; i < cntof(syms); ++i) {
double weight = 0;
weight = (double)syms[i].cnt/(double)size;
syms[i].weight = (unsigned short)(weight*SHRT_MAX);
} qsort(syms, cntof(syms), sizeof(struct huff_symbol), huf_symbol_compare);
node_count = cntof(nodes);
for (i = 0; i < cntof(syms); ++i)
if (syms[i].cnt) break;
/* insert sorted symbols directly into first node heap */
node_count = cntof(syms) - i;
for (node_index = 1; node_index <= node_count; ++node_index) {
struct huff_node *n = nodes + node_index;
n->sym = syms + i + node_index - 1;
n->codepoint = n->sym->codepoint;
n->weight = n->sym->weight;
n->parent = 0;
n->left = 0;
n->right = 0;
n->next = 0;
}
/* II.) generate tree */
while (leaf_index <= node_count || tail) {
struct huff_node *first = 0;
struct huff_node *second = 0;
struct huff_node *parent = 0;
/* get first node with lowest weight */
struct huff_node *leaf = 0;
struct huff_node *inner = 0;
if (tail) inner = tail;
if (leaf_index <= node_count)
leaf = nodes + leaf_index;
first = (!inner) ? leaf: (!leaf) ? inner: (inner->weight < leaf->weight) ? inner: leaf;
if (first == inner) {
tail = tail->next;
if (first == head)
head = 0;
} else leaf_index++;
leaf = 0; inner = 0;
if (tail) inner = tail;
if (leaf_index <= node_count)
leaf = nodes + leaf_index;
second = (!inner) ? leaf: (!leaf) ? inner: (inner->weight < leaf->weight) ? inner: leaf;
if (!second) {
/* reached root element */
assert(!first->sym);
nodes[0] = *first;
root = first;
first->parent = 0;
break;
} else if (second == inner) {
tail = tail->next;
if (second == head)
head = 0;
} else leaf_index++;
/* create parent node from both nodes */
parent = nodes + node_index++;
parent->left = first;
parent->right = second;
parent->weight = first->weight + second->weight;
parent->parent = 0;
parent->sym = 0;
parent->next = 0;
/* insert parent node at end of queue */
if (!head) {
head = parent;
tail = parent;
} else {
head->next = parent;
head = parent;
}
/* set node as parent */
first->parent = parent;
second->parent = parent;
}
/* III.) generate table from tree */
{int si = 0;
for (si = 0; si < node_count; ++si) {
const struct huff_node *node = nodes + si + 1;
const struct huff_symbol *sym = node->sym;
/* init table entry */
struct huff_table_entry *entry = htbl + sym->codepoint;
entry->symbol = sym->codepoint;
/* walk from leaf to root and get bitcode and its length*/
{const struct huff_node *prev = node;
const struct huff_node *iter = node->parent;
while (iter) {
bitmap_shift_left(entry->code, entry->code, 1, HUFFMAN_BIT_COUNT);
if (iter->left == prev)
entry->code[0] |= 0x01u;
else entry->code[0] &= ~0x01u;
entry->bitcnt++;
prev = iter;
iter = iter->parent;
}}
}}
/* write out number of nodes */
node_index-=1;
memcpy(out, &node_index, 2);
out += 2;
marker = out;
out++;
/* III.) Save tree to memory */
{int qhead = 0, qtail = 1;
struct huff_node *que[MAX_HUFF_NODE_COUNT+1];
que[qtail] = root;
while (qhead < qtail) {
struct huff_node *n = que[++qhead];
*out++ = n->codepoint;
node_index--;
assert(node_index >= 0);
if (n->left) {
que[++qtail] = n->left;
assert((uchar)(qtail - qhead) == (qtail - qhead));
*out++ = (uchar)(qtail - qhead);
} else *out++ = 0;
if (n->right) {
que[++qtail] = n->right;
assert((uchar)(qtail - qhead) == (qtail - qhead));
*out++ = (uchar)(qtail - qhead);
} else *out++ = 0;
}}
/* IV.) Compression */
{int si, bitoff = 0;
for (si = 0; si < size; ++si) {
unsigned long mask[MAX_HUFFMAN_CODE_SIZE];
unsigned char sym = *((const unsigned char*)in + si);
struct huff_table_entry *entry = &htbl[sym];
if (bitoff + entry->bitcnt < BITS_PER_BYTE) {
/* symbol still fits into byte */
bitmap_shift_left(mask, entry->code, bitoff, HUFFMAN_BIT_COUNT);
*out |= ((uchar*)mask)[0];
bitoff += entry->bitcnt;
} else {
/* symbol across byte boundaries */
int remain = entry->bitcnt;
if (bitoff) {
bitmap_shift_left(mask, entry->code, bitoff, HUFFMAN_BIT_COUNT);
remain = entry->bitcnt - (BITS_PER_BYTE - bitoff);
*out |= ((uchar*)mask)[0];
out++;
bitmap_shift_right(mask, entry->code, BITS_PER_BYTE-bitoff, HUFFMAN_BIT_COUNT);
} else memcpy(mask, entry->code, sizeof(entry->code));
bitoff = remain % BITS_PER_BYTE;
memcpy(out, mask, (size_t)(remain / BITS_PER_BYTE));
out += remain / BITS_PER_BYTE;
if (bitoff) *out = ((uchar*)mask)[remain/BITS_PER_BYTE];
}
} *marker = (uchar)bitoff;}
*out = 0;
return (int)(out-o);
}
static int
huf_decompress(struct profiler *p,
uchar *out, const uchar *in, int size)
{
const uchar *e = in + size, *o = out;
unsigned short ncnt = 0;
const uchar *tree = 0;
const uchar *iter = 0;
uchar end_bits = 0;
size_t bitoff = 0;
memcpy(&ncnt, in, 2);
end_bits = *(in+2);
tree = in + 3;
in = tree + 3*ncnt;
iter = tree;
while (in <= e) {
int bit = *in & (1 << bitoff);
retry:
if (bit) {
if (!iter[1]) {
*out++ = iter[0];
iter = tree;
goto retry;
} else iter = iter + 3*iter[1];
} else {
if (!iter[2]) {
*out++ = iter[0];
iter = tree;
goto retry;
} else iter = iter + 3*iter[2];
}
bitoff++;
in += bitoff / BITS_PER_BYTE;
bitoff = bitoff % BITS_PER_BYTE;
if (in == e && bitoff == end_bits+1)
break;
} return (int)(out-o);
}
/* ===============================================================
* RLE
* ===============================================================*/
static int
rle_compressed_size(int size)
{
return size*2;
}
static int
rle_compress(struct profiler *p, uchar *out, const uchar *in, int size)
{
uchar lit[128], litcnt = 0, i;
const uchar *e = in + size;
uchar *o = out;
while (in <= e) {
int cnt = 1;
while (in+cnt < e && in[cnt] == *in && cnt < cntof(lit)-1)
cnt++;
if (cnt > 1 || litcnt >= cntof(lit) || in == e) {
/* flush literals */
*o++ = (uchar)litcnt | flag(7);
for (i = 0; i < litcnt; ++i)
*o++ = (uchar)lit[i];
o -= litcnt == 0;
if (in == e) break;
/* write duplication */
*o++ = (uchar)cnt;
*o++ = (uchar)*in;
o -= (cnt < 2)*2;
litcnt = 0;
} else lit[litcnt++] = *in;
in += cnt;
} assert(in == e);
return (int)(o - out);
}
static int
rle_decompress(struct profiler *p,
uchar *out, const uchar *in, int size)
{
int i = 0;
const uchar *e = in + size, *o = out;
while (in < e) {
uchar len = (uchar)*in++;
if (len & flag(7)) {
/* literal */
len = len & ~flag(7);
assert(len);
while (len--) *out++ = *in++;
} else {
/* duplication */
uchar sym = *in++;
assert(len && len < 128);
for (i = 0; i < len; ++i)
*out++ = sym;
}
} assert(in == e);
return (int)(out-o);
}
/* ===============================================================
* LZ
* ===============================================================*/
static int
lz_compressed_size(int size)
{
return size*2;
}
static int
lz_compress(struct profiler *p, uchar *out, const uchar *base_in, int size)
{
#define MAX_WINDOW_SIZE 255
#define MAX_LOOP_BACK_COUNT 255
#define MAX_LITERAL_COUNT 255
uchar litcnt = 0, i;
uchar lit[MAX_LITERAL_COUNT];
const uchar *in = base_in;
const uchar *e = in + size;
uchar *o = out;
while (in <= e) {
int output_run = 0;
int best_run = 0, best_dist = 0;
const uchar *win_begin = 0;
int look_back = min((int)(in-base_in), MAX_LOOP_BACK_COUNT);
for (win_begin = in - look_back; win_begin < in; ++win_begin) {
/* find matching string duplication O(n^2) */
int win_size = min((int)(e-win_begin), MAX_WINDOW_SIZE);
const uchar *win_end = win_begin + win_size;
const uchar *test = in;
const uchar *win = win_begin;
int test_run = 0;
while ((win < win_end) &&
(*test++ == *win++))
test_run++;
/* remember best match */
if (test_run > best_run) {
best_run = test_run;
best_dist = cast(int,in - win_begin);
}
}
output_run = (litcnt) ? (best_run > 4): (best_run > 2);
if (output_run || litcnt >= cntof(lit) || in == e) {
if (litcnt) {
/* flush literals */
assert((uchar)litcnt == litcnt);
*o++ = (uchar)litcnt;
*o++ = (uchar)0;
for (i = 0; i < litcnt; ++i)
*o++ = (uchar)lit[i];
litcnt = 0;
}
if (in >= e) break;
if (output_run) {
/* write string duplication */
assert((uchar)best_run == best_run);
assert((uchar)best_dist == best_dist);
*o++ = (uchar)best_run;
*o++ = (uchar)best_dist;
in += best_run;
}
} else lit[litcnt++] = *in++;
} return (int)(o - out);
}
static int
lz_decompress(struct profiler *p,
uchar *out, const uchar *in, int size)
{
int i = 0;
const uchar *e = in + size, *o = out;
while (in < e) {
int cnt = *in++;
int off = *in++;
const uchar *src = 0;
src = (!off) ? in: out - off;
for (i = 0; i < cnt; ++i)
*out++ = *src++;
in += (!off) * cnt;
} return (int)(out-o);
}
/* ===============================================================
* LZW
* ===============================================================*/
struct lzw {
int bitcnt;
int bits, off;
};
static int
lzw_compressed_size(int size)
{
return size*2;
}
static uchar*
lzw_write(uchar *out, struct lzw *s, int code)
{
s->bits |= code << s->off;
s->off += s->bitcnt;
while (s->off >= 8) {
*out++ = (uchar)(s->bits & 0xFF);
s->bits >>= 8;
s->off -= 8;
} return out;
}
static int
lzw_compress(struct profiler *p, uchar *out, const uchar *in, int size)
{
#define LZW_HASH_SIZE 5003
int htbl[LZW_HASH_SIZE]; /* {{sym,entry},...} */
short codetbl[LZW_HASH_SIZE]; /* {entry,...} */
short free_ent = 0x102; /* 0x100 reset dict, 0x101 eof */
int maxcode = 511;
uchar *o = out;
int ent = *in++;
struct lzw st;
memset(&st, 0, sizeof(st));
st.bitcnt=9;
o = lzw_write(o, &st, 0x100);
memset(htbl, 0xFF, sizeof(htbl));
rn: while (--size) {
int c = *in++;
int code = (c << 12) + ent; /* {sym,entry} */
int key = (c << 4) ^ ent; /* hash */
while (htbl[key] >= 0) {
if (htbl[key] == code) {
ent = codetbl[key];
goto rn;
}
++key;
key = key >= LZW_HASH_SIZE ? key - LZW_HASH_SIZE : key;
} o = lzw_write(o, &st, ent);
ent = c;
if (free_ent < 4096) {
if(free_ent > maxcode) {
++st.bitcnt;
if (st.bitcnt == 12)
maxcode = 4096;
else maxcode = (1 << st.bitcnt)-1;
}
codetbl[key] = free_ent++;
htbl[key] = code;
} else {
o = lzw_write(o, &st, 0x100);
memset(htbl, 0xFF, sizeof(htbl));
free_ent = 0x102;
maxcode = 511;
st.bitcnt = 9;
}
}
o = lzw_write(o, &st, ent);
o = lzw_write(o, &st, 0x101);
o = lzw_write(o, &st, 0);
return (int)(o-out);
}
static const uchar*
lzw_read(const uchar *in, struct lzw *s, int *code)
{
if (s->off < s->bitcnt) {
s->bits |= ((*in++) << s->off);
s->bits |= ((*in++) << (s->off+8));
s->off += 16;
}
*code = s->bits & ((1 << s->bitcnt)-1);
s->bits >>= s->bitcnt;
s->off -= s->bitcnt;
return in;
}
static int
lzw_decompress(struct profiler *p, uchar *out, const uchar *in, int size)
{
const uchar *e = in + size, *o = out;
int dict[LZW_HASH_SIZE];
short free_ent = 0x102;
int maxcode = 511;
int next = 0, last = 0;
struct lzw st;
memset(&st, 0, sizeof(st));
st.bitcnt=9;
in = lzw_read(in, &st, &last);
assert(last == 0x100); /* skip */
in = lzw_read(in, &st, &last);
while (in <= e) {
int code, c, ent;
if (last <= 0xFF) {
*out++ = (uchar)last;
} else {
/* unpack dict-entry */
int idx = last, cnt = 1;
assert(free_ent > idx);
while (idx > 0x101) {
code = dict[idx];
c = (code >> 12);
*out++ = (uchar)c;
idx = code & 0xFFF;
cnt++;
} *out++ = (uchar)idx;
/* reverse output */
{uchar *s = out - cnt;
uchar *d = out - 1;
cnt >>= 1;
while (cnt--) {
uchar t = *s; /* swap */
*s++ = *d; *d-- = t;
}}
}
/* grow bit width */
if (free_ent > maxcode) {
++st.bitcnt;
if(st.bitcnt == 12)
maxcode = 4096;
else maxcode = (1<<st.bitcnt)-1;
}
/* read next entry */
in = lzw_read(in, &st, &next);
if (next == 0x100) {
maxcode = 511;
st.bitcnt = 9;
free_ent = 0x102;
in = lzw_read(in, &st, &last);
continue;
} else if (next == 0x101) {
break;
}
/* find first symbol */
ent = (next < free_ent) ? next: last;
while (ent > 0x101) {
code = dict[ent];
ent = code & 0xFFF;
}
/* add new entry into dict */
code = (ent << 12) + last;
assert(free_ent < LZW_HASH_SIZE);
dict[free_ent++] = code;
last = next;
} return (int)(out-o);
}
static int
sdefl_compressed_size(int size)
{
return size*2;
}
/* ===============================================================
* SDEFL
* ===============================================================
* public domain - no warranty implied; use at your own risk
* References:
https://bitbucket.org/rmitton/tigr/src/be3832bee7fb2f274fe5823e38f8ec7fa94e0ce9/src/tigr_inflate.c?at=default&fileviewer=file-view-default
https://github.com/github/putty/blob/49fb598b0e78d09d6a2a42679ee0649df482090e/sshzlib.c
https://www.ietf.org/rfc/rfc1951.txt
*/
#define SDEFL_DICT_BITS 12
#define SDEFL_DICT_SIZE (1 << SDEFL_DICT_BITS)
#define SDEFL_MAX_OFF (1 << 15)
#define SDEFL_MIN_MATCH 3
#define SDEFL_MAX_MATCH 258
//typedef unsigned char uchar;
struct sdefl {int bits, off;};
#define sinfl_rev16(n) ((sdefl_mirror[(n)&0xff] << 8) | sdefl_mirror[((n)>>8)&0xff])
static const uchar sdefl_mirror[256] = {
#define R2(n) n, n + 128, n + 64, n + 192
#define R4(n) R2(n), R2(n + 32), R2(n + 16), R2(n + 48)
#define R6(n) R4(n), R4(n + 8), R4(n + 4), R4(n + 12)
R6(0), R6(2), R6(1), R6(3),
};
static int
sdefl_nxpow2i(int n)
{
n--;
n |= n >> 1;
n |= n >> 2;
n |= n >> 4;
n |= n >> 8;
n |= n >> 16;
return ++n;
}
static int
sdefl_ilog2(int n)
{
#define lt(n) n,n,n,n, n,n,n,n, n,n,n,n ,n,n,n,n
static const char tbl[256] = {-1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,lt(4),lt(5),
lt(5),lt(6),lt(6),lt(6),lt(6),lt(7),lt(7),lt(7),lt(7),lt(7),lt(7),lt(7),lt(7)
}; int tt, t;
if ((tt = (n >> 16)))
return (t = (tt >> 8)) ? 24+tbl[t]: 16+tbl[tt];
else return (t = (n >> 8)) ? 8+tbl[t]: tbl[n];
#undef lt
}
static int
sdefl_hash(const uchar *s)
{
int a = s[0], b = s[1], c = s[2];
int v = (a << 16) | (b << 8) | c;
return ((v >> (3*8 - SDEFL_DICT_BITS)) - v) & (SDEFL_DICT_SIZE-1);
}
static uchar*
sdefl_write(uchar *dst, struct sdefl *s, int code, int bitcnt)
{
s->bits |= (code << s->off);
s->off += bitcnt;
while (s->off >= 8) {
*dst++ = (uchar)(s->bits & 0xFF);
s->bits >>= 8;
s->off -= 8;
} return dst;
}
static uchar*
sdefl_match(uchar *dst, struct sdefl *s, int dist, int len)
{
static const short lxmin[] = {0,11,19,35,67,131};
static const short dxmax[] = {0,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576};
static const short lmin[] = {11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227};
static const short dmin[] = {1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,
385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577};
/* length encoding */
int lc = len;
int lx = sdefl_ilog2(len - 3) - 2;
if (!(lx = (lx < 0) ? 0: lx)) lc += 254;
else if (len >= 258) lx = 0, lc = 285;
else lc = ((lx-1) << 2) + 265 + ((len - lxmin[lx]) >> lx);
if (lc <= 279)
dst = sdefl_write(dst, s, sdefl_mirror[(lc - 256) << 1], 7);
else dst = sdefl_write(dst, s, sdefl_mirror[0xc0 - 280 + lc], 8);
if (lx) dst = sdefl_write(dst, s, len - lmin[lc - 265], lx);
/* distance encoding */
{int dc = dist - 1;
int dx = sdefl_ilog2(sdefl_nxpow2i(dist) >> 2);
if ((dx = (dx < 0) ? 0: dx))
dc = ((dx + 1) << 1) + (dist > dxmax[dx]);
dst = sdefl_write(dst, s, sdefl_mirror[dc << 3], 5);
if (dx) dst = sdefl_write(dst, s, dist - dmin[dc], dx);}
return dst;
}
static uchar*
sdefl_lit(uchar *dst, struct sdefl *s, int c)
{
if (c <= 143)
return sdefl_write(dst, s, sdefl_mirror[0x30+c], 8);
else return sdefl_write(dst, s, 1 + 2 * sdefl_mirror[0x90 - 144 + c], 9);
}
static int
sdeflate(struct profiler *p, uchar *out, const uchar *in, int size)
{
struct sdefl st;
uchar *dst = out;
const uchar *e = in + size;
const uchar *dict[SDEFL_DICT_SIZE];
memset(dict,0,sizeof(dict));
memset(&st,0,sizeof(st));
dst = sdefl_write(dst,&st,0x01,1); /* block */
dst = sdefl_write(dst,&st,0x01,2); /* static huffman */
while (in < e) {
const int h = sdefl_hash(in);
const uchar **ent = &dict[h & (SDEFL_DICT_SIZE-1)];
const uchar *sub = *ent; *ent = in;
if (sub && (in > sub) && ((in-sub) < SDEFL_MAX_OFF) &&
!memcmp(in,sub,SDEFL_MIN_MATCH)) {
/* match */
const uchar *s = sub + SDEFL_MIN_MATCH;
int len, dist = (int)(in - sub);
in += (len = SDEFL_MIN_MATCH);
while ((*s == *in) && (len < SDEFL_MAX_MATCH))
len++, s++, in++;
dst = sdefl_match(dst, &st, dist, len);
} else dst = sdefl_lit(dst, &st, *in++);
}
/* zlib partial flush */
dst = sdefl_write(dst, &st, 0, 7);
dst = sdefl_write(dst, &st, 2, 10);
dst = sdefl_write(dst, &st, 2, 3);
return (int)(dst - out);
}
struct sinfl {
int bits, bitoff;
unsigned lits[288];
unsigned dsts[32];
unsigned lens[19];
int tlit, tdist, tlen;
};
static int
sinfl_read(const uchar **src, const uchar *end,
struct sinfl *s, int n)
{
const uchar *in = *src;
int v = s->bits & ((1 << n)-1);
s->bits >>= n;
s->bitoff -= n;
while (s->bitoff < 16 && in < end) {
s->bits |= (*in++) << s->bitoff;
s->bitoff += 8;
} *src = in;
return v;
}
static int
sinfl_build(unsigned *tree, uchar *lens, int symcnt)
{
int n, cnt[16], first[16], codes[16];
memset(cnt, 0, sizeof(cnt));
cnt[0] = first[0] = codes[0] = 0;
for (n = 0; n < symcnt; ++n) cnt[lens[n]]++;
for (n = 1; n <= 15; n++) {
codes[n] = (codes[n-1] + cnt[n-1]) << 1;
first[n] = first[n-1] + cnt[n-1];
}
for (n = 0; n < symcnt; n++) {
int slot, code, len = lens[n];
if (!len) continue;
code = codes[len]++;
slot = first[len]++;
tree[slot] = (unsigned)((code << (32-len)) | (n << 4) | len);
} return first[15];
}
static int
sinfl_decode(const uchar **in, const uchar *end,
struct sinfl *s, unsigned *tree, int max)
{
/* bsearch next prefix code */
unsigned key, lo = 0, hi = (unsigned)max;
unsigned search = (unsigned)(sinfl_rev16(s->bits) << 16) | 0xffff;
while (lo < hi) {
unsigned guess = (lo + hi) / 2;
if (search < tree[guess]) hi = guess;
else lo = guess + 1;
}
/* pull out and check key */
key = tree[lo-1];
assert(((search^key) >> (32-(key&0xf))) == 0);
sinfl_read(in, end, s, key & 0x0f);
return (key >> 4) & 0x0fff;
}
static int
sinflate(struct profiler *p, uchar *out, const uchar *in, int size)
{
static const char order[] = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
static const short dbase[30+2] = {1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577};
static const uchar dbits[30+2] = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,
10,10,11,11,12,12,13,13,0,0};
static const short lbase[29+2] = {3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,
43,51,59,67,83,99,115,131,163,195,227,258,0,0};
static const uchar lbits[29+2] = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,
4,4,4,5,5,5,5,0,0,0};
enum sinfl_states {hdr,stored,fixed,dyn,blk};
enum sinfl_states state = hdr;
const uchar *e = in + size, *o = out;
struct sinfl s;
int last = 0;
memset(&s, 0, sizeof(s));
sinfl_read(&in,e,&s,0); /* buffer input */
while (in < e || s.bitoff) {
switch (state) {
case hdr: {
int type = 0; /* block header */
last = sinfl_read(&in,e,&s,1);
type = sinfl_read(&in,e,&s,2);
switch (type) {default: return (int)(out-o);
case 0x00: state = stored; break;
case 0x01: state = fixed; break;
case 0x02: state = dyn; break;}
} break;
case stored: {
int len; /* uncompressed block */
sinfl_read(&in,e,&s,s.bitoff & 7);
len = sinfl_read(&in,e,&s,16);
memcpy(out, in, (size_t)len);
in += len, out += len;
sinfl_read(&in,e,&s,16);
state = hdr;
} break;
case fixed: {
/* fixed huffman codes */
int n; uchar lens[288+32];
for (n = 0; n <= 143; n++) lens[n] = 8;
for (n = 144; n <= 255; n++) lens[n] = 9;
for (n = 256; n <= 279; n++) lens[n] = 7;
for (n = 280; n <= 287; n++) lens[n] = 8;
for (n = 0; n < 32; n++) lens[288+n] = 5;
/* build trees */
s.tlit = sinfl_build(s.lits, lens, 288);
s.tdist = sinfl_build(s.dsts, lens + 288, 32);
state = blk;
} break;
case dyn: {
/* dynamic huffman codes */
int n, i, nlit, ndist, nlen;
uchar nlens[19] = {0}, lens[288+32];
nlit = 257 + sinfl_read(&in,e,&s,5);
ndist = 1 + sinfl_read(&in,e,&s,5);
nlen = 4 + sinfl_read(&in,e,&s,4);
for (n = 0; n < nlen; n++)
nlens[order[n]] = (uchar)sinfl_read(&in,e,&s,3);
s.tlen = sinfl_build(s.lens, nlens, 19);
/* decode code lengths */
for (n = 0; n < nlit + ndist;) {
int sym = sinfl_decode(&in, e, &s, s.lens, s.tlen);
switch (sym) {default: lens[n++] = (uchar)sym; break;
case 16: for (i=3+sinfl_read(&in,e,&s,2);i;i--,n++) lens[n]=lens[n-1]; break;
case 17: for (i=3+sinfl_read(&in,e,&s,3);i;i--,n++) lens[n]=0; break;
case 18: for (i=11+sinfl_read(&in,e,&s,7);i;i--,n++) lens[n]=0; break;}
}
/* build lit/dist trees */
s.tlit = sinfl_build(s.lits, lens, nlit);
s.tdist = sinfl_build(s.dsts, lens+nlit, ndist);
state = blk;
} break;
case blk: {
/* decompress block */
int sym = sinfl_decode(&in, e, &s, s.lits, s.tlit);
if (sym > 256) {sym -= 257; /* match symbol */
{int len = sinfl_read(&in, e, &s, lbits[sym]) + lbase[sym];
int dsym = sinfl_decode(&in, e, &s, s.dsts, s.tdist);
int offs = sinfl_read(&in, e, &s, dbits[dsym]) + dbase[dsym];
if (offs > (int)(out-o)) return (int)(out-o);
while (len--) *out = *(out-offs), out++;}
} else if (sym == 256) {
if (last) return (int)(out-o);
state = hdr;
} else *out++ = (uchar)sym;
} break;}
} return (int)(out-o);
}
/* ===============================================================
* APP
* ===============================================================*/
enum compressor_type {
COMPRESSOR_RLE,
COMPRESSOR_LZ,
COMPRESSOR_LZW,
COMPRESSOR_HUF,
COMPRESSOR_DEFL,
COMPRESSOR_CNT
};
static const
struct compressor {
enum compressor_type type;
const char *str;
int(*compressed_size)(int size);
int(*compress)(struct profiler *p, uchar*, const uchar*, int);
int(*decompress)(struct profiler *p, uchar*, const uchar*, int);
} comprs[] = {
{COMPRESSOR_RLE, "rle", rle_compressed_size, rle_compress, rle_decompress},
{COMPRESSOR_LZ, "lz", lz_compressed_size, lz_compress, lz_decompress},
{COMPRESSOR_LZW, "lzw", lzw_compressed_size, lzw_compress, lzw_decompress},
{COMPRESSOR_HUF, "huf", huf_compressed_size, huf_compress, huf_decompress},
{COMPRESSOR_DEFL, "defl", sdefl_compressed_size, sdeflate, sinflate}
};
enum operation_type {
OP_COMPRESS,
OP_DECOMPRESS,
OP_TEST,
OP_CNT
};
static const
struct operation {
enum operation_type type;
const char *str;
} ops[] = {
{OP_COMPRESS, "compress"},
{OP_DECOMPRESS, "decompress"},
{OP_TEST, "test"}
};
static void
usage(const char *app)
{
fprintf(stderr, "usage:\n");
fprintf(stderr, "\t%s compress [rle|lz|lzw|huf|defl] [in] [out]\n", app);
fprintf(stderr, "\t%s decompress [rle|lz|lzw|huf|defl] [in] [out]\n", app);
fprintf(stderr, "\t%s test [rle|lz|lzw|huf|defl] [in]\n", app);
exit(-1);
}
int main(int argc, char **argv)
{
int i = 0;
uchar *out = 0;
int size = 0;
struct profiler p;
const struct compressor *compr = 0;
const struct operation *op = 0;
/* check input arguments */
struct file_contents fc;
memset(&p, 0, sizeof(p));
if (argc < 4) {
fprintf(stderr, "Not enough arguments\n");
usage(argv[0]);
}
/* select operation */
for (i = 0; i < cntof(ops); ++i) {
const struct operation *o = ops + i;
if (!strcmp(o->str, argv[1]))
{op = o; break;};
}
if (!op) {
fprintf(stderr, "Unkown command: %s\n", argv[1]);
usage(argv[0]);
}
/* select compressor */
for (i = 0; i < cntof(comprs); ++i) {
const struct compressor *c = comprs + i;
if (!strcmp(c->str, argv[2]))
{compr = c; break;};
}
if (!compr) {
fprintf(stderr, "Unkown compressor: %s\n", argv[2]);
usage(argv[0]);
}
/* read input file */
fc = read_file_content(argv[3]);
if (!fc.valid) {
fprintf(stderr, "Failed to read file : %s\n", argv[3]);
usage(argv[0]);
}
/* execute operation */
switch (op->type) {
case OP_COMPRESS: {
size = compr->compressed_size(fc.size);
out = calloc(1,(size_t)size+4);
memcpy(out,&fc.size,4);
profiler_begin(&p, PROFILE_COMPRESS, fc.size);
size = compr->compress(&p, out+4, fc.data, fc.size);
profiler_end(&p, size);
size += 4;
} break;
case OP_DECOMPRESS: {
if (fc.size < 4) {
printf("Invalid file %s size to small", argv[3]);
usage(argv[0]);
}
memcpy(&size, fc.data, 4);
out = calloc(1,(size_t)size);
profiler_begin(&p, PROFILE_UNCOMPRESS, fc.size-4);
compr->decompress(&p, out, fc.data+4, fc.size-4);
profiler_end(&p, size);
} break;
case OP_TEST: {
/* compress */
int final_size;
size = compr->compressed_size(fc.size);
out = calloc(1,(size_t)size);
profiler_begin(&p, PROFILE_COMPRESS, fc.size);
size = compr->compress(&p, out, fc.data, fc.size);
profiler_end(&p, size);
/* decompress */
{void *old = calloc(1, (size_t)fc.size);
profiler_begin(&p, PROFILE_UNCOMPRESS, size);
final_size = compr->decompress(&p, old, out, size);
profiler_end(&p, fc.size);
/* validate */
if (final_size == fc.size && !memcmp(old, fc.data, (size_t)fc.size))
fprintf(stderr, "Success\n");
else fprintf(stderr, "Failure\n");}
profiler_print(stdout, &p);
return 0;
}}
if (argc < 5) {
fprintf(stderr, "Not enough arguments\n");
usage(argv[0]);
}
/* write output file */
{FILE *fp = fopen(argv[4], "wb");
if (!fp) {
fprintf(stderr, "failed to write to file :%s\n", argv[4]);
usage(argv[0]);
} fwrite(out, (size_t)size, 1, fp);
fclose(fp);}
profiler_print(stdout, &p);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment