Skip to content

Instantly share code, notes, and snippets.

@starwing
Last active November 30, 2019 11:56
Show Gist options
  • Save starwing/ff3e7078c3c07b6f7a0c9d1b634a64d8 to your computer and use it in GitHub Desktop.
Save starwing/ff3e7078c3c07b6f7a0c9d1b634a64d8 to your computer and use it in GitHub Desktop.
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#define SHIFT (sizeof(unsigned)*CHAR_BIT)
#define RANGE_MAX (~0u)
#define CHAR_MASK ((1u<<CHAR_BIT)-1)
enum ExtraChar { ESC = 1<<CHAR_BIT, EOS, CHAR_COUNT };
static unsigned mul_div(unsigned a, unsigned b, unsigned c) {
unsigned long long ret = a;
ret *= b, ret /= c;
return (unsigned)ret;
}
typedef struct ACBuffer {
char buff[BUFSIZ];
unsigned curr, count;
const char *fname;
FILE *fp;
} ACBuffer;
static void ac_freebuffer(ACBuffer *b)
{ if (b->fp) fclose(b->fp), b->fp = NULL; }
static void ac_initbuffer(ACBuffer *b, const char *fname, const char *mode) {
memset(b, 0, sizeof(*b));
if ((b->fp = fopen(b->fname = fname, mode)) == NULL)
perror(fname), exit(1);
}
static void ac_putchar(ACBuffer *b, int ch) {
if (ch != EOS) b->buff[b->curr++] = ch;
if (ch == EOS || b->curr >= BUFSIZ) {
size_t count = fwrite(b->buff, 1, b->curr, b->fp);
if (count != b->curr) perror(b->fname), exit(1);
b->curr = 0;
}
}
static int ac_getchar(ACBuffer *b) {
if (b->curr == b->count) {
size_t count = fread(b->buff, 1, BUFSIZ, b->fp);
if (count != BUFSIZ && ferror(b->fp)) perror(b->fname), exit(1);
b->curr = 0, b->count = (unsigned)count;
if (b->count == 0) return EOS;
}
return b->buff[b->curr++] & CHAR_MASK;
}
typedef struct ACEncoder {
unsigned freq[CHAR_COUNT], sums[CHAR_COUNT], sum;
unsigned lower, upper;
unsigned carry, current;
size_t cc;
} ACEncoder;
static void ac_addbit(ACEncoder *e, int pos, unsigned v) {
int i;
for (i = pos+1; i <= CHAR_COUNT; i += i&-i)
e->sums[i-1] += v;
e->freq[pos] += v, e->sum += v;
}
static unsigned ac_fetchbit(ACEncoder *e, int pos) {
int i, sum = 0;
for (i = pos+1; i > 0; i -= i&-i)
sum += e->sums[i-1];
return (unsigned)sum;
}
static unsigned ac_log2(unsigned v) {
unsigned r, shift;
r = (v > 0xFFFF) << 4; v >>= r;
shift = (v > 0xFF ) << 3; v >>= shift; r |= shift;
shift = (v > 0xF ) << 2; v >>= shift; r |= shift;
shift = (v > 0x3 ) << 1; v >>= shift; r |= shift;
return (r |= (v >> 1)) + 1;
}
static void ac_init(ACEncoder *e) {
memset(e, 0, sizeof(*e));
ac_addbit(e, ESC, 1);
e->upper = RANGE_MAX;
}
static size_t ac_update(ACEncoder *e, unsigned prev, unsigned curr, unsigned sum) {
unsigned delta = e->upper - e->lower;
e->lower += mul_div(delta, prev, sum);
e->upper = mul_div(delta, curr, sum) + e->lower;
return SHIFT - ac_log2(e->lower^e->upper);
}
static size_t ac_underflow(ACEncoder *e, size_t shift) {
size_t pending = 0;
e->lower <<= shift, e->upper <<= shift, e->upper |= (1<<shift)-1;
if ((e->lower & ~e->upper & 1<<(SHIFT-2)) != 0) {
unsigned mask = (~e->lower|e->upper) << 1;
pending = SHIFT - ac_log2(mask);
e->lower = (e->lower << pending) ^ 1<<(SHIFT-1);
e->upper = (e->upper << pending) | 1<<(SHIFT-1) | (1<<pending)-1;
}
return pending;
}
static void ac_encode_flush(ACEncoder *e, ACBuffer *out,
unsigned bin, size_t shift, size_t pending) {
if (pending) {
ac_encode_flush(e, out, ~(~(bin>>(SHIFT-1))+1)
^(1u<<(SHIFT-1)), pending+1, 0);
bin <<= 1, shift -= 1;
}
while (shift > 0) {
size_t cc = e->cc+shift > SHIFT ? SHIFT - e->cc : shift;
e->carry = (e->carry << cc) | (bin >> (SHIFT-cc));
shift -= cc, e->cc += cc;
while (e->cc >= CHAR_BIT)
ac_putchar(out, (e->carry >> (e->cc -= CHAR_BIT)) & CHAR_MASK);
}
}
static size_t ac_encode_char(ACEncoder *e, ACBuffer *out, int ch, size_t p) {
size_t shift;
unsigned prev = ch, curr = 1, sum = CHAR_COUNT;
if (e->freq[ch])
prev = ac_fetchbit(e, ch-1), curr = e->freq[ch], sum = e->sum;
else
p = ac_encode_char(e, out, ESC, p);
if ((shift = ac_update(e, prev, curr, sum)) > 0)
ac_encode_flush(e, out, e->lower, shift, p), p = 0;
ac_addbit(e, ch, 1);
return p + ac_underflow(e, shift);
}
static void ac_encode(ACEncoder *e, ACBuffer *in, ACBuffer *out) {
size_t pending = 0;
int ch;
while ((ch = ac_getchar(in)) != EOS)
pending = ac_encode_char(e, out, ch, pending);
pending = ac_encode_char(e, out, EOS, pending);
ac_encode_flush(e, out, (1+(e->lower>>(SHIFT-2)))<<(SHIFT-2), 2, pending);
if (e->cc != 0) ac_encode_flush(e, out, 0, CHAR_BIT-e->cc, 0);
ac_putchar(out, EOS);
}
static unsigned ac_decode_fetch(ACEncoder *e, ACBuffer *in, size_t shift) {
unsigned current = 0;
while (shift > 0) {
size_t cc = shift > CHAR_BIT ? CHAR_BIT : shift;
if (e->cc < cc) {
int ch = ac_getchar(in);
if (ch == EOS) ch = 0;
e->carry = (e->carry << CHAR_BIT) | ch;
e->cc += CHAR_BIT;
}
shift -= cc, e->cc -= cc;
current = (current << cc) | (e->carry >> e->cc);
e->carry &= (1<<e->cc) - 1;
}
return current;
}
static void ac_decode_fill(ACEncoder *e, ACBuffer *in,
unsigned prev, unsigned curr, unsigned sum) {
size_t shift = ac_update(e, prev, curr, sum);
e->current = (e->current << shift) | ac_decode_fetch(e, in, shift);
if ((shift = ac_underflow(e, shift)) > 0)
e->current = ((e->current << shift)^(1<<(SHIFT-1)))
| ac_decode_fetch(e, in, shift);
if (!(e->lower <= e->current && e->current < e->upper))
fprintf(stderr, "decode fail"), exit(1);
}
static void ac_decode(ACEncoder *e, ACBuffer *in, ACBuffer *out) {
int ch = 0;
e->current = ac_decode_fetch(e, in, SHIFT);
while (ch != EOS) {
unsigned dx = mul_div(e->current-e->lower+1, e->sum, e->upper-e->lower);
unsigned i, x = 0, cx, p = 0, cp;
for (i = CHAR_BIT+1; i > 0; --i)
if ((cp = p+(1<<(i-1))) <= CHAR_COUNT
&& (cx = x+e->sums[cp-1]) <= dx)
p = cp, x = cx;
ac_decode_fill(e, in, ac_fetchbit(e, p-1), e->freq[p], e->sum);
if (p == ESC) {
ac_addbit(e, p, 1);
p = mul_div(e->current-e->lower+1, CHAR_COUNT, e->upper-e->lower);
ac_decode_fill(e, in, p, 1, CHAR_COUNT);
}
ac_putchar(out, ch = (int)p);
ac_addbit(e, ch, 1);
}
}
static void encode_file(const char *infile, const char *outfile) {
ACBuffer in, out;
ACEncoder e;
ac_initbuffer(&in, infile, "rb");
ac_initbuffer(&out, outfile, "wb");
ac_init(&e);
ac_encode(&e, &in, &out);
ac_freebuffer(&in);
ac_freebuffer(&out);
}
static void decode_file(const char *infile, const char *outfile) {
ACBuffer in, out;
ACEncoder e;
ac_initbuffer(&in, infile, "rb");
ac_initbuffer(&out, outfile, "wb");
ac_init(&e);
ac_decode(&e, &in, &out);
ac_freebuffer(&in);
ac_freebuffer(&out);
}
int main(int argc, char **argv) {
if (argc >= 4 && strcmp(argv[1], "encode") == 0)
encode_file(argv[2], argv[3]);
else if (argc >= 4 && strcmp(argv[1], "decode") == 0)
decode_file(argv[2], argv[3]);
else
fprintf(stderr, "Usage: %s (encode|decode) infile outfile\n", argv[0]);
return 0;
}
/* cc: run+='encode % out.bin' */
/* xcc: run+='decode out.bin out.txt' */
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment