Skip to content

Instantly share code, notes, and snippets.

@kspalaiologos
Last active June 4, 2024 17:13
Show Gist options
  • Save kspalaiologos/6a1929873a6f14a3cd64192e234ee407 to your computer and use it in GitHub Desktop.
Save kspalaiologos/6a1929873a6f14a3cd64192e234ee407 to your computer and use it in GitHub Desktop.
PPM in 90 lines of code.
#define COPY(dst, src, n) for (int i = 0; i < n; i++) dst = src;
typedef struct {
uint32_t range, FF, cache; uint64_t low; FILE * source;
} rc_t;
void rc_init(rc_t * rc, FILE * source) {
rc->range = 0xFFFFFFFF;
rc->FF = rc->cache = 0; rc->low = 0;
rc->source = source;
}
void rc_init_dec(rc_t * rc, FILE * source) {
rc_init(rc, source);
for(int i = 0; i < 5; i++) rc->low = (rc->low << 8) | getc(source);
}
#define RC_RESCALE { \
if (rc->low >> 24 != 0xFF) { \
putc(rc->cache + (rc->low >> 32), rc->source); \
for(int c = rc->low >> 32; rc->FF; rc->FF--) putc(c + 0xFF, rc->source); \
rc->cache = rc->low >> 24; \
} else rc->FF++; \
rc->low = ((uint32_t) rc->low) << 8; \
}
uint32_t rc_decode(rc_t * rc, uint16_t * pdf, uint32_t n) {
uint32_t tot, i, cdf, freq;
for (i = 0, tot = 0, cdf = 0; i < n; i++) tot += pdf[i];
freq = rc->low / (rc->range /= tot);
for (i = 0; i < n && cdf + pdf[i] <= freq; i++) cdf += pdf[i];
rc->low -= cdf * rc->range; rc->range *= pdf[i];
for(; rc->range < (1 << 24); rc->range <<= 8)
rc->low = (rc->low << 8) | getc(rc->source);
return i;
}
void rc_encode(rc_t * rc, uint32_t sym, uint16_t * pdf, uint32_t n) {
uint32_t tot, cum, i;
for (tot = 0, cum = 0, i = 0; i < n; i++) {
tot += pdf[i]; if (i < sym) cum += pdf[i];
}
rc->low += cum * (rc->range /= tot); rc->range *= pdf[sym];
for(; rc->range < (1 << 24); rc->range <<= 8) RC_RESCALE
}
void rc_flush(rc_t * rc) { for(int i = 0; i < 5; i++) RC_RESCALE }
static uint16_t om1[257], o0[257], o1[0x100*257], o2[0x200*257], * o4, * pdf;
static uint32_t c4;
uint32_t hash32(uint32_t x) {
x ^= x >> 17; x *= 0xed5ad4bb; x ^= x >> 11; x *= 0xac4c1b51;
x ^= x >> 15; x *= 0x31848bab; x ^= x >> 14; return x;
}
static void inc(uint16_t * o, uint32_t sym) {
if (o[sym] == 0) o[256]++; // New symbol encountered: escape is more likely.
if (++o[sym] > 0xFFF0) for (int i = 0; i < 0x100; i++) o[i] >>= 1, o[i]++;
}
static void update(int sym) {
inc(o0, sym); inc(o1 + 257 * (c4 & 0xFF), sym);
inc(o2 + 257 * (hash32(c4 & 0xFFFF) & 0x1FF), sym);
inc(o4 + 257 * (hash32(c4) & 0xFFFF), sym);
c4 = (c4 << 8) | sym;
}
#define PPM(a, x, p...) pdf = a + x; if (pdf[256]) { p; }
void encode(FILE * in, FILE * out) {
#define ENC(o) \
if (!pdf[sym]) { rc_encode(&rc, 256, pdf, 257); } \
else { rc_encode(&rc, sym, pdf, 257); update(sym); continue; };
o4 = calloc(0x10000 * 257, sizeof(uint16_t));
rc_t rc; rc_init(&rc, out); COPY(om1[i], 1, 257); pdf = om1;
for (uint32_t sym; (sym = getc(in)) != EOF; ) {
PPM(o4, 257 * (hash32(c4) & 0xFFFF), ENC(o4));
PPM(o2, 257 * (hash32(c4 & 0xFFFF) & 0x1FF), ENC(o2));
PPM(o1, 257 * (c4 & 0xFF), ENC(o1));
PPM(o0, 0, ENC(o0));
pdf = om1; rc_encode(&rc, sym, pdf, 257); update(sym);
}
PPM(o4, 257 * (hash32(c4) & 0xFFFF), rc_encode(&rc, 256, pdf, 257));
PPM(o2, 257 * (hash32(c4 & 0xFFFF) & 0x1FF), rc_encode(&rc, 256, pdf, 257));
PPM(o1, 257 * (c4 & 0xFF), rc_encode(&rc, 256, pdf, 257));
PPM(o0, 0, rc_encode(&rc, 256, pdf, 257));
pdf = om1; rc_encode(&rc, 256, pdf, 257); rc_flush(&rc);
}
void decode(FILE * in, FILE * out) {
#define DEC \
sym = rc_decode(&rc, pdf, 257); \
if (sym != 256) { putc(sym, out); update(sym); continue; }
uint32_t sym; o4 = calloc(0x10000 * 257, sizeof(uint16_t));
rc_t rc; rc_init_dec(&rc, in); COPY(om1[i], 1, 257); pdf = om1;
do {
PPM(o4, 257 * (hash32(c4) & 0xFFFF), DEC);
PPM(o2, 257 * (hash32(c4 & 0xFFFF) & 0x1FF), DEC);
PPM(o1, 257 * (c4 & 0xFF), DEC);
PPM(o0, 0, DEC);
pdf = om1; DEC;
} while (sym != 256);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment