Last active
June 4, 2024 17:13
-
-
Save kspalaiologos/6a1929873a6f14a3cd64192e234ee407 to your computer and use it in GitHub Desktop.
PPM in 90 lines of code.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define COPY(dst, src, n) for (int i = 0; i < n; i++) dst = src; | |
typedef struct { | |
uint32_t range, FF, cache; uint64_t low; FILE * source; | |
} rc_t; | |
void rc_init(rc_t * rc, FILE * source) { | |
rc->range = 0xFFFFFFFF; | |
rc->FF = rc->cache = 0; rc->low = 0; | |
rc->source = source; | |
} | |
void rc_init_dec(rc_t * rc, FILE * source) { | |
rc_init(rc, source); | |
for(int i = 0; i < 5; i++) rc->low = (rc->low << 8) | getc(source); | |
} | |
#define RC_RESCALE { \ | |
if (rc->low >> 24 != 0xFF) { \ | |
putc(rc->cache + (rc->low >> 32), rc->source); \ | |
for(int c = rc->low >> 32; rc->FF; rc->FF--) putc(c + 0xFF, rc->source); \ | |
rc->cache = rc->low >> 24; \ | |
} else rc->FF++; \ | |
rc->low = ((uint32_t) rc->low) << 8; \ | |
} | |
uint32_t rc_decode(rc_t * rc, uint16_t * pdf, uint32_t n) { | |
uint32_t tot, i, cdf, freq; | |
for (i = 0, tot = 0, cdf = 0; i < n; i++) tot += pdf[i]; | |
freq = rc->low / (rc->range /= tot); | |
for (i = 0; i < n && cdf + pdf[i] <= freq; i++) cdf += pdf[i]; | |
rc->low -= cdf * rc->range; rc->range *= pdf[i]; | |
for(; rc->range < (1 << 24); rc->range <<= 8) | |
rc->low = (rc->low << 8) | getc(rc->source); | |
return i; | |
} | |
void rc_encode(rc_t * rc, uint32_t sym, uint16_t * pdf, uint32_t n) { | |
uint32_t tot, cum, i; | |
for (tot = 0, cum = 0, i = 0; i < n; i++) { | |
tot += pdf[i]; if (i < sym) cum += pdf[i]; | |
} | |
rc->low += cum * (rc->range /= tot); rc->range *= pdf[sym]; | |
for(; rc->range < (1 << 24); rc->range <<= 8) RC_RESCALE | |
} | |
void rc_flush(rc_t * rc) { for(int i = 0; i < 5; i++) RC_RESCALE } | |
static uint16_t om1[257], o0[257], o1[0x100*257], o2[0x200*257], * o4, * pdf; | |
static uint32_t c4; | |
uint32_t hash32(uint32_t x) { | |
x ^= x >> 17; x *= 0xed5ad4bb; x ^= x >> 11; x *= 0xac4c1b51; | |
x ^= x >> 15; x *= 0x31848bab; x ^= x >> 14; return x; | |
} | |
static void inc(uint16_t * o, uint32_t sym) { | |
if (o[sym] == 0) o[256]++; // New symbol encountered: escape is more likely. | |
if (++o[sym] > 0xFFF0) for (int i = 0; i < 0x100; i++) o[i] >>= 1, o[i]++; | |
} | |
static void update(int sym) { | |
inc(o0, sym); inc(o1 + 257 * (c4 & 0xFF), sym); | |
inc(o2 + 257 * (hash32(c4 & 0xFFFF) & 0x1FF), sym); | |
inc(o4 + 257 * (hash32(c4) & 0xFFFF), sym); | |
c4 = (c4 << 8) | sym; | |
} | |
#define PPM(a, x, p...) pdf = a + x; if (pdf[256]) { p; } | |
void encode(FILE * in, FILE * out) { | |
#define ENC(o) \ | |
if (!pdf[sym]) { rc_encode(&rc, 256, pdf, 257); } \ | |
else { rc_encode(&rc, sym, pdf, 257); update(sym); continue; }; | |
o4 = calloc(0x10000 * 257, sizeof(uint16_t)); | |
rc_t rc; rc_init(&rc, out); COPY(om1[i], 1, 257); pdf = om1; | |
for (uint32_t sym; (sym = getc(in)) != EOF; ) { | |
PPM(o4, 257 * (hash32(c4) & 0xFFFF), ENC(o4)); | |
PPM(o2, 257 * (hash32(c4 & 0xFFFF) & 0x1FF), ENC(o2)); | |
PPM(o1, 257 * (c4 & 0xFF), ENC(o1)); | |
PPM(o0, 0, ENC(o0)); | |
pdf = om1; rc_encode(&rc, sym, pdf, 257); update(sym); | |
} | |
PPM(o4, 257 * (hash32(c4) & 0xFFFF), rc_encode(&rc, 256, pdf, 257)); | |
PPM(o2, 257 * (hash32(c4 & 0xFFFF) & 0x1FF), rc_encode(&rc, 256, pdf, 257)); | |
PPM(o1, 257 * (c4 & 0xFF), rc_encode(&rc, 256, pdf, 257)); | |
PPM(o0, 0, rc_encode(&rc, 256, pdf, 257)); | |
pdf = om1; rc_encode(&rc, 256, pdf, 257); rc_flush(&rc); | |
} | |
void decode(FILE * in, FILE * out) { | |
#define DEC \ | |
sym = rc_decode(&rc, pdf, 257); \ | |
if (sym != 256) { putc(sym, out); update(sym); continue; } | |
uint32_t sym; o4 = calloc(0x10000 * 257, sizeof(uint16_t)); | |
rc_t rc; rc_init_dec(&rc, in); COPY(om1[i], 1, 257); pdf = om1; | |
do { | |
PPM(o4, 257 * (hash32(c4) & 0xFFFF), DEC); | |
PPM(o2, 257 * (hash32(c4 & 0xFFFF) & 0x1FF), DEC); | |
PPM(o1, 257 * (c4 & 0xFF), DEC); | |
PPM(o0, 0, DEC); | |
pdf = om1; DEC; | |
} while (sym != 256); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment