Skip to content

Instantly share code, notes, and snippets.

@kulp
Last active December 17, 2015 04:09
Show Gist options
  • Save kulp/f2dddb8980e9b84c4281 to your computer and use it in GitHub Desktop.
Save kulp/f2dddb8980e9b84c4281 to your computer and use it in GitHub Desktop.
Huffman encoder
#include "huffman.h"
int huff_decode(const dict_entry *dict, char *out, const huff_word **in,
size_t *len, size_t *bits, size_t *off)
{
const dict_entry *n = dict;
int found = 0;
while (!found && *len && *off < *bits) {
unsigned char bit = (**in >> *off) & 1;
if (bit) {
found = n->rleaf;
n += n->offset + 2;
} else {
// Left nodes are stored implicitly. This implies that the most
// efficient dictionary will be skewed right, so that there are not
// long spans of left-nodes to count over.
found = n->lleaf;
n++;
}
if (++*off == huff_wordsize && --*len != 0) {
++*in;
*off = 0;
*bits -= huff_wordsize;
}
}
if (found)
*out = n->word;
return found;
}
#include "huff_private.h"
static void walk_nodes(const struct huff_node *node,
const struct huff_node **out)
{
if (node->leaf) {
out[node->val] = node;
} else {
walk_nodes(node->left , out);
walk_nodes(node->right, out);
}
}
static int encode_bits(const struct huff_node *node,
int max, int *offset, char *out)
{
if (*offset >= max)
return 1; // failure to fit
if (node && node->parent) {
encode_bits(node->parent, max, offset, out);
long bit = 1 << (*offset % CHAR_BIT);
if (node->parent->right == node)
out[*offset / CHAR_BIT] |= bit;
else
out[*offset / CHAR_BIT] &= ~bit;
++*offset;
}
return 0;
}
int huff_encode(struct huff_state *s,
size_t in_len, valtype *in,
size_t out_len, int *offset, char *out)
{
int rc = 0;
const struct huff_node *lookup[1 << (CHAR_BIT * sizeof(valtype))] = { 0 };
walk_nodes(s->head, lookup);
valtype x;
while (!rc && in_len > 0 && (x = *in++)) {
int bits = *offset;
rc = encode_bits(lookup[x], out_len * CHAR_BIT, &bits, out);
if (!rc) {
*offset = bits;
in_len--;
}
}
return in_len; // nonzero means we didn't finish encoding
}
#ifndef HUFF_PRIVATE_H_
#define HUFF_PRIVATE_H_
#include "huffman.h"
struct huff_node {
struct huff_node *left, *right, *parent;
valtype val;
unsigned long weight;
unsigned leaf:1;
};
struct huff_state {
// size is the allocated size of queue ; used is the number of slots used
// in queue. When used grows to be equal to size, realloc().
size_t size, used;
// All nodes are stored consecutively in queue, but only the ones between
// head and tail are considered "in the queue" ; that is, when head ==
// tail, the queue is empty, and when (tail - head == 1), there is one
// node in the queue (end condition). The queue is realloc'ed as needed
// when new nodes are added, whether leaf nodes or internal.
struct huff_node *queue, *head, *tail;
unsigned built:1;
};
#endif
#include "huff_private.h"
#include <stddef.h>
#include <stdlib.h>
int huff_init(struct huff_state **_s)
{
struct huff_state *s = *_s = malloc(sizeof *s);
s->size = 64;
s->used = 0;
s->queue = calloc(s->size, sizeof *s->queue);
s->head = s->tail = s->queue;
return 0;
}
static int weight_cmp(const void *_a, const void *_b)
{
const struct huff_node *a = _a,
*b = _b;
return a->weight - b->weight;
}
static void ensure_order(struct huff_state *s)
{
// It would be better to insert in place instead of resorting an
// almost-sorted list every time, especially since this is a worst-case
// scenario for quicksort.
qsort(s->head, s->tail - s->head, sizeof *s->queue, weight_cmp);
}
// ensures there is space for at least one more node
static int ensure_space(struct huff_state *s)
{
if (s->used < s->size)
return 0;
ptrdiff_t h = s->head - s->queue,
t = s->tail - s->queue;
while (s->used >= s->size && s->queue)
s->queue = realloc(s->queue, (s->size *= 2) * sizeof *s->queue);
s->head = &s->queue[h];
s->tail = &s->queue[t];
return !s->queue;
}
int huff_add(struct huff_state *s, valtype val, unsigned long weight)
{
if (s->built || ensure_space(s))
return -1;
s->used++;
struct huff_node *q = s->tail++;
q->left = NULL;
q->right = NULL;
q->val = val;
q->weight = weight;
q->leaf = 1;
ensure_order(s);
return 0;
}
static int link_parents(struct huff_node *node)
{
if (node->leaf)
return 1;
node->left->parent = node->right->parent = node;
return link_parents(node->left) + link_parents(node->right);
}
int huff_build(struct huff_state *s)
{
while ((ensure_order(s), s->tail - s->head > 1)) {
struct huff_node *a = s->head++,
*b = s->head++;
if (ensure_space(s))
return -1;
// TODO coalesce nodes to the beginning of queue (reuse space)
s->used++;
struct huff_node *c = s->tail++;
c->left = a;
c->right = b;
c->weight = a->weight + b->weight;
c->leaf = 0;
}
// parent fields can't be written until after ensure_order() will
// never move nodes around again
s->built = 1;
link_parents(s->head);
return 0;
}
static int build_dict(const struct huff_node *node, dict_entry *curr)
{
if (node->leaf) {
curr->word = node->val;
return 1;
} else {
int leftnodes = build_dict(node->left, curr + 1);
int rightnodes = build_dict(node->right, curr + 1 + leftnodes);
curr->lleaf = leftnodes == 1;
curr->rleaf = rightnodes == 1;
curr->offset = leftnodes - 1;
return leftnodes + rightnodes + 1;
}
}
int huff_make_dict(struct huff_state *s, dict_entry **dict)
{
*dict = calloc(s->used, sizeof **dict);
build_dict(s->head, *dict);
return s->used;
}
void huff_destroy(struct huff_state *s)
{
free(s->queue);
free(s);
}
#ifndef HUFFMAN_H_
#define HUFFMAN_H_
#include <stddef.h>
#include <limits.h>
struct huff_state;
typedef unsigned char valtype;
typedef union dict_entry {
valtype word;
struct {
unsigned int lleaf :1;
unsigned int rleaf :1;
unsigned int offset :(sizeof(valtype) * CHAR_BIT - 1 - 1);
};
} dict_entry;
typedef unsigned int huff_word;
static const size_t huff_wordsize = CHAR_BIT * sizeof(huff_word);
int huff_init(struct huff_state **s);
int huff_add(struct huff_state *s, valtype val, unsigned long weight);
int huff_build(struct huff_state *s);
void huff_destroy(struct huff_state *s);
int huff_make_dict(struct huff_state *s, dict_entry **dict);
int huff_decode(const dict_entry *dict, char *out, const huff_word **in,
size_t *len, size_t *bits, size_t *off);
int huff_encode(struct huff_state *s,
size_t in_len, valtype *in,
size_t out_len, int *offset, char *out);
#endif
#include "huffman.h"
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char *argv[])
{
struct huff_state *st;
huff_init(&st);
while (!feof(stdin)) {
char buf[BUFSIZ];
if (fgets(buf, sizeof buf, stdin)) {
valtype v;
double p;
if (sscanf(buf, "%hhx = %lf", &v, &p) == 2) {
huff_add(st, v, p);
} else {
fprintf(stderr, "Bad input `%s'\n", buf);
break;
}
} else break;
}
huff_build(st);
dict_entry *dict = NULL;
int entries = huff_make_dict(st, &dict);
fwrite(dict, sizeof *dict, entries, stdout);
huff_destroy(st);
return 0;
}
#include "huffman.h"
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char *argv[])
{
if (argc != 2)
return 1;
size_t dict_used = 0, dict_size = 64;
dict_entry *dict = calloc(dict_size, sizeof *dict);
{
FILE *dict_file = fopen(argv[1], "rb");
if (!dict_file)
abort();
while (!feof(dict_file) && !ferror(dict_file)) {
while (dict_size < 2 * dict_used)
dict = realloc(dict, (dict_size *= 2) * sizeof *dict);
size_t count = fread(&dict[dict_used], sizeof *dict,
dict_size - dict_used, dict_file);
dict_used += count;
}
fclose(dict_file);
}
while (!feof(stdin) && !ferror(stdin)) {
huff_word message[2] = { 0 }, *m = message;
size_t len = fread(m, 1, (sizeof *m) * 16, stdin);
size_t bits = len * CHAR_BIT;
size_t offset = 0;
while (len && offset < bits) {
char val = '\0';
int found = huff_decode(dict, &val, &m, &len, &bits, &offset);
if (found)
fputc(val, stdout);
}
}
}
#include "huffman.h"
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char *argv[])
{
struct huff_state *st;
huff_init(&st);
while (!feof(stdin)) {
char buf[BUFSIZ];
if (fgets(buf, sizeof buf, stdin)) {
valtype v;
double p;
if (sscanf(buf, "%hhx = %lf", &v, &p) == 2) {
huff_add(st, v, p);
} else {
fprintf(stderr, "Bad input `%s'\n", buf);
break;
}
} else break;
}
huff_build(st);
dict_entry *dict = NULL;
huff_make_dict(st, &dict);
int offset = 0;
unsigned char in[] = { 0xa, 0xb, 0xd, 0xc, 0xb, 0xb, 0xc, 0xc, 0xa, 0xc };
char out[2] = { 0xff, 0xff };
int left = sizeof in;
size_t start = left;
while (left) {
left = huff_encode(st, left, &in[start - left], sizeof out, &offset, out);
int partial = offset % CHAR_BIT;
fwrite(out, (sizeof out) - !!partial, 1, stdout);
offset = partial;
out[0] = out[sizeof(out) - 1]; // lap edges
}
huff_destroy(st);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment