Last active
December 17, 2015 04:09
-
-
Save kulp/f2dddb8980e9b84c4281 to your computer and use it in GitHub Desktop.
Huffman encoder
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "huffman.h" | |
int huff_decode(const dict_entry *dict, char *out, const huff_word **in, | |
size_t *len, size_t *bits, size_t *off) | |
{ | |
const dict_entry *n = dict; | |
int found = 0; | |
while (!found && *len && *off < *bits) { | |
unsigned char bit = (**in >> *off) & 1; | |
if (bit) { | |
found = n->rleaf; | |
n += n->offset + 2; | |
} else { | |
// Left nodes are stored implicitly. This implies that the most | |
// efficient dictionary will be skewed right, so that there are not | |
// long spans of left-nodes to count over. | |
found = n->lleaf; | |
n++; | |
} | |
if (++*off == huff_wordsize && --*len != 0) { | |
++*in; | |
*off = 0; | |
*bits -= huff_wordsize; | |
} | |
} | |
if (found) | |
*out = n->word; | |
return found; | |
} | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "huff_private.h" | |
static void walk_nodes(const struct huff_node *node, | |
const struct huff_node **out) | |
{ | |
if (node->leaf) { | |
out[node->val] = node; | |
} else { | |
walk_nodes(node->left , out); | |
walk_nodes(node->right, out); | |
} | |
} | |
static int encode_bits(const struct huff_node *node, | |
int max, int *offset, char *out) | |
{ | |
if (*offset >= max) | |
return 1; // failure to fit | |
if (node && node->parent) { | |
encode_bits(node->parent, max, offset, out); | |
long bit = 1 << (*offset % CHAR_BIT); | |
if (node->parent->right == node) | |
out[*offset / CHAR_BIT] |= bit; | |
else | |
out[*offset / CHAR_BIT] &= ~bit; | |
++*offset; | |
} | |
return 0; | |
} | |
int huff_encode(struct huff_state *s, | |
size_t in_len, valtype *in, | |
size_t out_len, int *offset, char *out) | |
{ | |
int rc = 0; | |
const struct huff_node *lookup[1 << (CHAR_BIT * sizeof(valtype))] = { 0 }; | |
walk_nodes(s->head, lookup); | |
valtype x; | |
while (!rc && in_len > 0 && (x = *in++)) { | |
int bits = *offset; | |
rc = encode_bits(lookup[x], out_len * CHAR_BIT, &bits, out); | |
if (!rc) { | |
*offset = bits; | |
in_len--; | |
} | |
} | |
return in_len; // nonzero means we didn't finish encoding | |
} | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef HUFF_PRIVATE_H_ | |
#define HUFF_PRIVATE_H_ | |
#include "huffman.h" | |
struct huff_node { | |
struct huff_node *left, *right, *parent; | |
valtype val; | |
unsigned long weight; | |
unsigned leaf:1; | |
}; | |
struct huff_state { | |
// size is the allocated size of queue ; used is the number of slots used | |
// in queue. When used grows to be equal to size, realloc(). | |
size_t size, used; | |
// All nodes are stored consecutively in queue, but only the ones between | |
// head and tail are considered "in the queue" ; that is, when head == | |
// tail, the queue is empty, and when (tail - head == 1), there is one | |
// node in the queue (end condition). The queue is realloc'ed as needed | |
// when new nodes are added, whether leaf nodes or internal. | |
struct huff_node *queue, *head, *tail; | |
unsigned built:1; | |
}; | |
#endif | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "huff_private.h" | |
#include <stddef.h> | |
#include <stdlib.h> | |
int huff_init(struct huff_state **_s) | |
{ | |
struct huff_state *s = *_s = malloc(sizeof *s); | |
s->size = 64; | |
s->used = 0; | |
s->queue = calloc(s->size, sizeof *s->queue); | |
s->head = s->tail = s->queue; | |
return 0; | |
} | |
static int weight_cmp(const void *_a, const void *_b) | |
{ | |
const struct huff_node *a = _a, | |
*b = _b; | |
return a->weight - b->weight; | |
} | |
static void ensure_order(struct huff_state *s) | |
{ | |
// It would be better to insert in place instead of resorting an | |
// almost-sorted list every time, especially since this is a worst-case | |
// scenario for quicksort. | |
qsort(s->head, s->tail - s->head, sizeof *s->queue, weight_cmp); | |
} | |
// ensures there is space for at least one more node | |
static int ensure_space(struct huff_state *s) | |
{ | |
if (s->used < s->size) | |
return 0; | |
ptrdiff_t h = s->head - s->queue, | |
t = s->tail - s->queue; | |
while (s->used >= s->size && s->queue) | |
s->queue = realloc(s->queue, (s->size *= 2) * sizeof *s->queue); | |
s->head = &s->queue[h]; | |
s->tail = &s->queue[t]; | |
return !s->queue; | |
} | |
int huff_add(struct huff_state *s, valtype val, unsigned long weight) | |
{ | |
if (s->built || ensure_space(s)) | |
return -1; | |
s->used++; | |
struct huff_node *q = s->tail++; | |
q->left = NULL; | |
q->right = NULL; | |
q->val = val; | |
q->weight = weight; | |
q->leaf = 1; | |
ensure_order(s); | |
return 0; | |
} | |
static int link_parents(struct huff_node *node) | |
{ | |
if (node->leaf) | |
return 1; | |
node->left->parent = node->right->parent = node; | |
return link_parents(node->left) + link_parents(node->right); | |
} | |
int huff_build(struct huff_state *s) | |
{ | |
while ((ensure_order(s), s->tail - s->head > 1)) { | |
struct huff_node *a = s->head++, | |
*b = s->head++; | |
if (ensure_space(s)) | |
return -1; | |
// TODO coalesce nodes to the beginning of queue (reuse space) | |
s->used++; | |
struct huff_node *c = s->tail++; | |
c->left = a; | |
c->right = b; | |
c->weight = a->weight + b->weight; | |
c->leaf = 0; | |
} | |
// parent fields can't be written until after ensure_order() will | |
// never move nodes around again | |
s->built = 1; | |
link_parents(s->head); | |
return 0; | |
} | |
static int build_dict(const struct huff_node *node, dict_entry *curr) | |
{ | |
if (node->leaf) { | |
curr->word = node->val; | |
return 1; | |
} else { | |
int leftnodes = build_dict(node->left, curr + 1); | |
int rightnodes = build_dict(node->right, curr + 1 + leftnodes); | |
curr->lleaf = leftnodes == 1; | |
curr->rleaf = rightnodes == 1; | |
curr->offset = leftnodes - 1; | |
return leftnodes + rightnodes + 1; | |
} | |
} | |
int huff_make_dict(struct huff_state *s, dict_entry **dict) | |
{ | |
*dict = calloc(s->used, sizeof **dict); | |
build_dict(s->head, *dict); | |
return s->used; | |
} | |
void huff_destroy(struct huff_state *s) | |
{ | |
free(s->queue); | |
free(s); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef HUFFMAN_H_ | |
#define HUFFMAN_H_ | |
#include <stddef.h> | |
#include <limits.h> | |
struct huff_state; | |
typedef unsigned char valtype; | |
typedef union dict_entry { | |
valtype word; | |
struct { | |
unsigned int lleaf :1; | |
unsigned int rleaf :1; | |
unsigned int offset :(sizeof(valtype) * CHAR_BIT - 1 - 1); | |
}; | |
} dict_entry; | |
typedef unsigned int huff_word; | |
static const size_t huff_wordsize = CHAR_BIT * sizeof(huff_word); | |
int huff_init(struct huff_state **s); | |
int huff_add(struct huff_state *s, valtype val, unsigned long weight); | |
int huff_build(struct huff_state *s); | |
void huff_destroy(struct huff_state *s); | |
int huff_make_dict(struct huff_state *s, dict_entry **dict); | |
int huff_decode(const dict_entry *dict, char *out, const huff_word **in, | |
size_t *len, size_t *bits, size_t *off); | |
int huff_encode(struct huff_state *s, | |
size_t in_len, valtype *in, | |
size_t out_len, int *offset, char *out); | |
#endif |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "huffman.h" | |
#include <stdio.h> | |
#include <stdlib.h> | |
int main(int argc, char *argv[]) | |
{ | |
struct huff_state *st; | |
huff_init(&st); | |
while (!feof(stdin)) { | |
char buf[BUFSIZ]; | |
if (fgets(buf, sizeof buf, stdin)) { | |
valtype v; | |
double p; | |
if (sscanf(buf, "%hhx = %lf", &v, &p) == 2) { | |
huff_add(st, v, p); | |
} else { | |
fprintf(stderr, "Bad input `%s'\n", buf); | |
break; | |
} | |
} else break; | |
} | |
huff_build(st); | |
dict_entry *dict = NULL; | |
int entries = huff_make_dict(st, &dict); | |
fwrite(dict, sizeof *dict, entries, stdout); | |
huff_destroy(st); | |
return 0; | |
} | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "huffman.h" | |
#include <stdio.h> | |
#include <stdlib.h> | |
int main(int argc, char *argv[]) | |
{ | |
if (argc != 2) | |
return 1; | |
size_t dict_used = 0, dict_size = 64; | |
dict_entry *dict = calloc(dict_size, sizeof *dict); | |
{ | |
FILE *dict_file = fopen(argv[1], "rb"); | |
if (!dict_file) | |
abort(); | |
while (!feof(dict_file) && !ferror(dict_file)) { | |
while (dict_size < 2 * dict_used) | |
dict = realloc(dict, (dict_size *= 2) * sizeof *dict); | |
size_t count = fread(&dict[dict_used], sizeof *dict, | |
dict_size - dict_used, dict_file); | |
dict_used += count; | |
} | |
fclose(dict_file); | |
} | |
while (!feof(stdin) && !ferror(stdin)) { | |
huff_word message[2] = { 0 }, *m = message; | |
size_t len = fread(m, 1, (sizeof *m) * 16, stdin); | |
size_t bits = len * CHAR_BIT; | |
size_t offset = 0; | |
while (len && offset < bits) { | |
char val = '\0'; | |
int found = huff_decode(dict, &val, &m, &len, &bits, &offset); | |
if (found) | |
fputc(val, stdout); | |
} | |
} | |
} | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "huffman.h" | |
#include <stdio.h> | |
#include <stdlib.h> | |
int main(int argc, char *argv[]) | |
{ | |
struct huff_state *st; | |
huff_init(&st); | |
while (!feof(stdin)) { | |
char buf[BUFSIZ]; | |
if (fgets(buf, sizeof buf, stdin)) { | |
valtype v; | |
double p; | |
if (sscanf(buf, "%hhx = %lf", &v, &p) == 2) { | |
huff_add(st, v, p); | |
} else { | |
fprintf(stderr, "Bad input `%s'\n", buf); | |
break; | |
} | |
} else break; | |
} | |
huff_build(st); | |
dict_entry *dict = NULL; | |
huff_make_dict(st, &dict); | |
int offset = 0; | |
unsigned char in[] = { 0xa, 0xb, 0xd, 0xc, 0xb, 0xb, 0xc, 0xc, 0xa, 0xc }; | |
char out[2] = { 0xff, 0xff }; | |
int left = sizeof in; | |
size_t start = left; | |
while (left) { | |
left = huff_encode(st, left, &in[start - left], sizeof out, &offset, out); | |
int partial = offset % CHAR_BIT; | |
fwrite(out, (sizeof out) - !!partial, 1, stdout); | |
offset = partial; | |
out[0] = out[sizeof(out) - 1]; // lap edges | |
} | |
huff_destroy(st); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment