Last active
June 22, 2018 14:50
-
-
Save bit-hack/c369862f9f4869b0d9bdcb8413a20ba3 to your computer and use it in GitHub Desktop.
Addaptive shannon-fano coding table generation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm> | |
#include <string> | |
#include <vector> | |
#include <stdio.h> | |
#include <stdlib.h> | |
static int _coin_toss() { return ((rand() % 100) < 80) ? 1 : 0; } | |
struct code_t { | |
uint8_t index; | |
uint8_t size; | |
uint32_t code; | |
}; | |
static code_t _encode(const std::string &x, uint32_t index) { | |
uint32_t val = 0; | |
for (const auto &c : x) { | |
val <<= 1; | |
val |= (c == '1') ? 1 : 0; | |
} | |
return code_t{uint8_t(index), uint8_t(x.size()), uint32_t(val)}; | |
} | |
struct node_t { | |
node_t() { | |
child[0] = nullptr; | |
child[1] = nullptr; | |
} | |
node_t *child[2]; | |
}; | |
struct tree_t { | |
node_t root; | |
void insert() { | |
node_t *n = &root; | |
while (true) { | |
int toss = _coin_toss(); | |
if (n->child[toss]) { | |
n = n->child[toss]; | |
} else { | |
n->child[toss] = new node_t; | |
return; | |
} | |
} | |
} | |
void create() { | |
for (uint32_t i = 2; i < 256; ++i) { | |
insert(); | |
} | |
} | |
void _traverse(std::vector<std::string> &prefixes, const node_t *n, | |
const std::string bits) { | |
if (n == nullptr) { | |
prefixes.push_back(bits); | |
} else { | |
#if 0 | |
_traverse(prefixes, n->child[0], bits + "0"); | |
_traverse(prefixes, n->child[1], bits + "1"); | |
#else | |
_traverse(prefixes, n->child[0], "0" + bits); | |
_traverse(prefixes, n->child[1], "1" + bits); | |
#endif | |
} | |
} | |
void traverse(std::vector<std::string> &prefixes) { | |
_traverse(prefixes, &root, ""); | |
} | |
}; | |
struct cmp_len_t { | |
bool operator()(const std::string &a, const std::string &b) const { | |
return a.size() < b.size(); | |
} | |
}; | |
struct cmp_code_t { | |
bool operator()(const code_t &a, const code_t &b) const { | |
for (uint32_t i = 0; i <= 31; ++i) { | |
const uint32_t bit = 1 << i; | |
const uint32_t xa = a.code & bit; | |
const uint32_t xb = b.code & bit; | |
if (xa == xb) | |
continue; | |
return xa >= xb; | |
} | |
} | |
}; | |
std::string to_bits(const uint32_t v) { | |
std::string out; | |
for (uint32_t i = 0; i <= 31; ++i) { | |
const uint32_t bit = 1 << i; | |
out = ((v & bit) ? "1" : "0") + out; | |
} | |
return out; | |
} | |
int main() { | |
std::vector<std::string> prefixes; | |
tree_t tree; | |
tree.create(); | |
tree.traverse(prefixes); | |
std::sort(prefixes.begin(), prefixes.end(), cmp_len_t{}); | |
std::vector<code_t> codes; | |
uint32_t index = 0; | |
for (const auto &s : prefixes) { | |
codes.push_back(_encode(s, index++)); | |
} | |
std::sort(codes.begin(), codes.end(), cmp_code_t{}); | |
for (const auto &c : codes) { | |
std::string b = to_bits(c.code); | |
printf("{ 0x%03x, 0x%02x, 0x%08x }, // %s\n", c.index, c.size, c.code, b.c_str()); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment