Created
April 28, 2018 21:16
-
-
Save teryror/40e0d55e14375a55fff434c308b4e7af to your computer and use it in GitHub Desktop.
Approximate algorithm for generating length-limited prefix codes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/******************************************************************************* | |
Author: Tristan Dannenberg | |
Notice: No warranty is offered or implied; use this code at your own risk. | |
******************************************************************************** | |
Approximate algorithm for generating length-limited prefix codes. | |
I came up with this idea while working on an animation of Huffman's algorithm, | |
and had a convincing informal argument for it being optimal. | |
Comparison with Package/Merge shows this to be false for heavily skewed symbol | |
distributions. If the pressure is low, it seems to work fine, and the highest | |
error I've seen was a few percent. | |
I'm unsure how suitable it would be as a replacement of the well-known linear | |
time heuristical algorithm [1]; this algorithm runs in O(n*L*log n) time, but it | |
does not require its input to be sorted. As I understand it, sorting can become | |
a bottle-neck in the code build phase of some encoders. | |
Before putting this into production code, you may want to do a termination | |
proof; I have observed infinite loops in other variations of this algorithm, | |
though not in this one. Again, use this code at your own risk. | |
[1]: http://cbloomrants.blogspot.de/2010/07/07-03-10-length-limitted-huffman-codes.html | |
*******************************************************************************/ | |
#include "stdint.h" | |
#include "math.h" | |
typedef struct { | |
float weight; | |
uint16_t symbol; | |
uint16_t codelen; | |
} Symbol; | |
//--- Priority Queue Implementation: | |
#define HeapLeft(i) ((i) * 2) | |
#define HeapRight(i) ((i) * 2 + 1) | |
static inline void | |
Heapify(Symbol * Heap, int HeapSize, int Idx) | |
{ | |
int l = HeapLeft(Idx); | |
int r = HeapRight(Idx); | |
int min; | |
if (l <= HeapSize && Heap[l].weight <= Heap[Idx].weight) { | |
min = l; | |
} else { | |
min = Idx; | |
} | |
if (r <= HeapSize && Heap[r].weight <= Heap[min].weight) { | |
min = r; | |
} | |
if (min != Idx) { | |
Symbol tmp = Heap[Idx]; | |
Heap[Idx] = Heap[min]; | |
Heap[min] = tmp; | |
Heapify(Heap, HeapSize, min); | |
} | |
} | |
//--- | |
static void | |
GenerateCodeLengths(int MaxCodeLen, | |
const int *Weights, | |
int *CodeLengths, | |
int SymbolCount) | |
{ | |
Symbol * queue = calloc(SymbolCount + 1, sizeof(Symbol)); | |
for (int i = 0; i < SymbolCount; ++i) { | |
queue[i + 1].weight = (float)Weights[i]; | |
queue[i + 1].symbol = (uint16_t)i; | |
queue[i + 1].codelen = 1; | |
} | |
// Build the heap structure | |
for (int i = SymbolCount / 2; i >= 1; --i) { | |
Heapify(queue, SymbolCount, i); | |
} | |
// K = SymbolCount / 2 in 32.32 fixed point | |
uint64_t K = (uint64_t)SymbolCount << 31; | |
while (K > (1ULL << 32)) { // while K > 1.0 | |
// Remove symbol from future consideration if it were to overshoot our target K = 1 | |
uint64_t dK = 1ULL << (32 - (queue[1].codelen + 1)); | |
if (K - dK < (1ULL << 32)) { | |
queue[1].weight = INFINITY; | |
Heapify(queue, SymbolCount, 1); | |
continue; | |
} | |
if (++queue[1].codelen == MaxCodeLen) { | |
queue[1].weight = INFINITY; | |
} else { | |
queue[1].weight *= 2; | |
} | |
K -= dK; | |
// Update the heap structure | |
Heapify(queue, SymbolCount, 1); | |
} | |
for (int i = 1; i <= SymbolCount; ++i) { | |
int SymIdx = queue[i].symbol; | |
CodeLengths[SymIdx] = queue[i].codelen; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment