Created
March 1, 2012 04:54
-
-
Save Wollw/1947371 to your computer and use it in GitHub Desktop.
Wavelet Tree
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* An implementation of the Wavelet Tree data structure. | |
* It is similar to a binary search tree using bits. | |
* | |
* More info here: | |
* http://siganakis.com/challenge-design-a-data-structure-thats-small | |
* http://www.alexbowe.com/wavelet-trees | |
*/ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <limits.h> | |
#include <string.h> | |
#include <assert.h> | |
#define SET_BIT(a, i) (a[i/(sizeof(*a)*CHAR_BIT)]\ | |
|= 1 << (i%(sizeof(*a)*CHAR_BIT))) | |
#define CLR_BIT(a, i) (a[i/(sizeof(*a)*CHAR_BIT)]\ | |
&= ~(1 << (i%(sizeof(*a)*CHAR_BIT)))) | |
typedef struct wavelet_t { | |
unsigned char *table; | |
struct wavelet_t *left; | |
struct wavelet_t *right; | |
} wavelet_t; | |
wavelet_t * wavelet_encode(char *str, size_t len, char min, char max); | |
char * wavelet_decode(char *str, wavelet_t *w); | |
char wavelet_decode_index(wavelet_t *w, size_t i, char min, char max); | |
void wavelet_free(wavelet_t *w); | |
int main(int argc, char **argv) { | |
char *str = "Hello, world!"; | |
if (argc > 1) { | |
str = argv[1]; | |
} | |
wavelet_t *w = wavelet_encode(str, strlen(str)+1, 0, 127); | |
str = malloc(strlen(str)+1 * sizeof(char)); | |
assert(str != NULL); | |
puts(wavelet_decode(str, w)); | |
free(str); | |
wavelet_free(w); | |
return 0; | |
} | |
wavelet_t * wavelet_encode(char *str, size_t len, char min, char max) { | |
wavelet_t *w = malloc(sizeof(wavelet_t)); | |
assert(w != NULL); | |
w->left = NULL; | |
w->right = NULL; | |
w->table = malloc(len * sizeof(char)); | |
assert(w->table != NULL); | |
char *str_l, *str_r; | |
size_t len_l, len_r; | |
str_l = malloc(len * sizeof(char)); | |
assert(str_l != NULL); | |
str_r = malloc(len * sizeof(char)); | |
assert(str_r != NULL); | |
len_l = 0; | |
len_r = 0; | |
/* | |
* Each char in the allocated space can hold CHAR_BIT bits | |
* so for each character being encoded we divide by CHAR_BIT | |
* to get the current byte to store in. (i % CHAR_BIT) gives | |
* the offset into that byte we need to store the next bit at. | |
* | |
* The characters themselves are sorted into left and right char | |
* arrays that will be passed to the next call of this function | |
* to repeat until there are only two values to encode. | |
*/ | |
size_t i; | |
char mid = min+(max-min)/2; | |
for (i = 0; i < len; i++) { | |
if (str[i] > mid) { | |
SET_BIT(w->table, i); | |
str_r[len_r++] = str[i]; | |
} else { | |
CLR_BIT(w->table, i); | |
str_l[len_l++] = str[i]; | |
} | |
} | |
if (max - min > 1) { | |
w->left = wavelet_encode(str_l, len_l, min, mid); | |
w->right = wavelet_encode(str_r, len_r, mid + 1, max); | |
} | |
free(str_l); | |
free(str_r); | |
return w; | |
} | |
char * wavelet_decode(char *str, wavelet_t *w) { | |
size_t i = 0; | |
while ((str[i] = wavelet_decode_index(w, i, 0, 127))) i++; | |
return str; | |
} | |
/* | |
* We keep track of the min and max values so that we can return the | |
* character code for the index being decoded based on it. I doubt this is | |
* the best way to do it but it's what I came up with. | |
*/ | |
char wavelet_decode_index(wavelet_t *w, size_t i, char min, char max) { | |
char mid = min + (max-min)/2; | |
char bit = w->table[i/CHAR_BIT] & 1 << (i % CHAR_BIT) ? 1 : 0; | |
char b; | |
size_t j; | |
size_t k = 0; | |
for (j = 0; j < i; j++) { | |
b = w->table[j/CHAR_BIT] & 1 << (j % CHAR_BIT) ? 1 : 0; | |
k += b == bit ? 1 : 0; | |
} | |
w = bit ? w->right : w->left; | |
if (w == NULL) | |
return min + (bit ? 1 : 0); | |
else if (bit) | |
return wavelet_decode_index(w, k, mid+1, max); | |
else | |
return wavelet_decode_index(w, k, min, mid); | |
} | |
void wavelet_free(wavelet_t *w) { | |
if (w->left) { | |
wavelet_free(w->left); | |
w->left = NULL; | |
} | |
if (w->right) { | |
wavelet_free(w->right); | |
w->right = NULL; | |
} | |
free(w->table); | |
free(w); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment