Skip to content

Instantly share code, notes, and snippets.

@leok7v
Created November 3, 2024 20:55
Show Gist options
  • Save leok7v/dde0911059cb56b7da726c5b3f2dfacd to your computer and use it in GitHub Desktop.
Save leok7v/dde0911059cb56b7da726c5b3f2dfacd to your computer and use it in GitHub Desktop.
3 & 4 bytes map for lz77
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
struct map { // single threaded use only
const void** p; // p[n]
size_t n;
uint32_t m; // mask 0xFFFFFF for 3 bytes and 0xFFFFFFFFu for 4 bytes
};
static const size_t map_deleted_ = 0xC01DF00Du;
static const void* map_deleted = &map_deleted_;
static size_t map_prime_under(size_t n) {
static const size_t table_of_primes[] = {
3, 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381,
32749, 65521, 131071, 262139, 524287, 1048573, 2097143, 4194301,
8388593, 16777213
};
assert(4 <= n && ((n - 1) & n) == 0 && n <= (1u << 24));
uint8_t bit = 0;
size_t bits = 1 << 2;
while (bits < n) { bits <<= 1; bit++; }
return table_of_primes[bit];
}
static inline uint32_t map_hash_key(uint32_t k) {
k ^= k >> 13; // Bob Jenkins' hash
k *= 0x85EBCA6Bu;
k ^= k >> 16;
return k;
}
static inline size_t map_hash(struct map* m, uint32_t k) {
return (size_t)(map_hash_key(k) % m->n);
}
static void map_init(struct map* m, uint8_t** p, size_t n, size_t b) {
if (!(16 < n && n <= (1u << 24) && 3 <= b && b <= 4)) { exit(1); }
memset(m, 0, sizeof(*m));
m->p = p;
m->n = map_prime_under(n);
m->m = b == 3 ? 0x00FFFFFFu : 0xFFFFFFFFu;
memset(m->p, 0, n * sizeof(m->p[0]));
}
static inline void map_reduce_chain(struct map* m, size_t i) {
const size_t n = m->n;
size_t e = i + 1; // end of deleted sequence
while (e < i + n && m->p[e % n] == map_deleted) { e++; }
if (!m->p[e % n]) {
for (size_t j = i; j < e; j++) { m->p[j % n] = (void*)0; }
}
}
static inline bool map_get_hashed(struct map* m, uint32_t k, size_t i,
size_t* ix) {
const uint32_t mask = m->m;
bool reduced = false;
while (m->p[i]) {
const bool deleted = m->p[i] == map_deleted;
const bool not_deleted_and_equal = !deleted &&
(mask & *((uint32_t*)m->p[i])) == k;
if (not_deleted_and_equal) {
*ix = i;
return true;
} else {
if (deleted && !reduced) {
map_reduce_chain(m, i);
reduced = true;
}
i = (i + 1) % m->n;
}
}
return false;
}
static inline bool map_get3(struct map* m, uint32_t b3, size_t* ix) {
assert((b3 & ~0xFFFFFFu) == 0 && m->m == 0x00FFFFFFu);
return map_get_hashed(m, b3, map_hash(m, b3), ix);
}
static inline bool map_get4(struct map* m, uint32_t b4, size_t* ix) {
assert(m->m == 0xFFFFFFFFu);
return map_get_hashed(m, b4, map_hash(m, b4), ix);
}
static inline void map_remove(struct map* m, size_t ix) {
m->p[ix] = m->p[(ix + 1) % m->n] ? map_deleted : (void*)0;
if (m->p[ix]) { map_reduce_chain(m, ix); }
}
static inline bool map_put(struct map* m, const void* p, const size_t h,
uint32_t k) {
const uint32_t mask = m->m;
size_t d = (size_t)-1;
size_t i = h;
while (m->p[i]) {
const bool deleted = m->p[i] == map_deleted;
const bool not_deleted_and_equal = !deleted &&
(mask & *((uint32_t*)m->p[i])) == k;
if (not_deleted_and_equal) {
m->p[i] = p;
return true;
} else {
if (deleted && d == (size_t)-1) { d = i; }
i = (i + 1) % m->n;
if (i == h) {
if (d == (size_t)-1) { return false; }
break;
}
}
}
if (d != (size_t)-1) {
map_reduce_chain(m, d);
m->p[d] = p;
} else {
m->p[i] = p;
}
return true;
}
static inline bool map_put3(struct map* m, const void* p, uint32_t b3) {
assert((b3 & ~0xFFFFFFu) == 0 && m->m == 0x00FFFFFFu);
return map_put(m, p, map_hash(m, b3), b3);
}
static inline bool map_put4(struct map* m, const void* p, uint32_t b4) {
assert(m->m == 0xFFFFFFFFu);
return map_put(m, p, map_hash(m, b4), b4);
}
@leok7v
Copy link
Author

leok7v commented Nov 3, 2024

Future improvements:

It is possible to implement circular window[1 << max_window] and
move start pointer putting incoming LZ77 byte inside it.
This will allow to replace direct pointers to memory in the maps
by 32-bit unsigned offsets from the beginning of the window.
Why 32 bits not 16 because offset of the next byte pointer in lz77
is max_window (64KB) bytes away from the beginning of the window
offset. UINT32_MAX can be used as empty entry and UINT32_MAX - 1
as deleted.

It will reduce the memory bandwidth for p[*] pointers and half
their size on 64bit architectures and will also make stream
processing possible.

Comparing to 3 and 4 bytes not yet in the window is no issue
but expanding match beyond that may present a bit of complexity
in lz77 itself (wrap around bytes inside the window and pointer
to max_size incoming bytes). Not a big deal - doable.

Not a goal at the moment.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment