Created
November 3, 2024 20:55
-
-
Save leok7v/dde0911059cb56b7da726c5b3f2dfacd to your computer and use it in GitHub Desktop.
3 & 4 bytes map for lz77
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <assert.h> | |
#include <stdbool.h> | |
#include <stdint.h> | |
struct map { // single threaded use only | |
const void** p; // p[n] | |
size_t n; | |
uint32_t m; // mask 0xFFFFFF for 3 bytes and 0xFFFFFFFFu for 4 bytes | |
}; | |
static const size_t map_deleted_ = 0xC01DF00Du; | |
static const void* map_deleted = &map_deleted_; | |
static size_t map_prime_under(size_t n) { | |
static const size_t table_of_primes[] = { | |
3, 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, | |
32749, 65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, | |
8388593, 16777213 | |
}; | |
assert(4 <= n && ((n - 1) & n) == 0 && n <= (1u << 24)); | |
uint8_t bit = 0; | |
size_t bits = 1 << 2; | |
while (bits < n) { bits <<= 1; bit++; } | |
return table_of_primes[bit]; | |
} | |
static inline uint32_t map_hash_key(uint32_t k) { | |
k ^= k >> 13; // Bob Jenkins' hash | |
k *= 0x85EBCA6Bu; | |
k ^= k >> 16; | |
return k; | |
} | |
static inline size_t map_hash(struct map* m, uint32_t k) { | |
return (size_t)(map_hash_key(k) % m->n); | |
} | |
static void map_init(struct map* m, uint8_t** p, size_t n, size_t b) { | |
if (!(16 < n && n <= (1u << 24) && 3 <= b && b <= 4)) { exit(1); } | |
memset(m, 0, sizeof(*m)); | |
m->p = p; | |
m->n = map_prime_under(n); | |
m->m = b == 3 ? 0x00FFFFFFu : 0xFFFFFFFFu; | |
memset(m->p, 0, n * sizeof(m->p[0])); | |
} | |
static inline void map_reduce_chain(struct map* m, size_t i) { | |
const size_t n = m->n; | |
size_t e = i + 1; // end of deleted sequence | |
while (e < i + n && m->p[e % n] == map_deleted) { e++; } | |
if (!m->p[e % n]) { | |
for (size_t j = i; j < e; j++) { m->p[j % n] = (void*)0; } | |
} | |
} | |
static inline bool map_get_hashed(struct map* m, uint32_t k, size_t i, | |
size_t* ix) { | |
const uint32_t mask = m->m; | |
bool reduced = false; | |
while (m->p[i]) { | |
const bool deleted = m->p[i] == map_deleted; | |
const bool not_deleted_and_equal = !deleted && | |
(mask & *((uint32_t*)m->p[i])) == k; | |
if (not_deleted_and_equal) { | |
*ix = i; | |
return true; | |
} else { | |
if (deleted && !reduced) { | |
map_reduce_chain(m, i); | |
reduced = true; | |
} | |
i = (i + 1) % m->n; | |
} | |
} | |
return false; | |
} | |
static inline bool map_get3(struct map* m, uint32_t b3, size_t* ix) { | |
assert((b3 & ~0xFFFFFFu) == 0 && m->m == 0x00FFFFFFu); | |
return map_get_hashed(m, b3, map_hash(m, b3), ix); | |
} | |
static inline bool map_get4(struct map* m, uint32_t b4, size_t* ix) { | |
assert(m->m == 0xFFFFFFFFu); | |
return map_get_hashed(m, b4, map_hash(m, b4), ix); | |
} | |
static inline void map_remove(struct map* m, size_t ix) { | |
m->p[ix] = m->p[(ix + 1) % m->n] ? map_deleted : (void*)0; | |
if (m->p[ix]) { map_reduce_chain(m, ix); } | |
} | |
static inline bool map_put(struct map* m, const void* p, const size_t h, | |
uint32_t k) { | |
const uint32_t mask = m->m; | |
size_t d = (size_t)-1; | |
size_t i = h; | |
while (m->p[i]) { | |
const bool deleted = m->p[i] == map_deleted; | |
const bool not_deleted_and_equal = !deleted && | |
(mask & *((uint32_t*)m->p[i])) == k; | |
if (not_deleted_and_equal) { | |
m->p[i] = p; | |
return true; | |
} else { | |
if (deleted && d == (size_t)-1) { d = i; } | |
i = (i + 1) % m->n; | |
if (i == h) { | |
if (d == (size_t)-1) { return false; } | |
break; | |
} | |
} | |
} | |
if (d != (size_t)-1) { | |
map_reduce_chain(m, d); | |
m->p[d] = p; | |
} else { | |
m->p[i] = p; | |
} | |
return true; | |
} | |
static inline bool map_put3(struct map* m, const void* p, uint32_t b3) { | |
assert((b3 & ~0xFFFFFFu) == 0 && m->m == 0x00FFFFFFu); | |
return map_put(m, p, map_hash(m, b3), b3); | |
} | |
static inline bool map_put4(struct map* m, const void* p, uint32_t b4) { | |
assert(m->m == 0xFFFFFFFFu); | |
return map_put(m, p, map_hash(m, b4), b4); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Future improvements:
It is possible to implement circular window[1 << max_window] and
move start pointer putting incoming LZ77 byte inside it.
This will allow to replace direct pointers to memory in the maps
by 32-bit unsigned offsets from the beginning of the window.
Why 32 bits not 16 because offset of the next byte pointer in lz77
is max_window (64KB) bytes away from the beginning of the window
offset. UINT32_MAX can be used as empty entry and UINT32_MAX - 1
as deleted.
It will reduce the memory bandwidth for p[*] pointers and half
their size on 64bit architectures and will also make stream
processing possible.
Comparing to 3 and 4 bytes not yet in the window is no issue
but expanding match beyond that may present a bit of complexity
in lz77 itself (wrap around bytes inside the window and pointer
to max_size incoming bytes). Not a big deal - doable.
Not a goal at the moment.