Last active
February 5, 2025 09:50
-
-
Save vurtun/fa49e27e2408a365822f6aaa3b4f3a03 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <immintrin.h> // For AVX2 | |
#define MAX_SIZE (64*1024) | |
#define BITS_PER_WORD 64 | |
#define WORD_CNT ((MAX_SIZE + BITS_PER_WORD - 1) / BITS_PER_WORD) // 1024 words | |
#define BIT_MAP_CNT ((WORD_CNT + BITS_PER_WORD - 1) / BITS_PER_WORD) // 16 words | |
#define WORD_SHIFT 6 // log2(BITS_PER_WORD) | |
#define WORD_MSK (BITS_PER_WORD - 1) // 63 | |
#define CHUNK_CNT (BIT_MAP_CNT/4) | |
#define CACHE_LINE_SIZE 64 | |
static struct my_data data[MAX_SIZE]; | |
static unsigned long long data_used[WORD_CNT] __attribute__((aligned(CACHE_LINE_SIZE))); | |
static unsigned long long data_used_top[BIT_MAP_CNT] __attribute__((aligned(CACHE_LINE_SIZE))); | |
static inline int | |
bit_ffz() { | |
const __m256i* ptr = (__m256i*)data_used_top; | |
int chnk_idx = 0; | |
unsigned long long msk = 0; | |
while (chnk_idx < CHUNK_CNT && | |
!(msk = ~_mm256_movemask_pd(_mm256_castsi256_pd(ptr[chnk_idx])))) { | |
chnk_idx++; | |
} | |
if (chnk_idx == CHUNK_CNT) { | |
return -1; // All bits are set | |
} | |
int qidx = __builtin_ctzll(msk); | |
int grp_idx = (chnk_idx << 2) + qidx; | |
unsigned long long word = ~data_used_top[grp_idx]; | |
int widx = (grp_idx << 6) | __builtin_ctzll(word); | |
unsigned long long target_word = ~data_used[widx]; | |
return (widx << 6) | __builtin_ctzll(target_word); | |
} | |
static inline void | |
bit_set(int idx) { | |
unsigned widx = idx >> WORD_SHIFT; // Divide by 64 | |
unsigned bidx = idx & WORD_MSK; // Modulo 64 | |
unsigned long long *word_ptr = &data_used[widx]; | |
unsigned long long msk = 1ULL << bidx; | |
unsigned long long old_value = *word_ptr; | |
unsigned long long new_value = old_value | msk; | |
*word_ptr = new_value; | |
unsigned top_idx = widx >> WORD_SHIFT; // Divide by 64 again | |
unsigned top_bit = widx & WORD_MSK; // Modulo 64 again | |
unsigned long long top_msk = 1ULL << top_bit; | |
unsigned long long is_full = -(new_value == ~0ULL); | |
data_used_top[top_idx] ^= ((data_used_top[top_idx] ^ is_full) & top_msk); | |
} | |
static inline void | |
bit_clr(int idx) { | |
unsigned int widx = idx >> WORD_SHIFT; | |
unsigned long long msk = ~(1ULL << (idx & WORD_MSK)); | |
unsigned long long* word_ptr = &data_used[widx]; | |
unsigned long long new_value = *word_ptr & msk; | |
*word_ptr = new_value; | |
unsigned int top_idx = widx >> 6; | |
unsigned long long top_msk = 1ULL << (widx & WORD_MSK); | |
unsigned long long is_not_empty = -(new_value != 0); | |
unsigned long long* top_ptr = &data_used_top[top_idx]; | |
*top_ptr ^= (*top_ptr ^ is_not_empty) & top_msk; | |
} | |
static inline int | |
bit_is_set(int idx) { | |
unsigned widx = idx >> WORD_SHIFT; // Divide by 64 | |
unsigned bidx = idx & WORD_MSK; // Modulo 64 | |
return !!(data_used[widx] & (1 << bidx)); | |
} | |
#if 0 | |
find_first_zero_bit: | |
mov rax, qword ptr [rip + top_level_bitmap@GOTPCREL] | |
vmovapd ymm0, ymmword ptr [rax] | |
vmovmskpd ecx, ymm0 | |
not rcx | |
rep bsf rcx, rcx | |
mov rax, qword ptr [rax + 8*rcx] | |
not rax | |
shl ecx, 6 | |
rep bsf rdx, rax | |
or edx, ecx | |
mov rax, qword ptr [rip + bit_array@GOTPCREL] | |
mov rax, qword ptr [rax + 8*rdx] | |
not rax | |
shl edx, 6 | |
rep bsf rax, rax | |
or eax, edx | |
vzeroupper | |
ret | |
set_bit: | |
mov ecx, edi | |
shr ecx, 6 | |
mov rax, qword ptr [rip + bit_array@GOTPCREL] | |
mov rdx, qword ptr [rax + 8*rcx] | |
bts rdx, rdi | |
mov esi, 1 | |
shl rsi, cl | |
mov qword ptr [rax + 8*rcx], rdx | |
xor eax, eax | |
cmp rdx, -1 | |
sete al | |
neg rax | |
shr edi, 12 | |
mov rcx, qword ptr [rip + top_level_bitmap@GOTPCREL] | |
mov rdx, qword ptr [rcx + 8*rdi] | |
xor rax, rdx | |
and rax, rsi | |
xor rax, rdx | |
mov qword ptr [rcx + 8*rdi], rax | |
ret | |
clear_bit: | |
mov eax, edi | |
mov rdx, -2 | |
mov ecx, edi | |
rol rdx, cl | |
shr eax, 6 | |
mov rsi, qword ptr [rip + bit_array@GOTPCREL] | |
mov r8d, 1 | |
mov ecx, eax | |
shl r8, cl | |
xor ecx, ecx | |
and qword ptr [rsi + 8*rax], rdx | |
setne cl | |
neg rcx | |
shr edi, 12 | |
mov rax, qword ptr [rip + top_level_bitmap@GOTPCREL] | |
mov rdx, qword ptr [rax + 8*rdi] | |
xor rcx, rdx | |
and rcx, r8 | |
xor rcx, rdx | |
mov qword ptr [rax + 8*rdi], rcx | |
ret | |
#endif | |
// init | |
/* nothing to do here */ | |
// insert store index elsewhere (if neccessary) | |
int idx = bit_ffz(); | |
bit_set(data_used, idx); | |
data[idx] = input; | |
// lookup using stored index | |
output = data[idx]; | |
// update existing stored index | |
data[idx] = input; | |
// remove existing stored element | |
bit_clr(data_used, idx); | |
// branchless iteration over all elements | |
for (int i = 0; i < MAX_SIZE; ++i) { | |
var = bit_is_set(data_used,i) ? ... : ...; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define MAX_SIZE (64*1024) | |
static struct my_data data[MAX_SIZE]; | |
static unsigned short free_idx[MAX_SIZE]; | |
static unsigned short free_idx_cnt = MAX_SIZE; | |
// init; populate indicies in reverse | |
for (int i = 0; i < MAX_SIZE; ++i) { | |
free_idx[i] = MAX_SIZE - i - 1; | |
} | |
// insert; store index elsewhere | |
index = free_idx[--free_idx_cnt]; | |
data[index] = ...input; | |
// lookup using stored index | |
output = data[idx]; | |
// update existing stored element | |
data[idx] = input; | |
// remove used stored element | |
data[idx].active = 0; | |
free_idx[free_idx_cnt++] = idx; | |
// branchless iterate over all elements | |
for (int i = 0; i < MAX_SIZE; ++i) { | |
var = data[i].active ? ... : ...; | |
} |
Author
vurtun
commented
Feb 4, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment