Last active
October 13, 2023 09:23
-
-
Save miura1729/ed0d060f5ba7ebb0bc9092acc5702525 to your computer and use it in GitHub Desktop.
Can store 64000 data in 65536 buckets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "wyhash32.h" | |
#include <stdio.h> | |
#include <stdint.h> | |
#include <stdlib.h> | |
#include <string.h> | |
typedef int mh_key_t; | |
typedef int mh_value_t; | |
int pure; | |
int crash; | |
int crash2; | |
int move; | |
int para; | |
int mpara; | |
unsigned | |
hash_func(int key) { | |
return wy32x32(key, 0x9E3779B9ul); | |
} | |
typedef struct mhash { | |
uint64_t *exists; | |
mh_key_t *keys; | |
mh_value_t *values; | |
uint32_t *same_hash; | |
int *prev_hv; | |
int n_buckets; | |
int current_prev_hv; | |
} mhash; | |
const int same_hash_size = 16; | |
uint64_t ror(uint64_t *base, int sn, int basesize) | |
{ | |
if (sn < 0) sn += basesize; | |
int snf = sn + 31; | |
if (snf > basesize) snf -= basesize; | |
int snfpos = snf / 64; | |
int snfoff = snf & 63; | |
uint64_t snfbit = (snfoff == 63) ? -1 : (1ull << (snfoff + 1)) - 1; | |
int snb = sn - 32; | |
if (snb < 0) snb += basesize; | |
int snbpos = snb / 64; | |
int snboff = snb & 63; | |
uint64_t snbbit = (1ull << (64 - snboff + 1)) - 1;; | |
if (snboff == 1) { | |
snbbit = -1; | |
} | |
if (0 && snfpos == snbpos) { | |
/* Same bitmap */ | |
uint64_t bitmap = base[snfpos]; | |
uint64_t snbit = (1ll << (snb + 1)) - 1; | |
return (((bitmap & snbit) << (64 - snboff)) | (bitmap >> snboff)); | |
} | |
else { | |
uint64_t fbit = base[snfpos]; | |
uint64_t bbit = base[snbpos]; | |
fbit = (fbit & snfbit) << (64 - snboff); | |
bbit = (bbit >> snboff) & snbbit; | |
return fbit | bbit; | |
} | |
} | |
mhash *mh_init(int size) { | |
mhash *h = malloc(sizeof(mhash)); | |
h->n_buckets = size; | |
h->exists = calloc((size + 63) / 64, sizeof(uint64_t)); | |
h->keys = calloc(size, sizeof(mh_key_t)); | |
h->values = calloc(size, sizeof(mh_key_t)); | |
h->same_hash = calloc(size, sizeof(uint32_t)); | |
h->current_prev_hv = size; | |
h->prev_hv = calloc(size, sizeof(int)); | |
for (int i = 0; i < size; i++) { | |
h->prev_hv[i] = -size; | |
} | |
return h; | |
} | |
int mh_put(mhash *h, mh_key_t key) { | |
int hv = hash_func(key) % h->n_buckets; | |
retry0: | |
uint64_t hv_bitmap = (1ull << (hv & 63)); | |
if ((h->exists[hv / 64] & hv_bitmap) == 0) { | |
/* no confilct hash value and entry is fresh */ | |
h->exists[hv / 64] |= hv_bitmap; | |
h->keys[hv] = key; | |
h->same_hash[hv] = 1; | |
h->prev_hv[hv] = -h->current_prev_hv; | |
h->current_prev_hv = hv; | |
pure++; | |
return hv; | |
} | |
else if (h->same_hash[hv] & 1) { | |
/* useing entry for same hash value */ | |
uint64_t albitmap = ror(h->exists, hv, h->n_buckets); | |
uint64_t alorigin = albitmap >> 16; | |
uint64_t alexits = albitmap >> 32; | |
int empoff; | |
int lopop = __builtin_popcount(alorigin & ((1ull << 17) - 1)); | |
int hipop = __builtin_popcount(alorigin >> 32); | |
if (lopop > hipop) { | |
empoff = 31 - __builtin_clz((~(uint32_t)alexits) & ((1ul << same_hash_size) - 1)); | |
} | |
else { | |
empoff = __builtin_ctzll(alexits + 1); | |
} | |
if (empoff > 0 && empoff < same_hash_size) { | |
int newent = (empoff + hv) % h->n_buckets; | |
h->same_hash[hv] |= (1 << empoff); | |
h->exists[newent / 64] |= (1ull << (newent & 63)); | |
h->same_hash[newent] = (empoff << 1); | |
h->keys[newent] = key; | |
crash++; | |
return newent; | |
} | |
else { | |
for (int i = 1; i < same_hash_size; i++) { | |
int srcent = (hv + i) % h->n_buckets; | |
uint16_t csh = h->same_hash[srcent]; | |
if ((csh & 1) == 0 && csh != 0) { | |
csh >>= 1; | |
int hv2 = (srcent - csh) % h->n_buckets; | |
if (hv2 < 0) { | |
hv2 += h->n_buckets; | |
} | |
uint64_t alexits2 = ror(h->exists, hv2, h->n_buckets) >> 32; | |
int empoff = __builtin_ctzll(alexits2 + 1); | |
if (empoff < same_hash_size) { | |
int newent = (empoff + hv2) % h->n_buckets; | |
uint64_t hv_bitmap = 1ull << (newent & 63); | |
h->same_hash[hv2] ^= (1 << csh); /* Clear old entry */ | |
h->same_hash[hv2] |= (1 << empoff); /* Set new entry */ | |
h->keys[newent] = h->keys[srcent]; | |
h->values[newent] = h->values[srcent]; | |
h->exists[newent / 64] |= hv_bitmap; | |
h->same_hash[srcent] = i << 1; | |
h->same_hash[newent] = empoff << 1; | |
h->same_hash[hv] |= (1 << i); | |
h->keys[srcent] = key; | |
crash2++; | |
return srcent; | |
} | |
} | |
} | |
} | |
if (h->prev_hv[hv] < 0) { | |
int nhv; | |
nhv = -h->prev_hv[hv]; | |
h->prev_hv[hv] = nhv; | |
hv = nhv; | |
} | |
else { | |
hv = h->prev_hv[hv]; | |
} | |
para++; | |
goto retry0; | |
/* Oh! fill my entry. Must still from other entry */ | |
abort(); | |
} | |
else { | |
/* using entry for other hash value. */ | |
int eoff = h->same_hash[hv] >> 1; | |
int hv2 = (hv - eoff) % h->n_buckets; | |
if (hv2 < 0) { | |
hv2 += h->n_buckets; | |
} | |
int ohv2 = hv2; | |
retry1: | |
uint64_t alexits = ror(h->exists, hv2, h->n_buckets) >> 32; | |
int empoff = __builtin_ctzll(alexits + 1); | |
if (empoff < same_hash_size) { | |
int newent = (empoff + hv2) % h->n_buckets; | |
uint64_t hv_bitmap = 1ull << (newent & 63); | |
h->same_hash[ohv2] ^= (1 << eoff); /* Clear old entry */ | |
h->same_hash[hv2] |= (1 << empoff); /* Set new entry */ | |
h->keys[newent] = h->keys[hv]; | |
h->values[newent] = h->values[hv]; | |
h->same_hash[newent] = empoff << 1; | |
h->exists[newent / 64] |= hv_bitmap; | |
h->same_hash[hv] = 1; | |
h->keys[hv] = key; | |
h->prev_hv[hv] = -h->current_prev_hv; | |
h->current_prev_hv = hv; | |
move++; | |
return hv; | |
} | |
if (h->prev_hv[hv2] < 0) { | |
int nhv2; | |
nhv2 = -h->prev_hv[hv2]; | |
h->prev_hv[hv2] = nhv2; | |
hv2 = nhv2; | |
} | |
else { | |
hv2 = h->prev_hv[hv2]; | |
} | |
mpara++; | |
goto retry1; | |
abort(); | |
} | |
return h->n_buckets; | |
} | |
int mh_get(mhash *h, mh_key_t key) { | |
int hv = hash_func(key) % h->n_buckets; | |
uint16_t hbit = h->same_hash[hv]; | |
if (hbit & 1) { | |
if (h->keys[hv] == key) { | |
return hv; | |
} | |
hbit ^= 1; | |
for (int i = __builtin_ctz(hbit); i > 0 && i < 16;i = __builtin_ctz(hbit)) { | |
int ent = (i + hv) % h->n_buckets; | |
if (h->keys[ent] == key) { | |
return ent; | |
} | |
hbit ^= (1 << i); | |
} | |
} | |
while (h->prev_hv[hv] >= 0) { | |
hv = h->prev_hv[hv]; | |
hbit = h->same_hash[hv]; | |
hbit ^= 1; | |
for (int i = __builtin_ctz(hbit); i > 0 && i < 16;i = __builtin_ctz(hbit)) { | |
int ent = (i + hv) % h->n_buckets; | |
if (h->keys[ent] == key) { | |
return ent; | |
} | |
hbit ^= (1 << i); | |
} | |
} | |
return -1; | |
} | |
int main(){ | |
const int ndata = 65536; | |
mhash *h = mh_init(ndata); | |
for (int i = 0; i < ndata - 1000; i++) { | |
int ent = mh_put(h, i*i+i*2); | |
//printf("%d %d %d\n", i, ent, hash_func(i*i+i*2) % h->n_buckets); | |
h->values[ent] = i; | |
if ((i % 1000) == 0) { | |
//printf("%d %d %d %d %d %d %d \n", i, pure, crash, crash2, move, para, mpara); | |
} | |
} | |
// printf("fin\n"); | |
for (int i = 0; i < ndata - 1000; i++) { | |
int ent = mh_get(h, i*i+i*2); | |
//printf("%d %d %d \n", i, h->values[ent], ent); | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment