Created
September 30, 2023 08:22
-
-
Save miura1729/637139f119ace29bbf39a87cd55fcac4 to your computer and use it in GitHub Desktop.
Can store 8192 datas in 8192 buckets.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdint.h> | |
#include <stdlib.h> | |
#include <string.h> | |
typedef int mh_key_t; | |
typedef int mh_value_t; | |
typedef struct mhash { | |
uint64_t *exists; | |
mh_key_t *keys; | |
mh_value_t *values; | |
uint16_t *same_hash; | |
int n_buckets; | |
} mhash; | |
const int same_hash_size = 16; | |
uint64_t ror(uint64_t *base, int sn, int basesize) | |
{ | |
if (sn < 0) sn += basesize; | |
int snf = sn + 31; | |
if (snf > basesize) snf -= basesize; | |
int snfpos = snf / 64; | |
int snfoff = snf & 63; | |
uint64_t snfbit = (1ull << (snfoff + 1)) - 1; | |
if (snfoff == 63) { | |
snfbit = -1; | |
} | |
int snb = sn - 32; | |
if (snb < 0) snb += basesize; | |
int snbpos = snb / 64; | |
int snboff = snb & 63; | |
uint64_t snbbit = (1ull << (64 - snboff + 1)) - 1;; | |
if (snboff == 1) { | |
snbbit = -1; | |
} | |
if (snfpos == snbpos) { | |
/* Same bitmap */ | |
uint64_t bitmap = base[snfpos]; | |
return bitmap; | |
} | |
else { | |
uint64_t fbit = base[snfpos]; | |
uint64_t bbit = base[snbpos]; | |
fbit = (fbit & snfbit) << (64 - snboff); | |
bbit = (bbit >> snboff) & snbbit; | |
return fbit | bbit; | |
} | |
} | |
mhash *mh_init(int size) { | |
mhash *h = malloc(sizeof(mhash)); | |
h->n_buckets = size; | |
h->exists = calloc((size + 63) / 64, sizeof(uint64_t)); | |
h->keys = calloc(size, sizeof(mh_key_t)); | |
h->values = calloc(size, sizeof(mh_key_t)); | |
h->same_hash = calloc(size, sizeof(uint16_t)); | |
return h; | |
} | |
#define hash_func(hash) (unsigned int)((key)^((key)<<2)^((key)>>2)) | |
int mh_put(mhash *h, mh_key_t key) { | |
int hv = hash_func(key) % h->n_buckets; | |
uint64_t hv_bitmap = (1ull << (hv & 63)); | |
if ((h->exists[hv / 64] & hv_bitmap) == 0) { | |
/* no confilct hash value and entry is fresh */ | |
h->exists[hv / 64] |= hv_bitmap; | |
h->keys[hv] = key; | |
h->same_hash[hv] = 1; | |
return hv; | |
} | |
else if (h->same_hash[hv] & 1) { | |
/* useing entry for same hash value */ | |
uint64_t albitmap = ror(h->exists, hv, h->n_buckets); | |
uint64_t alorigin = albitmap >> 16; | |
uint64_t alexits = albitmap >> 32; | |
int empoff; | |
empoff = 31 - __builtin_clz((~(uint32_t)alexits) & ((1 << same_hash_size) - 1)); | |
if (empoff > 0 && empoff < same_hash_size) { | |
int newent = (empoff + hv) % h->n_buckets; | |
h->same_hash[hv] |= (1 << empoff); | |
h->exists[newent / 64] |= (1ull << (newent & 63)); | |
h->same_hash[newent] = (empoff << 1); | |
h->keys[newent] = key; | |
return newent; | |
} | |
else { | |
for (int i = 1; i < same_hash_size; i++) { | |
int srcent = (hv + i) % h->n_buckets; | |
uint16_t csh = h->same_hash[srcent]; | |
if ((csh & 1) == 0 && csh != 0) { | |
csh >>= 1; | |
int hv2 = (srcent - csh) % h->n_buckets; | |
if (hv2 < 0) { | |
hv2 += h->n_buckets; | |
} | |
uint64_t alexits2 = ror(h->exists, hv2, h->n_buckets) >> 32; | |
int empoff = __builtin_ctzll(alexits2 + 1); | |
if (empoff < same_hash_size) { | |
int newent = (empoff + hv2) % h->n_buckets; | |
uint64_t hv_bitmap = 1ull << (newent & 63); | |
h->same_hash[hv2] ^= (1 << csh); /* Clear old entry */ | |
h->same_hash[hv2] |= (1 << empoff); /* Set new entry */ | |
h->keys[newent] = h->keys[srcent]; | |
h->values[newent] = h->values[srcent]; | |
h->exists[newent / 64] |= hv_bitmap; | |
h->same_hash[srcent] = i << 1; | |
h->same_hash[hv] |= (1 << i); | |
h->keys[srcent] = key; | |
return srcent; | |
} | |
} | |
} | |
} | |
/* Oh! fill my entry. Must still from other entry */ | |
abort(); | |
} | |
else { | |
/* using entry for other hash value. */ | |
int eoff = h->same_hash[hv] >> 1; | |
int hv2 = (hv - eoff) % h->n_buckets; | |
if (hv2 < 0) { | |
hv2 += h->n_buckets; | |
} | |
uint64_t alexits = ror(h->exists, hv2, h->n_buckets) >> 32; | |
int empoff = __builtin_ctzll(alexits + 1); | |
if (empoff < same_hash_size) { | |
int newent = (empoff + hv2) % h->n_buckets; | |
uint64_t hv_bitmap = 1ull << (newent & 63); | |
h->same_hash[hv2] ^= (1 << eoff); /* Clear old entry */ | |
h->same_hash[hv2] |= (1 << empoff); /* Set new entry */ | |
h->keys[newent] = h->keys[hv]; | |
h->values[newent] = h->values[hv]; | |
h->same_hash[newent] = empoff << 1; | |
h->exists[newent / 64] |= hv_bitmap; | |
h->same_hash[hv] = 1; | |
h->keys[hv] = key; | |
return hv; | |
} | |
abort(); | |
} | |
return h->n_buckets; | |
} | |
int mh_get(mhash *h, mh_key_t key) { | |
int hv = hash_func(key) % h->n_buckets; | |
uint16_t hbit = h->same_hash[hv]; | |
if (hbit & 1) { | |
if (h->keys[hv] == key) { | |
return hv; | |
} | |
hbit ^= 1; | |
for (int i = __builtin_ctz(hbit); i > 0 && i < 16;i = __builtin_ctz(hbit)) { | |
int ent = (i + hv) % h->n_buckets; | |
if (h->keys[ent] == key) { | |
return ent; | |
} | |
hbit ^= (1 << i); | |
} | |
} | |
return -1; | |
} | |
main(){ | |
const ndata = 8192; | |
mhash *h = mh_init(ndata); | |
for (int i = 0; i < ndata; i++) { | |
int ent = mh_put(h, i); | |
printf("%d %d \n", i, ent); | |
h->values[ent] = i + 1; | |
} | |
printf("fin\n"); | |
for (int i = 0; i < ndata; i++) { | |
int ent = mh_get(h, i); | |
printf("%d %d \n", h->values[ent], ent); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment