Last active
April 5, 2018 17:22
-
-
Save jsimmons/5382989 to your computer and use it in GitHub Desktop.
Quicky hash map in C
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// We don't block multiple includes of this file since each inclusion should | |
// be for a different type. Be careful. | |
// | |
// A map using hopscotch hashing[1], an open-addressing scheme for resolving | |
// hash collisions. | |
// | |
// [1] Herlihy, Maurice and Shavit, Nir and Tzafrir, Moran (2008). | |
// "Hopscotch Hashing". | |
// DISC '08: Proceedings of the 22nd international symposium on | |
// Distributed Computing. Arcachon, France: Springer-Verlag. pp. 350--364. | |
// | |
// Hopscotch hashing defines a neighbourhood of HOP_RANGE size following the | |
// hashed bucket within which any value hashed to that bucket will be found. | |
// Each bucket stores a bitmap which describes which of the buckets in that | |
// bucket's neighbourhood contain values that hashed to its position. | |
// | |
// Lookup is a matter of finding the hashed bucket and iterating up to | |
// HOP_RANGE times checking the bitmap and if set checking the value. Deletion | |
// and exists checks work the same way. | |
// | |
// To insert we first scan from the hashed bucket up to ADD_RANGE entries in | |
// order to find an empty slot. If a slot is found then we check if it's within | |
// HOP_RANGE in which case we can simply do the insertion. Otherwise we have to | |
// find a bucket in another neighbourhood which we can swap with the hole in | |
// order to move the hole closer to the hashed slot. This iterates until we | |
// move the bucket into HOP_RANGE or a swap is impossible. | |
// | |
// Operations return false if they cannot be completed, there is no in-build | |
// resize (since we don't have anything to do with hash generation either) | |
// | |
#include <stdint.h> | |
#include <stdlib.h> | |
#include <stdbool.h> | |
#include <stdio.h> | |
#include "jsb_macros.h" | |
#ifndef JSB_MAP_HASH_TYPE | |
#define JSB_MAP_HASH_TYPE uint32_t | |
#endif | |
#ifndef JSB_MAP_KEY_TYPE | |
#define JSB_MAP_KEY_TYPE const char * | |
#endif | |
#ifndef JSB_MAP_VALUE_TYPE | |
#define JSB_MAP_VALUE_TYPE intptr_t | |
#endif | |
#ifndef JSB_MAP_KEY_CMP | |
#define JSB_MAP_KEY_CMP(X, Y) ((X) == (Y)) | |
#endif | |
#ifndef JSB_MAP_KEY_SENTINEL | |
#define JSB_MAP_KEY_SENTINEL 0 | |
#endif | |
#ifndef JSB_MAP_ALLOC | |
#define JSB_MAP_ALLOC(Bytes) malloc((Bytes)) | |
#endif | |
#ifndef JSB_MAP_RELEASE | |
#define JSB_MAP_RELEASE(Ptr) free(Ptr) | |
#endif | |
#ifndef JSB_MAP_NAME | |
#define JSB_MAP_NAME jsb_map | |
#endif | |
#define HOP_BITMAP_TYPE uint16_t | |
#define HOP_RANGE 16 | |
#define ADD_RANGE 70 | |
#define PASTER(X, Y) X ## _ ## Y | |
#define EVALUATOR(X, Y) PASTER(X, Y) | |
#define PREFIX(Name) EVALUATOR(JSB_MAP_NAME, Name) | |
struct PREFIX(bucket) | |
{ | |
HOP_BITMAP_TYPE hop_info; | |
JSB_MAP_KEY_TYPE key; | |
JSB_MAP_VALUE_TYPE value; | |
}; | |
struct JSB_MAP_NAME | |
{ | |
size_t num_buckets; | |
struct PREFIX(bucket) *buckets; | |
}; | |
void | |
PREFIX(init)(struct JSB_MAP_NAME *self, size_t num_buckets); | |
void | |
PREFIX(fina)(struct JSB_MAP_NAME *self); | |
bool | |
PREFIX(insert)(struct JSB_MAP_NAME *self, JSB_MAP_HASH_TYPE hash, | |
JSB_MAP_KEY_TYPE key, JSB_MAP_VALUE_TYPE value); | |
bool | |
PREFIX(lookup)(const struct JSB_MAP_NAME *self, JSB_MAP_HASH_TYPE hash, | |
JSB_MAP_KEY_TYPE key, JSB_MAP_VALUE_TYPE *out_value); | |
bool | |
PREFIX(remove)(struct JSB_MAP_NAME *self, JSB_MAP_HASH_TYPE hash, | |
JSB_MAP_KEY_TYPE key, JSB_MAP_VALUE_TYPE *out_value); | |
#ifdef JSB_DEFINE | |
void | |
PREFIX(init)(struct JSB_MAP_NAME *self, size_t num_buckets) | |
{ | |
self->num_buckets = num_buckets; | |
// We allocate an extra HOP_RANGE buckets to alleviate the issue where any | |
// keys hashed near the end of the bucket array would overly quickly run | |
// out of slots and potentially trigger a premature resize. Also simplifies | |
// the lookup and delete loops. | |
size_t buckets_size = sizeof(struct PREFIX(bucket)) * (num_buckets + HOP_RANGE); | |
self->buckets = JSB_MAP_ALLOC(buckets_size); | |
// Unfortunately we can't just go and zero the whole block of memory since | |
// we support a user-defined empty key sentinel. | |
for(size_t i = 0; i < num_buckets + HOP_RANGE; i++) | |
{ | |
self->buckets[i].hop_info = 0; | |
self->buckets[i].key = JSB_MAP_KEY_SENTINEL; | |
} | |
} | |
void | |
PREFIX(fina)(struct JSB_MAP_NAME *self) | |
{ | |
self->num_buckets = 0; | |
JSB_MAP_RELEASE(self->buckets); | |
} | |
static inline size_t | |
PREFIX(shift_free_bucket)(struct PREFIX(bucket) *free_bucket) | |
{ | |
for(int i = HOP_RANGE - 1; i > 0; i--) | |
{ | |
struct PREFIX(bucket) *neighbour = free_bucket - i; | |
for(int j = 0; j < i; j++) | |
{ | |
// If we're a member of neighbour it means we're a full bucket ripe | |
// for the swapping. | |
if(neighbour->hop_info & (1 << j)) | |
{ | |
// Don't touch the hop info for the buckets we're swapping, | |
// the only metadata we're interested in is that for the | |
// neighbour bucket. | |
// `new_free` slot is no longer valid for neighbour. | |
neighbour->hop_info &= ~(1 << j); | |
// `free_bucket` now contains a valid entry for neighbour. | |
neighbour->hop_info |= (1 << i); | |
struct PREFIX(bucket) *new_free = neighbour + j; | |
free_bucket->key = new_free->key; | |
free_bucket->value = new_free->value; | |
new_free->key = JSB_MAP_KEY_SENTINEL; | |
return i - j; | |
} | |
} | |
} | |
return 0; | |
} | |
bool | |
PREFIX(insert)(struct JSB_MAP_NAME *self, JSB_MAP_HASH_TYPE hash, | |
JSB_MAP_KEY_TYPE key, JSB_MAP_VALUE_TYPE value) | |
{ | |
size_t start_pos = hash % self->num_buckets; | |
struct PREFIX(bucket) *start_bucket = &self->buckets[start_pos]; | |
// Well, one can hope! | |
if(JSB_MAP_KEY_CMP(start_bucket->key, JSB_MAP_KEY_SENTINEL)) | |
{ | |
start_bucket->hop_info |= 1; | |
start_bucket->key = key; | |
start_bucket->value = value; | |
return true; | |
} | |
for(size_t i = 1; i < JSB_MIN(ADD_RANGE, self->num_buckets - start_pos + HOP_RANGE); i++) | |
{ | |
struct PREFIX(bucket) *bucket = &self->buckets[start_pos + i]; | |
if(JSB_MAP_KEY_CMP(bucket->key, JSB_MAP_KEY_SENTINEL)) | |
{ | |
for(;;) | |
{ | |
if(i < HOP_RANGE) | |
{ | |
start_bucket->hop_info |= (1 << i); | |
bucket->key = key; | |
bucket->value = value; | |
return true; | |
} | |
size_t shift_distance = PREFIX(shift_free_bucket)(bucket); | |
if(shift_distance) | |
{ | |
i -= shift_distance; | |
bucket = &self->buckets[start_pos + i]; | |
continue; | |
} | |
return false; | |
} | |
} | |
} | |
return false; | |
} | |
bool | |
PREFIX(lookup)(const struct JSB_MAP_NAME *self, JSB_MAP_HASH_TYPE hash, | |
JSB_MAP_KEY_TYPE key, JSB_MAP_VALUE_TYPE *out_value) | |
{ | |
size_t start_pos = hash % self->num_buckets; | |
struct PREFIX(bucket) *start_bucket = &self->buckets[start_pos]; | |
// fast path, nothing contained in this neighbourhood. | |
if(start_bucket->hop_info == 0) | |
{ | |
return false; | |
} | |
for(int i = 0; i < HOP_RANGE; i++) | |
{ | |
if(start_bucket->hop_info & (1 << i)) | |
{ | |
struct PREFIX(bucket) *bucket = &self->buckets[start_pos + i]; | |
if(JSB_MAP_KEY_CMP(bucket->key, key)) | |
{ | |
*out_value = bucket->value; | |
return true; | |
} | |
} | |
} | |
return false; | |
} | |
bool | |
PREFIX(remove)(struct JSB_MAP_NAME *self, JSB_MAP_HASH_TYPE hash, | |
JSB_MAP_KEY_TYPE key, JSB_MAP_VALUE_TYPE *out_value) | |
{ | |
size_t start_pos = hash % self->num_buckets; | |
struct PREFIX(bucket) *start_bucket = &self->buckets[start_pos]; | |
// fast path, nothing contained in this neighbourhood. | |
if(start_bucket->hop_info == 0) | |
{ | |
return false; | |
} | |
for(int i = 0; i < HOP_RANGE; i++) | |
{ | |
if(start_bucket->hop_info & (1 << i)) | |
{ | |
struct PREFIX(bucket) *bucket = &self->buckets[start_pos + i]; | |
if(JSB_MAP_KEY_CMP(bucket->key, key)) | |
{ | |
*out_value = bucket->value; | |
bucket->key = JSB_MAP_KEY_SENTINEL; | |
start_bucket->hop_info &= ~(1 << i); | |
return true; | |
} | |
} | |
} | |
return false; | |
} | |
#endif | |
#undef JSB_MAP_NAME | |
#undef JSB_MAP_HASH_TYPE | |
#undef JSB_MAP_KEY_TYPE | |
#undef JSB_MAP_VALUE_TYPE | |
#undef JSB_MAP_KEY_CMP | |
#undef JSB_MAP_ALLOC | |
#undef JSB_MAP_RELEASE | |
#undef HOP_RANGE | |
#undef ADD_RANGE | |
#undef PASTER | |
#undef EVALUATOR | |
#undef PREFIX |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment