Skip to content

Instantly share code, notes, and snippets.

@jsimmons
Last active April 5, 2018 17:22
Show Gist options
  • Save jsimmons/5382989 to your computer and use it in GitHub Desktop.
Save jsimmons/5382989 to your computer and use it in GitHub Desktop.
Quicky hash map in C
// We don't block multiple includes of this file since each inclusion should
// be for a different type. Be careful.
//
// A map using hopscotch hashing[1], an open-addressing scheme for resolving
// hash collisions.
//
// [1] Herlihy, Maurice and Shavit, Nir and Tzafrir, Moran (2008).
// "Hopscotch Hashing".
// DISC '08: Proceedings of the 22nd international symposium on
// Distributed Computing. Arcachon, France: Springer-Verlag. pp. 350--364.
//
// Hopscotch hashing defines a neighbourhood of HOP_RANGE size following the
// hashed bucket within which any value hashed to that bucket will be found.
// Each bucket stores a bitmap which describes which of the buckets in that
// bucket's neighbourhood contain values that hashed to its position.
//
// Lookup is a matter of finding the hashed bucket and iterating up to
// HOP_RANGE times checking the bitmap and if set checking the value. Deletion
// and exists checks work the same way.
//
// To insert we first scan from the hashed bucket up to ADD_RANGE entries in
// order to find an empty slot. If a slot is found then we check if it's within
// HOP_RANGE in which case we can simply do the insertion. Otherwise we have to
// find a bucket in another neighbourhood which we can swap with the hole in
// order to move the hole closer to the hashed slot. This iterates until we
// move the bucket into HOP_RANGE or a swap is impossible.
//
// Operations return false if they cannot be completed, there is no in-build
// resize (since we don't have anything to do with hash generation either)
//
#include <stdint.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdio.h>
#include "jsb_macros.h"
#ifndef JSB_MAP_HASH_TYPE
#define JSB_MAP_HASH_TYPE uint32_t
#endif
#ifndef JSB_MAP_KEY_TYPE
#define JSB_MAP_KEY_TYPE const char *
#endif
#ifndef JSB_MAP_VALUE_TYPE
#define JSB_MAP_VALUE_TYPE intptr_t
#endif
#ifndef JSB_MAP_KEY_CMP
#define JSB_MAP_KEY_CMP(X, Y) ((X) == (Y))
#endif
#ifndef JSB_MAP_KEY_SENTINEL
#define JSB_MAP_KEY_SENTINEL 0
#endif
#ifndef JSB_MAP_ALLOC
#define JSB_MAP_ALLOC(Bytes) malloc((Bytes))
#endif
#ifndef JSB_MAP_RELEASE
#define JSB_MAP_RELEASE(Ptr) free(Ptr)
#endif
#ifndef JSB_MAP_NAME
#define JSB_MAP_NAME jsb_map
#endif
#define HOP_BITMAP_TYPE uint16_t
#define HOP_RANGE 16
#define ADD_RANGE 70
#define PASTER(X, Y) X ## _ ## Y
#define EVALUATOR(X, Y) PASTER(X, Y)
#define PREFIX(Name) EVALUATOR(JSB_MAP_NAME, Name)
struct PREFIX(bucket)
{
HOP_BITMAP_TYPE hop_info;
JSB_MAP_KEY_TYPE key;
JSB_MAP_VALUE_TYPE value;
};
struct JSB_MAP_NAME
{
size_t num_buckets;
struct PREFIX(bucket) *buckets;
};
void
PREFIX(init)(struct JSB_MAP_NAME *self, size_t num_buckets);
void
PREFIX(fina)(struct JSB_MAP_NAME *self);
bool
PREFIX(insert)(struct JSB_MAP_NAME *self, JSB_MAP_HASH_TYPE hash,
JSB_MAP_KEY_TYPE key, JSB_MAP_VALUE_TYPE value);
bool
PREFIX(lookup)(const struct JSB_MAP_NAME *self, JSB_MAP_HASH_TYPE hash,
JSB_MAP_KEY_TYPE key, JSB_MAP_VALUE_TYPE *out_value);
bool
PREFIX(remove)(struct JSB_MAP_NAME *self, JSB_MAP_HASH_TYPE hash,
JSB_MAP_KEY_TYPE key, JSB_MAP_VALUE_TYPE *out_value);
#ifdef JSB_DEFINE
void
PREFIX(init)(struct JSB_MAP_NAME *self, size_t num_buckets)
{
self->num_buckets = num_buckets;
// We allocate an extra HOP_RANGE buckets to alleviate the issue where any
// keys hashed near the end of the bucket array would overly quickly run
// out of slots and potentially trigger a premature resize. Also simplifies
// the lookup and delete loops.
size_t buckets_size = sizeof(struct PREFIX(bucket)) * (num_buckets + HOP_RANGE);
self->buckets = JSB_MAP_ALLOC(buckets_size);
// Unfortunately we can't just go and zero the whole block of memory since
// we support a user-defined empty key sentinel.
for(size_t i = 0; i < num_buckets + HOP_RANGE; i++)
{
self->buckets[i].hop_info = 0;
self->buckets[i].key = JSB_MAP_KEY_SENTINEL;
}
}
void
PREFIX(fina)(struct JSB_MAP_NAME *self)
{
self->num_buckets = 0;
JSB_MAP_RELEASE(self->buckets);
}
static inline size_t
PREFIX(shift_free_bucket)(struct PREFIX(bucket) *free_bucket)
{
for(int i = HOP_RANGE - 1; i > 0; i--)
{
struct PREFIX(bucket) *neighbour = free_bucket - i;
for(int j = 0; j < i; j++)
{
// If we're a member of neighbour it means we're a full bucket ripe
// for the swapping.
if(neighbour->hop_info & (1 << j))
{
// Don't touch the hop info for the buckets we're swapping,
// the only metadata we're interested in is that for the
// neighbour bucket.
// `new_free` slot is no longer valid for neighbour.
neighbour->hop_info &= ~(1 << j);
// `free_bucket` now contains a valid entry for neighbour.
neighbour->hop_info |= (1 << i);
struct PREFIX(bucket) *new_free = neighbour + j;
free_bucket->key = new_free->key;
free_bucket->value = new_free->value;
new_free->key = JSB_MAP_KEY_SENTINEL;
return i - j;
}
}
}
return 0;
}
bool
PREFIX(insert)(struct JSB_MAP_NAME *self, JSB_MAP_HASH_TYPE hash,
JSB_MAP_KEY_TYPE key, JSB_MAP_VALUE_TYPE value)
{
size_t start_pos = hash % self->num_buckets;
struct PREFIX(bucket) *start_bucket = &self->buckets[start_pos];
// Well, one can hope!
if(JSB_MAP_KEY_CMP(start_bucket->key, JSB_MAP_KEY_SENTINEL))
{
start_bucket->hop_info |= 1;
start_bucket->key = key;
start_bucket->value = value;
return true;
}
for(size_t i = 1; i < JSB_MIN(ADD_RANGE, self->num_buckets - start_pos + HOP_RANGE); i++)
{
struct PREFIX(bucket) *bucket = &self->buckets[start_pos + i];
if(JSB_MAP_KEY_CMP(bucket->key, JSB_MAP_KEY_SENTINEL))
{
for(;;)
{
if(i < HOP_RANGE)
{
start_bucket->hop_info |= (1 << i);
bucket->key = key;
bucket->value = value;
return true;
}
size_t shift_distance = PREFIX(shift_free_bucket)(bucket);
if(shift_distance)
{
i -= shift_distance;
bucket = &self->buckets[start_pos + i];
continue;
}
return false;
}
}
}
return false;
}
bool
PREFIX(lookup)(const struct JSB_MAP_NAME *self, JSB_MAP_HASH_TYPE hash,
JSB_MAP_KEY_TYPE key, JSB_MAP_VALUE_TYPE *out_value)
{
size_t start_pos = hash % self->num_buckets;
struct PREFIX(bucket) *start_bucket = &self->buckets[start_pos];
// fast path, nothing contained in this neighbourhood.
if(start_bucket->hop_info == 0)
{
return false;
}
for(int i = 0; i < HOP_RANGE; i++)
{
if(start_bucket->hop_info & (1 << i))
{
struct PREFIX(bucket) *bucket = &self->buckets[start_pos + i];
if(JSB_MAP_KEY_CMP(bucket->key, key))
{
*out_value = bucket->value;
return true;
}
}
}
return false;
}
bool
PREFIX(remove)(struct JSB_MAP_NAME *self, JSB_MAP_HASH_TYPE hash,
JSB_MAP_KEY_TYPE key, JSB_MAP_VALUE_TYPE *out_value)
{
size_t start_pos = hash % self->num_buckets;
struct PREFIX(bucket) *start_bucket = &self->buckets[start_pos];
// fast path, nothing contained in this neighbourhood.
if(start_bucket->hop_info == 0)
{
return false;
}
for(int i = 0; i < HOP_RANGE; i++)
{
if(start_bucket->hop_info & (1 << i))
{
struct PREFIX(bucket) *bucket = &self->buckets[start_pos + i];
if(JSB_MAP_KEY_CMP(bucket->key, key))
{
*out_value = bucket->value;
bucket->key = JSB_MAP_KEY_SENTINEL;
start_bucket->hop_info &= ~(1 << i);
return true;
}
}
}
return false;
}
#endif
#undef JSB_MAP_NAME
#undef JSB_MAP_HASH_TYPE
#undef JSB_MAP_KEY_TYPE
#undef JSB_MAP_VALUE_TYPE
#undef JSB_MAP_KEY_CMP
#undef JSB_MAP_ALLOC
#undef JSB_MAP_RELEASE
#undef HOP_RANGE
#undef ADD_RANGE
#undef PASTER
#undef EVALUATOR
#undef PREFIX
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment