Created
July 14, 2021 03:00
-
-
Save assyrianic/e22e29c62d2fac7a89fcacc94fb48af7 to your computer and use it in GitHub Desktop.
cache friendly, order-preserving hash table.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* ordered map for SourcePawn. | |
* Author: Kevin Yonan, (C) 2021. | |
* License: MIT | |
*/ | |
#ifndef ORDMAP_INCLUDED | |
# define ORDMAP_INCLUDED | |
#include <inttypes.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <vector> | |
#include <string> | |
#include <random> | |
#include <memory> | |
#include <variant> | |
static size_t bool_hash(const bool key, const size_t seed=0) { | |
size_t h = seed + 1; | |
if( key ) { | |
h = 1 + (h << 6) + (h << 16) - h; | |
} else { | |
h = (h << 6) + (h << 16) - h; | |
} | |
return h; | |
} | |
static size_t int_hash(const size_t key, const size_t seed=0) { | |
size_t h = seed; | |
for( size_t n=0; n < sizeof key * 8; n += 8 ) { | |
h = ((key >> n) & 0xFF) + (h << 6) + (h << 16) - h; | |
} | |
return h; | |
} | |
static size_t float_hash(const float key, const size_t seed=0) { | |
size_t n = 0; memcpy(&n, &key, sizeof key); | |
return int_hash(n, seed); | |
} | |
template< typename T > | |
static size_t array_hash(T key, const size_t len, const size_t seed=0) { | |
size_t h = seed; | |
for( size_t i=0; i < len; i++ ) { | |
h = ( size_t )(key[i]) + (h << 6) + (h << 16) - h; | |
} | |
return h; | |
} | |
typedef std::variant< | |
/// int types. | |
bool, size_t, int, intptr_t, | |
/// self explanatory. | |
float, | |
/// string types. | |
const char*, | |
const uint8_t*, | |
const bool*, | |
const int*, | |
const void* | |
> OrdMapHashable; | |
static size_t tg_hash(const OrdMapHashable &key, const size_t len, const size_t seed=0) { | |
switch( key.index() ) { | |
case 0: return bool_hash(std::get< bool >(key), seed); | |
case 1: return int_hash(std::get< size_t >(key), seed); | |
case 2: return int_hash(std::get< int >(key), seed); | |
case 3: return int_hash(std::get< intptr_t >(key), seed); | |
case 4: return float_hash(std::get< float >(key), seed); | |
case 5: return array_hash(std::get< const char* >(key), len, seed); | |
case 6: return array_hash(std::get< const uint8_t* >(key), len, seed); | |
case 7: return array_hash(std::get< const bool* >(key), len, seed); | |
case 8: return array_hash(std::get< const int* >(key), len, seed); | |
default: { | |
size_t h = seed; | |
const uint8_t *k = reinterpret_cast< const uint8_t* >(std::get< const void* >(key)); | |
for( size_t i=0; i < len; i++ ) { | |
h = ( size_t )(k[i]) + (h << 6) + (h << 16) - h; | |
} | |
return h; | |
} | |
} | |
} | |
template< typename T > | |
static std::unique_ptr< T[] > resize_unique_ptr(std::unique_ptr< T[] > &p, const size_t new_size, const size_t old_size) { | |
std::unique_ptr< T[] > new_ptr = std::make_unique< T[] >(new_size); | |
for( size_t i=0; i<old_size; i++ ) { | |
new_ptr[i] = std::move(p[i]); | |
} | |
return new_ptr; | |
} | |
struct PawnArray { | |
std::unique_ptr< cell_t[] > t; | |
size_t l; | |
PawnArray(const cell_t *arr, const size_t len) { | |
t = std::make_unique< cell_t[] >(len); | |
l = len; | |
for( size_t i=0; i<len; i++ ) { | |
t[i] = arr[i]; | |
} | |
} | |
}; | |
struct PawnStr { | |
std::unique_ptr< char[] > t; | |
size_t l; | |
PawnStr(const char *str, size_t len=0) { | |
if( len==0 ) { | |
len = strlen(str); | |
} | |
t = std::make_unique< char[] >(len + 1); | |
l = len + 1; | |
strcpy(t.get(), str); | |
} | |
}; | |
struct PawnFunc { | |
cell_t fnid; | |
PawnFunc(cell_t n) { fnid = n; } | |
}; | |
typedef std::vector< size_t* > OrdMapBucket; | |
template< typename Entry > | |
struct OrdMap { | |
std::unique_ptr< OrdMapBucket[] > buckets; | |
std::unique_ptr< Entry[] > datum; | |
std::unique_ptr< OrdMapHashable[] > keys; | |
std::unique_ptr< size_t[] > hashes, keylens; | |
size_t cap, len, seed; | |
OrdMap(const size_t init_size = 4) { | |
keys = std::make_unique< OrdMapHashable[] >(init_size); | |
datum = std::make_unique< Entry[] >(init_size); | |
keylens = std::make_unique< size_t[] >(init_size); | |
hashes = std::make_unique< size_t[] >(init_size); | |
buckets = std::make_unique< OrdMapBucket[] >(init_size); | |
cap = init_size; | |
len = 0; | |
std::random_device rd; std::mt19937 gen( rd() ); | |
std::uniform_int_distribution< size_t > distrib(0, SIZE_MAX); | |
seed = distrib(gen); | |
} | |
~OrdMap() { | |
for( size_t i=0; i<cap; i++ ) { | |
buckets[i].clear(); | |
} | |
len = seed = cap = 0; | |
} | |
bool HasKey(const OrdMapHashable &key, const size_t keylen) const { | |
return GetKeyIdx(key, keylen) < len; | |
} | |
size_t KeySet(const OrdMapHashable &key, const size_t keylen, Entry val) { | |
size_t index = GetKeyIdx(key, keylen); | |
if( index < len ) { | |
datum[index] = std::move(val); | |
return index; | |
} else if( len >= cap && !_rehash(cap << 1) ) { | |
return SIZE_MAX; | |
} | |
index = len; | |
keys[index] = key; | |
datum[index] = std::move(val); | |
hashes[index] = tg_hash(key, keylen, seed); | |
_insert_entry(index); | |
keylens[index] = keylen; | |
len++; | |
return index; | |
} | |
size_t GetKeyIdx(OrdMapHashable key, const size_t keylen) const { | |
const size_t hash = tg_hash(key, keylen, seed); | |
const size_t index = hash & (cap - 1); | |
for( auto n : buckets[index] ) { | |
const uintptr_t arr_index = (( uintptr_t )(n) - ( uintptr_t )(hashes.get())) / sizeof *n; | |
if( *n==hash && keylens[arr_index]==keylen && keys[arr_index]==key ) | |
return arr_index; | |
} | |
return SIZE_MAX; | |
} | |
Entry &KeyGet(const OrdMapHashable &key, const size_t keylen) const { | |
const size_t entry = GetKeyIdx(key, keylen); | |
return IdxGet(entry); | |
} | |
Entry &IdxGet(const size_t index) const { | |
return( index >= len ) ? datum[cap-1] : datum[index]; | |
} | |
bool IdxSet(const size_t index, Entry val) { | |
if( index >= len ) { | |
return false; | |
} | |
datum[index] = std::move(val); | |
return true; | |
} | |
bool KeyRm(const OrdMapHashable &key, const size_t keylen) { | |
return IdxRm(GetKeyIdx(key, keylen)); | |
} | |
bool IdxRm(const size_t n) { | |
if( n >= len ) | |
return false; | |
const size_t index = hashes[n] & (cap - 1); | |
std::vector< size_t* > &bucket = buckets[index]; | |
const size_t entry = OrdMap::_get_vec_idx(bucket, &hashes[n]); | |
if( entry==SIZE_MAX ) | |
return false; | |
bucket.erase(bucket.begin() + entry); | |
hashes[n] = keylens[n] = 0; | |
OrdMap::_array_shift_up< OrdMapHashable >(keys.get(), len, n); | |
OrdMap::_array_shift_up< Entry >(datum.get(), len, n); | |
OrdMap::_array_shift_up< size_t >(hashes.get(), len, n); | |
OrdMap::_array_shift_up< size_t >(keylens.get(), len, n); | |
len--; | |
return true; | |
} | |
void Clear() { | |
for( size_t i=0; i<len; i++ ) { | |
keylens[i] = 0; | |
hashes[i] = 0; | |
datum[i] = Entry(); | |
} | |
for( size_t i=0; i<cap; i++ ) { | |
buckets[i].clear(); | |
} | |
len = 0; | |
} | |
Entry& operator[](const size_t index) { | |
return( index >= len )? datum[cap-1] : datum[index]; | |
} | |
const Entry& operator[](const size_t index) const { | |
return( index >= len )? datum[cap-1] : datum[index]; | |
} | |
Entry& operator[](const char *key) { | |
const size_t len = strlen(key); | |
size_t index = GetKeyIdx(key, len + 1); | |
if( index==SIZE_MAX ) { | |
index = KeySet(key, len + 1, Entry()); | |
} | |
return datum[index]; | |
} | |
const Entry& operator[](const char *key) const { | |
const size_t len = strlen(key); | |
size_t index = GetKeyIdx(key, len + 1); | |
if( index==SIZE_MAX ) { | |
return datum[cap - 1]; /// valid reference but invalid data! | |
} | |
return datum[index]; | |
} | |
private: | |
void _insert_entry(const size_t n) { | |
buckets[hashes[n] & (cap - 1)].push_back(&hashes[n]); | |
} | |
void _resize_vecs(const size_t new_size) { | |
keys = resize_unique_ptr< OrdMapHashable >(keys, new_size, cap); | |
keylens = resize_unique_ptr< size_t >(keylens, new_size, cap); | |
hashes = resize_unique_ptr< size_t >(hashes, new_size, cap); | |
datum = resize_unique_ptr< Entry >(datum, new_size, cap); | |
} | |
bool _rehash(const size_t new_size) { | |
const size_t old_cap = cap; | |
std::vector< size_t* > *curr = buckets.release(); | |
buckets = std::make_unique< OrdMapBucket[] >(new_size); | |
_resize_vecs(new_size); | |
cap = new_size; | |
for( size_t i=0; i<old_cap; i++ ) { | |
curr[i].clear(); | |
} | |
delete[] curr; curr = nullptr; | |
for( size_t i=0; i<len; i++ ) { | |
_insert_entry(i); | |
} | |
return true; | |
} | |
template< typename T > | |
static void _array_shift_up(T *const buf, size_t len, const size_t index) { | |
if( index >= len ) { | |
return; | |
} | |
for( size_t j=index, i=index+1; i<len; i++, j++ ) { | |
buf[j] = std::move(buf[i]); | |
} | |
len--; | |
buf[len] = T(); | |
} | |
static size_t _get_vec_idx(std::vector< size_t* > &bucket, const size_t *const p) { | |
for( size_t i=0; i<bucket.size(); i++ ) { | |
if( bucket[i]==p ) { | |
return i; | |
} | |
} | |
return SIZE_MAX; | |
} | |
}; | |
#endif /** ORDMAP_INCLUDED */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment