Created
April 30, 2019 00:37
-
-
Save kdrnic/4043f4a91783e5b2257a5ec986f62215 to your computer and use it in GitHub Desktop.
hashtable implementation in C
uses open addressing, linear probing
also includes a FNV1/DJB2 hashing comparison
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <stdio.h> | |
#include <stdint.h> | |
#include <time.h> | |
#include <assert.h> | |
#include <string.h> | |
#include <limits.h> | |
#include <inttypes.h> | |
uint32_t fnv1(const char *str) | |
{ | |
#define FNV_PRIME_32 16777619 | |
#define FNV_OFFSET_32 2166136261U | |
uint32_t hash = FNV_OFFSET_32; | |
int c; | |
while((c = *str++)){ | |
hash ^= c; | |
hash *= FNV_PRIME_32; | |
} | |
return hash; | |
#undef FNV_OFFSET_32 | |
#undef FNV_PRIME_32 | |
} | |
uint32_t djb2(const char *str) | |
{ | |
uint32_t hash = 5381; | |
int c; | |
while((c = *str++)) hash = ((hash << 5) + hash) + c; | |
return hash; | |
} | |
typedef long long int modpow_t; | |
//Returns (b^e)%m | |
modpow_t modular_pow(modpow_t base, modpow_t exponent, modpow_t modulus) | |
{ | |
modpow_t result = 1; | |
if(modulus == 1) return 0; | |
//assert(UINT_MAX / (modulus - 1) > (modulus - 1)); | |
base %= modulus; | |
while(exponent > 0){ | |
if(exponent & 1){ | |
result = (result * base) % modulus; | |
} | |
exponent >>= 1; | |
base = (base * base) % modulus; | |
} | |
return result; | |
} | |
//Primality test | |
//Accurate at least up to 2 billion | |
int miller_rabin_test(int n) | |
{ | |
const int bases[] = {2, 7, 61}; | |
int r = 0, d = n - 1, i, j; | |
assert(n < 2000000000); | |
if(n == 2 || n == 3) return 1; | |
if((n & 1) == 0 || n < 2) return 0; | |
while(!(d & 1)){ | |
d >>= 1; | |
r++; | |
} | |
for(j = 0; j < sizeof(bases) / sizeof(bases[0]); j++){ | |
int a = bases[j]; | |
int x = modular_pow(a, d, n); | |
if(x == 1 || x == n - 1) continue; | |
for(i = 1; i < r; i++){ | |
x = modular_pow(x, 2, n); | |
if(x == n - 1) break; | |
} | |
if(i == r) return 0; | |
} | |
return 1; | |
} | |
//Returns a prime greater than | |
int get_prime_gt(int a) | |
{ | |
while(!miller_rabin_test(a)){ | |
a++; | |
} | |
return a; | |
} | |
typedef struct hashtable | |
{ | |
struct bucket | |
{ | |
//Storing the hash allows for fast reinsertion | |
uint32_t hash; | |
//Currently stored as pointers to individual strings | |
//If pointing to the very table buffer, means a previously deleted bucket | |
char *key; | |
void *val; | |
} *table; | |
//Hash func ptr | |
uint32_t (*hash)(const char *); | |
//Num of used buckets, total capacity | |
int len, siz; | |
} hashtable; | |
//Create a hash table | |
hashtable *ht_create(int cap) | |
{ | |
hashtable *h = malloc(sizeof(*h)); | |
h->len = 0; | |
h->siz = get_prime_gt(cap); | |
h->table = malloc(sizeof(*h->table) * h->siz); | |
memset(h->table, 0, sizeof(*h->table) * h->siz); | |
return h; | |
} | |
//Destroys a hash table | |
void ht_destroy(hashtable *h) | |
{ | |
int i; | |
for(i = 0; i < h->siz; i++){ | |
if(h->table[i].key && h->table[i].key != (char *) h->table){ | |
free(h->table[i].key); | |
} | |
} | |
free(h->table); | |
free(h); | |
} | |
//Searches for key, returns value or 0 if not found | |
void *ht_search(const hashtable *h, const char *k) | |
{ | |
int i; | |
const int i0 = h->hash(k) % h->siz; | |
i = i0; | |
do { | |
if(!h->table[i].key) break; | |
if(h->table[i].key != (char *) h->table){ | |
if(!strcmp(h->table[i].key, k)) return h->table[i].val; | |
} | |
i = (i + 1) % h->siz; | |
} while(i != i0); | |
return 0; | |
} | |
//Grows the hashtable to new capacity | |
void ht_resize(hashtable *h, int new_cap) | |
{ | |
int j; | |
hashtable *newh = ht_create(new_cap); | |
for(j = 0; j < h->siz; j++){ | |
int i; | |
const int i0 = h->table[j].hash % newh->siz; | |
if(!h->table[j].key) continue; | |
if(h->table[j].key == (char *) h->table) continue; | |
i = i0; | |
do { | |
if(!newh->table[i].key) break; | |
i = (i + 1) % newh->siz; | |
} while(i != i0); | |
newh->table[i] = h->table[j]; | |
} | |
free(h->table); | |
h->table = newh->table; | |
h->siz = newh->siz; | |
free(newh); | |
} | |
//Insert key-val pair, grows table if necessary | |
void ht_insert(hashtable *h, const char *k, void *v) | |
{ | |
int i; | |
uint32_t hash = h->hash(k); | |
const int i0 = hash % h->siz; | |
i = i0; | |
do { | |
if(!h->table[i].key) break; | |
if(h->table[i].key == (char *) h->table) break; | |
if(!strcmp(h->table[i].key, k)){ | |
h->table[i].val = v; | |
return; | |
} | |
i = (i + 1) % h->siz; | |
} while(i != i0); | |
if(h->table[i].key && h->table[i].key != (char *) h->table){ | |
ht_resize(h, h->siz * 2); | |
ht_insert(h, k, v); | |
return; | |
} | |
h->table[i].key = strdup(k); | |
h->table[i].val = v; | |
h->table[i].hash = hash; | |
h->len++; | |
//Relatively arbitrary limits | |
if(h->len * 100 > h->siz * 70){ | |
ht_resize(h, h->siz * 2); | |
} | |
else if(h->len * 100 < h->siz * 10 && h->siz > 53){ | |
ht_resize(h, h->siz / 2); | |
} | |
} | |
//Deletes a key | |
void ht_delete(hashtable *h, const char *k) | |
{ | |
int i; | |
const int i0 = h->hash(k) % h->siz; | |
i = i0; | |
do { | |
if(!h->table[i].key) return; | |
if(h->table[i].key != (char *) h->table){ | |
if(!strcmp(h->table[i].key, k)){ | |
free(h->table[i].key); | |
h->table[i].key = (char *) h->table; | |
h->len--; | |
return; | |
} | |
} | |
i = (i + 1) % h->siz; | |
} while(i != i0); | |
} | |
//105249 filenames | |
//find > list.h, then ^ -> " and $ -> ", plus const char *strings={, }; | |
#include "list.h" | |
/* | |
Output: | |
0 done... | |
100 done... | |
200 done... | |
300 done... | |
400 done... | |
500 done... | |
600 done... | |
700 done... | |
800 done... | |
900 done... | |
Went through 11073732 strings | |
num_strings: 105249 | |
Collisions: | |
DJB2 0 | |
FNV1 3 | |
*/ | |
/* | |
gprof information (collision counting omitted): | |
Each sample counts as 0.01 seconds. | |
% cumulative self self total | |
time seconds seconds calls us/call us/call name | |
29.01 3.01 3.01 44236788 0.07 0.07 fnv1 | |
26.12 5.72 2.71 main | |
25.83 8.40 2.68 44236788 0.06 0.06 djb2 | |
10.22 9.46 1.06 22138394 0.05 0.11 ht_insert | |
3.95 9.87 0.41 36842 11.13 11.39 ht_resize | |
3.57 10.24 0.37 _mcount_private | |
0.29 10.27 0.03 __moddi3 | |
0.29 10.30 0.03 free | |
0.29 10.33 0.03 strcmp | |
0.19 10.35 0.02 strdup | |
0.14 10.37 0.01 _fentry__ | |
0.10 10.38 0.01 246150 0.04 0.04 miller_rabin_test | |
*/ | |
int main() | |
{ | |
char tmp[256]; | |
int i, j, k; | |
int tot = 0; | |
const int num_strings = sizeof(strings) / sizeof(strings[0]); | |
srand(time(0)); | |
for(i = 0; i < 1000; i++){ | |
const int a = rand() % num_strings, b = rand() % num_strings; | |
const int *min = (a < b) ? &a : &b, *max = (a > b) ? &a : &b; | |
hashtable *h1, *h2; | |
if(a == b) continue; | |
h1 = ht_create(rand() % (num_strings / 1234)); | |
h2 = ht_create(rand() % (num_strings / 1234)); | |
h1->hash = djb2; | |
h2->hash = fnv1; | |
for(j = *min, k = 0; j < *max; j++, k++){ | |
ht_insert(h1, strings[j], (void *) strings[j]); | |
ht_insert(h2, strings[j], (void *) k); | |
} | |
assert(h1->len == *max - *min); | |
assert(h1->len == h2->len); | |
for(j = *min, k = 0; j < *max; j++, k++){ | |
assert(ht_search(h1, strings[j]) == strings[j]); | |
assert((int) ht_search(h2, strings[j]) == k); | |
} | |
for(j = *max - 1; j >= *min; j--){ | |
ht_delete(h1, strings[j]); | |
ht_delete(h2, strings[j]); | |
assert(!ht_search(h1, strings[j])); | |
assert(!ht_search(h2, strings[j])); | |
assert(h1->len == j - *min); | |
assert(h2->len == j - *min); | |
} | |
for(j = 0; j < 20; j++){ | |
k = rand() % num_strings; | |
ht_insert(h1, strings[k], (void *) strings[k]); | |
ht_insert(h2, strings[k], (void *) k); | |
assert(ht_search(h1, strings[k]) == strings[k]); | |
assert((int) ht_search(h2, strings[k]) == k); | |
} | |
ht_destroy(h1); | |
ht_destroy(h2); | |
if(i % 100 == 0) printf("%d done...\n", i); | |
tot += *max - *min; | |
} | |
printf("Went through %d strings\nnum_strings: %d\n", tot, num_strings); | |
if(1){ | |
int dcoll = 0, fcoll = 0; | |
hashtable *h1, *h2; | |
h1 = ht_create((num_strings * 3) / 2); | |
h2 = ht_create((num_strings * 3) / 2); | |
h1->hash = djb2; | |
h2->hash = fnv1; | |
for(i = 0; i < num_strings; i++){ | |
sprintf(tmp, "%"PRIu32, djb2(strings[i])); | |
if(ht_search(h1, tmp)) dcoll++; | |
else ht_insert(h1, tmp, (void *) 1); | |
sprintf(tmp, "%"PRIu32, fnv1(strings[i])); | |
if(ht_search(h2, tmp)) fcoll++; | |
else ht_insert(h2, tmp, (void *) 1); | |
} | |
printf("Collisions:\nDJB2\t\t%d\nFNV1\t\t%d\n", dcoll, fcoll); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment