Skip to content

Instantly share code, notes, and snippets.

@kdrnic
Created April 30, 2019 00:37
Show Gist options
  • Save kdrnic/a9025341a3509c5eb8f0e9691c8e9df0 to your computer and use it in GitHub Desktop.
Save kdrnic/a9025341a3509c5eb8f0e9691c8e9df0 to your computer and use it in GitHub Desktop.
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <time.h>
#include <assert.h>
#include <string.h>
#include <limits.h>
#include <inttypes.h>
uint32_t fnv1(const char *str)
{
#define FNV_PRIME_32 16777619
#define FNV_OFFSET_32 2166136261U
uint32_t hash = FNV_OFFSET_32;
int c;
while((c = *str++)){
hash ^= c;
hash *= FNV_PRIME_32;
}
return hash;
#undef FNV_OFFSET_32
#undef FNV_PRIME_32
}
uint32_t djb2(const char *str)
{
uint32_t hash = 5381;
int c;
while((c = *str++)) hash = ((hash << 5) + hash) + c;
return hash;
}
typedef long long int modpow_t;
//Returns (b^e)%m
modpow_t modular_pow(modpow_t base, modpow_t exponent, modpow_t modulus)
{
modpow_t result = 1;
if(modulus == 1) return 0;
//assert(UINT_MAX / (modulus - 1) > (modulus - 1));
base %= modulus;
while(exponent > 0){
if(exponent & 1){
result = (result * base) % modulus;
}
exponent >>= 1;
base = (base * base) % modulus;
}
return result;
}
//Primality test
//Accurate at least up to 2 billion
int miller_rabin_test(int n)
{
const int bases[] = {2, 7, 61};
int r = 0, d = n - 1, i, j;
assert(n < 2000000000);
if(n == 2 || n == 3) return 1;
if((n & 1) == 0 || n < 2) return 0;
while(!(d & 1)){
d >>= 1;
r++;
}
for(j = 0; j < sizeof(bases) / sizeof(bases[0]); j++){
int a = bases[j];
int x = modular_pow(a, d, n);
if(x == 1 || x == n - 1) continue;
for(i = 1; i < r; i++){
x = modular_pow(x, 2, n);
if(x == n - 1) break;
}
if(i == r) return 0;
}
return 1;
}
//Returns a prime greater than
int get_prime_gt(int a)
{
while(!miller_rabin_test(a)){
a++;
}
return a;
}
typedef struct hashtable
{
struct bucket
{
//Storing the hash allows for fast reinsertion
uint32_t hash;
//Currently stored as pointers to individual strings
//If pointing to the very table buffer, means a previously deleted bucket
char *key;
void *val;
} *table;
//Hash func ptr
uint32_t (*hash)(const char *);
//Num of used buckets, total capacity
int len, siz;
} hashtable;
//Create a hash table
hashtable *ht_create(int cap)
{
hashtable *h = malloc(sizeof(*h));
h->len = 0;
h->siz = get_prime_gt(cap);
h->table = malloc(sizeof(*h->table) * h->siz);
memset(h->table, 0, sizeof(*h->table) * h->siz);
return h;
}
//Destroys a hash table
void ht_destroy(hashtable *h)
{
int i;
for(i = 0; i < h->siz; i++){
if(h->table[i].key && h->table[i].key != (char *) h->table){
free(h->table[i].key);
}
}
free(h->table);
free(h);
}
//Searches for key, returns value or 0 if not found
void *ht_search(const hashtable *h, const char *k)
{
int i;
const int i0 = h->hash(k) % h->siz;
i = i0;
do {
if(!h->table[i].key) break;
if(h->table[i].key != (char *) h->table){
if(!strcmp(h->table[i].key, k)) return h->table[i].val;
}
i = (i + 1) % h->siz;
} while(i != i0);
return 0;
}
//Grows the hashtable to new capacity
void ht_resize(hashtable *h, int new_cap)
{
int j;
hashtable *newh = ht_create(new_cap);
for(j = 0; j < h->siz; j++){
int i;
const int i0 = h->table[j].hash % newh->siz;
if(!h->table[j].key) continue;
if(h->table[j].key == (char *) h->table) continue;
i = i0;
do {
if(!newh->table[i].key) break;
i = (i + 1) % newh->siz;
} while(i != i0);
newh->table[i] = h->table[j];
}
free(h->table);
h->table = newh->table;
h->siz = newh->siz;
free(newh);
}
//Insert key-val pair, grows table if necessary
void ht_insert(hashtable *h, const char *k, void *v)
{
int i;
uint32_t hash = h->hash(k);
const int i0 = hash % h->siz;
i = i0;
do {
if(!h->table[i].key) break;
if(h->table[i].key == (char *) h->table) break;
if(!strcmp(h->table[i].key, k)){
h->table[i].val = v;
return;
}
i = (i + 1) % h->siz;
} while(i != i0);
if(h->table[i].key && h->table[i].key != (char *) h->table){
ht_resize(h, h->siz * 2);
ht_insert(h, k, v);
return;
}
h->table[i].key = strdup(k);
h->table[i].val = v;
h->table[i].hash = hash;
h->len++;
//Relatively arbitrary limits
if(h->len * 100 > h->siz * 70){
ht_resize(h, h->siz * 2);
}
else if(h->len * 100 < h->siz * 10 && h->siz > 53){
ht_resize(h, h->siz / 2);
}
}
//Deletes a key
void ht_delete(hashtable *h, const char *k)
{
int i;
const int i0 = h->hash(k) % h->siz;
i = i0;
do {
if(!h->table[i].key) return;
if(h->table[i].key != (char *) h->table){
if(!strcmp(h->table[i].key, k)){
free(h->table[i].key);
h->table[i].key = (char *) h->table;
h->len--;
return;
}
}
i = (i + 1) % h->siz;
} while(i != i0);
}
//105249 filenames
//find > list.h, then ^ -> " and $ -> ", plus const char *strings={, };
#include "list.h"
/*
Output:
0 done...
100 done...
200 done...
300 done...
400 done...
500 done...
600 done...
700 done...
800 done...
900 done...
Went through 11073732 strings
num_strings: 105249
Collisions:
DJB2 0
FNV1 3
*/
/*
gprof information (collision counting omitted):
Each sample counts as 0.01 seconds.
% cumulative self self total
time seconds seconds calls us/call us/call name
29.01 3.01 3.01 44236788 0.07 0.07 fnv1
26.12 5.72 2.71 main
25.83 8.40 2.68 44236788 0.06 0.06 djb2
10.22 9.46 1.06 22138394 0.05 0.11 ht_insert
3.95 9.87 0.41 36842 11.13 11.39 ht_resize
3.57 10.24 0.37 _mcount_private
0.29 10.27 0.03 __moddi3
0.29 10.30 0.03 free
0.29 10.33 0.03 strcmp
0.19 10.35 0.02 strdup
0.14 10.37 0.01 _fentry__
0.10 10.38 0.01 246150 0.04 0.04 miller_rabin_test
*/
int main()
{
char tmp[256];
int i, j, k;
int tot = 0;
const int num_strings = sizeof(strings) / sizeof(strings[0]);
srand(time(0));
for(i = 0; i < 1000; i++){
const int a = rand() % num_strings, b = rand() % num_strings;
const int *min = (a < b) ? &a : &b, *max = (a > b) ? &a : &b;
hashtable *h1, *h2;
if(a == b) continue;
h1 = ht_create(rand() % (num_strings / 1234));
h2 = ht_create(rand() % (num_strings / 1234));
h1->hash = djb2;
h2->hash = fnv1;
for(j = *min, k = 0; j < *max; j++, k++){
ht_insert(h1, strings[j], (void *) strings[j]);
ht_insert(h2, strings[j], (void *) k);
}
assert(h1->len == *max - *min);
assert(h1->len == h2->len);
for(j = *min, k = 0; j < *max; j++, k++){
assert(ht_search(h1, strings[j]) == strings[j]);
assert((int) ht_search(h2, strings[j]) == k);
}
for(j = *max - 1; j >= *min; j--){
ht_delete(h1, strings[j]);
ht_delete(h2, strings[j]);
assert(!ht_search(h1, strings[j]));
assert(!ht_search(h2, strings[j]));
assert(h1->len == j - *min);
assert(h2->len == j - *min);
}
for(j = 0; j < 20; j++){
k = rand() % num_strings;
ht_insert(h1, strings[k], (void *) strings[k]);
ht_insert(h2, strings[k], (void *) k);
assert(ht_search(h1, strings[k]) == strings[k]);
assert((int) ht_search(h2, strings[k]) == k);
}
ht_destroy(h1);
ht_destroy(h2);
if(i % 100 == 0) printf("%d done...\n", i);
tot += *max - *min;
}
printf("Went through %d strings\nnum_strings: %d\n", tot, num_strings);
if(1){
int dcoll = 0, fcoll = 0;
hashtable *h1, *h2;
h1 = ht_create((num_strings * 3) / 2);
h2 = ht_create((num_strings * 3) / 2);
h1->hash = djb2;
h2->hash = fnv1;
for(i = 0; i < num_strings; i++){
sprintf(tmp, "%"PRIu32, djb2(strings[i]));
if(ht_search(h1, tmp)) dcoll++;
else ht_insert(h1, tmp, (void *) 1);
sprintf(tmp, "%"PRIu32, fnv1(strings[i]));
if(ht_search(h2, tmp)) fcoll++;
else ht_insert(h2, tmp, (void *) 1);
}
printf("Collisions:\nDJB2\t\t%d\nFNV1\t\t%d\n", dcoll, fcoll);
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment