|
// Like in the c++ version, we |
|
// have a regular hash table |
|
// with a custom hash function |
|
|
|
#include <string.h> |
|
#include <stdlib.h> |
|
#include <stdio.h> |
|
|
|
struct fht_node { |
|
char* key; |
|
void* data; |
|
struct fht_node * next; |
|
}; |
|
|
|
typedef struct fht { |
|
struct fht_node ** nodes; |
|
size_t size; |
|
} FHT; |
|
|
|
|
|
FHT* fht_create() { |
|
FHT *ht = malloc(sizeof(struct fht)); |
|
ht->size = 1 << (sizeof(char)*8); |
|
ht->nodes = calloc(ht->size, sizeof(struct fht_node)); |
|
return ht; |
|
} |
|
|
|
fht_put(FHT* hashtbl, char* key, void* data) { |
|
struct fht_node *node = hashtbl->nodes[key[0]]; |
|
|
|
while(node) { |
|
if(!strcmp(node->key, key)) { |
|
node->data=data; |
|
return 0; |
|
} |
|
node=node->next; |
|
} |
|
|
|
node=malloc(sizeof(struct fht_node)); |
|
node->key= strdup(key); |
|
node->data=data; |
|
node->next=hashtbl->nodes[key[0]]; |
|
hashtbl->nodes[key[0]]=node; |
|
} |
|
|
|
void* fht_get(FHT* hashtbl, char* key) { |
|
struct fht_node *node = hashtbl->nodes[key[0]]; |
|
while (node) { |
|
if (!strcmp(node->key, key)) return node->data; |
|
node = node->next; |
|
} |
|
return NULL; |
|
} |
|
|
|
int main() { |
|
|
|
char* text[19] = { |
|
"The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a", |
|
"restaurant", "near", "the", "lake", "of", "a", "new", "era" |
|
}; |
|
|
|
FHT *hashtbl = fht_create(); |
|
|
|
int textLen = 19; |
|
|
|
int times = 1000000; |
|
int k, *cnt; |
|
|
|
|
|
while (times--) |
|
for (k = 0; k < textLen; ++k) { |
|
cnt = fht_get(hashtbl, text[k]); |
|
if (!cnt) { |
|
cnt = malloc(sizeof(int)); |
|
*cnt = 1; |
|
fht_put(hashtbl, text[k], cnt); |
|
} else { *cnt += 1; } |
|
} |
|
|
|
|
|
for (k = 0; k < hashtbl->size; ++k) { |
|
struct fht_node *n = hashtbl->nodes[k]; |
|
while (n) { |
|
printf("%s %d\n", n->key, *((int *)n->data)); |
|
n = n->next; |
|
} |
|
} |
|
} |
The summary is that naive benchmarks give you wrong and unrealistic results. You would expect these to be strings, and you would expect the objects to be regular hash tables, but in reality scripting languages can switch data structures or hashing functions for these depending on the way the code is executed at runtime (i.e. JIT) so your test might not be representative of what would happen in a real program.
Still, its an interesting starter for a conversation on micro-benchmarks and JIT optimizations. I recommend mraleph's blog for a deep dive (mostly from a JS perspective), e.g. https://mrale.ph/blog/2014/02/23/the-black-cat-of-microbenchmarks.html