spion · February 18, 2025 05:02 · bearophile · Mar 2, 2014 · marcomagdy · Mar 3, 2014
diff --git a/a-warning.md b/a-warning.md
diff --git a/benchmark b/benchmark
 spion@missperfect:~/Projects/testhash$ gcc -O3 test.c -o testc
 spion@missperfect:~/Projects/testhash$ time ./testc
 The 1000000
 a 2000000
 at 1000000
 brown 1000000
 dog 1000000
 era 1000000
 fox 1000000
 jumped 1000000
 lake 1000000
 lazy 1000000
 new 1000000
 near 1000000
 of 1000000
 over 1000000
 quick 1000000
 restaurant 1000000
 the 2000000

 real	0m0.234s
 user	0m0.228s
 sys	0m0.004s




 spion@missperfect:~/Projects/testhash$ g++ -O3 -std=gnu++0x test.cpp -o test
 spion@missperfect:~/Projects/testhash$ time ./test
 the 2000000
 at 1000000
 a 2000000
 brown 1000000
 dog 1000000
 era 1000000
 fox 1000000
 jumped 1000000
 The 1000000
 lake 1000000
 lazy 1000000
 new 1000000
 near 1000000
 of 1000000
 over 1000000
 quick 1000000
 restaurant 1000000

 real	0m0.321s
 user	0m0.320s
 sys	0m0.000s



 spion@missperfect:~/Projects/testhash$ time node test.js 
 The 1000000
 quick 1000000
 brown 1000000
 fox 1000000
 jumped 1000000
 over 1000000
 the 2000000
 lazy 1000000
 dog 1000000
 at 1000000
 a 2000000
 restaurant 1000000
 near 1000000
 lake 1000000
 of 1000000
 new 1000000
 era 1000000

 real	0m0.317s
 user	0m0.304s
 sys	0m0.012s



 spion@missperfect:~/Projects/testhash$ time luajit test.lua 
 new 1000000
 lazy 1000000
 The 1000000
 era 1000000
 of 1000000
 at 1000000
 fox 1000000
 jumped 1000000
 quick 1000000
 brown 1000000
 lake 1000000
 over 1000000
 near 1000000
 the 2000000
 dog 1000000
 a 2000000
 restaurant 1000000

 real	0m0.161s
 user	0m0.160s
 sys	0m0.000s
diff --git a/test.c b/test.c
 // Like in the c++ version, we
 // have a regular hash table
 // with a custom hash function

 #include <string.h>
 #include <stdlib.h>
 #include <stdio.h>

 struct fht_node {
    char* key;
    void* data;
    struct fht_node * next;
 };

 typedef struct fht {
    struct fht_node ** nodes;
    size_t size;
 } FHT;


 FHT* fht_create() {
    FHT *ht = malloc(sizeof(struct fht));
    ht->size = 1 << (sizeof(char)*8);
    ht->nodes = calloc(ht->size, sizeof(struct fht_node));
    return ht;
 }

 fht_put(FHT* hashtbl, char* key, void* data) {
    struct fht_node *node = hashtbl->nodes[key[0]];

    while(node) {
        if(!strcmp(node->key, key)) {
            node->data=data;
            return 0;
        }
        node=node->next;
    }

    node=malloc(sizeof(struct fht_node));
    node->key= strdup(key);
    node->data=data;
    node->next=hashtbl->nodes[key[0]];
    hashtbl->nodes[key[0]]=node;
 }

 void* fht_get(FHT* hashtbl, char* key) {
    struct fht_node *node = hashtbl->nodes[key[0]];
    while (node) {
        if (!strcmp(node->key, key)) return node->data;
        node = node->next;
    }
    return NULL;
 }

 int main() {

    char* text[19] = {
        "The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a", 
        "restaurant", "near", "the", "lake", "of", "a", "new", "era"
    };

    FHT *hashtbl = fht_create();

    int textLen = 19;

    int times = 1000000;
    int k, *cnt;


    while (times--) 
        for (k = 0; k < textLen; ++k) {
            cnt = fht_get(hashtbl, text[k]);
            if (!cnt) {
                cnt = malloc(sizeof(int)); 
                *cnt = 1; 
                fht_put(hashtbl, text[k], cnt);
            } else { *cnt += 1; }
        }
    

    for (k = 0; k < hashtbl->size; ++k) {
        struct fht_node *n = hashtbl->nodes[k];
        while (n) {
            printf("%s %d\n", n->key, *((int *)n->data));
            n = n->next;
        }
    }    
 }
diff --git a/test.cpp b/test.cpp
 #include <unordered_map>
 #include <string>
 #include <vector>
 #include <iostream>
 #include <string>

 using namespace std;

 // In C++ despite only accessing the hashtable
 // once per loop, we also need a custom hash function
 // otherwise the program will be 2 times slower.

 struct CustomHash {
    size_t operator() (const string& s) const { return (size_t)s[0]; }
 };

 int main() {

    vector<string> text = {"The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a", "restaurant", "near", "the", "lake", "of", "a", "new", "era"};
    unordered_map<string, int, CustomHash> cnt;
    int times = 1000000;
    while (times--)
        for (auto it = text.begin(); it != text.end(); ++it)
            cnt[*it] += 1;

    for (auto it = cnt.begin(); it != cnt.end(); ++it) {
        cout << it->first << " " << it->second << endl;
    }
 }
diff --git a/test.js b/test.js
 var text = ["The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a", "restaurant", "near", "the", "lake", "of", "a", "new", "era"],
    map = {};
    times = 1000001;

 while (--times) 
    for (var k = 0; k < text.length; ++k) {
        // Unlike luajit, if we put if (map[text[k]]) map[text[k]] += 1
        // in v8, this program will become 6 times slower. Instead we create a new object with a counter
        // and this way we'll only need to access the hashtable once per loop.
        var kth = map[text[k]];
        if (kth) kth.c += 1;
        else map[text[k]] = {c:1};
    }

 for (var key in map) {
    console.log(key, map[key].c);
 }
diff --git a/test.lua b/test.lua
 -- The lua program needs no optimizations.
 -- It just works. Fast.
 -- Note that luajit probably optimizes away
 -- all dictionary lookups :)

 local text = {"The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a", "restaurant", "near", "the", "lake", "of", "a", "new", "era"}
 local map = {}
 local times = 1000000

 while times > 0 do  
    for i = 1, table.getn(text), 1 do
        if map[text[i]] then map[text[i]] = map[text[i]] + 1
        else map[text[i]] = 1 end
    end
    times = times - 1;
 end

 for key, value in pairs(map) do
    io.write(key, " ", value, "\n")
 end
	spion@missperfect:~/Projects/testhash$ gcc -O3 test.c -o testc
	spion@missperfect:~/Projects/testhash$ time ./testc
	The 1000000
	a 2000000
	at 1000000
	brown 1000000
	dog 1000000
	era 1000000
	fox 1000000
	jumped 1000000
	lake 1000000
	lazy 1000000
	new 1000000
	near 1000000
	of 1000000
	over 1000000
	quick 1000000
	restaurant 1000000
	the 2000000

	real 0m0.234s
	user 0m0.228s
	sys 0m0.004s




	spion@missperfect:~/Projects/testhash$ g++ -O3 -std=gnu++0x test.cpp -o test
	spion@missperfect:~/Projects/testhash$ time ./test
	the 2000000
	at 1000000
	a 2000000
	brown 1000000
	dog 1000000
	era 1000000
	fox 1000000
	jumped 1000000
	The 1000000
	lake 1000000
	lazy 1000000
	new 1000000
	near 1000000
	of 1000000
	over 1000000
	quick 1000000
	restaurant 1000000

	real 0m0.321s
	user 0m0.320s
	sys 0m0.000s



	spion@missperfect:~/Projects/testhash$ time node test.js
	The 1000000
	quick 1000000
	brown 1000000
	fox 1000000
	jumped 1000000
	over 1000000
	the 2000000
	lazy 1000000
	dog 1000000
	at 1000000
	a 2000000
	restaurant 1000000
	near 1000000
	lake 1000000
	of 1000000
	new 1000000
	era 1000000

	real 0m0.317s
	user 0m0.304s
	sys 0m0.012s



	spion@missperfect:~/Projects/testhash$ time luajit test.lua
	new 1000000
	lazy 1000000
	The 1000000
	era 1000000
	of 1000000
	at 1000000
	fox 1000000
	jumped 1000000
	quick 1000000
	brown 1000000
	lake 1000000
	over 1000000
	near 1000000
	the 2000000
	dog 1000000
	a 2000000
	restaurant 1000000

	real 0m0.161s
	user 0m0.160s
	sys 0m0.000s
	// Like in the c++ version, we
	// have a regular hash table
	// with a custom hash function

	#include <string.h>
	#include <stdlib.h>
	#include <stdio.h>

	struct fht_node {
	char* key;
	void* data;
	struct fht_node * next;
	};

	typedef struct fht {
	struct fht_node ** nodes;
	size_t size;
	} FHT;


	FHT* fht_create() {
	FHT *ht = malloc(sizeof(struct fht));
	ht->size = 1 << (sizeof(char)*8);
	ht->nodes = calloc(ht->size, sizeof(struct fht_node));
	return ht;
	}

	fht_put(FHT* hashtbl, char* key, void* data) {
	struct fht_node *node = hashtbl->nodes[key[0]];

	while(node) {
	if(!strcmp(node->key, key)) {
	node->data=data;
	return 0;
	}
	node=node->next;
	}

	node=malloc(sizeof(struct fht_node));
	node->key= strdup(key);
	node->data=data;
	node->next=hashtbl->nodes[key[0]];
	hashtbl->nodes[key[0]]=node;
	}

	void* fht_get(FHT* hashtbl, char* key) {
	struct fht_node *node = hashtbl->nodes[key[0]];
	while (node) {
	if (!strcmp(node->key, key)) return node->data;
	node = node->next;
	}
	return NULL;
	}

	int main() {

	char* text[19] = {
	"The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a",
	"restaurant", "near", "the", "lake", "of", "a", "new", "era"
	};

	FHT *hashtbl = fht_create();

	int textLen = 19;

	int times = 1000000;
	int k, *cnt;


	while (times--)
	for (k = 0; k < textLen; ++k) {
	cnt = fht_get(hashtbl, text[k]);
	if (!cnt) {
	cnt = malloc(sizeof(int));
	*cnt = 1;
	fht_put(hashtbl, text[k], cnt);
	} else { *cnt += 1; }
	}


	for (k = 0; k < hashtbl->size; ++k) {
	struct fht_node *n = hashtbl->nodes[k];
	while (n) {
	printf("%s %d\n", n->key, ((int )n->data));
	n = n->next;
	}
	}
	}
	#include <unordered_map>
	#include <string>
	#include <vector>
	#include <iostream>
	#include <string>

	using namespace std;

	// In C++ despite only accessing the hashtable
	// once per loop, we also need a custom hash function
	// otherwise the program will be 2 times slower.

	struct CustomHash {
	size_t operator() (const string& s) const { return (size_t)s[0]; }
	};

	int main() {

	vector<string> text = {"The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a", "restaurant", "near", "the", "lake", "of", "a", "new", "era"};
	unordered_map<string, int, CustomHash> cnt;
	int times = 1000000;
	while (times--)
	for (auto it = text.begin(); it != text.end(); ++it)
	cnt[*it] += 1;

	for (auto it = cnt.begin(); it != cnt.end(); ++it) {
	cout << it->first << " " << it->second << endl;
	}
	}
	var text = ["The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a", "restaurant", "near", "the", "lake", "of", "a", "new", "era"],
	map = {};
	times = 1000001;

	while (--times)
	for (var k = 0; k < text.length; ++k) {
	// Unlike luajit, if we put if (map[text[k]]) map[text[k]] += 1
	// in v8, this program will become 6 times slower. Instead we create a new object with a counter
	// and this way we'll only need to access the hashtable once per loop.
	var kth = map[text[k]];
	if (kth) kth.c += 1;
	else map[text[k]] = {c:1};
	}

	for (var key in map) {
	console.log(key, map[key].c);
	}
	-- The lua program needs no optimizations.
	-- It just works. Fast.
	-- Note that luajit probably optimizes away
	-- all dictionary lookups :)

	local text = {"The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a", "restaurant", "near", "the", "lake", "of", "a", "new", "era"}
	local map = {}
	local times = 1000000

	while times > 0 do
	for i = 1, table.getn(text), 1 do
	if map[text[i]] then map[text[i]] = map[text[i]] + 1
	else map[text[i]] = 1 end
	end
	times = times - 1;
	end

	for key, value in pairs(map) do
	io.write(key, " ", value, "\n")
	end