-
-
Save jh3141/768a9672dba9a8fc2524 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Benchmark updated at Sat Dec 6 23:05:32 UTC 2014 | |
--- time ./test-c | |
The 1000000 | |
a 2000000 | |
at 1000000 | |
brown 1000000 | |
dog 1000000 | |
era 1000000 | |
fox 1000000 | |
jumped 1000000 | |
lake 1000000 | |
lazy 1000000 | |
new 1000000 | |
near 1000000 | |
of 1000000 | |
over 1000000 | |
quick 1000000 | |
restaurant 1000000 | |
the 2000000 | |
0.16user 0.00system 0:00.33elapsed 50%CPU (0avgtext+0avgdata 532maxresident)k | |
0inputs+0outputs (0major+174minor)pagefaults 0swaps | |
--- time ./test-clang | |
The 1000000 | |
a 2000000 | |
at 1000000 | |
brown 1000000 | |
dog 1000000 | |
era 1000000 | |
fox 1000000 | |
jumped 1000000 | |
lake 1000000 | |
lazy 1000000 | |
new 1000000 | |
near 1000000 | |
of 1000000 | |
over 1000000 | |
quick 1000000 | |
restaurant 1000000 | |
the 2000000 | |
0.19user 0.00system 0:00.19elapsed 98%CPU (0avgtext+0avgdata 536maxresident)k | |
0inputs+0outputs (0major+174minor)pagefaults 0swaps | |
--- time ./test-g++ | |
The 1000000 | |
quick 1000000 | |
brown 1000000 | |
fox 1000000 | |
jumped 1000000 | |
over 1000000 | |
the 2000000 | |
lazy 1000000 | |
dog 1000000 | |
at 1000000 | |
a 2000000 | |
restaurant 1000000 | |
near 1000000 | |
lake 1000000 | |
of 1000000 | |
new 1000000 | |
era 1000000 | |
0.08user 0.00system 0:00.09elapsed 96%CPU (0avgtext+0avgdata 1208maxresident)k | |
0inputs+0outputs (0major+355minor)pagefaults 0swaps | |
--- time ./test-clang++ | |
The 1000000 | |
quick 1000000 | |
brown 1000000 | |
fox 1000000 | |
jumped 1000000 | |
over 1000000 | |
the 2000000 | |
lazy 1000000 | |
dog 1000000 | |
at 1000000 | |
a 2000000 | |
restaurant 1000000 | |
near 1000000 | |
lake 1000000 | |
of 1000000 | |
new 1000000 | |
era 1000000 | |
0.06user 0.00system 0:00.22elapsed 29%CPU (0avgtext+0avgdata 1204maxresident)k | |
0inputs+0outputs (0major+355minor)pagefaults 0swaps | |
--- time node test.js | |
The 1000000 | |
quick 1000000 | |
brown 1000000 | |
fox 1000000 | |
jumped 1000000 | |
over 1000000 | |
the 2000000 | |
lazy 1000000 | |
dog 1000000 | |
at 1000000 | |
a 2000000 | |
restaurant 1000000 | |
near 1000000 | |
lake 1000000 | |
of 1000000 | |
new 1000000 | |
era 1000000 | |
0.25user 0.01system 0:00.43elapsed 62%CPU (0avgtext+0avgdata 9956maxresident)k | |
0inputs+8outputs (0major+2798minor)pagefaults 0swaps | |
--- time luajit test.lua | |
new 1000000 | |
lazy 1000000 | |
The 1000000 | |
era 1000000 | |
of 1000000 | |
at 1000000 | |
fox 1000000 | |
jumped 1000000 | |
quick 1000000 | |
brown 1000000 | |
lake 1000000 | |
over 1000000 | |
near 1000000 | |
the 2000000 | |
dog 1000000 | |
a 2000000 | |
restaurant 1000000 | |
0.09user 0.00system 0:00.09elapsed 96%CPU (0avgtext+0avgdata 1128maxresident)k | |
0inputs+0outputs (0major+338minor)pagefaults 0swaps |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
all: benchmark | |
benchmark: test-c test-clang test-g++ test-clang++ test.js test.lua | |
@echo "Benchmark updated at `date`" > benchmark | |
@echo >> benchmark | |
@echo '--- time ./test-c' >> benchmark | |
@time ./test-c >> benchmark 2>&1 | |
@echo >> benchmark | |
@echo '--- time ./test-clang' >> benchmark | |
@time ./test-clang >> benchmark 2>&1 | |
@echo >> benchmark | |
@echo '--- time ./test-g++' >> benchmark | |
@time ./test-g++ >> benchmark 2>&1 | |
@echo >> benchmark | |
@echo '--- time ./test-clang++' >> benchmark | |
@time ./test-clang++ >> benchmark 2>&1 | |
@echo >> benchmark | |
@echo '--- time node test.js' >> benchmark | |
@time node test.js >> benchmark 2>&1 | |
@echo >> benchmark | |
@echo '--- time luajit test.lua' >> benchmark | |
@time luajit test.lua >> benchmark 2>&1 | |
@cat benchmark | |
test-clang: test.c | |
clang -O3 -o test-clang test.c | |
test-c: test.c | |
gcc -O3 -o test-c test.c | |
test-g++: test.cpp table.hpp | |
g++ -std=gnu++0x -O3 -o test-g++ test.cpp | |
test-clang++: test.cpp table.hpp | |
clang++ -std=c++11 -O3 -o test-clang++ test.cpp |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef TABLE_HPP | |
#define TABLE_HPP | |
#include <utility> | |
#include <vector> | |
namespace jh3141 | |
{ | |
template<typename T1, typename T2> | |
class table | |
{ | |
public: | |
typedef typename std::vector<std::pair<T1,T2>>::iterator iterator; | |
private: | |
std::vector<std::pair<T1,T2>> data; | |
iterator cachedSearchStart; | |
public: | |
table () | |
{ | |
cachedSearchStart = data.end(); | |
} | |
T2& operator [] (const T1 & key) | |
{ | |
if (cachedSearchStart != data.end () && cachedSearchStart->first == key) | |
return (cachedSearchStart++)->second; | |
for (iterator i = data.begin (); i != data.end (); i ++) | |
if (i->first == key) { | |
cachedSearchStart = i + 1; | |
return i->second; | |
} | |
auto newItemPos = data.insert(data.end(), std::make_pair(key, T2())); | |
cachedSearchStart = data.begin(); | |
return newItemPos->second; | |
} | |
iterator begin() { return data.begin(); } | |
iterator end() { return data.end(); } | |
}; | |
} | |
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Like in the c++ version, we | |
// have a regular hash table | |
// with a custom hash function | |
#include <string.h> | |
#include <stdlib.h> | |
#include <stdio.h> | |
struct fht_node { | |
char* key; | |
void* data; | |
struct fht_node * next; | |
}; | |
typedef struct fht { | |
struct fht_node ** nodes; | |
size_t size; | |
} FHT; | |
FHT* fht_create() { | |
FHT *ht = malloc(sizeof(struct fht)); | |
ht->size = 1 << (sizeof(char)*8); | |
ht->nodes = calloc(ht->size, sizeof(struct fht_node)); | |
return ht; | |
} | |
fht_put(FHT* hashtbl, char* key, void* data) { | |
struct fht_node *node = hashtbl->nodes[key[0]]; | |
while(node) { | |
if(!strcmp(node->key, key)) { | |
node->data=data; | |
return 0; | |
} | |
node=node->next; | |
} | |
node=malloc(sizeof(struct fht_node)); | |
node->key= strdup(key); | |
node->data=data; | |
node->next=hashtbl->nodes[key[0]]; | |
hashtbl->nodes[key[0]]=node; | |
} | |
void* fht_get(FHT* hashtbl, char* key) { | |
struct fht_node *node = hashtbl->nodes[key[0]]; | |
while (node) { | |
if (!strcmp(node->key, key)) return node->data; | |
node = node->next; | |
} | |
return NULL; | |
} | |
int main() { | |
char* text[19] = { | |
"The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a", | |
"restaurant", "near", "the", "lake", "of", "a", "new", "era" | |
}; | |
FHT *hashtbl = fht_create(); | |
int textLen = 19; | |
int times = 1000000; | |
int k, *cnt; | |
while (times--) | |
for (k = 0; k < textLen; ++k) { | |
cnt = fht_get(hashtbl, text[k]); | |
if (!cnt) { | |
cnt = malloc(sizeof(int)); | |
*cnt = 1; | |
fht_put(hashtbl, text[k], cnt); | |
} else { *cnt += 1; } | |
} | |
for (k = 0; k < hashtbl->size; ++k) { | |
struct fht_node *n = hashtbl->nodes[k]; | |
while (n) { | |
printf("%s %d\n", n->key, *((int *)n->data)); | |
n = n->next; | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <array> | |
#include <set> | |
#include <cstring> | |
#include <vector> | |
#include <iostream> | |
#include <string> | |
#include "table.hpp" | |
using namespace std; | |
using namespace jh3141; | |
// Use an array to hold the strings rather than vector, as it is a simpler and more easily-optimized object. | |
// We also use const char * rather than string to avoid the overhead of performing actual string comparisons, | |
// instead using a set of strings to ensure that all references to identical strings are using the same | |
// pointer before we begin the operation. This optimization is probably a key reason why node/luajit were | |
// originally much faster than the c++ version. | |
// We also iterate over the text array by index rather than using an iterator as the compiler can't unroll the | |
// loop when we use an iterator but can when we're doing it numerically (this is only a small win, but still | |
// worth doing). | |
// Not flushing the output after each end of line (using "\n" instead of std::endl) gains nothing in terms of | |
// best times, but appears to significantly reduce the variance of the test times, so is also worth doing. | |
// Finally, it became apparent that both v8 and luajit use a substantially simpler data structure for storing | |
// data in this kind of application, which is optimized for small numbers of items with rather regular access | |
// patterns. I developed a similar structure myself, based on a table of key/value pairs with a cached | |
// next-item-to-search pointer. Switching to this knocked off another 20% of the run time, finally bringing | |
// this implementation to faster than luajit. | |
struct cstr_lt { | |
bool operator() (const char * left, const char * right) { return strcmp(left, right) < 0; } | |
}; | |
int main() { | |
set<const char *, cstr_lt> string_intern; | |
array<const char *, 19> text = {"The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a", "restaurant", "near", "the", "lake", "of", "a", "new", "era"}; | |
table<const char *, int> cnt; | |
int times = 1000000; | |
// start off by ensuring all duplicated strings are using the same reference | |
for (const char *& str : text) | |
{ | |
auto existing = string_intern.find(str); | |
if (existing != string_intern.end()) | |
{ | |
str = *existing; | |
} | |
else | |
{ | |
string_intern.insert(str); | |
} | |
} | |
// now update the counts in the map | |
while (times--) | |
for (int i = 0; i < text.size(); i++) | |
cnt[text[i]] += 1; | |
// and produce output | |
for (auto it = cnt.begin(); it != cnt.end(); ++it) { | |
cout << it->first << " " << it->second << "\n"; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var text = ["The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a", "restaurant", "near", "the", "lake", "of", "a", "new", "era"], | |
map = {}; | |
times = 1000001; | |
while (--times) | |
for (var k = 0; k < text.length; ++k) { | |
// Unlike luajit, if we put if (map[text[k]]) map[text[k]] += 1 | |
// in v8, this program will become 6 times slower. Instead we create a new object with a counter | |
// and this way we'll only need to access the hashtable once per loop. | |
var kth = map[text[k]]; | |
if (kth) kth.c += 1; | |
else map[text[k]] = {c:1}; | |
} | |
for (var key in map) { | |
console.log(key, map[key].c); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- The lua program needs no optimizations. | |
-- It just works. Fast. | |
-- Note that luajit probably optimizes away | |
-- all dictionary lookups :) | |
local text = {"The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a", "restaurant", "near", "the", "lake", "of", "a", "new", "era"} | |
local map = {} | |
local times = 1000000 | |
while times > 0 do | |
for i = 1, table.getn(text), 1 do | |
if map[text[i]] then map[text[i]] = map[text[i]] + 1 | |
else map[text[i]] = 1 end | |
end | |
times = times - 1; | |
end | |
for key, value in pairs(map) do | |
io.write(key, " ", value, "\n") | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment