Last active
October 26, 2016 00:54
-
-
Save simonhf/48f48dd00c64c351dec42611047009d8 to your computer and use it in GitHub Desktop.
1 cache line fetch per hash table read simulation experimentation lab
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <locale.h> | |
#include <sys/mman.h> | |
#include <sys/time.h> | |
#include <unistd.h> | |
#include <murmurhash3.h> | |
// gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line | |
#define CACHE_LINE_SIZE (1024) | |
#define SLOTS_PER_BUCKET (CACHE_LINE_SIZE / 2 / sizeof(uint32_t)) | |
typedef struct BUCKET { | |
uint32_t key[SLOTS_PER_BUCKET]; | |
uint32_t val[SLOTS_PER_BUCKET]; | |
} __attribute__((packed)) BUCKET; | |
double | |
get_time_in_seconds(void) | |
{ | |
struct timeval tv; | |
gettimeofday(&tv, NULL); | |
return (double)tv.tv_sec + 1.e-6 * (double)tv.tv_usec; | |
} | |
void | |
main(void) | |
{ | |
setlocale(LC_NUMERIC, ""); | |
printf("- starting\n"); | |
size_t memory_to_allocate = 1 << 29; | |
size_t size_of_cache_line = sizeof(BUCKET); | |
size_t number_of_buckets = memory_to_allocate / size_of_cache_line; | |
printf("- mmap() %'zu bytes or %zu MB containing %'zu buckets of size %zu with %zu slots per bucket or %'zu total slots\n", memory_to_allocate, memory_to_allocate / 1024 / 1024, number_of_buckets, size_of_cache_line, SLOTS_PER_BUCKET, SLOTS_PER_BUCKET * number_of_buckets); | |
BUCKET * buckets = mmap (NULL, memory_to_allocate, PROT_READ | PROT_WRITE,MAP_SHARED | MAP_ANONYMOUS, -1, 0); | |
uint64_t key; | |
uint64_t hash[2]; | |
uint32_t i = 0; | |
double t1 = get_time_in_seconds(); | |
while(1) { | |
key = i; | |
REHASH1:; | |
MurmurHash3_x64_128(&key, sizeof(key), 123456, &hash[0]); | |
uint32_t bucket = hash[0] % number_of_buckets; | |
if(0 == bucket) { printf("- hit magic bucket 0; re-hashing key %'zu\n", key); key += 1 + key; goto REHASH1; } | |
//debug printf("- i=%zu 0x%16zx 0x%16zx bucket=%'u\n", i, hash[0], hash[1], bucket); | |
for(uint32_t j = 0; j < (SLOTS_PER_BUCKET - 1); j++) { | |
if(0 == buckets[bucket].key[j]) { | |
//debug printf("- inserted at j=%u\n", j); | |
buckets[bucket].key[j] = key; | |
buckets[bucket].val[j] = key; | |
goto INSERTED; | |
} | |
} | |
goto EARLY_OUT_PUT; | |
INSERTED:; | |
i ++; | |
} | |
EARLY_OUT_PUT:; | |
uint32_t i_max = i; | |
double t2 = get_time_in_seconds(); | |
printf("- put %'u keys (before collision) in %f seconds or %'.0f inserts per second\n", i_max, t2 - t1, i_max / (t2 - t1)); | |
uint32_t process_number; | |
uint32_t process_number_max = 1; | |
for(process_number = 1; process_number <= process_number_max; process_number ++) { | |
pid_t pid = fork(); | |
if(0 == pid) { | |
goto PUT_KID_TO_WORK; | |
} | |
} | |
printf("- parent ending after fork()ing %u kids\n", process_number_max); | |
exit(0); | |
PUT_KID_TO_WORK:; | |
#define PRECALC_LEN (8) | |
for(uint32_t loop = 0; loop < 4; loop ++) { | |
uint32_t prefetch_active = 0; | |
if(loop >= 1) { prefetch_active = 1; } | |
uint32_t precalc_len_multiple = 1; | |
if(2 == loop) { precalc_len_multiple = 2; } | |
if(3 == loop) { precalc_len_multiple = 3; } | |
int32_t precalc_put_pos = 0; | |
int32_t precalc_get_pos = 0 - (PRECALC_LEN * (precalc_len_multiple - 1)); | |
uint32_t precalc_bucket[PRECALC_LEN * precalc_len_multiple]; | |
uint32_t precalc_key[PRECALC_LEN * precalc_len_multiple]; | |
uint32_t precalc_val[PRECALC_LEN * precalc_len_multiple]; | |
uint64_t count = 0; | |
//debug printf("- precalc_put_pos=%d, precalc_get_pos=%d\n", precalc_put_pos, precalc_get_pos); | |
i = process_number * 100000; | |
t1 = get_time_in_seconds(); | |
while(1) { | |
for(uint32_t k = 0; k < PRECALC_LEN; k ++) { | |
key = i; | |
REHASH2:; | |
precalc_key[k + precalc_put_pos] = key; | |
MurmurHash3_x64_128(&key, sizeof(key), 123456, &hash[0]); | |
precalc_bucket[k + precalc_put_pos] = hash[0] % number_of_buckets; | |
if(0 == precalc_bucket[k + precalc_put_pos]) { /* printf("- hit magic bucket 0; re-hashing key %'zu\n", key); */ key += 1 + key; goto REHASH2; } | |
#define PREFETCH_LOCALITY (0) | |
if(prefetch_active) { __builtin_prefetch(&buckets[precalc_bucket[k + precalc_put_pos]].key[0], 0, PREFETCH_LOCALITY); } | |
count ++; | |
i ++; | |
if(i >= i_max) { i = 0; } | |
if(0 == (i & 0xffff)) { | |
t2 = get_time_in_seconds(); | |
if((t2 - t1) >= 5) { | |
goto EARLY_OUT_GET; | |
} | |
} | |
} | |
if(precalc_get_pos >= 0) { | |
for(uint32_t k = 0; k < PRECALC_LEN; k ++) { | |
uint32_t bucket = precalc_bucket[k + precalc_get_pos]; | |
uint32_t key = precalc_key[k + precalc_get_pos]; | |
for(uint32_t j = 0; j < (SLOTS_PER_BUCKET - 1); j++) { | |
if(key == buckets[bucket].key[j]) { | |
precalc_val[k + precalc_get_pos] = buckets[bucket].key[j]; | |
goto GOT; | |
} | |
} | |
printf("- ERROR: INTERNAL: key not found for key=%'u in bucket=%'u !\n", key, bucket); | |
exit(1); | |
GOT:; | |
} | |
} | |
precalc_put_pos += PRECALC_LEN; | |
precalc_get_pos += PRECALC_LEN; | |
precalc_put_pos = precalc_put_pos >= (PRECALC_LEN * precalc_len_multiple) ? 0 : precalc_put_pos; | |
precalc_get_pos = precalc_get_pos >= (PRECALC_LEN * precalc_len_multiple) ? 0 : precalc_get_pos; | |
} | |
EARLY_OUT_GET:; | |
printf("- process_number=%u loop=%u prefetch_active=%u precalc_len_multiple=%u got %'zu keys in %f seconds or %'.0f gets per second\n", process_number, loop, prefetch_active, precalc_len_multiple, count, t2 - t1, count / (t2 - t1)); | |
} | |
printf("- process_number=%u ending\n", process_number); | |
} | |
// | |
// experiments with a 64 byte bucket and changing the number of parallel processes | |
// | |
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line | |
// - starting | |
// - mmap() 536,870,912 bytes or 512 MB containing 8,388,608 buckets of size 64 | |
// - put 4,903,147 keys (before collision) in 0.746633 seconds or 6,567,010 inserts per second | |
// - parent ending after fork()ing 1 kids | |
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 76,265,253 keys in 5.002618 seconds or 15,245,069 gets per second | |
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 102,747,068 keys in 5.000722 seconds or 20,546,446 gets per second | |
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 121,704,296 keys in 5.002531 seconds or 24,328,544 gets per second | |
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 122,031,976 keys in 5.000540 seconds or 24,403,760 gets per second | |
// - process_number=1 ending | |
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line | |
// - starting | |
// - mmap() 536,870,912 bytes or 512 MB containing 8,388,608 buckets of size 64 | |
// - put 4,903,147 keys (before collision) in 0.784651 seconds or 6,248,827 inserts per second | |
// - parent ending after fork()ing 2 kids | |
// - process_number=2 loop=0 prefetch_active=0 precalc_len_multiple=1 got 65,572,527 keys in 5.001105 seconds or 13,111,608 gets per second | |
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 65,672,527 keys in 5.001063 seconds or 13,131,714 gets per second | |
// - process_number=2 loop=1 prefetch_active=1 precalc_len_multiple=1 got 87,544,411 keys in 5.002060 seconds or 17,501,671 gets per second | |
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 87,251,195 keys in 5.002263 seconds or 17,442,344 gets per second | |
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 94,698,193 keys in 5.002471 seconds or 18,930,283 gets per second | |
// - process_number=2 loop=2 prefetch_active=1 precalc_len_multiple=2 got 97,994,012 keys in 5.004300 seconds or 19,581,961 gets per second | |
// - process_number=2 loop=3 prefetch_active=1 precalc_len_multiple=3 got 110,028,530 keys in 5.000954 seconds or 22,001,507 gets per second | |
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 110,718,354 keys in 5.002635 seconds or 22,132,007 gets per second | |
// - process_number=2 ending | |
// - process_number=1 ending | |
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line | |
// - starting | |
// - mmap() 536,870,912 bytes or 512 MB containing 8,388,608 buckets of size 64 | |
// - put 4,903,147 keys (before collision) in 0.744962 seconds or 6,581,739 inserts per second | |
// - parent ending after fork()ing 3 kids | |
// - process_number=3 loop=0 prefetch_active=0 precalc_len_multiple=1 got 56,256,057 keys in 5.002871 seconds or 11,244,755 gets per second | |
// - process_number=2 loop=0 prefetch_active=0 precalc_len_multiple=1 got 52,698,094 keys in 5.003711 seconds or 10,531,802 gets per second | |
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 58,422,137 keys in 5.007053 seconds or 11,667,968 gets per second | |
// - process_number=3 loop=1 prefetch_active=1 precalc_len_multiple=1 got 75,803,109 keys in 5.000443 seconds or 15,159,279 gets per second | |
// - process_number=2 loop=1 prefetch_active=1 precalc_len_multiple=1 got 79,364,464 keys in 5.001411 seconds or 15,868,415 gets per second | |
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 76,199,717 keys in 5.001988 seconds or 15,233,887 gets per second | |
// - process_number=3 loop=2 prefetch_active=1 precalc_len_multiple=2 got 92,675,238 keys in 5.004450 seconds or 18,518,566 gets per second | |
// - process_number=2 loop=2 prefetch_active=1 precalc_len_multiple=2 got 79,954,288 keys in 5.005317 seconds or 15,973,871 gets per second | |
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 74,561,317 keys in 5.002349 seconds or 14,905,261 gets per second | |
// - process_number=3 loop=3 prefetch_active=1 precalc_len_multiple=3 got 85,216,187 keys in 5.002121 seconds or 17,036,011 gets per second | |
// - process_number=2 loop=3 prefetch_active=1 precalc_len_multiple=3 got 86,823,515 keys in 5.000560 seconds or 17,362,758 gets per second | |
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 93,006,310 keys in 5.000277 seconds or 18,600,231 gets per second | |
// - process_number=3 ending | |
// - process_number=2 ending | |
// - process_number=1 ending | |
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line | |
// - starting | |
// - mmap() 536,870,912 bytes or 512 MB containing 8,388,608 buckets of size 64 | |
// - put 4,903,147 keys (before collision) in 0.772905 seconds or 6,343,791 inserts per second | |
// - parent ending after fork()ing 4 kids | |
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 51,683,982 keys in 5.002305 seconds or 10,332,033 gets per second | |
// - process_number=4 loop=0 prefetch_active=0 precalc_len_multiple=1 got 55,697,305 keys in 5.004646 seconds or 11,129,120 gets per second | |
// - process_number=2 loop=0 prefetch_active=0 precalc_len_multiple=1 got 56,028,377 keys in 5.001612 seconds or 11,202,064 gets per second | |
// - process_number=3 loop=0 prefetch_active=0 precalc_len_multiple=1 got 51,221,838 keys in 5.003998 seconds or 10,236,183 gets per second | |
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 67,310,927 keys in 5.001811 seconds or 13,457,311 gets per second | |
// - process_number=4 loop=1 prefetch_active=1 precalc_len_multiple=1 got 66,683,247 keys in 5.004323 seconds or 13,325,128 gets per second | |
// - process_number=2 loop=1 prefetch_active=1 precalc_len_multiple=1 got 68,193,967 keys in 5.003913 seconds or 13,628,128 gets per second | |
// - process_number=3 loop=1 prefetch_active=1 precalc_len_multiple=1 got 65,931,279 keys in 5.002827 seconds or 13,178,805 gets per second | |
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 63,587,428 keys in 5.000220 seconds or 12,716,926 gets per second | |
// - process_number=4 loop=2 prefetch_active=1 precalc_len_multiple=2 got 72,569,434 keys in 5.005618 seconds or 14,497,597 gets per second | |
// - process_number=2 loop=2 prefetch_active=1 precalc_len_multiple=2 got 78,196,869 keys in 5.005375 seconds or 15,622,580 gets per second | |
// - process_number=3 loop=2 prefetch_active=1 precalc_len_multiple=2 got 76,261,861 keys in 5.003949 seconds or 15,240,336 gets per second | |
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 75,740,965 keys in 5.000678 seconds or 15,146,140 gets per second | |
// - process_number=2 loop=3 prefetch_active=1 precalc_len_multiple=3 got 70,279,066 keys in 5.003276 seconds or 14,046,610 gets per second | |
// - process_number=4 loop=3 prefetch_active=1 precalc_len_multiple=3 got 71,717,466 keys in 5.009659 seconds or 14,315,838 gets per second | |
// - process_number=3 loop=3 prefetch_active=1 precalc_len_multiple=3 got 73,247,205 keys in 5.000062 seconds or 14,649,259 gets per second | |
// - process_number=1 ending | |
// - process_number=2 ending | |
// - process_number=4 ending | |
// - process_number=3 ending | |
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line | |
// - starting | |
// - mmap() 536,870,912 bytes or 512 MB containing 8,388,608 buckets of size 64 | |
// - put 4,903,147 keys (before collision) in 0.737446 seconds or 6,648,823 inserts per second | |
// - parent ending after fork()ing 5 kids | |
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 47,436,195 keys in 5.003296 seconds or 9,480,989 gets per second | |
// - process_number=3 loop=0 prefetch_active=0 precalc_len_multiple=1 got 48,088,163 keys in 5.000460 seconds or 9,616,748 gets per second | |
// - process_number=2 loop=0 prefetch_active=0 precalc_len_multiple=1 got 45,304,579 keys in 5.006865 seconds or 9,048,492 gets per second | |
// - process_number=4 loop=0 prefetch_active=0 precalc_len_multiple=1 got 46,611,907 keys in 5.001497 seconds or 9,319,591 gets per second | |
// - process_number=5 loop=0 prefetch_active=0 precalc_len_multiple=1 got 51,939,342 keys in 5.002657 seconds or 10,382,351 gets per second | |
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 54,162,297 keys in 5.001440 seconds or 10,829,340 gets per second | |
// - process_number=3 loop=1 prefetch_active=1 precalc_len_multiple=1 got 66,848,783 keys in 5.001389 seconds or 13,366,043 gets per second | |
// - process_number=2 loop=1 prefetch_active=1 precalc_len_multiple=1 got 61,259,204 keys in 5.000600 seconds or 12,250,371 gets per second | |
// - process_number=4 loop=1 prefetch_active=1 precalc_len_multiple=1 got 61,321,348 keys in 5.006056 seconds or 12,249,433 gets per second | |
// - process_number=5 loop=1 prefetch_active=1 precalc_len_multiple=1 got 64,486,095 keys in 5.001064 seconds or 12,894,475 gets per second | |
// - process_number=3 loop=2 prefetch_active=1 precalc_len_multiple=2 got 64,751,631 keys in 5.002955 seconds or 12,942,677 gets per second | |
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 67,310,927 keys in 5.006616 seconds or 13,444,396 gets per second | |
// - process_number=2 loop=2 prefetch_active=1 precalc_len_multiple=2 got 75,444,357 keys in 5.004708 seconds or 15,074,677 gets per second | |
// - process_number=4 loop=2 prefetch_active=1 precalc_len_multiple=2 got 59,486,340 keys in 5.003384 seconds or 11,889,221 gets per second | |
// - process_number=5 loop=2 prefetch_active=1 precalc_len_multiple=2 got 74,882,213 keys in 5.000509 seconds or 14,974,918 gets per second | |
// - process_number=3 loop=3 prefetch_active=1 precalc_len_multiple=3 got 72,407,290 keys in 5.002365 seconds or 14,474,611 gets per second | |
// - process_number=2 loop=3 prefetch_active=1 precalc_len_multiple=3 got 65,244,847 keys in 5.002451 seconds or 13,042,575 gets per second | |
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 70,903,354 keys in 5.004322 seconds or 14,168,424 gets per second | |
// - process_number=4 loop=3 prefetch_active=1 precalc_len_multiple=3 got 67,731,823 keys in 5.003001 seconds or 13,538,239 gets per second | |
// - process_number=5 loop=3 prefetch_active=1 precalc_len_multiple=3 got 67,304,143 keys in 5.002435 seconds or 13,454,276 gets per second | |
// - process_number=2 ending | |
// - process_number=1 ending | |
// - process_number=4 ending | |
// - process_number=3 ending | |
// - process_number=5 ending | |
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line | |
// - starting | |
// - mmap() 536,870,912 bytes or 512 MB containing 8,388,608 buckets of size 64 | |
// - put 4,903,147 keys (before collision) in 0.725215 seconds or 6,760,957 inserts per second | |
// - parent ending after fork()ing 6 kids | |
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 41,615,544 keys in 5.004154 seconds or 8,316,200 gets per second | |
// - process_number=2 loop=0 prefetch_active=0 precalc_len_multiple=1 got 45,107,971 keys in 5.006711 seconds or 9,009,502 gets per second | |
// - process_number=5 loop=0 prefetch_active=0 precalc_len_multiple=1 got 44,218,147 keys in 5.003569 seconds or 8,837,321 gets per second | |
// - process_number=6 loop=0 prefetch_active=0 precalc_len_multiple=1 got 43,659,395 keys in 5.004504 seconds or 8,724,020 gets per second | |
// - process_number=4 loop=0 prefetch_active=0 precalc_len_multiple=1 got 48,512,451 keys in 5.006961 seconds or 9,689,001 gets per second | |
// - process_number=3 loop=0 prefetch_active=0 precalc_len_multiple=1 got 43,828,323 keys in 5.000483 seconds or 8,764,818 gets per second | |
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 60,376,164 keys in 5.002672 seconds or 12,068,783 gets per second | |
// - process_number=2 loop=1 prefetch_active=1 precalc_len_multiple=1 got 52,698,094 keys in 5.006964 seconds or 10,524,960 gets per second | |
// - process_number=5 loop=1 prefetch_active=1 precalc_len_multiple=1 got 56,645,881 keys in 5.004448 seconds or 11,319,107 gets per second | |
// - process_number=4 loop=1 prefetch_active=1 precalc_len_multiple=1 got 57,925,529 keys in 5.001449 seconds or 11,581,749 gets per second | |
// - process_number=3 loop=1 prefetch_active=1 precalc_len_multiple=1 got 55,207,481 keys in 5.000500 seconds or 11,040,392 gets per second | |
// - process_number=6 loop=1 prefetch_active=1 precalc_len_multiple=1 got 54,317,657 keys in 5.006172 seconds or 10,850,138 gets per second | |
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 65,606,991 keys in 5.001629 seconds or 13,117,124 gets per second | |
// - process_number=2 loop=2 prefetch_active=1 precalc_len_multiple=2 got 56,945,881 keys in 5.004772 seconds or 11,378,316 gets per second | |
// - process_number=5 loop=2 prefetch_active=1 precalc_len_multiple=2 got 61,614,564 keys in 5.000714 seconds or 12,321,153 gets per second | |
// - process_number=4 loop=2 prefetch_active=1 precalc_len_multiple=2 got 55,107,481 keys in 5.001246 seconds or 11,018,750 gets per second | |
// - process_number=6 loop=2 prefetch_active=1 precalc_len_multiple=2 got 63,403,055 keys in 5.001584 seconds or 12,676,596 gets per second | |
// - process_number=3 loop=2 prefetch_active=1 precalc_len_multiple=2 got 68,093,967 keys in 5.003964 seconds or 13,608,005 gets per second | |
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 59,196,516 keys in 5.002539 seconds or 11,833,294 gets per second | |
// - process_number=4 loop=3 prefetch_active=1 precalc_len_multiple=3 got 53,415,598 keys in 5.002627 seconds or 10,677,509 gets per second | |
// - process_number=2 loop=3 prefetch_active=1 precalc_len_multiple=3 got 66,490,031 keys in 5.007029 seconds or 13,279,338 gets per second | |
// - process_number=6 loop=3 prefetch_active=1 precalc_len_multiple=3 got 53,858,905 keys in 5.001302 seconds or 10,768,977 gets per second | |
// - process_number=5 loop=3 prefetch_active=1 precalc_len_multiple=3 got 57,891,065 keys in 5.005824 seconds or 11,564,742 gets per second | |
// - process_number=3 loop=3 prefetch_active=1 precalc_len_multiple=3 got 69,196,026 keys in 5.001882 seconds or 13,833,998 gets per second | |
// - process_number=1 ending | |
// - process_number=4 ending | |
// - process_number=2 ending | |
// - process_number=6 ending | |
// - process_number=5 ending | |
// - process_number=3 ending | |
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line | |
// - starting | |
// - mmap() 536,870,912 bytes or 512 MB containing 8,388,608 buckets of size 64 | |
// - put 4,903,147 keys (before collision) in 0.778333 seconds or 6,299,550 inserts per second | |
// - parent ending after fork()ing 7 kids | |
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 37,236,685 keys in 5.003076 seconds or 7,442,758 gets per second | |
// - process_number=3 loop=0 prefetch_active=0 precalc_len_multiple=1 got 45,532,259 keys in 5.005522 seconds or 9,096,406 gets per second | |
// - process_number=2 loop=0 prefetch_active=0 precalc_len_multiple=1 got 39,418,392 keys in 5.009593 seconds or 7,868,582 gets per second | |
// - process_number=4 loop=0 prefetch_active=0 precalc_len_multiple=1 got 38,956,248 keys in 5.000563 seconds or 7,790,372 gets per second | |
// - process_number=6 loop=0 prefetch_active=0 precalc_len_multiple=1 got 40,263,576 keys in 5.002979 seconds or 8,047,920 gets per second | |
// - process_number=5 loop=0 prefetch_active=0 precalc_len_multiple=1 got 39,249,464 keys in 5.003498 seconds or 7,844,405 gets per second | |
// - process_number=7 loop=0 prefetch_active=0 precalc_len_multiple=1 got 45,001,187 keys in 5.007646 seconds or 8,986,495 gets per second | |
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 49,128,078 keys in 5.002063 seconds or 9,821,564 gets per second | |
// - process_number=3 loop=1 prefetch_active=1 precalc_len_multiple=1 got 46,711,907 keys in 5.004042 seconds or 9,334,835 gets per second | |
// - process_number=2 loop=1 prefetch_active=1 precalc_len_multiple=1 got 47,401,731 keys in 5.004479 seconds or 9,471,861 gets per second | |
// - process_number=4 loop=1 prefetch_active=1 precalc_len_multiple=1 got 49,548,974 keys in 5.002409 seconds or 9,905,023 gets per second | |
// - process_number=6 loop=1 prefetch_active=1 precalc_len_multiple=1 got 52,232,558 keys in 5.002241 seconds or 10,441,832 gets per second | |
// - process_number=7 loop=1 prefetch_active=1 precalc_len_multiple=1 got 52,918,990 keys in 5.003712 seconds or 10,575,947 gets per second | |
// - process_number=5 loop=1 prefetch_active=1 precalc_len_multiple=1 got 50,038,798 keys in 5.007249 seconds or 9,993,271 gets per second | |
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 53,387,918 keys in 5.007230 seconds or 10,662,166 gets per second | |
// - process_number=3 loop=2 prefetch_active=1 precalc_len_multiple=2 got 54,421,049 keys in 5.002982 seconds or 10,877,722 gets per second | |
// - process_number=2 loop=2 prefetch_active=1 precalc_len_multiple=2 got 56,487,129 keys in 5.004470 seconds or 11,287,335 gets per second | |
// - process_number=6 loop=2 prefetch_active=1 precalc_len_multiple=2 got 53,400,153 keys in 5.002144 seconds or 10,675,453 gets per second | |
// - process_number=4 loop=2 prefetch_active=1 precalc_len_multiple=2 got 51,777,198 keys in 5.005023 seconds or 10,345,047 gets per second | |
// - process_number=7 loop=2 prefetch_active=1 precalc_len_multiple=2 got 54,807,481 keys in 5.002081 seconds or 10,956,936 gets per second | |
// - process_number=5 loop=2 prefetch_active=1 precalc_len_multiple=2 got 54,024,441 keys in 5.007304 seconds or 10,789,128 gets per second | |
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 55,866,233 keys in 5.002276 seconds or 11,168,162 gets per second | |
// - process_number=3 loop=3 prefetch_active=1 precalc_len_multiple=3 got 50,566,478 keys in 5.000430 seconds or 10,112,426 gets per second | |
// - process_number=6 loop=3 prefetch_active=1 precalc_len_multiple=3 got 52,363,630 keys in 5.001225 seconds or 10,470,161 gets per second | |
// - process_number=2 loop=3 prefetch_active=1 precalc_len_multiple=3 got 51,256,302 keys in 5.006153 seconds or 10,238,661 gets per second | |
// - process_number=4 loop=3 prefetch_active=1 precalc_len_multiple=3 got 56,090,521 keys in 5.005819 seconds or 11,205,064 gets per second | |
// - process_number=7 loop=3 prefetch_active=1 precalc_len_multiple=3 got 57,363,385 keys in 5.003658 seconds or 11,464,290 gets per second | |
// - process_number=5 loop=3 prefetch_active=1 precalc_len_multiple=3 got 52,922,382 keys in 5.003028 seconds or 10,578,070 gets per second | |
// - process_number=1 ending | |
// - process_number=3 ending | |
// - process_number=6 ending | |
// - process_number=2 ending | |
// - process_number=4 ending | |
// - process_number=7 ending | |
// - process_number=5 ending | |
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line | |
// - starting | |
// - mmap() 536,870,912 bytes or 512 MB containing 8,388,608 buckets of size 64 | |
// - put 4,903,147 keys (before collision) in 0.761831 seconds or 6,436,003 inserts per second | |
// - parent ending after fork()ing 8 kids | |
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 36,712,397 keys in 5.001366 seconds or 7,340,474 gets per second | |
// - process_number=2 loop=0 prefetch_active=0 precalc_len_multiple=1 got 36,940,077 keys in 5.001865 seconds or 7,385,261 gets per second | |
// - process_number=3 loop=0 prefetch_active=0 precalc_len_multiple=1 got 37,102,221 keys in 5.003418 seconds or 7,415,375 gets per second | |
// - process_number=8 loop=0 prefetch_active=0 precalc_len_multiple=1 got 36,864,365 keys in 5.000885 seconds or 7,371,568 gets per second | |
// - process_number=6 loop=0 prefetch_active=0 precalc_len_multiple=1 got 36,867,757 keys in 5.001844 seconds or 7,370,833 gets per second | |
// - process_number=7 loop=0 prefetch_active=0 precalc_len_multiple=1 got 36,964,365 keys in 5.004100 seconds or 7,386,816 gets per second | |
// - process_number=4 loop=0 prefetch_active=0 precalc_len_multiple=1 got 37,198,829 keys in 5.005153 seconds or 7,432,106 gets per second | |
// - process_number=5 loop=0 prefetch_active=0 precalc_len_multiple=1 got 36,771,149 keys in 5.007912 seconds or 7,342,611 gets per second | |
// - process_number=2 loop=1 prefetch_active=1 precalc_len_multiple=1 got 45,697,795 keys in 5.000444 seconds or 9,138,748 gets per second | |
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 45,601,187 keys in 5.006289 seconds or 9,108,780 gets per second | |
// - process_number=3 loop=1 prefetch_active=1 precalc_len_multiple=1 got 47,432,803 keys in 5.005106 seconds or 9,476,883 gets per second | |
// - process_number=6 loop=1 prefetch_active=1 precalc_len_multiple=1 got 42,753,944 keys in 5.001581 seconds or 8,548,086 gets per second | |
// - process_number=8 loop=1 prefetch_active=1 precalc_len_multiple=1 got 44,507,971 keys in 5.003052 seconds or 8,896,164 gets per second | |
// - process_number=7 loop=1 prefetch_active=1 precalc_len_multiple=1 got 44,476,899 keys in 5.001811 seconds or 8,892,159 gets per second | |
// - process_number=4 loop=1 prefetch_active=1 precalc_len_multiple=1 got 47,594,947 keys in 5.008350 seconds or 9,503,119 gets per second | |
// - process_number=5 loop=1 prefetch_active=1 precalc_len_multiple=1 got 45,004,579 keys in 5.007661 seconds or 8,987,146 gets per second | |
// - process_number=2 loop=2 prefetch_active=1 precalc_len_multiple=2 got 47,532,803 keys in 5.001259 seconds or 9,504,168 gets per second | |
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 47,501,731 keys in 5.004918 seconds or 9,491,011 gets per second | |
// - process_number=3 loop=2 prefetch_active=1 precalc_len_multiple=2 got 46,122,083 keys in 5.002951 seconds or 9,218,976 gets per second | |
// - process_number=8 loop=2 prefetch_active=1 precalc_len_multiple=2 got 48,690,222 keys in 5.000941 seconds or 9,736,212 gets per second | |
// - process_number=6 loop=2 prefetch_active=1 precalc_len_multiple=2 got 46,346,371 keys in 5.003253 seconds or 9,263,248 gets per second | |
// - process_number=7 loop=2 prefetch_active=1 precalc_len_multiple=2 got 48,724,686 keys in 5.007236 seconds or 9,730,855 gets per second | |
// - process_number=4 loop=2 prefetch_active=1 precalc_len_multiple=2 got 46,874,051 keys in 5.004600 seconds or 9,366,193 gets per second | |
// - process_number=5 loop=2 prefetch_active=1 precalc_len_multiple=2 got 43,574,840 keys in 5.004941 seconds or 8,706,364 gets per second | |
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 42,926,264 keys in 5.002457 seconds or 8,581,036 gets per second | |
// - process_number=2 loop=3 prefetch_active=1 precalc_len_multiple=3 got 48,319,235 keys in 5.007317 seconds or 9,649,725 gets per second | |
// - process_number=8 loop=3 prefetch_active=1 precalc_len_multiple=3 got 45,359,939 keys in 5.001278 seconds or 9,069,670 gets per second | |
// - process_number=3 loop=3 prefetch_active=1 precalc_len_multiple=3 got 47,498,339 keys in 5.003657 seconds or 9,492,725 gets per second | |
// - process_number=6 loop=3 prefetch_active=1 precalc_len_multiple=3 got 43,343,768 keys in 5.006243 seconds or 8,657,943 gets per second | |
// - process_number=7 loop=3 prefetch_active=1 precalc_len_multiple=3 got 50,232,014 keys in 5.004614 seconds or 10,037,140 gets per second | |
// - process_number=5 loop=3 prefetch_active=1 precalc_len_multiple=3 got 47,429,411 keys in 5.000161 seconds or 9,485,577 gets per second | |
// - process_number=4 loop=3 prefetch_active=1 precalc_len_multiple=3 got 50,597,550 keys in 5.005670 seconds or 10,108,048 gets per second | |
// - process_number=1 ending | |
// - process_number=2 ending | |
// - process_number=8 ending | |
// - process_number=3 ending | |
// - process_number=6 ending | |
// - process_number=7 ending | |
// - process_number=5 ending | |
// - process_number=4 ending | |
// | |
// experiments with one process and changing the size of the bucket therefore relying on cache-line read-ahead | |
// | |
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line | |
// - starting | |
// - mmap() 536,870,912 bytes or 512 MB containing 8,388,608 buckets of size 64 with 8 slots per bucket or 67,108,864 total slots | |
// - put 4,903,147 keys (before collision) in 0.790470 seconds or 6,202,826 inserts per second | |
// - parent ending after fork()ing 1 kids | |
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 77,117,221 keys in 5.002977 seconds or 15,414,267 gets per second | |
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 104,504,487 keys in 5.002284 seconds or 20,891,354 gets per second | |
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 124,248,147 keys in 5.000843 seconds or 24,845,441 gets per second | |
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 124,117,075 keys in 5.000037 seconds or 24,823,232 gets per second | |
// - process_number=1 ending | |
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line | |
// - starting | |
// - mmap() 536,870,912 bytes or 512 MB containing 4,194,304 buckets of size 128 with 16 slots per bucket or 67,108,864 total slots | |
// - hit magic bucket 0; re-hashing key 10,253,294 | |
// ... | |
// - hit magic bucket 0; re-hashing key 12,192,560 | |
// - put 13,076,546 keys (before collision) in 1.570020 seconds or 8,328,904 inserts per second | |
// - parent ending after fork()ing 1 kids | |
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 63,740,520 keys in 5.001866 seconds or 12,743,348 gets per second | |
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 95,630,126 keys in 5.000265 seconds or 19,125,011 gets per second | |
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 106,806,128 keys in 5.000957 seconds or 21,357,138 gets per second | |
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 109,427,568 keys in 5.001722 seconds or 21,877,978 gets per second | |
// - process_number=1 ending | |
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line | |
// - starting | |
// - mmap() 536,870,912 bytes or 512 MB containing 2,097,152 buckets of size 256 with 32 slots per bucket or 67,108,864 total slots | |
// - hit magic bucket 0; re-hashing key 9,003,110 | |
// ... | |
// - hit magic bucket 0; re-hashing key 22,234,547 | |
// - put 23,028,554 keys (before collision) in 2.939141 seconds or 7,835,131 inserts per second | |
// - parent ending after fork()ing 1 kids | |
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 56,377,332 keys in 5.004045 seconds or 11,266,352 gets per second | |
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 75,866,942 keys in 5.001897 seconds or 15,167,634 gets per second | |
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 94,242,440 keys in 5.000053 seconds or 18,848,288 gets per second | |
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 92,014,216 keys in 5.000120 seconds or 18,402,402 gets per second | |
// - process_number=1 ending | |
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line | |
// - starting | |
// - mmap() 536,870,912 bytes or 512 MB containing 1,048,576 buckets of size 512 with 64 slots per bucket or 67,108,864 total slots | |
// - hit magic bucket 0; re-hashing key 3,933,621 | |
// ... | |
// - hit magic bucket 0; re-hashing key 34,570,889 | |
// - put 34,746,952 keys (before collision) in 5.457229 seconds or 6,367,142 inserts per second | |
// - parent ending after fork()ing 1 kids | |
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 40,348,584 keys in 5.001734 seconds or 8,066,919 gets per second | |
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 58,502,056 keys in 5.006553 seconds or 11,685,097 gets per second | |
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 57,519,016 keys in 5.005921 seconds or 11,490,196 gets per second | |
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 59,091,880 keys in 5.000849 seconds or 11,816,370 gets per second | |
// - process_number=1 ending | |
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line | |
// - starting | |
// - mmap() 536,870,912 bytes or 512 MB containing 524,288 buckets of size 1024 with 128 slots per bucket or 67,108,864 total slots | |
// - hit magic bucket 0; re-hashing key 3,230,506 | |
// ... | |
// - hit magic bucket 0; re-hashing key 42,292,122 | |
// - put 43,103,254 keys (before collision) in 9.367549 seconds or 4,601,337 inserts per second | |
// - parent ending after fork()ing 1 kids | |
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 30,636,384 keys in 5.003317 seconds or 6,123,215 gets per second | |
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 38,828,384 keys in 5.000796 seconds or 7,764,441 gets per second | |
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 39,352,672 keys in 5.005588 seconds or 7,861,748 gets per second | |
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 39,156,064 keys in 5.006275 seconds or 7,821,397 gets per second | |
// - process_number=1 ending |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Based on the MIT licensed [1], C++ function here [2]. | |
* We only use the functions outputting a 128bit hash. | |
* | |
* [1]http://code.google.com/p/smhasher/ | |
* [2]http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp | |
*/ | |
//----------------------------------------------------------------------------- | |
// MurmurHash3 was written by Austin Appleby, and is placed in the public | |
// domain. The author hereby disclaims copyright to this source code. | |
#include "murmurhash3.h" | |
#define FORCE_INLINE __attribute__((always_inline)) inline | |
static inline uint32_t rotl32 ( uint32_t x, int8_t r ) | |
{ | |
return (x << r) | (x >> (32 - r)); | |
} | |
static inline uint64_t rotl64 ( uint64_t x, int8_t r ) | |
{ | |
return (x << r) | (x >> (64 - r)); | |
} | |
#define ROTL32(x,y) rotl32(x,y) | |
#define ROTL64(x,y) rotl64(x,y) | |
#define BIG_CONSTANT(x) (x##LLU) | |
//----------------------------------------------------------------------------- | |
// Block read - if your platform needs to do endian-swapping or can only | |
// handle aligned reads, do the conversion here | |
static FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i ) | |
{ | |
return p[i]; | |
} | |
static FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i ) | |
{ | |
return p[i]; | |
} | |
//----------------------------------------------------------------------------- | |
// Finalization mix - force all bits of a hash block to avalanche | |
static FORCE_INLINE uint32_t fmix32 ( uint32_t h ) | |
{ | |
h ^= h >> 16; | |
h *= 0x85ebca6b; | |
h ^= h >> 13; | |
h *= 0xc2b2ae35; | |
h ^= h >> 16; | |
return h; | |
} | |
//---------- | |
static FORCE_INLINE uint64_t fmix64 ( uint64_t k ) | |
{ | |
k ^= k >> 33; | |
k *= BIG_CONSTANT(0xff51afd7ed558ccd); | |
k ^= k >> 33; | |
k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); | |
k ^= k >> 33; | |
return k; | |
} | |
//----------------------------------------------------------------------------- | |
void MurmurHash3_x64_128 ( const void * key, const int len, | |
const uint32_t seed, void * out ) | |
{ | |
const uint8_t * data = (const uint8_t*)key; | |
const int nblocks = len / 16; | |
uint64_t h1 = seed; | |
uint64_t h2 = seed; | |
const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); | |
const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); | |
//---------- | |
// body | |
const uint64_t * blocks = (const uint64_t *)(data); | |
for(int i = 0; i < nblocks; i++) | |
{ | |
uint64_t k1 = getblock64(blocks,i*2+0); | |
uint64_t k2 = getblock64(blocks,i*2+1); | |
k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; | |
h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; | |
k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; | |
h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5; | |
} | |
//---------- | |
// tail | |
const uint8_t * tail = (const uint8_t*)(data + nblocks*16); | |
uint64_t k1 = 0; | |
uint64_t k2 = 0; | |
switch(len & 15) | |
{ | |
case 15: k2 ^= (uint64_t)(tail[14]) << 48; | |
case 14: k2 ^= (uint64_t)(tail[13]) << 40; | |
case 13: k2 ^= (uint64_t)(tail[12]) << 32; | |
case 12: k2 ^= (uint64_t)(tail[11]) << 24; | |
case 11: k2 ^= (uint64_t)(tail[10]) << 16; | |
case 10: k2 ^= (uint64_t)(tail[ 9]) << 8; | |
case 9: k2 ^= (uint64_t)(tail[ 8]) << 0; | |
k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; | |
case 8: k1 ^= (uint64_t)(tail[ 7]) << 56; | |
case 7: k1 ^= (uint64_t)(tail[ 6]) << 48; | |
case 6: k1 ^= (uint64_t)(tail[ 5]) << 40; | |
case 5: k1 ^= (uint64_t)(tail[ 4]) << 32; | |
case 4: k1 ^= (uint64_t)(tail[ 3]) << 24; | |
case 3: k1 ^= (uint64_t)(tail[ 2]) << 16; | |
case 2: k1 ^= (uint64_t)(tail[ 1]) << 8; | |
case 1: k1 ^= (uint64_t)(tail[ 0]) << 0; | |
k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; | |
}; | |
//---------- | |
// finalization | |
h1 ^= len; h2 ^= len; | |
h1 += h2; | |
h2 += h1; | |
h1 = fmix64(h1); | |
h2 = fmix64(h2); | |
h1 += h2; | |
h2 += h1; | |
((uint64_t*)out)[0] = h1; | |
((uint64_t*)out)[1] = h2; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef __MURMURHASH3_H__ | |
#define __MURMURHASH3_H__ | |
#include <stdint.h> | |
extern void MurmurHash3_x64_128(const void * key, const int len, const uint32_t seed, void * out); | |
#endif /* __MURMURHASH3_H__ */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment