Created
July 6, 2012 23:34
-
-
Save submachine/3063407 to your computer and use it in GitHub Desktop.
A toy x86_64 pointer-hash benchmark to go with the StackOverflow answer at: http://stackoverflow.com/a/11364619/274261
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdint.h> | |
#include <stdbool.h> | |
#define HASH_BITS 10 | |
#define BUCKETS (1 << HASH_BITS) | |
/* A half-avalanche 4 byte integer hash function, as described at: | |
http://burtleburtle.net/bob/hash/integer.html */ | |
uint32_t half_avalanche (uint32_t a); | |
/* A hash function written to play nice with x86_64 virtual addresses, | |
as described at: http://stackoverflow.com/a/11364619/274261 */ | |
uint32_t hash (void * p); | |
void **buckets; | |
bool *collisions; | |
/* A toy benchmark for `hash'. 'toy' is the key word here. Tests for hash | |
collisions between pointers to memory allocated on the heap. */ | |
int main (void) | |
{ | |
int d, i, values, n_collision; | |
void * p; | |
uint32_t index; | |
int n_collision_buckets; | |
srand (33); | |
buckets = malloc (BUCKETS * sizeof (void *)); | |
collisions = malloc (BUCKETS * sizeof (bool)); | |
/* We will actually leak memory during the execution. But freeing any | |
can cause malloc to return the same pointers again, and give us false | |
collisions. So, we just let the leak be. */ | |
for (d = 2; d >= 0; d--) | |
{ | |
n_collision = 0; | |
values = BUCKETS >> d; | |
/* Empty the buckets. */ | |
for (i = 0; i < BUCKETS; i++) | |
{ | |
buckets[i] = NULL; | |
collisions[i] = false; | |
} | |
for (i = 0; i < values; i++) | |
{ | |
/* Allocate either a small even sized block, or a big one. */ | |
if (rand () % 2) | |
p = malloc (2 << (rand () % 3)); | |
else | |
p = malloc (512 + (rand () % 1024)); | |
index = hash (p); | |
if (buckets[index] != NULL) | |
{ | |
n_collision++; | |
collisions[index] = true; | |
} | |
buckets[index] = p; | |
} | |
n_collision_buckets = 0; | |
for (i = 0; i < BUCKETS; i++) | |
if (collisions[i]) | |
n_collision_buckets ++; | |
printf ("%d buckets, %d values, %d collissions across %d buckets\n", | |
BUCKETS, values, n_collision, n_collision_buckets); | |
} | |
return 0; | |
} | |
uint32_t hash (void * p) | |
{ | |
uint64_t a, msbs; | |
a = (uint64_t) p; | |
/* Drop two LSBs. */ | |
a >>= 2; | |
/* Get rid of the MSBs. Keep 46 bits. */ | |
a &= 0x3fffffffffff; | |
/* Get the 14 MSBs and fold them in to get a 32 bit integer. | |
The MSBs are mostly 0s anyway, so we don't lose much entropy */ | |
msbs = (a >> 32) << 18; | |
a ^= msbs; | |
return half_avalanche ((uint32_t) a) >> (32 - HASH_BITS); | |
} | |
uint32_t half_avalanche (uint32_t a) | |
{ | |
a = (a+0x479ab41d) + (a<<8); | |
a = (a^0xe4aa10ce) ^ (a>>5); | |
a = (a+0x9942f0a6) - (a<<14); | |
a = (a^0x5aedd67d) ^ (a>>3); | |
a = (a+0x17bea992) + (a<<7); | |
return a; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment