Skip to content

Instantly share code, notes, and snippets.

@yl3w
Created October 24, 2013 12:58
Show Gist options
  • Select an option

  • Save yl3w/7136792 to your computer and use it in GitHub Desktop.

Select an option

Save yl3w/7136792 to your computer and use it in GitHub Desktop.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define MAXBUCKETS 2048
#define PRINT_DISTRIBUTION 1
static unsigned int occ[MAXBUCKETS];
char line[1024];
unsigned test_hash(const unsigned char *s, int len)
{
unsigned int i;
i = 0;
while (len && *s) {
i ^= *s;
i = (i << (*s >> 3)) | (i >> (32-(*s >> 3)));
s++;
len--;
}
return i;
}
unsigned wt_hash (const unsigned char *key, int len)
{
unsigned char *p = (unsigned char *)key;
unsigned h = 0x783c965aUL;
unsigned step = 16;
for (; len > 0; len--) {
h ^= ((unsigned int)*p) * 9;
p++;
h = (h << step) + (h >> (32-step));
step ^= h;
step &= 0x1F;
}
return h;
}
unsigned wt_hash5 (const unsigned char *key, int len)
{
unsigned char *p = (unsigned char *)key;
unsigned h0 = 0xa53c965aUL;
unsigned h1 = 0x5CA6953AUL;
unsigned step0 = 19;
unsigned step1 = 13;
for (; len > 0; len--) {
unsigned int t;
t = ((unsigned int)*p);
p++;
//t *= 0x00DB7531;
t *= 13;
h0 -= t;
h1 += t;
t = (h1 << step0) | (h1 >> (32-step0));
h1 = (h0 << step1) | (h0 >> (32-step1));
h0 = t;
t = ((h0 >> 16) ^ h1) & 0xffff;
step0 = t & 0x1F;
step1 = t >> 11;
}
return h0 ^ h1;
}
unsigned wt_hash6 (const unsigned char *key, int len)
{
unsigned char *p = (unsigned char *)key;
unsigned h0 = 0xa53c965aUL;
unsigned h1 = 0x5CA6953AUL;
unsigned step0 = 6;
unsigned step1 = 18;
for (; len > 0; len--) {
unsigned int t;
t = ((unsigned int)*p);
p++;
h0 = ~(h0 ^ t);
h1 = ~(h1 + t);
t = (h1 << step0) | (h1 >> (32-step0));
h1 = (h0 << step1) | (h0 >> (32-step1));
h0 = t;
t = ((h0 >> 16) ^ h1) & 0xffff;
step0 = t & 0x1F;
step1 = t >> 11;
}
return h0 ^ h1;
}
unsigned hash_djbx33(const unsigned char *key, int len)
{
unsigned hash = 5381;
/* the hash unrolled eight times */
for (; len >= 8; len -= 8) {
hash = ((hash << 5) + hash) + *key++;
hash = ((hash << 5) + hash) + *key++;
hash = ((hash << 5) + hash) + *key++;
hash = ((hash << 5) + hash) + *key++;
hash = ((hash << 5) + hash) + *key++;
hash = ((hash << 5) + hash) + *key++;
hash = ((hash << 5) + hash) + *key++;
hash = ((hash << 5) + hash) + *key++;
}
switch (len) {
case 7: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
case 6: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
case 5: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
case 4: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
case 3: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
case 2: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
case 1: hash = ((hash << 5) + hash) + *key++; break;
default: /* case 0: */ break;
}
return hash;
}
unsigned haproxy_uri_hash(const unsigned char *uri, int uri_len)
{
unsigned hash = 0;
int c;
while (uri_len--) {
c = *uri++;
if (c == '?')
break;
hash = c + (hash << 6) + (hash << 16) - hash;
}
return hash;
}
static inline unsigned int full_hash(unsigned int a)
{
/* This function is one of Bob Jenkins' full avalanche hashing
* functions, which when provides quite a good distribution for little
* input variations. The result is quite suited to fit over a 32-bit
* space with enough variations so that a randomly picked number falls
* equally before any server position.
* Check http://burtleburtle.net/bob/hash/integer.html for more info.
*/
a = (a+0x7ed55d16) + (a<<12);
a = (a^0xc761c23c) ^ (a>>19);
a = (a+0x165667b1) + (a<<5);
a = (a+0xd3a2646c) ^ (a<<9);
a = (a+0xfd7046c5) + (a<<3);
a = (a^0xb55a4f09) ^ (a>>16);
/* ensure values are better spread all around the tree by multiplying
* by a large prime close to 3/4 of the tree.
*/
return a * 3221225473U;
}
unsigned (*hash_fct[])(const unsigned char *, int) = {
test_hash,
wt_hash,
wt_hash5,
wt_hash6,
hash_djbx33,
haproxy_uri_hash,
};
main(int argc, char **argv)
{
unsigned int i, j, min, max;
unsigned int max_occ, max_ent, acc, buckets;
unsigned long tot;
char *c;
int pref_hash = 0;
double avg;
long double variance, stdd;
unsigned long sum_deviation;
int diff;
avg = 0;
sum_deviation = 0;
variance = 0; stdd = 0;
tot = 0;
max = 0; min = ~0UL;
buckets = 50;
if (argc > 1) {
pref_hash = atoi(argv[1]);
if (pref_hash < 0 || pref_hash >= sizeof(hash_fct)/sizeof(hash_fct[0])) {
printf("Unsupported function number.\n");
exit(1);
}
}
if (argc > 2) {
buckets = atoi(argv[2]);
if (buckets < 0 || buckets >= MAXBUCKETS) {
printf("Unsupported buckets count.\n");
exit(1);
}
}
while (fgets(line, sizeof(line), stdin) != NULL) {
tot++;
c = line;
while (*c && *c != '\n')
c++;
*c = 0;
i = hash_fct[pref_hash](line, c - line);
if (argc > 3)
i = full_hash(i);
occ[i % buckets]++;
}
avg = tot/(double)buckets;
for (i = 0; i < buckets; i++) {
diff = occ[i] - avg;
sum_deviation += ((long) diff * (long) diff);
if (occ[i] < min)
min = occ[i];
if (occ[i] > max)
max = occ[i];
}
variance = sum_deviation/(double)buckets;
stdd = sqrt(variance);
fprintf(stderr,
"fct tot srv min avg variance stdd max min/max\n");
fprintf(stderr,
"%d%s %ld %d %d %.2f %.2LF %.2LF %d %.2f\n",
pref_hash, argc > 3 ? "+av" : "", tot, buckets, min, avg, variance, stdd, max, (double)min/(double)max);
if(PRINT_DISTRIBUTION) {
fprintf(stderr, "Distribution: (#entries => #times => #remaining above)\n");
max_occ = max_ent = 0;
acc = 0;
for (j = 0; j <= max; j++) {
int k = 0;
for (i = 0; i < buckets; i++) {
if (occ[i] == j)
k++;
}
if (k > max_occ) {
max_occ = k;
max_ent = j;
}
acc += k;
if (k)
printf("%d %d %d\n", j, k, buckets - acc);
}
fprintf(stderr, "Most frequent number of entries: %d (%d times)\n",
max_ent, max_occ);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment