Created
October 24, 2013 12:58
-
-
Save yl3w/7136792 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <math.h> | |
| #define MAXBUCKETS 2048 | |
| #define PRINT_DISTRIBUTION 1 | |
| static unsigned int occ[MAXBUCKETS]; | |
| char line[1024]; | |
| unsigned test_hash(const unsigned char *s, int len) | |
| { | |
| unsigned int i; | |
| i = 0; | |
| while (len && *s) { | |
| i ^= *s; | |
| i = (i << (*s >> 3)) | (i >> (32-(*s >> 3))); | |
| s++; | |
| len--; | |
| } | |
| return i; | |
| } | |
| unsigned wt_hash (const unsigned char *key, int len) | |
| { | |
| unsigned char *p = (unsigned char *)key; | |
| unsigned h = 0x783c965aUL; | |
| unsigned step = 16; | |
| for (; len > 0; len--) { | |
| h ^= ((unsigned int)*p) * 9; | |
| p++; | |
| h = (h << step) + (h >> (32-step)); | |
| step ^= h; | |
| step &= 0x1F; | |
| } | |
| return h; | |
| } | |
| unsigned wt_hash5 (const unsigned char *key, int len) | |
| { | |
| unsigned char *p = (unsigned char *)key; | |
| unsigned h0 = 0xa53c965aUL; | |
| unsigned h1 = 0x5CA6953AUL; | |
| unsigned step0 = 19; | |
| unsigned step1 = 13; | |
| for (; len > 0; len--) { | |
| unsigned int t; | |
| t = ((unsigned int)*p); | |
| p++; | |
| //t *= 0x00DB7531; | |
| t *= 13; | |
| h0 -= t; | |
| h1 += t; | |
| t = (h1 << step0) | (h1 >> (32-step0)); | |
| h1 = (h0 << step1) | (h0 >> (32-step1)); | |
| h0 = t; | |
| t = ((h0 >> 16) ^ h1) & 0xffff; | |
| step0 = t & 0x1F; | |
| step1 = t >> 11; | |
| } | |
| return h0 ^ h1; | |
| } | |
| unsigned wt_hash6 (const unsigned char *key, int len) | |
| { | |
| unsigned char *p = (unsigned char *)key; | |
| unsigned h0 = 0xa53c965aUL; | |
| unsigned h1 = 0x5CA6953AUL; | |
| unsigned step0 = 6; | |
| unsigned step1 = 18; | |
| for (; len > 0; len--) { | |
| unsigned int t; | |
| t = ((unsigned int)*p); | |
| p++; | |
| h0 = ~(h0 ^ t); | |
| h1 = ~(h1 + t); | |
| t = (h1 << step0) | (h1 >> (32-step0)); | |
| h1 = (h0 << step1) | (h0 >> (32-step1)); | |
| h0 = t; | |
| t = ((h0 >> 16) ^ h1) & 0xffff; | |
| step0 = t & 0x1F; | |
| step1 = t >> 11; | |
| } | |
| return h0 ^ h1; | |
| } | |
| unsigned hash_djbx33(const unsigned char *key, int len) | |
| { | |
| unsigned hash = 5381; | |
| /* the hash unrolled eight times */ | |
| for (; len >= 8; len -= 8) { | |
| hash = ((hash << 5) + hash) + *key++; | |
| hash = ((hash << 5) + hash) + *key++; | |
| hash = ((hash << 5) + hash) + *key++; | |
| hash = ((hash << 5) + hash) + *key++; | |
| hash = ((hash << 5) + hash) + *key++; | |
| hash = ((hash << 5) + hash) + *key++; | |
| hash = ((hash << 5) + hash) + *key++; | |
| hash = ((hash << 5) + hash) + *key++; | |
| } | |
| switch (len) { | |
| case 7: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */ | |
| case 6: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */ | |
| case 5: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */ | |
| case 4: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */ | |
| case 3: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */ | |
| case 2: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */ | |
| case 1: hash = ((hash << 5) + hash) + *key++; break; | |
| default: /* case 0: */ break; | |
| } | |
| return hash; | |
| } | |
| unsigned haproxy_uri_hash(const unsigned char *uri, int uri_len) | |
| { | |
| unsigned hash = 0; | |
| int c; | |
| while (uri_len--) { | |
| c = *uri++; | |
| if (c == '?') | |
| break; | |
| hash = c + (hash << 6) + (hash << 16) - hash; | |
| } | |
| return hash; | |
| } | |
| static inline unsigned int full_hash(unsigned int a) | |
| { | |
| /* This function is one of Bob Jenkins' full avalanche hashing | |
| * functions, which when provides quite a good distribution for little | |
| * input variations. The result is quite suited to fit over a 32-bit | |
| * space with enough variations so that a randomly picked number falls | |
| * equally before any server position. | |
| * Check http://burtleburtle.net/bob/hash/integer.html for more info. | |
| */ | |
| a = (a+0x7ed55d16) + (a<<12); | |
| a = (a^0xc761c23c) ^ (a>>19); | |
| a = (a+0x165667b1) + (a<<5); | |
| a = (a+0xd3a2646c) ^ (a<<9); | |
| a = (a+0xfd7046c5) + (a<<3); | |
| a = (a^0xb55a4f09) ^ (a>>16); | |
| /* ensure values are better spread all around the tree by multiplying | |
| * by a large prime close to 3/4 of the tree. | |
| */ | |
| return a * 3221225473U; | |
| } | |
| unsigned (*hash_fct[])(const unsigned char *, int) = { | |
| test_hash, | |
| wt_hash, | |
| wt_hash5, | |
| wt_hash6, | |
| hash_djbx33, | |
| haproxy_uri_hash, | |
| }; | |
| main(int argc, char **argv) | |
| { | |
| unsigned int i, j, min, max; | |
| unsigned int max_occ, max_ent, acc, buckets; | |
| unsigned long tot; | |
| char *c; | |
| int pref_hash = 0; | |
| double avg; | |
| long double variance, stdd; | |
| unsigned long sum_deviation; | |
| int diff; | |
| avg = 0; | |
| sum_deviation = 0; | |
| variance = 0; stdd = 0; | |
| tot = 0; | |
| max = 0; min = ~0UL; | |
| buckets = 50; | |
| if (argc > 1) { | |
| pref_hash = atoi(argv[1]); | |
| if (pref_hash < 0 || pref_hash >= sizeof(hash_fct)/sizeof(hash_fct[0])) { | |
| printf("Unsupported function number.\n"); | |
| exit(1); | |
| } | |
| } | |
| if (argc > 2) { | |
| buckets = atoi(argv[2]); | |
| if (buckets < 0 || buckets >= MAXBUCKETS) { | |
| printf("Unsupported buckets count.\n"); | |
| exit(1); | |
| } | |
| } | |
| while (fgets(line, sizeof(line), stdin) != NULL) { | |
| tot++; | |
| c = line; | |
| while (*c && *c != '\n') | |
| c++; | |
| *c = 0; | |
| i = hash_fct[pref_hash](line, c - line); | |
| if (argc > 3) | |
| i = full_hash(i); | |
| occ[i % buckets]++; | |
| } | |
| avg = tot/(double)buckets; | |
| for (i = 0; i < buckets; i++) { | |
| diff = occ[i] - avg; | |
| sum_deviation += ((long) diff * (long) diff); | |
| if (occ[i] < min) | |
| min = occ[i]; | |
| if (occ[i] > max) | |
| max = occ[i]; | |
| } | |
| variance = sum_deviation/(double)buckets; | |
| stdd = sqrt(variance); | |
| fprintf(stderr, | |
| "fct tot srv min avg variance stdd max min/max\n"); | |
| fprintf(stderr, | |
| "%d%s %ld %d %d %.2f %.2LF %.2LF %d %.2f\n", | |
| pref_hash, argc > 3 ? "+av" : "", tot, buckets, min, avg, variance, stdd, max, (double)min/(double)max); | |
| if(PRINT_DISTRIBUTION) { | |
| fprintf(stderr, "Distribution: (#entries => #times => #remaining above)\n"); | |
| max_occ = max_ent = 0; | |
| acc = 0; | |
| for (j = 0; j <= max; j++) { | |
| int k = 0; | |
| for (i = 0; i < buckets; i++) { | |
| if (occ[i] == j) | |
| k++; | |
| } | |
| if (k > max_occ) { | |
| max_occ = k; | |
| max_ent = j; | |
| } | |
| acc += k; | |
| if (k) | |
| printf("%d %d %d\n", j, k, buckets - acc); | |
| } | |
| fprintf(stderr, "Most frequent number of entries: %d (%d times)\n", | |
| max_ent, max_occ); | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment