Last active
August 29, 2015 14:27
-
-
Save paulmach/74bc0a2df4b71bd8d0da to your computer and use it in GitHub Desktop.
Check uniformity of consistent hash using crc32
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"log" | |
"math/rand" | |
"time" | |
"github.com/strava/go.serversets/fixedset" | |
"github.com/strava/go.serversets/mcset" | |
) | |
func init() { | |
rand.Seed(time.Now().UnixNano()) | |
} | |
func main() { | |
watch := fixedset.New([]string{"10.0.16.69:11211", "10.0.22.188:11211", "10.0.26.214:11211"}) | |
cluster := mcset.New(watch) | |
log.Printf("cluster endpoints: %v", cluster.Endpoints()) | |
////////////////////////////////// | |
picks := make(map[string]int) | |
for i := 0; i < 100000; i++ { | |
addr, _ := cluster.PickServer(RandStringBytes(100)) | |
picks[addr.String()]++ | |
} | |
log.Printf("mcset results") | |
log.Printf("\t%d\t%v", picks["10.0.16.69:11211"], "10.0.16.69:11211") | |
log.Printf("\t%d\t%v", picks["10.0.22.188:11211"], "10.0.22.188:11211") | |
log.Printf("\t%d\t%v", picks["10.0.26.214:11211"], "10.0.26.214:11211") | |
} | |
const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ12345/" | |
func RandStringBytes(n int) string { | |
b := make([]byte, n) | |
for i := range b { | |
b[i] = letterBytes[rand.Intn(len(letterBytes))] | |
} | |
return string(b) | |
} |
See this article:
http://research.neustar.biz/2012/02/02/choosing-a-good-hash-function-part-3/
I believe we ended up going with Murmur3.
Code sample from wikipedia:
uint32_t murmur3_32(const char *key, uint32_t len, uint32_t seed) {
static const uint32_t c1 = 0xcc9e2d51;
static const uint32_t c2 = 0x1b873593;
static const uint32_t r1 = 15;
static const uint32_t r2 = 13;
static const uint32_t m = 5;
static const uint32_t n = 0xe6546b64;
uint32_t hash = seed;
const int nblocks = len / 4;
const uint32_t *blocks = (const uint32_t *) key;
int i;
for (i = 0; i < nblocks; i++) {
uint32_t k = blocks[i];
k *= c1;
k = (k << r1) | (k >> (32 - r1));
k *= c2;
hash ^= k;
hash = ((hash << r2) | (hash >> (32 - r2))) * m + n;
}
const uint8_t *tail = (const uint8_t *) (key + nblocks * 4);
uint32_t k1 = 0;
switch (len & 3) {
case 3:
k1 ^= tail[2] << 16;
case 2:
k1 ^= tail[1] << 8;
case 1:
k1 ^= tail[0];
k1 *= c1;
k1 = (k1 << r1) | (k1 >> (32 - r1));
k1 *= c2;
hash ^= k1;
}
hash ^= len;
hash ^= (hash >> 16);
hash *= 0x85ebca6b;
hash ^= (hash >> 13);
hash *= 0xc2b2ae35;
hash ^= (hash >> 16);
return hash;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I ran into this exact non-uniformity when I implemented our assignment randomizer for the experiment reporting framework.