Created
December 5, 2016 13:32
-
-
Save advincze/8f59910af98b1227f4fc0569e5f8d6d8 to your computer and use it in GitHub Desktop.
compare the bias of pearson and fnv hashes on bytes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"hash/fnv" | |
"testing" | |
"math/rand" | |
"time" | |
"github.com/montanaflynn/stats" | |
) | |
func TestHashes(t *testing.T) { | |
t.Run("fnv", testHash(fnvhash, 1e6, 32)) | |
t.Run("pearson", testHash(func(msg []byte) []byte { | |
return pearson(msg, scramble([]byte("foobar")), 4) | |
}, 1e6, 32)) | |
} | |
func testHash(fn func([]byte) []byte, iter int, bytecount int) func(t *testing.T) { | |
return func(t *testing.T) { | |
rand.Seed(42) | |
hist := make([]map[int]int, 4) | |
for i := 0; i < 4; i++ { | |
hist[i] = map[int]int{} | |
} | |
msg := make([]byte, bytecount) | |
func() { | |
defer func(s time.Time) { fmt.Println("took", time.Since(s)) }(time.Now()) | |
for i := 0; i < iter; i++ { | |
rand.Read(msg) | |
res := fn(msg) | |
for j := 0; j < 4; j++ { | |
hist[j][int(res[j])]++ | |
} | |
} | |
}() | |
for i := 0; i < 4; i++ { | |
h := stats.LoadRawData(hist[i]) | |
m, _ := stats.Mean(h) | |
stddev, _ := stats.StandardDeviation(h) | |
fmt.Println(m, stddev, "=", stddev*100.0/m, "%") | |
} | |
} | |
} | |
func fnvhash(msg []byte) []byte { | |
h := fnv.New32() | |
h.Write(msg) | |
res := h.Sum32() | |
out := make([]byte, 4) | |
for i := 0; i < 4; i++ { | |
out[i] = byte(res >> uint(i*8) % 256) | |
} | |
return out | |
} | |
func pearson(msg, iv []byte, bytecount int) []byte { | |
var h byte | |
hh := make([]byte, bytecount) | |
for j := 0; j < bytecount; j++ { | |
h = iv[(int(msg[0])+j)%256] | |
for i := 1; i < len(msg); i++ { | |
h = iv[h^msg[i]] | |
} | |
hh[j] = h | |
} | |
return hh | |
} | |
func scramble(seed []byte) []byte { | |
ss := make([]byte, 256) | |
for i := 0; i < 256; i++ { | |
ss[i] = byte(i) | |
} | |
j := 0 | |
for i := 0; i < 256; i++ { | |
j = (j + int(ss[i]) + int(seed[i%len(seed)])) % 256 | |
ss[i], ss[j] = ss[j], ss[i] | |
} | |
return ss | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment