Skip to content

Instantly share code, notes, and snippets.

@moloch--
Last active July 4, 2020 15:17
Show Gist options
  • Save moloch--/c8408978a3a2ead79dc6496f39e3cc8f to your computer and use it in GitHub Desktop.
Bloom Filter Benchmark
// Module definition for the bloom filter benchmark gist.
module github.com/moloch--/bloom-test

go 1.14

require (
	// murmur3 and bitset are transitive dependencies of willf/bloom.
	github.com/spaolacci/murmur3 v1.1.0 // indirect
	github.com/willf/bitset v1.1.10 // indirect
	github.com/willf/bloom v2.0.3+incompatible
)
package main
import (
"crypto/sha256"
"fmt"
"os"
"sort"
"sync"
"text/tabwriter"
"time"
"github.com/willf/bloom"
)
// Byte-size multipliers used to express bloom filter sizes.
const (
	kb = 1 << 10 // kilobyte
	mb = 1 << 20 // megabyte
	gb = 1 << 30 // gigabyte
)
// Result of the benchmark: one row per (size, hashes) combination.
type Result struct {
	FilterSize     int // filter size as handed to bloom.New — presumably bits, verify against willf/bloom
	FilterHashes   int // number of hash functions the filter used
	FirstCollision int // rounds of the hash chain until the filter first reported a hit
}
// Config of bloom filter: one benchmark job, consumed by a Worker.
type Config struct {
	FilterSize   int // filter size to pass to bloom.New
	FilterHashes int // hash-function count to pass to bloom.New
}
// Worker thread: consumes Configs from Queue, accumulates Results, and
// signals the shared WaitGroup when told to stop via Done.
type Worker struct {
	Wg      *sync.WaitGroup // shared with main; Done() is called when the worker stops
	Results []*Result       // NOTE(review): also read by main's progress goroutine without synchronization — TODO guard with a mutex
	Queue   chan *Config    // inbound jobs; one unbuffered channel shared by all workers
	Done    chan bool       // per-worker stop signal
}
// start consumes benchmark configurations from the Queue, recording one
// Result per configuration, until a value arrives on Done.
//
// Bug fix vs. the original: after receiving on Done the original called
// Wg.Done() but kept looping, so the goroutine never exited (leak), and
// any second Done signal would have decremented the WaitGroup again.
// Now the worker returns, with the WaitGroup release deferred.
func (w *Worker) start() {
	defer w.Wg.Done()
	for {
		select {
		case conf := <-w.Queue:
			collision := findFirstCollision(conf.FilterSize, conf.FilterHashes)
			w.Results = append(w.Results, &Result{
				FilterSize:     conf.FilterSize,
				FilterHashes:   conf.FilterHashes,
				FirstCollision: collision,
			})
		case <-w.Done:
			return
		}
	}
}
// main drives the benchmark matrix (filter sizes x hash counts) through a
// small worker pool, prints a once-per-second progress line, then gathers,
// sorts and tab-prints every Result.
func main() {
	// [Config Options] *********************
	maxWorkers := 1
	filterSizes := []int{16 * gb} // sizes handed to bloom.New — presumably bits, verify against willf/bloom
	startHashes := 8
	maxHashes := 12 // inclusive upper bound on hash-function count
	// **************************************
	totalTests := len(filterSizes) * (maxHashes + 1 - startHashes)
	queue := make(chan *Config) // unbuffered: each send blocks until a worker accepts the job
	wg := &sync.WaitGroup{}
	workers := []*Worker{}
	for id := 0; id < maxWorkers; id++ {
		worker := &Worker{
			Results: []*Result{},
			Wg:      wg,
			Queue:   queue,
			Done:    make(chan bool),
		}
		workers = append(workers, worker)
		wg.Add(1)
		go worker.start()
	}
	// Progress printer: overwrites one terminal line each second until it
	// is stopped (and acknowledged) via the joined channel handshake.
	joined := make(chan bool)
	go func() {
		for {
			select {
			case <-time.After(time.Second):
				resultsSoFar := 0
				for _, worker := range workers {
					// NOTE(review): reading worker.Results while the worker
					// appends to it is an unsynchronized data race — TODO
					// guard with a mutex or use an atomic counter.
					resultsSoFar += len(worker.Results)
				}
				fmt.Printf("\u001b[2K\r %d of %d results ...", resultsSoFar, totalTests)
			case <-joined:
				fmt.Printf("\u001b[2K\r") // clear the progress line
				joined <- true            // ack so main knows this goroutine is gone
				return
			}
		}
	}()
	// Enqueue every (size, hashes) combination; each send blocks until a
	// worker picks the job up, so this loop finishing means all jobs are
	// at least in a worker's hands.
	for _, filterSize := range filterSizes {
		for filterHashes := startHashes; filterHashes <= maxHashes; filterHashes++ {
			queue <- &Config{FilterSize: filterSize, FilterHashes: filterHashes}
		}
	}
	// Signal each worker to stop, then wait for all WaitGroup releases.
	for _, worker := range workers {
		worker.Done <- true
	}
	wg.Wait()
	// Stop the progress printer (send), then wait for its ack (receive).
	joined <- true
	<-joined
	results := []*Result{}
	fmt.Printf("\u001b[2K\rSorting %d results ...", len(results))
	for _, worker := range workers {
		results = append(results, worker.Results...)
		fmt.Printf("\u001b[2K\rSorting %d results ...", len(results))
	}
	// Order by filter size, then by hash count within equal sizes.
	sort.Slice(results, func(i, j int) bool {
		if results[i].FilterSize > results[j].FilterSize {
			return false
		}
		if results[i].FilterSize < results[j].FilterSize {
			return true
		}
		return results[i].FilterHashes < results[j].FilterHashes
	})
	fmt.Printf("\u001b[2K\rSorting %d results ... done!\n", len(results))
	// Render an aligned table: size (MB), hash count, first-collision round.
	w := new(tabwriter.Writer)
	w.Init(os.Stdout, 1, 4, 2, ' ', 0)
	fmt.Fprintln(w, "Size (MBs)\tHashes\tFirst Collision")
	fmt.Fprintln(w, "==========\t======\t===============")
	for _, result := range results {
		fmt.Fprintf(w, "%d\t%d\t%d\n", result.FilterSize/mb, result.FilterHashes, result.FirstCollision)
	}
	fmt.Fprintln(w)
	w.Flush()
}
// findFirstCollision builds a bloom filter of the given size (the value is
// handed straight to bloom.New — presumably bits, verify against the
// willf/bloom docs) with filterHashes hash functions, then feeds it an
// iterated SHA-256 hash chain seeded with "abc" until the filter reports a
// membership hit. It returns the number of rounds that took (first insert
// counts as round 1).
//
// Improvement vs. the original: hash.Sum(nil) was called twice per
// iteration on the same hash state; sha256's Sum does not mutate state,
// so the digest is now computed once and reused.
func findFirstCollision(filterSize, filterHashes int) int {
	bloomFilter := bloom.New(uint(filterSize), uint(filterHashes))
	hash := sha256.New()
	hash.Write([]byte{'a', 'b', 'c'})
	rounds := 1
	for {
		digest := hash.Sum(nil)
		hexDigest := fmt.Sprintf("%x", digest)
		if bloomFilter.TestAndAddString(hexDigest) {
			break
		}
		// Extend the chain: absorb this round's digest into the hash state.
		hash.Write(digest)
		rounds++
	}
	return rounds
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment