Last active
July 4, 2020 15:17
-
-
Save moloch--/c8408978a3a2ead79dc6496f39e3cc8f to your computer and use it in GitHub Desktop.
Bloom Filter Benchmark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module github.com/moloch--/bloom-test | |
go 1.14 | |
require ( | |
github.com/spaolacci/murmur3 v1.1.0 // indirect | |
github.com/willf/bitset v1.1.10 // indirect | |
github.com/willf/bloom v2.0.3+incompatible | |
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"crypto/sha256" | |
"fmt" | |
"os" | |
"sort" | |
"sync" | |
"text/tabwriter" | |
"time" | |
"github.com/willf/bloom" | |
) | |
const ( | |
kb = 1024 | |
mb = kb * 1024 | |
gb = mb * 1024 | |
) | |
// Result of the benchmark | |
type Result struct { | |
FilterSize int | |
FilterHashes int | |
FirstCollision int | |
} | |
// Config of bloom filter | |
type Config struct { | |
FilterSize int | |
FilterHashes int | |
} | |
// Worker thread | |
type Worker struct { | |
Wg *sync.WaitGroup | |
Results []*Result | |
Queue chan *Config | |
Done chan bool | |
} | |
func (w *Worker) start() { | |
for { | |
select { | |
case conf := <-w.Queue: | |
collision := findFirstCollision(conf.FilterSize, conf.FilterHashes) | |
w.Results = append(w.Results, &Result{ | |
FilterSize: conf.FilterSize, | |
FilterHashes: conf.FilterHashes, | |
FirstCollision: collision, | |
}) | |
case <-w.Done: | |
w.Wg.Done() | |
} | |
} | |
} | |
func main() { | |
// [Config Options] ********************* | |
maxWorkers := 1 | |
filterSizes := []int{16 * gb} | |
startHashes := 8 | |
maxHashes := 12 | |
// ************************************** | |
totalTests := len(filterSizes) * (maxHashes + 1 - startHashes) | |
queue := make(chan *Config) | |
wg := &sync.WaitGroup{} | |
workers := []*Worker{} | |
for id := 0; id < maxWorkers; id++ { | |
worker := &Worker{ | |
Results: []*Result{}, | |
Wg: wg, | |
Queue: queue, | |
Done: make(chan bool), | |
} | |
workers = append(workers, worker) | |
wg.Add(1) | |
go worker.start() | |
} | |
joined := make(chan bool) | |
go func() { | |
for { | |
select { | |
case <-time.After(time.Second): | |
resultsSoFar := 0 | |
for _, worker := range workers { | |
resultsSoFar += len(worker.Results) | |
} | |
fmt.Printf("\u001b[2K\r %d of %d results ...", resultsSoFar, totalTests) | |
case <-joined: | |
fmt.Printf("\u001b[2K\r") | |
joined <- true | |
return | |
} | |
} | |
}() | |
for _, filterSize := range filterSizes { | |
for filterHashes := startHashes; filterHashes <= maxHashes; filterHashes++ { | |
queue <- &Config{FilterSize: filterSize, FilterHashes: filterHashes} | |
} | |
} | |
for _, worker := range workers { | |
worker.Done <- true | |
} | |
wg.Wait() | |
joined <- true | |
<-joined | |
results := []*Result{} | |
fmt.Printf("\u001b[2K\rSorting %d results ...", len(results)) | |
for _, worker := range workers { | |
results = append(results, worker.Results...) | |
fmt.Printf("\u001b[2K\rSorting %d results ...", len(results)) | |
} | |
sort.Slice(results, func(i, j int) bool { | |
if results[i].FilterSize > results[j].FilterSize { | |
return false | |
} | |
if results[i].FilterSize < results[j].FilterSize { | |
return true | |
} | |
return results[i].FilterHashes < results[j].FilterHashes | |
}) | |
fmt.Printf("\u001b[2K\rSorting %d results ... done!\n", len(results)) | |
w := new(tabwriter.Writer) | |
w.Init(os.Stdout, 1, 4, 2, ' ', 0) | |
fmt.Fprintln(w, "Size (MBs)\tHashes\tFirst Collision") | |
fmt.Fprintln(w, "==========\t======\t===============") | |
for _, result := range results { | |
fmt.Fprintf(w, "%d\t%d\t%d\n", result.FilterSize/mb, result.FilterHashes, result.FirstCollision) | |
} | |
fmt.Fprintln(w) | |
w.Flush() | |
} | |
func findFirstCollision(filterSize, filterHashes int) int { | |
bloomFilter := bloom.New(uint(filterSize), uint(filterHashes)) | |
hash := sha256.New() | |
hash.Write([]byte{'a', 'b', 'c'}) | |
rounds := 1 | |
for { | |
hexDigest := fmt.Sprintf("%x", hash.Sum(nil)) | |
exists := bloomFilter.TestAndAddString(hexDigest) | |
if exists { | |
break | |
} | |
hash.Write(hash.Sum(nil)) | |
rounds++ | |
} | |
return rounds | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment