Last active
August 29, 2015 13:57
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package bloomcounter | |
import ( | |
"encoding/binary" | |
"bytes" | |
"crypto/md5" | |
) | |
type BloomCounter struct { | |
buckets []uint | |
nhashes int | |
limit int | |
} | |
// constructor | |
func New(buckets int, limit int) *BloomCounter { | |
bf := new(BloomCounter) | |
bf.buckets = make([]uint, buckets) | |
bf.nhashes = 8 | |
bf.limit = limit | |
return bf | |
} | |
// add a new item | |
// returns a boolean, if it's true the word has hit it's limit and should be counted. | |
func (bf *BloomCounter) Add(item string) bool { | |
hashes := get_hashes(item, bf.nhashes, len(bf.buckets)) | |
hitLimit := true | |
for _, hash := range hashes { | |
if bf.buckets[hash] < uint(bf.limit) { | |
bf.buckets[hash]++ | |
hitLimit = false | |
} | |
} | |
return hitLimit | |
} | |
// return a uint32 array of k hashes between 0 and m | |
func get_hashes(item string, k int, m int) []uint32 { | |
hashes := make([]uint32, k) | |
for i := range hashes { | |
a, b := hash(item) | |
hashes[i] = (a + b * uint32(i)) % uint32(m) | |
} | |
return hashes; | |
} | |
// returns two uint32s. one hash two values, sneaky. | |
func hash(item string) (uint32, uint32) { | |
h := md5.New() | |
h.Write([]byte(item)) | |
buf := bytes.NewBuffer(h.Sum(nil)) | |
var hash1 uint32 | |
var hash2 uint32 | |
binary.Read(buf, binary.LittleEndian, &hash1) | |
binary.Read(buf, binary.LittleEndian, &hash2) | |
return hash1, hash2 | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment