Created
March 3, 2014 19:03
-
-
Save Alligator/9332216 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package bloomfilter | |
import ( | |
"crypto/md5" | |
"encoding/binary" | |
"bytes" | |
) | |
type BloomFilter struct { | |
buckets []bool | |
nhashes int | |
} | |
// this is a constructor type thing i guess | |
func New(buckets int) *BloomFilter { | |
bf := new(BloomFilter) | |
bf.buckets = make([]bool, buckets) | |
bf.nhashes = 8 | |
return bf | |
} | |
// add a new item | |
func (bf *BloomFilter) Add(item string) { | |
hashes := get_hashes(item, bf.nhashes, len(bf.buckets)) | |
for _, hash := range hashes { | |
bf.buckets[hash] = true | |
} | |
} | |
// check if we've seem it before | |
func (bf *BloomFilter) Has(item string) bool { | |
hashes := get_hashes(item, bf.nhashes, len(bf.buckets)) | |
for _, hash := range hashes { | |
if bf.buckets[hash] == false { | |
return false; | |
} | |
} | |
return true; | |
} | |
// return a uint32 array of k hashes between 0 and m | |
func get_hashes(item string, k int, m int) []uint32 { | |
hashes := make([]uint32, k) | |
for i := range hashes { | |
a, b := hash(item) | |
hashes[i] = (a + b * uint32(i)) % uint32(m) | |
} | |
return hashes; | |
} | |
// returns two uint32s. one hash two values, sneaky. | |
func hash(item string) (uint32, uint32) { | |
h := md5.New() | |
h.Write([]byte(item)) | |
buf := bytes.NewBuffer(h.Sum(nil)) | |
var hash1 uint32 | |
var hash2 uint32 | |
binary.Read(buf, binary.LittleEndian, &hash1) | |
binary.Read(buf, binary.LittleEndian, &hash2) | |
return hash1, hash2 | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"strings" | |
"bloomfilter" | |
) | |
func main() { | |
var line = "hello dave how are you mate dave" | |
words := strings.Split(line, " ") | |
m := 512 // no. of buckets | |
bf := bloomfilter.New(m) | |
fmt.Println(words) | |
// skipping some words for testing | |
for _, word := range words[0:3] { | |
bf.Add(word) | |
} | |
for _, word := range words { | |
fmt.Printf("%s \t %t\n", word, bf.Has(word)) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment