Skip to content

Instantly share code, notes, and snippets.

@jgreco
Created November 13, 2018 00:14
Show Gist options
  • Save jgreco/69f063f3e1b058fdf867c231c68bb47e to your computer and use it in GitHub Desktop.
Save jgreco/69f063f3e1b058fdf867c231c68bb47e to your computer and use it in GitHub Desktop.
#lang racket
(require "murmur3.rkt")
(require data/bit-vector)
(require racket/serialize)
(provide make-bloom-filter make-recommended-bloom-filter bloom-filter-recommender bloom-filter-member? bloom-filter-batch-member? bloom-filter-add! false-positive-rate )
(define (murmur-int x seed) (murmur-hash (integer->integer-bytes x 8 false false) seed))
(define (h1 x) (murmur-int x 583))
(define (h2 x) (murmur-int x 2387))
(define (kmo-hashes x k)
(define (kmo-hash x i)
(+ (h1 x) (* i (h2 x))))
(for/list ([i (in-range 1 (add1 k))])
(kmo-hash x i)))
(serializable-struct bloom-filter (bv k m [n #:mutable]))
(define (false-positive-rate . args)
(define (fp-rate k m n)
(expt (- 1 (exp (- (/ (* k n)
m)))) k))
(match args
[(bloom-filter _ k m n) (fp-rate k m n)]
[(list k m n) (fp-rate k m n)]))
(define (bloom-filter-recommender n tolerance)
(define (optimal-m n tolerance) (exact-ceiling (- (/ (* n (log tolerance)) (expt (log 2) 2)))))
(define (optimal-k n m) (exact-ceiling (* (/ m n) (log 2))))
(let* ([m (optimal-m n tolerance)]
[k (optimal-k n m)])
(list k m)))
(define (make-bloom-filter k m)
(bloom-filter (make-bit-vector m) k m 0))
(define (make-recommended-bloom-filter n tolerance)
(apply make-bloom-filter (bloom-filter-recommender (max n 10) tolerance)))
(define/match (bloom-filter-add! bf x)
[((bloom-filter bv k m n) x)
(for ([i (map (λ (h) (modulo h m))
(kmo-hashes x k))])
(bit-vector-set! bv i true))
(set-bloom-filter-n! bf (add1 n))])
(define/match (bloom-filter-member? bf x)
[((bloom-filter bv k m n) x)
(for/and ([i (map (λ (h) (modulo h m))
(kmo-hashes x k))])
(bit-vector-ref bv i))])
(define (bloom-filter-batch-member? bfs-assoc x)
(define max-k (foldl (λ (x mx) (max (bloom-filter-k (cdr x)) mx)) 0 bfs-assoc))
(define hashes (kmo-hashes x max-k))
(map
car
(filter
(match-lambda
[(cons label (bloom-filter bv k m n))
(for/and ([i (map (λ (h) (modulo h m))
(take hashes k))])
(bit-vector-ref bv i))])
bfs-assoc)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment