Skip to content

Instantly share code, notes, and snippets.

@abailly
Created October 21, 2025 21:01
Show Gist options
  • Save abailly/556d326ae3ab2217c07bc196dc3c0591 to your computer and use it in GitHub Desktop.
Save abailly/556d326ae3ab2217c07bc196dc3c0591 to your computer and use it in GitHub Desktop.
A simple bloom filter with some interesting properties
module BloomSpec where
import Data.Bits (setBit, (.&.), (.|.))
import Data.Function ((&))
import Test.Hspec (Spec)
import Test.Hspec.QuickCheck (modifyMaxDiscardRatio, modifyMaxSuccess, prop)
import Test.QuickCheck
( Confidence (..),
Gen,
Property,
checkCoverageWith,
counterexample,
coverTable,
elements,
forAll,
listOf1,
property,
stdConfidence,
suchThat,
tabulate,
vectorOf,
(==>),
)
import Prelude hiding (Word)
spec :: Spec
spec = do
prop "always return true if word in filter" $ prop_alwaysTrueIfInFilter
prop "hash function has consistent collision rate" $ prop_has_consistent_collision_rate
modifyMaxSuccess (const 1) $
modifyMaxDiscardRatio (const 10) $
prop "return true if word not in filter with some probability" $
prop_mayReturnTrueIfNotInFilter
prop_has_consistent_collision_rate :: Property
prop_has_consistent_collision_rate =
forAll anyWord $ \w ->
forAll anyWord $ \w' ->
(w /= w') ==>
let hash = hashWithSalt 1 w `mod` 100
hash' = hashWithSalt 1 w' `mod` 100
in property True
& tabulate "uniqueHashes" [show (hash == hash')]
& coverTable "uniqueHashes" [("True", 1), ("False", 99)]
& checkCoverageWith (stdConfidence {tolerance = 0.8})
prop_mayReturnTrueIfNotInFilter :: Property
prop_mayReturnTrueIfNotInFilter =
forAll dictionary $ \dict ->
let bloom = makeBloomFilter m k dict
n = fromIntegral $ length dict
m = 2 ^ 13
k = 3
in forAll (anyWord `suchThat` (`notElem` dict)) $ \w ->
let result = bloom `contains` w
falsePositiveProbability = 100.0 * (1 - exp (negate (fromIntegral k * n / fromIntegral m))) ** fromIntegral k
in property True
& tabulate "result" [show result]
& coverTable "result" [("True", falsePositiveProbability), ("False", 100 - falsePositiveProbability)]
& counterexample
( "False positive probability: "
++ show falsePositiveProbability
++ "%, m: "
++ show m
++ ", n: "
++ show n
++ ", k: "
++ show k
)
& checkCoverageWith (stdConfidence {tolerance = 0.5})
prop_alwaysTrueIfInFilter :: Property
prop_alwaysTrueIfInFilter =
forAll dictionary $ \dict ->
let bloom = makeBloomFilter m k dict
m = 2 ^ 12
k = 1
in forAll (elements dict) $ \w ->
bloom `contains` w
-- * The Code
contains :: BloomFilter -> Word -> Bool
contains (BloomFilter m k bitset) word =
let wordBits = bitsForWord m k word bitset
in (bitset .&. wordBits) == wordBits
anyWord :: Gen Word
anyWord = fmap Word (listOf1 (elements ['a' .. 'z']))
newtype Word = Word String
deriving (Show, Eq)
data BloomFilter = BloomFilter {m :: Int, k :: Int, bitset :: Integer}
deriving (Show, Eq)
makeBloomFilter :: Int -> Int -> [Word] -> BloomFilter
makeBloomFilter m k = go 0
where
go bitset [] = BloomFilter m k bitset
go bitset (w : ws) =
let wordBits = bitsForWord m k w bitset
newBitset = bitset .|. wordBits
in go newBitset ws
bitsForWord :: Int -> Int -> Word -> Integer -> Integer
bitsForWord m k w bitset =
foldl
( \bs i ->
let h = hashWithSalt i w
bit = h `mod` m
in setBit bs bit
)
bitset
[0 .. k - 1]
hashWithSalt :: Int -> Word -> Int
hashWithSalt salt (Word s) =
foldl (\h c -> h * 31 + fromEnum c + salt) 0 s
dictionary :: Gen [Word]
dictionary = vectorOf 1000 anyWord
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment