Created
November 27, 2010 01:59
-
-
Save nominolo/717475 to your computer and use it in GitHub Desktop.
Preliminary Hs interface to MurmurHash3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE BangPatterns #-} | |
module Murmur3 where | |
import Data.Word ( Word32 ) | |
import Data.Bits ( rotateL, xor, shiftR ) | |
-- Same associativity as the proposed `mappend` operator for Data.Monoid | |
infixr 6 <> | |
-- MurmurHash3 uses 3 state variables, h1, c1, c2 | |
-- some variations also use h1, h2 (i.e., a 64 bit hash) | |
-- MurmurHash3 is currently in beta, so the magic constants may change | |
bmix32 :: Word32 -- ^ Key | |
-> Word32 -- ^ Current Hash | |
-> Word32 -- ^ Current c1 | |
-> Word32 -- ^ Current c2 | |
-> (Word32 -> Word32 -> Word32 -> a) | |
-> a | |
bmix32 (!k1) !h1 !c1 !c2 kont = | |
let !k1' = rotateL (k1 * c1) 11 * c2 | |
!h1' = (h1 `xor` k1') * 3 + 0x52dce729 | |
!c1' = c1 * 5 + 0x7b7d159c | |
!c2' = c2 * 5 + 0x6bce6396 | |
in kont h1' c1' c2' | |
{-# INLINE bmix32 #-} | |
fmix32 :: Word32 -> Word32 | |
fmix32 (!h0) = | |
let !h1 = h0 `xor` (h0 `shiftR` 16) | |
!h2 = h1 * 0x85ebca6b | |
!h3 = h2 `xor` (h2 `shiftR` 13) | |
!h4 = h3 * 0xc2b2ae35 | |
!h5 = h4 `xor` (h4 `shiftR` 16) | |
in h5 | |
{-# INLINE fmix32 #-} | |
type Result = Word32 | |
-- Analogous to the Builder for Binary and Text | |
newtype Hasher = Hasher | |
{ unHasher :: Word32 -> Word32 -> Word32 | |
-> (Word32 -> Word32 -> Word32 -> Result) | |
-> Result } | |
class Hashable a where | |
hashAdd :: a -> Hasher | |
-- The Primitive | |
hashAddWord32 :: Word32 -> Hasher | |
hashAddWord32 k = Hasher (\h1 c1 c2 kont -> bmix32 k h1 c1 c2 kont) | |
{-# INLINE hashAddWord32 #-} | |
(<>) :: Hasher -> Hasher -> Hasher | |
Hasher f1 <> Hasher f2 = | |
Hasher (\h1 c1 c2 kont -> | |
f1 h1 c1 c2 (\h1' c1' c2' -> f2 h1' c1' c2' kont)) | |
{-# INLINE (<>) #-} | |
makeHash :: Word32 -> Hasher -> Word32 | |
makeHash seed (Hasher f) = | |
let !h1 = 0x971e137b `xor` seed | |
!c1 = 0x95543787 | |
!c2 = 0x2ad7eb25 | |
in f h1 c1 c2 (\h _ _ -> fmix32 h) | |
hash :: Hashable a => a -> Word32 | |
hash a = makeHash 0xdeadbeef (hashAdd a) | |
test1 :: Word32 | |
test1 = makeHash 0 (hashAddWord32 1 <> hashAddWord32 2) | |
-- ------------------------------------------------------------------- | |
-- Instances (flavour) | |
instance Hashable Hasher where hashAdd = id -- for convenience | |
instance Hashable () where hashAdd _ = hashAddWord32 1 | |
instance Hashable Int where hashAdd i = hashAddWord32 (fromIntegral i) | |
instance Hashable a => Hashable (Maybe a) where | |
hashAdd Nothing = hashAddWord32 1 | |
hashAdd (Just a) = hashAddWord32 2 <> hashAdd a | |
-- these are hash collisions, but they are at different types | |
tests = and | |
[ hash one == 1636913742 | |
, hash () == 1636913742 | |
, hash (Nothing :: Maybe Int) == 1636913742 | |
, hash (Just one) == 2796854847 | |
, hash (hashAdd two <> hashAdd one) == 2796854847 | |
] | |
where one = 1 :: Int | |
two = 2 :: Int |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment