Created
February 8, 2011 00:46
-
-
Save tildedave/815618 to your computer and use it in GitHub Desktop.
cuckoo hashing in haskell ala http://programmingpraxis.com/2011/02/01/cuckoo-hashing/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module CuckooHash | |
where | |
import Data.Array | |
import Data.Maybe | |
data Cell a b = EmptyCell | Cell a b | |
deriving Show | |
type HashArray a b = Array Int (Cell a b) | |
data Hash a b = | |
Hash | |
[a -> Int] -- hash provider | |
(a -> Int) -- hash fn1 | |
(a -> Int) -- hash fn2 | |
(HashArray a b) | |
isEmpty EmptyCell = True | |
isEmpty _ = False | |
cellHasKey :: (Eq a) => a -> Cell a b -> Bool | |
cellHasKey k (Cell a b) = (k == a) | |
cellHasKey k (EmptyCell) = False | |
cellGetValue (Cell a b) = b | |
cellToPair (Cell a b) = (a,b) | |
hashArray (Hash _ _ _ arr) = arr | |
updateHashArray (Hash hp fn1 fn2 arr) arr' = (Hash hp fn1 fn2 arr') | |
instance (Show a, Show b) => Show (Hash a b) where | |
show (Hash fp f1 f2 arr) = show arr | |
emptyHash n prov fn1 fn2 = Hash prov fn1 fn2 (array (1,n) [(i,EmptyCell) | i <- [1..n]]) | |
emptyHashFromProv n fp = | |
let [new_fn1,new_fn2] = take 2 fp | |
rest_prov = drop 2 fp | |
in | |
emptyHash n rest_prov new_fn1 new_fn2 | |
-- hashSize has inefficient implementation | |
hashSize (Hash _ _ _ arr) = length $ elems arr | |
hashAsList :: (Hash a b) -> [(a,b)] | |
hashAsList h = | |
map cellToPair $ filter (not . isEmpty) $ elems (hashArray h) | |
hashCycle :: (Hash a b) -> (Hash a b) | |
hashCycle h@(Hash fp fn1 fn2 arr) = | |
let n = hashSize h | |
in | |
emptyHashFromProv n fp | |
hashInsert h k v = hashInsertHelper h k v [] | |
-- generate insertion operations; if infinite loop detected raise an exception | |
hashInsertHelper :: (Eq a, Eq b) => (Hash a b) -> a -> b -> [(a,b)] -> (Hash a b) | |
hashInsertHelper h@(Hash fp fn1 fn2 arr) k v soFar | |
| (isEmpty $ (!) arr (fn1 k)) = updateHashArray h (arr//[(fn1 k, Cell k v)]) | |
| (isEmpty $ (!) arr (fn2 k)) = updateHashArray h (arr//[(fn2 k, Cell k v)]) | |
| otherwise = let Cell dk dv = (!) arr (fn1 k) | |
displacedHash = updateHashArray h (arr //[(fn1 k, Cell k v)]) | |
in | |
if (elem (k,v) soFar) then | |
let all_kv_pairs = (k,v):(hashAsList h) | |
eh = hashCycle h | |
in | |
-- insert all values of h into an empty hash with new hash | |
-- functions new_fn1, new_fn2 and updated provider | |
foldr (\(k,v) -> \h -> hashInsert h k v) eh all_kv_pairs | |
else | |
hashInsertHelper displacedHash dk dv ((k,v) : soFar) | |
hashLookup :: (Eq a, Eq b) => (Hash a b) -> a -> Maybe b | |
hashLookup h@(Hash fp fn1 fn2 arr) k | |
| (cellHasKey k fn1cell) = Just $ cellGetValue fn1cell | |
| (cellHasKey k fn2cell) = Just $ cellGetValue fn2cell | |
| otherwise = Nothing | |
where | |
fn1cell = (!) arr (fn1 k) | |
fn2cell = (!) arr (fn2 k) | |
hashRemove :: (Eq a, Eq b) => (Hash a b) -> a -> (Hash a b) | |
hashRemove h@(Hash fp fn1 fn2 arr) k | |
| (cellHasKey k fn1cell) = updateHashArray h (arr//[(fn1 k, EmptyCell)]) | |
| (cellHasKey k fn2cell) = updateHashArray h (arr//[(fn2 k, EmptyCell)]) | |
| otherwise = h | |
where | |
fn1cell = (!) arr (fn1 k) | |
fn2cell = (!) arr (fn2 k) | |
module TestCuckoo | |
where | |
import CuckooHash | |
import Data.Char | |
import Data.List | |
import Data.Array | |
import Data.Maybe | |
import Test.QuickCheck | |
-- "TESTING" CODE | |
coprimes n = filter (\(a,b) -> a `mod` b == 1) [ (a,b) | a <- [ 2..n], b <- [2..n]] | |
primes = [ 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199 ] | |
-- Universal Hash: http://en.wikipedia.org/wiki/Universal_hashing | |
decentHashFamily n = [ \x -> ((a * x + b) `mod` p) `mod` n | (a,b) <- (coprimes n), p <- primes] | |
-- Weakness here: two strings with same character value have same pre-hash number | |
stringToInteger :: String -> Int | |
stringToInteger s = foldr (+) 0 (map ord s) | |
stringHashFamily n = [ \s -> f (stringToInteger s) | f <- (decentHashFamily n)] | |
-- Random strings (needed to redefine b/c of QuickCheck 2.4) | |
instance Arbitrary Char where | |
arbitrary = choose ('\32', '\128') | |
coarbitrary c = variant (ord c `rem` 4) | |
-- Make a hash | |
stringListToHash :: [String] -> ([String], Hash String String) | |
stringListToHash sl = | |
let l = filter (\s -> length s > 0) $ nub sl | |
h = emptyHashFromProv 128 $ stringHashFamily 128 | |
in | |
(l, foldr (\k -> \h -> hashInsert h k k) h l) | |
testInsertion :: [String] -> Bool | |
testInsertion sl = | |
let (l, constructedHash) = stringListToHash sl | |
in | |
-- assert each insertion is lookup-able the hash | |
all (\k -> isJust $ hashLookup constructedHash k) l | |
testRemoval :: [String] -> Bool | |
testRemoval sl = | |
let (l, constructedHash) = stringListToHash sl | |
in | |
-- assert that removing an element results in no lookup | |
all (\k -> isNothing $ (hashLookup (hashRemove constructedHash k) k)) l | |
main = do | |
quickCheck testInsertion | |
quickCheck testRemoval |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment