Skip to content

Instantly share code, notes, and snippets.

@masaeedu
Last active April 25, 2019 13:16
Show Gist options
  • Select an option

  • Save masaeedu/cabfe7666121baa158cd5d10849d6028 to your computer and use it in GitHub Desktop.

Select an option

Save masaeedu/cabfe7666121baa158cd5d10849d6028 to your computer and use it in GitHub Desktop.
Reservoir sampling in Haskell
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module Main where
import Prelude as P
import Data.Bool
import Data.Maybe
import Data.Ratio
import Data.Foldable as F
import Data.Monoid
import Data.IORef
import Data.List.NonEmpty
import Control.Monad.State.Class
import Control.Monad.State.Lazy
import Control.Monad.Reader.Class
import Control.Monad.Reader
import Control.Monad as M
import Control.Foldl as FL
import System.Random
roll :: (Random a, RandomGen g, MonadState g m) => a -> a -> m a
roll lo hi = state $ randomR (lo, hi)
withIndex :: [a] -> [(Int, a)]
withIndex = P.zipWith (,) [0..]
shouldSelect :: (RandomGen g, MonadState g m) => Int -> m Bool
shouldSelect i = do
(p :: Float) <- roll 0 1
pure $ p < 1 / (fromIntegral $ i + 1)
sample :: forall g m a. (RandomGen g, MonadState g m) => NonEmpty a -> m a
sample (x :| xs) = FL.foldM redex (withIndex xs)
where
redex :: FoldM m (Int, a) a
redex = FoldM (\p (i, c) -> bool p c <$> shouldSelect i) (pure x) (pure)
-- Here's an equivalent definition of sample that uses the built in
-- Control.Monad.foldM. Unfortunately this leaks memory
-- sample :: forall g m a. (RandomGen g, MonadState g m) => NonEmpty a -> m a
-- sample (x :| xs) = M.foldM redex x (withIndex xs)
-- where
-- redex :: (a -> (Int, a) -> m a)
-- redex p (i, c) = bool p c <$> shouldSelect i
-- While we could just use StateT, just for fun, we're going to make our own instance of
-- MonadState that updates a mutable reference in an IO context
newtype RStateT s m a = RStateT { runRStateT :: ReaderT (IORef s) m a } deriving (Functor, Applicative, Monad, MonadReader (IORef s), MonadIO)
instance MonadIO m => MonadState s (RStateT s m) where
get = ask >>= liftIO . readIORef
put s = ask >>= liftIO . flip writeIORef s
state f = do
r <- ask
s <- liftIO $ readIORef r
let (a, s') = f s
liftIO $ writeIORef r s'
pure a
exampleStream :: NonEmpty Int
exampleStream = 1 :| [2..10000000]
main :: IO ()
main = do
-- Create a new random seed
g <- newStdGen
-- Sample the stream
ior <- newIORef g
i <- flip runReaderT ior . runRStateT $ sample exampleStream
-- i <- return $ flip evalState g $ sample exampleStream -- we could also use the vanilla State monad
-- Print the selected value
print i
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment