Last active
April 25, 2019 13:16
-
-
Save masaeedu/cabfe7666121baa158cd5d10849d6028 to your computer and use it in GitHub Desktop.
Reservoir sampling in Haskell
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| {-# LANGUAGE FlexibleContexts #-} | |
| {-# LANGUAGE ScopedTypeVariables #-} | |
| {-# LANGUAGE ViewPatterns #-} | |
| {-# LANGUAGE GeneralizedNewtypeDeriving #-} | |
| {-# LANGUAGE FlexibleInstances #-} | |
| {-# LANGUAGE MultiParamTypeClasses #-} | |
| module Main where | |
| import Prelude as P | |
| import Data.Bool | |
| import Data.Maybe | |
| import Data.Ratio | |
| import Data.Foldable as F | |
| import Data.Monoid | |
| import Data.IORef | |
| import Data.List.NonEmpty | |
| import Control.Monad.State.Class | |
| import Control.Monad.State.Lazy | |
| import Control.Monad.Reader.Class | |
| import Control.Monad.Reader | |
| import Control.Monad as M | |
| import Control.Foldl as FL | |
| import System.Random | |
| roll :: (Random a, RandomGen g, MonadState g m) => a -> a -> m a | |
| roll lo hi = state $ randomR (lo, hi) | |
| withIndex :: [a] -> [(Int, a)] | |
| withIndex = P.zipWith (,) [0..] | |
| shouldSelect :: (RandomGen g, MonadState g m) => Int -> m Bool | |
| shouldSelect i = do | |
| (p :: Float) <- roll 0 1 | |
| pure $ p < 1 / (fromIntegral $ i + 1) | |
| sample :: forall g m a. (RandomGen g, MonadState g m) => NonEmpty a -> m a | |
| sample (x :| xs) = FL.foldM redex (withIndex xs) | |
| where | |
| redex :: FoldM m (Int, a) a | |
| redex = FoldM (\p (i, c) -> bool p c <$> shouldSelect i) (pure x) (pure) | |
| -- Here's an equivalent definition of sample that uses the built in | |
| -- Control.Monad.foldM. Unfortunately this leaks memory | |
| -- sample :: forall g m a. (RandomGen g, MonadState g m) => NonEmpty a -> m a | |
| -- sample (x :| xs) = M.foldM redex x (withIndex xs) | |
| -- where | |
| -- redex :: (a -> (Int, a) -> m a) | |
| -- redex p (i, c) = bool p c <$> shouldSelect i | |
| -- While we could just use StateT, just for fun, we're going to make our own instance of | |
| -- MonadState that updates a mutable reference in an IO context | |
| newtype RStateT s m a = RStateT { runRStateT :: ReaderT (IORef s) m a } deriving (Functor, Applicative, Monad, MonadReader (IORef s), MonadIO) | |
| instance MonadIO m => MonadState s (RStateT s m) where | |
| get = ask >>= liftIO . readIORef | |
| put s = ask >>= liftIO . flip writeIORef s | |
| state f = do | |
| r <- ask | |
| s <- liftIO $ readIORef r | |
| let (a, s') = f s | |
| liftIO $ writeIORef r s' | |
| pure a | |
| exampleStream :: NonEmpty Int | |
| exampleStream = 1 :| [2..10000000] | |
| main :: IO () | |
| main = do | |
| -- Create a new random seed | |
| g <- newStdGen | |
| -- Sample the stream | |
| ior <- newIORef g | |
| i <- flip runReaderT ior . runRStateT $ sample exampleStream | |
| -- i <- return $ flip evalState g $ sample exampleStream -- we could also use the vanilla State monad | |
| -- Print the selected value | |
| print i |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment