Last active
October 6, 2015 08:58
-
-
Save ashenfad/2969087 to your computer and use it in GitHub Desktop.
Reservoir Sampling in Clojure
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; For a fully fleshed out library, see: | |
;; https://github.com/bigmlcom/sampling | |
;; ------------------------------------- | |
(ns sample.reservoir | |
"Functions for sampling without replacement using a reservoir.") | |
(defn create | |
"Creates a sample reservoir." | |
[reservoir-size] | |
(with-meta [] {:reservoir-size reservoir-size | |
:insert-count 0})) | |
(defn insert | |
"Inserts a value into the sample reservoir." | |
[reservoir val] | |
(let [{:keys [reservoir-size insert-count]} (meta reservoir) | |
insert-count (inc insert-count) | |
index (rand-int insert-count)] | |
(with-meta (cond (< insert-count reservoir-size) | |
(conj reservoir val) | |
(= insert-count reservoir-size) | |
(shuffle (conj reservoir val)) | |
(< index reservoir-size) | |
(assoc reservoir index val) | |
:else reservoir) | |
{:reservoir-size reservoir-size | |
:insert-count insert-count}))) | |
(defn sample | |
"Returns a reservoir sample for a collection." | |
[coll reservoir-size] | |
(reduce insert (create reservoir-size) | |
coll)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment