Created
June 3, 2013 19:07
-
-
Save mathias-brandewinder/5700496 to your computer and use it in GitHub Desktop.
Sampling from items with known proportions.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open System | |
// Sample with replacement | |
let replaceSampler (rng: Random) (counts: int[]) = | |
let N = counts |> Array.sum | |
let draw = rng.Next(N) | |
let rec find index cumul = | |
let n = counts.[index] | |
if draw < (cumul + n) | |
then index | |
else find (index + 1) (cumul + n) | |
find 0 0 | |
// Sample without replacement | |
let sampler (rng:Random) (counts:int[]) = | |
let counts = Array.copy counts | |
let N = counts |> Array.sum | |
let draw = rng.Next(N) | |
let rec find index cumul = | |
let n = counts.[index] | |
if draw < (cumul + n) | |
then | |
let updatedCounts = counts | |
updatedCounts.[index] <- counts.[index] - 1 | |
(index, updatedCounts) | |
else find (index + 1) (cumul + n) | |
find 0 0 | |
// Usage / example | |
let rng = Random() | |
let replace = replaceSampler rng | |
let counts = [| 10; 40; 50 |] | |
// Sequence should contain | |
// roughly 10%, 40%, 50% of 0, 1, 2 | |
[ for i in 1 .. 100000 -> replace counts ] | |
|> Seq.countBy id | |
|> Seq.toList | |
let noReplace = sampler rng | |
let sample = | |
Seq.unfold (fun (counts:int[]) -> | |
let (i, updatedCounts) = noReplace counts | |
Some(i, updatedCounts)) | |
sample counts |> Seq.take 50 |> Seq.countBy id |> Seq.toList | |
sample counts |> Seq.take 100 |> Seq.countBy id |> Seq.toList |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment