Bayesian Inference example: Urn with Replacement
#lang racket
;; urn.rkt:
;; 5-ball urn statistical process:
;; - A Sampling Engine
;; - An Inference Engine
;; - Some Viz/Rendering tools (under construction)
(require math/distributions)
(require pict)
(require plot)
;; Color is 'blue | 'white
;;
;; Stochastic Simulator
;;
(define URN-SIZE 5)
;; UrnBlues is [0,URN-SIZE]
;; - number of blue balls in an urn of 5 balls
;; Take 1: do it all by hand
;; UrnBlues [0,URN-SIZE) -> Color
;; What is the color of the idxth ball (assuming blue ones are first)?
(define (get-ball-color urn-blues idx)
  (if (< idx urn-blues)
      'blue
      'white))
;; UrnBlues -> Color
;; sampling with replacement from the urn
(define (draw-sample urn-blues)
  (let ([index (random URN-SIZE)])
    (get-ball-color urn-blues index)))
;; UrnBlues n:Natural -> { l ∈ (listof Color) | (length l) = n }
(define (draw-samples urn-blues n)
  (map (λ (_) (draw-sample urn-blues)) (range n)))
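;; Illustrative use (hypothetical output; actual draws are random):
;; > (draw-samples 3 5)
;; '(blue white blue blue white)   ; roughly 3/5 of the draws come up blue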
;; Take 2: exploit Racket's discrete distribution mechanism!
;; UrnBlues n:Natural -> { l ∈ (listof Color) | (length l) = n }
(define (draw-urn-samples urn-blues n)
  (let ([dist (discrete-dist (list 'blue 'white)
                             (list urn-blues
                                   (- URN-SIZE urn-blues)))])
    (sample dist n)))
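;; Illustrative use (hypothetical output; actual draws are random). Note that
;; discrete-dist normalizes its weights, so the raw counts urn-blues and
;; (- URN-SIZE urn-blues) can be passed directly:
;; > (draw-urn-samples 3 4)
;; '(blue blue white blue)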
;; UrnBlues Natural -> Rational
;; produce the blue rate of sampling from an urn count times
;; Use this function to sanity-check that drawing samples is well-behaved
(define (rate urn count)
  (/ (length (filter (λ (c) (equal? c 'blue))
                     (draw-samples urn count)))
     count))
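;; Sanity-check sketch (the result varies from run to run, but by the law of
;; large numbers it should settle near urn-blues/URN-SIZE):
;; > (exact->inexact (rate 3 1000))
;; ≈ 0.6 for a 3-blue urn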
;;
;; Inference Engine -- Given a list of urn samples, predict how many
;; blue balls are in the urn
;;
;; Random Variables:
;; C=c - the sampled ball is 'white or 'blue
;; B=i - the urn has i blue balls out of 5
;; Cs='(c1 ...) - the list of sampled ball colors, in order, is '(c1 ...)
;; (well...technically this is a Nat-indexed set of random variables
;; Cs_i, each representing the results of the first *i* samples)
;; The inference engine incrementally updates the model's *beliefs*
;; P(B=i|Cs='(c1 ...))
;; Likelihoods -- these remain constant
;; P(C='blue|B=i)
;; Factor for updating your prior on seeing a blue ball
;; i.e., likelihood that you draw a blue ball if the urn has i blue balls
(define pbi*
  '(0 1/5 2/5 3/5 4/5 5/5))
;; P(C='white|B=i)
;; Factor for updating your prior on seeing a white ball
;; i.e., likelihood that you draw a white ball if the urn has i blue balls
(define pwi*
  '(5/5 4/5 3/5 2/5 1/5 0))
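;; Consistency check on the likelihood tables: every draw is either blue or
;; white, so P(C='blue|B=i) + P(C='white|B=i) = 1 for each of the six urn
;; compositions. A minimal rackunit sketch (run with `raco test urn.rkt`):
(module+ test
  (require rackunit)
  (check-equal? (map + pbi* pwi*) '(1 1 1 1 1 1))
  (check-equal? (length pbi*) (add1 URN-SIZE)))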
;; Initial prior -- deduction must start from some assumption.
;; P(B=i|Cs='())
;; Given no data, we assume no preconceived inclination toward any
;; urn structure, modeled with uniform initial probabilities.
(define pi*0 '(1/6 1/6 1/6 1/6 1/6 1/6))
;; Sequential Bayesian Inference: see
;; https://stats.stackexchange.com/questions/244396/\
;; bayesian-updating-coin-tossing-example
;;
;; http://www.stats.ox.ac.uk/~steffen/teaching/bs2HT9/kalman.pdf
;; P(B=i|Cs='(c1 ... . cn)) =
;; P(C=cn|B=i)P(B=i|Cs='(c1 ...)) / (Σ_i P(C=cn|B=i)P(B=i|Cs='(c1 ...)))
;; In particular, P(C=cn|B=i,Cs='(c1 ...)) = P(C=cn|B=i) because
;; C ⊥ Cs | B (conditional independence)
;; otherwise we'd have to deal with P(C=cn|B=i,Cs='(c1 ...))
;; https://en.wikipedia.org/wiki/Conditional_independence
;; Note: we're being explicit about accumulating a sequence of
;; observations at each step, rather than saying that
;; "the posterior P(B=i|C=c) becomes the new prior P(B=i) in the next step."
;; :: shudder! ::
;; P(C=c|Cs='(c1 ...)) = (Σ_i P(C=c|B=i)P(B=i|Cs='(c1 ...)))
;; C and Cs are NOT independent, only *conditionally* independent
(define (pcc* pci* pi*)
  (apply + (map * pci* pi*)))
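;; Worked check: under the uniform prior, the prior predictive probability of
;; drawing a blue ball is (0 + 1/5 + 2/5 + 3/5 + 4/5 + 1)/6 = 1/2, and by
;; symmetry the same holds for white. A minimal rackunit sketch:
(module+ test
  (require rackunit)
  (check-equal? (pcc* pbi* pi*0) 1/2)
  (check-equal? (pcc* pwi* pi*0) 1/2))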
;; Initial P(C='white|Cs='()): 1/2 (changes as your observations change)
(define pw0 (pcc* pwi* pi*0))
;; Initial P(C='blue|Cs='()): 1/2 (changes as your observations change)
(define pb0 (pcc* pbi* pi*0))
;; perform an inference step given P(C=c|B=i), P(B=i|Cs='(c1 ...))
(define (infer pci* pi*)
  (let ([the-pcc* (pcc* pci* pi*)])
    ;(display (exact->inexact the-pcc*)) (display " ")
    (map (λ (pcb pb) (/ (* pcb pb) the-pcc*))
         pci* pi*)))
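;; Worked example: observing one blue ball under the uniform prior multiplies
;; each prior weight 1/6 by the likelihood i/5 and renormalizes by the marginal
;; 1/2, so the posterior for B=i is i/15, i.e. '(0 1/15 2/15 1/5 4/15 1/3).
;; A minimal rackunit sketch:
(module+ test
  (require rackunit)
  (check-equal? (infer pbi* pi*0) '(0 1/15 2/15 1/5 4/15 1/3)))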
;; update belief based on an observation
;; c:Color pi*:P(B=i|Cs='(c1 ...)) -> P(B=i|Cs='(c1 ... . c))
(define (step c pi*)
  (cond
    [(equal? c 'blue) (infer pbi* pi*)]
    [(equal? c 'white) (infer pwi* pi*)]))
;; (listof Color) -> (listof P(B=i))
;; given a list of samples, trace sequential Bayesian inference.
(define (simulate-bayes c*0)
  (local [(define (loop c* pi*)
            (cond
              [(empty? c*) (list pi*)]
              [else
               (cons pi* (loop (rest c*)
                               (step (first c*) pi*)))]))]
    (loop c*0 pi*0)))
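;; Note that the trace includes the prior, so n samples produce n+1 belief
;; states. For a single blue observation the trace is just the uniform prior
;; followed by the one-step posterior computed above. A minimal sketch:
(module+ test
  (require rackunit)
  (check-equal? (simulate-bayes '(blue))
                (list pi*0 '(0 1/15 2/15 1/5 4/15 1/3))))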
;; (listof Color) -> UrnBlues
;; given a list of samples, predict the contents of the urn
(define (predict c*)
  (let* ([stats (last (simulate-bayes c*))]
         [idx-table (map cons stats (range 6))]
         [key (apply max stats)])
    (cdr (assoc key idx-table))))
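;; Worked example: after observing two blue balls and one white, the posterior
;; is '(0 2/25 6/25 9/25 8/25 0); its maximum (9/25) sits at index 3, so the
;; model predicts a 3-blue urn. A minimal rackunit sketch:
(module+ test
  (require rackunit)
  (check-equal? (predict '(blue blue white)) 3))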
;; (listof Color) -> Void
;; print the predicted number of blue balls, with units
(define (wifey-demands-units c*)
  (let ([n (predict c*)])
    (printf "~a blue balls" n)))
;;
;; Rendering tools - Makes the wifey happy with visualizations
;; Now using Racket Plot Library
;;
;; Color -> Pict
;; represent the given ball visually
(define (render-ball color)
  (disk 20 #:color (symbol->string color)))
;; (listof Color) -> Pict
;; lay out a list of balls horizontally
(define (render-balls color*)
  (apply ht-append (map render-ball color*)))
(plot-width 120)
(plot-height 120)
;; Render the probability mass function over the six possible urn compositions
(define (plot-probs p*)
  (let* ([x-lbls (range 6)]
         [entries (map vector x-lbls p*)])
    (plot (discrete-histogram entries)
          #:y-max 1 #:x-label #f #:y-label #f)))
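;; Illustrative use: (plot-probs pi*0) draws the flat prior as six equal bars;
;; (plot-probs (step 'blue pi*0)) shows belief shifting toward blue-heavy urns
;; after a single blue draw.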
;; (listof Color) -> (listof Pict)
;; render a sequence of Bayesian inference steps
(define (render-bayes c*0)
  (let ([b* (map render-ball c*0)])
    (let ([chart* (map plot-probs (simulate-bayes c*0))])
      (cons (first chart*)
            (apply append
                   (map (λ (b chart) (list b chart))
                        b* (rest chart*)))))))
;;
;; Example Use
;;
;; (define blues 3) ;; 3 blue and 2 white balls in urn
;; (define samples (draw-samples blues 10)) ;; draw 10 samples with replacement
;; (render-balls samples) ;; visualize the samples
;; (render-bayes samples) ;; visualize sequential Bayesian updating
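;; Follow-ups for the same hypothetical session:
;; (predict samples) ;; most likely number of blue balls given the samples
;; (wifey-demands-units samples) ;; the same prediction, printed with units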
Inspired by "Statistical Rethinking" lectures by Richard McElreath
https://www.youtube.com/playlist?list=PLDcUM9US4XdM9_N6XUUFrhghGJ4K25bFc