Last active
October 31, 2018 11:39
-
-
Save rxg/c4ad2191b768b01140994808134c8a86 to your computer and use it in GitHub Desktop.
What proportion of the earth's surface is covered in water? A Bayesian Inference Approach.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #lang racket | |
| (require plot) | |
| (require math/base) | |
| (require math/number-theory) | |
| (require math/distributions) | |
| ;; Integration, from https://github.com/mkierzenka/Racket_NumericalMethods | |
| (require "Numerical_Integration.rkt") | |
| ;; | |
| ;; globe.rkt - Globe inference model from Chapter 2 of | |
| ;; McElreath, Statistical Rethinking | |
| ;; | |
| ;; What proportion of the Earth's surface is water? | |
| ;; McElreath offers an experiment where you toss a model of the globe in | |
| ;; the air, and track where your index finger lands when you catch it: | |
| ;; on water, or on land? You can apply this data (outcomes) to a statistical | |
| ;; inference engine to acquire statistical information about plausible | |
| ;; proportions of land and water. | |
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
| ;; Stochastic Simulator - 'cause I'm too lazy to toss an inflatable globe | |
| ;; | |
| ;; UNESCO says the proportion of water covering the surface of Earth is | |
| ;; between 71% and 72% (it changes with tides, etc.) | |
| (define water-proportion 71) | |
| ;; Obs is 'water or 'land | |
| ;; Samples is (listof Obs) | |
| ;; Summary is (list Natural Natural) | |
| ;; (list w n) is the number of water outcomes w | |
| ;; and the total number of trials n | |
| ;; () -> Obs | |
| ;; sampling with replacement from the urn | |
| (define (sample-globe) | |
| (let ([draw (random 100)]) | |
| (if (< draw water-proportion) | |
| 'water | |
| 'land))) | |
| ;; n:Natural -> { l ∈ (listof Obs) | (length l) = n } | |
| (define (draw-samples n) | |
| (map (λ (_) (sample-globe)) (range n))) | |
| ;; Samples -> Summary | |
| ;; Given samples, report the summary | |
| (define (summarize-samples samples) | |
| (list (length (filter (λ (s) (equal? s 'water)) samples)) | |
| (length samples))) | |
| ;; Samples -> (listof Summary) | |
| ;; produce a list of running summaries from a list of samples | |
| ;; including the 0 case | |
| (define (running-summary samples) | |
| (map (λ (n) (summarize-samples (take samples n))) | |
| (range (add1 (length samples))))) | |
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
| ;; Inference Engine | |
| ;; | |
| ;; Statistical Variables | |
| ;; n - Number of globe tosses overall | |
| ;; p - Estimate of the proportion of water covering the globe | |
| ;; w - Number of water observations from tossing the globe | |
| ;; likelihood function - binomial distribution | |
| ;; Pr(w | n,p) | |
| ;; the count of water observations w is distributed binomially, with | |
| ;; probability p of water on each toss, and n tosses total. | |
| (define (exact-likelihood w n p) | |
| (* | |
| (/ (factorial n) | |
| (* (factorial w) (factorial (- n w)))) | |
| (expt p w) | |
| (expt (- 1 p) (- n w)))) | |
| ;; ...or build one using the binomial distribution that comes with Racket | |
| (define (likelihood w n p) | |
| (let ([d (binomial-dist n p)]) | |
| (let ([pdf (distribution-pdf d)]) | |
| (pdf w)))) | |
| ;; Turns out they both compute the same thing! And the last number | |
| ;; matches McElreath's result (p. 33). Phew! | |
| ;(build-list 7 (λ (w) (pr w 9 0.5))) | |
| ;(build-list 7 (λ (w) (pr2 w 9 0.5))) | |
| ;; | |
| ;; Candidate Prior distributions on p: | |
| ;; | |
| ;; uniform distribution (it's 1 everywhere!) | |
| (define (flat-prior p) | |
| (let ([d (uniform-dist 0 1)]) | |
| (let ([pdf (distribution-pdf d)]) | |
| (pdf p)))) | |
| ;; uniform-dist above produces the inexact number 1.0 | |
| ;; this one produces the exact natural number 1 | |
| ;; useful for better understanding the theory | |
| ;; (i.e. distinguishing numerical underflow from actual 0) | |
| (define (exact-flat-prior p) 1) | |
| ;; DEFINITELY at least half of the globe is water | |
| (define (cutoff-prior p) | |
| (if (< p 1/2) | |
| 0 | |
| 2)) | |
| ;; funky spikey prior | |
| (define (spikey-prior p) | |
| (exp (* -5 (abs (- p 0.5))))) | |
| ;; | |
| ;; Joint Probability: Likelihood * Prior | |
| ;; | |
| (define (joint w n p prior) | |
| (* (likelihood w n p) | |
| (prior p))) | |
| ;; exact joint probability, assuming (exact) uniform flat prior | |
| (define (exact-joint w p n) | |
| (exact-likelihood w n p)) | |
| ;; | |
| ;; Bayesian Inference Using Numerical Integration | |
| ;; | |
| ;; Calculate the "average likelihood" a.k.a. marginal | |
| ;; using numerical integration | |
| (define (marginal w n prior) | |
| (adaptive-integrate (λ (p) (joint w n p prior)) 0 1 0.001)) | |
| ;; Posterior pdf generator (convenient for plotting and queries) | |
| (define (mk-posterior w n prior) | |
| (let ([avg (marginal w n prior)]) ;; compute the marginal once | |
| (λ (p) | |
| (/ (joint w n p prior) | |
| avg)))) | |
| ;; plot the posterior for the given summary data | |
| (define (plot-posterior w n prior y-max) | |
| (plot (function (mk-posterior w n prior) 0 1) | |
| #:x-label #f #:y-label #f #:y-min 0 #:y-max y-max)) | |
| ;; | |
| ;; Bayesian Inference Using Grid Approximation | |
| ;; | |
| ;; McElreath's approximate marginal is faux-discrete: kinda wonky. | |
| ;; Instead we approximate the integral with boxes a la Gelman et al. | |
| ;; a dead simple integral approximation. | |
| ;; doesn't try to be smart at the boundaries | |
| (define (box-integrate ys diff) | |
| #;(sum (map (λ (y) (* y diff)) ys)) | |
| ;; exploit distributivity of multiplication over sums | |
| (* (sum ys) diff)) | |
| ;; compute joint probability at a set of points | |
| (define (joints w n prior points) | |
| (map (λ (p) (joint w n p prior)) points)) | |
| ;; approximate the posteriors at the given points | |
| (define (grid-point-posterior w n prior points) | |
| (let* ([joint* (joints w n prior points)] | |
| [diff (- (second points) (first points))] ;; recover the grid diff | |
| [approx-marginal (box-integrate joint* diff)]) | |
| (map (λ (joint) (/ joint approx-marginal)) joint*))) | |
| ;; create a grid of size points from m to n | |
| (define (grid m n size) | |
| (let* ([diff (- n m)] | |
| [delta (/ diff (sub1 size))]) | |
| ;; make sure the last point is n (overkill if m and n are exact numbers) | |
| (append (map (λ (d) (+ m (* delta d))) | |
| (range (sub1 size))) | |
| (list n)))) | |
| ;; Produce a grid of "size" posterior points | |
| (define (grid-size-posterior w n prior size) | |
| (let ([points (grid 0 1 size)]) | |
| (grid-point-posterior w n prior points))) | |
| ;; collate points and posteriors into plotting coordinates | |
| (define (line-coords points posteriors) | |
| (map vector points posteriors)) | |
| ;; given w n and size, generate a list of posterior coords | |
| (define (grid-approx-coords w n prior size) | |
| (let ([posteriors (grid-size-posterior w n prior size)]) | |
| (line-coords points posteriors))) | |
| ;; plot coordinates as a line and maybe label the points | |
| (define (plot-lines-labels coords labels?) | |
| (let ([maybe-points (if (not labels?) | |
| empty | |
| (map point-label coords))]) | |
| (plot (cons (lines coords) maybe-points) | |
| #:y-min 0 #:x-label #f #:y-label #f))) | |
| ;; plot "size" grid posterior points for the summary (w,n) | |
| (define (plot-grid-approx w n prior size labels?) | |
| (let* ([coords (grid-approx-coords w n prior size)]) | |
| (plot-lines-labels coords labels?))) | |
| ;; | |
| ;; More Visualization Tools | |
| ;; | |
| ;; interleave the contents of two lists | |
| ;; to show each sample and its effect on the posterior | |
| (define (interleave lsta lstb) | |
| (cond | |
| [(empty? lsta) lstb] | |
| [else (cons (first lsta) | |
| (interleave lstb (rest lsta)))])) | |
| ;; given a list of samples, trace sequential Bayesian inference. | |
| ;; parameterized on inference/plotting engine | |
| (define (simulate-bayes obs* infer-plot) | |
| (let ([sm* (running-summary obs*)]) | |
| (let ([plots | |
| (map (λ (sm) | |
| (let ([w (first sm)] | |
| [n (second sm)]) | |
| (infer-plot w n))) | |
| sm*)]) | |
| (interleave plots obs*)))) | |
| ;; wrapper to simulate inference using grid approximation | |
| (define (simulate-grid-bayes prior size obs*) | |
| (simulate-bayes obs* | |
| (λ (w n) (plot-grid-approx w n prior size #f)))) | |
| ;; wrapper to simulate inference using numerical integration | |
| (define (simulate-integral-bayes prior obs* y-max) | |
| (simulate-bayes obs* | |
| (λ (w n) (plot-posterior w n prior y-max)))) | |
| ;; Better visualization using both! | |
| ;; a) use grid approximation to find the max posterior | |
| ;; b) use integration to plot simulation, now with common plot axes | |
| (define (plot-sequential-inference prior obs*) | |
| (let ([sm* (running-summary obs*)] | |
| [grid-size 20]) | |
| ;; pre-compute approximations to get y-axis right | |
| (let ([grids | |
| (map (λ (sm) | |
| (let ([w (first sm)] | |
| [n (second sm)]) | |
| (grid-size-posterior w n prior grid-size))) | |
| sm*)]) | |
| (let ([y-max (apply max | |
| (apply append grids))] | |
| [y-scale-factor 1.1]) | |
| (simulate-integral-bayes prior obs* (* y-max y-scale-factor)))))) | |
| ;; Globally set plot dimensions | |
| (plot-width 120) | |
| (plot-height 120) | |
| ;; Example from McElreath Chapter 2: | |
| ;; comparing Quadratic Approximation to the ideal (Figure 2.8, p. 43) | |
| #; | |
| (plot | |
| (list | |
| (function (distribution-pdf (beta-dist 7 4)) 0 1 #:color "blue") | |
| (function (distribution-pdf (normal-dist 0.67 0.16)) 0 1 #:color "black"))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment