Created
August 12, 2024 16:37
-
-
Save rmcelreath/8cc6d3414f469690287b4982fcf895ae to your computer and use it in GitHub Desktop.
Olympics 2024 medal scaling by population
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# medals per capita demonstration | |
d <- read.delim("medals_all.tsv") | |
library(rethinking) | |
# blank2() | |
d$Medals <- d$WeightedMedals | |
# d$Medals <- d$Gold + d$Silver + d$Bronze | |
d$Pop <- as.numeric(gsub(",", "", d$Population)) | |
# top 10 lists | |
# straight population | |
o <- order( -d$Medals/d$Pop ) | |
d[o,][1:20,2:7] | |
# log population | |
o <- order( -d$Medals/log(d$Pop) ) | |
d[o,][1:20,2:7] | |
# Now a model of diminishing returns that treats each person like a chance at a medal-winner. Chance is very low per person. Diminishing returns arise because large countries get duplicate winners. | |
dat <- list( | |
Medals = d$Medals, | |
n = d$Pop | |
) | |
# z is exponent on probability of being a medal-winner | |
# 1 - (1-p)^n is probability at least one of the n people is a winner | |
m0 <- ulam( | |
alist( | |
Medals ~ dgampois(exp(log_lambda),tau), | |
save> log_lambda <- a + log( 1 - (1-10^(-z))^n ), | |
tau ~ exponential(1), | |
a ~ normal(log(50),5), | |
z ~ normal(7,2) | |
), data=dat , chains=4 , cores=4 , constraints=list(z="lower=0") ) | |
plot( log(d$Pop) , d$Medals , xlab="Log-Population" , ylab="Weighted medals" ) | |
curve( 20*( 1-(1-10^(-7))^exp(x) ) , from=10 , to=25 , add=TRUE ) | |
post <- extract.samples(m0) | |
plot( log(d$Pop) , d$Medals , xlab="Log-Population" , ylab="Weighted medals" ) | |
points( log(d$Pop)[d$Country=="New Zealand"] , d$Medals[d$Country=="New Zealand"] , pch=16 , col="red" ) | |
for ( i in 1:10 ) curve( exp(post$a[i])*( 1 - ( 1 - 10^(-post$z[i]) )^exp(x) ) , add=TRUE , col=col.alpha("red",0.6) ) | |
identify( log(d$Pop) , d$Medals , labels=d$Country ) | |
# each nation residual against population | |
ry <- sapply( 1:89 , function(i) d$Medals[i] - mean(post$log_lambda[,i]) ) | |
plot( log(d$Pop) , ry , xlab="Log-Population" , ylab="residual medals" ) | |
points( log(d$Pop)[d$Country=="New Zealand"] , ry[d$Country=="New Zealand"] , pch=16 , col="red" ) | |
abline(h=0,lty=2) | |
# order by residuals | |
o <- order( -ry ) | |
d[o,][1:20,2:7] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment