Created
June 3, 2022 20:55
-
-
Save MJacobs1985/32db6e7b34e338775fa1a5c02ebc51ef to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
rm(list = ls()) | |
library(dplyr) | |
library(ggplot2) | |
library(brms) | |
library(tidybayes) | |
library(bayestestR) | |
library(grid) | |
library(gridExtra) | |
HtWtDataGenerator <- function(nSubj, rndsd = NULL, maleProb = 0.50) { | |
# Random height, weight generator for males and females. Uses parameters from | |
# Brainard, J. & Burmaster, D. E. (1992). Bivariate distributions for height and | |
# weight of men and women in the United States. Risk Analysis, 12(2), 267-275. | |
# Kruschke, J. K. (2011). Doing Bayesian data analysis: | |
# A Tutorial with R and BUGS. Academic Press / Elsevier. | |
# Kruschke, J. K. (2014). Doing Bayesian data analysis, 2nd Edition: | |
# A Tutorial with R, JAGS and Stan. Academic Press / Elsevier. | |
# require(MASS) | |
# Specify parameters of multivariate normal (MVN) distributions. | |
# Men: | |
HtMmu <- 69.18 | |
HtMsd <- 2.87 | |
lnWtMmu <- 5.14 | |
lnWtMsd <- 0.17 | |
Mrho <- 0.42 | |
Mmean <- c(HtMmu, lnWtMmu) | |
Msigma <- matrix(c(HtMsd^2, Mrho * HtMsd * lnWtMsd, | |
Mrho * HtMsd * lnWtMsd, lnWtMsd^2), nrow = 2) | |
# Women cluster 1: | |
HtFmu1 <- 63.11 | |
HtFsd1 <- 2.76 | |
lnWtFmu1 <- 5.06 | |
lnWtFsd1 <- 0.24 | |
Frho1 <- 0.41 | |
prop1 <- 0.46 | |
Fmean1 <- c(HtFmu1, lnWtFmu1) | |
Fsigma1 <- matrix(c(HtFsd1^2, Frho1 * HtFsd1 * lnWtFsd1, | |
Frho1 * HtFsd1 * lnWtFsd1, lnWtFsd1^2), nrow = 2) | |
# Women cluster 2: | |
HtFmu2 <- 64.36 | |
HtFsd2 <- 2.49 | |
lnWtFmu2 <- 4.86 | |
lnWtFsd2 <- 0.14 | |
Frho2 <- 0.44 | |
prop2 <- 1 - prop1 | |
Fmean2 <- c(HtFmu2, lnWtFmu2) | |
Fsigma2 <- matrix(c(HtFsd2^2, Frho2 * HtFsd2 * lnWtFsd2, | |
Frho2 * HtFsd2 * lnWtFsd2, lnWtFsd2^2), nrow = 2) | |
# Randomly generate data values from those MVN distributions. | |
if (!is.null(rndsd)) {set.seed(rndsd)} | |
datamatrix <- matrix(0, nrow = nSubj, ncol = 3) | |
colnames(datamatrix) <- c("male", "height", "weight") | |
maleval <- 1; femaleval <- 0 # arbitrary coding values | |
for (i in 1:nSubj) { | |
# Flip coin to decide sex | |
sex <- sample(c(maleval, femaleval), size = 1, replace = TRUE, | |
prob = c(maleProb, 1 - maleProb)) | |
if (sex == maleval) {datum = MASS::mvrnorm(n = 1, mu = Mmean, Sigma = Msigma)} | |
if (sex == femaleval) { | |
Fclust = sample(c(1, 2), size = 1, replace = TRUE, prob = c(prop1, prop2)) | |
if (Fclust == 1) {datum = MASS::mvrnorm(n = 1, mu = Fmean1, Sigma = Fsigma1)} | |
if (Fclust == 2) {datum = MASS::mvrnorm(n = 1, mu = Fmean2, Sigma = Fsigma2)} | |
} | |
datamatrix[i, ] = c(sex, round(c(datum[1], exp(datum[2])), 1)) | |
} | |
return(datamatrix) | |
} # end function | |
set.seed(2) | |
d <- HtWtDataGenerator(nSubj = 57, maleProb = .5) %>%as_tibble() | |
d%>%head() | |
ggplot(d, aes(x=weight, | |
y=height))+ | |
geom_smooth(method="lm", alpha=0.2)+ | |
geom_point(alpha=0.5)+ | |
theme_bw() | |
ggplot(d, aes(x=weight, | |
y=height, | |
colour=as.factor(male), | |
fill=as.factor(male)))+ | |
geom_smooth(method="lm", alpha=0.2)+ | |
geom_point(alpha=0.5)+ | |
theme_bw() | |
fit2.0 <- lm(weight~height,data=d) | |
fit2.1 <- lm(weight~height+ | |
as.factor(male), data=d) | |
fit2.2 <- lm(weight~height* | |
as.factor(male), data=d) | |
fit2.0 | |
summary(fit2.0) | |
summary(fit2.1) | |
summary(fit2.2) | |
par(mfrow = c(2, 2)) | |
plot(fit2.2) | |
dev.off() | |
(ci<-confint(fit2.2)) | |
anova(fit2.0, fit2.1, fit2.2) | |
plot(density(fit2.2$residuals)) | |
mean(fit2.2$residuals) | |
sd(fit2.2$residuals) | |
var(fit2.2$residuals) | |
get_prior(weight ~ 0, data=d) | |
fit2.1 <- | |
brm(data = d, | |
family = gaussian, | |
weight ~ 1 + height) | |
summary(fit2.1) | |
plot(fit2.1) | |
stancode(fit2.1) | |
pp_check(fit2.1, ndraws=100) | |
plot(density(predict(fit2.1)[,1]-d$weight)) | |
mean(predict(fit2.1)[,1]-d$weight) | |
sd(predict(fit2.1)[,1]-d$weight) | |
var(predict(fit2.1)[,1]-d$weight) | |
fit2.2 <- | |
brm(data = d, | |
family = gaussian, | |
weight ~ 1 + height, | |
prior = c(prior(normal(100, 1), class = Intercept), | |
prior(normal(2, 1), class = b), | |
prior(gamma(1, 1), class = sigma)), | |
chains = 4, | |
cores = 6, | |
iter = 20000, | |
warmup = 1000, | |
seed = 2) | |
summary(fit2.2) | |
pp_check(fit2.2, ndraws=100) | |
pp_check(fit2.2, type = "error_hist", ndraws = 11) | |
pp_check(fit2.2, type = "scatter_avg", ndraws = 100) | |
pp_check(fit2.2, type = "stat_2d") | |
pp_check(fit2.2, type = "loo_pit") | |
post <- posterior_samples(fit2.2) | |
n_lines <- 150 | |
d %>% | |
ggplot(aes(x = height, y = weight)) + | |
geom_abline(intercept = post[1:n_lines, 1], | |
slope = post[1:n_lines, 2], | |
color = "grey50", size = 1/4, alpha = .3) + | |
geom_point(shape = 1) + | |
labs(subtitle = eval(substitute(paste("Data with", | |
n_lines, | |
"credible regression lines"))), | |
x = "Height in inches", | |
y = "Weight in pounds") + | |
coord_cartesian(xlim = c(55, 80), | |
ylim = c(50, 250)) + | |
theme_bw() | |
d$male<-as.factor(d$male) | |
get_prior(weight ~ 1 + height*male, data=d) | |
fit2.2 <- | |
brm(data = d, | |
family = gaussian, | |
weight ~ 1 + height*male, | |
prior = c(prior(normal(100, 1), class = Intercept), | |
prior(normal(2, 1), class = b, coef=height), | |
prior(normal(3, 2), class = b, coef=male1), | |
prior(normal(1.5, 1), class = b, coef=height:male1), | |
prior(gamma(1, 1), class = sigma)), | |
chains = 4, | |
cores = 6, | |
iter = 20000, | |
warmup = 1000, | |
seed = 2) | |
fit2.2$prior | |
summary(fit2.2) | |
stancode(fit2.2) | |
plot(fit2.2) | |
pp_check(fit2.2, ndraws=100) | |
pp_check(fit2.2, | |
type = "error_hist", | |
ndraws = 50) | |
pp_check(fit2.2, | |
type = "scatter_avg", | |
ndraws = 100) | |
pp_check(fit2.2, | |
type = "stat_2d") | |
pp_check(fit2.2, type = "rootogram") | |
pp_check(fit2.2, type = "loo_pit") | |
ce<-conditional_effects(fit2.2, surface = TRUE) | |
pl_ce<-plot(ce, rug=TRUE, theme=theme_bw()) | |
pl_ce | |
post <- posterior_samples(fit2.2) | |
ci_hdi <- ci(post$b_height, method = "HDI") | |
ci_eti <- ci(post$b_height, method = "ETI") | |
ci_quant<-quantile(post$b_height, prob=c(.025,.975)) | |
post %>% | |
select(b_height)%>% | |
estimate_density(extend=TRUE) %>% | |
ggplot(aes(x = x, y = y)) + | |
geom_area(fill = "orange") + | |
theme_classic() + | |
geom_vline(xintercept = ci_hdi$CI_low, color = "royalblue", size = 1) + | |
geom_vline(xintercept = ci_hdi$CI_high, color = "royalblue", size = 1) + | |
geom_vline(xintercept = ci_eti$CI_low, color = "red", size = 1) + | |
geom_vline(xintercept = ci_eti$CI_high, color = "red", size = 1)+ | |
geom_vline(xintercept = ci_quant[1], color = "green", size = 1) + | |
geom_vline(xintercept = ci_quant[2], color = "green", size = 1) | |
post <- posterior_samples(fit2.2, FIXED=TRUE) | |
post$male<-round(runif(76000)) | |
head(post) | |
n_lines <- 150 | |
g1<-d %>% filter(male==0)%>% | |
ggplot(aes(x = height, y = weight, group=male)) + | |
geom_abline(intercept = post[1:n_lines, 1] + (post[1:n_lines, 3]*0), | |
slope = post[1:n_lines, 2] + post[1:n_lines, 2]*(post[1:n_lines, 4]*0), | |
size = 1/4, | |
alpha = .3) + | |
geom_abline(intercept = mean(post[1:n_lines, 1]) + mean((post[1:n_lines, 3]*0)), | |
slope = mean(post[1:n_lines, 2]) + mean(post[1:n_lines, 2]*(post[1:n_lines, 4]*0)), | |
size = 2, | |
alpha = 1) + | |
geom_point(shape = 1) + | |
labs(subtitle = eval(substitute(paste("Data with", | |
n_lines, | |
"credible regression lines for male = 0"))), | |
x = "Height in inches", | |
y = "Weight in pounds") + | |
coord_cartesian(xlim = c(55, 80), | |
ylim = c(50, 250)) + | |
theme_bw() | |
g2<-d %>% filter(male==1)%>% | |
ggplot(aes(x = height, y = weight, group=male)) + | |
geom_abline(intercept = post[1:n_lines, 1] + (post[1:n_lines, 3]*1), | |
slope = post[1:n_lines, 2] + post[1:n_lines, 2]*(post[1:n_lines, 4]*1), | |
size = 1/4, | |
alpha = .3) + | |
geom_abline(intercept = mean(post[1:n_lines, 1]) + mean((post[1:n_lines, 3]*1)), | |
slope = mean(post[1:n_lines, 2]) + mean(post[1:n_lines, 2]*(post[1:n_lines, 4]*1)), | |
size = 2, | |
alpha = 1) + | |
geom_point(shape = 1) + | |
labs(subtitle = eval(substitute(paste("Data with", | |
n_lines, | |
"credible regression lines for male = 1"))), | |
x = "Height in inches", | |
y = "Weight in pounds") + | |
coord_cartesian(xlim = c(55, 80), | |
ylim = c(50, 250)) + | |
theme_bw() | |
g3<-d %>% | |
ggplot(aes(x = height, y = weight, group=male)) + | |
geom_abline(intercept = post[1:n_lines, 1] + (post[1:n_lines, 3]*0), | |
slope = post[1:n_lines, 2] + post[1:n_lines, 2]*(post[1:n_lines, 4]*0), | |
size = 1/4, | |
alpha = .3, | |
col="red") + | |
geom_abline(intercept = mean(post[1:n_lines, 1]) + mean((post[1:n_lines, 3]*0)), | |
slope = mean(post[1:n_lines, 2]) + mean(post[1:n_lines, 2]*(post[1:n_lines, 4]*0)), | |
size = 2, | |
alpha = 1, | |
col="red") + | |
geom_abline(intercept = post[1:n_lines, 1] + (post[1:n_lines, 3]*1), | |
slope = post[1:n_lines, 2] + post[1:n_lines, 2]*(post[1:n_lines, 4]*1), | |
size = 1/4, | |
alpha = .3, | |
col="blue") + | |
geom_abline(intercept = mean(post[1:n_lines, 1]) + mean((post[1:n_lines, 3]*1)), | |
slope = mean(post[1:n_lines, 2]) + mean(post[1:n_lines, 2]*(post[1:n_lines, 4]*1)), | |
size = 2, | |
alpha = 1, | |
col="blue") + | |
geom_point(shape = 1) + | |
labs(subtitle = eval(substitute(paste("Data with", | |
n_lines, | |
"credible regression lines where male = blue"))), | |
x = "Height in inches", | |
y = "Weight in pounds") + | |
coord_cartesian(xlim = c(55, 80), | |
ylim = c(50, 250)) + | |
theme_bw() | |
grid.arrange(g1,g2,g3,ncol=1) | |
post %>% | |
ggplot(aes(x = sigma, y = 0)) + | |
stat_histinterval(point_interval = mode_hdi, .width = .95, | |
fill = "grey67", slab_color = "grey92", | |
breaks = 40, slab_size = .2, outline_bars = T) + | |
stat_histinterval(point_interval = mean_qi, | |
.width = .95, | |
fill = "seagreen", | |
point_color = "green", | |
slab_type = "pdf", | |
slab_size = .2, | |
alpha=0.5) + | |
scale_y_continuous(NULL, breaks = NULL) + | |
labs(title = "The posterior distribution of sigma", | |
subtitle = "Mode and 85% HDI interval (grey) next to mean and 95% confidence interval (seagreen)", | |
x = expression(sigma[1]~(slope))) + | |
theme_bw() | |
fit2.0 <- lm(weight~height*male, data=d) | |
pred.int <- | |
predict( | |
fit2.0, | |
interval = "prediction", | |
level = 0.95) | |
mydata <- cbind(d, pred.int) | |
ggplot(mydata, aes(height, weight)) + | |
geom_point() + | |
geom_ribbon(aes(ymin=lwr, | |
ymax = upr), | |
fill = "red",alpha=0.1)+ | |
stat_smooth(method = lm) + | |
labs(title="Model of weight predicted by height", | |
subtitle = "Blue line is mean regression,grey is confidence interval,red is prediction interval")+ | |
theme_bw()+ | |
facet_wrap(~male) | |
nd <- tibble(height = seq(from = 53, to = 81, length.out = 20), | |
male = round(runif(20))) | |
predict(fit2.2, newdata = nd) %>% | |
data.frame() %>% | |
bind_cols(nd) %>% | |
ggplot(aes(x = height)) + | |
geom_pointrange(aes(y = Estimate, ymin = Q2.5, ymax = Q97.5), | |
color = "blue", shape = 20, lty=2) + | |
geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5), fill = "blue", alpha=0.1) + | |
geom_line(aes(y = Estimate), | |
color = "blue", lwd=1) + | |
geom_point(data = d, | |
aes(y = weight), color="black") + | |
labs(title="Showing he build-up of confidence intervals", | |
subtitle = "Data with the percentile-based 95% intervals and the means of the posterior predictions", | |
x = "Height in inches", | |
y = "Weight in inches") + | |
theme_bw()+ | |
facet_wrap(~male) | |
nd <- tibble(height = seq(from = 53, to = 81, length.out = 30), | |
male = round(runif(30))) | |
predict(fit2.2, newdata = nd) %>% | |
data.frame() %>% | |
bind_cols(nd) %>% | |
ggplot(aes(x = height)) + | |
geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5), fill = "grey75") + | |
geom_line(aes(y = Estimate), | |
color = "grey92") + | |
geom_point(data = d, | |
aes(y = weight), | |
alpha = 2/3) + | |
labs(subtitle = "Data with the percentile-based 95% intervals and\nthe means of the posterior predictions", | |
x = "Height in inches", | |
y = "Weight in inches") + | |
theme_bw()+ | |
facet_wrap(~male) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment