Skip to content

Instantly share code, notes, and snippets.

@MJacobs1985
Created June 3, 2022 20:55
Show Gist options
  • Save MJacobs1985/32db6e7b34e338775fa1a5c02ebc51ef to your computer and use it in GitHub Desktop.
Save MJacobs1985/32db6e7b34e338775fa1a5c02ebc51ef to your computer and use it in GitHub Desktop.
rm(list = ls())
library(dplyr)
library(ggplot2)
library(brms)
library(tidybayes)
library(bayestestR)
library(grid)
library(gridExtra)
HtWtDataGenerator <- function(nSubj, rndsd = NULL, maleProb = 0.50) {
# Random height, weight generator for males and females. Uses parameters from
# Brainard, J. & Burmaster, D. E. (1992). Bivariate distributions for height and
# weight of men and women in the United States. Risk Analysis, 12(2), 267-275.
# Kruschke, J. K. (2011). Doing Bayesian data analysis:
# A Tutorial with R and BUGS. Academic Press / Elsevier.
# Kruschke, J. K. (2014). Doing Bayesian data analysis, 2nd Edition:
# A Tutorial with R, JAGS and Stan. Academic Press / Elsevier.
# require(MASS)
# Specify parameters of multivariate normal (MVN) distributions.
# Men:
HtMmu <- 69.18
HtMsd <- 2.87
lnWtMmu <- 5.14
lnWtMsd <- 0.17
Mrho <- 0.42
Mmean <- c(HtMmu, lnWtMmu)
Msigma <- matrix(c(HtMsd^2, Mrho * HtMsd * lnWtMsd,
Mrho * HtMsd * lnWtMsd, lnWtMsd^2), nrow = 2)
# Women cluster 1:
HtFmu1 <- 63.11
HtFsd1 <- 2.76
lnWtFmu1 <- 5.06
lnWtFsd1 <- 0.24
Frho1 <- 0.41
prop1 <- 0.46
Fmean1 <- c(HtFmu1, lnWtFmu1)
Fsigma1 <- matrix(c(HtFsd1^2, Frho1 * HtFsd1 * lnWtFsd1,
Frho1 * HtFsd1 * lnWtFsd1, lnWtFsd1^2), nrow = 2)
# Women cluster 2:
HtFmu2 <- 64.36
HtFsd2 <- 2.49
lnWtFmu2 <- 4.86
lnWtFsd2 <- 0.14
Frho2 <- 0.44
prop2 <- 1 - prop1
Fmean2 <- c(HtFmu2, lnWtFmu2)
Fsigma2 <- matrix(c(HtFsd2^2, Frho2 * HtFsd2 * lnWtFsd2,
Frho2 * HtFsd2 * lnWtFsd2, lnWtFsd2^2), nrow = 2)
# Randomly generate data values from those MVN distributions.
if (!is.null(rndsd)) {set.seed(rndsd)}
datamatrix <- matrix(0, nrow = nSubj, ncol = 3)
colnames(datamatrix) <- c("male", "height", "weight")
maleval <- 1; femaleval <- 0 # arbitrary coding values
for (i in 1:nSubj) {
# Flip coin to decide sex
sex <- sample(c(maleval, femaleval), size = 1, replace = TRUE,
prob = c(maleProb, 1 - maleProb))
if (sex == maleval) {datum = MASS::mvrnorm(n = 1, mu = Mmean, Sigma = Msigma)}
if (sex == femaleval) {
Fclust = sample(c(1, 2), size = 1, replace = TRUE, prob = c(prop1, prop2))
if (Fclust == 1) {datum = MASS::mvrnorm(n = 1, mu = Fmean1, Sigma = Fsigma1)}
if (Fclust == 2) {datum = MASS::mvrnorm(n = 1, mu = Fmean2, Sigma = Fsigma2)}
}
datamatrix[i, ] = c(sex, round(c(datum[1], exp(datum[2])), 1))
}
return(datamatrix)
} # end function
set.seed(2)
d <- HtWtDataGenerator(nSubj = 57, maleProb = .5) %>%as_tibble()
d%>%head()
ggplot(d, aes(x=weight,
y=height))+
geom_smooth(method="lm", alpha=0.2)+
geom_point(alpha=0.5)+
theme_bw()
ggplot(d, aes(x=weight,
y=height,
colour=as.factor(male),
fill=as.factor(male)))+
geom_smooth(method="lm", alpha=0.2)+
geom_point(alpha=0.5)+
theme_bw()
fit2.0 <- lm(weight~height,data=d)
fit2.1 <- lm(weight~height+
as.factor(male), data=d)
fit2.2 <- lm(weight~height*
as.factor(male), data=d)
fit2.0
summary(fit2.0)
summary(fit2.1)
summary(fit2.2)
par(mfrow = c(2, 2))
plot(fit2.2)
dev.off()
(ci<-confint(fit2.2))
anova(fit2.0, fit2.1, fit2.2)
plot(density(fit2.2$residuals))
mean(fit2.2$residuals)
sd(fit2.2$residuals)
var(fit2.2$residuals)
get_prior(weight ~ 0, data=d)
fit2.1 <-
brm(data = d,
family = gaussian,
weight ~ 1 + height)
summary(fit2.1)
plot(fit2.1)
stancode(fit2.1)
pp_check(fit2.1, ndraws=100)
plot(density(predict(fit2.1)[,1]-d$weight))
mean(predict(fit2.1)[,1]-d$weight)
sd(predict(fit2.1)[,1]-d$weight)
var(predict(fit2.1)[,1]-d$weight)
fit2.2 <-
brm(data = d,
family = gaussian,
weight ~ 1 + height,
prior = c(prior(normal(100, 1), class = Intercept),
prior(normal(2, 1), class = b),
prior(gamma(1, 1), class = sigma)),
chains = 4,
cores = 6,
iter = 20000,
warmup = 1000,
seed = 2)
summary(fit2.2)
pp_check(fit2.2, ndraws=100)
pp_check(fit2.2, type = "error_hist", ndraws = 11)
pp_check(fit2.2, type = "scatter_avg", ndraws = 100)
pp_check(fit2.2, type = "stat_2d")
pp_check(fit2.2, type = "loo_pit")
post <- posterior_samples(fit2.2)
n_lines <- 150
d %>%
ggplot(aes(x = height, y = weight)) +
geom_abline(intercept = post[1:n_lines, 1],
slope = post[1:n_lines, 2],
color = "grey50", size = 1/4, alpha = .3) +
geom_point(shape = 1) +
labs(subtitle = eval(substitute(paste("Data with",
n_lines,
"credible regression lines"))),
x = "Height in inches",
y = "Weight in pounds") +
coord_cartesian(xlim = c(55, 80),
ylim = c(50, 250)) +
theme_bw()
d$male<-as.factor(d$male)
get_prior(weight ~ 1 + height*male, data=d)
fit2.2 <-
brm(data = d,
family = gaussian,
weight ~ 1 + height*male,
prior = c(prior(normal(100, 1), class = Intercept),
prior(normal(2, 1), class = b, coef=height),
prior(normal(3, 2), class = b, coef=male1),
prior(normal(1.5, 1), class = b, coef=height:male1),
prior(gamma(1, 1), class = sigma)),
chains = 4,
cores = 6,
iter = 20000,
warmup = 1000,
seed = 2)
fit2.2$prior
summary(fit2.2)
stancode(fit2.2)
plot(fit2.2)
pp_check(fit2.2, ndraws=100)
pp_check(fit2.2,
type = "error_hist",
ndraws = 50)
pp_check(fit2.2,
type = "scatter_avg",
ndraws = 100)
pp_check(fit2.2,
type = "stat_2d")
pp_check(fit2.2, type = "rootogram")
pp_check(fit2.2, type = "loo_pit")
ce<-conditional_effects(fit2.2, surface = TRUE)
pl_ce<-plot(ce, rug=TRUE, theme=theme_bw())
pl_ce
post <- posterior_samples(fit2.2)
ci_hdi <- ci(post$b_height, method = "HDI")
ci_eti <- ci(post$b_height, method = "ETI")
ci_quant<-quantile(post$b_height, prob=c(.025,.975))
post %>%
select(b_height)%>%
estimate_density(extend=TRUE) %>%
ggplot(aes(x = x, y = y)) +
geom_area(fill = "orange") +
theme_classic() +
geom_vline(xintercept = ci_hdi$CI_low, color = "royalblue", size = 1) +
geom_vline(xintercept = ci_hdi$CI_high, color = "royalblue", size = 1) +
geom_vline(xintercept = ci_eti$CI_low, color = "red", size = 1) +
geom_vline(xintercept = ci_eti$CI_high, color = "red", size = 1)+
geom_vline(xintercept = ci_quant[1], color = "green", size = 1) +
geom_vline(xintercept = ci_quant[2], color = "green", size = 1)
post <- posterior_samples(fit2.2, FIXED=TRUE)
post$male<-round(runif(76000))
head(post)
n_lines <- 150
g1<-d %>% filter(male==0)%>%
ggplot(aes(x = height, y = weight, group=male)) +
geom_abline(intercept = post[1:n_lines, 1] + (post[1:n_lines, 3]*0),
slope = post[1:n_lines, 2] + post[1:n_lines, 2]*(post[1:n_lines, 4]*0),
size = 1/4,
alpha = .3) +
geom_abline(intercept = mean(post[1:n_lines, 1]) + mean((post[1:n_lines, 3]*0)),
slope = mean(post[1:n_lines, 2]) + mean(post[1:n_lines, 2]*(post[1:n_lines, 4]*0)),
size = 2,
alpha = 1) +
geom_point(shape = 1) +
labs(subtitle = eval(substitute(paste("Data with",
n_lines,
"credible regression lines for male = 0"))),
x = "Height in inches",
y = "Weight in pounds") +
coord_cartesian(xlim = c(55, 80),
ylim = c(50, 250)) +
theme_bw()
g2<-d %>% filter(male==1)%>%
ggplot(aes(x = height, y = weight, group=male)) +
geom_abline(intercept = post[1:n_lines, 1] + (post[1:n_lines, 3]*1),
slope = post[1:n_lines, 2] + post[1:n_lines, 2]*(post[1:n_lines, 4]*1),
size = 1/4,
alpha = .3) +
geom_abline(intercept = mean(post[1:n_lines, 1]) + mean((post[1:n_lines, 3]*1)),
slope = mean(post[1:n_lines, 2]) + mean(post[1:n_lines, 2]*(post[1:n_lines, 4]*1)),
size = 2,
alpha = 1) +
geom_point(shape = 1) +
labs(subtitle = eval(substitute(paste("Data with",
n_lines,
"credible regression lines for male = 1"))),
x = "Height in inches",
y = "Weight in pounds") +
coord_cartesian(xlim = c(55, 80),
ylim = c(50, 250)) +
theme_bw()
g3<-d %>%
ggplot(aes(x = height, y = weight, group=male)) +
geom_abline(intercept = post[1:n_lines, 1] + (post[1:n_lines, 3]*0),
slope = post[1:n_lines, 2] + post[1:n_lines, 2]*(post[1:n_lines, 4]*0),
size = 1/4,
alpha = .3,
col="red") +
geom_abline(intercept = mean(post[1:n_lines, 1]) + mean((post[1:n_lines, 3]*0)),
slope = mean(post[1:n_lines, 2]) + mean(post[1:n_lines, 2]*(post[1:n_lines, 4]*0)),
size = 2,
alpha = 1,
col="red") +
geom_abline(intercept = post[1:n_lines, 1] + (post[1:n_lines, 3]*1),
slope = post[1:n_lines, 2] + post[1:n_lines, 2]*(post[1:n_lines, 4]*1),
size = 1/4,
alpha = .3,
col="blue") +
geom_abline(intercept = mean(post[1:n_lines, 1]) + mean((post[1:n_lines, 3]*1)),
slope = mean(post[1:n_lines, 2]) + mean(post[1:n_lines, 2]*(post[1:n_lines, 4]*1)),
size = 2,
alpha = 1,
col="blue") +
geom_point(shape = 1) +
labs(subtitle = eval(substitute(paste("Data with",
n_lines,
"credible regression lines where male = blue"))),
x = "Height in inches",
y = "Weight in pounds") +
coord_cartesian(xlim = c(55, 80),
ylim = c(50, 250)) +
theme_bw()
grid.arrange(g1,g2,g3,ncol=1)
post %>%
ggplot(aes(x = sigma, y = 0)) +
stat_histinterval(point_interval = mode_hdi, .width = .95,
fill = "grey67", slab_color = "grey92",
breaks = 40, slab_size = .2, outline_bars = T) +
stat_histinterval(point_interval = mean_qi,
.width = .95,
fill = "seagreen",
point_color = "green",
slab_type = "pdf",
slab_size = .2,
alpha=0.5) +
scale_y_continuous(NULL, breaks = NULL) +
labs(title = "The posterior distribution of sigma",
subtitle = "Mode and 85% HDI interval (grey) next to mean and 95% confidence interval (seagreen)",
x = expression(sigma[1]~(slope))) +
theme_bw()
fit2.0 <- lm(weight~height*male, data=d)
pred.int <-
predict(
fit2.0,
interval = "prediction",
level = 0.95)
mydata <- cbind(d, pred.int)
ggplot(mydata, aes(height, weight)) +
geom_point() +
geom_ribbon(aes(ymin=lwr,
ymax = upr),
fill = "red",alpha=0.1)+
stat_smooth(method = lm) +
labs(title="Model of weight predicted by height",
subtitle = "Blue line is mean regression,grey is confidence interval,red is prediction interval")+
theme_bw()+
facet_wrap(~male)
nd <- tibble(height = seq(from = 53, to = 81, length.out = 20),
male = round(runif(20)))
predict(fit2.2, newdata = nd) %>%
data.frame() %>%
bind_cols(nd) %>%
ggplot(aes(x = height)) +
geom_pointrange(aes(y = Estimate, ymin = Q2.5, ymax = Q97.5),
color = "blue", shape = 20, lty=2) +
geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5), fill = "blue", alpha=0.1) +
geom_line(aes(y = Estimate),
color = "blue", lwd=1) +
geom_point(data = d,
aes(y = weight), color="black") +
labs(title="Showing he build-up of confidence intervals",
subtitle = "Data with the percentile-based 95% intervals and the means of the posterior predictions",
x = "Height in inches",
y = "Weight in inches") +
theme_bw()+
facet_wrap(~male)
nd <- tibble(height = seq(from = 53, to = 81, length.out = 30),
male = round(runif(30)))
predict(fit2.2, newdata = nd) %>%
data.frame() %>%
bind_cols(nd) %>%
ggplot(aes(x = height)) +
geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5), fill = "grey75") +
geom_line(aes(y = Estimate),
color = "grey92") +
geom_point(data = d,
aes(y = weight),
alpha = 2/3) +
labs(subtitle = "Data with the percentile-based 95% intervals and\nthe means of the posterior predictions",
x = "Height in inches",
y = "Weight in inches") +
theme_bw()+
facet_wrap(~male)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment