Last active
August 29, 2015 13:57
-
-
Save benmarwick/9709811 to your computer and use it in GitHub Desktop.
Bayesian ANOVA with nice plots. from https://sites.google.com/site/jrmihaljevic/statistics/BayesANOVAheteroscedastic
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Joseph R. Mihaljevic | |
# July 2013 | |
# (Partial) Bayesian analysis of variance, accounting for heteroscedasticity | |
# Generate some artificial data: | |
# Normally distributed groups, but heteroscedastic | |
a <- rnorm(25, mean=8, sd=10) | |
b <- rnorm(50, mean=5, sd=2) | |
c <- rnorm(25, mean=3, sd=.1) | |
d <- rnorm(25, mean=11, sd=3) | |
e <- rnorm(50, mean=13, sd=2) | |
data <- data.frame(meas = c(a,b,c,d,e), group = rep(c(1,2,3,4,5), times=c(length(a), length(b), length(c), length(d), length(e)))) | |
lst <- split(data, data$group) | |
nobs <- length(unique(data$group)) | |
levels <- data$group | |
# JAGS needs a list of the data | |
jags_d <- list(y=data$meas, | |
levels=levels, | |
nobs=nobs) | |
# write model | |
cat(" | |
model{ | |
## Specify hyperpriors and priors | |
a1tau ~ dgamma(.1, .1) | |
scale ~ dunif(0, 1) | |
rate ~ dunif(0, 1) | |
## Specify likelihood for ANOVA model | |
for (i in 1:nobs){ | |
y[i] ~ dnorm(mu[i], tau[levels[i]]) #this creates a different tau for each level | |
mu[i] <- a1[levels[i]] | |
} | |
## Specify the effects for each level | |
for (j in 1:nobs){ | |
a1[j] ~ dnorm(0, a1tau) | |
} | |
## Specify variance for each level | |
for (j in 1:nobs){ | |
tau[j] ~ dgamma(scale, rate) | |
} | |
## Specify effect contrasts: | |
## You should follow the rules of independent contrasts. | |
e1v2 <- a1[1]-a1[2] | |
e1v3 <- a1[1]-a1[3] | |
e1v4 <- a1[1]-a1[4] | |
e1v5 <- a1[1]-a1[5] | |
e2v3 <- a1[2]-a1[3] | |
e2v4 <- a1[2]-a1[4] | |
e2v5 <- a1[2]-a1[5] | |
e3v4 <- a1[3]-a1[4] | |
e3v5 <- a1[3]-a1[5] | |
e4v5 <- a1[4]-a1[5] | |
# back-transfrom tau so that you can visualze the standard deviation | |
for (i in 1:nobs){ | |
sd[i] <- 1/sqrt(tau[i]) | |
} | |
} | |
", | |
fill=TRUE, file="FakeANOVAuneqvar.txt") | |
library(rjags) | |
# initiate model | |
mod1 <- jags.model(file="FakeANOVAuneqvar.txt", | |
data=jags_d, n.chains=3, n.adapt=1000) | |
# simulate the posterior | |
out <- coda.samples(mod1, n.iter=2000, | |
variable.names=c("a1", "tau", "a1tau", | |
"e1v2", "e1v3", "e1v4", "e1v5", | |
"e2v3", "e2v4", "e2v5", | |
"e3v4", "e3v5", | |
"e4v5", | |
"sd")) | |
str(out) # view the structure of the output object | |
summary(out) | |
windows() | |
plot(out) | |
#Visualize the means | |
plot(out[, c("a1[1]","a1[2]","a1[3]")]) #you can plot the variables individually this way | |
# Notice the differences in variance! (Look at the x-axis limits) | |
#Visualize the standard deviation estimates | |
plot(out[, c("sd[1]","sd[2]","sd[3]")]) | |
#Pretty accurate given our initial samples! | |
#Now look at the pairwise contrasts | |
plot(out[, c("e1v2", "e3v5")]) | |
#Notice that the difference between group 1 and 2 includes zero with relatively high | |
#probability. Thus, these groups are not different in mean effect, accounting for | |
#differences in group-level variance. | |
# Groups 1&2 (their average) are clearly different from group 3. | |
# Let's make some fancier graphs! | |
library(ggmcmc) | |
all_data <- ggs(out) #Create a data frame of all the simulations | |
str(all_data) #Look at the structure | |
#Subset out all of the means | |
mus <- subset(all_data, Parameter=="a1[1]"|Parameter=="a1[2]"|Parameter=="a1[3]", | |
select=c(Parameter, value)) | |
head(mus) | |
library(ggplot2) | |
#Re-label the group names so that it looks better on the plot | |
levels(mus$Parameter)[1] | |
for(i in 1:3){ | |
levels(mus$Parameter)[i] <- paste("Mean of Group", i, sep=" ") | |
} | |
means <- ggplot(mus, aes(x=value))+ | |
geom_density()+ | |
theme_bw()+ | |
facet_wrap(~Parameter, nrow=1, scales="free")+ | |
labs(x="Mean Value", y="Density") | |
quartz(height=3, width=5.5) | |
plot(means) | |
#Look at the standard deviations | |
sds <- subset(all_data, Parameter=="sd[1]"|Parameter=="sd[2]"|Parameter=="sd[3]", | |
select=c(Parameter, value)) | |
head(sds) | |
levels(sds$Parameter) | |
for(i in 7:9){ | |
levels(sds$Parameter)[i] <- paste("Std. Dev., Group", i-6, sep=" ") | |
} | |
stddevs <- ggplot(sds, aes(x=value))+ | |
geom_density()+ | |
theme_bw()+ | |
facet_wrap(~Parameter, nrow=1, scales="free")+ | |
labs(x="Std. Dev. Value", y="Density") | |
quartz(height=3, width=5.5) | |
plot(stddevs) | |
#Look at the differences in group means | |
diffs <- subset(all_data, Parameter=="e1v2"|Parameter=="e3v4", | |
select=c(Parameter, value)) | |
head(diffs) | |
levels(diffs$Parameter) | |
levels(diffs$Parameter)[5] <- "Groups 1 v 2" | |
levels(diffs$Parameter)[6] <- "Average of Groups 1 & 2 v 3" | |
differences <- ggplot(diffs, aes(x=value))+ | |
geom_density()+ | |
theme_bw()+ | |
facet_wrap(~Parameter, nrow=1, scales="free")+ | |
labs(x="Effects (Differences)", y="Density") | |
quartz(height=3, width=5.5) | |
plot(differences) | |
# NOTE: | |
# This is a partial analysis. You will also have to do some model validation! | |
# freq ANOVA | |
aovresult = aov( meas ~ as.factor(group) , data = data ) # NHST ANOVA | |
print( summary( aovresult ) ) | |
print( model.tables( aovresult , "means" ) , digits=4 ) | |
boxplot( meas ~ as.factor(group) , data = data , cex.axis=1.25 , ylab="Y") | |
print( TukeyHSD( aovresult) ) | |
plot( TukeyHSD( aovresult)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment