benmarwick · August 29, 2015 13:57
diff --git a/BANOVA.r b/BANOVA.r
 # Joseph R. Mihaljevic
 # July 2013
 # (Partial) Bayesian analysis of variance, accounting for heteroscedasticity
 # Generate some artificial data:
 # Normally distributed groups, but heteroscedastic
 a <- rnorm(25, mean=8, sd=10)
 b <- rnorm(50, mean=5, sd=2)
 c <- rnorm(25, mean=3, sd=.1)
 d <- rnorm(25, mean=11, sd=3)
 e <- rnorm(50, mean=13, sd=2)

 data <- data.frame(meas = c(a,b,c,d,e), group = rep(c(1,2,3,4,5), times=c(length(a), length(b), length(c),  length(d),  length(e))))
 lst <- split(data, data$group)

 nobs <- length(unique(data$group))
 levels <- data$group

 # JAGS needs a list of the data
 jags_d <- list(y=data$meas, 
               levels=levels, 
               nobs=nobs)


 # write model
 cat("
    model{
    ## Specify hyperpriors and priors
    
    a1tau ~ dgamma(.1, .1)
    scale ~ dunif(0, 1)
    rate ~ dunif(0, 1)
    
    ## Specify likelihood for ANOVA model
    for (i in 1:nobs){
    
    y[i] ~ dnorm(mu[i], tau[levels[i]]) #this creates a different tau for each level
    mu[i] <- a1[levels[i]]
    
    }
    
    ## Specify the effects for each level  
    for (j in 1:nobs){
    a1[j] ~ dnorm(0, a1tau)
    }
    ## Specify variance for each level
    for (j in 1:nobs){
    tau[j] ~ dgamma(scale, rate)
    }
    
    ## Specify effect contrasts:
    ## You should follow the rules of independent contrasts.
    e1v2 <- a1[1]-a1[2]
    e1v3 <- a1[1]-a1[3]
    e1v4 <- a1[1]-a1[4]
    e1v5 <- a1[1]-a1[5]
    e2v3 <- a1[2]-a1[3]
    e2v4 <- a1[2]-a1[4]
    e2v5 <- a1[2]-a1[5]
    e3v4 <- a1[3]-a1[4]
    e3v5 <- a1[3]-a1[5]
    e4v5 <- a1[4]-a1[5]
    
    
    # back-transfrom tau so that you can visualze the standard deviation
    for (i in 1:nobs){
    sd[i] <- 1/sqrt(tau[i])
    }
    }
    ",
    fill=TRUE, file="FakeANOVAuneqvar.txt")

 library(rjags)

 # initiate model
 mod1 <- jags.model(file="FakeANOVAuneqvar.txt", 
                   data=jags_d, n.chains=3, n.adapt=1000)


 # simulate the posterior
 out <- coda.samples(mod1, n.iter=2000, 
                    variable.names=c("a1", "tau", "a1tau", 
                                     "e1v2", "e1v3", "e1v4", "e1v5",
                                     "e2v3", "e2v4", "e2v5",
                                     "e3v4", "e3v5",
                                     "e4v5",
                                     "sd"))

 str(out) # view the structure of the output object
 summary(out)
 windows()
 plot(out)

 #Visualize the means
 plot(out[, c("a1[1]","a1[2]","a1[3]")]) #you can plot the variables individually this way
 # Notice the differences in variance! (Look at the x-axis limits)

 #Visualize the standard deviation estimates
 plot(out[, c("sd[1]","sd[2]","sd[3]")])
 #Pretty accurate given our initial samples!

 #Now look at the pairwise contrasts
 plot(out[, c("e1v2", "e3v5")])
 #Notice that the difference between group 1 and 2 includes zero with relatively high 
 #probability. Thus, these groups are not different in mean effect, accounting for 
 #differences in group-level variance. 
 # Groups 1&2 (their average) are clearly different from group 3. 


 # Let's make some fancier graphs!
 library(ggmcmc)
 all_data <- ggs(out) #Create a data frame of all the simulations
 str(all_data) #Look at the structure

 #Subset out all of the means
 mus <- subset(all_data, Parameter=="a1[1]"|Parameter=="a1[2]"|Parameter=="a1[3]",
              select=c(Parameter, value))
 head(mus)

 library(ggplot2)

 #Re-label the group names so that it looks better on the plot
 levels(mus$Parameter)[1]
 for(i in 1:3){
  levels(mus$Parameter)[i] <- paste("Mean of Group", i, sep=" ")
 }
 means <- ggplot(mus, aes(x=value))+
  geom_density()+
  theme_bw()+
  facet_wrap(~Parameter, nrow=1, scales="free")+
  labs(x="Mean Value", y="Density")
 quartz(height=3, width=5.5)
 plot(means)

 #Look at the standard deviations
 sds <- subset(all_data, Parameter=="sd[1]"|Parameter=="sd[2]"|Parameter=="sd[3]",
              select=c(Parameter, value))
 head(sds)
 levels(sds$Parameter)
 for(i in 7:9){
  levels(sds$Parameter)[i] <- paste("Std. Dev., Group", i-6, sep=" ")
 }
 stddevs <- ggplot(sds, aes(x=value))+
  geom_density()+
  theme_bw()+
  facet_wrap(~Parameter, nrow=1, scales="free")+
  labs(x="Std. Dev. Value", y="Density")
 quartz(height=3, width=5.5)
 plot(stddevs)

 #Look at the differences in group means

 diffs <- subset(all_data, Parameter=="e1v2"|Parameter=="e3v4",
                select=c(Parameter, value))
 head(diffs)
 levels(diffs$Parameter)

 levels(diffs$Parameter)[5] <- "Groups 1 v 2"
 levels(diffs$Parameter)[6] <- "Average of Groups 1 & 2 v 3"
 differences <- ggplot(diffs, aes(x=value))+
  geom_density()+
  theme_bw()+
  facet_wrap(~Parameter, nrow=1, scales="free")+
  labs(x="Effects (Differences)", y="Density")
 quartz(height=3, width=5.5)
 plot(differences)


 # NOTE:
 # This is a partial analysis. You will also have to do some model validation!

 # freq ANOVA
 aovresult = aov( meas ~ as.factor(group) , data = data ) # NHST ANOVA

 print( summary( aovresult ) )

 print( model.tables( aovresult , "means" ) , digits=4 )

 boxplot( meas ~ as.factor(group) , data = data , cex.axis=1.25 , ylab="Y")

 print( TukeyHSD( aovresult) )
 plot( TukeyHSD( aovresult))
	# Joseph R. Mihaljevic
	# July 2013
	# (Partial) Bayesian analysis of variance, accounting for heteroscedasticity
	# Generate some artificial data:
	# Normally distributed groups, but heteroscedastic
	a <- rnorm(25, mean=8, sd=10)
	b <- rnorm(50, mean=5, sd=2)
	c <- rnorm(25, mean=3, sd=.1)
	d <- rnorm(25, mean=11, sd=3)
	e <- rnorm(50, mean=13, sd=2)

	data <- data.frame(meas = c(a,b,c,d,e), group = rep(c(1,2,3,4,5), times=c(length(a), length(b), length(c), length(d), length(e))))
	lst <- split(data, data$group)

	nobs <- length(unique(data$group))
	levels <- data$group

	# JAGS needs a list of the data
	jags_d <- list(y=data$meas,
	levels=levels,
	nobs=nobs)


	# write model
	cat("
	model{
	## Specify hyperpriors and priors

	a1tau ~ dgamma(.1, .1)
	scale ~ dunif(0, 1)
	rate ~ dunif(0, 1)

	## Specify likelihood for ANOVA model
	for (i in 1:nobs){

	y[i] ~ dnorm(mu[i], tau[levels[i]]) #this creates a different tau for each level
	mu[i] <- a1[levels[i]]

	}

	## Specify the effects for each level
	for (j in 1:nobs){
	a1[j] ~ dnorm(0, a1tau)
	}
	## Specify variance for each level
	for (j in 1:nobs){
	tau[j] ~ dgamma(scale, rate)
	}

	## Specify effect contrasts:
	## You should follow the rules of independent contrasts.
	e1v2 <- a1[1]-a1[2]
	e1v3 <- a1[1]-a1[3]
	e1v4 <- a1[1]-a1[4]
	e1v5 <- a1[1]-a1[5]
	e2v3 <- a1[2]-a1[3]
	e2v4 <- a1[2]-a1[4]
	e2v5 <- a1[2]-a1[5]
	e3v4 <- a1[3]-a1[4]
	e3v5 <- a1[3]-a1[5]
	e4v5 <- a1[4]-a1[5]


	# back-transfrom tau so that you can visualze the standard deviation
	for (i in 1:nobs){
	sd[i] <- 1/sqrt(tau[i])
	}
	}
	",
	fill=TRUE, file="FakeANOVAuneqvar.txt")

	library(rjags)

	# initiate model
	mod1 <- jags.model(file="FakeANOVAuneqvar.txt",
	data=jags_d, n.chains=3, n.adapt=1000)


	# simulate the posterior
	out <- coda.samples(mod1, n.iter=2000,
	variable.names=c("a1", "tau", "a1tau",
	"e1v2", "e1v3", "e1v4", "e1v5",
	"e2v3", "e2v4", "e2v5",
	"e3v4", "e3v5",
	"e4v5",
	"sd"))

	str(out) # view the structure of the output object
	summary(out)
	windows()
	plot(out)

	#Visualize the means
	plot(out[, c("a1[1]","a1[2]","a1[3]")]) #you can plot the variables individually this way
	# Notice the differences in variance! (Look at the x-axis limits)

	#Visualize the standard deviation estimates
	plot(out[, c("sd[1]","sd[2]","sd[3]")])
	#Pretty accurate given our initial samples!

	#Now look at the pairwise contrasts
	plot(out[, c("e1v2", "e3v5")])
	#Notice that the difference between group 1 and 2 includes zero with relatively high
	#probability. Thus, these groups are not different in mean effect, accounting for
	#differences in group-level variance.
	# Groups 1&2 (their average) are clearly different from group 3.


	# Let's make some fancier graphs!
	library(ggmcmc)
	all_data <- ggs(out) #Create a data frame of all the simulations
	str(all_data) #Look at the structure

	#Subset out all of the means
	mus <- subset(all_data, Parameter=="a1[1]"\|Parameter=="a1[2]"\|Parameter=="a1[3]",
	select=c(Parameter, value))
	head(mus)

	library(ggplot2)

	#Re-label the group names so that it looks better on the plot
	levels(mus$Parameter)[1]
	for(i in 1:3){
	levels(mus$Parameter)[i] <- paste("Mean of Group", i, sep=" ")
	}
	means <- ggplot(mus, aes(x=value))+
	geom_density()+
	theme_bw()+
	facet_wrap(~Parameter, nrow=1, scales="free")+
	labs(x="Mean Value", y="Density")
	quartz(height=3, width=5.5)
	plot(means)

	#Look at the standard deviations
	sds <- subset(all_data, Parameter=="sd[1]"\|Parameter=="sd[2]"\|Parameter=="sd[3]",
	select=c(Parameter, value))
	head(sds)
	levels(sds$Parameter)
	for(i in 7:9){
	levels(sds$Parameter)[i] <- paste("Std. Dev., Group", i-6, sep=" ")
	}
	stddevs <- ggplot(sds, aes(x=value))+
	geom_density()+
	theme_bw()+
	facet_wrap(~Parameter, nrow=1, scales="free")+
	labs(x="Std. Dev. Value", y="Density")
	quartz(height=3, width=5.5)
	plot(stddevs)

	#Look at the differences in group means

	diffs <- subset(all_data, Parameter=="e1v2"\|Parameter=="e3v4",
	select=c(Parameter, value))
	head(diffs)
	levels(diffs$Parameter)

	levels(diffs$Parameter)[5] <- "Groups 1 v 2"
	levels(diffs$Parameter)[6] <- "Average of Groups 1 & 2 v 3"
	differences <- ggplot(diffs, aes(x=value))+
	geom_density()+
	theme_bw()+
	facet_wrap(~Parameter, nrow=1, scales="free")+
	labs(x="Effects (Differences)", y="Density")
	quartz(height=3, width=5.5)
	plot(differences)


	# NOTE:
	# This is a partial analysis. You will also have to do some model validation!

	# freq ANOVA
	aovresult = aov( meas ~ as.factor(group) , data = data ) # NHST ANOVA

	print( summary( aovresult ) )

	print( model.tables( aovresult , "means" ) , digits=4 )

	boxplot( meas ~ as.factor(group) , data = data , cex.axis=1.25 , ylab="Y")

	print( TukeyHSD( aovresult) )
	plot( TukeyHSD( aovresult))