mike-lawrence · February 26, 2016 14:46
diff --git a/hierarchical_gaussian_rstan.R b/hierarchical_gaussian_rstan.R
 ########
 # Define a data generating function and generate some data
 ########
 library(MASS)
 generate_data = function(
 	n # number of units of observation (ex. human participants in a cognitive science experiment)
 	, k # number of observations made within each condition within each unit
 	, noise # standard deviation of measurement noise
 	, I # population intercept
 	, sI # across-units standard deviation of intercepts
 	, A # population A effect
 	, sA # across-units standard deviation of A effects
 	, rIA # across-units correlation between intercepts and A effects
 ){
 	vI = sI^2
 	vA = sA^2
 	Sigma = c(
 		vI , sqrt(vI*vA)*rIA
 		, sqrt(vI*vA)*rIA , vA
 	)
 	Sigma = matrix(Sigma,2,2)
 	means = mvrnorm(n,c(I,A),Sigma)
 	temp = expand.grid(A=factor(c('a1','a2')),value=0)
 	contrasts(temp$A) = c(-.5,.5)
 	from_terms = terms(value~A)
 	mm = model.matrix(from_terms,temp)
 	data = expand.grid(A=c('a1','a2'),unit=1:n,trial=1:k)
 	for(i in 1:n){
 		data$value[data$unit==i] = as.numeric(mm %*% means[i,])
 	}
 	data$value = rnorm(nrow(data),data$value,noise)
 	data$unit = factor(data$unit)
 	data$A = factor(data$A)
 	contrasts(data$A) = c(-.5,.5)
 	return(data)
 }

 #set a random seed for replicability
 set.seed(1)

 #generate some data
 data = generate_data(
 	n = 100
 	, k = 100
 	, noise = 100
 	, I = 500
 	, sI = 100
 	, A = 20
 	, sA = 10
 	, rIA = .5
 )

 #show what the data looks like
 print(head(data))
 print(str(data))


 ########
 # Prepare data for stan
 ########

 #recode factor variable "A" to numeric value indicating level number (a1=1, a2=2)
 data$A_num = as.numeric(data$A)

 #recode factor variable unit to numeric value indicating level number
 data$unit_num = as.numeric(data$unit)

 print(head(data))
 print(str(data))

 dat = list(
 	N = length(unique(data$unit))
 	, L = nrow(data)
 	, unit = data$unit_num
 	, condition = data$A_num
 	, y = data$value
 )

 ########
 # Specify the stan model
 ########

 stan_code = '
 data {
 	int N;
 	int L;
 	int unit[L];
 	int condition[L];
 	real y[L];
 }
 transformed data {
 	vector[2] zero_vector; // dummy parameter to employ the "Matt trick" for multivariate models
 	real y_mean; // to store the mean of the data, which in turn lets us use a zero-centered "shift" parameter
 	for (k in 1:2) {
 		zero_vector[k] <- 0;
 	}
 	y_mean <- mean(y);
 }
 parameters {
 	// define the parameters and their domains
 	real shift; // "trick" parameter representing the shift from y_mean to mu (see transformed parameters below)
 	real mu_effect; // difference between conditions
 	real<lower=0> sigma; // standard deviation of measurement noise
 	real<lower=0> tau[2]; // across-units standard deviations in mu & mu_effect
 	corr_matrix[2] Tau; // across-units correlation matrix for deviations in mu & mu_effect
 	vector[2] e_beta[N]; // dummy parameter to employ the "Matt trick" for multivariate models
 }
 transformed parameters {
 	real mu; // the intercept of the model
 	vector[2] beta[N]; // intermediate parameter, for employing the "Matt trick"
 	vector[2] n_mu[N]; // mu for each participant in each condition
 	mu <- y_mean + shift; // this helps search because the algorithm can search for shift values around zero
 	for (n in 1:N) { // compute the sampled mu values for each person-by-condition
 		beta[n,1] <- mu + e_beta[n,1] * tau[1]; // rescale the "Matt trick"ed parameters to have mean of mu and sd of tau[1]
 		beta[n,2] <- mu_effect + e_beta[n,2] * tau[2]; // rescale the "Matt trick"ed parameters to have mean of mu_effect and sd of tau[2]
 		n_mu[n,1] <- beta[n,1]-beta[n,2]/2; // condition 1 mu = intercept - effect/2
 		n_mu[n,2] <- beta[n,1]+beta[n,2]/2; // condition 2 mu = intercept + effect/2
 	}
 }
 model {
 	//First, define a temporary variable
 	vector[L] mu_temp;
 	// Next, specify the model priors (Note: implicit uniform priors on tau[1], tau[2], and Tau[1,2])
 	mu ~ normal(0,100); // Gaussian prior on mu
 	mu_effect ~ normal(0,100); //Gaussian prior on mu_effect
 	sigma ~ lognormal(4,1); //logNormal prior on sigma
 	// Sample the multivariate for the "Matt trick"
 	for (n in 1:N){
 		e_beta[n] ~ multi_normal(zero_vector, Tau);
 	}
 	// iterate through the unit/condition data to generate values for mu_temp from n_mu
 	for(i in 1:L){
 		mu_temp[i] <- n_mu[unit[i],condition[i]];
 	}
 	// now define y as a gaussian function of mu_temp as mean and sigma as sd
 	y ~ normal(mu_temp, sigma);
 }
 '

 ########
 # Run the model in stan
 ########
 library(rstan)

 #set the compiler mode to "fast"
 set_cppo(mode = "fast")

 #set a random seed for replicability
 set.seed(1) 

 #Compile the model
 start = proc.time()[3]
 initial_fit <- stan(
 	model_code = stan_code
 	, data = dat
 	, iter = 1
 	, chains = 1
 	, pars = c('mu','mu_effect','sigma','tau','Tau')
 )
 print(proc.time()[3]-start)


 #now run the chains in multi-core (requires that you be running from Terminal)
 library(doMC)
 options(cores=4) #set this appropriate to your system
 registerDoMC()
 f <- function(i){
 	fit <- stan(
 		fit = initial_fit
 		, data = dat
 		, iter = 10000
 		, chains = 1
 		, verbose = FALSE
 		# , refresh = -1
 		, pars = c('mu','mu_effect','sigma','tau','Tau')
 	)
 	save(fit,file=paste(i,'.RData',sep='')) #save output of each chain in case some take longer than others (permits you to check the finished chains)
 	return(fit)	
 }
 start = proc.time()[3]
 parallel_fit <- foreach(i = 1:4) %dopar% f(i)
 print(proc.time()[3]-start)


 #visualize the samples obtained
 library(plyr)
 i=1
 samples = ldply(
 	.data = parallel_fit
 	, .fun = function(x){
 		x = attr(x,'sim')$samples
 		x = data.frame(x)
 		x = x[,names(x)%in%c('mu','mu_effect','sigma','tau.1.','tau.2.','Tau.1,2.')]
 		x$iteration = 1:nrow(x)
 		x$chain = i
 		i <<- i+1
 		return(x)
 	}
 )

 #reformat to long
 n = names(samples)[1:(ncol(samples)-2)]
 samples_long = reshape(samples,direction='long',varying=n,v.names='value',times=n,timevar='var')
 row.names(samples_long) = 1:nrow(samples_long)


 #visualize the samples
 ggplot(
 	data = samples_long[samples_long$iteration>(max(samples_long$iteration)/2),]
 	, mapping = aes(
 		x = value
 		, fill = I(factor(chain))
 	)
 )+
 geom_density(
 	position = 'identity'
 	, alpha = .5
 	, colour = 'transparent'
 	# , fill = 'black'
 )+
 facet_wrap(
 	~ var
 	, scales = 'free'
 )


 #alternative approach to combining results from parallel chains that makes things directly visualizable via ggmcmc
 library(ggmcmc)
 library(coda) #for mcmc.list
 source('stanfit-class.R') #available from: https://code.google.com/p/stan/source/browse/rstan/rstan/R/stanfit-class.R
 source('misc.R') #available from: https://code.google.com/p/stan/source/browse/rstan/rstan/R/misc.R
 all_chains = sflist2stanfit(parallel_fit)
 s <- as.array(all_chains)
 included_parameters <- c('mu','mu_effect','sigma','tau[1]','tau[2]','Tau[1,2]')
 s.list=mcmc.list(list(
 	"chain:1"=mcmc(s[,1,included_parameters])
 	,"chain:2"=mcmc(s[,2,included_parameters])
 	,"chain:3"=mcmc(s[,3,included_parameters])
 	,"chain:4"=mcmc(s[,4,included_parameters])
 ))
 S <- ggs(s.list)
 ggmcmc(S,param.page=6)
	########
	# Define a data generating function and generate some data
	########
	library(MASS)
	generate_data = function(
	n # number of units of observation (ex. human participants in a cognitive science experiment)
	, k # number of observations made within each condition within each unit
	, noise # standard deviation of measurement noise
	, I # population intercept
	, sI # across-units standard deviation of intercepts
	, A # population A effect
	, sA # across-units standard deviation of A effects
	, rIA # across-units correlation between intercepts and A effects
	){
	vI = sI^2
	vA = sA^2
	Sigma = c(
	vI , sqrt(vIvA)rIA
	, sqrt(vIvA)rIA , vA
	)
	Sigma = matrix(Sigma,2,2)
	means = mvrnorm(n,c(I,A),Sigma)
	temp = expand.grid(A=factor(c('a1','a2')),value=0)
	contrasts(temp$A) = c(-.5,.5)
	from_terms = terms(value~A)
	mm = model.matrix(from_terms,temp)
	data = expand.grid(A=c('a1','a2'),unit=1:n,trial=1:k)
	for(i in 1:n){
	data$value[data$unit==i] = as.numeric(mm %*% means[i,])
	}
	data$value = rnorm(nrow(data),data$value,noise)
	data$unit = factor(data$unit)
	data$A = factor(data$A)
	contrasts(data$A) = c(-.5,.5)
	return(data)
	}

	#set a random seed for replicability
	set.seed(1)

	#generate some data
	data = generate_data(
	n = 100
	, k = 100
	, noise = 100
	, I = 500
	, sI = 100
	, A = 20
	, sA = 10
	, rIA = .5
	)

	#show what the data looks like
	print(head(data))
	print(str(data))


	########
	# Prepare data for stan
	########

	#recode factor variable "A" to numeric value indicating level number (a1=1, a2=2)
	data$A_num = as.numeric(data$A)

	#recode factor variable unit to numeric value indicating level number
	data$unit_num = as.numeric(data$unit)

	print(head(data))
	print(str(data))

	dat = list(
	N = length(unique(data$unit))
	, L = nrow(data)
	, unit = data$unit_num
	, condition = data$A_num
	, y = data$value
	)

	########
	# Specify the stan model
	########

	stan_code = '
	data {
	int N;
	int L;
	int unit[L];
	int condition[L];
	real y[L];
	}
	transformed data {
	vector[2] zero_vector; // dummy parameter to employ the "Matt trick" for multivariate models
	real y_mean; // to store the mean of the data, which in turn lets us use a zero-centered "shift" parameter
	for (k in 1:2) {
	zero_vector[k] <- 0;
	}
	y_mean <- mean(y);
	}
	parameters {
	// define the parameters and their domains
	real shift; // "trick" parameter representing the shift from y_mean to mu (see transformed parameters below)
	real mu_effect; // difference between conditions
	real<lower=0> sigma; // standard deviation of measurement noise
	real<lower=0> tau[2]; // across-units standard deviations in mu & mu_effect
	corr_matrix[2] Tau; // across-units correlation matrix for deviations in mu & mu_effect
	vector[2] e_beta[N]; // dummy parameter to employ the "Matt trick" for multivariate models
	}
	transformed parameters {
	real mu; // the intercept of the model
	vector[2] beta[N]; // intermediate parameter, for employing the "Matt trick"
	vector[2] n_mu[N]; // mu for each participant in each condition
	mu <- y_mean + shift; // this helps search because the algorithm can search for shift values around zero
	for (n in 1:N) { // compute the sampled mu values for each person-by-condition
	beta[n,1] <- mu + e_beta[n,1] * tau[1]; // rescale the "Matt trick"ed parameters to have mean of mu and sd of tau[1]
	beta[n,2] <- mu_effect + e_beta[n,2] * tau[2]; // rescale the "Matt trick"ed parameters to have mean of mu_effect and sd of tau[2]
	n_mu[n,1] <- beta[n,1]-beta[n,2]/2; // condition 1 mu = intercept - effect/2
	n_mu[n,2] <- beta[n,1]+beta[n,2]/2; // condition 2 mu = intercept + effect/2
	}
	}
	model {
	//First, define a temporary variable
	vector[L] mu_temp;
	// Next, specify the model priors (Note: implicit uniform priors on tau[1], tau[2], and Tau[1,2])
	mu ~ normal(0,100); // Gaussian prior on mu
	mu_effect ~ normal(0,100); //Gaussian prior on mu_effect
	sigma ~ lognormal(4,1); //logNormal prior on sigma
	// Sample the multivariate for the "Matt trick"
	for (n in 1:N){
	e_beta[n] ~ multi_normal(zero_vector, Tau);
	}
	// iterate through the unit/condition data to generate values for mu_temp from n_mu
	for(i in 1:L){
	mu_temp[i] <- n_mu[unit[i],condition[i]];
	}
	// now define y as a gaussian function of mu_temp as mean and sigma as sd
	y ~ normal(mu_temp, sigma);
	}
	'

	########
	# Run the model in stan
	########
	library(rstan)

	#set the compiler mode to "fast"
	set_cppo(mode = "fast")

	#set a random seed for replicability
	set.seed(1)

	#Compile the model
	start = proc.time()[3]
	initial_fit <- stan(
	model_code = stan_code
	, data = dat
	, iter = 1
	, chains = 1
	, pars = c('mu','mu_effect','sigma','tau','Tau')
	)
	print(proc.time()[3]-start)


	#now run the chains in multi-core (requires that you be running from Terminal)
	library(doMC)
	options(cores=4) #set this appropriate to your system
	registerDoMC()
	f <- function(i){
	fit <- stan(
	fit = initial_fit
	, data = dat
	, iter = 10000
	, chains = 1
	, verbose = FALSE
	# , refresh = -1
	, pars = c('mu','mu_effect','sigma','tau','Tau')
	)
	save(fit,file=paste(i,'.RData',sep='')) #save output of each chain in case some take longer than others (permits you to check the finished chains)
	return(fit)
	}
	start = proc.time()[3]
	parallel_fit <- foreach(i = 1:4) %dopar% f(i)
	print(proc.time()[3]-start)


	#visualize the samples obtained
	library(plyr)
	i=1
	samples = ldply(
	.data = parallel_fit
	, .fun = function(x){
	x = attr(x,'sim')$samples
	x = data.frame(x)
	x = x[,names(x)%in%c('mu','mu_effect','sigma','tau.1.','tau.2.','Tau.1,2.')]
	x$iteration = 1:nrow(x)
	x$chain = i
	i <<- i+1
	return(x)
	}
	)

	#reformat to long
	n = names(samples)[1:(ncol(samples)-2)]
	samples_long = reshape(samples,direction='long',varying=n,v.names='value',times=n,timevar='var')
	row.names(samples_long) = 1:nrow(samples_long)


	#visualize the samples
	ggplot(
	data = samples_long[samples_long$iteration>(max(samples_long$iteration)/2),]
	, mapping = aes(
	x = value
	, fill = I(factor(chain))
	)
	)+
	geom_density(
	position = 'identity'
	, alpha = .5
	, colour = 'transparent'
	# , fill = 'black'
	)+
	facet_wrap(
	~ var
	, scales = 'free'
	)


	#alternative approach to combining results from parallel chains that makes things directly visualizable via ggmcmc
	library(ggmcmc)
	library(coda) #for mcmc.list
	source('stanfit-class.R') #available from: https://code.google.com/p/stan/source/browse/rstan/rstan/R/stanfit-class.R
	source('misc.R') #available from: https://code.google.com/p/stan/source/browse/rstan/rstan/R/misc.R
	all_chains = sflist2stanfit(parallel_fit)
	s <- as.array(all_chains)
	included_parameters <- c('mu','mu_effect','sigma','tau[1]','tau[2]','Tau[1,2]')
	s.list=mcmc.list(list(
	"chain:1"=mcmc(s[,1,included_parameters])
	,"chain:2"=mcmc(s[,2,included_parameters])
	,"chain:3"=mcmc(s[,3,included_parameters])
	,"chain:4"=mcmc(s[,4,included_parameters])
	))
	S <- ggs(s.list)
	ggmcmc(S,param.page=6)