Skip to content

Instantly share code, notes, and snippets.

@mike-lawrence
Created November 30, 2016 22:18
Show Gist options
  • Save mike-lawrence/a124e7ae144ecdab93f36f87ef8c513e to your computer and use it in GitHub Desktop.
Save mike-lawrence/a124e7ae144ecdab93f36f87ef8c513e to your computer and use it in GitHub Desktop.
Multiple comarisons & Stan
data {
#N: number of cases
int N ;
#: K: number of outcome variables
int K ;
#X: matrix of outcomes for each case & variable
matrix[N,K] X ;
#Y: outcomes
vector[N] Y;
}
parameters {
#intercept:
real intercept ;
#betas: effects of each predictor
vector[K] betas ;
#sigma: measurement error
real<lower=0> sigma ;
#tau: estimate of sd of betas
real<lower=0> tau ;
#nu: log(df) of betas
real<lower=0> nu ;
}
model {
#priors on population parameters; presumes data has been scale()ed
intercept ~ normal(0,1) ;
betas ~ student_t(exp(nu),0,tau) ;
sigma ~ weibull(2,1) ;
tau ~ weibull(2,1) ;
nu ~ normal(0,1);
#outcomes modelled as normal with mean determined by predictors and noise
Y ~ normal( intercept + X*betas , sigma ) ;
}
#Load packages ----
library(rstan)
rstan_options(auto_write = TRUE)
#Create some fake data ----
#K: number of predictors
K = 1e3
#N: number of outcome instances observed
N = K*2
#set seed for replicability
set.seed(1)
#X: matrix of predictors
X = matrix(
rnorm(N*K)
, nrow = N
, ncol = K
)
#Y: outcome is unrelated to any predictor
Y = rnorm(N)
#package the data for Stan
data_for_stan = list(
N = N
, K = K
, X = scale(X)
, Y = scale(Y)[,1]
)
#Try a frequentist regression ----
fit = lm( Y ~ X)
CIs = confint(fit)
sig = (CIs[,1]>0) | (CIs[,2]<0)
mean(sig) #false alarm rate, 5%-ish
#compile & sample the unpooled model ----
unpooled = rstan::stan_model('unpooled.stan')
post_unpooled = rstan::sampling(
object = unpooled
, data = data_for_stan
, seed = 1
, cores = 4
, iter = 500 # 500 takes about 5min
)
#compute proportion of betas that exclude zero
betas = data.frame(summary(post_unpooled,pars='betas',probs=c(.025,.975))$summary)
betas$sig = (betas$X2.5.>0) | (betas$X97.5.<0)
mean(betas$sig) #5%-ish
#compile & sample the partially-pooled model ----
partially_pooled = rstan::stan_model('partially_pooled.stan')
post_partially_pooled = rstan::sampling(
object = partially_pooled
, data = data_for_stan
, seed = 1
, cores = 4
, iter = 500 # 2 takes about 5min
)
#compute proportion of betas that exclude zero
betas = data.frame(summary(post_partially_pooled,pars='betas',probs=c(.025,.975))$summary)
betas$sig = (betas$X2.5.>0) | (betas$X97.5.<0)
mean(betas$sig) #0%-ish
#Add a predictor that does relate to the outcome ----
#set the random seed again for replicability (just in case)
set.seed(2)
#Y: outcome, associated with only the first predictor
Y = X[,1] + rnorm(N)
#repackage the data
data_for_stan = list(
N = N
, K = K
, X = scale(X)
, Y = scale(Y)[,1]
)
#Try the partially-pooled model again ----
post_partially_pooled = rstan::sampling(
object = partially_pooled
, data = data_for_stan
, seed = 1
, cores = 4
, iter = 500 # 500 takes about 3min
)
#compute proportion of betas that exclude zero
betas = data.frame(summary(post_partially_pooled,pars='betas',probs=c(.025,.975))$summary)
betas$sig = (betas$X2.5.>0) | (betas$X97.5.<0)
mean(betas$sig[2:nrow(betas)]) #2%-ish
#Compile & sample the heavy-tailed partially-pooled model ----
heavy_partially_pooled = rstan::stan_model('heavy_partially_pooled.stan')
post_heavy_partially_pooled = rstan::sampling(
object = heavy_partially_pooled
, data = data_for_stan
, seed = 3 #chain 3 gets stuck with seed=1
, cores = 4
, iter = 500 # 500 takes about 3min
)
#compute proportion of betas that exclude zero
betas = data.frame(summary(post_heavy_partially_pooled,pars='betas',probs=c(.025,.975))$summary)
betas$sig = (betas$X2.5.>0) | (betas$X97.5.<0)
mean(betas$sig[2:nrow(betas)]) #0%
betas[1,] #check the beta expected to exclude zero
#check ESS/rhat for tau & nu
print(
post_heavy_partially_pooled
, pars = c('tau','nu')
, probs = c(.025,.975)
, digits = 2
)
data {
#N: number of cases
int N ;
#: K: number of outcome variables
int K ;
#X: matrix of outcomes for each case & variable
matrix[N,K] X ;
#Y: outcomes
vector[N] Y;
}
parameters {
#intercept:
real intercept ;
#betas: effects of each predictor
vector[K] betas ;
#sigma: measurement error
real<lower=0> sigma ;
#tau: estimate of sd of betas
real<lower=0> tau ;
}
model {
#priors on population parameters; presumes data has been scale()ed
intercept ~ normal(0,1) ;
betas ~ normal(0,tau) ;
sigma ~ weibull(2,1) ;
tau ~ weibull(2,1) ;
#outcomes modelled as normal with mean determined by predictors and noise
Y ~ normal( intercept + X*betas , sigma ) ;
}
data {
#N: number of cases
int N ;
#: K: number of outcome variables
int K ;
#X: matrix of outcomes for each case & variable
matrix[N,K] X ;
#Y: outcomes
vector[N] Y;
}
parameters {
#intercept:
real intercept ;
#betas: effects of each predictor
vector[K] betas ;
#sigma: measurement error
real<lower=0> sigma ;
}
model {
#priors on population parameters; presumes data has been scale()ed
intercept ~ normal(0,1) ;
betas ~ normal(0,1) ;
sigma ~ weibull(2,1) ;
#outcomes modelled as normal with mean determined by predictors and noise
Y ~ normal( intercept + X*betas , sigma ) ;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment