Created
November 30, 2016 22:18
-
-
Save mike-lawrence/a124e7ae144ecdab93f36f87ef8c513e to your computer and use it in GitHub Desktop.
Multiple comarisons & Stan
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data { | |
#N: number of cases | |
int N ; | |
#: K: number of outcome variables | |
int K ; | |
#X: matrix of outcomes for each case & variable | |
matrix[N,K] X ; | |
#Y: outcomes | |
vector[N] Y; | |
} | |
parameters { | |
#intercept: | |
real intercept ; | |
#betas: effects of each predictor | |
vector[K] betas ; | |
#sigma: measurement error | |
real<lower=0> sigma ; | |
#tau: estimate of sd of betas | |
real<lower=0> tau ; | |
#nu: log(df) of betas | |
real<lower=0> nu ; | |
} | |
model { | |
#priors on population parameters; presumes data has been scale()ed | |
intercept ~ normal(0,1) ; | |
betas ~ student_t(exp(nu),0,tau) ; | |
sigma ~ weibull(2,1) ; | |
tau ~ weibull(2,1) ; | |
nu ~ normal(0,1); | |
#outcomes modelled as normal with mean determined by predictors and noise | |
Y ~ normal( intercept + X*betas , sigma ) ; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Load packages ---- | |
library(rstan) | |
rstan_options(auto_write = TRUE) | |
#Create some fake data ---- | |
#K: number of predictors | |
K = 1e3 | |
#N: number of outcome instances observed | |
N = K*2 | |
#set seed for replicability | |
set.seed(1) | |
#X: matrix of predictors | |
X = matrix( | |
rnorm(N*K) | |
, nrow = N | |
, ncol = K | |
) | |
#Y: outcome is unrelated to any predictor | |
Y = rnorm(N) | |
#package the data for Stan | |
data_for_stan = list( | |
N = N | |
, K = K | |
, X = scale(X) | |
, Y = scale(Y)[,1] | |
) | |
#Try a frequentist regression ---- | |
fit = lm( Y ~ X) | |
CIs = confint(fit) | |
sig = (CIs[,1]>0) | (CIs[,2]<0) | |
mean(sig) #false alarm rate, 5%-ish | |
#compile & sample the unpooled model ---- | |
unpooled = rstan::stan_model('unpooled.stan') | |
post_unpooled = rstan::sampling( | |
object = unpooled | |
, data = data_for_stan | |
, seed = 1 | |
, cores = 4 | |
, iter = 500 # 500 takes about 5min | |
) | |
#compute proportion of betas that exclude zero | |
betas = data.frame(summary(post_unpooled,pars='betas',probs=c(.025,.975))$summary) | |
betas$sig = (betas$X2.5.>0) | (betas$X97.5.<0) | |
mean(betas$sig) #5%-ish | |
#compile & sample the partially-pooled model ---- | |
partially_pooled = rstan::stan_model('partially_pooled.stan') | |
post_partially_pooled = rstan::sampling( | |
object = partially_pooled | |
, data = data_for_stan | |
, seed = 1 | |
, cores = 4 | |
, iter = 500 # 2 takes about 5min | |
) | |
#compute proportion of betas that exclude zero | |
betas = data.frame(summary(post_partially_pooled,pars='betas',probs=c(.025,.975))$summary) | |
betas$sig = (betas$X2.5.>0) | (betas$X97.5.<0) | |
mean(betas$sig) #0%-ish | |
#Add a predictor that does relate to the outcome ---- | |
#set the random seed again for replicability (just in case) | |
set.seed(2) | |
#Y: outcome, associated with only the first predictor | |
Y = X[,1] + rnorm(N) | |
#repackage the data | |
data_for_stan = list( | |
N = N | |
, K = K | |
, X = scale(X) | |
, Y = scale(Y)[,1] | |
) | |
#Try the partially-pooled model again ---- | |
post_partially_pooled = rstan::sampling( | |
object = partially_pooled | |
, data = data_for_stan | |
, seed = 1 | |
, cores = 4 | |
, iter = 500 # 500 takes about 3min | |
) | |
#compute proportion of betas that exclude zero | |
betas = data.frame(summary(post_partially_pooled,pars='betas',probs=c(.025,.975))$summary) | |
betas$sig = (betas$X2.5.>0) | (betas$X97.5.<0) | |
mean(betas$sig[2:nrow(betas)]) #2%-ish | |
#Compile & sample the heavy-tailed partially-pooled model ---- | |
heavy_partially_pooled = rstan::stan_model('heavy_partially_pooled.stan') | |
post_heavy_partially_pooled = rstan::sampling( | |
object = heavy_partially_pooled | |
, data = data_for_stan | |
, seed = 3 #chain 3 gets stuck with seed=1 | |
, cores = 4 | |
, iter = 500 # 500 takes about 3min | |
) | |
#compute proportion of betas that exclude zero | |
betas = data.frame(summary(post_heavy_partially_pooled,pars='betas',probs=c(.025,.975))$summary) | |
betas$sig = (betas$X2.5.>0) | (betas$X97.5.<0) | |
mean(betas$sig[2:nrow(betas)]) #0% | |
betas[1,] #check the beta expected to exclude zero | |
#check ESS/rhat for tau & nu | |
print( | |
post_heavy_partially_pooled | |
, pars = c('tau','nu') | |
, probs = c(.025,.975) | |
, digits = 2 | |
) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data { | |
#N: number of cases | |
int N ; | |
#: K: number of outcome variables | |
int K ; | |
#X: matrix of outcomes for each case & variable | |
matrix[N,K] X ; | |
#Y: outcomes | |
vector[N] Y; | |
} | |
parameters { | |
#intercept: | |
real intercept ; | |
#betas: effects of each predictor | |
vector[K] betas ; | |
#sigma: measurement error | |
real<lower=0> sigma ; | |
#tau: estimate of sd of betas | |
real<lower=0> tau ; | |
} | |
model { | |
#priors on population parameters; presumes data has been scale()ed | |
intercept ~ normal(0,1) ; | |
betas ~ normal(0,tau) ; | |
sigma ~ weibull(2,1) ; | |
tau ~ weibull(2,1) ; | |
#outcomes modelled as normal with mean determined by predictors and noise | |
Y ~ normal( intercept + X*betas , sigma ) ; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data { | |
#N: number of cases | |
int N ; | |
#: K: number of outcome variables | |
int K ; | |
#X: matrix of outcomes for each case & variable | |
matrix[N,K] X ; | |
#Y: outcomes | |
vector[N] Y; | |
} | |
parameters { | |
#intercept: | |
real intercept ; | |
#betas: effects of each predictor | |
vector[K] betas ; | |
#sigma: measurement error | |
real<lower=0> sigma ; | |
} | |
model { | |
#priors on population parameters; presumes data has been scale()ed | |
intercept ~ normal(0,1) ; | |
betas ~ normal(0,1) ; | |
sigma ~ weibull(2,1) ; | |
#outcomes modelled as normal with mean determined by predictors and noise | |
Y ~ normal( intercept + X*betas , sigma ) ; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment