Created
March 21, 2017 15:10
-
-
Save mike-lawrence/dd2435f290a567bd1fd03370ee669688 to your computer and use it in GitHub Desktop.
Demo using Stan for Confirmatory Factor Analysis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data{ | |
# n_subj: number of subjects | |
int n_subj ; | |
# n_y: number of outcomes | |
int n_y ; | |
# y: matrix of outcomes | |
matrix[n_subj,n_y] y ; | |
# n_fac: number of latent factors | |
int n_fac ; | |
# y_fac: list of which factor is associated with each outcome | |
int<lower=1,upper=n_fac> y_fac[n_y] ; | |
} | |
transformed data{ | |
# scaled_y: z-scaled outcomes | |
matrix[n_subj,n_y] scaled_y ; | |
for(i in 1:n_y){ | |
scaled_y[,i] = (y[,i]-mean(y[,i])) / sd(y[,i]) ; | |
} | |
} | |
parameters{ | |
# normal01: a helper variable for more efficient non-centered multivariate | |
# sampling of subj_facs | |
matrix[n_fac, n_subj] normal01; | |
# fac_cor_helper: correlations (on cholesky factor scale) amongst latent | |
# factors | |
cholesky_factor_corr[n_fac] fac_cor_helper ; | |
# scaled_y_means: means for each outcome | |
vector[n_y] scaled_y_means ; | |
# scaled_y_noise: measurement noise for each outcome | |
vector<lower=0>[n_y] scaled_y_noise ; | |
# betas: how each factor loads on to each outcome. | |
# Must be postive for identifiability. | |
vector<lower=0>[n_y] betas ; | |
} | |
transformed parameters{ | |
# subj_facs: matrix of values for each factor associated with each subject | |
matrix[n_subj,n_fac] subj_facs ; | |
# the following calculation implies that rows of subj_facs are sampled from | |
# a multivariate normal distribution with mean of 0 and sd of 1 in all | |
# dimensions and a correlation matrix of fac_cor. | |
subj_facs = transpose(fac_cor_helper * normal01) ; | |
} | |
model{ | |
# normal01 must have normal(0,1) prior for "non-centered" parameterization | |
# on the multivariate distribution of latent factors | |
to_vector(normal01) ~ normal(0,1) ; | |
# flat prior on correlations | |
fac_cor_helper ~ lkj_corr_cholesky(1) ; | |
# normal prior on y means | |
scaled_y_means ~ normal(0,1) ; | |
# weibull prior on measurement noise | |
scaled_y_noise ~ weibull(2,1) ; | |
# normal(0,1) priors on betas | |
betas ~ normal(0,1) ; | |
# each outcome as normal, influenced by its associated latent factor | |
for(i in 1:n_y){ | |
scaled_y[,i] ~ normal( | |
scaled_y_means[i] + subj_facs[,y_fac[i]] * betas[i] | |
, scaled_y_noise[i] | |
) ; | |
} | |
} | |
generated quantities{ | |
#cors: correlations amongst within-subject predictors' effects | |
corr_matrix[n_fac] fac_cor ; | |
# y_means: outcome means on the original scale of Y | |
vector[n_y] y_means ; | |
# y_noise: outcome noise on the original scale of Y | |
vector[n_y] y_noise ; | |
fac_cor = multiply_lower_tri_self_transpose(fac_cor_helper) ; | |
for(i in 1:n_y){ | |
y_means[i] = scaled_y_means[i]*sd(y[,i]) + mean(y[,i]) ; | |
y_noise[i] = scaled_y_noise[i]*sd(y[,i]) ; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(rstan) | |
rstan_options(auto_write = TRUE) | |
# get some data from the lavaan package | |
# install.packages('lavaan') | |
library(lavaan) | |
data(HolzingerSwineford1939) | |
# a visual factor measured by 3 variables: x1, x2 and x3 | |
# a textual factor measured by 3 variables: x4, x5 and x6 | |
# a speed factor measured by 3 variables: x7, x8 and x9 | |
#subset the data down to a few columns | |
HolzingerSwineford1939 %>% | |
dplyr::select( | |
x1,x2,x3,x4,x5,x6,x7,x8,x9 | |
) -> dat | |
#prep the list of data to pass to Stan | |
data_for_stan = list( | |
# n_y: number of outcomes | |
n_y = ncol(dat) | |
# n_subj: number of subjects | |
, n_subj = nrow(dat) | |
# y: matrix of outcomes | |
, y = as.matrix(dat) | |
# n_fac: number of latent factors | |
, n_fac = 3 | |
# y_fac: list of which factor is associated with each outcome | |
, y_fac = c(1,1,1,2,2,2,3,3,3) | |
) | |
# Sample the model | |
post = rstan::stan( | |
file = 'cfa.stan' | |
, data = data_for_stan | |
, seed = 1 | |
, cores = 4 | |
, chains = 4 | |
, iter = 2e3 | |
, pars = c('normal01','fac_cor_helper') | |
, include = FALSE | |
) | |
# betas: magnitude of relationship between each outcome & it's associated | |
# latent factor | |
print( | |
post | |
, pars = 'betas' | |
, probs = c(.025,.975) | |
, digits = 2 | |
, use_cache = FALSE | |
) | |
# fac_cor: correlations amongst latent factors | |
print( | |
post | |
, pars = 'fac_cor' | |
, probs = c(.025,.975) | |
, digits = 2 | |
, use_cache = FALSE | |
) | |
# y_means: outcome means | |
print( | |
post | |
, pars = 'y_means' | |
, probs = c(.025,.975) | |
, digits = 2 | |
, use_cache = FALSE | |
) | |
# y_means: outcome measurement noise | |
print( | |
post | |
, pars = 'y_noise' | |
, probs = c(.025,.975) | |
, digits = 2 | |
, use_cache = FALSE | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment