-
-
Save rmcelreath/68fb40bf7a3e73bf2623e10a5973216d to your computer and use it in GitHub Desktop.
| # proxy confounder problem | |
| # cf https://twitter.com/y2silence/status/1251151157264674816 | |
| # note that I rename X1,X2 to A,B and A to X | |
| # The DAG: | |
| # X -> Y | |
| # X <- U -> Y | |
| # A <- U -> B | |
| # Can we use A and B to deconfound X->Y? | |
| # Depends upon functions relating the variables | |
| # But in linear/gaussian case, progress possible | |
| # simulate | |
| N <- 500 | |
| U <- rnorm(N) | |
| X <- rnorm(N, U ) | |
| Y <- rnorm(N, 0.5*X - U ) | |
| A <- rnorm(N, U ) | |
| B <- rnorm(N, U ) | |
| # basic regression says what? | |
| library(rethinking) | |
| precis( lm(Y ~ X) ) # confounded naturally | |
| # now full bayes model of SCM | |
| # missing data version - works remarkably well | |
| # treat each U as unobserved and assign it a parameter | |
| # rest is just the data generating model | |
| # techinical issue here is that U values can flip pos/neg across chains and then the coefficients flip pos/neg to match | |
| # but bXY seems to work fine despite this | |
| stan_code <- " | |
| data{ | |
| int N; | |
| vector[N] Y; | |
| vector[N] X; | |
| vector[N] A; | |
| vector[N] B; | |
| } | |
| parameters{ | |
| vector[4] a; | |
| real bXY; | |
| real bUX; | |
| real bUY; | |
| real bUA; | |
| real<upper=bUA> bUB; | |
| vector<lower=0>[4] sigma; | |
| vector[N] U; | |
| } | |
| model{ | |
| vector[N] muY; | |
| vector[N] muX; | |
| vector[N] muA; | |
| vector[N] muB; | |
| sigma ~ exponential( 1 ); | |
| bXY ~ normal( 0 , 1 ); | |
| bUY ~ normal( 0 , 1 ); | |
| bUX ~ normal( 0 , 1 ); | |
| bUA ~ normal( 0 , 1 ); | |
| bUB ~ normal( 0 , 1 ); | |
| a ~ normal( 0 , 1 ); | |
| U ~ normal( 0 , 1 ); | |
| // B <- U | |
| for ( i in 1:500 ) | |
| muB[i] = a[4] + bUB*U[i]; | |
| B ~ normal( muB , sigma[4] ); | |
| // A <- U | |
| for ( i in 1:500 ) | |
| muA[i] = a[3] + bUA*U[i]; | |
| A ~ normal( muA , sigma[3] ); | |
| // X <- U | |
| for ( i in 1:500 ) | |
| muX[i] = a[2] + bUX*U[i]; | |
| X ~ normal( muX , sigma[2] ); | |
| // U -> Y <- X | |
| for ( i in 1:500 ) | |
| muY[i] = a[1] + bXY*X[i] + bUY*U[i]; | |
| Y ~ normal( muY , sigma[1] ); | |
| } | |
| " | |
| dat <- list(N=N,Y=Y,X=X,A=A,B=B) | |
| ms <- stan( model_code=stan_code , data=dat , chains=1 , cores=4 , control=list(adapt_delta=0.99) , iter=4000 ) | |
| precis(ms) | |
| # covariance version marginalizing over unobserved U | |
| # need to express paths in the covariance matrix | |
| # this has pos/neg flipping issues with all the correlations with U | |
| # but again gets the X -> Y right | |
| stan_marginal <- " | |
| data{ | |
| int N; | |
| vector[N] B; | |
| vector[N] A; | |
| vector[N] Y; | |
| vector[N] X; | |
| } | |
| transformed data{ | |
| vector[4] YY[N]; | |
| for ( j in 1:N ) YY[j] = [ Y[j] , X[j] , A[j] , B[j] ]'; | |
| } | |
| parameters{ | |
| real aB; | |
| real aA; | |
| real aX; | |
| real aY; | |
| real rXY; | |
| real rUX; | |
| real rUA; | |
| real rUB; | |
| real rUY; | |
| vector<lower=0>[4] s; // std dev of each obs var | |
| } | |
| model{ | |
| vector[N] muY; | |
| matrix[4,4] SIGMA; | |
| s ~ exponential( 1 ); | |
| rXY ~ normal( 0 , 1 ); | |
| rUY ~ normal( 0 , 1 ); | |
| rUX ~ normal( 0 , 1 ); | |
| rUA ~ normal( 0 , 1 ); | |
| rUB ~ normal( 0 , 1 ); | |
| aY ~ normal( 0 , 1 ); | |
| aX ~ normal( 0 , 1 ); | |
| aA ~ normal( 0 , 1 ); | |
| aB ~ normal( 0 , 1 ); | |
| // build covariance matrix from path model | |
| for( i in 1:4 ) SIGMA[i,i] = s[i]^2; | |
| SIGMA[1,2] = rUY*rUX + rXY; // YX | |
| SIGMA[1,3] = rUY*rUA; // YA | |
| SIGMA[1,4] = rUY*rUB; // YB | |
| SIGMA[2,3] = rUX*rUA; // XA | |
| SIGMA[2,4] = rUX*rUB; // XB | |
| SIGMA[3,4] = rUA*rUB; // AB | |
| for ( i in 1:3 ) for ( j in (i+1):4 ) SIGMA[j,i] = SIGMA[i,j]; | |
| { | |
| vector[4] MU; | |
| MU = [ aY , aX , aA , aB ]'; | |
| YY ~ multi_normal( MU , SIGMA ); | |
| } | |
| } | |
| " | |
| # priors not constrained to produce positive definite covariance matrix | |
| # chains will sputter while trying to sample valid initial matrix | |
| # some inits could help with adaptation | |
| m2s <- stan( model_code=stan_marginal , data=list( N=N , Y=Y , X=X , A=A, B=B ) , chains=1, cores=4 ) | |
| precis(m2s) | |
| # blavaan | |
| # works but priors are quite different | |
| library(blavaan) | |
| m4 <- 'U =~ A + B + X + Y | |
| Y ~ X' | |
| mb <- bsem( m4, data=data.frame( Y=Y , X=X , A=A, B=B ) , bcontrol=list(cores=4) ) | |
| summary(mb) | |
| stancode( mb@external$mcmcout ) # this is unreadable |
If
So, if
Thanks, sounds tricky but makes sense. One other clarification:
Stan continues to run but it is not drawing from the posterior distribution that it would draw from if you incorporated the truncation term.
I see two possible implications:
- The priors you think you are using are not the priors you are actually using.
- The posterior might be improper, because you are using an improper prior.
Is that right, and am I missing other implications?
I don't think (2) is an issue because if you had priors that integrated to 1 before considering that the positive definiteness constraint took a bite out of the parameter space, then whatever is left integrates to some non-constant that is less than 1. But there is also (3) that Stan might not be able to sample efficiently enough for the MCMC CLT to hold.

Ben, say that I want a prior for a 4x4 covariance matrix, except the model specification requires that two off-diagonal entries must be fixed to 0 (say, sigma_12 and sigma_34). What do you do to enforce positive definite there? Something like lkj would be over the whole correlation matrix, without fixed 0s. I typically use a beta prior for each individual correlation, where beta has support on (-1,1), but I can see that possibly being problematic.