Last active
October 17, 2023 15:02
-
-
Save stonegold546/d2e1e7d99e7f95cf5c4a25956fbad3b3 to your computer and use it in GitHub Desktop.
Stan example with left-censored predictor at 0
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(rstan) | |
options(mc.cores = parallel::detectCores()) | |
rstan_options(auto_write = TRUE) | |
# Simulate an example | |
set.seed(1234) | |
hist(x_true <- rt(2e2, 3) * 1.5) | |
# Create outcome | |
hist(y <- scale(rnorm(length(x_true), -.075 * x_true))[, 1]) | |
# Create censored predictor from true predictor | |
x_cens <- x_true | |
x_cens[x_true < 0] <- 0 | |
plot(y ~ x_true) | |
plot(y ~ x_cens) | |
summary(lm(y ~ x_true)) | |
summary(lm(y ~ x_cens)) # less power | |
# Analysis begins here | |
dat.list <- list( | |
y = y, # outcome variable | |
cens_pred = x_cens, # censored predictor | |
p = 0 # number of other predictors | |
) | |
dat.list$N <- length(dat.list$y) # Leave unchanged | |
# Leave next line unchanged if number of other predictors = 0 | |
# otherwise, simply set dat.list$X <- (matrix of other predictors) | |
dat.list$X <- matrix(nrow = length(y), ncol = dat.list$p) | |
# Run Stan model, takes some time first run | |
fit <- stan(file = "cens_pred_mod.stan", data = dat.list) | |
# Print results for major parameters | |
print(fit, c("cens_vals"), include = FALSE) | |
# Print results for regression parameters only | |
print(fit, c("intercept", "beta_cens", "beta", "sigma"), digits_summary = 3) | |
coef(summary(lm(y ~ x_true))) | |
coef(summary(lm(y ~ x_cens))) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Linear regression with left-censored (at 0) predictor | |
// Assumes the left-censored predictor is t-distributed | |
functions { | |
int count_0(vector x) { | |
int n = 0; | |
for (i in 1:num_elements(x)) if (x[i] == 0) n += 1; | |
return n; | |
} | |
} | |
data { | |
int N; | |
int p; | |
vector[N] cens_pred; | |
matrix[N, p] X; | |
vector[N] y; | |
} | |
transformed data { | |
int N_cens = count_0(cens_pred); | |
int cens_idx[N_cens]; // index censored cases | |
{ | |
int pos = 0; | |
for (i in 1:N) { | |
if (cens_pred[i] == 0) { | |
pos += 1; | |
cens_idx[pos] = i; | |
} | |
} | |
} | |
} | |
parameters { | |
real<lower = 0> nu_pred; // predictor degrees of freedom | |
real loc_pred; // predictor location/mean | |
real<lower = 0> scale_pred; // predictor scale, proportional to predictor SD | |
real intercept; // regression intercept | |
real beta_cens; // coefficient of censored variable | |
vector[p] beta; // coefficient of other variables | |
real<lower = 0> sigma; // regression residual SD | |
vector<upper = 0>[N_cens] cens_vals; // estimates of true values that have been censored | |
} | |
model { | |
// Predictor priors | |
nu_pred ~ gamma(2, .1); | |
loc_pred ~ student_t(3, 0, 2.5); | |
scale_pred ~ student_t(3, 0, 2.5); | |
// Regression model priors | |
intercept ~ student_t(3, 0, 2.5); | |
beta_cens ~ student_t(3, 0, 2.5); | |
beta ~ student_t(3, 0, 2.5); | |
sigma ~ student_t(3, 0, 2.5); | |
{ | |
vector[N] true_pred = cens_pred; | |
true_pred[cens_idx] = cens_vals; | |
true_pred ~ student_t(nu_pred, loc_pred, scale_pred); | |
if (p > 0) y ~ normal(intercept + true_pred * beta_cens + X * beta, sigma); | |
else y ~ normal(intercept + true_pred * beta_cens, sigma); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Essentially, censored data are missing data with known limits. You could do this via multiple imputation if your multiple imputation software allows you draw from a truncated distribution.