Skip to content

Instantly share code, notes, and snippets.

@stonegold546
Last active October 17, 2023 15:02
Show Gist options
  • Save stonegold546/d2e1e7d99e7f95cf5c4a25956fbad3b3 to your computer and use it in GitHub Desktop.
Save stonegold546/d2e1e7d99e7f95cf5c4a25956fbad3b3 to your computer and use it in GitHub Desktop.
Stan example with left-censored predictor at 0
library(rstan)
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)
# Simulate an example
set.seed(1234)
hist(x_true <- rt(2e2, 3) * 1.5)
# Create outcome
hist(y <- scale(rnorm(length(x_true), -.075 * x_true))[, 1])
# Create censored predictor from true predictor
x_cens <- x_true
x_cens[x_true < 0] <- 0
plot(y ~ x_true)
plot(y ~ x_cens)
summary(lm(y ~ x_true))
summary(lm(y ~ x_cens)) # less power
# Analysis begins here
dat.list <- list(
y = y, # outcome variable
cens_pred = x_cens, # censored predictor
p = 0 # number of other predictors
)
dat.list$N <- length(dat.list$y) # Leave unchanged
# Leave next line unchanged if number of other predictors = 0
# otherwise, simply set dat.list$X <- (matrix of other predictors)
dat.list$X <- matrix(nrow = length(y), ncol = dat.list$p)
# Run Stan model, takes some time first run
fit <- stan(file = "cens_pred_mod.stan", data = dat.list)
# Print results for major parameters
print(fit, c("cens_vals"), include = FALSE)
# Print results for regression parameters only
print(fit, c("intercept", "beta_cens", "beta", "sigma"), digits_summary = 3)
coef(summary(lm(y ~ x_true)))
coef(summary(lm(y ~ x_cens)))
// Linear regression with left-censored (at 0) predictor
// Assumes the left-censored predictor is t-distributed
functions {
int count_0(vector x) {
int n = 0;
for (i in 1:num_elements(x)) if (x[i] == 0) n += 1;
return n;
}
}
data {
int N;
int p;
vector[N] cens_pred;
matrix[N, p] X;
vector[N] y;
}
transformed data {
int N_cens = count_0(cens_pred);
int cens_idx[N_cens]; // index censored cases
{
int pos = 0;
for (i in 1:N) {
if (cens_pred[i] == 0) {
pos += 1;
cens_idx[pos] = i;
}
}
}
}
parameters {
real<lower = 0> nu_pred; // predictor degrees of freedom
real loc_pred; // predictor location/mean
real<lower = 0> scale_pred; // predictor scale, proportional to predictor SD
real intercept; // regression intercept
real beta_cens; // coefficient of censored variable
vector[p] beta; // coefficient of other variables
real<lower = 0> sigma; // regression residual SD
vector<upper = 0>[N_cens] cens_vals; // estimates of true values that have been censored
}
model {
// Predictor priors
nu_pred ~ gamma(2, .1);
loc_pred ~ student_t(3, 0, 2.5);
scale_pred ~ student_t(3, 0, 2.5);
// Regression model priors
intercept ~ student_t(3, 0, 2.5);
beta_cens ~ student_t(3, 0, 2.5);
beta ~ student_t(3, 0, 2.5);
sigma ~ student_t(3, 0, 2.5);
{
vector[N] true_pred = cens_pred;
true_pred[cens_idx] = cens_vals;
true_pred ~ student_t(nu_pred, loc_pred, scale_pred);
if (p > 0) y ~ normal(intercept + true_pred * beta_cens + X * beta, sigma);
else y ~ normal(intercept + true_pred * beta_cens, sigma);
}
}
@stonegold546
Copy link
Author

stonegold546 commented Apr 15, 2021

Essentially, censored data are missing data with known limits. You could do this via multiple imputation if your multiple imputation software allows you draw from a truncated distribution.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment