set.seed(123)
# Number of predictors
P <- 10
# Sample size
N <- 10000
# Generate variance covariance matrix as toeplitz
V <- toeplitz(P:1/P)
# Relative size of coefficients
b <- 1:P
# Set coefficient of determination
R2 <- 0.5
# Calculate regression coefficients
b <- b * c(sqrt(R2 / b %*% V %*% b))
# Generate data
X <- matrix(rnorm(N * P), nrow = N, ncol = P) %*% chol(V)
y <- X %*% b + rnorm(N, 0, sqrt(1-R2))
# Collect data in data frame
df <- data.frame(X = X, y = y)
# Induce missings
amp <- mice::ampute(
  df, 
  prop = 0.5, 
  patterns = 1 - lower.tri(matrix(1, P, P+1), TRUE),
  mech = "MCAR"
)$amp
# Perform regression imputation
predimp <- mice::mice(
  amp, 
  m = 1, 
  method = "norm.predict", 
  predictorMatrix = cbind(1 - diag(nrow = P+1, ncol = P), 0),
  print = FALSE
)
# Perform multiple imputation
miimp <- mice::mice(
  amp, m = 10, maxit = 10, method = "norm", print = FALSE
)
# Fit quantile regression model with regression imputation data
predfit <- with(
  predimp, quantreg::rq(y~X.1 + X.2 + X.3 + X.4 + X.5, tau = c(0.1, 0.5, 0.9))
)
# Calculate prediction interval for y
predint <- predfit$analyses[[1]] |> predict()
# Fit quantile regression model with multiple imputation data
mifit <- with(
  miimp, quantreg::rq(y~X.1 + X.2 + X.3 + X.4 + X.5, tau = c(0.1, 0.5, 0.9))
)
# Calculate quantile regression prediction interval
miint <- lapply(mifit$analyses, predict) |>
  unlist() |>
  array(dim = c(N, 3, 10)) |>
  apply(c(1,2), mean)
# Plot predicted versus observed y values with corresponding prediction intervals
# under regression imputation
predorder <- order(predint[,2])
plot(predint[predorder,2], y[predorder])
points(predint[predorder, 2][rowSums(is.na(amp)) >= 4],
       y[predorder][rowSums(is.na(amp)) >= 4], col = "red")
segments(x0 = predint[predorder, 2],
         y0 = predint[predorder, 1],
         y1 = predint[predorder, 3],
         col = "lightblue")# Evaluate marginal and conditional coverage of prediction intervals with
# corresponding interval width under regression imputation
cbind(amp, predint) |>
  dplyr::mutate(sumna = rowSums(is.na(amp))) |>
  dplyr::group_by(sumna) |>
  dplyr::summarise(pred = mean(`2`),
                   cov = mean(`1` < y & y < `3`),
                   width = mean(`3` - `1`)) |>
  dplyr::bind_rows(data.frame(sumna = 99, 
                              pred = mean(predint[,2]),
                              cov = mean(predint[,1] < y & y < predint[,3]),
                              width = mean(predint[,3] - predint[,1])))
#> # A tibble: 12 × 4
#>    sumna     pred   cov width
#>    <dbl>    <dbl> <dbl> <dbl>
#>  1     0  0.00322 0.814  2.14
#>  2     1  0.0613  0.841  2.14
#>  3     2  0.0243  0.828  2.14
#>  4     3 -0.00620 0.815  2.14
#>  5     4  0.0278  0.821  2.14
#>  6     5  0.0125  0.831  2.14
#>  7     6  0.0319  0.784  2.14
#>  8     7  0.0303  0.760  2.14
#>  9     8 -0.00505 0.751  2.14
#> 10     9  0.0228  0.711  2.14
#> 11    10  0.0103  0.716  2.14
#> 12    99  0.0122  0.799  2.14
# Plot predicted versus observed y values with corresponding prediction intervals
# under multiple imputation
predorder <- order(miint[,2])
plot(miint[predorder,2], y[predorder])
points(miint[predorder, 2][rowSums(is.na(amp)) >= 4],
       y[predorder][rowSums(is.na(amp)) >= 4], col = "red")
segments(x0 = miint[predorder, 2],
         y0 = miint[predorder, 1],
         y1 = miint[predorder, 3],
         col = "lightblue")# Evaluate marginal and conditional coverage of prediction intervals with
# corresponding interval width under multiple imputation
cbind(amp, miint) |>
  dplyr::mutate(sumna = rowSums(is.na(amp))) |>
  dplyr::group_by(sumna) |>
  dplyr::summarise(pred = mean(`2`),
                   cov = mean(`1` < y & y < `3`),
                   width = mean(`3` - `1`)) |>
  dplyr::bind_rows(data.frame(sumna = 99, 
                              pred = mean(miint[,2]),
                              cov = mean(miint[,1] < y & y < miint[,3]),
                              width = mean(miint[,3] - miint[,1])))
#> # A tibble: 12 × 4
#>    sumna     pred   cov width
#>    <dbl>    <dbl> <dbl> <dbl>
#>  1     0  0.00556 0.811  2.11
#>  2     1  0.0635  0.839  2.11
#>  3     2  0.0239  0.824  2.11
#>  4     3 -0.00259 0.801  2.11
#>  5     4  0.0281  0.802  2.11
#>  6     5  0.0181  0.833  2.11
#>  7     6  0.0397  0.815  2.11
#>  8     7  0.0436  0.821  2.11
#>  9     8  0.0104  0.852  2.11
#> 10     9  0.0284  0.811  2.11
#> 11    10  0.0257  0.798  2.11
#> 12    99  0.0169  0.815  2.11Created on 2025-06-03 with reprex v2.1.1


So, multiple imputation yields slightly smaller prediction interval widths than those obtained under regression imputation, but also overcovers marginally (note that the sumna = 99 corresponds to the marginal coverage). On the upside, the conditional coverage is much better than the conditional coverage under regression imputation.