Created
June 8, 2016 07:28
-
-
Save mbojan/7540899db112218b4d4590011ebbb7d8 to your computer and use it in GitHub Desktop.
Imputation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Jako notebook w RStudio (Ctrl+Shift+K) | |
suppressPackageStartupMessages({ | |
library(MASS) | |
library(dplyr) | |
library(tidyr) | |
library(ggplot2) | |
library(mice) | |
}) | |
#' `N` samples from a trivariate normal | |
set.seed(123) | |
N <- 1000 | |
mu <- rep(1, 3) | |
s <- matrix(c( | |
1, 0.2, 0.3, | |
0.2, 1, -0.2, | |
0.3, -0.2, 1), | |
3, 3, byrow=TRUE | |
) | |
m <- MASS::mvrnorm(N, mu, s) | |
#' `set` of data assigned purely randomly. | |
#' Variable `Y_obs` is observed only for `set = FALSE` | |
#' Variable `Z_obs` is observed only for `set = TRUE`. | |
m %>% as.data.frame() %>% | |
rename(X=V1, Y=V2, Z=V3) %>% | |
mutate( set = sample(c(TRUE, FALSE), N, replace=TRUE) ) %>% | |
mutate( | |
Y_obs = replace(Y, which(set), NA), | |
Z_obs = replace(Z, which(!set), NA) | |
) -> d | |
#' Impute `Y_obs` and `Z_obs` multiple times | |
d %>% select(X, Y_obs, Z_obs) %>% | |
mice::mice(m=100, printFlag=FALSE) -> mi | |
#' Calculate correlation matrices for each imputed dataset and convert to a data frame. | |
cmat <- lapply(1:100, function(i) cbind(did=i, as.data.frame(as.table(cor(mice::complete(mi,i)))))) | |
dc <- do.call("rbind", cmat) | |
#' Density estimate of correlation between Y and Z with the true correlation marked with a vertical line | |
dc %>% rename(rho = Freq) %>% | |
filter(Var1=="Y_obs", Var2=="Z_obs") %>% | |
ggplot(aes(x=rho)) + geom_density() + geom_vline(aes(xintercept=-0.2)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment