Created
February 14, 2018 04:02
-
-
Save sadatnfs/bf080695965f822eff5afed2cae8e989 to your computer and use it in GitHub Desktop.
A quick example of using Amelia
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################ | |
## Author : Nafis Sadat | |
## Purpose : Testing out Multiple Imputation with Amelia | |
## Created: Feb 13, 2018 | |
################################################ | |
## Get all the libraries we'll need | |
pacman::p_load(mvtnorm, Amelia, data.table) | |
set.seed(1) | |
################################################ | |
## (1) Prep data for Amelia | |
################################################ | |
## Use the iris dataset, and add a time variable to denote a | |
## rectangular dataset, identifiable by species an time | |
data(iris) | |
iris_dt <- copy(data.table(iris)) | |
iris_dt[, Time:= seq(.N), by = 'Species'] | |
detach() | |
## Randomly drop 40% of data points from the baseline dataset | |
drop_rows <- sample(c(1:nrow(iris_dt)), | |
size = round(nrow(iris_dt)*(.4)), | |
replace = T) | |
vars <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") | |
iris_dropped <- data.table::copy(iris_dt)[drop_rows, (vars):= NA] | |
################################################ | |
## (2) Run Amelia on the partial dataset | |
################################################ | |
poly <- 1 | |
empri <- .01*dim(iris_dropped)[[1]] | |
intercs <- TRUE | |
amelia_1 <- amelia(x = iris_dropped, | |
ts="Time", cs="Species", | |
m=10, | |
intercs=intercs, | |
polytime=poly, | |
ncpus = parallel::detectCores(), | |
empri=empri, | |
logs = c(1:4)) | |
## Extract imputed data | |
amelia_1_data <- lapply(c(1:10), function(x) amelia_1$imputations[[paste0('imp', x)]]) | |
################################################ | |
## (3) Compute out-of-sample statistics | |
################################################ | |
oos_stats <- function(amelia_data, baseline_data) { | |
## Merge the two datasets | |
merged_dt <- merge(amelia_data, baseline_data, c("Species", "Time")) | |
## Compute the RMSE | |
merged_dt[, paste0(vars, '_rmse'):= lapply(vars, function(x) sqrt(sum( (get(paste0(x,'.x')) - get(paste0(x,'.y'))) )) )] | |
merged_dt <- merged_dt[, .SD, .SDcols = c('Species' , 'Time',paste0(vars, '_rmse') )][1, .SD, .SDcols = paste0(vars, '_rmse')] | |
return(merged_dt) | |
} | |
amelia_1_data_oos <- rbindlist(lapply(c(1:10), | |
function(x) oos_stats(amelia_1_data[[x]], iris_dt)[, imputation:= x]), | |
use.names = T) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment