Skip to content

Instantly share code, notes, and snippets.

@sadatnfs
Created February 14, 2018 04:02
Show Gist options
  • Save sadatnfs/bf080695965f822eff5afed2cae8e989 to your computer and use it in GitHub Desktop.
Save sadatnfs/bf080695965f822eff5afed2cae8e989 to your computer and use it in GitHub Desktop.
A quick example of using Amelia
################################################
## Author : Nafis Sadat
## Purpose : Testing out Multiple Imputation with Amelia
## Created: Feb 13, 2018
################################################
## Get all the libraries we'll need
pacman::p_load(mvtnorm, Amelia, data.table)
set.seed(1)
################################################
## (1) Prep data for Amelia
################################################
## Use the iris dataset, and add a time variable to denote a
## rectangular dataset, identifiable by species an time
data(iris)
iris_dt <- copy(data.table(iris))
iris_dt[, Time:= seq(.N), by = 'Species']
detach()
## Randomly drop 40% of data points from the baseline dataset
drop_rows <- sample(c(1:nrow(iris_dt)),
size = round(nrow(iris_dt)*(.4)),
replace = T)
vars <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
iris_dropped <- data.table::copy(iris_dt)[drop_rows, (vars):= NA]
################################################
## (2) Run Amelia on the partial dataset
################################################
poly <- 1
empri <- .01*dim(iris_dropped)[[1]]
intercs <- TRUE
amelia_1 <- amelia(x = iris_dropped,
ts="Time", cs="Species",
m=10,
intercs=intercs,
polytime=poly,
ncpus = parallel::detectCores(),
empri=empri,
logs = c(1:4))
## Extract imputed data
amelia_1_data <- lapply(c(1:10), function(x) amelia_1$imputations[[paste0('imp', x)]])
################################################
## (3) Compute out-of-sample statistics
################################################
oos_stats <- function(amelia_data, baseline_data) {
## Merge the two datasets
merged_dt <- merge(amelia_data, baseline_data, c("Species", "Time"))
## Compute the RMSE
merged_dt[, paste0(vars, '_rmse'):= lapply(vars, function(x) sqrt(sum( (get(paste0(x,'.x')) - get(paste0(x,'.y'))) )) )]
merged_dt <- merged_dt[, .SD, .SDcols = c('Species' , 'Time',paste0(vars, '_rmse') )][1, .SD, .SDcols = paste0(vars, '_rmse')]
return(merged_dt)
}
amelia_1_data_oos <- rbindlist(lapply(c(1:10),
function(x) oos_stats(amelia_1_data[[x]], iris_dt)[, imputation:= x]),
use.names = T)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment