sadatnfs · February 14, 2018 04:02
diff --git a/amelia_demo.r b/amelia_demo.r



    ################################################
    ## Author : Nafis Sadat
    ## Purpose : Testing out Multiple Imputation with Amelia 
    ## Created: Feb 13, 2018
    ################################################
    
    
    ## Get all the libraries we'll need
    pacman::p_load(mvtnorm, Amelia, data.table)
    set.seed(1)
    
    ################################################
    ## (1) Prep data for Amelia
    ################################################
    
    ## Use the iris dataset, and add a time variable to denote a 
    ## rectangular dataset, identifiable by species an time
    
    data(iris)
    iris_dt <- copy(data.table(iris))
    iris_dt[, Time:= seq(.N), by = 'Species']
    detach()
    
    ## Randomly drop 40% of data points from the baseline dataset
    drop_rows <- sample(c(1:nrow(iris_dt)), 
                            size = round(nrow(iris_dt)*(.4)), 
                            replace = T)
    vars <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
    iris_dropped <- data.table::copy(iris_dt)[drop_rows,  (vars):= NA]
    
    
    ################################################
    ## (2) Run Amelia on the partial dataset
    ################################################
   
    poly <- 1
    empri <- .01*dim(iris_dropped)[[1]]
    intercs <- TRUE
    amelia_1 <- amelia(x = iris_dropped, 
                       ts="Time", cs="Species",
                       m=10, 
                       intercs=intercs, 
                       polytime=poly, 
                       ncpus = parallel::detectCores(),
                       empri=empri,
                       logs = c(1:4))
    
    
    ## Extract imputed data
    amelia_1_data <- lapply(c(1:10), function(x) amelia_1$imputations[[paste0('imp', x)]])
    
    
    
    ################################################
    ## (3) Compute out-of-sample statistics
    ################################################
    
    oos_stats <- function(amelia_data, baseline_data) {
      
      ## Merge the two datasets
      merged_dt <- merge(amelia_data, baseline_data, c("Species", "Time"))
      
      ## Compute the RMSE
      merged_dt[, paste0(vars, '_rmse'):= lapply(vars, function(x) sqrt(sum( (get(paste0(x,'.x')) - get(paste0(x,'.y'))) )) )]
      merged_dt <- merged_dt[, .SD, .SDcols = c('Species' , 'Time',paste0(vars, '_rmse') )][1, .SD, .SDcols = paste0(vars, '_rmse')]
      return(merged_dt)
    }
    
    amelia_1_data_oos <- rbindlist(lapply(c(1:10), 
                                          function(x) oos_stats(amelia_1_data[[x]], iris_dt)[, imputation:= x]),
                                   use.names = T)



	################################################
	## Author : Nafis Sadat
	## Purpose : Testing out Multiple Imputation with Amelia
	## Created: Feb 13, 2018
	################################################


	## Get all the libraries we'll need
	pacman::p_load(mvtnorm, Amelia, data.table)
	set.seed(1)

	################################################
	## (1) Prep data for Amelia
	################################################

	## Use the iris dataset, and add a time variable to denote a
	## rectangular dataset, identifiable by species an time

	data(iris)
	iris_dt <- copy(data.table(iris))
	iris_dt[, Time:= seq(.N), by = 'Species']
	detach()

	## Randomly drop 40% of data points from the baseline dataset
	drop_rows <- sample(c(1:nrow(iris_dt)),
	size = round(nrow(iris_dt)*(.4)),
	replace = T)
	vars <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
	iris_dropped <- data.table::copy(iris_dt)[drop_rows, (vars):= NA]


	################################################
	## (2) Run Amelia on the partial dataset
	################################################

	poly <- 1
	empri <- .01*dim(iris_dropped)[[1]]
	intercs <- TRUE
	amelia_1 <- amelia(x = iris_dropped,
	ts="Time", cs="Species",
	m=10,
	intercs=intercs,
	polytime=poly,
	ncpus = parallel::detectCores(),
	empri=empri,
	logs = c(1:4))


	## Extract imputed data
	amelia_1_data <- lapply(c(1:10), function(x) amelia_1$imputations[[paste0('imp', x)]])



	################################################
	## (3) Compute out-of-sample statistics
	################################################

	oos_stats <- function(amelia_data, baseline_data) {

	## Merge the two datasets
	merged_dt <- merge(amelia_data, baseline_data, c("Species", "Time"))

	## Compute the RMSE
	merged_dt[, paste0(vars, '_rmse'):= lapply(vars, function(x) sqrt(sum( (get(paste0(x,'.x')) - get(paste0(x,'.y'))) )) )]
	merged_dt <- merged_dt[, .SD, .SDcols = c('Species' , 'Time',paste0(vars, '_rmse') )][1, .SD, .SDcols = paste0(vars, '_rmse')]
	return(merged_dt)
	}

	amelia_1_data_oos <- rbindlist(lapply(c(1:10),
	function(x) oos_stats(amelia_1_data[[x]], iris_dt)[, imputation:= x]),
	use.names = T)