Skip to content

Instantly share code, notes, and snippets.

@stephenrho
Last active November 9, 2024 16:14
Show Gist options
  • Save stephenrho/63f114281ee124cc6eec1db2433c9be5 to your computer and use it in GitHub Desktop.
Save stephenrho/63f114281ee124cc6eec1db2433c9be5 to your computer and use it in GitHub Desktop.
Load NIS data ("Core", "Hospital", or "Severity") into R
# Note: this has only been tested on the 2016-2020 NIS releases.
# Requires the `fs` and `readr` packages to be installed.
#' Get NIS file specifications
#'
#' @description
#' Uses stata load files from hcup-us.ahrq.gov to get specs for loading NIS files.
#' The lines between the "Read data elements" and "Assign labels" markers of
#' the Stata `infix` statement are split into type, variable name, start and
#' end position.
#'
#' @param year of interest
#' @param file "Core", "Hospital", or "Severity"
#' @return data.frame with 4 columns ("type", "varname", "start", "end") for use in reading ASC files.
#'   "type" holds the Stata storage type as written in the load program;
#'   "start" and "end" are numeric.
#' @export
get_specs <- function(year, file = c("Core", "Hospital", "Severity")) {
  file <- match.arg(file)
  start_str <- "*** Read data elements from the ASCII file ***"
  end_str <- "*** Assign labels to the data elements ***"
  parse_msg <- "Something went wrong in getting specifications. Please check stata load program"

  # The 2019 and 2020 load programs are requested with a "_V2" suffix.
  suff <- if (year %in% 2019:2020) "_V2" else ""
  url <- paste0(
    "https://hcup-us.ahrq.gov/db/nation/nis/tools/pgms/StataLoad_NIS_",
    year, "_", file, suff, ".Do"
  )
  lines <- readLines(url)

  # Locate the markers ignoring surrounding whitespace, and fail clearly when
  # either marker is missing or duplicated instead of erroring on a bad index.
  marker_text <- trimws(lines)
  first <- which(marker_text == start_str) + 1L
  # The three lines directly above the end marker are skipped, as they are not
  # variable definitions.
  last <- which(marker_text == end_str) - 3L
  if (length(first) != 1L || length(last) != 1L || last < first) {
    stop(parse_msg, call. = FALSE)
  }
  lines <- lines[first:last]

  # Replace the `infix` keyword, the `///` continuation marks and the range
  # hyphen with spaces (not nothing), so that "1-3" still yields two fields.
  lines <- trimws(gsub("infix|/|-", " ", lines))
  lines <- lines[nzchar(lines)]
  tokens <- strsplit(lines, split = " +")

  # Every definition must have exactly four fields. Checking this per line is
  # required because rbind() would silently recycle shorter lines.
  if (length(tokens) == 0L || any(lengths(tokens) != 4L)) {
    stop(parse_msg, call. = FALSE)
  }
  fields <- do.call(rbind, tokens)

  specs <- data.frame(
    type = fields[, 1],
    varname = fields[, 2],
    start = suppressWarnings(as.numeric(fields[, 3])),
    end = suppressWarnings(as.numeric(fields[, 4])),
    stringsAsFactors = FALSE
  )
  # Non-numeric positions mean the load program was not parsed correctly.
  if (anyNA(specs$start) || anyNA(specs$end)) {
    stop(parse_msg, call. = FALSE)
  }
  specs
}
#' Load NIS files into R
#'
#' @description
#' Fetches the column layout with `get_specs()` and reads the matching
#' fixed-width file `NIS_<year>_<file>.ASC` with `readr::read_fwf()`.
#'
#' @param year of interest
#' @param file "Core", "Hospital", or "Severity"
#' @param nis_path path to data files. If not given, the working directory is
#'   searched recursively for a folder whose path ends in paste0("NIS_", year);
#'   exactly one such folder must exist
#'
#' @return data.frame containing NIS data, with lower-case column names and the
#'   listed missing-value codes read as NA
#' @export
load_nis <- function(year, file = c("Core", "Hospital", "Severity"),
                     nis_path) {
  # https://gist.github.com/markdanese/e53dcbfbb0c00f109e6bd65712d07cfa
  file <- match.arg(file)

  # Map from Stata storage type to readr compact column type.
  types <- c("int" = "i", "byte" = "d", "double" = "d", "long" = "d", "str" = "c")
  # Codes to read as NA. Written as strings directly: building them with
  # as.character(quote(c(...))) also yields the function name "c" as a code.
  missing_values <- c(
    "-99", "-88", "-66", "-99.9999999", "-88.8888888", "-66.6666666",
    "-9", "-8", "-6", "-5", "-9999", "-8888", "-6666",
    "-999999999", "-888888888", "-666666666", "-999", "-888", "-666"
  )

  if (missing(nis_path)) {
    found <- fs::dir_ls(
      glob = paste0("*NIS_", year),
      recurse = TRUE, type = "directory"
    )
    # Zero matches would otherwise collapse to the path "/", and several
    # matches would produce several file names.
    if (length(found) != 1L) {
      stop(
        "Expected exactly one folder matching 'NIS_", year, "' but found ",
        length(found), ". Please supply `nis_path`.",
        call. = FALSE
      )
    }
    nis_path <- as.character(found)
  }
  if (!is.character(nis_path) || length(nis_path) != 1L || is.na(nis_path)) {
    stop("`nis_path` must be a single character string.", call. = FALSE)
  }
  # Accept paths given with or without a trailing separator.
  if (nzchar(nis_path) && !grepl("[/\\\\]$", nis_path)) {
    nis_path <- paste0(nis_path, "/")
  }

  specs <- get_specs(year = year, file = file)
  specs$varname <- tolower(specs$varname)

  col_codes <- unname(types[specs$type])
  if (anyNA(col_codes)) {
    stop(
      "Unrecognised Stata type(s) in load program: ",
      paste(unique(specs$type[is.na(col_codes)]), collapse = ", "),
      call. = FALSE
    )
  }

  fn <- paste0(nis_path, "NIS_", year, "_", file, ".ASC")
  # Use the explicit start/end positions so the layout stays correct even if
  # the columns are not contiguous or do not begin at position 1.
  d <- readr::read_fwf(
    file = fn,
    col_positions = readr::fwf_positions(start = specs$start, end = specs$end),
    col_types = paste0(col_codes, collapse = ""),
    trim_ws = TRUE,
    na = missing_values
  )
  colnames(d) <- specs$varname
  as.data.frame(d)
}
# example (need NIS datafiles https://hcup-us.ahrq.gov/nisoverview.jsp)
# devtools::source_gist("63f114281ee124cc6eec1db2433c9be5")
# out = load_nis(year = 2020, file = "Core")
# data.table::fwrite(out, "NIS_2020_Core.csv.gz")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment