Last active
November 9, 2024 16:14
-
-
Save stephenrho/63f114281ee124cc6eec1db2433c9be5 to your computer and use it in GitHub Desktop.
Load NIS data ("Core", "Hospital", or "Severity") into R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Note: this has only been tested on the 2016-2020 NIS files
# Requires the `fs` and `readr` packages to be installed
#' Get NIS file specifications
#'
#' @description
#' Reads the Stata load program for the requested NIS file from
#' hcup-us.ahrq.gov and parses its `infix` statement into a table of
#' fixed-width column specifications.
#'
#' @param year year of interest
#' @param file "Core", "Hospital", or "Severity"
#' @return data.frame with 4 columns ("type", "varname", "start", "end") for
#'   use in reading ASC files. "type" is the storage type exactly as written
#'   in the Stata load program; "start" and "end" are numeric positions.
#' @export
get_specs <- function(year, file = c("Core", "Hospital", "Severity")) {
  file <- match.arg(file)
  start_str <- "*** Read data elements from the ASCII file ***"
  end_str <- "*** Assign labels to the data elements ***"
  # the 2019 and 2020 load programs carry a "_V2" suffix in their file name
  suff <- if (year %in% 2019:2020) "_V2" else ""
  url <- paste0(
    "https://hcup-us.ahrq.gov/db/nation/nis/tools/pgms/StataLoad_NIS_",
    year, "_", file, suff, ".Do"
  )
  lines <- readLines(url)

  # locate the infix statement; fail clearly if the markers are not where
  # we expect instead of producing a cryptic subscript error
  first <- which(lines == start_str)
  last <- which(lines == end_str)
  if (length(first) != 1L || length(last) != 1L || last - 3L <= first) {
    stop("Could not locate the infix statement. Please check stata load program")
  }
  # the three lines before the end marker are not column specifications
  lines <- lines[(first + 1L):(last - 3L)]

  # drop the `infix` keyword and the `///` line continuations
  lines <- gsub("infix|/", "", lines)
  # turn "start-end" into "start end"; using a space (rather than deleting
  # the hyphen) keeps the two numbers apart when no blank follows the hyphen
  lines <- gsub("-", " ", lines, fixed = TRUE)
  lines <- trimws(lines)
  fields <- strsplit(lines, split = " +")
  # lines that held nothing but `infix` / `///` are empty now
  fields <- fields[lengths(fields) > 0L]

  # every remaining line must be exactly: type, name, start, end
  if (length(fields) == 0L || any(lengths(fields) != 4L)) {
    stop("Something went wrong in getting specifications. Please check stata load program")
  }

  specs <- as.data.frame(do.call(rbind, fields), stringsAsFactors = FALSE)
  colnames(specs) <- c("type", "varname", "start", "end")
  specs$start <- as.numeric(specs$start)
  specs$end <- as.numeric(specs$end)
  specs
}
#' Load NIS files into R
#'
#' @description
#' Reads a fixed-width NIS ".ASC" file using the column specifications
#' returned by `get_specs()`. Uses `readr`, and `fs` when `nis_path` is not
#' supplied.
#'
#' @param year year of interest
#' @param file "Core", "Hospital", or "Severity"
#' @param nis_path path to the folder holding the data files, with or without
#'   a trailing slash. If not given, the working directory is searched
#'   recursively for a single folder whose name ends in paste0("NIS_", year)
#'
#' @return data.frame containing NIS data. Column names are the lower-cased
#'   variable names from the load program, and fields equal to one of the
#'   strings in `missing_values` are read as NA.
#' @export
load_nis <- function(year, file = c("Core", "Hospital", "Severity"),
                     nis_path) {
  # https://gist.github.com/markdanese/e53dcbfbb0c00f109e6bd65712d07cfa
  file <- match.arg(file)
  # Stata storage type -> readr compact column type
  types <- c("int" = "i", "byte" = "d", "double" = "d", "long" = "d", "str" = "c")
  # written as strings so each code matches the file text exactly; building
  # this with as.character(quote(c(...))) also yields the element "c", which
  # would turn a character field equal to "c" into NA
  missing_values <- c(
    "-99", "-88", "-66",
    "-99.9999999", "-88.8888888", "-66.6666666",
    "-9", "-8", "-6", "-5",
    "-9999", "-8888", "-6666",
    "-999999999", "-888888888", "-666666666",
    "-999", "-888", "-666"
  )

  if (missing(nis_path)) {
    nis_path <- fs::dir_ls(glob = paste0("*NIS_", year),
                           recurse = TRUE, type = "directory")
    if (length(nis_path) != 1L) {
      stop("Expected exactly one folder matching '*NIS_", year, "' but found ",
           length(nis_path), ". Please supply nis_path")
    }
  }
  nis_path <- as.character(nis_path)
  # add a separator only where one is needed, so paths given with or without
  # a trailing slash both work and "" still means the working directory
  needs_sep <- nzchar(nis_path) & !grepl("[/\\\\]$", nis_path)
  nis_path[needs_sep] <- paste0(nis_path[needs_sep], "/")

  specs <- get_specs(year = year, file = file)
  specs$varname <- tolower(specs$varname)

  # an unmapped type would otherwise end up as "NA" inside col_types
  unknown <- setdiff(specs$type, names(types))
  if (length(unknown) > 0L) {
    stop("Unrecognised type(s) in stata load program: ",
         paste(unknown, collapse = ", "))
  }
  specs$type <- unname(types[specs$type])

  fn <- paste0(nis_path, "NIS_", year, "_", file, ".ASC")
  # use the parsed start/end positions directly rather than rebuilding them
  # from widths, which is only right when fields are contiguous from column 1
  d <- readr::read_fwf(file = fn,
                       col_positions = readr::fwf_positions(
                         start = specs$start,
                         end = specs$end,
                         col_names = specs$varname
                       ),
                       col_types = paste0(specs$type, collapse = ""),
                       trim_ws = TRUE,
                       na = missing_values)
  as.data.frame(d)
}
# Example (requires the NIS data files: https://hcup-us.ahrq.gov/nisoverview.jsp)
# devtools::source_gist("63f114281ee124cc6eec1db2433c9be5")
# out <- load_nis(year = 2020, file = "Core")
# data.table::fwrite(out, "NIS_2020_Core.csv.gz")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment