Last active
June 12, 2024 18:36
-
-
Save markdanese/e53dcbfbb0c00f109e6bd65712d07cfa to your computer and use it in GitHub Desktop.
National Inpatient Sample (NIS) read program
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# this loads the 2016 NIS fixed width (asc) files into R | |
# it also saves the result as an fst file for much faster re-reading into R | |
library(data.table) | |
library(readr) | |
library(fst) | |
# load core data -------------------------------------------------------------------- | |
nis_specs <- fread("./docs/nis_specs_core.csv") | |
nis_specs[, width := end - start + 1] | |
nis_specs[, varname := tolower(varname)] | |
nis_specs[, type := ifelse(type == "int", "i", ifelse(type %in% c("byte", "double", "long"), "d", "c"))] | |
missing_values <- as.character(quote(c(-99, -88, -66, -99.9999999, -88.8888888, -66.6666666, -9, -8, -6, -5, -9999, -8888, -6666, -999999999, -888888888, -666666666,-999, -888, -666))) | |
dt <- | |
read_fwf( | |
"~/Dropbox (Outcomes Insights)/Data repository/HCUP/NIS_2016/NIS_2016_Core.ASC", | |
col_positions = fwf_widths(nis_specs$width), | |
col_types = paste0(nis_specs$type, collapse = ""), | |
trim_ws = TRUE, | |
na = missing_values | |
) | |
setDT(dt) | |
setnames(dt, names(dt), nis_specs$varname) | |
write_fst(dt, "./data/analysis/hospital.fst", compress = 75) | |
dt <- read_fst("./data/analysis/hospital.fst", as.data.table = TRUE) | |
write_fst(dt, "./data/analysis/core.fst", compress = 100) | |
# load hospital data ---------------------------------------------------------------- | |
nis_specs <- fread("./docs/nis_specs_hospital.csv") | |
nis_specs[, width := end - start + 1] | |
nis_specs[, varname := tolower(varname)] | |
nis_specs[, type := ifelse(type == "int", "i", ifelse(type %in% c("byte", "double", "long"), "d", "c"))] | |
missing_values <- as.character(quote(c(-99, -88, -66, -99.9999999, -88.8888888, -66.6666666, -9, -8, -6, -5, -9999, -8888, -6666, -999999999, -888888888, -666666666,-999, -888, -666))) | |
dt <- | |
read_fwf( | |
"~/Dropbox (Outcomes Insights)/Data repository/HCUP/NIS_2016/NIS_2016_Hospital.ASC", | |
col_positions = fwf_widths(nis_specs$width), | |
col_types = paste0(nis_specs$type, collapse = ""), | |
trim_ws = TRUE, | |
# n_max = 1000, | |
na = missing_values | |
) | |
setDT(dt) | |
setnames(dt, names(dt), nis_specs$varname) | |
write_fst(dt, "./data/analysis/hospital.fst", compress = 100) | |
# load severity data ---------------------------------------------------------------- | |
nis_specs <- fread("./docs/nis_specs_severity.csv") | |
nis_specs[, width := end - start + 1] | |
nis_specs[, varname := tolower(varname)] | |
nis_specs[, type := ifelse(type == "int", "i", ifelse(type %in% c("byte", "double", "long"), "d", "c"))] | |
missing_values <- as.character(quote(c(-99, -88, -66, -99.9999999, -88.8888888, -66.6666666, -9, -8, -6, -5, -9999, -8888, -6666, -999999999, -888888888, -666666666,-999, -888, -666))) | |
dt <- | |
read_fwf( | |
"~/Dropbox (Outcomes Insights)/Data repository/HCUP/NIS_2016/NIS_2016_Severity.ASC", | |
col_positions = fwf_widths(nis_specs$width), | |
col_types = paste0(nis_specs$type, collapse = ""), | |
trim_ws = TRUE, | |
# n_max = 1000, | |
na = missing_values | |
) | |
setDT(dt) | |
setnames(dt, names(dt), nis_specs$varname) | |
write_fst(dt, "./data/analysis/severity.fst", compress = 100) | |
# copy the information below into a text editor and save as "nis_specs_xxx.csv" where "xxx" is | |
# "core", "hospital", or "severity" (see above) | |
# the information is taken from the Stata load programs | |
# white space is stripped when read into R, but kept below for legibility | |
# core data csv | |
type, varname, start, end | |
int, AGE, 1, 3 | |
byte, AGE_NEONATE, 4, 5 | |
byte, AMONTH, 6, 7 | |
byte, AWEEKEND, 8, 9 | |
byte, DIED, 10, 11 | |
double, DISCWT, 12, 22 | |
byte, DISPUNIFORM, 23, 24 | |
byte, DQTR, 25, 26 | |
int, DRG, 27, 29 | |
byte, DRGVER, 30, 31 | |
int, DRG_NoPOA, 32, 34 | |
byte, DXVER, 35, 36 | |
byte, ELECTIVE, 37, 38 | |
byte, FEMALE, 39, 40 | |
int, HCUP_ED, 41, 43 | |
byte, HOSP_DIVISION, 44, 45 | |
long, HOSP_NIS, 46, 50 | |
str, I10_DX1, 51, 57 | |
str, I10_DX2, 58, 64 | |
str, I10_DX3, 65, 71 | |
str, I10_DX4, 72, 78 | |
str, I10_DX5, 79, 85 | |
str, I10_DX6, 86, 92 | |
str, I10_DX7, 93, 99 | |
str, I10_DX8, 100, 106 | |
str, I10_DX9, 107, 113 | |
str, I10_DX10, 114, 120 | |
str, I10_DX11, 121, 127 | |
str, I10_DX12, 128, 134 | |
str, I10_DX13, 135, 141 | |
str, I10_DX14, 142, 148 | |
str, I10_DX15, 149, 155 | |
str, I10_DX16, 156, 162 | |
str, I10_DX17, 163, 169 | |
str, I10_DX18, 170, 176 | |
str, I10_DX19, 177, 183 | |
str, I10_DX20, 184, 190 | |
str, I10_DX21, 191, 197 | |
str, I10_DX22, 198, 204 | |
str, I10_DX23, 205, 211 | |
str, I10_DX24, 212, 218 | |
str, I10_DX25, 219, 225 | |
str, I10_DX26, 226, 232 | |
str, I10_DX27, 233, 239 | |
str, I10_DX28, 240, 246 | |
str, I10_DX29, 247, 253 | |
str, I10_DX30, 254, 260 | |
str, I10_ECAUSE1, 261, 267 | |
str, I10_ECAUSE2, 268, 274 | |
str, I10_ECAUSE3, 275, 281 | |
str, I10_ECAUSE4, 282, 288 | |
byte, I10_NDX, 289, 290 | |
int, I10_NECAUSE, 291, 293 | |
byte, I10_NPR, 294, 295 | |
str, I10_PR1, 296, 302 | |
str, I10_PR2, 303, 309 | |
str, I10_PR3, 310, 316 | |
str, I10_PR4, 317, 323 | |
str, I10_PR5, 324, 330 | |
str, I10_PR6, 331, 337 | |
str, I10_PR7, 338, 344 | |
str, I10_PR8, 345, 351 | |
str, I10_PR9, 352, 358 | |
str, I10_PR10, 359, 365 | |
str, I10_PR11, 366, 372 | |
str, I10_PR12, 373, 379 | |
str, I10_PR13, 380, 386 | |
str, I10_PR14, 387, 393 | |
str, I10_PR15, 394, 400 | |
double, KEY_NIS, 401, 410 | |
long, LOS, 411, 415 | |
byte, MDC, 416, 417 | |
byte, MDC_NoPOA, 418, 419 | |
int, NIS_STRATUM, 420, 423 | |
byte, PAY1, 424, 425 | |
int, PL_NCHS, 426, 428 | |
int, PRDAY1, 429, 431 | |
int, PRDAY2, 432, 434 | |
int, PRDAY3, 435, 437 | |
int, PRDAY4, 438, 440 | |
int, PRDAY5, 441, 443 | |
int, PRDAY6, 444, 446 | |
int, PRDAY7, 447, 449 | |
int, PRDAY8, 450, 452 | |
int, PRDAY9, 453, 455 | |
int, PRDAY10, 456, 458 | |
int, PRDAY11, 459, 461 | |
int, PRDAY12, 462, 464 | |
int, PRDAY13, 465, 467 | |
int, PRDAY14, 468, 470 | |
int, PRDAY15, 471, 473 | |
byte, PRVER, 474, 475 | |
byte, RACE, 476, 477 | |
double, TOTCHG, 478, 487 | |
byte, TRAN_IN, 488, 489 | |
byte, TRAN_OUT, 490, 491 | |
int, YEAR, 492, 495 | |
byte, ZIPINC_QRTL, 496, 497 | |
# hospital csv | |
type,varname,start,end | |
double, DISCWT, 1,11 | |
byte, HOSP_BEDSIZE, 12,13 | |
byte, HOSP_DIVISION,14,15 | |
byte, HOSP_LOCTEACH,16,17 | |
long, HOSP_NIS, 18,22 | |
byte, HOSP_REGION, 23,24 | |
byte, H_CONTRL, 25,26 | |
int, NIS_STRATUM, 27,30 | |
long, N_DISC_U, 31,38 | |
int, N_HOSP_U, 39,42 | |
long, S_DISC_U, 43,50 | |
int, S_HOSP_U, 51,54 | |
long, TOTAL_DISC, 55,60 | |
int, YEAR, 61,64 | |
# severity csv | |
type,varname,start,end | |
long, HOSP_NIS, 1, 5 | |
double, KEY_NIS, 6,15 | |
int, APRDRG, 16,19 | |
byte, APRDRG_Risk_Mortality, 20,21 | |
byte, APRDRG_Severity, 22,23 |
How did you get your specs data in a .csv format? Did you convert it from the NIS File Specifications website? https://hcup-us.ahrq.gov/db/nation/nis/nisfilespecs.jsp#2016NIS
As i mentioned above, I simply edit the available SAS or Stata load programs using a text editor like Sublime text that allows you to edit multiple rows simultaneously.
Thanks for sharing this! I adapted your code with a function to read specs from the stata load files online. I've tested 2016-2020 and it seems to work well https://gist.github.com/stephenrho/63f114281ee124cc6eec1db2433c9be5
@stephenrho -- that sounds great! Thanks for doing that.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Sorry, I don't have any recent experience with those. I typically read things in by adapting the SAS or Stata load programs, if they are available. I do it manually or with a script depending on the complexity.