Skip to content

Instantly share code, notes, and snippets.

@markdanese
Last active June 12, 2024 18:36
Show Gist options
  • Save markdanese/e53dcbfbb0c00f109e6bd65712d07cfa to your computer and use it in GitHub Desktop.
Save markdanese/e53dcbfbb0c00f109e6bd65712d07cfa to your computer and use it in GitHub Desktop.
National Inpatient Sample (NIS) read program
# this loads the 2016 NIS fixed width (asc) files into R
# it also saves the result as an fst file for much faster re-reading into R
library(data.table)
library(readr)
library(fst)
# load core data --------------------------------------------------------------------
nis_specs <- fread("./docs/nis_specs_core.csv")
nis_specs[, width := end - start + 1]
nis_specs[, varname := tolower(varname)]
nis_specs[, type := ifelse(type == "int", "i", ifelse(type %in% c("byte", "double", "long"), "d", "c"))]
missing_values <- as.character(quote(c(-99, -88, -66, -99.9999999, -88.8888888, -66.6666666, -9, -8, -6, -5, -9999, -8888, -6666, -999999999, -888888888, -666666666,-999, -888, -666)))
dt <-
read_fwf(
"~/Dropbox (Outcomes Insights)/Data repository/HCUP/NIS_2016/NIS_2016_Core.ASC",
col_positions = fwf_widths(nis_specs$width),
col_types = paste0(nis_specs$type, collapse = ""),
trim_ws = TRUE,
na = missing_values
)
setDT(dt)
setnames(dt, names(dt), nis_specs$varname)
write_fst(dt, "./data/analysis/hospital.fst", compress = 75)
dt <- read_fst("./data/analysis/hospital.fst", as.data.table = TRUE)
write_fst(dt, "./data/analysis/core.fst", compress = 100)
# load hospital data ----------------------------------------------------------------
nis_specs <- fread("./docs/nis_specs_hospital.csv")
nis_specs[, width := end - start + 1]
nis_specs[, varname := tolower(varname)]
nis_specs[, type := ifelse(type == "int", "i", ifelse(type %in% c("byte", "double", "long"), "d", "c"))]
missing_values <- as.character(quote(c(-99, -88, -66, -99.9999999, -88.8888888, -66.6666666, -9, -8, -6, -5, -9999, -8888, -6666, -999999999, -888888888, -666666666,-999, -888, -666)))
dt <-
read_fwf(
"~/Dropbox (Outcomes Insights)/Data repository/HCUP/NIS_2016/NIS_2016_Hospital.ASC",
col_positions = fwf_widths(nis_specs$width),
col_types = paste0(nis_specs$type, collapse = ""),
trim_ws = TRUE,
# n_max = 1000,
na = missing_values
)
setDT(dt)
setnames(dt, names(dt), nis_specs$varname)
write_fst(dt, "./data/analysis/hospital.fst", compress = 100)
# load severity data ----------------------------------------------------------------
nis_specs <- fread("./docs/nis_specs_severity.csv")
nis_specs[, width := end - start + 1]
nis_specs[, varname := tolower(varname)]
nis_specs[, type := ifelse(type == "int", "i", ifelse(type %in% c("byte", "double", "long"), "d", "c"))]
missing_values <- as.character(quote(c(-99, -88, -66, -99.9999999, -88.8888888, -66.6666666, -9, -8, -6, -5, -9999, -8888, -6666, -999999999, -888888888, -666666666,-999, -888, -666)))
dt <-
read_fwf(
"~/Dropbox (Outcomes Insights)/Data repository/HCUP/NIS_2016/NIS_2016_Severity.ASC",
col_positions = fwf_widths(nis_specs$width),
col_types = paste0(nis_specs$type, collapse = ""),
trim_ws = TRUE,
# n_max = 1000,
na = missing_values
)
setDT(dt)
setnames(dt, names(dt), nis_specs$varname)
write_fst(dt, "./data/analysis/severity.fst", compress = 100)
# copy the information below into a text editor and save as "nis_specs_xxx.csv" where "xxx" is
# "core", "hospital", or "severity" (see above)
# the information is taken from the Stata load programs
# white space is stripped when read into R, but kept below for legibility
# core data csv
type, varname, start, end
int, AGE, 1, 3
byte, AGE_NEONATE, 4, 5
byte, AMONTH, 6, 7
byte, AWEEKEND, 8, 9
byte, DIED, 10, 11
double, DISCWT, 12, 22
byte, DISPUNIFORM, 23, 24
byte, DQTR, 25, 26
int, DRG, 27, 29
byte, DRGVER, 30, 31
int, DRG_NoPOA, 32, 34
byte, DXVER, 35, 36
byte, ELECTIVE, 37, 38
byte, FEMALE, 39, 40
int, HCUP_ED, 41, 43
byte, HOSP_DIVISION, 44, 45
long, HOSP_NIS, 46, 50
str, I10_DX1, 51, 57
str, I10_DX2, 58, 64
str, I10_DX3, 65, 71
str, I10_DX4, 72, 78
str, I10_DX5, 79, 85
str, I10_DX6, 86, 92
str, I10_DX7, 93, 99
str, I10_DX8, 100, 106
str, I10_DX9, 107, 113
str, I10_DX10, 114, 120
str, I10_DX11, 121, 127
str, I10_DX12, 128, 134
str, I10_DX13, 135, 141
str, I10_DX14, 142, 148
str, I10_DX15, 149, 155
str, I10_DX16, 156, 162
str, I10_DX17, 163, 169
str, I10_DX18, 170, 176
str, I10_DX19, 177, 183
str, I10_DX20, 184, 190
str, I10_DX21, 191, 197
str, I10_DX22, 198, 204
str, I10_DX23, 205, 211
str, I10_DX24, 212, 218
str, I10_DX25, 219, 225
str, I10_DX26, 226, 232
str, I10_DX27, 233, 239
str, I10_DX28, 240, 246
str, I10_DX29, 247, 253
str, I10_DX30, 254, 260
str, I10_ECAUSE1, 261, 267
str, I10_ECAUSE2, 268, 274
str, I10_ECAUSE3, 275, 281
str, I10_ECAUSE4, 282, 288
byte, I10_NDX, 289, 290
int, I10_NECAUSE, 291, 293
byte, I10_NPR, 294, 295
str, I10_PR1, 296, 302
str, I10_PR2, 303, 309
str, I10_PR3, 310, 316
str, I10_PR4, 317, 323
str, I10_PR5, 324, 330
str, I10_PR6, 331, 337
str, I10_PR7, 338, 344
str, I10_PR8, 345, 351
str, I10_PR9, 352, 358
str, I10_PR10, 359, 365
str, I10_PR11, 366, 372
str, I10_PR12, 373, 379
str, I10_PR13, 380, 386
str, I10_PR14, 387, 393
str, I10_PR15, 394, 400
double, KEY_NIS, 401, 410
long, LOS, 411, 415
byte, MDC, 416, 417
byte, MDC_NoPOA, 418, 419
int, NIS_STRATUM, 420, 423
byte, PAY1, 424, 425
int, PL_NCHS, 426, 428
int, PRDAY1, 429, 431
int, PRDAY2, 432, 434
int, PRDAY3, 435, 437
int, PRDAY4, 438, 440
int, PRDAY5, 441, 443
int, PRDAY6, 444, 446
int, PRDAY7, 447, 449
int, PRDAY8, 450, 452
int, PRDAY9, 453, 455
int, PRDAY10, 456, 458
int, PRDAY11, 459, 461
int, PRDAY12, 462, 464
int, PRDAY13, 465, 467
int, PRDAY14, 468, 470
int, PRDAY15, 471, 473
byte, PRVER, 474, 475
byte, RACE, 476, 477
double, TOTCHG, 478, 487
byte, TRAN_IN, 488, 489
byte, TRAN_OUT, 490, 491
int, YEAR, 492, 495
byte, ZIPINC_QRTL, 496, 497
# hospital csv
type,varname,start,end
double, DISCWT, 1,11
byte, HOSP_BEDSIZE, 12,13
byte, HOSP_DIVISION,14,15
byte, HOSP_LOCTEACH,16,17
long, HOSP_NIS, 18,22
byte, HOSP_REGION, 23,24
byte, H_CONTRL, 25,26
int, NIS_STRATUM, 27,30
long, N_DISC_U, 31,38
int, N_HOSP_U, 39,42
long, S_DISC_U, 43,50
int, S_HOSP_U, 51,54
long, TOTAL_DISC, 55,60
int, YEAR, 61,64
# severity csv
type,varname,start,end
long, HOSP_NIS, 1, 5
double, KEY_NIS, 6,15
int, APRDRG, 16,19
byte, APRDRG_Risk_Mortality, 20,21
byte, APRDRG_Severity, 22,23
@lilin-tong
Copy link

Do you have a load program for the state data (e.g., SID, SEDD)? Or have tips on how to create a load program for those datasets?

@markdanese
Copy link
Author

Sorry, I don't have any recent experience with those. I typically read things in by adapting the SAS or Stata load programs, if they are available. I do it manually or with a script depending on the complexity.

@MorganAC42
Copy link

How did you get your specs data in a .csv format? Did you convert it from the NIS File Specifications website? https://hcup-us.ahrq.gov/db/nation/nis/nisfilespecs.jsp#2016NIS

@markdanese
Copy link
Author

As i mentioned above, I simply edit the available SAS or Stata load programs using a text editor like Sublime text that allows you to edit multiple rows simultaneously.

@stephenrho
Copy link

Thanks for sharing this! I adapted your code with a function to read specs from the stata load files online. I've tested 2016-2020 and it seems to work well https://gist.github.com/stephenrho/63f114281ee124cc6eec1db2433c9be5

@markdanese
Copy link
Author

@stephenrho -- that sounds great! Thanks for doing that.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment