Created
September 14, 2022 21:05
-
-
Save stephenrho/9c95e0db2e8fbdac834cc1bf14d99bd3 to your computer and use it in GitHub Desktop.
code to calculate scores underlying county health rankings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://www.countyhealthrankings.org/explore-health-rankings/our-methods/calculating-ranks | |
library(data.table) | |
chr = fread("https://www.countyhealthrankings.org/sites/default/files/media/document/analytic_data2022.csv", skip = 1) | |
# https://www.countyhealthrankings.org/sites/default/files/media/document/2022%20Ranked%20and%20Additional%20Measures%2C%20Data%20Sources%20and%20Years_0.pdf | |
measures = list( | |
# note some are reverse coded (see -ve weights) | |
health_behaviors = list( | |
names = c("v009_rawvalue", # Adult smoking | |
"v011_rawvalue", # Adult obesity | |
"v133_rawvalue", # Food environment index | |
"v070_rawvalue", # Physical inactivity | |
"v132_rawvalue", # Access to exercise opportunities | |
"v049_rawvalue", # Excessive drinking | |
"v134_rawvalue", # Alcohol-impaired driving deaths | |
"v045_rawvalue", # Sexually transmitted infections | |
"v014_rawvalue" # Teen births | |
), | |
weights = c(.1, .05, -.02, .02, -.01, .025, .025, .025, .025) | |
), | |
clinical_care = list( | |
names = c("v003_rawvalue", # Uninsured | |
"v004_rawvalue", # Primary care physicians | |
"v088_rawvalue", # Dentists | |
"v062_rawvalue", # Mental health providers | |
"v005_rawvalue", # Preventable hospital stays | |
"v050_rawvalue", # Mammography screening | |
"v155_rawvalue" # Flu vaccinations | |
), | |
weights = c(.05, -.03, -.01, -.01, .05, -.025, -.025) | |
), | |
social_economic_factors = list( | |
names = c("v021_rawvalue", # High school graduation | |
"v069_rawvalue", # Some college | |
"v023_rawvalue", # Unemployment | |
"v024_rawvalue", # Children in poverty | |
"v044_rawvalue", # Income inequality | |
"v082_rawvalue", # Children in single-parent households | |
"v140_rawvalue", # Social associations | |
"v043_rawvalue", # Violent crime | |
"v135_rawvalue" # Injury Deaths | |
), | |
weights = c(-.05, -.05, .1, .075, .025, .025, -.025, .025, .025) | |
), | |
physical_environment = list( | |
names = c("v125_rawvalue", # Air pollution – particulate matter | |
"v124_rawvalue", # Drinking water violations | |
"v136_rawvalue", # Severe housing problems | |
"v067_rawvalue", # Driving alone to work | |
"v137_rawvalue" # Long commute – driving alone | |
), | |
weights = c(.025, .025, .02, .02, .01) | |
) | |
) | |
calc_scores = function(chr, measures, z_state = F){ | |
chr = subset(chr, state != "US") | |
vars = unlist(lapply(measures, \(x) x$names)) | |
# replace missing vals with state means | |
m = chr[, lapply(.SD, mean, na.rm=T), by=state, .SDcols=vars] | |
m = merge(chr[,"state"], m) | |
for (v in vars){ | |
set(chr, i = which(is.na(chr[[v]])), j = v, value = m[which(is.na(chr[[v]])), ..v]) | |
} | |
if (z_state){ | |
# state means and SDs | |
s = chr[, lapply(.SD, sd, na.rm=T), by=state, .SDcols=vars] | |
s = merge(chr[,"state"], s) | |
X = (chr[, ..vars] - m[, ..vars])/s[, ..vars] | |
chr[, (paste0(vars, "_z")) := X] | |
} else{ | |
# z-score w/o considering state | |
chr[, (paste0(vars, "_z")) := lapply(.SD, scale), .SDcols=vars] | |
} | |
# truncate smaller counties | |
#v051_rawvalue = population | |
for (v in paste0(vars, "_z")){ | |
set(chr, i = which(chr$v051_rawvalue <= 20000 & chr[[v]] < -3), j = v, value = -3) | |
set(chr, i = which(chr$v051_rawvalue <= 20000 & chr[[v]] > 3), j = v, value = 3) | |
} | |
# calculate scores | |
# loop over measures... | |
for (m in names(measures)){ | |
mvars = paste0(measures[[m]]$names, "_z") | |
chr[, (m) := as.matrix(chr[, mget(mvars)]) %*% measures[[m]]$weights ] | |
} | |
return(chr) | |
} | |
# chr = calc_scores(chr = chr, measures = measures, z_state = F) | |
# | |
# pairs(chr[, mget(c('health_behaviors', 'clinical_care', 'social_economic_factors', 'physical_environment'))]) | |
# cor(chr[, mget(c('health_behaviors', 'clinical_care', 'social_economic_factors', 'physical_environment'))], use = "pair") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment