Skip to content

Instantly share code, notes, and snippets.

@levithatcher
Last active January 25, 2019 21:15
Show Gist options
  • Save levithatcher/070496ca48c165d7ced37e0ffcd24dc7 to your computer and use it in GitHub Desktop.
Save levithatcher/070496ca48c165d7ced37e0ffcd24dc7 to your computer and use it in GitHub Desktop.
Create county-level choropleths of low-birth weight, premature death, and their relationship to county income
removeCommasInNumber <- function(column) {
column <- as.numeric(gsub(",", "", column))
column
}
createChoropleth <- function(df,
colToPlot,
title,
legend,
numColors=1,
NAReplace=NULL) {
library(choroplethr)
library(choroplethrMaps)
library(ggplot2)
# Remove commas from column of interest
if (!is.numeric(df[[colToPlot]])) {
df[[colToPlot]] <- removeCommasInNumber(df[[colToPlot]])
}
# Remove state rows from dataset
df <- subset(df, COUNTYCODE != 0)
df$STATECODE <- as.integer(df$STATECODE)
df$COUNTYCODE <- as.integer(df$COUNTYCODE)
# Pad county digits
df$COUNTYCODE <- sprintf("%03d", df$COUNTYCODE)
# Concatenate and create FIPS
df$FIPSCODE <- as.numeric(paste0(df$STATECODE,df$COUNTYCODE))
# Reduce dataset and rename cols for county_choropleth func
df <- subset(df, select = c('FIPSCODE', colToPlot))
colnames(df) <- c("region","value")
print("NA count:")
print(count(is.na(df["value"])))
# Fill NA cells with something (so choropleth works)
if (!is.null(NAReplace)) {
df["value"][is.na(df["value"])] <- NAReplace
}
str(df)
# Plot data on a US map
county_choropleth(df, num_colors = numColors, legend = legend) +
ggtitle(title) +
theme(plot.title = element_text(hjust = 0.5))
}
createPercentiles <- function(x) {
(x - min(x, na.rm = TRUE))/(max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
}
library(healthcareai)
# Start analysis -- read in data
# Data comes from here: http://www.countyhealthrankings.org/rankings/data
df <- read.csv('2015 CHR Analytic Data.csv')
# Remove state-summary rows
df <- subset(df, COUNTYCODE != 0)
# Prepare columns
# Convert to percentage when plotting its choropleth
#df$Low.birthweight.Value <- df$Low.birthweight.Value * 100
df$Median.household.income.Value <- removeCommasInNumber(df$Median.household.income.Value)
df$Premature.death.Value <- removeCommasInNumber(df$Premature.death.Value)
# Change from factors to numerics
#df$Median.household.income.Value <- as.numeric(df$Median.household.income.Value)
#df$Premature.death.Value <- as.numeric(df$Premature.death.Value)
# Change to percentiles
df$Median.household.income.Percentile <- round(createPercentiles(df$Median.household.income.Value), 2)
# Change to percentiles
# We subtract 1 from income and do abs, since we want 100th percentile to be desirable
df$Low.birthweight.Percentile <- round(abs(createPercentiles(df$Low.birthweight.Value) - 1), 2)
df$Premature.death.Percentile <- round(abs(createPercentiles(df$Premature.death.Value) - 1), 2)
# Calculate diff between county income and health outcomes (leads to -1 to 1)
df$LBW.PATWI <- round(df$Low.birthweight.Percentile -
df$Median.household.income.Percentile, 2)
df$Early.Death.PATWI <- round(df$Premature.death.Percentile -
df$Median.household.income.Percentile, 2)
df <- subset(df, select = c('County','State','Median.household.income.Value','Median.household.income.Percentile','Low.birthweight.Percentile','LBW.PATWI','Premature.death.Percentile','Early.Death.PATWI'))
str(df)
# Plot choropleth
createChoropleth(df,
#colToPlot = 'Median.household.income.Value',
#colToPlot = 'Premature.death.Value',
#colToPlot = 'Low.birthweight.Value',
#colToPlot = 'LBWPcntlMinusIncomePcntl',
colToPlot = 'EarlyDeathPctlMinusIncomePctl',
title = 'Prime Years Lost Compared to Income by County',
legend = 'Percentile Diff',
numColors = 7,
NAReplace = 0)
# Find statistical relationships
df$Low.birthweight.Value <- imputeColumn(df$Low.birthweight.Value)
calculateTargetedCorrelations(df[,c('Median.household.income.Value',
'Premature.death.Value',
'Low.birthweight.Value')],
targetCol = 'Median.household.income.Value')
lm(Premature.death.Value ~ Median.household.income.Value, data = df)
lm(Low.birthweight.Value ~ Median.household.income.Value, data = df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment