Skip to content

Instantly share code, notes, and snippets.

@k5cents
Created October 14, 2019 21:29
Show Gist options
  • Save k5cents/06dc20846c98235a056ad1638f74f7d6 to your computer and use it in GitHub Desktop.
Save k5cents/06dc20846c98235a056ad1638f74f7d6 to your computer and use it in GitHub Desktop.
Comparing U.S. states by partisanship against several development metrics
library(tidyverse)
library(janitor)
library(magrittr)
library(campfin)
library(rvest)
# Data sources ----
# Wikipedia pages whose tables are scraped below (education, life expectancy,
# GDP per capita, Human Development Index, income).
url_edu <- "https://en.wikipedia.org/wiki/List_of_U.S._states_and_territories_by_educational_attainment"
url_life <- "https://en.wikipedia.org/wiki/List_of_U.S._states_and_territories_by_life_expectancy"
url_gdp <- "https://en.wikipedia.org/wiki/List_of_U.S._states_by_GDP_per_capita"
url_hdi <- "https://en.wikipedia.org/wiki/List_of_U.S._states_and_territories_by_Human_Development_Index"
url_income <- "https://en.wikipedia.org/wiki/List_of_U.S._states_and_territories_by_income"
# FiveThirtyEight state partisan lean, read directly as a raw CSV from GitHub.
url_pvi <- "https://raw.githubusercontent.com/fivethirtyeight/data/master/partisan-lean/fivethirtyeight_partisan_lean_STATES.csv"
#' Convert percent values to proportions
#'
#' @param x Vector of values handed to `parse_number()` (e.g. scraped
#'   percent strings).
#' @return The parsed numbers divided by 100.
parse_percent <- function(x) {
  percent_value <- parse_number(x)
  percent_value / 100
}
# Educational attainment ----
# Scrape the first ".wikitable" on the page, drop every column whose cleaned
# name contains "rank", and rename the remaining four columns positionally.
edu <-
  read_html(url_edu) %>%
  html_node(".wikitable") %>%
  html_table(fill = TRUE) %>%
  clean_names(case = "snake") %>%
  select(-contains("rank")) %>%
  as_tibble() %>%
  set_names(c("state", "high_school", "bach_degree", "grad_degree")) %>%
  mutate(
    # attainment columns become proportions via parse_percent()
    high_school = parse_percent(high_school),
    bach_degree = parse_percent(bach_degree),
    grad_degree = parse_percent(grad_degree),
    # state names are passed through campfin's abbrev_state()
    state = abbrev_state(state)
  )
# Life expectancy ----
# Scrape the first ".wikitable" on the page and keep its 2nd and 3rd columns
# as `state` and `life_expect`.
#
# The en-dash placeholder "–" is turned into NA column by column: calling
# na_if() on the whole data frame with a character scalar is rejected by
# dplyr >= 1.1. `life_expect` is also parsed to numeric so that it can be
# used as a continuous y aesthetic and in the linear smooth further down;
# as.character() keeps parse_number() working if html_table() has already
# converted the column.
life <-
  read_html(url_life) %>%
  html_node(".wikitable") %>%
  html_table(fill = TRUE) %>%
  clean_names() %>%
  select(2, 3) %>%
  as_tibble() %>%
  set_names(c("state", "life_expect")) %>%
  mutate(
    state = abbrev_state(na_if(state, "–")),
    life_expect = parse_number(na_if(as.character(life_expect), "–"))
  )
# GDP per capita ----
# html_nodes() returns every ".wikitable" on the page; the second parsed
# table is used, keeping only its `State` and `2018` columns.
gdp <-
  read_html(url_gdp) %>%
  html_nodes(".wikitable") %>%
  html_table(fill = TRUE) %>%
  extract2(2) %>%
  select(state = `State`, gdp = `2018`) %>%
  as_tibble() %>%
  mutate(
    gdp = parse_number(gdp),
    state = abbrev_state(state)
  )
# Human Development Index ----
# Take the 2nd and 3rd columns of the first ".wikitable" as `state` and
# `hdi`, then keep only rows whose abbreviation is in base R's `state.abb`.
hdi <-
  read_html(url_hdi) %>%
  html_node(".wikitable") %>%
  html_table(fill = TRUE) %>%
  select(state = 2, hdi = 3) %>%
  as_tibble() %>%
  mutate(state = abbrev_state(state)) %>%
  filter(state %in% state.abb)
# Income ----
# The second ".wikitable" on the page is used; columns 2-4 are taken
# positionally as state, individual income and household income.
income <-
  read_html(url_income) %>%
  html_nodes(".wikitable") %>%
  html_table(fill = TRUE) %>%
  extract2(2) %>%
  select(state = 2, ind_income = 3, house_income = 4) %>%
  as_tibble() %>%
  mutate(
    ind_income = parse_number(ind_income),
    house_income = parse_number(house_income),
    state = abbrev_state(state)
  )
# Partisan lean ----
# Read the FiveThirtyEight CSV and split `pvi_538` into a `party` part and a
# numeric `lean` part.
#
# The split is made on the literal "+" only. separate()'s default separator
# matches any run of non-alphanumeric characters, so a fractional value such
# as "D+0.5" would be cut at the decimal point as well and the fraction
# discarded.
#
# Rows whose party is "D" get a negated lean; every other row keeps its sign.
pvi <-
  read_csv(url_pvi) %>%
  separate(
    col = pvi_538,
    into = c("party", "lean"),
    sep = "\\+",
    convert = TRUE
  ) %>%
  mutate(
    state = abbrev_state(state),
    # unary minus keeps the column's type (integer or double) in both branches
    lean = if_else(party == "D", -lean, lean)
  )
# Plot: HDI vs. partisan lean ----
# One label per state, filled by lean, with a linear fit drawn on top.
# The join key is spelled out so the join does not depend on whichever
# columns the two tables happen to share.
left_join(pvi, hdi, by = "state") %>%
  ggplot(aes(x = lean, y = hdi)) +
  geom_smooth(method = "lm", color = "black", se = FALSE) +
  geom_label(aes(label = state, fill = lean), size = 5) +
  # "none" hides the fill legend; `guide = FALSE` is deprecated in ggplot2
  scale_fill_distiller(palette = "RdBu", guide = "none") +
  labs(
    x = "Partisan Lean Index (538)",
    y = "Human Development Index (UNDP)"
  )
# No `plot` argument is given, so ggsave() writes the most recent plot.
ggsave("~/Pictures/hdi_pvi.png", dpi = "retina", height = 5, width = 9)
# Plot: educational attainment vs. partisan lean ----
# The three attainment columns are stacked into name/value pairs and drawn
# as one facet each, with independent y scales.
left_join(pvi, edu, by = "state") %>%
  pivot_longer(cols = c("high_school", "bach_degree", "grad_degree")) %>%
  # as_factor() keeps the levels in order of appearance, which fixes the
  # facet order to the column order given above
  mutate(name = as_factor(name)) %>%
  ggplot(aes(x = lean, y = value)) +
  facet_wrap(~name, scales = "free_y") +
  geom_smooth(method = "lm", color = "black", se = FALSE) +
  geom_label(aes(label = state, fill = lean), size = 5) +
  # "none" hides the fill legend; `guide = FALSE` is deprecated in ggplot2
  scale_fill_distiller(palette = "RdBu", guide = "none") +
  scale_y_continuous(labels = scales::percent) +
  labs(
    x = "Partisan Lean Index (538)",
    y = "Percent Attained"
  )
# No `plot` argument is given, so ggsave() writes the most recent plot.
ggsave("~/Pictures/pvi_devel.png", dpi = "retina", height = 5, width = 9)
# Plot: life expectancy vs. partisan lean ----
# One label per state, filled by lean, with a linear fit drawn on top.
left_join(pvi, life, by = "state") %>%
  ggplot(aes(x = lean, y = life_expect)) +
  geom_smooth(method = "lm", color = "black", se = FALSE) +
  geom_label(aes(label = state, fill = lean), size = 5) +
  # "none" hides the fill legend; `guide = FALSE` is deprecated in ggplot2
  scale_fill_distiller(palette = "RdBu", guide = "none") +
  labs(
    x = "Partisan Lean Index (538)",
    y = "Life Expectancy"
  )
# No `plot` argument is given, so ggsave() writes the most recent plot.
ggsave("~/Pictures/pvi_life.png", dpi = "retina", height = 5, width = 9)
# Plot: GDP per capita vs. partisan lean ----
# One label per state, filled by lean, with a linear fit drawn on top and
# the y axis formatted as dollars.
left_join(pvi, gdp, by = "state") %>%
  ggplot(aes(x = lean, y = gdp)) +
  geom_smooth(method = "lm", color = "black", se = FALSE) +
  geom_label(aes(label = state, fill = lean), size = 5) +
  # "none" hides the fill legend; `guide = FALSE` is deprecated in ggplot2
  scale_fill_distiller(palette = "RdBu", guide = "none") +
  scale_y_continuous(labels = scales::dollar) +
  labs(
    x = "Partisan Lean Index (538)",
    y = "GDP Per Capita"
  )
# No `plot` argument is given, so ggsave() writes the most recent plot.
ggsave("~/Pictures/pvi_gdp.png", dpi = "retina", height = 5, width = 9)
# Plot: income vs. partisan lean ----
# Individual and household income are stacked into name/value pairs and
# drawn as one facet each on a shared dollar-formatted y axis.
left_join(pvi, income, by = "state") %>%
  pivot_longer(cols = c("ind_income", "house_income")) %>%
  ggplot(aes(x = lean, y = value)) +
  facet_wrap(~name) +
  geom_smooth(method = "lm", color = "black", se = FALSE) +
  geom_label(aes(label = state, fill = lean), size = 5) +
  # "none" hides the fill legend; `guide = FALSE` is deprecated in ggplot2
  scale_fill_distiller(palette = "RdBu", guide = "none") +
  scale_y_continuous(labels = scales::dollar) +
  labs(
    x = "Partisan Lean Index (538)",
    y = "Income"
  )
# No `plot` argument is given, so ggsave() writes the most recent plot.
ggsave("~/Pictures/pvi_income.png", dpi = "retina", height = 5, width = 9)
# Charitability score ----
# Scrape the first <table> of the Forbes article and keep its `State` and
# `Total Score` columns.
url_charity <- "https://www.forbes.com/sites/karstenstrauss/2017/12/04/the-most-and-least-charitable-states-in-the-u-s-in-2017/#4d6150720703"
charity <-
  read_html(url_charity) %>%
  html_node("table") %>%
  html_table(fill = TRUE) %>%
  select(state = State, score = `Total Score`) %>%
  as_tibble() %>%
  mutate(state = abbrev_state(state))
# Plot: charitability score vs. partisan lean ----
# One label per state, filled by lean, with a linear fit drawn on top.
# This plot is only displayed; there is no ggsave() call for it.
left_join(pvi, charity, by = "state") %>%
  ggplot(aes(x = lean, y = score)) +
  geom_smooth(method = "lm", color = "black", se = FALSE) +
  geom_label(aes(label = state, fill = lean), size = 5) +
  # "none" hides the fill legend; `guide = FALSE` is deprecated in ggplot2
  scale_fill_distiller(palette = "RdBu", guide = "none") +
  labs(
    x = "Partisan Lean Index (538)",
    y = "Charitability Score"
  )
@k5cents
Copy link
Author

k5cents commented Oct 14, 2019

hdi_pvi

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment