Skip to content

Instantly share code, notes, and snippets.

@gdbassett
Created April 6, 2020 20:44
Show Gist options
  • Save gdbassett/1a7da5140f3b36596b96e85487245f4d to your computer and use it in GitHub Desktop.
Save gdbassett/1a7da5140f3b36596b96e85487245f4d to your computer and use it in GitHub Desktop.
Proportion of population having died from covid19 over time by country
# https://population.un.org/wpp/Download/Standard/Population/
pop <- readr::read_csv("~/Documents/Data/covid19/population.csv") %>%
select(`Region, subregion, country or area *`, `Country code`, `2020`) %>%
rename(name = `Region, subregion, country or area *`, alpha_3 = `Country code`, population=`2020`) %>%
mutate(population = as.integer(gsub(" ", "", population))) %>%
mutate(population = population * 1000) %>%
glimpse()
# From https://github.com/CSSEGISandData/COVID-19
covid <- readr::read_csv("~/Documents/Development/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv") %>%
rename(province_state = `Province/State`, country_region = `Country/Region`) %>%
tidyr::pivot_longer(c(-province_state, -country_region, -Lat, -Long), names_to="date", values_to="deaths") %>%
mutate(date = lubridate::mdy(date)) %>%
mutate(country_region = plyr::mapvalues(country_region, c("US", "Korea, South"), c("United States of America", "Republic of Korea"))) %>%
glimpse()
# https://en.wikipedia.org/wiki/Spanish_flu
# us proportion of deaths to spanish flu: 0.48 to 0.64
inner_join(
pop,
covid,
by=c("name" = "country_region")
) %>%
filter(name %in% c("Germany", "China", "France", "Italy", "Republic of Korea", "United States of America", "United Kingdom")) %>%
glimpse()
base_dates <- covid %>%
group_by(country_region, date) %>%
filter(sum(deaths) == 0) %>%
ungroup() %>%
group_by(country_region) %>%
filter(date == max(date)) %>%
ungroup() %>%
rename(base_date = date) %>%
select(country_region, base_date)
chunk <- inner_join(
pop,
covid,
by=c("name" = "country_region")
) %>%
inner_join(base_dates, by=c("name" = "country_region")) %>%
mutate(days = date - base_date) %>%
filter(name %in% c("Germany", "China", "France", "Italy", "Republic of Korea", "United States of America", "United Kingdom")) %>%
group_by(name, date) %>%
summarize(deaths = sum(deaths, na.rm=TRUE), population = first(population), days= first(days)) %>%
ungroup() %>%
mutate(proportion = deaths / population)
chunk2 <- chunk %>%
group_by(name) %>%
filter(days == max(days)) %>%
ungroup()
ggplot() +
geom_line(
aes(x=days, y=proportion, group=name, color=name),
data = chunk
) +
geom_point(
aes(x=days, y=proportion),
data = chunk2
) +
ggrepel::geom_label_repel(
aes(x=days, y=proportion, label=paste0(scales::comma(deaths), " deaths"), color=name),
data = chunk2
) +
scale_y_continuous(labels=scales::percent_format(accuracy=0.01)) +
scale_x_continuous(limits=c(0,NA)) +
labs(x="Days since first death", y="Percent of population dead", title="COVID-19 deaths as a percentage of country population", color=NULL) +
ggthemes::theme_economist()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment