Created
April 6, 2020 20:44
-
-
Save gdbassett/1a7da5140f3b36596b96e85487245f4d to your computer and use it in GitHub Desktop.
Proportion of population having died from covid19 over time by country
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://population.un.org/wpp/Download/Standard/Population/ | |
pop <- readr::read_csv("~/Documents/Data/covid19/population.csv") %>% | |
select(`Region, subregion, country or area *`, `Country code`, `2020`) %>% | |
rename(name = `Region, subregion, country or area *`, alpha_3 = `Country code`, population=`2020`) %>% | |
mutate(population = as.integer(gsub(" ", "", population))) %>% | |
mutate(population = population * 1000) %>% | |
glimpse() | |
# From https://github.com/CSSEGISandData/COVID-19 | |
covid <- readr::read_csv("~/Documents/Development/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv") %>% | |
rename(province_state = `Province/State`, country_region = `Country/Region`) %>% | |
tidyr::pivot_longer(c(-province_state, -country_region, -Lat, -Long), names_to="date", values_to="deaths") %>% | |
mutate(date = lubridate::mdy(date)) %>% | |
mutate(country_region = plyr::mapvalues(country_region, c("US", "Korea, South"), c("United States of America", "Republic of Korea"))) %>% | |
glimpse() | |
# https://en.wikipedia.org/wiki/Spanish_flu | |
# us proportion of deaths to spanish flu: 0.48 to 0.64 | |
inner_join( | |
pop, | |
covid, | |
by=c("name" = "country_region") | |
) %>% | |
filter(name %in% c("Germany", "China", "France", "Italy", "Republic of Korea", "United States of America", "United Kingdom")) %>% | |
glimpse() | |
base_dates <- covid %>% | |
group_by(country_region, date) %>% | |
filter(sum(deaths) == 0) %>% | |
ungroup() %>% | |
group_by(country_region) %>% | |
filter(date == max(date)) %>% | |
ungroup() %>% | |
rename(base_date = date) %>% | |
select(country_region, base_date) | |
chunk <- inner_join( | |
pop, | |
covid, | |
by=c("name" = "country_region") | |
) %>% | |
inner_join(base_dates, by=c("name" = "country_region")) %>% | |
mutate(days = date - base_date) %>% | |
filter(name %in% c("Germany", "China", "France", "Italy", "Republic of Korea", "United States of America", "United Kingdom")) %>% | |
group_by(name, date) %>% | |
summarize(deaths = sum(deaths, na.rm=TRUE), population = first(population), days= first(days)) %>% | |
ungroup() %>% | |
mutate(proportion = deaths / population) | |
chunk2 <- chunk %>% | |
group_by(name) %>% | |
filter(days == max(days)) %>% | |
ungroup() | |
ggplot() + | |
geom_line( | |
aes(x=days, y=proportion, group=name, color=name), | |
data = chunk | |
) + | |
geom_point( | |
aes(x=days, y=proportion), | |
data = chunk2 | |
) + | |
ggrepel::geom_label_repel( | |
aes(x=days, y=proportion, label=paste0(scales::comma(deaths), " deaths"), color=name), | |
data = chunk2 | |
) + | |
scale_y_continuous(labels=scales::percent_format(accuracy=0.01)) + | |
scale_x_continuous(limits=c(0,NA)) + | |
labs(x="Days since first death", y="Percent of population dead", title="COVID-19 deaths as a percentage of country population", color=NULL) + | |
ggthemes::theme_economist() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment