Last active
October 7, 2020 17:56
-
-
Save mhoehle/f8d08bc7ad04c0c144a11589f41ca921 to your computer and use it in GitHub Desktop.
Make an age-stratified plot of the weekly COVID-19 incidence in Germany
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################# | |
# Illustrative script showing how to use SurvStat data | |
# to make an age-stratified COVID-19 incidence plot for Germany. | |
# | |
# Author: Michael Höhle <https://www.math.su.se/~hoehle> | |
# Date: 2020-10-07 | |
# Code License: MIT (https://en.wikipedia.org/wiki/MIT_License) | |
############################################################# | |
# Load packages | |
library(tidyverse) | |
library(lubridate) | |
###################### | |
## Helper functions | |
###################### | |
# Convert NA entries to the numeric value 0 | |
na2zero <- function(x) { x[is.na(x)] <- 0 ; return(x)} | |
# Determine midpoint of an age group in the format A50..55 | |
midpoint <- function(x) { | |
str_sub(x, start=2, end=3) %>% as.numeric() + 2.5 | |
} | |
####################################################################### | |
# Manual steps for downloading the data. | |
# - download data from survstat https://survstat.rki.de | |
# - open file and store again in UTF-8 encoding (file has a naste UTF-16 encoding) | |
# - fix first "" in line 2 to contain week number, i.e. replace "" to "W" | |
# | |
# Big question: How to do this automatic? | |
###################################################################### | |
# Adopt locale for the file reading | |
locale <- default_locale() | |
locale$decimal_mark <- "," | |
locale$grouping_mark <- "." | |
#Load data | |
covid <- read_tsv(file="../survstat/Data.csv", skip=1, na="", locale=locale) | |
# Sum up raw incidence to detect weeks without any cases | |
total <- covid %>% select(-W) %>% as.matrix() %>% rowSums(na.rm=TRUE) | |
# Which weeks to show? | |
first_week <- 10 | |
last_week <- max(which(total > 0)) | |
# Restrict data to the relevant weeks and fill with zeroes | |
covid <- covid %>% mutate_all(as.numeric) %>% | |
filter(W >= first_week & W <= last_full_week) %>% | |
mutate_all(na2zero) %>% | |
select(-Unbekannt) | |
# Convert to tidy format | |
covid_long <- covid %>% | |
pivot_longer(cols=-W, names_to="alter", values_to="inzidenz") %>% | |
mutate(alter_mitte = midpoint(alter), | |
alter5 = cut(alter_mitte, breaks=c(seq(0,80, by=5),Inf), right=FALSE), | |
inzidenz_discrete = cut_number(inzidenz, n=8)) %>% | |
mutate(alter = factor(alter)) | |
covid_long | |
covid_long$inzidenz_discrete %>% table() | |
# Matrix type of plot showing incidence over week and age-group | |
ggplot(covid_long, aes(x=W, y=alter5, fill=inzidenz_discrete)) + | |
geom_raster() + | |
scale_x_continuous(breaks= seq(first_week, last_week, by=2), | |
minor_breaks = seq(first_week,last_week,by=1), expand=c(0,0)) + | |
scale_fill_brewer(type="seq", palette="BuPu", guide=FALSE) + | |
theme_minimal() + | |
xlab("Week") + ylab("Age group") + | |
geom_text(aes(x=W, y=as.numeric(alter5), label=round(inzidenz,digits=0)), cex=1.5) + | |
theme(legend.position="bottom") + | |
ggtitle("Weekly reported COVID-19 incidence (per 100,000 population) in Germany") + | |
labs(caption="Data Source: https://survstat.rki.de\nAuthor: @m_hoehle") | |
ggsave(file="ageweek.png", width=8, height=4, dpi=300) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment