Last active
March 31, 2020 19:52
-
-
Save diegovalle/f8e56c7ea33c4c5730e3bc0854410dd0 to your computer and use it in GitHub Desktop.
cumsum.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Auto-install (when missing) and attach the packages this script uses.
.packs <- c("ggplot2", "tabulizer", "dplyr", "stringr",
            "rvest", "hrbrthemes", "scales", "tidyr",
            "lubridate", "directlabels")

# First pass: try to attach every package. require() returns FALSE instead of
# raising an error, which is what lets us detect the ones that are missing.
# vapply() guarantees a named logical vector even for unusual input.
.success <- suppressWarnings(
  vapply(.packs, require, logical(1), character.only = TRUE)
)

if (!all(.success)) {
  .missing <- names(.success)[!.success]
  install.packages(.missing)

  # Second pass: attach what was just installed and report, by name, anything
  # that still cannot be loaded. A warning (not an error) keeps the original
  # best-effort behaviour while making the failure visible up front.
  .loaded <- suppressWarnings(
    vapply(.missing, require, logical(1), character.only = TRUE)
  )
  if (!all(.loaded)) {
    warning("Could not install/load package(s): ",
            paste(.missing[!.loaded], collapse = ", "),
            call. = FALSE)
  }
}
# Keep character columns as character when data frames are created (this was
# not the default before R 4.0).
options(stringsAsFactors = FALSE)

# Fetch the page and collect the href attribute of every <a> element on it.
page <- read_html("https://serendipia.digital/2020/03/datos-abiertos-sobre-casos-de-coronavirus-covid-19-en-mexico/")
links <- html_attr(html_nodes(page, "a"), "href")
# Build `pos`: one row per (report, symptom-onset date) with columns
#   date   - symptom-onset date (Date)
#   n      - number of rows in the report with that onset date
#   cumsum - running total of `n` within the report, ordered by date
#   source - date extracted from the CSV URL, as a "YYYY-MM-DD" string

# Links whose href matches the "positivo ... .csv" pattern. which() drops the
# NA entries produced by anchors that have no href.
csv_links <- links[which(str_detect(links, "positivo.*\\.csv"))]
if (length(csv_links) == 0) {
  stop("No 'positivo' CSV links were found on the page", call. = FALSE)
}

# Read every report into its own summary table and bind them once at the end,
# instead of growing a data frame with rbind() inside a loop.
pos <- bind_rows(lapply(csv_links, function(url) {
  df <- read.csv(url)
  # Optional filter, left disabled:
  # df <- filter(df, Procedencia == "Contacto")

  # NOTE(review): the column name is non-ASCII, so this lookup presumably
  # depends on the CSV being decoded correctly in the current locale - confirm.
  onset <- df$Fecha.de.Inicio.de.síntomas
  if (is.null(onset)) {
    stop("Column 'Fecha.de.Inicio.de.síntomas' not found in ", url,
         call. = FALSE)
  }
  df$date <- as.Date(onset, format = "%d/%m/%Y")

  # The URL carries the date as YYYY.MM.DD; convert the dots to dashes so the
  # value can later be parsed with as.Date().
  report_date <- str_replace_all(
    str_extract(url, "[0-9]{4}\\.[0-9]{2}\\.[0-9]{2}"),
    "\\.", "-"
  )

  df %>%
    group_by(date) %>%
    tally() %>%
    arrange(date) %>%
    mutate(cumsum = cumsum(n),
           source = report_date)
}))
# Plot 1: cumulative cases by symptom-onset date, one line per report.
# `source` is still a "YYYY-MM-DD" string here, so it is converted to Date for
# the grouping and for the date-transformed colour gradient (light grey to
# black, from low to high).
ggplot(pos, aes(date, cumsum,
                group = as.Date(source),
                color = as.Date(source))) +
  geom_line() +
  scale_colour_gradient("Fecha de\npublicación", trans = "date",
                        low = "#d9d9d9", high = "#000000") +
  theme_ipsum() +
  xlab("fecha de inicio de síntomas") +
  ylab("casos acumulados") +
  # Title typo fixed: "coronavirs" -> "coronavirus".
  labs(title = "Casos acumulados de coronavirus, por fecha de inicio de síntomas",
       subtitle = "Cada línea corresponde a los datos presentados en un reporte de la SSA",
       caption = "Comunicado Técnico Diario. https://www.gob.mx/salud/documentos/coronavirus-covid-19-comunicado-tecnico-diario-238449")
# Plot 2: for each symptom-onset date on or after 2020-03-15, the number of
# cases with that onset date (`n`) as a function of how many days after onset
# the report containing them is dated. One line per onset date.

# From here on `source` is a Date so it can be used in date arithmetic.
pos$source <- as.Date(pos$source)

p <- pos %>%
  # Compare Date with Date explicitly rather than relying on coercion of a
  # bare string.
  filter(date >= as.Date("2020-03-15")) %>%
  arrange(source, date) %>%
  # Days elapsed between symptom onset and the report date. This is an
  # elementwise computation, so no grouping is needed (and none is left
  # attached to the data).
  mutate(day = time_length(interval(date, source), "days")) %>%
  ggplot(aes(day, n, group = date, color = date)) +
  geom_line() +
  # Colour encodes the symptom-onset date (`date`), not the publication date,
  # so the legend is titled accordingly.
  scale_colour_gradient("Fecha de inicio\nde síntomas", trans = "date",
                        low = "#d9d9d9", high = "#000000") +
  theme_ipsum() +
  xlim(0, 16) +
  labs(title = "Casos acumulados de coronavirus desde el 15 de marzo, por día de inicio de síntomas",
       subtitle = paste0("Cada línea representa una fecha de inicio de síntomas",
                         ". Por ejemplo, ",
                         "para los casos cuyos síntomas ",
                         "iniciaron el 2020-03-16, después de 5 días (el 21 de marzo)\n",
                         "apenas habían 21 casos, para el día 10 ",
                         "ya eran 65, y para el día 13 (el 29 de marzo) eran 88"),
       caption = "Comunicado Técnico Diario. https://www.gob.mx/salud/documentos/coronavirus-covid-19-comunicado-tecnico-diario-238449") +
  xlab("días desde el inicio de síntomas hasta el registro como positivo") +
  ylab("número de casos")

# Label each line directly, using directlabels' "last.bumpup" method.
direct.label(p, "last.bumpup")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment