Created
November 30, 2020 10:17
-
-
Save coulmont/610ed5417f840a3897709ceec81d116c to your computer and use it in GitHub Desktop.
décès quotidien, coordonnees polaires
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Comptage des décès covid et de la surmortalité en 2020 | |
library(tidyverse) | |
library(lubridate) | |
#library(ngram) | |
library(hrbrthemes) | |
library(glue) | |
setwd("~/Dropbox/projets-R/2020-covid/") | |
# date du jour | |
aujourdhui <- Sys.Date() | |
# décès 2001-2019 quotidiens | |
load("data/dc_jour.Rdata") | |
# Bisextilisation ------------------------------------- | |
# pour superposer les années l'une sur l'autre, il faut | |
# créer des faux 29 février | |
# et leur affecter une valeur moyenne | |
ving_neuf_fevriers <- dc_jour %>% | |
# mutate(date_fictive = dmy(paste(jour,mois,"2020",sep="-"))) %>% | |
filter((mois == 2 & jour %in% c(28,29)) | (mois == 3 & jour ==1)) %>% | |
complete(annee, nesting(jour, mois)) %>% | |
group_by(annee) %>% | |
arrange(annee,mois,jour) %>% | |
mutate(N = case_when(is.na(N) ~ as.integer(round(.5*(lead(N) + lag(N)))) , | |
TRUE ~ N) ) %>% | |
group_by(jour) %>% | |
mutate(maxi = max(N), | |
mini = min(N)) | |
# jointure du fichier des 29 février | |
dc_jour <- dc_jour %>% | |
filter(! (mois == 2 & jour == 29)) %>% | |
bind_rows(ving_neuf_fevriers %>% filter(mois == 2 & jour == 29)) | |
dc_jour <- dc_jour %>% | |
filter(annee<2020) %>% | |
mutate(type = "deces_constates", | |
categorie = "fichier_deces") %>% | |
mutate(date_fictive = dmy(paste(jour,mois,"2020",sep="-"))) | |
# décès covid ecdc | |
# read the Dataset sheet into “R”. The dataset will be called "deces_ecdc". | |
# GET("https://www.ecdc.europa.eu/sites/default/files/documents/COVID-19-geographic-disbtribution-worldwide-2020-05-04.xlsx", | |
# write_disk(tf <- tempfile(fileext = ".xlsx"))) | |
# deces_ecdc <- readxl::read_excel(tf) | |
deces_ecdc <- read_csv("https://opendata.ecdc.europa.eu/covid19/casedistribution/csv") #, na.strings = "", fileEncoding = "UTF-8-BOM") | |
deces_ecdc <- deces_ecdc %>% filter(countriesAndTerritories=="France") %>% | |
# bind_rows(data.frame(deaths=900, | |
# dateRep = ("14/11/2020"), | |
# date_jour=18581)) %>% | |
#mutate(date_fictive = ymd(dateRep)) %>% | |
mutate(date_fictive = dmy(dateRep)) %>% | |
arrange(date_fictive) %>% | |
select(date_fictive, | |
deces_covid = deaths) %>% | |
left_join(dc_jour %>% | |
filter(annee<2020, annee>2009) %>% | |
mutate(date_fictive = dmy(paste(jour,mois,"2020",sep="-"))) %>% | |
group_by(date_fictive) %>% | |
summarize(N=mean(N))) %>% | |
ungroup() %>% | |
# moyenne sur sept jours car beaucoup de variations (EHPAD en retard, etc...) | |
mutate(deces_covid_m = zoo::rollmean(deces_covid,7,align="left", fill = list(tail(deces_covid,6) ) ) ) %>% | |
# filter(deces_covid_m>0) %>% | |
mutate(N = N + deces_covid_m) %>% | |
filter(date_fictive > ymd("2020-02-15")) %>% | |
select(date_fictive,N) %>% | |
mutate(type = "deces_estimes") %>% | |
mutate(categorie = "ecdc") | |
poids <-c(8,5,4,3,2,1, | |
5,8,5,4,3,2, | |
4,5,8,5,4,3, | |
3,4,5,8,5,4, | |
2,3,4,5,8,5, | |
1,2,3,4,5,8) | |
rollwm <- function(valeurs,poids) { | |
wm <- c(0,0,0,0,0,0) | |
n <- length(poids)/6 | |
for (i in 0:(n-1)) { | |
poids_calc <- poids[ (6*i+1):(6*i+6)] | |
wm[i+1] <- weighted.mean(valeurs,poids_calc) | |
} | |
return(wm) | |
} | |
deces_ecdc$N[(length(deces_ecdc$N)-5):length(deces_ecdc$N)] <- rollwm(tail(deces_ecdc$N),poids) | |
# deces_ecdc <- deces_ecdc %>% | |
# mutate(N = ifelse(date_fictive < (max(date_fictive)-5), N, | |
# rollwm(tail(deces_ecdc$N),poids) ) ) | |
# décès insee, fichier détaillé individuel | |
dc_insee_temporaire <- read_csv2("data/2020-04-30_detail/DC_jan2018-avr2020_det.csv") | |
# on ne garde que les décès de 2020, car les décès précédents sont dans dc_jour | |
dc_insee_jour <- dc_insee_temporaire %>% | |
filter(ADEC > 2019) %>% | |
# "date_fictive" pour pouvoir tracer la courbe de 2020 sur le graphique 2001-2019 | |
mutate(date_fictive = ymd(paste("2020",MDEC,JDEC,sep="-"))) %>% | |
group_by(date_fictive) %>% | |
summarize(N=n()) %>% | |
mutate(annee = 2020) %>% | |
# on garde Janvier-mars | |
filter(date_fictive<ymd("2020-03-02")) %>% | |
mutate(type = "deces_constates", | |
categorie = "insee_hebdo") | |
rm(dc_insee_temporaire) | |
# décès insee estimation s+4 | |
# -------------------------- | |
load("data/deces_insee_provisoires.Rdata") | |
# compilation pour graphiques | |
# --------------------------- | |
dc <- bind_rows(dc_jour,dc_insee_jour,deces_ecdc,deces_insee_provisoires) | |
# calcul de l'aire sous les courbes | |
# --------------------------------- | |
moyenne <- dc %>% | |
filter(categorie == "fichier_deces") %>% | |
filter(annee>2009,annee<2020) %>% | |
group_by(date_fictive) %>% | |
summarize(moyenne=mean(N)) %>% | |
ungroup() | |
# aire 2020 avec décès ECDC | |
#-------------------------- | |
# aire_2020 <- dc %>% filter(categorie %in% c("estimation_insee","ecdc")) %>% | |
# group_by(date_fictive) %>% | |
# filter(N == max(N) & date_fictive > ymd("2020-03-07")) %>% | |
# select(date_fictive,N) %>% | |
# ungroup() %>% | |
# left_join(moyenne,by= "date_fictive") %>% | |
# mutate(surmortalite = N-moyenne) | |
# | |
# total_aire_2020 <- aire_2020 %>% | |
# summarize(total = sum(surmortalite)) %>% | |
# mutate(total = 1000*(round(total / 1000) )) %>% | |
# as.numeric() %>% round() | |
# aire_2020 juste avec décès insee | |
aire_2020_printemps <- dc %>% filter(categorie %in% c("estimation_insee")) %>% | |
group_by(date_fictive) %>% | |
filter(N == max(N) & date_fictive > ymd("2020-03-07") & date_fictive<ymd("2020-06-01")) %>% | |
select(date_fictive,N) %>% | |
ungroup() %>% | |
left_join(moyenne,by= "date_fictive") %>% | |
mutate(surmortalite = N-moyenne) | |
total_aire_2020_printemps <- aire_2020_printemps %>% | |
summarize(total = sum(surmortalite)) %>% | |
mutate(total = 1000*(round(total / 1000) )) %>% | |
as.numeric() %>% round() | |
# aire 2020 automne | |
aire_2020_automne <- dc %>% | |
filter(categorie %in% c("estimation_insee")) %>% | |
group_by(date_fictive) %>% | |
filter(N == max(N) & date_fictive > ymd("2020-09-01") & date_fictive<ymd("2020-12-31")) %>% | |
select(date_fictive,N) %>% | |
ungroup() %>% | |
left_join(moyenne,by= "date_fictive") %>% | |
mutate(surmortalite = N-moyenne) | |
total_aire_2020_automne <- aire_2020_automne %>% | |
summarize(total = sum(surmortalite)) %>% | |
mutate(total = 1000*(round(total / 1000) )) %>% | |
as.numeric() %>% round() | |
# aire 2003 | |
aire_2003 <- dc %>% filter(annee == 2003) %>% | |
select(date_fictive,N,date_deces) %>% | |
left_join(moyenne,by= "date_fictive") %>% | |
mutate(surmortalite = N-moyenne) %>% | |
filter(date_deces>ymd("2003-08-01"), | |
date_deces<ymd("2003-08-20")) | |
total_aire_2003 <- aire_2003 %>% | |
summarize(total = sum(surmortalite)) %>% | |
mutate(total = 1000*(round(total / 1000) )) %>% | |
as.numeric() %>% round() | |
ggplot(dc) + | |
# courbe des décès 2001-2019 | |
geom_line(data = . %>% filter(annee<2020), | |
aes(date_fictive,N, group = annee), | |
color="black",alpha=.1,size=.1) + | |
coord_polar() + | |
scale_y_continuous(limits = c(0,3615), | |
expand=expansion(add=c(0,0))) + | |
# aire grisée maxi - mini quotidien | |
geom_ribbon(data = . %>% filter(annee == 2016) , | |
aes(x= date_fictive, #y = N, | |
ymin = mini, ymax = maxi), | |
inherit.aes = F, | |
alpha=.2, color=NA) + | |
# # aire sous la courbe 2020 printemps | |
geom_ribbon(data = aire_2020_printemps, | |
aes(x = date_fictive, ymin = moyenne, ymax = N), | |
fill = "cornflowerblue", #color="cornflowerblue", | |
alpha=.5, | |
outline.type = "full") + | |
# aire sous la courbe 2020 automne | |
geom_ribbon(data = aire_2020_automne, | |
aes(x = date_fictive, ymin = moyenne, ymax = N), | |
fill = "cornflowerblue", #color="cornflowerblue", | |
alpha=.5, | |
outline.type = "full") + | |
# aire sous la courbe 2003 | |
geom_ribbon(data = aire_2003, | |
aes(x = date_fictive, ymin = moyenne, ymax = N), | |
fill = "orange", alpha=.3) + | |
# courbe de l'année 2003 | |
geom_line(data = . %>% filter(annee == 2003), | |
aes(date_fictive,N, group = annee), | |
color="navyblue",alpha=.1,size=.2) + | |
# courbe de la moyenne pour les années 2010-2019 | |
geom_line(data = . %>% | |
filter(categorie == "fichier_deces" & annee>2009 & annee<2020) %>% | |
group_by(date_fictive) %>% | |
summarize(N=mean(N)) %>% ungroup() %>% | |
mutate(annee=2010), | |
aes(date_fictive,N), | |
size=.5, color="black") + | |
# en rouge 2020 janvier - mars | |
geom_line(data = . %>% filter(annee==2020, categorie == "insee_hebdo"), | |
aes(date_fictive,N), | |
color="red") + | |
# en rouge 2020 mars - mai , décès estimés | |
geom_line(data = . %>% filter(categorie == "estimation_insee"), | |
aes(date_fictive, N), | |
inherit.aes=F, | |
color="red") + | |
# en vert les décès covid ECDC | |
geom_line(data = . %>% filter(categorie == "ecdc" & date_fictive > ymd("2020-02-29")), | |
aes(date_fictive, N), | |
inherit.aes=F, | |
color="darkgreen") + | |
# annotations | |
annotate(geom="text",x= ymd("2020-08-15"), y = 3200, | |
label = paste("Canicule de 2003 :\nEnviron", total_aire_2003, "décès\nen plus de la moyenne."), adj =0 ) + | |
annotate(geom="text",x= ymd("2020-05-01"), y = 2000, | |
label = "2020 : estimations provisoires\n(données INSEE redressées)", adj=0, color="firebrick1", size=2.5) + | |
annotate(geom="text",x= ymd("2020-06-01"), y = 2000, | |
label = "Moyenne décennale + Décès COVID19 \n(Hôpitaux et Ehpad)", adj=0, color="darkgreen",size=2.5) + | |
annotate(geom="text",x = ymd("2020-01-10"),y = 3100, adj=0, | |
label = paste("Printemps :\n",total_aire_2020_printemps," décès en plus de la moyenne", sep="")) + | |
annotate(geom="text",x= ymd("2020-11-01"), y = 1250, label = "Moyenne\n2010-2019", adj=0, size=4) + | |
geom_segment(data = data.frame(x=ymd("2020-10-30"), y=1250, | |
xend = ymd("2020-10-01"), yend = 1490, | |
annee=2010), | |
aes(x=x,y=y,xend=xend,yend=yend), color = "black", | |
arrow = arrow(length = unit(0.1, "inches"), type="closed") ) + | |
geom_segment(data = data.frame(x=ymd("2020-02-02"), y=3000, | |
xend = ymd("2020-04-01"), yend = 2000, | |
annee=2020), | |
aes(x=x,y=y,xend=xend,yend=yend), color = "black", | |
inherit.aes = F, | |
arrow = arrow(length = unit(0.1, "inches"), type="closed") ) + | |
scale_x_date(date_breaks = "1 month", date_labels = "%B", expand=expansion(add=c(0,0))) + | |
labs(title = "Nombre quotidien de décès en France, 2001-2020", | |
subtitle = "En rouge, l'année 2020, en gris les années 2001 à 2019. Vert : Moyenne décennale + décès covid dans les hôpitaux et les Ehpad.", | |
y=NULL,x=NULL, | |
caption = glue('Sources : Fichier des décès sur data.gouv.fr ', | |
'et Fichier des décès sur insee.fr et ECDC ', | |
'({format(aujourdhui, "%d %B %Y")}) ', | |
'| Graphique et erreurs : B. Coulmont.' )) + | |
theme_ipsum(plot_margin = margin(5, 5, 0, 5), | |
plot_title_margin=5 , | |
subtitle_margin=5, | |
base_family = "Helvetica") + | |
theme(plot.title.position="plot", | |
legend.position = "none", | |
axis.title = element_text(margin = margin(5,5,5,5))) |
[https://www.dropbox.com/s/ftqns4iinstqfjy/dc_jour.Rdata?dl=1](lien dropbox)
J'espère que ca marche : sinon me contacter par mail.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
hi, could you share dc_jour.Rdata please ?