Created
March 12, 2020 16:19
-
-
Save MattSandy/8c189ed3357b5be8f7a000513cad0408 to your computer and use it in GitHub Desktop.
Plots a 7 day forecast for the Coronavirus
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(lubridate) | |
library(ggthemes) | |
library(forecast) | |
library(xts) | |
library(timetk) | |
future <- 7 | |
confirmed <- read_csv("https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv") | |
confirmed_A <- confirmed %>% select(`Province/State`:`3/9/20`) | |
confirmed_B <- confirmed %>% select(!`1/22/20`:`3/9/20`) | |
# Split data by Count Level Reporting Change ------------------------------ | |
US <- list() | |
US$Confirmed_A <- confirmed_A %>% | |
filter(`Country/Region`=="US") %>% | |
select(-(`Province/State`:Long),`Country/Region`) %>% | |
summarise_if(is.numeric, sum) | |
US$Confirmed_B <- confirmed_B %>% | |
filter(`Country/Region`=="US" & !`Province/State` %in% state.name) %>% | |
select(-(`Province/State`:Long),`Country/Region`) %>% | |
summarise_if(is.numeric, sum) | |
# Bind into single dataframe ---------------------------------------------- | |
US$Confirmed <- US$Confirmed_A %>% cbind(US$Confirmed_B) | |
# Future ------------------------------------------------------------------ | |
US$melted <- US$Confirmed %>% | |
gather %>% | |
mutate(date = key %>% mdy) %>% | |
rename(counts = value) | |
US$xts <- xts(US$melted %>% select(counts),US$melted$date) | |
d.arima <- auto.arima(US$xts) | |
d.forecast <- forecast(d.arima, level = c(95), h = future) | |
# Build that model out ---------------------------------------------------- | |
ts_future <- cbind(y = d.forecast$mean, | |
y.lo = d.forecast$lower, | |
y.hi = d.forecast$upper) %>% | |
xts(US$melted$date %>% tk_make_future_timeseries(future)) | |
#> y y.lo y.hi | |
#> 2012-12-20 148 70.33991 225.6601 | |
#> 2012-12-22 148 70.33991 225.6601 | |
#> 2012-12-23 148 70.33991 225.6601 | |
# Format original xts object | |
ts_final <- cbind(y = US$xts, | |
y.lo = NA, | |
y.hi = NA) %>% rbind(ts_future) %>% tk_tbl | |
# Plot forecast - Note ggplot uses data frames, tk_tbl() converts to df | |
ts_final$Status <- "Confirmed Case Count" | |
ts_final$Status[which(!is.na(ts_final$y.lo))] <- "Predicted Case Count" | |
ts_final %>% | |
ggplot(aes(x = index, y = counts, fill = Status)) + | |
geom_bar(stat="identity") + | |
geom_ribbon(aes(ymin = y.lo, ymax = y.hi), alpha = 0.6, fill = "red") + | |
theme_fivethirtyeight() + | |
scale_fill_manual(values = c("#666666","#B00000")) + | |
theme(axis.title = element_text()) + | |
scale_x_date(date_breaks = "7 days", date_labels = "%b %d") + | |
theme(axis.text.x = element_text(angle = 45, hjust = 1)) + | |
labs(title = "Confirmed Cases of the Corona Virus in the United States", | |
subtitle = "ARIMA model used for future predictions", | |
caption = "\n@appupio") + | |
ylab("Number of Confirmed Cases\n") + xlab("\nDate") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment