Created
March 17, 2020 01:37
-
-
Save MattSandy/6fa5c0c689e01b18bfab50bd5324e5b6 to your computer and use it in GitHub Desktop.
Plot confirmed COVID-19 cases in SK
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(lubridate) | |
library(ggthemes) | |
library(forecast) | |
library(xts) | |
library(timetk) | |
future <- 7 | |
confirmed <- read_csv("https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv") | |
South_Korea <- list() | |
# Bind into single dataframe ---------------------------------------------- | |
South_Korea$Confirmed <- confirmed %>% | |
filter(`Country/Region`=="Korea, South") %>% | |
select(-(`Province/State`:Long),`Country/Region`) %>% | |
summarise_if(is.numeric, sum) | |
# Future ------------------------------------------------------------------ | |
South_Korea$melted <- South_Korea$Confirmed %>% | |
gather %>% | |
mutate(date = key %>% mdy) %>% | |
rename(counts = value) | |
South_Korea$xts <- xts(South_Korea$melted %>% select(counts),South_Korea$melted$date) | |
d.arima <- auto.arima(South_Korea$xts) | |
d.forecast <- forecast(d.arima, level = c(95), h = future) | |
# Build that model out ---------------------------------------------------- | |
ts_future <- cbind(y = d.forecast$mean, | |
y.lo = d.forecast$lower, | |
y.hi = d.forecast$upper) %>% | |
xts(South_Korea$melted$date %>% tk_make_future_timeseries(future)) | |
# Format original xts object | |
ts_final <- cbind(y = South_Korea$xts, | |
y.lo = NA, | |
y.hi = NA) %>% rbind(ts_future) %>% | |
tk_tbl %>% mutate(counts = round(counts)) | |
# Plot forecast - Note ggplot South_Korea data frames, tk_tbl() converts to df | |
ts_final$StatSouth_Korea <- "Confirmed Case Count" | |
ts_final$StatSouth_Korea[which(!is.na(ts_final$y.lo))] <- "Predicted Case Count" | |
# +(x)% ------------------------------------------------------------------- | |
ts_final$diff <- 1:nrow(ts_final) %>% sapply(function(index){ | |
if(index==1) { return(0) } | |
(((ts_final$counts[index] - ts_final$counts[index-1]) / ts_final$counts[index-1]) * 100) %>% | |
round(digits = 1) %>% return | |
}) | |
ts_final$label <- paste0(scales::comma(ts_final$counts,big.mark=","), '\n(+',ts_final$diff,'%)') | |
ts_final$label[-seq(1,length(ts_final$label),3)] <- NA | |
ts_final %>% filter(index >= ymd("2020-02-01")) %>% | |
ggplot(aes(x = index, y = counts, fill = StatSouth_Korea, label = label)) + | |
geom_bar(stat="identity") + | |
geom_label(color = "#ffffff", nudge_y = 500) + | |
theme_fivethirtyeight() + | |
#theme_blue() + | |
scale_fill_manual(values = c("#E16036","#D6A99A")) + | |
theme(axis.title = element_text()) + | |
scale_y_continuous(expand = c(0, 0),limits = c(0,max(ts_final$counts)*1.2)) + | |
scale_x_date(date_breaks = "1 days", date_labels = "%b %d") + | |
theme(axis.text.x = element_text(angle = 45, hjust = 1), | |
legend.direction = "horizontal", | |
legend.title = element_blank()) + | |
labs(title = "Confirmed Cases of COVID-19 in South Korea", | |
subtitle = "ARIMA model South Korea for future predictions. Percentages are based on increase from previous day.", | |
caption = "\n@appupio") + | |
ylab("Number of Confirmed Cases\n") + xlab("\n\nDate") | |
ggsave("last.png",width = 16,height = 8,units = "in") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment