Skip to content

Instantly share code, notes, and snippets.

@simoncozens
Last active March 6, 2020 21:27
Show Gist options
  • Save simoncozens/0bd19949634b9e3cf942dc1998c6f949 to your computer and use it in GitHub Desktop.
Save simoncozens/0bd19949634b9e3cf942dc1998c6f949 to your computer and use it in GitHub Desktop.
library(readr)
library(dplyr)
library(ggplot2)
library(tidyr)
logTicks <- function(n = 5, base = 10){
# Divisors of the logarithm base. E.g. for base 10: 1, 2, 5, 10.
divisors <- which((base / seq_len(base)) %% 1 == 0)
mkTcks <- function(min, max, base, divisor){
f <- seq(divisor, base, by = divisor)
return(unique(c(base^min, as.vector(outer(f, base^(min:max), `*`)))))
}
function(x) {
rng <- range(x, na.rm = TRUE)
lrng <- log(rng, base = base)
min <- floor(lrng[1])
max <- ceiling(lrng[2])
tck <- function(divisor){
t <- mkTcks(min, max, base, divisor)
t[t >= rng[1] & t <= rng[2]]
}
# For all possible divisors, produce a set of ticks and count how many ticks
# result
tcks <- lapply(divisors, function(d) tck(d))
l <- vapply(tcks, length, numeric(1))
# Take the set of ticks which is nearest to the desired number of ticks
i <- which.min(abs(n - l))
if(l[i] < 2){
# The data range is too small to show more than 1 logarithm tick, fall
# back to linear interpolation
ticks <- pretty(x, n = n, min.n = 2)
}else{
ticks <- tcks[[i]]
}
return(ticks)
}
}
covid <- read_csv(url("https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv")) %>% gather(ends_with("/20"), key="Date", value="Cases") %>% filter(!(`Country/Region` == 'Mainland China')) %>% group_by(Date) %>% summarise(Cases=sum(Cases))
covid$Date <- parse_date(covid$Date,"%m/%d/%y")
model <- lm(log(Cases) ~ Date,data=covid,na.action=na.exclude)
newdata <- data.frame(Date=seq(as.Date("2020/02/15"),by="day",length.out=40))
newdata$Cases <- exp(predict(model,data=tail(covid,25), newdata=newdata))
ggplot(covid,aes(x=Date,y=Cases)) + theme_grey(base_size=20) +geom_point() + geom_line(data=newdata,color='red') + scale_x_date(date_breaks="7 days") + theme(axis.text.x = element_text(angle = 70,vjust=0.5)) + scale_y_log10(breaks = logTicks(n = 10), minor_breaks = logTicks(n = 40),labels=scales::comma) + theme(plot.margin=unit(c(10,30,10,10),"pt"),axis.title.x=element_blank())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment