Created
September 20, 2021 18:41
-
-
Save MattSandy/65c99f25c58a3cd4706c217536dc04ba to your computer and use it in GitHub Desktop.
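Plots 7-day average COVID-19 cases against time-shifted 7-day average deaths and fits a linear model to measure how strongly they are related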
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse)
library(lubridate)
library(zoo)
library(scales)
# Read Data ---------------------------------------------------------------
# NYT state-level file: cumulative `cases` and `deaths`, one row per state
# per day.
confirmed <- read_csv(
  "https://github.com/nytimes/covid-19-data/raw/master/us-states.csv"
)
# Get New Deaths + Cases --------------------------------------------------
# Collapse to one national row per date before differencing. Subtracting
# consecutive rows of the state-level table would compare one state's
# cumulative total with another state's.
df <- confirmed %>%
  mutate(date = ymd(date)) %>%
  group_by(date) %>%
  summarise(cases = sum(cases), deaths = sum(deaths), .groups = "drop") %>%
  arrange(date) %>%
  mutate(
    # Day-over-day change in the cumulative totals; the first day has no
    # predecessor, so its change is 0.
    new_deaths = deaths - dplyr::lag(deaths, default = first(deaths)),
    new_cases = cases - dplyr::lag(cases, default = first(cases))
  )
# Filtering out where new_cases is less than 0
# The date sequence keeps one row per calendar day from 2021-01-01 onward,
# so a filtered-out day remains in the table with NA counts.
df_dates <- data.frame(
  date = seq(ymd("2021-01-01"), max(df$date), by = "day")
) %>%
  left_join(filter(df, new_cases >= 0), by = "date") %>%
  mutate(
    # Trailing 7-day means. rollapply() with na.rm = TRUE replaces
    # rollmean(), which does not handle NA input. Incomplete leading windows
    # are filled with NA instead of 0 so they are not plotted or fitted as
    # if they were real zeros.
    roll_mean_deaths = rollapply(
      new_deaths, 7, mean,
      na.rm = TRUE, fill = NA, align = "right"
    ),
    roll_mean_cases = rollapply(
      new_cases, 7, mean,
      na.rm = TRUE, fill = NA, align = "right"
    )
  )
# Set Offset
# Number of rows by which the death series is shifted relative to cases
# (rows are days when df_dates holds one row per date).
offset <- 12
# Pair each row's case average with the death average `offset` rows later.
# lead() shifts by exactly `offset` and pads the tail with NA; the previous
# manual indexing added `offset` twice and so shifted by 2 * offset.
df_dates$offset_deaths <- dplyr::lead(df_dates$roll_mean_deaths, offset)
# Plot --------------------------------------------------------------------
# Printed explicitly so the plot and model summary also appear when the
# script is run through source().
print(
  ggplot(df_dates, aes(x = roll_mean_cases, y = offset_deaths)) +
    geom_point() +
    ggtitle(offset) +
    geom_smooth()
)
print(summary(lm(offset_deaths ~ roll_mean_cases, data = df_dates)))
# Repeat the scatter plot for every shift from 10 to 20 rows so the shifts
# can be compared visually; each plot is titled with its offset.
for (offset in 10:20) {
  # Shift the death average forward by exactly `offset` rows, leaving NA in
  # the last `offset` rows. The previous manual indexing added `offset`
  # twice and so shifted by 2 * offset.
  df_dates$offset_deaths <- dplyr::lead(df_dates$roll_mean_deaths, offset)
  # Plot --------------------------------------------------------------------
  p1 <- ggplot(df_dates, aes(x = roll_mean_cases, y = offset_deaths)) +
    geom_point() +
    ggtitle(offset) +
    geom_smooth()
  # ggplot objects are not auto-printed inside a loop.
  print(p1)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment