Last active
August 28, 2020 20:14
-
-
Save thoughtfulbloke/5c961fea0d3afada83dfbaf64023e9ea to your computer and use it in GitHub Desktop.
Imputes the 11th and 12th of May in Apple Mobility Data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(lubridate) | |
library(tidyr) | |
library(dplyr) | |
apple <- read.csv("~/Desktop/applemobilitytrends-2020-08-26.csv", | |
stringsAsFactors = FALSE) | |
# columns X2020.05.11 X2020.05.12 are missing, so for that Mon and Tue | |
# working out the median relationships between Sun and Wed and Mon and Tue | |
# and applying them using the Sun and Wed of that week. | |
interpolations <- apple %>% | |
mutate(row_n = row_number()) %>% | |
gather(key=date_as_text, value=routing, 7:ncol(apple)) %>% | |
mutate(date_as_date = ymd(gsub("X","",date_as_text)), | |
date_as_wday = wday(date_as_date, label=TRUE)) %>% | |
filter(date_as_wday %in% c("Sun", "Mon","Tue","Wed")) %>% | |
arrange(row_n,date_as_date) %>% | |
group_by(row_n) %>% | |
mutate(week_seq = cumsum(date_as_wday == "Sun")) %>% | |
group_by(row_n,week_seq) %>% | |
mutate(seq_size=n()) %>% | |
ungroup() %>% | |
filter(seq_size == 4) %>% #only want full Sun-Wed sequences | |
group_by(row_n,week_seq) %>% | |
summarise(Mon_Sun = routing[2]/routing[1], | |
Tue_Sun = routing[3]/routing[1], | |
Mon_Wed = routing[2]/routing[4], | |
Tue_Wed = routing[3]/routing[4]) %>% | |
group_by(row_n) %>% | |
summarise(med_MS = median(Mon_Sun, na.rm=TRUE), | |
med_TS = median(Tue_Sun, na.rm=TRUE), | |
med_MW = median(Mon_Wed, na.rm=TRUE), | |
med_TW = median(Tue_Wed, na.rm=TRUE)) | |
#use the interpolations to reconstruct the missing dates | |
# 2/3rds of the weighting going to the known date 1 day away | |
# one third of the weighting going to the date 2 days away | |
before <- apple #only needed for demonstration purposes | |
apple$X2020.05.11 <- (2* apple$X2020.05.10 * interpolations$med_MS + | |
apple$X2020.05.13 * interpolations$med_MW) / 3 | |
apple$X2020.05.12 <- (apple$X2020.05.10 * interpolations$med_TS + | |
2* apple$X2020.05.13 * interpolations$med_TW) / 3 | |
### | |
# Example of graph with interpolated data | |
library(ggplot2) | |
without_inter <- before %>% | |
filter(region == "Auckland", transportation_type=="driving") %>% | |
gather(key=date_as_text, value=routing, 7:ncol(apple)) %>% | |
mutate(date_as_date = ymd(gsub("X","",date_as_text))) | |
with_inter <- apple %>% | |
filter(region == "Auckland", transportation_type=="driving") %>% | |
gather(key=date_as_text, value=routing, 7:ncol(apple)) %>% | |
mutate(date_as_date = ymd(gsub("X","",date_as_text))) | |
ggplot(with_inter, aes(x=date_as_date, y=routing)) + | |
geom_line(colour="red") + | |
geom_line(data=without_inter)+ | |
ggtitle("Apple Mobility Data: Auckland Driving") + theme_minimal() | |
Author
thoughtfulbloke
commented
Aug 28, 2020
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment