Skip to content

Instantly share code, notes, and snippets.

@thoughtfulbloke
Last active August 28, 2020 20:14
Show Gist options
  • Save thoughtfulbloke/5c961fea0d3afada83dfbaf64023e9ea to your computer and use it in GitHub Desktop.
Save thoughtfulbloke/5c961fea0d3afada83dfbaf64023e9ea to your computer and use it in GitHub Desktop.
Imputes the 11th and 12th of May in Apple Mobility Data
library(lubridate)
library(tidyr)
library(dplyr)
apple <- read.csv("~/Desktop/applemobilitytrends-2020-08-26.csv",
stringsAsFactors = FALSE)
# columns X2020.05.11 X2020.05.12 are missing, so for that Mon and Tue
# working out the median relationships between Sun and Wed and Mon and Tue
# and applying them using the Sun and Wed of that week.
interpolations <- apple %>%
mutate(row_n = row_number()) %>%
gather(key=date_as_text, value=routing, 7:ncol(apple)) %>%
mutate(date_as_date = ymd(gsub("X","",date_as_text)),
date_as_wday = wday(date_as_date, label=TRUE)) %>%
filter(date_as_wday %in% c("Sun", "Mon","Tue","Wed")) %>%
arrange(row_n,date_as_date) %>%
group_by(row_n) %>%
mutate(week_seq = cumsum(date_as_wday == "Sun")) %>%
group_by(row_n,week_seq) %>%
mutate(seq_size=n()) %>%
ungroup() %>%
filter(seq_size == 4) %>% #only want full Sun-Wed sequences
group_by(row_n,week_seq) %>%
summarise(Mon_Sun = routing[2]/routing[1],
Tue_Sun = routing[3]/routing[1],
Mon_Wed = routing[2]/routing[4],
Tue_Wed = routing[3]/routing[4]) %>%
group_by(row_n) %>%
summarise(med_MS = median(Mon_Sun, na.rm=TRUE),
med_TS = median(Tue_Sun, na.rm=TRUE),
med_MW = median(Mon_Wed, na.rm=TRUE),
med_TW = median(Tue_Wed, na.rm=TRUE))
#use the interpolations to reconstruct the missing dates
# 2/3rds of the weighting going to the known date 1 day away
# one third of the weighting going to the date 2 days away
before <- apple #only needed for demonstration purposes
apple$X2020.05.11 <- (2* apple$X2020.05.10 * interpolations$med_MS +
apple$X2020.05.13 * interpolations$med_MW) / 3
apple$X2020.05.12 <- (apple$X2020.05.10 * interpolations$med_TS +
2* apple$X2020.05.13 * interpolations$med_TW) / 3
###
# Example of graph with interpolated data
library(ggplot2)
without_inter <- before %>%
filter(region == "Auckland", transportation_type=="driving") %>%
gather(key=date_as_text, value=routing, 7:ncol(apple)) %>%
mutate(date_as_date = ymd(gsub("X","",date_as_text)))
with_inter <- apple %>%
filter(region == "Auckland", transportation_type=="driving") %>%
gather(key=date_as_text, value=routing, 7:ncol(apple)) %>%
mutate(date_as_date = ymd(gsub("X","",date_as_text)))
ggplot(with_inter, aes(x=date_as_date, y=routing)) +
geom_line(colour="red") +
geom_line(data=without_inter)+
ggtitle("Apple Mobility Data: Auckland Driving") + theme_minimal()
@thoughtfulbloke
Copy link
Author

Screen Shot 2020-08-29 at 8 09 02 AM

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment