Skip to content

Instantly share code, notes, and snippets.

@jebyrnes
Created April 7, 2021 19:00
Show Gist options
  • Save jebyrnes/14743f9b42b8eae23a0be717ce0ad816 to your computer and use it in GitHub Desktop.
Save jebyrnes/14743f9b42b8eae23a0be717ce0ad816 to your computer and use it in GitHub Desktop.
Analyze email traffic between two people and animate it
#'--------------------------------------------------
#' Script for gmail analysis between two people
#' using gmailr
#'--------------------------------------------------
library(gmailr)
library(tidyverse)
library(rties)
library(lubridate)
library(gganimate)
#'-----------------------
## Connect to gmail ####
#'-----------------------
## Here are some steps for doing this:
## 1. Go to https://console.developers.google.com/
## 2. Create a new project and enable the Gmail API for it
## 3. Create an OAuth client ID under Credentials, then copy-and-paste its
##    Client ID and Client Secret into the fields below
#'-----------------------
## authenticate ####
#'-----------------------
# OAuth client credentials; replace both placeholders with your own values
# before running the script.
client_id <- "{YOUR CLIENT ID}"
client_secret <- "{YOUR CLIENT SECRET}"
# hand the credentials to gmailr so the requests below can use them
gm_auth_configure(
  key = client_id,
  secret = client_secret
)
#'-----------------------
## get emails ####
#'-----------------------
#this can be done better and in a more robust way. meh. it's a demo
# Two Gmail searches over the same date window, one per direction of the
# conversation. paste() joins the pieces with single spaces, giving one
# search string per call. num_results caps how many messages are listed.
from_lr <- gm_messages(
  search = paste(
    "from:[email protected]",
    "to:[email protected]",
    "before:2020/12/31",
    "after:2019/12/31"
  ),
  num_results = 1e4
)
from_me <- gm_messages(
  search = paste(
    "from:[email protected]",
    "to:[email protected]",
    "before:2020/12/31",
    "after:2019/12/31"
  ),
  num_results = 1e4
)
#'-----------------------------------------------------------
## a function to get email properties ####
## that can be expanded in the future for other things
#'-----------------------------------------------------------
#' Build a tibble of send times for a set of Gmail search results
#'
#' @param input A search result as returned by `gm_messages()`.
#' @param a_string Label stored in the `from` column of every row.
#' @return A tibble with columns `from` (the label) and `dates` (the message
#'   dates parsed with `dmy_hms()`).
gm_get_properties <- function(input, a_string) {
  # fetch the full message behind every id in the search result
  fetched <- map(gm_id(input), gm_message)

  # read each message's date string and parse it as day-month-year h:m:s
  parsed_dates <- dmy_hms(map_chr(fetched, gm_date))

  # further per-message properties can be added as extra columns here
  tibble(from = a_string, dates = parsed_dates)
}
#'-----------------------------------------------------------
## Make dat data ####
#'-----------------------------------------------------------
# Stack both directions of the conversation into one long table, labelling
# each row with who sent it.
email_dat <- list(
  gm_get_properties(from_lr, "lilarose"),
  gm_get_properties(from_me, "me")
) %>%
  bind_rows()
#'-----------------------------------------------------------
## Plot the raw emails ####
## This can probably be done better
#'-----------------------------------------------------------
# One point per email along the time axis, coloured by sender. Every point
# sits at y = 1 and is jittered vertically only (width = 0), so overlapping
# emails stay distinguishable; the y limits keep the band narrow.
email_dat %>%
  ggplot(mapping = aes(x = dates, y = 1, color = from)) +
  scale_y_continuous(limits = c(0.95, 1.05)) +
  geom_jitter(width = 0, height = 0.02)
#'-----------------------------------------------------------
## Make an hourly timeseries of an email heartbeat ####
#'-----------------------------------------------------------
# Number of emails per sender per hour. Each timestamp is floored to the
# start of its hour and count() tallies the rows for every (from, dates)
# pair into the `emails` column. count() on an ungrouped table returns an
# ungrouped result, so no trailing ungroup() is needed and summarise()'s
# regrouping message is avoided.
email_dat_summary <- email_dat %>%
  count(from, dates = floor_date(dates, "hour"), name = "emails")
#what? I liked this typo variable name
# Fill the gaps in the hourly series: within each sender, insert a row for
# every missing hour and set its email count to 0.
heartbeet <- group_by(email_dat_summary, from)
heartbeet <- padr::pad(heartbeet, interval = "hour")
heartbeet <- padr::fill_by_value(heartbeet, emails, value = 0)
heartbeet <- ungroup(heartbeet)
#an animation of the timeseries
# Animated hourly series of emails per sender, from 1 Nov 2020 onwards.
#
# `dates` is a date-time (it is parsed with dmy_hms() when the data is
# built), so the cutoff has to be a date-time too. mdy() without `tz` returns
# a Date, and comparing a POSIXct with a Date compares seconds-since-epoch
# against days-since-epoch, which keeps every row. Passing `tz` makes mdy()
# return a POSIXct at midnight UTC instead.
ggplot(
  heartbeet %>%
    filter(dates > mdy("11/1/2020", tz = "UTC")),
  aes(x = dates, y = emails, color = from)
) +
  geom_line() +
  geom_point(size = 2) +
  labs(y = "# of emails", x = "", color = "from who") +
  scale_color_manual(values = c("red", "blue")) +
  theme_minimal() +
  # theme() must follow theme_minimal(), which would otherwise reset it
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # draw the lines progressively along the time axis
  transition_reveal(dates)
# with no `animation` argument, anim_save() writes the last rendered animation
anim_save("email_me_lr_series.gif")
# Animated running total of emails per sender over the whole padded series.
heartbeet %>%
  # sort by time first so cumsum() accumulates in chronological order
  arrange(dates) %>%
  group_by(from) %>%
  mutate(running_total = cumsum(emails)) %>%
  ggplot(mapping = aes(x = dates, y = running_total, color = from)) +
  geom_line() +
  geom_point(size = 2) +
  scale_color_manual(values = c("red", "blue")) +
  theme_minimal() +
  # theme() must follow theme_minimal(), which would otherwise reset it
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(y = "cummulative\n# of emails", x = "", color = "from who") +
  # draw the lines progressively along the time axis
  transition_reveal(dates)
# with no `animation` argument, anim_save() writes the last rendered animation
anim_save("email_me_lr.gif")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment