Skip to content

Instantly share code, notes, and snippets.

@thoughtfulbloke
Created April 8, 2021 08:02
Show Gist options
  • Save thoughtfulbloke/01f3775f4b263ee57b9a0a39189b3200 to your computer and use it in GitHub Desktop.
Save thoughtfulbloke/01f3775f4b263ee57b9a0a39189b3200 to your computer and use it in GitHub Desktop.
############ collection
## Assumes the working directory is the folder the script is in, and that the same working directory is used for future runs of the file.
# This also assumes you have Twitter developer credentials, and have run the create_token() function in the rtweet package
# to authorise R to access Twitter. This stores the credentials in an environment variable loaded at startup so they are
# not exposed in the script
# as an alternative, if using a Mac or PC, and the httpuv package is installed, you can interactively authorise the script
# at run time
# these packages need to be already installed in order to be loaded and used
library(rtweet) # access twitter API
library(dplyr) # general data handling commands
library(lubridate) # specialist date handling such as timezone shifts
library(ggplot2) #graphmaking
library(ggbeeswarm) # contains the specialist kind of graph wanted
library(scales) # formats date appearance on axis
# Customise this vector with the accounts that data is being gathered about.
subjects <- c("accountnames", "go_here", "as_doublequoted", "set_of_accounts")
# Files are datestamped so that re-running the script writes new files rather
# than overwriting earlier ones. An explicit format() pattern keeps the stamp
# at whole-second resolution ("XYYYY_MM_DD_HH_MM_SS") instead of relying on
# how the running R version converts a date-time to character.
the_now <- format(Sys.time(), "X%Y_%m_%d_%H_%M_%S")
# Timezone used when converting tweet times for the activity graph.
local_tz <- "Pacific/Auckland"
# Storage folder for account details, created in the working directory.
if (!dir.exists("twitter_accounts")) {
  dir.create("twitter_accounts")
}
# Evaluate an API call and return its value, or NULL if the call signals an
# error or a warning. The reason is reported with message() so a failed or
# partial request is visible in the log instead of vanishing silently, while
# the run still carries on with the next request.
#   expr    - the (lazily evaluated) API call
#   what    - short label for the kind of data being requested
#   account - account name, used only in the log message
fetch_or_null <- function(expr, what, account) {
  report <- function(cond) {
    message("Skipping ", what, " for ", account, ": ", conditionMessage(cond))
    NULL
  }
  tryCatch(expr, error = report, warning = report)
}

# Write a non-NULL result to "<prefix><suffix>.csv"; a NULL result is skipped.
save_result <- function(result, prefix, suffix) {
  if (!is.null(result)) {
    write_as_csv(result, file_name = paste0(prefix, suffix, ".csv"))
  }
  invisible(result)
}

# Build the weekday-by-time-of-day graph of tweets in the timezone `plot_tz`.
# Every tweet is moved onto the same reference date (2018-07-01) so the y axis
# shows only the time of day; `limits` is the span of that reference day.
# There are three basic patterns, all based on most people mostly sleeping
# 12am-6am:
# 1) tweet generally: start about 7am, stop at bedtime
# 2) tweet outside of work: big gap for workdays, but can tell it is work
#    because tweets occur during this time at weekends
# 3) tweet only at work: 8 hour block 5 days a week.
plot_tweet_times <- function(tweets, plot_tz, limits) {
  tweets %>%
    mutate(
      inNZ = with_tz(created_at, tz = plot_tz),
      dayw = wday(inNZ, label = TRUE),
      inday = ISOdatetime(2018, 7, 1, hour(inNZ), minute(inNZ),
                          second(inNZ), tz = plot_tz)
    ) %>%
    ggplot(aes(x = dayw, y = inday, colour = source)) +
    geom_quasirandom(size = 0.2, alpha = 0.5) +
    scale_y_datetime(
      date_breaks = "6 hours",
      labels = date_format("%H:%M", tz = plot_tz),
      limits = limits, expand = c(0, 0)
    ) +
    theme_minimal() +
    xlab("By weekday") +
    ylab("Hour of Day (local)") +
    ggtitle("Temporal arrangement of Tweets")
}

# The y axis limits are the same for every account, so compute them once.
graphlimits <- c(
  ISOdatetime(2018, 7, 1, 0, 0, 0, tz = local_tz),
  ISOdatetime(2018, 7, 2, 0, 0, 0, tz = local_tz)
)

for (target_account in subjects) {
  # One sub-folder per account; files inside are prefixed with the run stamp.
  primary_path <- file.path("twitter_accounts", target_account)
  if (!dir.exists(primary_path)) {
    dir.create(primary_path, recursive = TRUE)
  }
  filepath <- file.path(primary_path, the_now)

  # get timeline
  result <- fetch_or_null(
    get_timeline(target_account, n = 3199),
    "timeline", target_account
  )
  save_result(result, filepath, "TL")
  # Only graph when there are tweets: an empty timeline would make the
  # plotting code fail and abort the remaining accounts.
  if (!is.null(result) && nrow(result) > 0) {
    g1 <- plot_tweet_times(result, local_tz, graphlimits)
    ggsave(
      filename = paste0(filepath, "TL_graph.jpg"), plot = g1,
      width = 7, height = 5, units = "in"
    )
  }

  # get friends
  result <- fetch_or_null(
    get_friends(target_account, retryonratelimit = TRUE),
    "friends", target_account
  )
  save_result(result, filepath, "FR")

  # get followers
  result <- fetch_or_null(
    get_followers(target_account, retryonratelimit = TRUE),
    "followers", target_account
  )
  save_result(result, filepath, "FL")

  # get favourites
  result <- fetch_or_null(
    get_favorites(target_account, n = 3000),
    "favourites", target_account
  )
  save_result(result, filepath, "LK")

  # get mentions for account (approx last 7 days) by using the 7 day search
  # for @mentions
  result <- fetch_or_null(
    search_tweets(
      q = paste0("@", target_account), n = 30000, retryonratelimit = TRUE
    ),
    "mentions", target_account
  )
  save_result(result, filepath, "MN")

  print(paste(target_account, "complete"))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment