Created
April 8, 2021 08:02
-
-
Save thoughtfulbloke/01f3775f4b263ee57b9a0a39189b3200 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############ collection | |
## assumes the working directory is the folder the script is in, and is set to the same for future runs of the file. | |
# This also assumes you have Twitter developer credentials, and have run the create_token() function in the rtweet package
# to authorise R to access Twitter. This stores the credentials in an environment variable loaded at startup so they are
# not exposed in the script
# as an alternative, if using a Mac or PC, and the httpuv package is installed, you can interactively authorise the script | |
# at run time | |
# these packages need to be already installed in order to be loaded and used | |
library(rtweet) # access twitter API | |
library(dplyr) # general data handling commands | |
library(lubridate) # specialist date handling such as timezone shifts | |
library(ggplot2) #graphmaking | |
library(ggbeeswarm) # contains the specialist kind of graph wanted | |
library(scales) # formats date appearance on axis | |
# Accounts to collect data about: replace the placeholders with the account
# names of interest, each one double-quoted.
subjects <- c("accountnames", "go_here", "as_doublequoted", "set_of_accounts")

# Timestamp used as the filename prefix, so that running the script again
# writes new files instead of overwriting the files of an earlier run.
# make.names() turns the separators of the timestamp into dots (and prefixes
# an "X"); the dots are then replaced with underscores.
the_now <- gsub("\\.", "_", make.names(Sys.time()))

# Time zone that tweet times are converted to for the activity graph.
local_tz <- "Pacific/Auckland"

# Storage folder for account details, created in the working directory the
# first time the script is run.
if (!dir.exists("twitter_accounts")) {
  dir.create("twitter_accounts")
}
# Run one rtweet request so that a failed call cannot stop the whole run.
#   expr  - the request, e.g. get_timeline(...). It is evaluated lazily inside
#           tryCatch(), so conditions raised by the request are caught here.
#   label - short description of the request, used in the diagnostic message.
# Returns the value of `expr`, or NULL when the request raised an error or a
# warning. A warning is deliberately treated as a failed request; the reason
# is reported with message() rather than being discarded.
fetch_or_null <- function(expr, label) {
  tryCatch(
    expr,
    error = function(e) {
      message(label, " failed: ", conditionMessage(e))
      NULL
    },
    warning = function(w) {
      message(label, " skipped after warning: ", conditionMessage(w))
      NULL
    }
  )
}

# Write a collected result to `csv_path` unless the request failed (NULL).
# Returns, invisibly, TRUE when a file was written and FALSE otherwise.
save_if_collected <- function(result, csv_path) {
  if (is.null(result)) {
    return(invisible(FALSE))
  }
  write_as_csv(result, file_name = csv_path)
  invisible(TRUE)
}

# Build the activity graph for one timeline: weekday on the x axis, time of
# day (in time zone `tz`) on the y axis, coloured by tweet source.
# There are three basic patterns, all based on most people mostly sleeping
# 12am-6am:
# 1) tweet generally: start about 7am, stop at bedtime
# 2) tweet outside of work: big gap for workdays, but can tell it is work
#    because tweet during this time at weekends
# 3) tweet only at work - 8 hour block 5 days a week.
#   timeline - data frame of tweets with `created_at` and `source` columns
#   tz       - time zone name the tweet times are shifted to
#   limits   - two date-times bounding the y axis
# Returns the ggplot object.
plot_tweet_times <- function(timeline, tz, limits) {
  timeline %>%
    mutate(
      inNZ = with_tz(created_at, tz = tz),
      dayw = wday(inNZ, label = TRUE),
      # keep only the time of day by moving every tweet onto the same date
      inday = ISOdatetime(2018, 7, 1, hour(inNZ), minute(inNZ),
                          second(inNZ), tz = tz)
    ) %>%
    ggplot(aes(x = dayw, y = inday, colour = source)) +
    geom_quasirandom(size = 0.2, alpha = 0.5) +
    scale_y_datetime(date_breaks = "6 hours",
                     labels = date_format("%H:%M", tz = tz),
                     limits = limits, expand = c(0, 0)) +
    theme_minimal() +
    xlab("By weekday") +
    ylab("Hour of Day (local)") +
    ggtitle("Temporal arrangement of Tweets")
}

# graphlimits are because the y axis is when tweets would be if they all took
# place on the same day; the value does not depend on the account, so it is
# computed once rather than inside the loop.
graphlimits <- c(
  ISOdatetime(2018, 7, 1, 0, 0, 0, tz = local_tz),
  ISOdatetime(2018, 7, 2, 0, 0, 0, tz = local_tz)
)

# Collect timeline, friends, followers, favourites and mentions for each
# account, writing one datestamped CSV per successful request into
# twitter_accounts/<account>/.
for (target_account in subjects) {
  primary_path <- paste0("twitter_accounts/", target_account)
  if (!dir.exists(primary_path)) {
    dir.create(primary_path)
  }
  filepath <- paste0(primary_path, "/", the_now)

  # get timeline
  result <- fetch_or_null(
    get_timeline(target_account, n = 3199),
    paste(target_account, "timeline")
  )
  save_if_collected(result, paste0(filepath, "TL.csv"))
  # since there are tweets, make a graph of activity in local time; a result
  # with no rows has nothing to plot, so the graph is skipped in that case
  if (is.data.frame(result) && nrow(result) > 0) {
    g1 <- plot_tweet_times(result, local_tz, graphlimits)
    ggsave(g1, filename = paste0(filepath, "TL_graph.jpg"),
           width = 7, height = 5, units = "in")
  }

  # get friends
  result <- fetch_or_null(
    get_friends(target_account, retryonratelimit = TRUE),
    paste(target_account, "friends")
  )
  save_if_collected(result, paste0(filepath, "FR.csv"))

  # get followers
  result <- fetch_or_null(
    get_followers(target_account, retryonratelimit = TRUE),
    paste(target_account, "followers")
  )
  save_if_collected(result, paste0(filepath, "FL.csv"))

  # get favourites
  result <- fetch_or_null(
    get_favorites(target_account, n = 3000),
    paste(target_account, "favourites")
  )
  save_if_collected(result, paste0(filepath, "LK.csv"))

  # get mentions for account (approx last 7 days) by using the 7 day search
  # for @mentions
  result <- fetch_or_null(
    search_tweets(q = paste0("@", target_account), n = 30000,
                  retryonratelimit = TRUE),
    paste(target_account, "mentions")
  )
  save_if_collected(result, paste0(filepath, "MN.csv"))

  print(paste(target_account, "complete"))
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment