Last active
September 19, 2018 13:25
-
-
Save matt-dray/6aa4c2abf6bcc64d605a498f8c8c1517 to your computer and use it in GitHub Desktop.
Function to automate generation of RDS, simple CSV and plot using rtweet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Function to automate generation of RDS, simple CSV and plot using rtweet
# Matt Dray
# March 2018
# Purpose: create an RDS, simplified CSV and plot of tweets containing a search
# term from the rtweet::search_tweets function and save them to a folder with a
# unique descriptive name related to the search term. Assumes you have an
# 'outputs' folder in your working directory to store these files. Assumes
# you've already sorted out a Twitter token as per
# http://rtweet.info/articles/auth.html
#' Search Twitter and save the results as an RDS, a simplified CSV and a plot
#'
#' Runs `rtweet::search_tweets()` (retweets excluded) for `search_term` and
#' writes three files to `outputs/<yymmdd>_<term>/` under the current working
#' directory, each named `<yymmdd>_<term>` with the extension `.RDS` (full
#' search result), `.csv` (screen_name, created_at and text columns only) and
#' `.png` (time-series plot of tweet counts in 3-hour bins).
#'
#' @param search_term A single non-empty string: the query passed to
#'   `rtweet::search_tweets()` as `q`.
#' @param ... Further arguments forwarded to `rtweet::search_tweets()`,
#'   e.g. `n = 500`. Do not pass `q` or `include_rts`; they are set here.
#' @return The value of the final `ggplot2::ggsave()` call.
extract_tweets <- function(search_term = NULL, ...) {
  # Fail early, before anything is written to disk, if the term is unusable
  if (!is.character(search_term) || length(search_term) != 1L ||
      is.na(search_term) || !nzchar(search_term)) {
    stop("`search_term` must be a single non-empty string.", call. = FALSE)
  }

  # packages (still attached, as before, so callers relying on that keep
  # working)
  library(rtweet) # for getting tweets
  library(ggplot2) # for plotting

  # build the folder/file name, e.g. "180314_rstats"
  main_dir <- getwd()
  file_name <- paste0(
    format(Sys.time(), "%y%m%d"), # today's date in form 180314
    "_",
    gsub("[[:punct:][:blank:]]+", "", search_term) # no punctuation or blanks
  )

  # create the output folder; recursive so a missing 'outputs' parent is
  # created too, and silent when the folder already exists (same-day re-run)
  out_dir <- file.path(main_dir, "outputs", file_name)
  dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

  # common path stem shared by the three output files (extension added below)
  out_stem <- file.path(out_dir, file_name)

  # perform search, forwarding any extra arguments (such as n) unchanged
  temp_search <- rtweet::search_tweets(
    q = search_term,
    ...,
    include_rts = FALSE
  )

  # save full dataset as RDS
  saveRDS(object = temp_search, file = paste0(out_stem, ".RDS"))

  # write simplified object (three columns only) to csv
  temp_simple <- temp_search[, c("screen_name", "created_at", "text")]
  write.csv(x = temp_simple, file = paste0(out_stem, ".csv"))

  # create simple plot; the title shows the search term without punctuation
  temp_plot <- rtweet::ts_plot(data = temp_search, by = "3 hours") +
    ggplot2::theme_minimal() +
    ggplot2::theme(plot.title = ggplot2::element_text(face = "bold")) +
    ggplot2::labs(
      x = NULL, y = NULL,
      title = paste0(
        "Frequency of \"",
        gsub("[[:punct:]]", "", search_term),
        "\" occurrences in tweets"
      ),
      subtitle = "Aggregated Twitter status (tweet) counts",
      caption = "\nSource: Data collected from Twitter's REST API via rtweet"
    )

  # save simple plot
  ggplot2::ggsave(
    filename = paste0(out_stem, ".png"),
    plot = temp_plot,
    width = 10,
    height = 7
  )
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment