Skip to content

Instantly share code, notes, and snippets.

@AdamSpannbauer
Created March 10, 2017 02:58
Show Gist options
  • Save AdamSpannbauer/a5f0bf7f815fca7035828466911cacf9 to your computer and use it in GitHub Desktop.
Save AdamSpannbauer/a5f0bf7f815fca7035828466911cacf9 to your computer and use it in GitHub Desktop.
Get tweets from a user's timeline
#' Get tweets from a user's timeline
#'
#' Pages through the Twitter REST API v1.1 `statuses/user_timeline` endpoint,
#' at most 200 tweets per request, until `n_tweets` tweets have been collected
#' or the API returns an empty page.
#'
#' @param user Screen name of the account whose timeline is fetched.
#' @param n_tweets Maximum number of tweets to fetch (default 200).
#' @param oauth_sig Passed as the `config` argument of `httr::GET()`.
#'   Presumably an OAuth request signature/config (e.g. from
#'   `httr::sign_oauth1.0()`) -- confirm against the caller.
#' @return A tibble with columns `id`, `text`, `favorite_count`,
#'   `retweet_count` and `created_at`, with non-ASCII characters stripped, or
#'   `NULL` when no tweets were fetched. Rows are ordered with the most
#'   recently fetched (oldest) page first.
#'   NOTE: `id` is numeric; values above 2^53 cannot be represented exactly as
#'   doubles, so `id` may be imprecise for such tweets.
get_timeline_df <- function(user, n_tweets = 200, oauth_sig) {
  stopifnot(
    length(user) == 1L,
    is.numeric(n_tweets), length(n_tweets) == 1L, !is.na(n_tweets)
  )

  api_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json"
  page_size <- 200
  fields_i_care_about <- c(
    "id", "text", "favorite_count", "retweet_count", "created_at"
  )

  # Subtract 1 from a non-negative integer held as a decimal string. Tweet ids
  # are 64-bit integers, so arithmetic on doubles would lose precision.
  decrement_id_str <- function(id_str) {
    digits <- as.integer(strsplit(id_str, "", fixed = TRUE)[[1]])
    pos <- length(digits)
    # Borrow through trailing zeros.
    while (pos > 0L && digits[pos] == 0L) {
      digits[pos] <- 9L
      pos <- pos - 1L
    }
    if (pos == 0L) {
      return("0") # input was zero; nothing lower to ask for
    }
    digits[pos] <- digits[pos] - 1L
    # Drop leading zeros but keep a lone "0".
    sub("^0+(?=.)", "", paste(digits, collapse = ""), perl = TRUE)
  }

  pages <- list()
  max_id <- NULL
  n_left <- ceiling(n_tweets)

  # Loop until n_tweets have been fetched or the timeline is exhausted.
  while (n_left > 0) {
    # Let httr build and URL-encode the query string; max_id is only sent
    # from the second request on (a NULL list entry is dropped).
    query <- list(
      screen_name = user,
      count = as.integer(min(page_size, n_left))
    )
    query$max_id <- max_id

    response <- httr::GET(api_url, oauth_sig, query = query)
    # Fail loudly on HTTP errors instead of parsing an error payload as tweets.
    httr::stop_for_status(response)

    tweets <- httr::content(response)
    if (length(tweets) == 0L) {
      break # no older tweets available
    }

    # Round-trip through JSON text to strip non-ASCII characters and to get
    # jsonlite's simplified (column-wise) structure.
    json_text <- jsonlite::toJSON(tweets)
    json_ascii <- iconv(json_text, "UTF-8", "ASCII", sub = "")
    # Special character (^C) not caught by the conversion above.
    json_ascii <- stringr::str_replace_all(json_ascii, "\003", "")
    timeline_list <- jsonlite::fromJSON(json_ascii)

    columns <- purrr::map(
      fields_i_care_about,
      function(field) unlist(timeline_list[[field]])
    )
    columns <- purrr::set_names(columns, fields_i_care_about)
    pages[[length(pages) + 1L]] <- dplyr::as_tibble(columns)

    # The next page must end strictly before the oldest tweet seen so far.
    # Use the string ids for this: order by length, then by digits.
    id_strs <- purrr::map_chr(tweets, "id_str")
    oldest_id <- id_strs[order(nchar(id_strs), id_strs, method = "radix")][1]
    max_id <- decrement_id_str(oldest_id)

    # Count what was actually received; a page may hold fewer than requested.
    n_left <- n_left - length(tweets)
  }

  if (length(pages) == 0L) {
    return(NULL)
  }
  # Bind once, newest-fetched page first (matches the historical row order).
  dplyr::distinct(dplyr::bind_rows(rev(pages)))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment