-
-
Save meefen/5126431 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#@author Michael J Bommarito
#@contact [email protected]
#@date Feb 20, 2011
#@ip Simplified BSD, (C) 2011.
# This is a simple example of an R script that will retrieve
# public tweets for a given hashtag.
library(RJSONIO) | |
# This function loads stored tag data to determine the current max_id.
# Load the previously stored tweets for a hashtag.
#
# Reads "tweet_<tag>.csv" (tab-separated, with a header row) relative to the
# current working directory.
#
# Args:
#   tag: Hashtag text used to build the file name.
#
# Returns:
#   A data.frame with the stored rows. The "id" column is kept as character.
#   The "date" column, when present and parseable, is converted to POSIXct
#   in GMT; otherwise it is returned exactly as read.
loadTag <- function(tag) {
  # Set the filename
  fileName <- sprintf("tweet_%s.csv", tag)

  # Tweet IDs can exceed 2^53, the largest integer a double holds exactly.
  # Letting read.table() guess the column type would turn them into rounded
  # numerics, so the id column is forced to stay text.
  tweets <- read.table(
    file = fileName,
    sep = "\t",
    header = TRUE,
    comment.char = "",
    stringsAsFactors = FALSE,
    colClasses = c(id = "character")
  )

  # Timestamps are stored as text ("YYYY-mm-dd HH:MM:SS"); turn them back
  # into POSIXct so they can be combined with freshly downloaded tweets.
  if ("date" %in% names(tweets)) {
    stamps <- as.character(tweets$date)
    parsed <- as.POSIXct(strptime(stamps, "%Y-%m-%d %H:%M:%S", tz = "GMT"))

    # A value may have been written without a time-of-day (e.g. midnight);
    # retry those as plain dates.
    dateOnly <- is.na(parsed) & !is.na(stamps)
    if (any(dateOnly)) {
      parsed[dateOnly] <- as.POSIXct(
        strptime(stamps[dateOnly], "%Y-%m-%d", tz = "GMT")
      )
    }

    # Only replace the column when nothing would be lost by the conversion.
    if (!any(is.na(parsed) & !is.na(stamps))) {
      tweets$date <- parsed
    }
  }

  tweets
}
# This function downloads public tweets for a hashtag and appends them to the stored tag data.
# Download public tweets for a hashtag and append them to the stored tag data.
#
# If "tweet_<tag>.csv" already exists it is loaded with loadTag() and the
# download resumes from the smallest stored tweet ID (sent as max_id).
# Otherwise the download starts from the newest tweets. After every page the
# full set is written back to the file, so an interrupted run can be resumed.
#
# Args:
#   tag: Hashtag text without the leading "#" (the query adds "%23").
#
# Returns:
#   A data.frame with columns "id" (character), "date" (POSIXct, GMT) and
#   "user" (character) holding the stored and newly downloaded tweets, or
#   NULL when nothing was stored and nothing could be downloaded.
#
# NOTE(review): the search.twitter.com JSON endpoint used here has reportedly
# been retired -- confirm the endpoint before relying on this function.
downloadTag <- function(tag) {
  # Smallest tweet ID among digit strings. Plain min() on strings compares
  # lexicographically, which is only numerically correct for equal lengths,
  # so restrict the comparison to the shortest IDs first.
  smallestId <- function(ids) {
    ids <- ids[!is.na(ids)]
    if (length(ids) == 0) {
      return(0)
    }
    width <- nchar(ids)
    min(ids[width == min(width)])
  }

  # One field of a tweet as a single string; NA when the field is absent.
  fieldOf <- function(tweet, field) {
    value <- tweet[[field]]
    if (is.null(value) || length(value) == 0) {
      NA_character_
    } else {
      as.character(value)[1]
    }
  }

  # Tweet ID as a digit string. Prefer the string form when the response
  # provides one; otherwise print the number in full instead of relying on
  # the default numeric-to-character coercion (15 significant digits).
  idOf <- function(tweet) {
    if (!is.null(tweet[["id_str"]])) {
      as.character(tweet[["id_str"]])[1]
    } else if (is.numeric(tweet[["id"]])) {
      sprintf("%.0f", tweet[["id"]][1])
    } else {
      fieldOf(tweet, "id")
    }
  }

  # Parse timestamps as they appear in the stored file.
  parseStoredDate <- function(stamps) {
    stamps <- as.character(stamps)
    parsed <- as.POSIXct(strptime(stamps, "%Y-%m-%d %H:%M:%S", tz = "GMT"))
    # A value may have been written without a time-of-day; retry as a date.
    dateOnly <- is.na(parsed) & !is.na(stamps)
    if (any(dateOnly)) {
      parsed[dateOnly] <- as.POSIXct(
        strptime(stamps[dateOnly], "%Y-%m-%d", tz = "GMT")
      )
    }
    parsed
  }

  # Set the filename
  fileName <- sprintf("tweet_%s.csv", tag)

  # Check to see if the file exists. If it does, load it.
  if (file.exists(fileName)) {
    tweets <- loadTag(tag)

    # Make the stored columns match the types of freshly downloaded rows so
    # that rbind() below does not mix numbers, text and timestamps.
    tweets$id <- if (is.numeric(tweets$id)) {
      sprintf("%.0f", tweets$id)
    } else {
      as.character(tweets$id)
    }
    if (!is.null(tweets$date) && !inherits(tweets$date, "POSIXct")) {
      tweets$date <- parseStoredDate(tweets$date)
    }

    maxID <- smallestId(tweets$id)
  } else {
    tweets <- NULL
    maxID <- 0
  }

  # The tag goes into the query string, so escape anything URL-special.
  encodedTag <- utils::URLencode(tag, reserved = TRUE)

  # Record the nextPage query when provided.
  nextPage <- NULL

  # Loop until a query yields no tweets we have not already stored.
  repeat {
    queryURL <- if (!is.null(nextPage)) {
      sprintf("http://search.twitter.com/search.json%s", nextPage)
    } else if (!identical(maxID, 0)) {
      sprintf(
        "http://search.twitter.com/search.json?q=%%23%s&rpp=100&max_id=%s",
        encodedTag, maxID
      )
    } else {
      sprintf("http://search.twitter.com/search.json?q=%%23%s&rpp=100&",
              encodedTag)
    }

    # Execute the query
    response <- fromJSON(queryURL)
    newTweets <- response$results

    # Check to make sure that there are tweets left.
    if (length(newTweets) == 0) {
      print(sprintf("No new tweets: %s %s", maxID, queryURL))
      break
    }

    # Now check for a nextPage query.
    if ("next_page" %in% names(response)) {
      nextPage <- response$next_page
    } else {
      nextPage <- NULL
    }

    # These lines do not include text because no JSON libraries support
    # Unicode at the moment. Therefore, it is not safe to use R
    # and Twitter together on live data.
    # vapply() keeps one string per tweet even when a field is missing.
    createdAt <- vapply(newTweets, fieldOf, character(1),
                        field = "created_at", USE.NAMES = FALSE)
    dfTweets <- data.frame(
      id = vapply(newTweets, idOf, character(1), USE.NAMES = FALSE),
      date = as.POSIXct(
        strptime(createdAt, "%a, %d %b %Y %H:%M:%S %z", tz = "GMT")
      ),
      user = vapply(newTweets, fieldOf, character(1),
                    field = "from_user", USE.NAMES = FALSE),
      stringsAsFactors = FALSE
    )

    # A max_id query returns the boundary tweet again; keep only tweets that
    # are not stored yet. Stopping when nothing new arrives also guarantees
    # that every pass through the loop makes progress.
    isNew <- !duplicated(dfTweets$id) & !(dfTweets$id %in% tweets$id)
    dfTweets <- dfTweets[isNew, , drop = FALSE]
    if (nrow(dfTweets) == 0) {
      print(sprintf("No new tweets: %s %s", maxID, queryURL))
      break
    }

    # Append these tweets to the list.
    tweets <- if (is.null(tweets)) dfTweets else rbind(tweets, dfTweets)

    # Now update our maxID variable.
    maxID <- smallestId(tweets$id)

    # Store the current set of tweets.
    write.table(tweets, sep = "\t", file = fileName, row.names = FALSE)

    # Output some debug info and sleep to be nice to Twitter.
    print(sprintf("%s, %s", maxID, nrow(tweets)))
    flush.console()
    Sys.sleep(10)
  }

  tweets
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment