-
-
Save rossmounce/4444291 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# N.B. On *ubuntu RCurl may not install for you off the bat. If so read:
# http://www.omegahat.org/RCurl/FAQ.html & sudo apt-get install libcurl4-openssl-dev

# Install only the packages that are not already present, so that re-running
# the script does not re-download and reinstall everything each time.
required_pkgs <- c("RCurl", "twitteR", "wordcloud", "tm", "stringr")
missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# NOTE(review): brewer.pal() used further down comes from RColorBrewer, which
# is presumably attached as a dependency of wordcloud -- confirm.
library(twitteR)
library(wordcloud)
library(tm)
library(stringr)
# Search for #mooc tweets
# Request at most 1500 tweets: asking for more without special authentication
# has been reported to fail with
# "Error in object[[i]] : object of type 'closure' is not subsettable"
# (http://stackoverflow.com/questions/10328066/twitter-package-in-r-maximum-tweets-using-searchtwitter).
mooctweets <- searchTwitter("#mooc", n = 1500)
length(mooctweets) # ends up with 713 as of 03-Jan-13 at 15:42 London time

# Stop early with a clear message instead of failing later on an empty list.
if (length(mooctweets) == 0) {
  stop("No #mooc tweets were returned; nothing to analyse.", call. = FALSE)
}

# make into a data.frame
mooctweets_df <- twListToDF(mooctweets)
tail(mooctweets_df)
# Words used
# Build one regular expression out of the pieces of a tweet that should not
# end up in the word cloud. Each piece is listed separately for readability.
strip_patterns <- c(
  "\"",                # double quotes
  "@[A-Za-z0-9_]+",    # @mentions (letters, digits, underscore only)
  "\\b(RT|MT)\\b",     # retweet markers, as whole words only (keeps "SMART")
  "[!:;]\\s+",         # punctuation followed by whitespace
  "https?://\\S+",     # URLs of any shape, up to the next whitespace
  "#[A-Za-z0-9_]+"     # hashtags
)
strip_regex <- paste(strip_patterns, collapse = "|")

# gsub() and str_trim() are vectorised, so no sapply() wrapper is needed.
cleaned <- str_trim(gsub(strip_regex, "", mooctweets_df$text), side = "both")

# Lowercase before removing stopwords: the stopword list is lowercase, so
# capitalised stopwords ("The", "And") would otherwise survive the filter.
cleaned_coll <- tolower(paste(cleaned, collapse = " "))
corpus <- Corpus(VectorSource(cleaned_coll))

# Pass removeWords directly to tm_map() rather than wrapping it in a closure,
# so the corpus keeps its document structure for TermDocumentMatrix().
moocCorpus <- tm_map(corpus, removeWords, stopwords("english"))
mooc_ <- TermDocumentMatrix(moocCorpus)

# Term frequencies, most frequent first.
ap.m <- as.matrix(mooc_)
ap.v <- sort(rowSums(ap.m), decreasing = TRUE)
ap.d <- data.frame(word = names(ap.v), freq = ap.v)
table(ap.d$freq)

# Render the word cloud to a PNG in the home directory.
pal2 <- brewer.pal(8, "Dark2")
png("~/mooctweets.png", width = 800, height = 600)
wordcloud(ap.d$word, ap.d$freq, scale = c(8, .2), min.freq = 2, max.words = 100,
          random.order = FALSE, rot.per = .15, colors = pal2)
dev.off()
# Users
# Count tweets per screen name directly. Screen names are identifiers, not
# prose, so they are tallied as-is rather than being tokenised, lowercased or
# filtered against a stopword list.
user_counts <- table(mooctweets_df$screenName)

# Named vector of counts (name = screen name), most active tweeters first.
ap.v <- sort(
  setNames(as.vector(user_counts), names(user_counts)),
  decreasing = TRUE
)
ap.d <- data.frame(word = names(ap.v), freq = ap.v)
table(ap.d$freq)

# Render the tweeter cloud to a PNG in the home directory; only users with at
# least two tweets are drawn (min.freq = 2).
pal2 <- brewer.pal(8, "Dark2")
png("~/mooctweeters.png", width = 800, height = 600)
wordcloud(ap.d$word, ap.d$freq, scale = c(8, .2), min.freq = 2, max.words = 100,
          random.order = FALSE, rot.per = .15, colors = pal2)
dev.off()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
If you get "Error in object[[i]] : object of type 'closure' is not subsettable",
then there are too many tweets being requested.
1500 is the maximum if you're not specially authenticated by Twitter:
http://stackoverflow.com/questions/10328066/twitter-package-in-r-maximum-tweets-using-searchtwitter