Skip to content

Instantly share code, notes, and snippets.

@hanxue
Created March 13, 2016 08:25
Show Gist options
  • Save hanxue/0c9c266b63efbb28896a to your computer and use it in GitHub Desktop.
Save hanxue/0c9c266b63efbb28896a to your computer and use it in GitHub Desktop.
Twitter WordCloud for Najib Using R
library(twitteR)
library(ROAuth)
library(tm)
library(plyr)
library(ggplot2)
library(wordcloud)
library(RColorBrewer)
library(XML)
# Twitter API credentials are read from the environment so that no secrets
# are stored in the script itself.
my.key <- Sys.getenv("TWITTER_KEY")
my.secret <- Sys.getenv("TWITTER_SECRET")
access.token <- Sys.getenv("TWITTER_TOKEN")
access.token.secret <- Sys.getenv("TWITTER_TOKEN_SECRET")

# Sys.getenv() returns "" for an unset variable; stop here with a clear
# message rather than handing empty credentials to setup_twitter_oauth().
twitter_credentials <- c(
  TWITTER_KEY = my.key,
  TWITTER_SECRET = my.secret,
  TWITTER_TOKEN = access.token,
  TWITTER_TOKEN_SECRET = access.token.secret
)
unset_credentials <- names(twitter_credentials)[!nzchar(twitter_credentials)]
if (length(unset_credentials) > 0) {
  stop(
    "Missing Twitter credentials; set environment variable(s): ",
    paste(unset_credentials, collapse = ", "),
    call. = FALSE
  )
}
setup_twitter_oauth(my.key, my.secret, access.token, access.token.secret)

# Tweets are persisted in a SQLite database; the id of the latest stored tweet
# is passed as `sinceID` so the search only asks for newer tweets.
register_sqlite_backend("/usr/local/var/sqlite/sentiment_najib")
table_name <- "najib"
# NOTE(review): confirm how get_latest_tweet_id() behaves on a first run, when
# the table may not exist yet.
latest_tweet <- get_latest_tweet_id(table_name = table_name)
tweets <- searchTwitteR("Najib", n = 3000, sinceID = latest_tweet)
store_tweets_db(tweets, table_name = table_name)

# Uncomment next line to process all tweets in database
# tweets <- load_tweets_db(table_name = table_name)
# Prepare Malay stopwords: read the word list published as an HTML table and
# keep the first table found on the page.
df1 <- readHTMLTable(
  "http://blog.kerul.net/2014/01/list-of-malay-stop-words.html"
)
df1 <- df1[[1]]

# Flatten every table cell into one character vector and drop entries 320 and
# 321.
# NOTE(review): presumably those two cells are not real words — confirm
# against the current page, since positional indices break if the page changes.
malaystopwords <- as.character(unlist(df1))[-c(320, 321)]

# Also exclude the search subject itself, which would otherwise dominate the
# cloud. "..." is deliberately NOT added here: removeWords() joins the words
# into a regular expression, where "..." matches any three characters and so
# deletes unrelated short tokens. Literal ellipses are stripped later by
# removePunctuation.
malaystopwords <- c(malaystopwords, "najib", "razak")
#' Strip URLs, @-mentions and one trailing comma/period/colon from text.
#'
#' Only the URL or mention token itself is removed; the words that follow it
#' are kept.
#'
#' @param x A character vector (e.g. tweet texts).
#' @return A character vector the same length as `x` with the matches removed.
remove_url <- function(x) {
  # `\\S+` stops at the next whitespace. A greedy `.*` here would swallow the
  # rest of the string, e.g. all of a tweet's text after a leading "@user".
  gsub("\\bhttps?://\\S+|@\\S+|[,.:]$", "", x)
}
# Build a corpus with one document per tweet text. vapply() guarantees a
# character vector even when `tweets` is empty.
corpus <- Corpus(VectorSource(
  vapply(tweets, function(x) x$getText(), character(1))
))

# Clean the text. Plain character functions are wrapped in
# content_transformer() so tm_map() keeps the documents as text documents;
# this replaces the final PlainTextDocument re-wrap the script used to need.
# Drop bytes that cannot be represented in UTF-8.
corpus <- tm_map(
  corpus,
  content_transformer(function(x) iconv(x, to = "UTF-8", sub = ""))
)
corpus <- tm_map(corpus, content_transformer(tolower))
corpus <- tm_map(corpus, content_transformer(remove_url))
# Stopwords are removed before punctuation is stripped, as in the original
# ordering of the pipeline.
corpus <- tm_map(corpus, removeWords, stopwords("english"))
corpus <- tm_map(corpus, removeWords, malaystopwords)
corpus <- tm_map(corpus, removeNumbers)
corpus <- tm_map(corpus, removePunctuation)

# Draw the cloud: at most 100 words that occur at least 25 times, most
# frequent words placed first, a quarter of the words rotated.
col <- brewer.pal(6, "Dark2")
wordcloud(
  corpus,
  min.freq = 25,
  scale = c(3, 0.3),
  rot.per = 0.25,
  random.color = TRUE,
  max.words = 100,
  random.order = FALSE,
  colors = col
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment