# Access the Twitter API from R and make a word cloud
setwd("~/R/RStudio/twitterwordcloud")
#install the necessary packages
#install.packages("ROAuth")
#install.packages("twitteR")
#install.packages("wordcloud")
#install.packages("tm")
library("ROAuth")
library("twitteR")
library("wordcloud")
library("tm")
#necessary step for Windows
download.file(url="http://curl.haxx.se/ca/cacert.pem", destfile="cacert.pem")
#to get your consumerKey and consumerSecret see the twitteR documentation for instructions
cred <- OAuthFactory$new(consumerKey='GET FROM TWITTER',
                         consumerSecret='GET FROM TWITTER',
                         requestURL='https://api.twitter.com/oauth/request_token',
                         accessURL='https://api.twitter.com/oauth/access_token',
                         authURL='https://api.twitter.com/oauth/authorize')
#run the handshake once interactively to authorise the app
#(the cainfo argument is only needed on Windows)
#cred$handshake(cainfo="cacert.pem")
#save the credential so the handshake does not have to be repeated
save(cred, file="twitter authentication.Rdata")
registerTwitterOAuth(cred)
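# In a later session the saved credential can be reloaded instead of repeating the
# handshake; a minimal sketch, assuming "twitter authentication.Rdata" is in the
# working directory:
# load("twitter authentication.Rdata")
# registerTwitterOAuth(cred)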
#the cainfo parameter is necessary on Windows
r_stats <- searchTwitter("#Whisky", n=1500, cainfo="cacert.pem")
#save text
r_stats_text <- sapply(r_stats, function(x) x$getText())
#create corpus
r_stats_text_corpus <- Corpus(VectorSource(r_stats_text))
#clean up
r_stats_text_corpus <- tm_map(r_stats_text_corpus, tolower)
r_stats_text_corpus <- tm_map(r_stats_text_corpus, removePunctuation)
r_stats_text_corpus <- tm_map(r_stats_text_corpus, function(x) removeWords(x, stopwords()))
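# Note: with tm >= 0.6 the base-R function tolower (and the anonymous removeWords
# wrapper above) needs to be wrapped in content_transformer(), otherwise later steps
# may fail on plain character documents. A hedged sketch for newer tm versions:
# r_stats_text_corpus <- tm_map(r_stats_text_corpus, content_transformer(tolower))
# r_stats_text_corpus <- tm_map(r_stats_text_corpus, removePunctuation)
# r_stats_text_corpus <- tm_map(r_stats_text_corpus, removeWords, stopwords())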
library(RColorBrewer)
pal2 <- brewer.pal(8, "Dark2")
wordcloud(r_stats_text_corpus, min.freq=20, max.words=100, random.order=TRUE, colors=pal2)
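# To keep a copy of the cloud, the plot can be written to a file with a base R
# graphics device; a minimal sketch (the filename is just an example):
# png("whisky_wordcloud.png", width = 800, height = 800)
# wordcloud(r_stats_text_corpus, min.freq=20, max.words=100, random.order=TRUE, colors=pal2)
# dev.off()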
#NEW ANALYSIS
#Retrieve text
collTweets <- userTimeline("JesusCaff", n=1500, cainfo="cacert.pem")
n <- length(collTweets)
collTweets[1:3]
#Transforming text
df <- do.call("rbind", lapply(collTweets, as.data.frame))
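# Equivalent shortcut: the twitteR package provides twListToDF() for exactly this
# list-of-statuses to data.frame conversion:
# df <- twListToDF(collTweets)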
dim(df)
library(tm)
# build a corpus, which is a collection of text documents
# VectorSource specifies that the source is character vectors.
myCorpus <- Corpus(VectorSource(df$text))
#The corpus then needs a couple of transformations: changing letters to
#lower case, removing punctuation/numbers/links, and removing stop words.
#The general English stop-word list is extended with a few extra words
#(e.g. "available", "via", "amp") that add no meaning here.
myCorpus <- tm_map(myCorpus, tolower)
# remove punctuation
myCorpus <- tm_map(myCorpus, removePunctuation)
# remove numbers
myCorpus <- tm_map(myCorpus, removeNumbers)
# remove links
removeURL <- function(x) gsub("http[[:alnum:]]*", "", x)
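# A quick check of removeURL: by this point tolower, removePunctuation and removeNumbers
# have already collapsed links such as "http://t.co/AbC123" into tokens like "httptcoabc",
# which "http[[:alnum:]]*" then strips in full, e.g.
# removeURL("a nice dram httptcoabc")   # -> "a nice dram "
# If links should be removed before punctuation is stripped, a broader (assumed) pattern
# such as gsub("http[^[:space:]]*", "", x) could be used instead.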
# remove stopwords
myStopwords <- c(stopwords('english'), "part", "new", "dont", "will", "close", "along", "held", "get", "current", "available", "via", "the", "see", "now", "amp", "yet")
myCorpus <- tm_map(myCorpus, removeURL)
myCorpus <- tm_map(myCorpus, removeWords, myStopwords)
#In many cases, words need to be stemmed to retrieve their radicals.
#For instance, "example" and "examples" are both stemmed to "exampl".
#However, after that, one may want to complete the stems to their
#original forms, so that the words look "normal".
dictCorpus <- myCorpus
# stem words in a text document with the snowball stemmers,
# which requires packages Snowball, RWeka, rJava, RWekajars
# commented out as it was behaving oddly...
#myCorpus <- tm_map(myCorpus, stemDocument)
# inspect the first three documents
inspect(myCorpus[1:3])
# stem completion
#myCorpus <- tm_map(myCorpus, stemCompletion, dictionary=dictCorpus)
inspect(myCorpus[1:3])
myDtm <- TermDocumentMatrix(myCorpus, control = list(minWordLength = 1))
# peek at a corner of the matrix (the indices depend on how many terms and tweets came back)
inspect(myDtm[266:270, 31:40])
#Frequent terms and associations
findFreqTerms(myDtm, lowfreq=10)
# which words are associated with "fellow"?
findAssocs(myDtm, 'fellow', 0.30)
m <- as.matrix(myDtm)
# calculate the frequency of words
v <- sort(rowSums(m), decreasing=TRUE)
myNames <- names(v)
d <- data.frame(word=myNames, freq=v)
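# Optional sanity check before plotting: look at the most frequent terms
# (a minimal sketch using base R only):
# head(d, 10)
# barplot(d$freq[1:10], names.arg = as.character(d$word[1:10]), las = 2)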
wordcloud(d$word, d$freq, min.freq=3, max.words=100, random.order=TRUE, colors=pal2)
#NEW ANALYSIS
#Retrieve text
collTweets <- searchTwitter("#Whisky", n=1500, cainfo="cacert.pem")
n <- length(collTweets)
collTweets[1:3]
#Transforming text
df <- do.call("rbind", lapply(collTweets, as.data.frame))
dim(df)
library(tm)
# build a corpus, which is a collection of text documents
# VectorSource specifies that the source is character vectors.
myCorpus <- Corpus(VectorSource(df$text))
#The corpus then needs a couple of transformations: changing letters to
#lower case, removing punctuation/numbers/links, and removing stop words.
#The general English stop-word list is extended with a few extra words
#(e.g. "whisky", "whiskey", "available", "via") that add no meaning here.
myCorpus <- tm_map(myCorpus, tolower)
# remove punctuation
myCorpus <- tm_map(myCorpus, removePunctuation)
# remove numbers
myCorpus <- tm_map(myCorpus, removeNumbers)
# remove links
removeURL <- function(x) gsub("http[[:alnum:]]*", "", x)
# remove stopwords
myStopwords <- c(stopwords('english'), "part", "whisky", "whiskey", "new", "dont", "will", "close", "along", "held", "get", "current", "available", "via", "the", "see", "now", "amp", "yet")
myCorpus <- tm_map(myCorpus, removeURL)
myCorpus <- tm_map(myCorpus, removeWords, myStopwords)
#In many cases, words need to be stemmed to retrieve their radicals.
#For instance, "example" and "examples" are both stemmed to "exampl".
#However, after that, one may want to complete the stems to their
#original forms, so that the words look "normal".
dictCorpus <- myCorpus
# stem words in a text document with the snowball stemmers,
# which requires packages Snowball, RWeka, rJava, RWekajars
# commented out as it was behaving oddly...
#myCorpus <- tm_map(myCorpus, stemDocument)
# inspect the first three documents
inspect(myCorpus[1:3])
# stem completion
#myCorpus <- tm_map(myCorpus, stemCompletion, dictionary=dictCorpus)
inspect(myCorpus[1:3])
myDtm <- TermDocumentMatrix(myCorpus, control = list(minWordLength = 1))
inspect(myDtm[266:270, 31:40])
#Frequent terms and associations
findFreqTerms(myDtm, lowfreq=10)
m <- as.matrix(myDtm)
# calculate the frequency of words
v <- sort(rowSums(m), decreasing=TRUE)
myNames <- names(v)
d <- data.frame(word=myNames, freq=v)
wordcloud(d$word, d$freq, min.freq=18, max.words=100, random.order=TRUE, colors=pal2)
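# If many words trigger "could not be fit on page" warnings, the scale argument of
# wordcloud() can be lowered; a hedged variant of the call above:
# wordcloud(d$word, d$freq, min.freq=18, max.words=100, random.order=TRUE,
#           scale=c(3, 0.5), colors=pal2)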
#NEW ANALYSIS
#Retrieve text
collTweets <- searchTwitter("#Whiskey", n=1500, cainfo="cacert.pem")
n <- length(collTweets)
collTweets[1:3]
#Transforming text
df <- do.call("rbind", lapply(collTweets, as.data.frame))
dim(df)
library(tm)
# build a corpus, which is a collection of text documents
# VectorSource specifies that the source is character vectors.
myCorpus <- Corpus(VectorSource(df$text))
#The corpus then needs a couple of transformations: changing letters to
#lower case, removing punctuation/numbers/links, and removing stop words.
#The general English stop-word list is extended with a few extra words
#(e.g. "whisky", "whiskey", "available", "via") that add no meaning here.
myCorpus <- tm_map(myCorpus, tolower)
# remove punctuation
myCorpus <- tm_map(myCorpus, removePunctuation)
# remove numbers
myCorpus <- tm_map(myCorpus, removeNumbers)
# remove links
removeURL <- function(x) gsub("http[[:alnum:]]*", "", x)
# remove stopwords
myStopwords <- c(stopwords('english'), "part", "whisky", "whiskey", "new", "dont", "will", "close", "along", "held", "get", "current", "available", "via", "the", "see", "now", "amp", "yet")
myCorpus <- tm_map(myCorpus, removeURL)
myCorpus <- tm_map(myCorpus, removeWords, myStopwords)
#In many cases, words need to be stemmed to retrieve their radicals.
#For instance, "example" and "examples" are both stemmed to "exampl".
#However, after that, one may want to complete the stems to their
#original forms, so that the words look "normal".
dictCorpus <- myCorpus
# stem words in a text document with the snowball stemmers,
# which requires packages Snowball, RWeka, rJava, RWekajars
# commented out as it was behaving oddly...
#myCorpus <- tm_map(myCorpus, stemDocument)
# inspect the first three documents
inspect(myCorpus[1:3])
# stem completion
#myCorpus <- tm_map(myCorpus, stemCompletion, dictionary=dictCorpus)
inspect(myCorpus[1:3])
myDtm <- TermDocumentMatrix(myCorpus, control = list(minWordLength = 1))
inspect(myDtm[266:270, 31:40])
#Frequent terms and associations
findFreqTerms(myDtm, lowfreq=10)
m <- as.matrix(myDtm)
# calculate the frequency of words
v <- sort(rowSums(m), decreasing=TRUE)
myNames <- names(v)
d <- data.frame(word=myNames, freq=v)
wordcloud(d$word, d$freq, min.freq=10, max.words=100, random.order=TRUE, colors=pal2)