shravan-kuchkula · March 31, 2017 08:57
diff --git a/generateWordCloud.R b/generateWordCloud.R
 # Draws a word cloud of the most frequent terms found in a set of tweets.
 #
 # Args:
 #   tweets: a list of status/twitter objects; each element must provide a
 #     getText() method that returns a single character string.
 #
 # Returns:
 #   The value of the wordcloud() call, which is made for its plotting side
 #   effect. When `tweets` is empty, or when no terms are left after
 #   cleaning, a warning is raised and NULL is returned invisibly without
 #   plotting anything.
 generateWordCloud <- function(tweets) {

   # Nothing to plot: bail out before any text processing is attempted.
   if (length(tweets) == 0) {
     warning("generateWordCloud: no tweets supplied; nothing to plot",
             call. = FALSE)
     return(invisible(NULL))
   }

   # Get the text from each status/twitter object. vapply() (unlike sapply())
   # always yields a character vector and fails loudly if getText() returns
   # anything other than one string.
   tweet_text <- vapply(tweets, function(x) x$getText(), character(1))

   # Replace every character outside the [:graph:] class with a space.
   tweet_text <- str_replace_all(tweet_text, "[^[:graph:]]", " ")

   # Convert the cleaned text to a Corpus object via VectorSource.
   tweets_corpus <- Corpus(VectorSource(tweet_text))

   # Convert to lower case. tolower() is a plain character function, so it is
   # wrapped in content_transformer() to keep the corpus elements as text
   # documents that TermDocumentMatrix() can consume.
   tweets_corpus <- tm_map(tweets_corpus, content_transformer(tolower))

   # Remove punctuation.
   tweets_corpus <- tm_map(tweets_corpus, removePunctuation)

   # Remove English stop words (extra arguments are forwarded by tm_map).
   tweets_corpus <- tm_map(tweets_corpus, removeWords, stopwords("english"))

   # Build the term-document matrix and total each term across documents,
   # most frequent first.
   tdm <- TermDocumentMatrix(tweets_corpus)
   term_freq <- sort(rowSums(as.matrix(tdm)), decreasing = TRUE)

   # Every term may have been stripped by the cleaning steps above.
   if (length(term_freq) == 0) {
     warning("generateWordCloud: no terms left after cleaning; nothing to plot",
             call. = FALSE)
     return(invisible(NULL))
   }

   # Create the word cloud from the (at most) 25 most frequent terms.
   wordcloud(names(term_freq), freq = unname(term_freq), max.words = 25,
             random.order = FALSE, random.color = TRUE,
             colors = brewer.pal(6, "Dark2"))
 }
	# Draws a word cloud of the most frequent terms found in a set of tweets.
	#
	# Args:
	#   tweets: a list of status/twitter objects; each element must provide a
	#     getText() method that returns a single character string.
	#
	# Returns:
	#   The value of the wordcloud() call, which is made for its plotting side
	#   effect. When `tweets` is empty, or when no terms are left after
	#   cleaning, a warning is raised and NULL is returned invisibly without
	#   plotting anything.
	generateWordCloud <- function(tweets) {

	  # Nothing to plot: bail out before any text processing is attempted.
	  if (length(tweets) == 0) {
	    warning("generateWordCloud: no tweets supplied; nothing to plot",
	            call. = FALSE)
	    return(invisible(NULL))
	  }

	  # Get the text from each status/twitter object. vapply() (unlike sapply())
	  # always yields a character vector and fails loudly if getText() returns
	  # anything other than one string.
	  tweet_text <- vapply(tweets, function(x) x$getText(), character(1))

	  # Replace every character outside the [:graph:] class with a space.
	  tweet_text <- str_replace_all(tweet_text, "[^[:graph:]]", " ")

	  # Convert the cleaned text to a Corpus object via VectorSource.
	  tweets_corpus <- Corpus(VectorSource(tweet_text))

	  # Convert to lower case. tolower() is a plain character function, so it is
	  # wrapped in content_transformer() to keep the corpus elements as text
	  # documents that TermDocumentMatrix() can consume.
	  tweets_corpus <- tm_map(tweets_corpus, content_transformer(tolower))

	  # Remove punctuation.
	  tweets_corpus <- tm_map(tweets_corpus, removePunctuation)

	  # Remove English stop words (extra arguments are forwarded by tm_map).
	  tweets_corpus <- tm_map(tweets_corpus, removeWords, stopwords("english"))

	  # Build the term-document matrix and total each term across documents,
	  # most frequent first.
	  tdm <- TermDocumentMatrix(tweets_corpus)
	  term_freq <- sort(rowSums(as.matrix(tdm)), decreasing = TRUE)

	  # Every term may have been stripped by the cleaning steps above.
	  if (length(term_freq) == 0) {
	    warning("generateWordCloud: no terms left after cleaning; nothing to plot",
	            call. = FALSE)
	    return(invisible(NULL))
	  }

	  # Create the word cloud from the (at most) 25 most frequent terms.
	  wordcloud(names(term_freq), freq = unname(term_freq), max.words = 25,
	            random.order = FALSE, random.color = TRUE,
	            colors = brewer.pal(6, "Dark2"))
	}