monogenea · October 7, 2019 16:51
diff --git a/10-poissonGoT.R b/10-poissonGoT.R
 # Wordcloud
 # Remove potential bots w/ > 100 tweets in the dataset: bot candidates are
 # the row names of rtStats whose num_tweets exceeds 100
 # (presumably screen names -- verify against how rtStats is built)
 bots <- rownames(rtStats)[which(rtStats$num_tweets > 100)]
 reducedTweet <- allTweets[!allTweets$screen_name %in% bots, ]
 # Drop characters that cannot be converted to ASCII (sub = ""), then replace
 # any remaining "<...>" code tokens (e.g. "<U+00E9>") with a space.
 # `replacement` is spelled out in full rather than relying on partial
 # argument matching.
 reducedTweet$text <- texts(reducedTweet$text) %>%
   iconv(from = "UTF-8", to = "ASCII", sub = "") %>%
   gsub(pattern = "<[A-Z+0-9]+>", replacement = " ")

 # Tokenize words, stripping Twitter markup, separators, symbols,
 # punctuation, URLs, hyphens and numbers.
 # TRUE is used instead of T because T is an ordinary variable that can be
 # reassigned, whereas TRUE is a reserved constant.
 tkn <- tokens(reducedTweet$text,
               remove_twitter = TRUE,
               remove_separators = TRUE,
               remove_symbols = TRUE,
               remove_punct = TRUE,
               remove_url = TRUE,
               remove_hyphens = TRUE,
               remove_numbers = TRUE)

 # Build the document-feature matrix from the tokens: lowercase, remove
 # English stopwords and stem words.
 # TRUE is used instead of the reassignable shorthand T.
 gotDfm <- dfm(tkn, tolower = TRUE,
               remove = stopwords("en"),
               stem = TRUE)

 # Drop uninformative features: anything one character long or shorter,
 # plus the terms listed in badWords
 badWords <- c(
   "game", "throne", "gameofthron", "got", "watch",
   "episod", "season", "show", "just", "like"
 )
 gotDfm <- gotDfm[, !(nchar(colnames(gotDfm)) <= 1 |
                        colnames(gotDfm) %in% badWords)]

 # Episode air times: six weekly time points starting 2019-04-14 21:00:00
 # NOTE(review): tz = "EST" looks like a fixed UTC-5 zone without daylight
 # saving -- confirm this is the intended zone for April/May air times
 epAirTime <- ymd_hms("2019-04-14 21:00:00", tz = "EST") + dweeks(0:5)
 # One dfm per episode, keeping tweets created more than 2 hours and less
 # than 4 days after the air time. gotDfm is built from reducedTweet$text,
 # so the row mask has to be computed from reducedTweet as well (the former
 # `tweetReduced` is not the object the dfm rows correspond to).
 wcLists <- lapply(seq_along(epAirTime), function(x) {
   idx <- reducedTweet$created_at > epAirTime[x] + dhours(2) &
     reducedTweet$created_at < epAirTime[x] + ddays(4)
   gotDfm[idx, ]
 })

 # Set all four plot margins to zero before drawing the wordclouds
 par(mar = rep(0, 4))
 # seq_along() yields an empty sequence for an empty list, whereas
 # 1:length(x) would iterate over c(1, 0)
 for (i in seq_along(wcLists)) {
   # Reset the RNG seed before each plot so every cloud starts from the
   # same random state
   set.seed(100)
   textplot_wordcloud(wcLists[[i]],
                      max_words = 100)
 }
	# Wordcloud
	# Remove potential bots w/ > 100 tweets in the dataset: bot candidates are
	# the row names of rtStats whose num_tweets exceeds 100
	# (presumably screen names -- verify against how rtStats is built)
	bots <- rownames(rtStats)[which(rtStats$num_tweets > 100)]
	reducedTweet <- allTweets[!allTweets$screen_name %in% bots, ]
	# Drop characters that cannot be converted to ASCII (sub = ""), then replace
	# any remaining "<...>" code tokens (e.g. "<U+00E9>") with a space.
	# `replacement` is spelled out in full rather than relying on partial
	# argument matching.
	reducedTweet$text <- texts(reducedTweet$text) %>%
	  iconv(from = "UTF-8", to = "ASCII", sub = "") %>%
	  gsub(pattern = "<[A-Z+0-9]+>", replacement = " ")

	# Tokenize words, stripping Twitter markup, separators, symbols,
	# punctuation, URLs, hyphens and numbers.
	# TRUE is used instead of T because T is an ordinary variable that can be
	# reassigned, whereas TRUE is a reserved constant.
	tkn <- tokens(reducedTweet$text,
	              remove_twitter = TRUE,
	              remove_separators = TRUE,
	              remove_symbols = TRUE,
	              remove_punct = TRUE,
	              remove_url = TRUE,
	              remove_hyphens = TRUE,
	              remove_numbers = TRUE)

	# Build the document-feature matrix from the tokens: lowercase, remove
	# English stopwords and stem words.
	# TRUE is used instead of the reassignable shorthand T.
	gotDfm <- dfm(tkn, tolower = TRUE,
	              remove = stopwords("en"),
	              stem = TRUE)

	# Drop uninformative features: anything one character long or shorter,
	# plus the terms listed in badWords
	badWords <- c(
	  "game", "throne", "gameofthron", "got", "watch",
	  "episod", "season", "show", "just", "like"
	)
	gotDfm <- gotDfm[, !(nchar(colnames(gotDfm)) <= 1 |
	                       colnames(gotDfm) %in% badWords)]

	# Episode air times: six weekly time points starting 2019-04-14 21:00:00
	# NOTE(review): tz = "EST" looks like a fixed UTC-5 zone without daylight
	# saving -- confirm this is the intended zone for April/May air times
	epAirTime <- ymd_hms("2019-04-14 21:00:00", tz = "EST") + dweeks(0:5)
	# One dfm per episode, keeping tweets created more than 2 hours and less
	# than 4 days after the air time. gotDfm is built from reducedTweet$text,
	# so the row mask has to be computed from reducedTweet as well (the former
	# `tweetReduced` is not the object the dfm rows correspond to).
	wcLists <- lapply(seq_along(epAirTime), function(x) {
	  idx <- reducedTweet$created_at > epAirTime[x] + dhours(2) &
	    reducedTweet$created_at < epAirTime[x] + ddays(4)
	  gotDfm[idx, ]
	})

	# Set all four plot margins to zero before drawing the wordclouds
	par(mar = rep(0, 4))
	# seq_along() yields an empty sequence for an empty list, whereas
	# 1:length(x) would iterate over c(1, 0)
	for (i in seq_along(wcLists)) {
	  # Reset the RNG seed before each plot so every cloud starts from the
	  # same random state
	  set.seed(100)
	  textplot_wordcloud(wcLists[[i]],
	                     max_words = 100)
	}