# Grid of candidate values for P(Black)
rangeP <- seq(0, 1, length.out = 100)
# Binomial likelihood of 8 successes in 10 trials across the grid
plot(rangeP, dbinom(x = 8, prob = rangeP, size = 10),
     type = "l", xlab = "P(Black)", ylab = "Density")
# Normal prior centred on .5, rescaled for display
lines(rangeP, dnorm(x = rangeP, mean = .5, sd = .1) / 15,
      col = "red")
lik <- dbinom(x = 8, prob = rangeP, size = 10)
prior <- dnorm(x = rangeP, mean = .5, sd = .1)
# Unnormalised posterior: likelihood x prior
lines(rangeP, lik * prior, col = "green")
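# A minimal extension, not in the original snippet: normalise the
# likelihood-prior product over the grid to get a proper posterior.
unstdPost <- lik * prior
post <- unstdPost / sum(unstdPost)
sum(post) # 1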
# Sentiment analysis
tknDct <- tokens_lookup(tkn, dictionary = data_dictionary_LSD2015)
saDfm <- dfm(tknDct,
             remove = stopwords("en"),
             stem = T)
# Sum sentiment-category counts per day
summ <- do.call("rbind", by(convert(saDfm, to = "data.frame")[, -1],
                            INDICES = date(reducedTweet$created_at),
                            FUN = colSums))
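# A hedged sketch, not in the original snippet: plot daily positive vs
# negative token counts from the LSD2015 lookup (columns of summ).
dates <- as.Date(rownames(summ))
plot(dates, summ[, "positive"], type = "l", col = "darkgreen",
     xlab = "Date", ylab = "Token count")
lines(dates, summ[, "negative"], col = "red")
legend("topleft", legend = c("positive", "negative"),
       col = c("darkgreen", "red"), lty = 1, bty = "n")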
# Wordcloud
# Remove potential bots w/ > 100 tweets in the dataset
bots <- rownames(rtStats)[which(rtStats$num_tweets > 100)]
reducedTweet <- allTweets[!allTweets$screen_name %in% bots, ]
# Strip non-ASCII characters and leftover unicode placeholders
reducedTweet$text <- texts(reducedTweet$text) %>%
      iconv(from = "UTF-8", to = "ASCII", sub = "") %>%
      gsub(pattern = "<[A-Z+0-9]+>", replacement = " ")
# Tokenize words (same options assumed as for allTweets$text)
tkn <- tokens(reducedTweet$text, remove_twitter = T, remove_separators = T,
              remove_symbols = T, remove_punct = T, remove_url = T,
              remove_hyphens = T, remove_numbers = T) %>%
      tokens_ngrams(n = 1:2)
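# A hedged sketch, not in the original snippet: build a dfm from the
# bot-filtered tokens and draw the promised wordcloud with quanteda's
# textplot_wordcloud().
wcDfm <- dfm(tkn, remove = stopwords("en"))
textplot_wordcloud(wcDfm, max_words = 100)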
# Sat Oct 5 10:06:01 2019 ------------------------------
# Bonus - rm bots, time-dependent wordclouds & sentiment analysis
# Per-user stats: tweet count, mean followers, median retweets
rtStats <- do.call("rbind", by(allTweets, INDICES = allTweets$screen_name, function(x){
      return(data.frame(num_tweets = nrow(x),
                        mean_followers = mean(x$followers_count),
                        median_rt = median(x$retweet_count)))
}))
# Plot log10(num_tweets) vs. log10(median_rt); axis labels assumed
with(log10(rtStats + 1), plot(num_tweets, median_rt,
                              xlab = "log10(no. tweets + 1)",
                              ylab = "log10(median RTs + 1)"))
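# Assumed definition, not shown in this snippet: gotChars holds the
# character names searched for below; the values here are illustrative
# placeholders only.
gotChars <- c("Jon", "Daenerys", "Tyrion", "Cersei", "Arya", "Sansa", "Bran")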
# Flag tweets containing any of the character names (0/1)
popularity <- as.data.frame(lapply(gotChars, function(x){
      as.integer(sapply(tkn, function(k){any(k %in% x)}))
}))
# Name columns after the characters
colnames(popularity) <- gotChars
# Add the corresponding Eastern-time timestamp
popularity$created_at <- allTweets$created_at
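# A hedged sketch, not in the original snippet: melt the 0/1 flags and
# draw per-character ridgelines of mention density over time (reshape2
# and ggridges are loaded elsewhere in the gist).
popLong <- melt(popularity, id.vars = "created_at",
                variable.name = "character", value.name = "mentioned")
ggplot(subset(popLong, mentioned == 1),
       aes(x = created_at, y = character)) +
      geom_density_ridges() +
      labs(x = "Time (EDT)", y = NULL)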
# Tokenize words
tkn <- tokens(allTweets$text,
              remove_twitter = T,
              remove_separators = T,
              remove_symbols = T,
              remove_punct = T,
              remove_url = T,
              remove_hyphens = T,
              remove_numbers = T) %>%
      tokens_ngrams(n = 1:2) # keep unigrams and bigrams
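# Quick usage check, not in the original snippet: the first tweet's
# unigram and bigram tokens.
head(tkn[[1]], 10)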
# Convert UTC to EDT
allTweets %<>% dplyr::mutate(created_at = as_datetime(created_at, tz = "UTC")) %>%
      dplyr::mutate(created_at = with_tz(created_at, tzone = "America/New_York"))
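# Hedged check, not in the original snippet: confirm the conversion took.
attr(allTweets$created_at, "tzone") # "America/New_York"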
# Produce lat and lng coordinates
allTweets <- lat_lng(allTweets)
# Plot
par(mar = rep(0, 4)) # zero margins so the state map fills the device
map("state", lwd = .25)
# Plot lat and lng points onto the state map (point styling assumed)
with(allTweets, points(lng, lat, pch = 20, cex = .5, col = rgb(0, .3, .7, .75)))
# Load libraries
library(tidyverse)
library(magrittr)  # %<>% pipe-assignment operator
library(reshape2)  # melt()
library(ggplot2)
library(ggridges)  # ridgeline plots
library(lubridate) # as_datetime(), with_tz(), date()
library(rtweet)    # Twitter data and lat_lng()
library(maps)      # map("state")
library(quanteda)  # tokens(), dfm(), data_dictionary_LSD2015