# Grid of candidate values for P(Black)
rangeP <- seq(0, 1, length.out = 100)
# Binomial likelihood of 8 successes in 10 trials across the grid
plot(rangeP, dbinom(x = 8, prob = rangeP, size = 10),
     type = "l", xlab = "P(Black)", ylab = "Density")
# Normal prior centred on .5, rescaled for display
lines(rangeP, dnorm(x = rangeP, mean = .5, sd = .1) / 15,
      col = "red")
lik <- dbinom(x = 8, prob = rangeP, size = 10)
prior <- dnorm(x = rangeP, mean = .5, sd = .1)
# Unnormalised posterior: likelihood x prior
lines(rangeP, lik * prior, col = "green")
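# A minimal extension, not in the original snippet: normalise the
# likelihood-prior product over the grid to get a proper posterior.
unstdPost <- lik * prior
post <- unstdPost / sum(unstdPost)
sum(post) # 1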
# Sentiment analysis
tknDct <- tokens_lookup(tkn, dictionary = data_dictionary_LSD2015)
saDfm <- dfm(tknDct,
             remove = stopwords("en"),
             stem = T)
# Sum sentiment-category counts per day
summ <- do.call("rbind", by(convert(saDfm, to = "data.frame")[, -1],
                            INDICES = date(reducedTweet$created_at),
                            FUN = colSums))
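# A hedged sketch, not in the original snippet: plot daily positive vs
# negative token counts from the LSD2015 lookup (columns of summ).
dates <- as.Date(rownames(summ))
plot(dates, summ[, "positive"], type = "l", col = "darkgreen",
     xlab = "Date", ylab = "Token count")
lines(dates, summ[, "negative"], col = "red")
legend("topleft", legend = c("positive", "negative"),
       col = c("darkgreen", "red"), lty = 1, bty = "n")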
# Wordcloud
# Remove potential bots w/ > 100 tweets in the dataset
bots <- rownames(rtStats)[which(rtStats$num_tweets > 100)]
reducedTweet <- allTweets[!allTweets$screen_name %in% bots, ]
# Strip non-ASCII characters and leftover unicode placeholders
reducedTweet$text <- texts(reducedTweet$text) %>%
      iconv(from = "UTF-8", to = "ASCII", sub = "") %>%
      gsub(pattern = "<[A-Z+0-9]+>", replacement = " ")
# Tokenize words (same options assumed as for allTweets$text)
tkn <- tokens(reducedTweet$text, remove_twitter = T, remove_separators = T,
              remove_symbols = T, remove_punct = T, remove_url = T,
              remove_hyphens = T, remove_numbers = T) %>%
      tokens_ngrams(n = 1:2)
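# A hedged sketch, not in the original snippet: build a dfm from the
# bot-filtered tokens and draw the promised wordcloud with quanteda's
# textplot_wordcloud().
wcDfm <- dfm(tkn, remove = stopwords("en"))
textplot_wordcloud(wcDfm, max_words = 100)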
# Sat Oct 5 10:06:01 2019 ------------------------------
# Bonus - rm bots, time-dependent wordclouds & sentiment analysis
# Per-user stats: tweet count, mean followers, median retweets
rtStats <- do.call("rbind", by(allTweets, INDICES = allTweets$screen_name, function(x){
      return(data.frame(num_tweets = nrow(x),
                        mean_followers = mean(x$followers_count),
                        median_rt = median(x$retweet_count)))
}))
# Plot log10(num_tweets) vs. log10(median_rt); axis labels assumed
with(log10(rtStats + 1), plot(num_tweets, median_rt,
                              xlab = "log10(no. tweets + 1)",
                              ylab = "log10(median RTs + 1)"))
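# Assumed definition, not shown in this snippet: gotChars holds the
# character names searched for below; the values here are illustrative
# placeholders only.
gotChars <- c("Jon", "Daenerys", "Tyrion", "Cersei", "Arya", "Sansa", "Bran")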
# Flag tweets containing any of the character names (0/1)
popularity <- as.data.frame(lapply(gotChars, function(x){
      as.integer(sapply(tkn, function(k){any(k %in% x)}))
}))
# Name columns after the characters
colnames(popularity) <- gotChars
# Add the corresponding Eastern-time timestamp
popularity$created_at <- allTweets$created_at
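# A hedged sketch, not in the original snippet: melt the 0/1 flags and
# draw per-character ridgelines of mention density over time (reshape2
# and ggridges are loaded elsewhere in the gist).
popLong <- melt(popularity, id.vars = "created_at",
                variable.name = "character", value.name = "mentioned")
ggplot(subset(popLong, mentioned == 1),
       aes(x = created_at, y = character)) +
      geom_density_ridges() +
      labs(x = "Time (EDT)", y = NULL)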
# Tokenize words
tkn <- tokens(allTweets$text,
              remove_twitter = T,
              remove_separators = T,
              remove_symbols = T,
              remove_punct = T,
              remove_url = T,
              remove_hyphens = T,
              remove_numbers = T) %>%
      tokens_ngrams(n = 1:2) # keep unigrams and bigrams
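# Quick usage check, not in the original snippet: the first tweet's
# unigram and bigram tokens.
head(tkn[[1]], 10)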
# Convert UTC to EDT
allTweets %<>% dplyr::mutate(created_at = as_datetime(created_at, tz = "UTC")) %>%
      dplyr::mutate(created_at = with_tz(created_at, tzone = "America/New_York"))
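# Hedged check, not in the original snippet: confirm the conversion took.
attr(allTweets$created_at, "tzone") # "America/New_York"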
# Produce lat and lng coordinates
allTweets <- lat_lng(allTweets)
# Plot
par(mar = rep(0, 4)) # zero margins so the state map fills the device
map("state", lwd = .25)
# Plot lat and lng points onto the state map (point styling assumed)
with(allTweets, points(lng, lat, pch = 20, cex = .5, col = rgb(0, .3, .7, .75)))
# Load libraries
library(tidyverse)
library(magrittr)  # %<>% pipe-assignment operator
library(reshape2)  # melt()
library(ggplot2)
library(ggridges)  # ridgeline plots
library(lubridate) # as_datetime(), with_tz(), date()
library(rtweet)    # Twitter data and lat_lng()
library(maps)      # map("state")
library(quanteda)  # tokens(), dfm(), data_dictionary_LSD2015