This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Unnormalised posterior over rangeP: the binomial likelihood of 8 successes in
# 10 trials multiplied by a Normal(mean = 0.5, sd = 0.1) prior density, drawn
# as a green line on the currently open plot.
lik <- dbinom(x = 8, size = 10, prob = rangeP)
prior <- dnorm(x = rangeP, mean = 0.5, sd = 0.1)
lines(rangeP, lik * prior, col = "green")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Overlay the Normal(mean = 0.5, sd = 0.1) prior density in red on the current
# plot. NOTE(review): the division by 15 presumably just rescales the density
# so it fits the existing y-axis -- confirm.
lines(rangeP, dnorm(x = rangeP, mean = 0.5, sd = 0.1) / 15,
      col = "red")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Grid of 100 equally spaced candidate probabilities from 0 to 1.
rangeP <- seq(0, 1, length.out = 100)
# Binomial likelihood of 8 successes in 10 trials, evaluated at every candidate
# probability and drawn as a line.
plot(rangeP, dbinom(x = 8, size = 10, prob = rangeP),
     type = "l", xlab = "P(Black)", ylab = "Density")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sentiment analysis
# Replace tokens by the dictionary categories of data_dictionary_LSD2015 that
# they match.
tknDct <- tokens_lookup(tkn, dictionary = data_dictionary_LSD2015)
# Document-feature matrix of the category counts (English stopwords removed,
# stemming enabled).
saDfm <- dfm(tknDct,
             remove = stopwords("en"),
             stem = TRUE)
# Column sums of the counts per date of `created_at`, row-bound into one
# matrix with one row per date. `[, -1]` drops the first column of the
# converted data frame (presumably the document id -- confirm).
# NOTE(review): `tweetReduced` is not defined in the visible code; the
# bot-filtered data frame appears as `reducedTweet` -- confirm which object is
# intended here.
summ <- do.call("rbind", by(convert(saDfm, to = "data.frame")[, -1],
                            INDICES = date(tweetReduced$created_at),
                            FUN = colSums))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Wordcloud
# Remove potential bots w/ > 100 tweets in the dataset
bots <- rownames(rtStats)[which(rtStats$num_tweets > 100)]
reducedTweet <- allTweets[!allTweets$screen_name %in% bots, ]
# Clean the tweet text: drop everything that cannot be converted to ASCII,
# then replace any "<...>" code made of capital letters, digits and "+" by a
# single space.
reducedTweet$text <- texts(reducedTweet$text) %>%
  iconv(from = "UTF-8", to = "ASCII", sub = "") %>%
  gsub(pattern = "<[A-Z+0-9]+>", replacement = " ")
# Tokenize words | |
tkn <- tokens(reducedTweet$text, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sat Oct 5 10:06:01 2019 ------------------------------
# Bonus - rm bots, time-dependent wordclouds & sentiment analysis
# Per-account summary: one row per screen_name holding the number of tweets,
# the mean follower count and the median retweet count. Row names of the
# result are the screen names (the `by` group labels).
rtStats <- do.call("rbind", by(
  allTweets,
  INDICES = allTweets$screen_name,
  FUN = function(x) {
    data.frame(num_tweets = nrow(x),
               mean_followers = mean(x$followers_count),
               median_rt = median(x$retweet_count))
  }
))
# Plot log10(num_tweets) vs. log10(median_rt) | |
with(log10(rtStats+1), plot(num_tweets, median_rt, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Identify tweets containing any of the characters names (0/1)
# One column per entry of gotChars, one row per element of tkn; a cell is 1
# when at least one token of that element equals the character name.
# vapply() pins the per-element result to a single logical, so the column type
# cannot silently change on empty or irregular input.
popularity <- as.data.frame(lapply(gotChars, function(x) {
  as.integer(vapply(tkn, function(k) any(k %in% x), logical(1)))
}))
# Write colnames
colnames(popularity) <- gotChars
# Add column with the corresponding created_at timestamps
popularity$created_at <- allTweets$created_at
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Tokenize words
# Split the tweet text into tokens with all of the listed removal options
# switched on, then build unigrams and bigrams from the remaining tokens.
tkn <- tokens(allTweets$text,
              remove_twitter = TRUE,
              remove_separators = TRUE,
              remove_symbols = TRUE,
              remove_punct = TRUE,
              remove_url = TRUE,
              remove_hyphens = TRUE,
              remove_numbers = TRUE) %>%
  tokens_ngrams(n = 1:2)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert UTC to US Eastern time (America/New_York)
# Plain assignment with `%>%` is used instead of magrittr's compound pipe
# `%<>%`, which library(tidyverse) does not attach.
allTweets <- allTweets %>%
  dplyr::mutate(created_at = as_datetime(created_at, tz = "UTC")) %>%
  dplyr::mutate(created_at = with_tz(created_at, tzone = "America/New_York"))
# Produce lat and lng coordinates
allTweets <- lat_lng(allTweets)
# Plot
# Margins of 12 lines on every side of the figure.
par(mar = rep(12, 4))
# Outline of the US states drawn with thin lines.
map("state", lwd = .25)
# plot lat and lng points onto state map |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load libraries | |
library(tidyverse) | |
library(reshape2) | |
library(ggplot2) | |
library(ggridges) | |
library(lubridate) | |
library(rtweet) | |
library(maps) | |
library(quanteda) |