Skip to content

Instantly share code, notes, and snippets.

View wesslen's full-sized avatar

Ryan Wesslen wesslen

View GitHub Profile
@wesslen
wesslen / mongodb-load.py
Created February 8, 2018 17:58
Load raw json files to MongoDB
import glob
import json
from pymongo import MongoClient
# Connection settings -- replace with your MongoDB server's hostname and port
HOST = "hostname"
PORT = 27017
# Create the client for the server at HOST:PORT
client = MongoClient(host=HOST, port=PORT)
# fill in dbname and colname
@wesslen
wesslen / tidyquant-demo.R
Created February 16, 2018 03:03
R tidyquant script for workshop
# install tidyverse if you don't have it
# install.packages("tidyverse")
library(tidyverse)
## Location of the CSV file, then load it with readr
url <- "http://assets.datacamp.com/course/compfin/sbuxPrices.csv"
df <- read_csv(file = url)
## lubridate package to format the date
# if you get an error below, are you sure you have lubridate?
@wesslen
wesslen / twitter-trolls.R
Created February 20, 2018 21:30
twitter trolls
library(tidyverse); library(lubridate)
# Remote locations of the tweets and users CSV files
url <- "http://nodeassets.nbcnews.com/russian-twitter-trolls/tweets.csv"
user.url <- "http://nodeassets.nbcnews.com/russian-twitter-trolls/users.csv"
# Read each CSV into its own object
tweets <- read_csv(file = url)
users <- read_csv(file = user.url)
tweets %>%
count(Date = as.Date(created_str)) %>%
@wesslen
wesslen / twitter-trolls.Rmd
Last active March 19, 2018 20:26
troll-tweets Rmd
---
title: "Analyzing Russian Trolls: Tidyverse & Text"
author: "Ryan Wesslen"
date: "2/21/2018"
output: html_document
---
```{r setup, include=FALSE}
# Default options for every chunk: echo the code, suppress warnings in output.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE)
```
@wesslen
wesslen / stm-labels-five-words.R
Created February 28, 2018 03:12
get five words for stm topics
# Build one short label per topic by joining its top five FREX words with
# " + ". `ctmFit` is the fitted model object and must already exist.
labels <- sageLabels(ctmFit, n = 5)
# Iterate over the rows of the frex matrix rather than a hard-coded 1:30, so
# the labels follow however many topics the model actually has.
# vapply() guarantees a character vector even when there are zero topics.
topicsNames <- vapply(
  seq_len(nrow(labels$marginal$frex)),
  function(k) paste0(labels$marginal$frex[k, ], collapse = " + "),
  character(1)
)
@wesslen
wesslen / get_replies.R
Last active March 1, 2021 20:30
get replies for a specific tweet using rtweet
## issues:
## -only gets replies within last ~7 days to the post due to public REST API limits
## -counts don't necessarily align with total replies via browser, perhaps due to private accounts (?)
get_replies <- function(tweetid){
# get status information for given tweet
t <- rtweet::lookup_statuses(statuses = tweetid, token = ryan_rtweets)
# use search API to find all tweets directed to the poster
# and keep only the replies to that status
@wesslen
wesslen / ggplot-wordcloud.R
Created May 25, 2018 20:07
ggplot wordcloud via quanteda, ggrepel, and tidyverse
library(quanteda); library(ggrepel); library(tidyverse)
ggplotWordcloud <- function(df, maxWords = 50){
corpus(df$text) %>%
dfm(remove_punct = TRUE, remove = stopwords("English")) %>%
topfeatures(n = maxWords) %>%
as.tibble() %>%
rownames_to_column(var = "word") %>%
slice(1:maxWords) %>%
@wesslen
wesslen / app-prep.R
Last active November 10, 2018 19:45
solution to iviz-workshop charlotte protest shiny app: see https://ryanwesslen.shinyapps.io/protestApp/
# Project 1: Charlotte Protest
library(tidyverse); library(lubridate); library(xts); library(dygraphs); library(quanteda)
# 10% sample
# Load the saved protest data, derive an hourly `time` column, and keep the
# four columns time, verb, postedTime and body.
protestData <- readRDS("../Protest.RData") %>%
  mutate(
    # convert to hourly: first 13 characters of postedTime plus "00:00 EDT",
    # then parse the resulting string with ymd_hms()
    time = ymd_hms(paste0(substr(postedTime, 1, 13), "00:00 EDT"))
  ) %>%
  select(time, verb, postedTime, body)
# get daily counts
@wesslen
wesslen / rtweet-streams.R
Created July 20, 2018 16:15
combine multiple rtweet streams
library(rtweet)
library(dplyr)
# List the working directory and keep the names that contain ".json".
# fixed = TRUE matches ".json" literally; as a regular expression the "."
# matched any character, so names such as "myjson.txt" were kept as well.
files <- list.files()
files <- files[grep(".json", files, fixed = TRUE)]
getPoints <- function(file){
parse_stream(file) %>%
lat_lng("bbox_coords") %>% # keep bounding box coords
filter(is_retweet == FALSE & !is.na(lat)) %>% # keep posts and point lat/longs
@wesslen
wesslen / tillis-tweets.R
Created August 8, 2018 12:02
thom tillis tweets
library(tidyverse); library(rtweet)
# Fetch up to 3200 tweets per account, keep those whose text contains
# "bipartisan" (case-insensitive), and plot counts per 3-month interval,
# grouped by screen name.
c("senthomtillis", "thomtillis") %>%
  get_timeline(n = 3200) %>%
  mutate(keyword = grepl("bipartisan", text, ignore.case = TRUE)) %>%
  filter(keyword) %>%
  group_by(screen_name) %>%
  ts_plot(by = "3 months", trim = 1L) +
  labs(
    x = "Date",
    y = NULL,
    title = "Thom Tillis' Tweets mentioning `bipartisan*` by Twitter account"
  ) +
  theme(text = element_text(size = 12)) +
  theme(legend.position = c(0.3, 0.5))