Skip to content

Instantly share code, notes, and snippets.

View juliasilge's full-sized avatar
🐛

Julia Silge juliasilge

🐛
View GitHub Profile
@juliasilge
juliasilge / words_cooccur.R
Created May 18, 2019 20:27
Word co-occurrence network graph
library(tidytext)
library(widyr)
library(igraph)
library(ggraph)
words_cooccur <- tidy_words %>%
group_by(word) %>%
filter(n() > 250) %>%
ungroup() %>%
pairwise_cor(word, Respondent, sort = TRUE, upper = FALSE) %>%
@juliasilge
juliasilge / checking_out_casting.R
Created August 28, 2019 01:31
For Johan Braeken
library(tidyverse)
library(tidytext)
# Toy long-format table: one row per (a, b) pair with an integer value.
dat <- tibble(
  a = rep(c("row1", "row2"), times = c(2, 3)),
  b = paste0("col", c(1, 2, 1, 3, 4)),
  val = seq_len(5)
)

# Cast the long table to a document-term matrix: `a` supplies the documents
# (rows), `b` the terms (columns), and `val` the cell values.
d <- dat %>%
  cast_dtm(a, b, val)
@juliasilge
juliasilge / potus.R
Created December 19, 2019 04:32
Proportion of tweeted characters that are exclamation points
library(tidyverse)
library(rtweet)
potus <- get_timeline("realDonaldTrump", n = 3200)
potus %>%
transmute(creation_date = as.Date(created_at),
text = str_remove_all(text, "https://t.co/[A-Za-z\\d]+"),
exclamations = str_extract_all(text, "\\!"),
exclamations = map_int(exclamations, length),
@juliasilge
juliasilge / nest_tidydo.R
Last active December 29, 2019 23:44
Log odds with nesting for Tyler
library(tidyverse)
library(tidylo)
library(babynames)
top_names <- babynames %>%
filter(year >= 1950,
year < 1990) %>%
mutate(decade = (year %/% 10) * 10,
decade = paste0(decade, "s")) %>%
group_by(decade) %>%
@juliasilge
juliasilge / plot_caribou.R
Last active June 24, 2020 00:43
#TidyTuesday for caribou tracking
## https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-06-23/readme.md
library(tidyverse)
library(bcmaps)
# Download the caribou location records for the 2020-06-23 TidyTuesday.
locations <- read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-23/locations.csv"
)

# Build point geometries from the coordinate columns (x = longitude,
# y = latitude), tagging them with CRS 4326 at construction time, then hand
# the result to bcmaps' transform_bc_albers().
locations_sf <- locations %>%
  select(animal_id, longitude, latitude) %>%
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326) %>%
  transform_bc_albers()
@juliasilge
juliasilge / demo_boot.md
Last active August 5, 2020 14:55
Explain that bootstraps() is not upsampling or downsampling when using strata
library(rsample)
library(tidyverse)
library(palmerpenguins)
# Load the penguins data set into the workspace.
data(penguins)

# Tabulate the number of rows for each species.
count(penguins, species)
#> # A tibble: 3 x 2
#>   species       n
@juliasilge
juliasilge / step_normalize.md
Created August 17, 2020 19:32
Apply recipe to resamples
library(tidymodels)

# Recipe with no outcome: every column of mtcars is on the right-hand side.
car_rec <- recipe(~ ., data = mtcars)
# Add a normalization step for disp and qsec only.
car_rec <- step_normalize(car_rec, disp, qsec)

# Estimate the step from the data the recipe was defined on.
car_prep <- car_rec %>%
  prep()

# Print the processed version of that same data.
car_prep %>%
  juice()
#> # A tibble: 32 x 11
@juliasilge
juliasilge / join_kmeans.md
Created August 22, 2020 23:28
Join output of tidy and augment to get centers of each cluster for each point
library(tidymodels)

# One row per cluster: its label, how many points it has, and where its
# center lies in the (x1, x2) plane.
centers <- tibble(
  cluster = factor(seq_len(3)),
  # number points in each cluster
  num_points = c(100, 150, 50),
  # x1 coordinate of cluster center
  x1 = c(5, 0, -3),
  # x2 coordinate of cluster center
  x2 = c(-1, 1, -2)
)
@juliasilge
juliasilge / skipping.md
Created August 26, 2020 02:50
When do bake() and juice() skip?
library(tidymodels)
# Load the ames data set into the workspace.
data(ames)

# Fix the RNG seed so the train/test partition is reproducible.
set.seed(833961)
# Keep 80% of the rows for training, stratifying the split on Sale_Price.
# The proportion argument of initial_split() is `prop`. A misspelled `prob`
# does not match it and lands in `...`, so the requested 80% is never applied
# (depending on the rsample version it is silently ignored or rejected).
ames_split <- initial_split(ames, prop = 0.80, strata = Sale_Price)
ames_train <- training(ames_split)
ames_test <- testing(ames_split)

ames_rec <- recipe(Sale_Price ~ Neighborhood + Gr_Liv_Area + Year_Built + Bldg_Type,
@juliasilge
juliasilge / song_lyrics_log_odds.md
Last active December 1, 2020 06:00
Beyoncé and Taylor Swift Lyrics
library(tidyverse)
library(tidytext)
library(tidylo)
library(silgelib)

# Download the Beyoncé lyrics table from the 2020-09-29 TidyTuesday data.
beyonce_lyrics <- read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/beyonce_lyrics.csv"
)
#> Parsed with column specification:
#> cols(
#>   line = col_character(),