Skip to content

Instantly share code, notes, and snippets.

View juliasilge's full-sized avatar
πŸ›

Julia Silge juliasilge

πŸ›
View GitHub Profile
@juliasilge
juliasilge / rmsle.md
Created June 30, 2021 01:25
Root Mean Squared Log Error (RMSLE)
library(rlang)
library(tidymodels)
#> Registered S3 method overwritten by 'tune':
#>   method                   from   
#>   required_pkgs.model_spec parsnip

# Root Mean Squared Log Error (RMSLE): S3 generic.
#
# data  Object whose class selects the method that is run.
# ...   Further arguments, handed on to the selected method.
rmsle <- function(data, ...) UseMethod("rmsle")
@juliasilge
juliasilge / internet_access.md
Created May 12, 2021 03:34
#TidyTuesday internet access in the US
library(tidyverse)
library(tidycensus)
library(sf)
#> Linking to GEOS 3.8.1, GDAL 3.1.4, PROJ 6.3.1
library(viridis)
#> Loading required package: viridisLite
library(patchwork)
theme_set(silgelib::theme_plex())
@juliasilge
juliasilge / tune_bagged_tree.md
Last active April 27, 2021 02:31
Demo tuning bagged tree
library(tidymodels)
#> Registered S3 method overwritten by 'tune':
#>   method                   from   
#>   required_pkgs.model_spec parsnip
library(baguette)

# Seed the RNG so the resamples drawn below are reproducible.
set.seed(123)
# Build 5 bootstrap resamples of the built-in mtcars data set.
car_folds <- bootstraps(mtcars, times = 5)
@juliasilge
juliasilge / benchmark_logistic_reg.R
Created March 22, 2021 15:38
Benchmark LiblineaR and glmnet engines for logistic regression in tidymodels
library(tidymodels)
library(textrecipes)
data("small_fine_foods")
sparse_bp <- hardhat::default_recipe_blueprint(composition = "dgCMatrix")
text_rec <-
recipe(score ~ review, data = training_data) %>%
step_tokenize(review) %>%
step_stopwords(review) %>%
@juliasilge
juliasilge / steam_games.md
Last active December 4, 2022 04:55
#TidyTuesday STEAM video games
library(tidyverse)
library(silgelib)
theme_set(theme_plex())

games <- read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-03-16/games.csv')
#> 
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#>   gamename = col_character(),
@juliasilge
juliasilge / ninja.md
Last active December 15, 2020 19:14
Ninja Warrior obstacles #TidyTuesday
library(tidyverse)
library(tidylo)
library(tidytext)
theme_set(silgelib::theme_plex())

ninja_raw <- read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-12-15/ninja_warrior.csv')
#> 
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
@juliasilge
juliasilge / washington_hikes.md
Created November 25, 2020 15:39
Washington hikes #TidyTuesday
library(tidyverse)
library(broom)
theme_set(silgelib::theme_plex())

# Read the serialized hike data straight from the TidyTuesday repository.
raw_hikes <- read_rds(url('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-11-24/hike_data.rds'))

# Run parse_number() over the length, gain, highpoint and rating columns
# (presumably stored as text in the raw data -- confirm against the source).
hikes <- raw_hikes %>%
    mutate(across(c(length, gain, highpoint, rating), parse_number))
@juliasilge
juliasilge / song_lyrics_log_odds.md
Last active December 1, 2020 06:00
Beyoncé and Taylor Swift Lyrics
library(tidyverse)
library(tidytext)
library(tidylo)
library(silgelib)

beyonce_lyrics <- read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/beyonce_lyrics.csv')
#> Parsed with column specification:
#> cols(
#>   line = col_character(),
@juliasilge
juliasilge / skipping.md
Created August 26, 2020 02:50
When do bake() and juice() skip?
library(tidymodels)
data(ames)

# Seed the RNG so the train/test partition is reproducible.
set.seed(833961)
# Put 80% of the rows in the training set, stratified on Sale_Price.
# The proportion argument of initial_split() is `prop`; the previous spelling
# `prob` matched no parameter, so the requested 80/20 split was never applied.
ames_split <- initial_split(ames, prop = 0.80, strata = Sale_Price)
# Extract the two partitions from the split object.
ames_train <- training(ames_split)
ames_test <- testing(ames_split)

ames_rec <- recipe(Sale_Price ~ Neighborhood + Gr_Liv_Area + Year_Built + Bldg_Type,
@juliasilge
juliasilge / join_kmeans.md
Created August 22, 2020 23:28
Join output of tidy and augment to get centers of each cluster for each point
library(tidymodels)

# One row per cluster (three in total): its label, how many points it holds,
# and the two coordinates of its center.
centers <- tibble(
  cluster = factor(1:3), 
  num_points = c(100, 150, 50),  # number points in each cluster
  x1 = c(5, 0, -3),              # x1 coordinate of cluster center
  x2 = c(-1, 1, -2)              # x2 coordinate of cluster center
)