library(rsample)
library(tidyverse)
library(palmerpenguins)
# Load the `penguins` data set into the workspace.
data("penguins")

# Tally the number of rows (penguins) recorded for each species.
count(penguins, species)
#> # A tibble: 3 x 2
#> species n
#> <fct> <int>
#> 1 Adelie 152
#> 2 Chinstrap 68
#> 3 Gentoo 124
# Build bootstrap resamples of the penguins data, stratified by species.
penguin_folds <- bootstraps(data = penguins, strata = species)

# Show the resulting resample set.
penguin_folds
#> # Bootstrap sampling using stratification
#> # A tibble: 25 x 2
#> splits id
#> <list> <chr>
#> 1 <split [344/130]> Bootstrap01
#> 2 <split [344/121]> Bootstrap02
#> 3 <split [344/121]> Bootstrap03
#> 4 <split [344/135]> Bootstrap04
#> 5 <split [344/125]> Bootstrap05
#> 6 <split [344/128]> Bootstrap06
#> 7 <split [344/120]> Bootstrap07
#> 8 <split [344/130]> Bootstrap08
#> 9 <split [344/132]> Bootstrap09
#> 10 <split [344/132]> Bootstrap10
#> # … with 15 more rows
# For every resample, count the species in its analysis set, then spread the
# counts into one column per species so the resamples can be compared by row.
penguin_folds %>%
  mutate(
    species_counts = map(
      splits,
      function(split) count(analysis(split), species)
    )
  ) %>%
  unnest(species_counts) %>%
  pivot_wider(names_from = species, values_from = n)
#> # A tibble: 25 x 5
#> splits id Adelie Chinstrap Gentoo
#> <list> <chr> <int> <int> <int>
#> 1 <split [344/130]> Bootstrap01 152 68 124
#> 2 <split [344/121]> Bootstrap02 152 68 124
#> 3 <split [344/121]> Bootstrap03 152 68 124
#> 4 <split [344/135]> Bootstrap04 152 68 124
#> 5 <split [344/125]> Bootstrap05 152 68 124
#> 6 <split [344/128]> Bootstrap06 152 68 124
#> 7 <split [344/120]> Bootstrap07 152 68 124
#> 8 <split [344/130]> Bootstrap08 152 68 124
#> 9 <split [344/132]> Bootstrap09 152 68 124
#> 10 <split [344/132]> Bootstrap10 152 68 124
#> # … with 15 more rows
Created on 2020-08-05 by the reprex package (v0.3.0.9001)
Notice that the number of penguins of each species is the same in every bootstrap resample because we used `strata`. No upsampling or downsampling happened.