Skip to content

Instantly share code, notes, and snippets.

@brshallo
Last active November 17, 2021 21:13
Show Gist options
  • Select an option

  • Save brshallo/00926e6f669cddaeee568af9a1dc3593 to your computer and use it in GitHub Desktop.

Select an option

Save brshallo/00926e6f669cddaeee568af9a1dc3593 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(Hmisc)

data(lending_club, package = "modeldata")
lending_club <- mutate(lending_club, funded_amnt = as.double(funded_amnt))

bootstap_grouped_weighted <- function(df, target, groups, wt){
  options(dplyr.summarise.inform = FALSE)
  
  df %>% 
    group_by(across({{ groups }})) %>%
    slice_sample(prop = 1, replace = TRUE) %>% 
    summarise(avg = weighted.mean({{ target }}, {{ wt }}),
              wt = sum({{ wt }})) %>% 
    ungroup()
}

sims_interest_rates <- tibble(index = 1:1000) %>% 
  mutate(sims = map(index, 
                    ~bootstap_grouped_weighted(
                      df = lending_club,
                      target = int_rate,
                      groups = c(Class, verification_status),
                      wt = funded_amnt))
  ) %>% 
  unnest(sims)

sims_interest_rates %>% 
  ggplot(aes(x = avg, wt = wt, fill = verification_status))+
  geom_density()+
  facet_wrap(~Class, ncol = 1)+
  theme_minimal()+
  scale_y_continuous(labels = NULL)+
  labs(title = "Simulated Distribution of Expected Interest Rates is Wider in Defaulted Loans",
       subtitle = " Regardless of Verification Status",
       x = "Distribution of Expected Interest Rate % (Weighted by Loan Amount)",
       y = "Density")

# Check 80% intervals
# And note that interval_width has interaction between Class and verification_status
sims_interest_rates %>% 
  group_by(Class, verification_status) %>% 
  summarise(lower = wtd.quantile(avg, wt, 0.10),
            upper = wtd.quantile(avg, wt, 0.90),
            width = upper - lower) %>% 
  ungroup()
#> # A tibble: 6 x 5
#>   Class verification_status lower upper width
#>   <fct> <fct>               <dbl> <dbl> <dbl>
#> 1 bad   Not_Verified         14.4  15.7 1.31 
#> 2 bad   Source_Verified      16.6  17.7 1.15 
#> 3 bad   Verified             18.2  19.1 0.937
#> 4 good  Not_Verified         10.9  11.1 0.229
#> 5 good  Source_Verified      12.6  12.9 0.239
#> 6 good  Verified             14.1  14.4 0.316

Created on 2021-11-17 by the reprex package (v2.0.0)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment