library(tidyverse)
library(Hmisc)
data(lending_club, package = "modeldata")
lending_club <- mutate(lending_club, funded_amnt = as.double(funded_amnt))
bootstap_grouped_weighted <- function(df, target, groups, wt){
options(dplyr.summarise.inform = FALSE)
df %>%
group_by(across({{ groups }})) %>%
slice_sample(prop = 1, replace = TRUE) %>%
summarise(avg = weighted.mean({{ target }}, {{ wt }}),
wt = sum({{ wt }})) %>%
ungroup()
}
sims_interest_rates <- tibble(index = 1:1000) %>%
mutate(sims = map(index,
~bootstap_grouped_weighted(
df = lending_club,
target = int_rate,
groups = c(Class, verification_status),
wt = funded_amnt))
) %>%
unnest(sims)
sims_interest_rates %>%
ggplot(aes(x = avg, wt = wt, fill = verification_status))+
geom_density()+
facet_wrap(~Class, ncol = 1)+
theme_minimal()+
scale_y_continuous(labels = NULL)+
labs(title = "Simulated Distribution of Expected Interest Rates is Wider in Defaulted Loans",
subtitle = " Regardless of Verification Status",
x = "Distribution of Expected Interest Rate % (Weighted by Loan Amount)",
y = "Density")# Check 80% intervals
# And note that interval_width has interaction between Class and verification_status
sims_interest_rates %>%
group_by(Class, verification_status) %>%
summarise(lower = wtd.quantile(avg, wt, 0.10),
upper = wtd.quantile(avg, wt, 0.90),
width = upper - lower) %>%
ungroup()
#> # A tibble: 6 x 5
#> Class verification_status lower upper width
#> <fct> <fct> <dbl> <dbl> <dbl>
#> 1 bad Not_Verified 14.4 15.7 1.31
#> 2 bad Source_Verified 16.6 17.7 1.15
#> 3 bad Verified 18.2 19.1 0.937
#> 4 good Not_Verified 10.9 11.1 0.229
#> 5 good Source_Verified 12.6 12.9 0.239
#> 6 good Verified 14.1 14.4 0.316Created on 2021-11-17 by the reprex package (v2.0.0)
