Last active
February 27, 2020 20:05
-
-
Save BioSciEconomist/27a6b57fe4521c4e26db242f0508a228 to your computer and use it in GitHub Desktop.
Percentile based scorecard for predictive model
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ------------------------------------------------------------------ | |
# |PROGRAM NAME: percentiles.R | |
# |DATE: 2/25/20 | |
# |CREATED BY: MATT BOGARD | |
# |PROJECT FILE: | |
# |---------------------------------------------------------------- | |
# | PURPOSE: quickly divide predicted probabilities into groups based on percentiles | |
# | and assess model performance | |
# |------------------------------------------------------------------ | |
# this assumes a data frame wiht training/validation data (tmp1), predicted or scored probabilities (predprob), and an outcome variable (target) | |
# get percentiles for splitting data into groups | |
cuts <- tmp1%>% | |
summarize( P10 = quantile (True, probs=0.10,na.rm = TRUE), | |
P20= quantile(predprob, probs=0.20,na.rm = TRUE), | |
P30 = quantile (predprob, probs=0.30,na.rm = TRUE), | |
P40 = quantile (predprob, probs=0.40,na.rm = TRUE), | |
P50 = quantile (predprob, probs=0.50,na.rm = TRUE), | |
P60 = quantile (predprob, probs=0.60,na.rm = TRUE), | |
P70 = quantile (predprob, probs=0.70,na.rm = TRUE), | |
P80 = quantile (predprob, probs=0.80,na.rm = TRUE), | |
P90 = quantile (predprob, probs=0.90,na.rm = TRUE) | |
) | |
print(cuts) # check values | |
# use cutoffs from above 'cuts' file to segregate data into 10 groups | |
tmp1$risk_group <- ifelse(tmp1$predprob <= cuts$P10,"Group 1", | |
ifelse(tmp1$predprob > cuts$P10 & tmp1$predprob <= cuts$P20, "Group 2", | |
ifelse(tmp1$predprob > cuts$P20 & tmp1$predprob <= cuts$P30, "Group 3", | |
ifelse(tmp1$predprob > cuts$P30 & tmp1$predprob <= cuts$P40, "Group 4", | |
ifelse(tmp1$predprob > cuts$P40 & tmp1$predprob <= cuts$P50, "Group 5", | |
ifelse(tmp1$predprob > cuts$P50 & tmp1$predprob <= cuts$P60, "Group 6", | |
ifelse(tmp1$predprob > cuts$P60 & tmp1$predprob <= cuts$P70, "Group 7", | |
ifelse(tmp1$predprob > cuts$P70 & tmp1$predprob <= cuts$P80, "Group 8", | |
ifelse(tmp1$predprob > cuts$P80 & tmp1$predprob <= cuts$P90, "Group 9", | |
ifelse(tmp1$predprob > cuts$P90, "Group 10","other")))))))))) | |
# assess average target outcome within each risk group | |
tmp1%>% | |
group_by(risk_group) %>% | |
summarize(meanTarget = mean(target), | |
meanProb = mean(predprob), | |
total = n()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment