Skip to content

Instantly share code, notes, and snippets.

@BioSciEconomist
Last active February 27, 2020 20:05
Show Gist options
  • Save BioSciEconomist/27a6b57fe4521c4e26db242f0508a228 to your computer and use it in GitHub Desktop.
Save BioSciEconomist/27a6b57fe4521c4e26db242f0508a228 to your computer and use it in GitHub Desktop.
Percentile based scorecard for predictive model
# ------------------------------------------------------------------
# |PROGRAM NAME: percentiles.R
# |DATE: 2/25/20
# |CREATED BY: MATT BOGARD
# |PROJECT FILE:
# |----------------------------------------------------------------
# | PURPOSE: quickly divide predicted probabilities into groups based on percentiles
# | and assess model performance
# |------------------------------------------------------------------
# this assumes a data frame wiht training/validation data (tmp1), predicted or scored probabilities (predprob), and an outcome variable (target)
# get percentiles for splitting data into groups
cuts <- tmp1%>%
summarize( P10 = quantile (True, probs=0.10,na.rm = TRUE),
P20= quantile(predprob, probs=0.20,na.rm = TRUE),
P30 = quantile (predprob, probs=0.30,na.rm = TRUE),
P40 = quantile (predprob, probs=0.40,na.rm = TRUE),
P50 = quantile (predprob, probs=0.50,na.rm = TRUE),
P60 = quantile (predprob, probs=0.60,na.rm = TRUE),
P70 = quantile (predprob, probs=0.70,na.rm = TRUE),
P80 = quantile (predprob, probs=0.80,na.rm = TRUE),
P90 = quantile (predprob, probs=0.90,na.rm = TRUE)
)
print(cuts) # check values
# use cutoffs from above 'cuts' file to segregate data into 10 groups
tmp1$risk_group <- ifelse(tmp1$predprob <= cuts$P10,"Group 1",
ifelse(tmp1$predprob > cuts$P10 & tmp1$predprob <= cuts$P20, "Group 2",
ifelse(tmp1$predprob > cuts$P20 & tmp1$predprob <= cuts$P30, "Group 3",
ifelse(tmp1$predprob > cuts$P30 & tmp1$predprob <= cuts$P40, "Group 4",
ifelse(tmp1$predprob > cuts$P40 & tmp1$predprob <= cuts$P50, "Group 5",
ifelse(tmp1$predprob > cuts$P50 & tmp1$predprob <= cuts$P60, "Group 6",
ifelse(tmp1$predprob > cuts$P60 & tmp1$predprob <= cuts$P70, "Group 7",
ifelse(tmp1$predprob > cuts$P70 & tmp1$predprob <= cuts$P80, "Group 8",
ifelse(tmp1$predprob > cuts$P80 & tmp1$predprob <= cuts$P90, "Group 9",
ifelse(tmp1$predprob > cuts$P90, "Group 10","other"))))))))))
# assess average target outcome within each risk group
tmp1%>%
group_by(risk_group) %>%
summarize(meanTarget = mean(target),
meanProb = mean(predprob),
total = n())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment