Skip to content

Instantly share code, notes, and snippets.

@sadimanna
Last active June 21, 2020 08:29
Show Gist options
  • Select an option

  • Save sadimanna/d57a1bf02baf909fcd387a24badb1e55 to your computer and use it in GitHub Desktop.

Select an option

Save sadimanna/d57a1bf02baf909fcd387a24badb1e55 to your computer and use it in GitHub Desktop.
def bootstrap_metric(y, pred, classes, metric='auc', bootstraps=100, fold_size=1000):
    """Estimate a per-class metric distribution via stratified bootstrap resampling.

    Args:
        y: array of shape (n_samples, n_classes) with binary {0, 1} labels.
        pred: array of shape (n_samples, n_classes) with predicted scores.
        classes: sequence of class labels; only len(classes) is used here.
        metric: metric name, matched case-insensitively: 'AUC', 'Sensitivity',
            'Specificity' or 'Accuracy'. (The original compared against the
            capitalized names only, so the default 'auc' — or any typo —
            left `metric_func` unbound and raised NameError mid-loop.)
        bootstraps: number of bootstrap replicates per class.
        fold_size: number of samples drawn per replicate.

    Returns:
        np.ndarray of shape (len(classes), bootstraps) with one metric value
        per (class, replicate).

    Raises:
        ValueError: if `metric` is not one of the recognized names.
    """
    # Lazy if/elif lookup (not a dict) so the scorer names defined elsewhere
    # in the module are only resolved for the metric actually requested.
    key = metric.lower()
    if key == 'auc':
        metric_func = roc_auc_score
    elif key == 'sensitivity':
        metric_func = sensitivity
    elif key == 'specificity':
        metric_func = specificity
    elif key == 'accuracy':
        metric_func = get_accuracy
    else:
        raise ValueError(
            "Unknown metric %r; expected one of "
            "'AUC', 'Sensitivity', 'Specificity', 'Accuracy'" % (metric,))

    statistics = np.zeros((len(classes), bootstraps))
    for c in range(len(classes)):
        df = pd.DataFrame(columns=['y', 'pred'])
        df.loc[:, 'y'] = y[:, c]
        df.loc[:, 'pred'] = pred[:, c]
        # Split positives/negatives so each bootstrap fold preserves the
        # observed class prevalence (stratified sampling).
        df_pos = df[df.y == 1]
        df_neg = df[df.y == 0]
        prevalence = len(df_pos) / len(df)
        for i in range(bootstraps):
            # Resample with replacement, keeping the positive/negative mix.
            pos_sample = df_pos.sample(n=int(fold_size * prevalence), replace=True)
            neg_sample = df_neg.sample(n=int(fold_size * (1 - prevalence)), replace=True)
            y_sample = np.concatenate([pos_sample.y.values, neg_sample.y.values])
            pred_sample = np.concatenate([pos_sample.pred.values, neg_sample.pred.values])
            statistics[c][i] = metric_func(y_sample, pred_sample)
    return statistics
def get_confidence_intervals(y, pred, class_labels):
    """Summarize bootstrap metric distributions as "mean (5%-95% CI)" strings.

    Runs `bootstrap_metric` for each of AUC, Sensitivity, Specificity and
    Accuracy, then formats per-class summaries.

    Args:
        y: array of shape (n_samples, n_classes) with binary labels.
        pred: array of shape (n_samples, n_classes) with predicted scores.
        class_labels: sequence of class names, used as DataFrame row labels.

    Returns:
        dict mapping metric name -> one-column DataFrame whose rows (indexed
        by class label) hold "mean (lo-hi)" strings.
    """
    metric_dfs = {}
    for metric in ['AUC', 'Sensitivity', 'Specificity', 'Accuracy']:
        statistics = bootstrap_metric(y, pred, class_labels, metric)
        # Compute the summary vectors once per metric; the original
        # recomputed mean/quantiles over the whole matrix on every
        # iteration of the per-class loop.
        means = statistics.mean(axis=1)
        highs = np.quantile(statistics, .95, axis=1)
        lows = np.quantile(statistics, .05, axis=1)
        df = pd.DataFrame(columns=["Mean " + metric + " (CI 5%-95%)"])
        for i, label in enumerate(class_labels):
            df.loc[label] = ["%.2f (%.2f-%.2f)" % (means[i], lows[i], highs[i])]
        metric_dfs[metric] = df
    return metric_dfs
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment