Created
July 28, 2022 06:44
-
-
Save danstowell/e29ebccc02ca6da38228749d6520d1fc to your computer and use it in GitHub Desktop.
example of bootstrap sampling to estimate confidence intervals on an accuracy measure
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example of bootstrap sampling to estimate confidence intervals on an
# accuracy measure.
#
# The observed outcomes are resampled with replacement many times; the
# accuracy of each resample is computed, and the 2.5th / 97.5th percentiles
# of those accuracies are reported as a 95% confidence interval.
import numpy as np

# Number of bootstrap resamples. 50 is fast enough for development purposes,
# but 500 is used for final evaluation.
nbootstraps = 500

# Here's a VERY SHORT list of outcomes, each one reflecting whether sound X
# was correctly predicted (1) or not (0).
outcomes = np.array([1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0])

# Generate the indexing arrays for bootstrap sampling. Each array holds
# len(outcomes) indices drawn uniformly from [0, len(outcomes)), i.e. one
# resample with replacement. The seed is fixed so the result is reproducible.
np.random.seed(1234)
bootstrappers = [
    np.random.randint(0, len(outcomes), size=len(outcomes), dtype=int)
    for _ in range(nbootstraps)
]

# Accuracy on the original (un-resampled) outcomes.
accuracy = np.mean(outcomes)

# Accuracy of every bootstrap resample (fancy indexing picks the resampled
# outcomes in one step).
boo_accuracies = [np.mean(outcomes[bootstrapper]) for bootstrapper in bootstrappers]

# Bounds of the central 95% of the bootstrap accuracies.
accuracy_lo = np.percentile(boo_accuracies, 2.5)
accuracy_hi = np.percentile(boo_accuracies, 97.5)

print(f"For {len(outcomes)} outcomes, the accuracy is {accuracy}, confidence interval [{accuracy_lo}, {accuracy_hi}]")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment