Skip to content

Instantly share code, notes, and snippets.

@davidclarance
Last active August 8, 2024 11:45
Show Gist options
  • Save davidclarance/57bdcf0669c6fdcd229ddc6a4fdc7fbe to your computer and use it in GitHub Desktop.
Save davidclarance/57bdcf0669c6fdcd229ddc6a4fdc7fbe to your computer and use it in GitHub Desktop.
Bootstrap the nth percentile
def bootstrap_nth_percentile_and_ci(
    data: List[float], n_iter: int = 1000, alpha: float = 0.95, percentile: float = 95
) -> Tuple[np.ndarray, float, float, float]:
    """Compute the nth percentile and confidence interval using bootstrapping.

    Draws ``n_iter`` resamples of ``data`` (with replacement, each the same
    length as ``data``), takes the requested percentile of every resample, and
    summarises the resulting bootstrap distribution.

    Args:
        data (List[float]): Observations to resample from.
        n_iter (int, optional): Number of bootstrap samples. Defaults to 1000.
        alpha (float, optional): Confidence level of the interval, in [0, 1]
            (e.g. 0.95 for a 95% interval). Defaults to 0.95.
        percentile (float, optional): Percentile to compute, in [0, 100].
            Defaults to 95.

    Returns:
        Tuple[np.ndarray, float, float, float]: The array of ``n_iter``
        bootstrapped percentile estimates, the mean of those estimates, and
        the lower and upper bounds of the confidence interval.

    Raises:
        ValueError: If ``data`` is empty, ``n_iter`` is less than 1, or
            ``alpha`` / ``percentile`` fall outside their valid ranges.
    """
    n = len(data)
    # Fail early with a clear message: an empty population is otherwise only
    # detected (if at all) deep inside NumPy.
    if n == 0:
        raise ValueError("data must contain at least one observation")
    if n_iter < 1:
        raise ValueError("n_iter must be at least 1")
    if not 0 <= alpha <= 1:
        raise ValueError("alpha must be a confidence level between 0 and 1")
    if not 0 <= percentile <= 100:
        raise ValueError("percentile must be between 0 and 100")

    # NOTE: this materialises an (n_iter, n) array, so memory use grows with
    # the product of the two.
    bootstrap_samples = np.random.choice(data, (n_iter, n), replace=True)

    # One percentile estimate per resample (row).
    percentiles = np.percentile(bootstrap_samples, percentile, axis=1)
    mean = percentiles.mean()

    # Percentile-method interval: cut (1 - alpha) / 2 from each tail of the
    # bootstrap distribution.
    lower = np.percentile(percentiles, (1 - alpha) / 2 * 100)
    upper = np.percentile(percentiles, (1 + alpha) / 2 * 100)
    return percentiles, mean, lower, upper
# Simulated population: one million processing times drawn from a normal
# distribution with the mean and standard deviation below.
mean_time = 2.0
std_dev = 0.5
processing_time = np.random.normal(mean_time, std_dev, 1_000_000)

# Observed sample: 10,000 draws from the population, i.e. 1/100th of its size
# (np.random.choice samples with replacement by default).
sample = np.random.choice(processing_time, size=10_000)

# Bootstrap the 95th percentile of the sample with 10,000 resamples, keeping
# the full bootstrap distribution plus its mean and interval bounds.
(percentiles, mean, lower_bound, upper_bound) = bootstrap_nth_percentile_and_ci(
    data=sample, n_iter=10_000, percentile=95
)
@davidclarance
Copy link
Author

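The graph is generated using the following code, which relies on `percentiles`, `processing_time`, `lower_bound`, and `upper_bound` from the snippet above: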

# Line style shared by every vertical marker drawn on the histogram.
_marker_style = dict(opacity=1, line_width=2, line_dash="dash")

# Histogram of the bootstrapped 95th-percentile estimates.
fig = px.histogram(
    percentiles,
    title="Bootstrap Distribution of the 95th Percentile",
    labels={"value": "95th Percentile", "count": "Frequency"},
    color_discrete_sequence=["black"],
)

# Blue marker: the 95th percentile computed on the full population.
fig.add_vline(
    x=np.percentile(processing_time, 95),
    line_color="Blue",
    annotation_text="True 95th Percentile",
    **_marker_style,
)

# Red markers: lower and upper bounds of the bootstrap confidence interval.
for _bound in (lower_bound, upper_bound):
    fig.add_vline(x=_bound, line_color="Red", **_marker_style)

fig.update_layout(showlegend=False)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment