Skip to content

Instantly share code, notes, and snippets.

@BioSciEconomist
Created October 16, 2020 21:03
Show Gist options
  • Save BioSciEconomist/0dd3b96b05c47c43bd5cc588f6eb9086 to your computer and use it in GitHub Desktop.
Save BioSciEconomist/0dd3b96b05c47c43bd5cc588f6eb9086 to your computer and use it in GitHub Desktop.
Python bootstrapped confidence interval
# *-----------------------------------------------------------------
# | PROGRAM NAME: ex mean bootstrap CI.py
# | DATE: 10/16/20
# | CREATED BY: MATT BOGARD
# | PROJECT FILE:
# *----------------------------------------------------------------
# | PURPOSE: example calculation of bootstrapped confidence interval for mean
# *----------------------------------------------------------------
import numpy as np
import pandas as pd
# Build a synthetic data set: N rows, each with a randomly assigned state
# and a cost drawn uniformly from [50, 1000).
N = 100
Location = ['Florida', 'Texas']
# Dict entries are evaluated in order, so the states are drawn before the
# costs, which keeps the random draw sequence State -> Cost.
df = pd.DataFrame({
    'State': np.random.choice(Location, size=N),
    'Cost': np.random.uniform(50, 1000, size=N),
})
df['Cost'].describe()  # quick sanity check of the simulated costs
#
# define function for creating bootstrap samples
#
def draw_bs_replicates(data, func, size, *, rng=None):
    """Draw bootstrap replicates of a statistic.

    Resamples ``data`` with replacement ``size`` times (each resample has
    the same length as ``data``), applies ``func`` to every resample and
    collects the results.

    Parameters
    ----------
    data : 1-D array-like
        Observed sample, e.g. a list, NumPy array or pandas Series.
    func : callable
        Statistic of interest. It is called with one resampled 1-D NumPy
        array and must return a value that fits in a float array slot.
    size : int
        Number of bootstrap replicates to compute.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness (anything exposing ``choice``). When omitted,
        the global ``np.random`` state is used, so ``np.random.seed`` keeps
        controlling the draws exactly as before.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(size,)`` with one replicate per resample.
    """
    # Convert once up front; the sample and its length do not change
    # between iterations.
    values = np.asarray(data)
    n_obs = len(values)
    sampler = np.random if rng is None else rng

    # Pre-allocate the output array for the replicates
    bs_replicates = np.empty(size)
    for i in range(size):
        # Resample with replacement, same size as the original sample
        bs_sample = sampler.choice(values, size=n_obs)
        # Store the statistic computed on this resample
        bs_replicates[i] = func(bs_sample)
    return bs_replicates
# Bootstrap the statistic of interest: 500 replicates of the mean of Cost
bs_replicates = draw_bs_replicates(df['Cost'], np.mean, 500)
#
# analysis
#
# mean of the observed sample
df['Cost'].mean()
# average of the bootstrap replicates
bs_replicates.mean()
# bootstrapped standard error (standard deviation of the replicates)
bs_replicates.std()
# 95% percentile interval: 2.5th and 97.5th percentiles of the replicates
np.percentile(bs_replicates, q=[2.5, 97.5])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment