Skip to content

Instantly share code, notes, and snippets.

@dmyersturnbull
Last active November 21, 2016 23:19
Show Gist options
  • Save dmyersturnbull/f8f1111d8815133514658fc3aef34d3c to your computer and use it in GitHub Desktop.
Save dmyersturnbull/f8f1111d8815133514658fc3aef34d3c to your computer and use it in GitHub Desktop.
Bootstrap mean(X) - mean(Y) from two pandas DataFrames.
# Douglas Myers-Turnbull wrote this for the Kokel Lab, which has released it under the Apache Software License, Version 2.0
# See the license file here: https://gist.github.com/dmyersturnbull/bfa1c3371e7449db553aaa1e7cd3cac1
# The list of copyright owners is unknown
import pandas as pd
import numpy as np
from typing import Optional
def bootstrap_subtract(X: pd.DataFrame, Y: pd.DataFrame, n_bootstrap_samples: int = 200,
                       random_seed: Optional[int] = None) -> pd.DataFrame:
    """Bootstrap the per-column difference mean(X) - mean(Y).

    For each bootstrap replicate, the rows of X and the rows of Y are
    resampled independently with replacement (len(X) and len(Y) draws,
    respectively), the mean of every column is taken, and the column means
    of Y are subtracted from those of X. The subtraction aligns on column
    labels, so a label present in only one frame yields NaN.

    Args:
        X: DataFrame whose rows are resampled. Columns must be single-level.
        Y: DataFrame whose rows are resampled. Columns must be single-level.
        n_bootstrap_samples: Number of bootstrap replicates to draw (>= 1).
        random_seed: Seed for reproducible results. A single random state is
            created from it and shared by every draw, so replicates differ
            from each other while the overall result is reproducible.
            If None, NumPy's global random state is used.

    Returns:
        A DataFrame with columns 'index' (the column label of X/Y) and
        'value' (the bootstrapped difference of means), containing one row
        per column label per replicate. The row index restarts at 0 for
        each replicate.

    Raises:
        ValueError: If n_bootstrap_samples is less than 1.
    """
    if n_bootstrap_samples < 1:
        raise ValueError(
            "n_bootstrap_samples must be at least 1; got {}".format(n_bootstrap_samples)
        )

    # Passing the integer seed to every .sample() call would re-seed each
    # draw identically, making all replicates (and the X and Y row draws)
    # the same. Sharing one RandomState advances the stream between draws.
    rng = None if random_seed is None else np.random.RandomState(random_seed)

    def _boot() -> pd.DataFrame:
        """Draw one replicate; the returned frame still has placeholder column names."""
        x_means = X.sample(n=len(X), replace=True, random_state=rng).mean(axis=0)
        y_means = Y.sample(n=len(Y), replace=True, random_state=rng).mean(axis=0)
        return (x_means - y_means).to_frame().reset_index()

    R = pd.concat([_boot() for _ in range(n_bootstrap_samples)])
    R.columns = ['index', 'value']
    return R
@dmyersturnbull
Copy link
Author

# Smoke test: every column is constant, so resampling rows cannot change the
# column means and the expected differences are exact.
z = range(1, 3)
X = pd.DataFrame({'0': [1] * len(z), '1': [2] * len(z), '2': [2] * len(z)})
Y = pd.DataFrame({'0': [2] * len(z), '1': [2] * len(z), '2': [10] * len(z)})
R = bootstrap_subtract(X, Y, n_bootstrap_samples=1, random_seed=1)
assert R['value'].tolist() == [-1, 0, -8]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment