Skip to content

Instantly share code, notes, and snippets.

@christopherlovell
Created May 11, 2018 11:07
Show Gist options
  • Save christopherlovell/c14767681cbc7f9f0f243d008af76fbd to your computer and use it in GitHub Desktop.
Save christopherlovell/c14767681cbc7f9f0f243d008af76fbd to your computer and use it in GitHub Desktop.
Resample a distribution to match another
import random

import numpy as np
from scipy.stats import binned_statistic
def resample_distribution(x, y, k=300, bin_limits=None):
    """Draw indices into ``y`` so the selected values follow the histogram of ``x``.

    Both samples are histogrammed on the same bin edges.  Every element of
    ``y`` is then weighted by ``count_x[bin] / count_y[bin]`` for the bin it
    falls in, and ``k`` indices are drawn (with replacement) using those
    weights via ``random.choices``.

    Args:
        x (array-like): 1-D reference sample whose distribution is the target.
        y (array-like): 1-D sample to draw from.
        k (int): number of indices to draw.
        bin_limits (array-like, optional): bin edges shared by both
            histograms.  Defaults to ``np.linspace(0, 1.3, 30)``.

    Returns:
        tuple: ``(indices, bin_limits)`` where ``indices`` is a list of ``k``
        positions into ``y`` and ``bin_limits`` is the set of edges used.

    Raises:
        ValueError: propagated from ``random.choices`` when no element of
            ``y`` receives a positive weight (e.g. ``x`` and ``y`` share no
            populated bin).
    """
    if bin_limits is None:
        bin_limits = np.linspace(0, 1.3, 30)

    x = np.asarray(x)
    y = np.asarray(y)

    # statistic='count' ignores the actual values; ones are just a placeholder.
    count_x, _, _ = binned_statistic(
        x=x, values=np.ones(len(x)), statistic='count', bins=bin_limits)
    count_y, _, binnumber_y = binned_statistic(
        x=y, values=np.ones(len(y)), statistic='count', bins=bin_limits)

    # binned_statistic labels bins 1..n_bins; 0 and n_bins + 1 flag values
    # below / above the edges.  Those must not be sampled: indexing the counts
    # with them would wrap to the last bin or run off the end of the array.
    n_bins = len(count_y)
    in_range = (binnumber_y >= 1) & (binnumber_y <= n_bins)
    bin_index = binnumber_y[in_range] - 1

    # count_y is >= 1 in every bin referenced here (the y element itself lies
    # in it), so the division is always well defined.
    probabilities = np.zeros(len(y))
    probabilities[in_range] = count_x[bin_index] / count_y[bin_index]

    indices = random.choices(np.arange(len(y)), k=k, weights=probabilities)
    return indices, bin_limits
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment