Last active
January 17, 2020 13:26
-
-
Save christopherlovell/d0b199d1926fee878b2004866f676048 to your computer and use it in GitHub Desktop.
Calculate the binned, weighted percentile
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def weighted_quantile(values, quantiles, sample_weight=None, | |
values_sorted=False, old_style=False): | |
""" | |
Taken from From https://stackoverflow.com/a/29677616/1718096 | |
Very close to numpy.percentile, but supports weights. | |
NOTE: quantiles should be in [0, 1]! | |
:param values: numpy.array with data | |
:param quantiles: array-like with many quantiles needed | |
:param sample_weight: array-like of the same length as `array` | |
:param values_sorted: bool, if True, then will avoid sorting of | |
initial array | |
:param old_style: if True, will correct output to be consistent | |
with numpy.percentile. | |
:return: numpy.array with computed quantiles. | |
""" | |
# do some housekeeping | |
values = np.array(values) | |
quantiles = np.array(quantiles) | |
if sample_weight is None: | |
sample_weight = np.ones(len(values)) | |
sample_weight = np.array(sample_weight) | |
assert np.all(quantiles >= 0) and np.all(quantiles <= 1), \ | |
'quantiles should be in [0, 1]' | |
# if not sorted, sort values array | |
if not values_sorted: | |
sorter = np.argsort(values) | |
values = values[sorter] | |
sample_weight = sample_weight[sorter] | |
weighted_quantiles = np.cumsum(sample_weight) - 0.5 * sample_weight | |
if old_style: | |
# To be convenient with numpy.percentile | |
weighted_quantiles -= weighted_quantiles[0] | |
weighted_quantiles /= weighted_quantiles[-1] | |
else: | |
weighted_quantiles /= np.sum(sample_weight) | |
return np.interp(quantiles, weighted_quantiles, values) | |
def binned_weighted_quantile(x,y,weights,bins,quantiles): | |
if ~isinstance(quantiles,list): | |
quantiles = [quantiles] | |
out = np.full((len(bins)-1,len(quantiles)),None) | |
for i,(b1,b2) in enumerate(zip(bins[:-1],bins[1:])): | |
mask = (x >= b1) & (x < b2) | |
if np.sum(mask) > 0: | |
out[i,:] = weighted_quantile(y[mask],quantiles,sample_weight=weights[mask]) | |
return np.squeeze(out) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment