Last active
March 18, 2019 21:53
-
-
Save seberg/4966984 to your computer and use it in GitHub Desktop.
Vectorized version of percentile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from numpy import asarray, add, rollaxis, sort, arange | |
def percentile(a, q, limit=None, interpolation='linear', axis=None,
               out=None, overwrite_input=False):
    """
    Compute the qth percentile of the data along the specified axis.

    The computation is vectorized over `q`: every requested percentile is
    looked up in a single sorted copy of the data.

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array.
    q : array_like in the range of [0,100]
        Percentile(s) to compute, between 0 and 100 inclusive. If `q` is
        an array, its dimensions are added at the start of the result.
    limit : tuple, optional
        Tuple of two scalars, the lower and upper limits (both inclusive)
        within which to compute the percentile. Values outside of this
        range are omitted from the calculation. None includes all values.
        The selection is done with a boolean mask, which yields
        one-dimensional data, so `limit` is only meaningful together with
        ``axis=None`` (or ``axis=0``).
    interpolation : {'linear', 'lower', 'higher', 'midpoint'}, optional
        Method to use when the desired percentile lies between two data
        points `i` and `j`:

        * linear: `i + (j - i) * fraction`, where `fraction` is the
          fractional part of the index surrounded by `i` and `j`.
        * lower: `i`.
        * higher: `j`.
        * midpoint: `(i + j) / 2`.
    axis : int, optional
        Axis along which the percentiles are computed. The default (None)
        is to compute the percentile over a flattened version of the
        array.
    out : ndarray, optional
        Alternative output array in which to place the result. It must
        have the shape of the expected output and a floating point type
        the result can be cast to.
    overwrite_input : bool, optional
        If True, sort in place instead of sorting a copy. When `a` is
        already an ndarray its contents are then left fully or partially
        sorted; treat the input as undefined afterwards. If `a` is not an
        ndarray (or `limit` is given), a converted copy is sorted and the
        caller's object is untouched. Default is False.

    Returns
    -------
    percentile : ndarray or scalar
        The percentile(s), with the dimensions of `q` first, followed by
        the dimensions of `a` without the reduced axis. If `out` is given,
        that array is returned. The values are always weighted with
        floating point weights, so integer input gives floating point
        output.

    Raises
    ------
    ValueError
        If any value of `q` lies outside [0, 100] (NaN included) or
        `interpolation` is not one of the supported methods. Both checks
        happen before the input is sorted, so a rejected call never
        modifies `a`.

    Notes
    -----
    Given a vector V of length N, the qth percentile of V is the value at
    position ``q / 100 * (N - 1)`` in a sorted copy of V. If that position
    is not an integer, the two nearest neighbours are combined according
    to `interpolation`. The result is the same as the median if ``q=50``,
    the minimum if ``q=0`` and the maximum if ``q=100``.

    Examples
    --------
    >>> a = np.array([[10, 7, 4], [3, 2, 1]])
    >>> float(percentile(a, 50))
    3.5
    >>> percentile(a, 50, axis=0).tolist()
    [6.5, 4.5, 2.5]
    >>> percentile(a, 50, axis=1).tolist()
    [7.0, 2.0]
    >>> percentile(a, [0, 100], axis=1).tolist()
    [[4.0, 1.0], [10.0, 3.0]]
    >>> m = np.zeros(3)
    >>> percentile(a, 50, axis=0, out=m) is m
    True
    >>> m.tolist()
    [6.5, 4.5, 2.5]
    """
    a = asarray(a)
    if limit:
        a = a[(limit[0] <= a) & (a <= limit[1])]

    # Validate the cheap arguments first so that an invalid call cannot
    # leave the input half-sorted when overwrite_input is set.
    q = asarray(q)
    # The trailing length-1 axis is where the neighbour indices go.
    q = q.reshape(q.shape + (1,)) / 100.0
    # Written as a positive test so that NaN is rejected as well.
    if not ((q >= 0) & (q <= 1)).all():
        raise ValueError("percentile must be either in the range [0,100]")
    if interpolation not in ('linear', 'lower', 'higher', 'midpoint'):
        raise ValueError("interpolation can only be 'linear', 'lower', "
                         "'higher' or 'midpoint'")

    if not overwrite_input:
        ordered = sort(a, axis=axis)
    elif axis is None:
        # ravel() is a view for contiguous input, so this sorts `a` itself
        # whenever that is possible without a copy.
        ordered = a.ravel()
        ordered.sort()
    else:
        a.sort(axis=axis)
        ordered = a

    # Move the reduced axis to the front; the dimensions of q replace it.
    ordered = rollaxis(ordered, 0 if axis is None else axis, 0)

    last = ordered.shape[0] - 1
    index = q * last  # fractional position in the sorted data
    below = np.floor(index).astype(np.intp)

    # For every percentile pick the neighbouring positions and a float
    # weight per neighbour (both stored along the last axis).
    if interpolation == 'lower':
        positions = below
        weights = np.ones(positions.shape)
    elif interpolation == 'higher':
        positions = np.ceil(index).astype(np.intp)
        weights = np.ones(positions.shape)
    elif interpolation == 'midpoint':
        above = np.ceil(index).astype(np.intp)
        positions = np.concatenate((below, above), axis=-1)
        weights = np.full(positions.shape, 0.5)
    else:  # 'linear'
        # The upper neighbour of the last element does not exist; clamp it.
        # Its weight is exactly zero there, so the value is unaffected.
        positions = np.minimum(below + arange(2), last)
        weights = np.concatenate(((below + 1) - index, index - below),
                                 axis=-1)

    # Let the weights broadcast over the remaining data dimensions.
    weights = weights.reshape(weights.shape + (1,) * (ordered.ndim - 1))
    pair_axis = positions.ndim - 1
    total = weights.sum(axis=pair_axis)

    # add.reduce coerces the data type and also checks and fills `out`.
    result = add.reduce(ordered[positions, ...] * weights,
                        axis=pair_axis, out=out)
    # Normalise by the weight sum to guard against rounding in the weights.
    result /= total
    return result
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment