Skip to content

Instantly share code, notes, and snippets.

@tansey
Created July 13, 2022 11:02
Show Gist options
  • Save tansey/876f4b8aa6be983eb4454624a07feb6b to your computer and use it in GitHub Desktop.
Save tansey/876f4b8aa6be983eb4454624a07feb6b to your computer and use it in GitHub Desktop.
code to cap values at median deviations
import numpy as np
import numpy.ma as ma
def cap_outliers(points, thresh=3.5, data=None, median=None, med_abs_deviation=None):
    '''
    Cap outliers to be within a certain number of median deviations.

    Values are clipped to the range ``[0, upper]`` where
    ``upper = median + thresh * med_abs_deviation / 0.6745``. Note that the
    lower bound is the hard-coded value 0, not a symmetric lower cap.

    points: a scalar, sequence, or array of values. One-dimensional input
        is treated as a column of single-feature observations.
    thresh: number of (scaled) median absolute deviations above the median
        at which values are capped.
    data, median, med_abs_deviation: forwarded unchanged to
        median_abs_deviation(); any that are None are derived there.

    Returns the first column of the clipped points as a 1-D array, or a
    single element when exactly one point was given.
    '''
    # Accept Python/NumPy scalars, lists and arrays alike; array subclasses
    # (e.g. masked arrays) pass through np.atleast_1d unchanged.
    points = np.atleast_1d(points)
    if points.ndim == 1:
        points = points[:, None]

    # Get the median and the median absolute deviation; the per-point
    # deviations are not needed for capping.
    med_abs_deviation, _, median = median_abs_deviation(
        points, data=data, median=median, med_abs_deviation=med_abs_deviation)

    # Inverse of the modified z-score (0.6745 * diff / MAD) evaluated at
    # `thresh`: the largest value still considered a non-outlier.
    max_point = thresh * med_abs_deviation / 0.6745 + median

    result = points.clip(0, max_point)[:, 0]
    if len(result) == 1:
        return result[0]
    return result
def median_z_score(points, data=None, median=None, med_abs_deviation=None):
    '''
    Calculate the z-score using medians instead of means.

    The score is ``0.6745 * diff / med_abs_deviation``, where ``diff`` is
    each point's distance from the median, signed negative for points that
    are not above the median.

    If data is specified, then points will not be used to calculate the
    median.

    points: a scalar, sequence, or array of values. One-dimensional input
        is treated as a column of single-feature observations.
    data, median, med_abs_deviation: forwarded unchanged to
        median_abs_deviation(); any that are None are derived there.

    Returns a 1-D array of signed scores, or a single element when exactly
    one point was given.
    '''
    # Accept Python/NumPy scalars, lists and arrays alike; array subclasses
    # (e.g. masked arrays) pass through np.atleast_1d unchanged.
    points = np.atleast_1d(points)
    if points.ndim == 1:
        points = points[:, None]

    # Get the median deviations from the median
    med_abs_deviation, diff, median = median_abs_deviation(
        points, data=data, median=median, med_abs_deviation=med_abs_deviation)
    modified_z_score = 0.6745 * diff / med_abs_deviation

    # Handle whether this value is a positive or negative z-score.
    # Maps True -> +1 and False -> -1; only the first column of `points`
    # decides the sign, and points equal to the median get -1 (their
    # deviation is zero, so the score is still zero).
    over_under = ((points > median) * 2. - 1.)[:, 0]

    # Return the resulting median z-score
    result = modified_z_score * over_under
    if len(result) == 1:
        return result[0]
    return result
def median_abs_deviation(points, data=None, median=None, med_abs_deviation=None):
    '''
    Calculate the median absolute deviation of the points from the median.

    If median is None, it will be calculated from the data; if data is
    None as well, the points themselves are used as the data.

    points: a scalar, sequence, or array of values. One-dimensional input
        is treated as a column of single-feature observations.
    data: optional values used only to compute the median (along axis 0).
    median: optional precomputed median.
    med_abs_deviation: optional precomputed median absolute deviation; when
        given it is returned unchanged.

    Returns a tuple ``(med_abs_deviation, diff, median)`` where ``diff``
    holds, for each row of points, the Euclidean distance from the median.

    Raises ValueError if the deviation has to be computed but every point
    coincides with the median, so no positive scale exists.
    '''
    # Accept Python/NumPy scalars, lists and arrays alike; array subclasses
    # (e.g. masked arrays) pass through np.atleast_1d unchanged.
    points = np.atleast_1d(points)

    if data is None:
        data = points
    if median is None:
        # Computed before the reshape below so that 1-D input yields a
        # scalar median rather than a length-1 array.
        median = ma.median(data, axis=0)
    if points.ndim == 1:
        points = points[:, None]

    # Euclidean distance of every row from the median.
    diff = ma.sqrt(ma.sum((points - median) ** 2, axis=-1))

    if med_abs_deviation is None:
        med_abs_deviation = ma.median(diff)
        # Handle the case where the data is more than 50% zeros (more
        # generally: at least half of the points sit on the median), which
        # would give a zero scale. Fall back to the smallest positive
        # deviation instead.
        if med_abs_deviation == 0.:
            positive = diff[diff > 0.]
            if positive.size == 0:
                raise ValueError(
                    'median absolute deviation is undefined: all points '
                    'are equal to the median')
            med_abs_deviation = positive.min()
    return med_abs_deviation, diff, median
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment