Skip to content

Instantly share code, notes, and snippets.

@mistycheney
Last active September 13, 2018 22:10
Show Gist options
  • Save mistycheney/e321683d3cfc9a05fe89c02d415878a0 to your computer and use it in GitHub Desktop.
Save mistycheney/e321683d3cfc9a05fe89c02d415878a0 to your computer and use it in GitHub Desktop.
Count anomalies based on scores
# Various ways to find peaks in Python
# https://github.com/MonsieurV/py-findpeaks
def count_anomalies_from_scores(scores, plot=False, min_separation=60):
    """
    Count anomalies in a 1-D score series.

    Samples are first flagged by ``detect_outlier``; the resulting boolean
    mask is then passed to ``scipy.signal.find_peaks`` so that each run of
    consecutive outliers is reported once, and detections closer than
    ``min_separation`` samples are merged into a single anomaly.

    Parameters:
    -----------
    scores : 1-D array-like of anomaly scores.
    plot : If True, draw three stacked panels (normalized score, outlier
        mask, peak mask). Uses the ``plt`` name from the enclosing module.
    min_separation : Minimum distance, in samples, between two reported
        peaks (forwarded as ``distance`` to ``find_peaks``). The default of
        60 corresponds to 60 minutes if there is one sample per minute --
        NOTE(review): confirm the sampling rate with the caller.

    Returns:
    --------
    num_anomalies : Number of peaks found.
    peaks : Integer array of sample indices, one per anomaly.
    is_outlier : Boolean mask returned by ``detect_outlier``.
    """
    from scipy.signal import find_peaks

    is_outlier = detect_outlier(scores)

    # For a flat run of True values find_peaks returns the middle sample of
    # the run. A run that touches the first or last sample has no lower
    # neighbour on that side and is therefore not reported as a peak.
    peaks, _ = find_peaks(is_outlier, distance=min_separation)

    # Count the peaks themselves; counting non-zero *index values* would
    # silently drop a peak located at index 0.
    num_anomalies = len(peaks)

    if plot:
        _, axes = plt.subplots(3, 1, figsize=(16, 5), squeeze=True)

        values = np.asarray(scores, dtype=float)
        shifted = values - values.min()
        span = values.max() - values.min()
        # A constant series has zero range; plot it as all zeros rather
        # than dividing by zero.
        normalized = shifted / span if span else shifted
        axes[0].plot(normalized)
        axes[0].set_title('Score (normalized)')

        axes[1].plot(is_outlier)
        axes[1].set_title('Is outlier')

        peak_mask = np.zeros_like(is_outlier, dtype=bool)
        peak_mask[peaks] = True
        axes[2].plot(peak_mask)
        axes[2].set_title('Is peak')

        plt.tight_layout()

    print('Found %d anomalies' % num_anomalies)
    return num_anomalies, peaks, is_outlier
def detect_outlier(points, thresh=3.5):
    """
    Returns a boolean array with True if points are high-side outliers and
    False otherwise.

    A point is flagged when its modified z-score (based on the median
    absolute deviation) exceeds ``thresh`` AND its first coordinate lies
    strictly above the median of the first coordinate. Points far below the
    median are therefore never flagged.

    Parameters:
    -----------
    points : A numobservations by numdimensions array of observations. A
        1-D sequence is treated as numobservations by 1.
    thresh : The modified z-score to use as a threshold. Observations with
        a modified z-score (based on the median absolute deviation) greater
        than this value will be classified as outliers.

    Returns:
    --------
    mask : A numobservations-length boolean array.

    Notes:
    ------
    When the median absolute deviation is 0 (more than half of the points
    coincide with the median), the z-score is infinite for every point that
    differs from the median and NaN for the rest, so exactly the differing
    points exceed the threshold.

    References:
    ----------
    Boris Iglewicz and David Hoaglin (1993), "Volume 16: How to Detect and
    Handle Outliers", The ASQC Basic References in Quality Control:
    Statistical Techniques, Edward F. Mykytka, Ph.D., Editor.
    """
    # https://stackoverflow.com/questions/11882393/matplotlib-disregard-outliers-when-plotting
    points = np.asarray(points)
    if points.ndim == 1:
        points = points[:, None]

    median = np.median(points, axis=0)
    # Euclidean distance of every observation from the per-dimension median.
    diff = np.sqrt(np.sum((points - median) ** 2, axis=-1))
    med_abs_deviation = np.median(diff)

    # A zero MAD yields inf/NaN scores; that outcome is intentional (see
    # Notes), so silence the floating-point warnings instead of failing.
    with np.errstate(divide='ignore', invalid='ignore'):
        modified_z_score = 0.6745 * diff / med_abs_deviation
        mask = modified_z_score > thresh

    # Compare the first coordinate against the median of that same
    # coordinate only; subtracting the whole median vector would not
    # broadcast for inputs with more than one dimension.
    return mask & (points[:, 0] - median[0] > 0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment