Created
October 6, 2015 20:54
-
-
Save yogi87/e35ba1c5284c49bffff3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// anomaly detection based on Median Absolute Deviation estimates of
// standard deviation (robust to outliers and non-normal data)
//
import 'https://gist.githubusercontent.com/welch/3f7b696beab6ba1b55bb/raw/stdj.juttle' as stdj; | |
import 'https://gist.githubusercontent.com/welch/3f7b696beab6ba1b55bb/raw/math.juttle' as math; | |
import 'https://gist.githubusercontent.com/welch/3f7b696beab6ba1b55bb/raw/ts.juttle' as ts; | |
import 'https://gist.githubusercontent.com/welch/3f7b696beab6ba1b55bb/raw/mav.juttle' as mav; | |
import 'https://gist.githubusercontent.com/welch/3f7b696beab6ba1b55bb/raw/seasonal.juttle' as seasonal; | |
import 'https://gist.githubusercontent.com/welch/3f7b696beab6ba1b55bb/raw/forecast.juttle' as forecast; | |
// normalize_daily: run field `in` through seasonal.squash_daily and then
// mav.normalize, writing the result to field `out`.
//   in  - name of the input field
//   out - name of the field that receives the normalized value
// The squash_daily output is held in the temporary field _normalize_squashed,
// normalized over a :3d: window, and removed before points leave the sub.
// NOTE(review): presumably squash_daily removes the daily seasonal pattern and
// mav.normalize rescales by a median/MAD estimate -- confirm in seasonal.juttle
// and mav.juttle.
export sub normalize_daily(in, out) { | |
seasonal.squash_daily -in in -out '_normalize_squashed' | |
| mav.normalize -over :3d: -in '_normalize_squashed' -out out | |
| remove _normalize_squashed | |
} | |
// normalize_daily_by: grouped variant of the daily normalization; runs field
// `in` through seasonal.squash_daily_by and mav.normalize_by, writing to `out`.
//   in  - name of the input field
//   out - name of the field that receives the normalized value
//   by  - grouping field name, forwarded as -by to both steps
// The temporary field _normalize_squashed carries the squashed value between
// the two steps (normalized over a :3d: window) and is removed at the end.
// NOTE(review): presumably each distinct value of `by` is treated as its own
// series -- confirm in seasonal.juttle and mav.juttle.
export sub normalize_daily_by(in, out, by) { | |
seasonal.squash_daily_by -in in -out '_normalize_squashed' -by by | |
| mav.normalize_by -over :3d: -in '_normalize_squashed' -out out -by by | |
| remove _normalize_squashed | |
} | |
// normalize_forecast_daily: squash field `in` with seasonal.squash_daily, feed
// the result to forecast.forecast_err, then normalize that output with
// mav.normalize over a :3d: window into field `out`.
//   in  - name of the input field
//   out - name of the field that receives the normalized value
// Temporary fields _normalize_squashed and _normalize_err carry the
// intermediate values and are both removed before points leave the sub.
// NOTE(review): presumably forecast_err emits the forecast residual -- confirm
// in forecast.juttle.
export sub normalize_forecast_daily(in, out) { | |
seasonal.squash_daily -in in -out '_normalize_squashed' | |
| forecast.forecast_err -in '_normalize_squashed' -out '_normalize_err' | |
| mav.normalize -over :3d: -in '_normalize_err' -out out | |
| remove _normalize_squashed, _normalize_err | |
} | |
// normalize_forecast_daily_by: grouped variant of the daily forecast-error
// normalization. Chains seasonal.squash_daily_by, forecast.forecast_err_by and
// mav.normalize_by (window :3d:), forwarding `by` to each, and writes to `out`.
//   in  - name of the input field
//   out - name of the field that receives the normalized value
//   by  - grouping field name, forwarded as -by to all three steps
// Temporary fields _normalize_squashed and _normalize_err are removed at the end.
// NOTE(review): presumably forecast_err_by emits a per-series forecast
// residual -- confirm in forecast.juttle.
export sub normalize_forecast_daily_by(in, out, by) { | |
seasonal.squash_daily_by -in in -out '_normalize_squashed' -by by | |
| forecast.forecast_err_by -in '_normalize_squashed' -out '_normalize_err' -by by | |
| mav.normalize_by -over :3d: -in '_normalize_err' -out out -by by | |
| remove _normalize_squashed, _normalize_err | |
} | |
// outlier_daily: score each point with normalize_daily and keep only scores
// that qualify as outliers.
//   in    - name of the input field, passed to normalize_daily
//   out   - name of the field that receives the outlier score
//   after - time threshold; points with time <= after always score 0
//   sigma - magnitude threshold the normalized value must exceed
// After normalize_daily writes *out, the put overwrites it with its absolute
// value when the point is later than `after` and that magnitude exceeds
// `sigma`; otherwise it writes 0.
export sub outlier_daily(in, out, after, sigma) { | |
normalize_daily -in in -out out | |
| put *out = (time > after && Math.abs(*out) > sigma) ? Math.abs(*out) : 0 | |
} | |
// outlier_daily_by: grouped variant; scores each point with normalize_daily_by
// and keeps only scores that qualify as outliers.
//   in    - name of the input field, passed to normalize_daily_by
//   out   - name of the field that receives the outlier score
//   after - time threshold; points with time <= after always score 0
//   sigma - magnitude threshold the normalized value must exceed
//   by    - grouping field name, forwarded to normalize_daily_by
// The put overwrites *out with its absolute value when the point is later than
// `after` and that magnitude exceeds `sigma`; otherwise it writes 0.
export sub outlier_daily_by(in, out, after, sigma, by) { | |
normalize_daily_by -in in -out out -by by | |
| put *out = (time > after && Math.abs(*out) > sigma) ? Math.abs(*out) : 0 | |
} | |
// outlier_forecast_daily: score each point with normalize_forecast_daily and
// keep only scores that qualify as outliers.
//   in    - name of the input field, passed to normalize_forecast_daily
//   out   - name of the field that receives the outlier score
//   after - time threshold; points with time <= after always score 0
//   sigma - magnitude threshold the normalized value must exceed
// The put overwrites *out with its absolute value when the point is later than
// `after` and that magnitude exceeds `sigma`; otherwise it writes 0.
export sub outlier_forecast_daily(in, out, after, sigma) { | |
normalize_forecast_daily -in in -out out | |
| put *out = (time > after && Math.abs(*out) > sigma) ? Math.abs(*out) : 0 | |
} | |
// outlier_forecast_daily_by: grouped variant; scores each point with
// normalize_forecast_daily_by and keeps only scores that qualify as outliers.
//   in    - name of the input field, passed to normalize_forecast_daily_by
//   out   - name of the field that receives the outlier score
//   after - time threshold; points with time <= after always score 0
//   sigma - magnitude threshold the normalized value must exceed
//   by    - grouping field name, forwarded to normalize_forecast_daily_by
// The put overwrites *out with its absolute value when the point is later than
// `after` and that magnitude exceeds `sigma`; otherwise it writes 0.
export sub outlier_forecast_daily_by(in, out, after, sigma, by) { | |
normalize_forecast_daily_by -in in -out out -by by | |
| put *out = (time > after && Math.abs(*out) > sigma) ? Math.abs(*out) : 0 | |
} | |
// normalize_weekly: run field `in` through seasonal.squash_weekly and then
// mav.normalize, writing the result to field `out`.
//   in  - name of the input field
//   out - name of the field that receives the normalized value
// The squash_weekly output is held in the temporary field _normalize_squashed,
// normalized over a :3w: window, and removed before points leave the sub.
// NOTE(review): presumably squash_weekly removes the weekly seasonal pattern --
// confirm in seasonal.juttle.
export sub normalize_weekly(in, out) { | |
seasonal.squash_weekly -in in -out '_normalize_squashed' | |
| mav.normalize -over :3w: -in '_normalize_squashed' -out out | |
| remove _normalize_squashed | |
} | |
// normalize_forecast_weekly: squash field `in` with seasonal.squash_weekly,
// feed the result to forecast.forecast_err, then normalize that output with
// mav.normalize over a :3w: window into field `out`.
//   in  - name of the input field
//   out - name of the field that receives the normalized value
// Temporary fields _normalize_squashed and _normalize_err carry the
// intermediate values and are both removed before points leave the sub.
// NOTE(review): presumably forecast_err emits the forecast residual -- confirm
// in forecast.juttle.
export sub normalize_forecast_weekly(in, out) { | |
seasonal.squash_weekly -in in -out '_normalize_squashed' | |
| forecast.forecast_err -in '_normalize_squashed' -out '_normalize_err' | |
| mav.normalize -over :3w: -in '_normalize_err' -out out | |
| remove _normalize_squashed, _normalize_err | |
} | |
// normalize_weekly_by: grouped variant of the weekly normalization; runs field
// `in` through seasonal.squash_weekly_by and mav.normalize_by, writing to `out`.
//   in  - name of the input field
//   out - name of the field that receives the normalized value
//   by  - grouping field name, forwarded as -by to both steps
// The temporary field _normalize_squashed carries the squashed value between
// the two steps (normalized over a :3w: window) and is removed at the end.
export sub normalize_weekly_by(in, out, by) { | |
seasonal.squash_weekly_by -in in -out '_normalize_squashed' -by by | |
| mav.normalize_by -over :3w: -in '_normalize_squashed' -out out -by by | |
| remove _normalize_squashed | |
} | |
// outlier_weekly: score each point with normalize_weekly and keep only scores
// that qualify as outliers.
//   in    - name of the input field, passed to normalize_weekly
//   out   - name of the field that receives the outlier score
//   after - time threshold; points with time <= after always score 0
//   sigma - magnitude threshold the normalized value must exceed
// The put overwrites *out with its absolute value when the point is later than
// `after` and that magnitude exceeds `sigma`; otherwise it writes 0.
export sub outlier_weekly(in, out, after, sigma) { | |
normalize_weekly -in in -out out | |
| put *out = (time > after && Math.abs(*out) > sigma) ? Math.abs(*out) : 0 | |
} | |
// outlier_weekly_by: grouped variant; scores each point with
// normalize_weekly_by and keeps only scores that qualify as outliers.
//   in    - name of the input field, passed to normalize_weekly_by
//   out   - name of the field that receives the outlier score
//   after - time threshold; points with time <= after always score 0
//   sigma - magnitude threshold the normalized value must exceed
//   by    - grouping field name, forwarded to normalize_weekly_by
// The put overwrites *out with its absolute value when the point is later than
// `after` and that magnitude exceeds `sigma`; otherwise it writes 0.
export sub outlier_weekly_by(in, out, after, sigma, by) { | |
normalize_weekly_by -in in -out out -by by | |
| put *out = (time > after && Math.abs(*out) > sigma) ? Math.abs(*out) : 0 | |
} | |
//
// convert the output points of, e.g., outlier_daily into an event
// stream to be overlaid on a timechart having the specified title. a
// nonzero input value indicates an event condition, and the maximum
// nonzero input value/time in each interval will be selected for reporting.
// successive nonzero intervals will be suppressed as belonging to the same
// initial event.
//
// to_events: fan the input into two branches, each ending in an @events sink
// targeting `title`, then merge the branches.
//   in    - name of the field holding outlier scores (a value > 0 marks an outlier)
//   every - batch interval; one maximum score is selected per interval
//   after - end of the training window; points with time < after are training data
//   title - value passed to `@events -on` in both branches
// Branch 1 keeps only points where _end_training is true, i.e. a non-training
// point whose stdj.previous(_training_data, true) value is true, drops the
// helper fields and the `in` field, and labels the point "End of training window".
// Branch 2 takes percentile -p 1.0 of `in` per batch, flags values > 0, and
// keeps a flagged point only when stdj.previous(_outlier_event, false) is
// false, so a run of consecutive flagged intervals yields a single event.
// NOTE(review): presumably stdj.previous(x, init) returns x from the preceding
// point and `init` for the first point -- confirm in stdj.juttle.
export sub to_events(in, every, after, title) { | |
( | |
// mark the end of training window | |
put _training_data = (time < after) | |
| put _end_training = (!_training_data && stdj.previous(_training_data, true)) | |
| filter _end_training == true | |
| remove _training_data, _end_training, in | |
| put text = "End of training window" | |
| @events -on title; | |
// mark non-contiguous outlier events | |
batch -every every | percentile -p 1.0 in | unbatch // select max over each interval | |
| put _outlier_event = (*in > 0) | |
| put _deduped_event = _outlier_event && !stdj.previous(_outlier_event, false) | |
| filter _deduped_event == true | |
| remove _outlier_event, _deduped_event | |
| put text = "Outlier ("+math.roundStr(*in, 1)+"-sigma)", type = "fa-exclamation-triangle", label=in | |
| @events -on title; | |
merge; | |
) | |
} | |
// to_events_by: grouped variant of to_events. Fans the input into two branches,
// each ending in an @events sink targeting `title`, then merges the branches.
//   in    - name of the field holding outlier scores (a value > 0 marks an outlier)
//   every - batch interval; one maximum score is selected per interval and group
//   after - end of the training window; points with time < after are training data
//   by    - name of the grouping field; its value becomes the event label
//   title - value passed to `@events -on` in both branches
// Branch 1 (end-of-training marker) is not grouped: it applies no `by` clause
// and keeps only points where _end_training is true.
// Branch 2 takes percentile -p 1.0 of `in` per batch and per `by` value, flags
// values > 0, and de-duplicates consecutive flagged intervals per `by` value.
// NOTE(review): presumably stdj.previous(x, init) returns x from the preceding
// point and `init` for the first point -- confirm in stdj.juttle.
export sub to_events_by(in, every, after, by, title) { | |
( | |
// mark the end of training window | |
put _training_data = (time < after) | |
| put _end_training = (!_training_data && stdj.previous(_training_data, true)) | |
| filter _end_training == true | |
| remove _training_data, _end_training, in | |
| put text = "End of training window" | |
| @events -on title; | |
// mark non-contiguous outlier events | |
batch -every every | percentile -p 1.0 in by by | unbatch // select max over each interval | |
| put _outlier_event = (*in > 0) | |
| put _deduped_event = _outlier_event && !stdj.previous(_outlier_event, false) by by | |
| filter _deduped_event == true | |
| remove _outlier_event, _deduped_event | |
| put text = "Outlier ("+math.roundStr(*in, 1)+"-sigma)", type = "fa-exclamation-triangle", label=*by | |
| @events -on title; | |
merge; | |
) | |
} | |
// outlier_chart_daily: compute daily outlier scores for field `in`, chart them,
// and overlay outlier events on that chart.
//   in    - name of the field to analyze
//   every - interval forwarded to to_events for selecting one event per interval
//   after - end of the training window; points with time <= after are dropped
//           before charting
//   sigma - outlier threshold forwarded to outlier_daily
//   title - chart title prefix; the chart and its events both use
//           "<title> <sigma>-sigma outliers"
// The outlier score is written to a field named 'sigma', which is also the
// -secondary field of the chart and the -in field of to_events.
// NOTE(review): presumably ts.split_dual_chart plots the original and 'sigma'
// series together and stdj.end terminates the flow -- confirm in ts.juttle and
// stdj.juttle.
export sub outlier_chart_daily(in, every, after, sigma, title) { | |
const sig_title = title + " " + Number.toString(sigma)+'-sigma outliers'; | |
// normalize each input series, and flag outliers n-sigmas from the mean. | |
outlier_daily -in in -out 'sigma' -after after -sigma sigma | |
| filter time > after | |
| ts.split_dual_chart -title sig_title -secondary 'sigma' | |
// generate an event stream from the outliers just computed, and overlay it on the timechart | |
| to_events -in 'sigma' -every every -after after -title sig_title | |
| stdj.end | |
} | |
// outlier_chart_daily_by: compute per-series daily outlier scores, chart them,
// and overlay outlier events on that chart.
//   in    - name of the field to analyze; it is overwritten with the outlier score
//   every - interval forwarded to to_events_by for selecting one event per interval
//   after - end of the training window; points with time <= after are dropped
//           before charting
//   sigma - outlier threshold forwarded to outlier_daily_by
//   by    - name of the field that identifies each series
//   title - chart title prefix; the chart and its events both use
//           "<title> <sigma>-sigma outliers"
// NOTE(review): presumably ts.chart_by draws one series per `by` value and
// stdj.end terminates the flow -- confirm in ts.juttle and stdj.juttle.
export sub outlier_chart_daily_by(in, every, after, sigma, by, title) {
    const sig_title = title + " " + Number.toString(sigma)+'-sigma outliers';
    // normalize each input series, and flag outliers n-sigmas from the mean.
    outlier_daily_by -in in -out in -after after -sigma sigma -by by
    | filter time > after
    | ts.chart_by -by by -title sig_title // split makes this difficult to combine with the originals
    // generate an event stream from the outliers just computed, and overlay it on the timechart.
    // the events must target sig_title, the title the chart above was created with;
    // passing the bare title would address a chart that this sub never creates.
    | to_events_by -in in -every every -after after -by by -title sig_title
    | stdj.end
}
// outlier_chart_forecast_daily: compute daily forecast-error outlier scores for
// field `in`, chart them, and overlay outlier events on that chart.
//   in    - name of the field to analyze
//   every - interval forwarded to to_events for selecting one event per interval
//   after - end of the training window; points with time <= after are dropped
//           before charting
//   sigma - outlier threshold forwarded to outlier_forecast_daily
//   title - chart title prefix; the chart and its events both use
//           "<title> <sigma>-sigma outliers"
// The outlier score is written to a field named 'sigma', which is also the
// -secondary field of the chart and the -in field of to_events.
// NOTE(review): presumably ts.split_dual_chart plots the original and 'sigma'
// series together and stdj.end terminates the flow -- confirm in ts.juttle and
// stdj.juttle.
export sub outlier_chart_forecast_daily(in, every, after, sigma, title) { | |
const sig_title = title + " " + Number.toString(sigma)+'-sigma outliers'; | |
// normalize each input series, and flag outliers n-sigmas from the mean. | |
outlier_forecast_daily -in in -out 'sigma' -after after -sigma sigma | |
| filter time > after | |
| ts.split_dual_chart -title sig_title -secondary 'sigma' | |
// generate an event stream from the outliers just computed, and overlay it on the timechart | |
| to_events -in 'sigma' -every every -after after -title sig_title | |
| stdj.end | |
} | |
// outlier_chart_forecast_daily_by: compute per-series daily forecast-error
// outlier scores, chart them, and overlay outlier events on that chart.
//   in    - name of the field to analyze
//   every - interval forwarded to to_events_by for selecting one event per interval
//   after - end of the training window; points with time <= after are dropped
//           before charting
//   sigma - outlier threshold forwarded to outlier_forecast_daily_by
//   title - chart title prefix; the chart and its events both use
//           "<title> <sigma>-sigma outliers"
//   by    - name of the field that identifies each series
// Note the parameter order: `title` precedes `by` here, unlike
// outlier_chart_daily_by and charts_weekly_by.
// The outlier score is written to a field named 'sigma', which is also the
// -secondary field of the chart and the -in field of to_events_by.
// NOTE(review): presumably ts.split_dual_chart_by plots each series with its
// 'sigma' score and stdj.end terminates the flow -- confirm in ts.juttle and
// stdj.juttle.
export sub outlier_chart_forecast_daily_by(in, every, after, sigma, title,by) { | |
const sig_title = title + " " + Number.toString(sigma)+'-sigma outliers'; | |
// normalize each input series, and flag outliers n-sigmas from the mean. | |
outlier_forecast_daily_by -in in -out 'sigma' -after after -sigma sigma -by by | |
| filter time > after | |
| ts.split_dual_chart_by -title sig_title -secondary 'sigma' -by by | |
// generate an event stream from the outliers just computed, and overlay it on the timechart | |
| to_events_by -in 'sigma' -every every -after after -title sig_title -by by | |
| stdj.end | |
} | |
// outlier_chart_weekly: compute weekly outlier scores for field `in`, chart
// them, and overlay outlier events on that chart.
//   in    - name of the field to analyze
//   every - interval forwarded to to_events for selecting one event per interval
//   after - end of the training window; points with time <= after are dropped
//           before charting
//   sigma - outlier threshold forwarded to outlier_weekly
//   title - chart title prefix; the chart and its events both use
//           "<title> <sigma>-sigma outliers"
// The outlier score is written to a field named 'sigma', which is also the
// -secondary field of the chart and the -in field of to_events.
// NOTE(review): presumably ts.split_dual_chart plots the original and 'sigma'
// series together and stdj.end terminates the flow -- confirm in ts.juttle and
// stdj.juttle.
export sub outlier_chart_weekly(in, every, after, sigma, title) { | |
const sig_title = title + " " + Number.toString(sigma)+'-sigma outliers'; | |
// normalize each input series, and flag outliers n-sigmas from the mean. | |
outlier_weekly -in in -out 'sigma' -after after -sigma sigma | |
| filter time > after | |
| ts.split_dual_chart -title sig_title -secondary 'sigma' | |
// generate an event stream from the outliers just computed, and overlay it on the timechart | |
| to_events -in 'sigma' -every every -after after -title sig_title | |
| stdj.end | |
} | |
// charts_weekly_by: compute per-series weekly outlier scores, chart them, and
// overlay outlier events on that chart.
//   in    - name of the field to analyze; it is overwritten with the outlier score
//   every - interval forwarded to to_events_by for selecting one event per interval
//   after - end of the training window; points with time <= after are dropped
//           before charting
//   sigma - outlier threshold forwarded to outlier_weekly_by
//   by    - name of the field that identifies each series
//   title - chart title prefix; the chart and its events both use
//           "<title> <sigma>-sigma outliers"
// NOTE(review): presumably ts.chart_by draws one series per `by` value and
// stdj.end terminates the flow -- confirm in ts.juttle and stdj.juttle.
export sub charts_weekly_by(in, every, after, sigma, by, title) {
    const sig_title = title + " " + Number.toString(sigma)+'-sigma outliers';
    // normalize each input series, and flag outliers n-sigmas from the mean.
    outlier_weekly_by -in in -out in -after after -sigma sigma -by by
    | filter time > after
    | ts.chart_by -by by -title sig_title // split makes this difficult to combine with the originals
    // generate an event stream from the outliers just computed, and overlay it on the timechart.
    // the events must target sig_title, the title the chart above was created with;
    // passing the bare title would address a chart that this sub never creates.
    | to_events_by -in in -every every -after after -by by -title sig_title
    | stdj.end
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment