Last active
January 5, 2019 05:25
-
-
Save jaor/e54322cbd9750a5f408d58486d03d392 to your computer and use it in GitHub Desktop.
Incremental anomaly detection
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "name": "Incremental anomalies",
  "kind": "script",
  "description": "Adds a new dataset to a collection and computes a set of anomalies for the new, extended data",
  "source_code": "script.whizzml",
  "imports":[
  ],
  "inputs":[
    {
      "name": "url",
      "type": "string",
      "description": "URL of the new data to fetch (e.g., an s3:// or http:// URL)"
    },
    {
      "name": "max-datasets",
      "type": "number",
      "description": "Maximum number of datasets to use",
      "default": 10
    },
    {
      "name": "tag",
      "type": "string",
      "description": "Tag used to identify previous datasets to merge",
      "default": "ongoing-anomalies"
    },
    {
      "name": "anomalies",
      "type": "number",
      "description": "Number of anomalies to compute",
      "default": 10
    }],
  "outputs":[
    {
      "name": "anomaly",
      "type": "anomaly-id",
      "description": "ID of the final anomaly detector"
    },
    {
      "name": "top-anomalies",
      "type": "list",
      "description": "A list of the top anomalies in the created detector"
    }]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Helper: returns the resource IDs of at most n datasets carrying
;; the given tag.
;;
;; A single listing request is issued, with "limit" set to n and a
;; "tags__in" filter set to tag; no pagination is attempted.  The
;; original author assumes n stays below 20 -- a more robust version
;; would have to page through the results to support larger values.
(define (get-datasets tag n)
  (let (query {"limit" n "tags__in" tag}
        listing (list-datasets query))
    (resource-ids listing)))
;; Helper: builds a source from the remote data located at url and
;; then a dataset from that source.  Both resources are created with
;; the given tag; the value of the create-dataset call is returned.
(define (create-new-dataset url tag)
  (let (tags [tag]
        source-id (create-source {"remote" url "tags" tags}))
    (create-dataset source-id {"tags" tags})))
;; Builds an anomaly detector reporting n top anomalies over a
;; collection of up to max-datasets datasets identified by tag.  The
;; collection includes a dataset freshly created from the remote data
;; at url.  Returns the result of waiting on the created anomaly.
(define (compute-anomalies url tag n max-datasets)
  (let (fresh-ds (create-new-dataset url tag)
        collection (get-datasets tag max-datasets)
        newest (head collection))
    ;; sanity check: the listing is expected to start with the dataset
    ;; we have just created; anything else aborts the script
    (when (not (= newest fresh-ds))
      (raise (str "Unexpected head of dataset list:"
                  newest " != " fresh-ds)))
    ;; with the collection validated, create the detector over all its
    ;; datasets and wait on it
    (let (detector (create-anomaly {"datasets" collection
                                    "tags" [tag]
                                    "top_n" n}))
      (wait detector))))
;; Script output: the anomaly detector computed from the script inputs
;; (url, tag, anomalies, max-datasets).
(define anomaly
  (compute-anomalies url tag anomalies max-datasets))

;; Script output: the value found under "model" -> "top_anomalies" in
;; the fetched detector, or an empty list when that path is missing.
(define top-anomalies
  (let (detector (fetch anomaly))
    (detector ["model" "top_anomalies"] [])))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment