Created
July 26, 2016 19:08
-
-
Save llSourcell/7b8777ba70d9e0d51c214a7f8dc7c4cc to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Remove the top n anomalies from a dataset
;; Given an anomaly resource, get the list of its top-anomaly row numbers.
;;
;; a: anomaly resource map. Its ["model" "top_anomalies"] entry is read
;;    as a list of maps, each of which is looked up by "row_number".
;; Returns: a list holding the "row_number" value of every top anomaly,
;;          in the order they appear in the resource.
(define (anomalous-rows a)
  (let (top-anomalies (a ["model" "top_anomalies"])
        row-of (lambda (x) (x "row_number")))
    (map row-of top-anomalies)))
;; Given a list of row numbers, generate a flatline expression
;; that discards those rows.
;;
;; rows: list of row numbers to drop.
;; Returns: a flatline predicate of the form
;;            (not (or (= (row-number) r1) (= (row-number) r2) ...))
;;          with one equality clause per element of `rows`.
;; NOTE(review): with an empty `rows` the template expands to an `or`
;; with no clauses -- confirm flatline accepts that, or guard upstream.
(define (row-filter rows)
  (let (row-eq (lambda (n) (flatline "(= (row-number) {n})"))
        eqs (map row-eq rows))
    ;; `eqs` is spliced into the template by name via @{eqs}, so the
    ;; binding name must stay in sync with the string below.
    (flatline "(not (or @{eqs}))")))
;; Given a dataset and a number of anomalies, generate a new dataset
;; that removes the anomalous rows from the original.
;;
;; dataset-id: identifier of the source dataset.
;; n: number of top anomalies to detect (passed as "top_n").
;; Returns: the result of `create-and-wait-dataset` for the filtered
;;          dataset (presumably its resource id -- confirm with callers).
;;
;; Steps:
;;   1. Build an anomaly detector over the dataset, asking for the top n.
;;   2. Fetch it, excluding "trees" and "fields" since only the
;;      top-anomalies list is read.
;;   3. Turn the anomalous row numbers into a flatline row filter.
;;   4. Delete the temporary anomaly detector.
;;   5. Create the new dataset from the original using that filter.
(define (normalize-dataset dataset-id n)
  (let (a-id (create-and-wait-anomaly {"dataset" dataset-id "top_n" n})
        anomaly (fetch a-id {"exclude" "trees,fields"})
        rows (anomalous-rows anomaly)
        ;; named `rows-filter` so the standard `filter` procedure is
        ;; not shadowed inside this body
        rows-filter (row-filter rows))
    ;; the detector is only an intermediate resource; drop it before
    ;; creating the final dataset
    (delete a-id)
    (create-and-wait-dataset {"origin_dataset" dataset-id
                              "lisp_filter" rows-filter})))
;; Script entry point.
;; Inputs: `dataset-id` and `n`, both passed straight to
;;         `normalize-dataset`.
;; Output: `normalized-dataset`, bound to the value returned by
;;         `normalize-dataset`.
(define normalized-dataset (normalize-dataset dataset-id n))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment