-
-
Save osroca/38adeeedd3adc19dbd29385c900f6bbd to your computer and use it in GitHub Desktop.
Whizzml with bad json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "name": "Normalize Dataset",
  "description": "Remove the top n anomalies from a dataset",
  "inputs": [
    {"name": "dataset-id", "type": "dataset-id", "description": "Dataset Id"},
    {"name": "top-n", "type": "number", "description": "Top N Anomalies to Remove", "default": 3}
  ],
  "outputs": [
    {"name": "normalized-dataset", "type": "dataset-id", "description": "Normalized Dataset"}
  ],
  "total_resources_created": 1,
  "resource_to_apply": "dataset"
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Remove the top n anomalies from a dataset
;; Collect the row numbers of the top anomalies recorded in an anomaly
;; resource.
;;   a: anomaly resource map; its ["model" "top_anomalies"] entry is
;;      read as a list of maps, each carrying a "row_number" key
;; Returns the list of "row_number" values, one per top anomaly, in the
;; order in which they appear in the resource.
(define (anomalous-rows a)
  (let (top-anomalies (get-in a ["model" "top_anomalies"])
        row-of (lambda (entry) (get entry "row_number")))
    (map row-of top-anomalies)))
;; Build a flatline filter expression that discards the given rows.
;;   rows: list of row numbers to drop
;; Returns a flatline expression of the form
;;   (not (or (= (row-number) r1) (= (row-number) r2) ...))
;; i.e. one that is true only for rows whose number is not in `rows`.
;; With an empty `rows` the template would expand to an `or` with no
;; operands, so in that case the constant expression `true` is returned
;; instead (nothing to discard).
;; NOTE(review): assumes a bare `true` is accepted as a dataset
;; "lisp_filter" -- confirm against the Flatline reference.
(define (row-filter rows)
  (if (empty? rows)
    (flatline "true")
    ;; one equality test per row, spliced into a single disjunction
    (let (eqs (map (lambda (n) (flatline "(= (row-number) {n})")) rows))
      (flatline "(not (or @{eqs}))"))))
;; Create a new dataset from `dataset-id` with its anomalous rows removed.
;;   dataset-id: id of the dataset to clean
;;   n: value passed as "top_n" when creating the anomaly detector
;; Steps: create an anomaly detector on the dataset, fetch it (excluding
;; "trees,fields"), turn its top anomalies into a row filter, log the rows
;; and the filter, delete the detector, and finally create a dataset from
;; the original using that filter as "lisp_filter".
;; Returns the result of create-and-wait-dataset.
(define (normalize-dataset dataset-id n)
  (let (detector-id (create-and-wait-anomaly {"dataset" dataset-id "top_n" n})
        detector (fetch detector-id {"exclude" "trees,fields"})
        outliers (anomalous-rows detector)
        keep-expr (row-filter outliers)
        dataset-args {"origin_dataset" dataset-id
                      "lisp_filter" keep-expr})
    (log-info "Deleting rows " outliers)
    (log-info "Using filter " keep-expr)
    ;; the detector was only needed to find the rows; it could be kept instead
    (delete detector-id)
    (create-and-wait-dataset dataset-args)))
;; Script body: `dataset-id` and `top-n` are the script's parameters; the
;; value produced by normalize-dataset is bound to `normalized-dataset`.
(define normalized-dataset
  (normalize-dataset dataset-id top-n))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment