Created
October 9, 2016 17:02
-
-
Save aficionado/7582bb59bc7050a348f4ae32798840a9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"name": "Tree optimization", | |
"description": "Script for tree optimization using SMACdown", | |
"kind": "script", | |
"source_code": "script.whizzml", | |
"inputs":[ | |
{ | |
"name": "dataset-id", | |
"type": "dataset-id", | |
"description": "Dataset for which we are seeking an optimal tree" | |
}, | |
{ | |
"name": "objective-id", | |
"type": "string", | |
"default": "default", | |
"description": "The tree's objective field, or 'default' to use the dataset's default" | |
}, | |
{ | |
"name": "metric", | |
"type": "string", | |
"default": "average_phi", | |
"description": "Evaluation metric that we want to optimize: one of average_recall, average_phi, accuracy, average_precision, or average_f_measure." | |
}, | |
{ | |
"name": "delete-resources", | |
"type": "boolean", | |
"default": false, | |
"description": "Whether to delete all intermediate resources" | |
} | |
], | |
"outputs":[ | |
{ | |
"name": "result", | |
"type": "list" | |
} | |
] | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Build a custom generator of random BigML tree parameters for the
;; SMACdown search.
;;
;; objective-type: optype of the objective field; "numeric" marks a
;;   regression problem.
;;
;; Returns a zero-argument procedure. Each call yields a new map with:
;;   "stat_pruning"      - true when a fresh (rand) draw is >= 0.5
;;   "balance_objective" - always false for regression; otherwise true
;;                         when a fresh (rand) draw is >= 0.5
;;   "node_threshold"    - (round (rand-range 4 max-nodes))
(define (smacdown-model-params-generator objective-type)
  (lambda ()
    (let (max-nodes 1999
          regression (= "numeric" objective-type))
      {"stat_pruning" (>= (rand) 0.5)
       ;; `or` short-circuits, so no random number is drawn for regression
       "balance_objective" (not (or regression (< (rand) 0.5)))
       "node_threshold" (round (rand-range 4 max-nodes))})))
;; Build the evaluation callback used by the SMACdown search.
;;
;; train:  dataset id used to train every candidate model
;; test:   dataset id used to evaluate every candidate model
;; obj:    id of the objective field
;; metric: name of the evaluation metric we want to optimize; its value
;;         is read from ["result" "model" metric] in each evaluation
;; name:   prefix used when naming the models created here
;;
;; Returns a procedure taking a list of candidate parameter maps and an
;; iteration number. It trains one model per candidate, evaluates each
;; model on the test set and returns one loss per evaluation. The loss
;; is 1 - metric, as the algorithm seeks to *minimize* a value and we
;; want to *maximize* the metric.
;;
;; Raises {"message" ... "code" -30} when an evaluation holds no numeric
;; value for the requested metric.
(define (smacdown-evaluator train test obj metric name)
  (lambda (params itr)
    (log-info "Evaluating " (count params) " candidates...")
    (let (train-params {"dataset" train
                        "objective_field" obj
                        "seed" "SMACdown"
                        "name" (str name " smacdown itr " itr " test model")}
          ;; Fixed training settings are merged last, so they take
          ;; precedence over same-named keys in a candidate
          mod-fn (lambda (p) (merge p train-params))
          eval-fn (lambda (m) {"model" m "dataset" test})
          mod-ids (create* "model" (map mod-fn params))
          eval-ids (create* "evaluation" (map eval-fn mod-ids))
          loss (lambda (ev)
                 (let (metric-value (ev ["result" "model" metric] false))
                   (if (not (number? metric-value))
                     (raise {"message" (str metric " is not a valid metric!")
                             "code" -30})
                     (- 1 metric-value))))
          ;; Wait for each evaluation here so that completion is only
          ;; reported once every result has actually been fetched
          losses (map (lambda (eid) (loss (fetch (wait eid)))) eval-ids))
      (log-info "Evaluation complete.")
      losses)))
;; Search for optimal tree parameters with SMACdown.
;;
;; train-params:   dataset-creation arguments for the training split; the
;;                 test split reuses them with "out_of_bag" set to true
;; objective-id:   id of the objective field
;; objective-type: optype of the objective field
;;
;; NOTE: `metric` is not a parameter of this function; it is resolved
;; from the enclosing script scope.
;;
;; Returns one map per entry produced by smacdown-optimize, in which the
;; smacdown--actual key is replaced by a `metric` key holding
;; 1 - actual.
(define (find-optimal-parameters train-params objective-id objective-type)
  (let (holdout-params (assoc train-params "out_of_bag" true)
        train-ds (create-dataset train-params)
        test-ds (create-dataset holdout-params)
        ;; Both splits must be finished before the search starts
        _ (wait* [train-ds test-ds])
        evaluate (smacdown-evaluator train-ds
                                     test-ds
                                     objective-id
                                     metric
                                     "smacdown-model")
        make-candidate (smacdown-model-params-generator objective-type)
        ranked (smacdown-optimize make-candidate evaluate "smacdown-model")
        ;; Turn the minimized loss back into the metric value
        restore-metric (lambda (entry)
                         (let (loss (entry smacdown--actual))
                           (assoc (dissoc entry smacdown--actual)
                                  metric (- 1 loss)))))
    (map restore-metric ranked)))
;; Best-effort deletion: attempt to delete the resource and, when that
;; fails, log the failure instead of propagating the error.
(define (safe-delete id)
  (try
    (delete id)
    (catch err
      (let (msg (str "Error deleting resource " id " ignored"))
        (log-info msg)))))
;; Split a dataset into training (80% sample) and test (its out-of-bag
;; complement) sets, search for optimal tree parameters, then train and
;; evaluate models using the first entry of the search result.
;;
;; dataset-id:   dataset to optimize a tree for
;; objective-id: objective field id, or "default" to use the dataset's
;;               own objective field
;; metric:       not referenced directly in this body
;;
;; NOTE: `delete-resources` is read from the enclosing script scope.
;;
;; Returns the list produced by find-optimal-parameters, with its first
;; entry extended with "full_model", "model" and "evaluation" ids.
;; Raises {"message" "Invalid objective field"} when the objective
;; field's optype cannot be found in the dataset.
(define (optimize-model dataset-id objective-id metric)
  (let (split-params {"origin_dataset" dataset-id
                      "sample_rate" 0.8
                      "replacement" false
                      "seed" "SMACdown"}
        holdout-params (assoc split-params "out_of_bag" true)
        target-id (if (= objective-id "default")
                    (dataset-get-objective-id dataset-id)
                    objective-id)
        found-type ((fetch dataset-id) ["fields" target-id "optype"] false)
        target-type (if found-type
                      found-type
                      (raise {"message" "Invalid objective field"}))
        ranked (find-optimal-parameters split-params target-id target-type)
        _ (log-info "SMACdown search complete")
        ;; Cleanup happens before the final resources are created, so
        ;; those are not affected by it
        _ (when delete-resources
            (log-info "Deleting intermediate resources...")
            (map safe-delete (created-resources)))
        _ (log-info "Training model on full dataset...")
        winner (head ranked)
        model-args (merge (winner "parameters" {})
                          {"objective_field" target-id "seed" "SMACdown"})
        full-mod (create-model dataset-id model-args)
        ;; Recreate the splits to train and evaluate a model with the
        ;; chosen parameters
        train-id (create-dataset split-params)
        test-id (create-dataset holdout-params)
        best-mod (create-model train-id model-args)
        best-eval (create-evaluation best-mod test-id))
    (wait* [best-eval full-mod])
    (cons (assoc winner
                 "full_model" full-mod
                 "model" best-mod
                 "evaluation" best-eval)
          (tail ranked))))
;; Script output: the list returned by optimize-model for the script's
;; dataset-id, objective-id and metric inputs.
(define result
  (optimize-model dataset-id
                  objective-id
                  metric))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment