This section describes each of the services compared in the throwdown and the algorithms/models used.
Decision trees, both single and bagged.
| import os | |
| import sys | |
| URL = "https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip" | |
| def run_gpu_test(use_cuda): | |
| os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" | |
| if not use_cuda: |
| import java.io.File; | |
| import java.io.FileInputStream; | |
| import java.io.FileOutputStream; | |
| import java.io.IOException; | |
| import java.io.ObjectInputStream; | |
| import java.io.ObjectOutputStream; | |
| import java.io.Serializable; | |
| import org.junit.Test; |
| Training dataset: https://bigml.com/shared/dataset/ymIL3HLZnUZCOhVCVEOlVNIpmQE | |
| Test dataset: https://bigml.com/shared/dataset/rx2sKQFQgGuYbbtPfeI309xizBx |
| { | |
| "name": "Custom feature analyzer", | |
| "description": "Find the best features for modeling using a greedy algorithm", | |
| "kind": "script", | |
| "source_code": "analyze-features.whizzml", | |
| "inputs": [ | |
| { | |
| "name": "dataset-id", | |
| "type": "dataset-id", |
| ;; Given a dataset id and a list of field ids, fetch the dataset's | |
| ;; field map and return the corresponding human-readable field names. | |
| (define (feature-names dataset-id ids) | |
| (let (fields (get (fetch dataset-id) "fields")) | |
| (map (lambda (id) (get-in fields [id "name"])) ids))) | |
| (define (create-k-folds dataset-id k-folds) | |
| (let (k-fold-fn (lambda (x) (create-dataset | |
| {"origin_dataset" dataset-id | |
| "row_offset" x | |
| "row_step" k-folds |
| ;; This is a vanilla implementation of gradient boosting. The main | |
| ;; function is at the bottom of the script, where it explains the | |
| ;; algorithm in some detail. | |
| ;; Marker string added to the names of fields generated by this | |
| ;; script, so they can be distinguished from the original fields. | |
| (define boost-id "__bmlboost") | |
| ;; The names of the fields contain ground truth - if there are k | |
| ;; classes, this is k columns, one for each class. If the true class |
| #!/bin/bash | |
| # Set credentials for BigML and for the US Census | |
| BIGML_USERNAME=bigml | |
| BIGML_API_KEY=**** | |
| CENSUS_API_KEY=**** | |
| BIGML_AUTH="username=$BIGML_USERNAME;api_key=$BIGML_API_KEY" | |
| # Download demographic data into files. Have to do it this way | |
| # because the census API doesn't seem to allow returns of more |