This section describes each of the services compared in the throwdown and the algorithms/models used.
Decision trees, both single and bagged.
import os | |
import sys | |
URL = "https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip" | |
def run_gpu_test(use_cuda): | |
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" | |
if not use_cuda: |
import java.io.File; | |
import java.io.FileInputStream; | |
import java.io.FileOutputStream; | |
import java.io.IOException; | |
import java.io.ObjectInputStream; | |
import java.io.ObjectOutputStream; | |
import java.io.Serializable; | |
import org.junit.Test; |
Training dataset: https://bigml.com/shared/dataset/ymIL3HLZnUZCOhVCVEOlVNIpmQE | |
Test dataset: https://bigml.com/shared/dataset/rx2sKQFQgGuYbbtPfeI309xizBx |
{ | |
"name": "Custom feature analyzer", | |
"description": "Find the best features for modeling using a greedy algorithm", | |
"kind": "script", | |
"source_code": "analyze-features.whizzml", | |
"inputs": [ | |
{ | |
"name": "dataset-id", | |
"type": "dataset-id", |
;; Translate a list of field ids into the corresponding field names.
;;
;; dataset-id: id of the dataset resource whose fields are looked up
;; ids: list of field ids found in that dataset's "fields" map
;;
;; Returns a list holding, for each id and in the same order, the
;; value stored under "name" in that field's entry.
(define (feature-names dataset-id ids)
  (let (dataset (fetch dataset-id)
        field-map (get dataset "fields")
        name-of (lambda (fid) (get-in field-map [fid "name"])))
    (map name-of ids)))
(define (create-k-folds dataset-id k-folds) | |
(let (k-fold-fn (lambda (x) (create-dataset | |
{"origin_dataset" dataset-id | |
"row_offset" x | |
"row_step" k-folds |
;; This is a vanilla implementation of gradient boosting. The main | |
;; function is at the bottom of the script, where it explains the | |
;; algorithm in some detail. | |
;; Marker string added to the names of the fields this script
;; generates, so that generated fields can be recognized as ours.
(define boost-id "__bmlboost") | |
;; The names of the fields contain ground truth - if there are k | |
;; classes, this is k columns, one for each class. If the true class |
#!/bin/bash | |
# Set credentials for BigML and for the US Census | |
BIGML_USERNAME=bigml | |
BIGML_API_KEY=**** | |
CENSUS_API_KEY=**** | |
BIGML_AUTH="username=$BIGML_USERNAME;api_key=$BIGML_API_KEY" | |
# Download demographic data into files. Have to do it this way | |
# because the census API doesn't seem to allow returns of more |