This section describes each of the services compared in the throwdown and the algorithms/models used.
Decision trees, both single and bagged.
| import os | |
| import sys | |
| URL = "https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip" | |
| def run_gpu_test(use_cuda): | |
| os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" | |
| if not use_cuda: |
| import java.io.File; | |
| import java.io.FileInputStream; | |
| import java.io.FileOutputStream; | |
| import java.io.IOException; | |
| import java.io.ObjectInputStream; | |
| import java.io.ObjectOutputStream; | |
| import java.io.Serializable; | |
| import org.junit.Test; |
| Training dataset: https://bigml.com/shared/dataset/ymIL3HLZnUZCOhVCVEOlVNIpmQE | |
| Test dataset: https://bigml.com/shared/dataset/rx2sKQFQgGuYbbtPfeI309xizBx |
| { | |
| "name": "Custom feature analyzer", | |
| "description": "Find the best features for modeling using a greedy algorithm", | |
| "kind": "script", | |
| "source_code": "analyze-features.whizzml", | |
| "inputs": [ | |
| { | |
| "name": "dataset-id", | |
| "type": "dataset-id", |
| ;; Given a dataset id and a list of field ids, fetch the dataset's | |
| ;; field map and return the corresponding human-readable field names. | |
| (define (feature-names dataset-id ids) | |
| (let (fields (get (fetch dataset-id) "fields")) | |
| (map (lambda (id) (get-in fields [id "name"])) ids))) | |
| (define (create-k-folds dataset-id k-folds) | |
| (let (k-fold-fn (lambda (x) (create-dataset | |
| {"origin_dataset" dataset-id | |
| "row_offset" x | |
| "row_step" k-folds |
| ;; This is a vanilla implementation of gradient boosting. The main | |
| ;; function is at the bottom of the script, where it explains the | |
| ;; algorithm in some detail. | |
| ;; Marker string added to the names of fields generated by this | |
| ;; script, so they can be distinguished from the original fields. | |
| (define boost-id "__bmlboost") | |
| ;; The names of the fields contain ground truth - if there are k | |
| ;; classes, this is k columns, one for each class. If the true class |
| #!/bin/bash | |
| # Set credentials for BigML and for the US Census | |
| BIGML_USERNAME=bigml | |
| BIGML_API_KEY=**** | |
| CENSUS_API_KEY=**** | |
| BIGML_AUTH="username=$BIGML_USERNAME;api_key=$BIGML_API_KEY" | |
| # Download demographic data into files. Have to do it this way | |
| # because the census API doesn't seem to allow returns of more |