Last active
July 16, 2017 02:22
-
-
Save northface/732ea069523e01987df0fe4a262a07e6 to your computer and use it in GitHub Desktop.
GBM Example by H2O.ai
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "GBM Example\n", | |
| "http://h2o-release.s3.amazonaws.com/h2o/rel-vajda/3/docs-website/h2o-docs/starting-h2o.html#from-python" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import h2o" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Checking whether there is an H2O instance running at http://localhost:54321..... not found.\n", | |
| "Attempting to start a local H2O server...\n", | |
| " Java Version: openjdk version \"1.8.0_121\"; OpenJDK Runtime Environment (Zulu 8.20.0.5-macosx) (build 1.8.0_121-b15); OpenJDK 64-Bit Server VM (Zulu 8.20.0.5-macosx) (build 25.121-b15, mixed mode)\n", | |
| " Starting server from /Users/northface/anaconda/envs/py2/lib/python2.7/site-packages/h2o/backend/bin/h2o.jar\n", | |
| " Ice root: /var/folders/p1/j6jk6yyx48710zss8fkcht580000gn/T/tmptcb7fX\n", | |
| " JVM stdout: /var/folders/p1/j6jk6yyx48710zss8fkcht580000gn/T/tmptcb7fX/h2o_northface_started_from_python.out\n", | |
| " JVM stderr: /var/folders/p1/j6jk6yyx48710zss8fkcht580000gn/T/tmptcb7fX/h2o_northface_started_from_python.err\n", | |
| " Server is running at http://127.0.0.1:54321\n", | |
| "Connecting to H2O server at http://127.0.0.1:54321... successful.\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td>H2O cluster uptime:</td>\n", | |
| "<td>03 secs</td></tr>\n", | |
| "<tr><td>H2O cluster version:</td>\n", | |
| "<td>3.10.4.8</td></tr>\n", | |
| "<tr><td>H2O cluster version age:</td>\n", | |
| "<td>1 month and 25 days </td></tr>\n", | |
| "<tr><td>H2O cluster name:</td>\n", | |
| "<td>H2O_from_python_northface_xfu3tb</td></tr>\n", | |
| "<tr><td>H2O cluster total nodes:</td>\n", | |
| "<td>1</td></tr>\n", | |
| "<tr><td>H2O cluster free memory:</td>\n", | |
| "<td>3.556 Gb</td></tr>\n", | |
| "<tr><td>H2O cluster total cores:</td>\n", | |
| "<td>4</td></tr>\n", | |
| "<tr><td>H2O cluster allowed cores:</td>\n", | |
| "<td>4</td></tr>\n", | |
| "<tr><td>H2O cluster status:</td>\n", | |
| "<td>accepting new members, healthy</td></tr>\n", | |
| "<tr><td>H2O connection url:</td>\n", | |
| "<td>http://127.0.0.1:54321</td></tr>\n", | |
| "<tr><td>H2O connection proxy:</td>\n", | |
| "<td>None</td></tr>\n", | |
| "<tr><td>H2O internal security:</td>\n", | |
| "<td>False</td></tr>\n", | |
| "<tr><td>Python version:</td>\n", | |
| "<td>2.7.13 final</td></tr></table></div>" | |
| ], | |
| "text/plain": [ | |
| "-------------------------- --------------------------------\n", | |
| "H2O cluster uptime: 03 secs\n", | |
| "H2O cluster version: 3.10.4.8\n", | |
| "H2O cluster version age: 1 month and 25 days\n", | |
| "H2O cluster name: H2O_from_python_northface_xfu3tb\n", | |
| "H2O cluster total nodes: 1\n", | |
| "H2O cluster free memory: 3.556 Gb\n", | |
| "H2O cluster total cores: 4\n", | |
| "H2O cluster allowed cores: 4\n", | |
| "H2O cluster status: accepting new members, healthy\n", | |
| "H2O connection url: http://127.0.0.1:54321\n", | |
| "H2O connection proxy:\n", | |
| "H2O internal security: False\n", | |
| "Python version: 2.7.13 final\n", | |
| "-------------------------- --------------------------------" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "from h2o.estimators.gbm import H2OGradientBoostingEstimator\n", | |
| "h2o.init()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Parse progress: |█████████████████████████████████████████████████████████| 100%\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# Import the Airlines dataset\n", | |
| "# This will be used to classify whether a flight is delayed\n", | |
| "airlines = h2o.import_file(\"https://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<table>\n", | |
| "<tr><th style=\"text-align: right;\"> Year</th><th style=\"text-align: right;\"> Month</th><th style=\"text-align: right;\"> DayofMonth</th><th style=\"text-align: right;\"> DayOfWeek</th><th style=\"text-align: right;\"> DepTime</th><th style=\"text-align: right;\"> CRSDepTime</th><th style=\"text-align: right;\"> ArrTime</th><th style=\"text-align: right;\"> CRSArrTime</th><th>UniqueCarrier </th><th style=\"text-align: right;\"> FlightNum</th><th>TailNum </th><th style=\"text-align: right;\"> ActualElapsedTime</th><th style=\"text-align: right;\"> CRSElapsedTime</th><th style=\"text-align: right;\"> AirTime</th><th style=\"text-align: right;\"> ArrDelay</th><th style=\"text-align: right;\"> DepDelay</th><th>Origin </th><th>Dest </th><th style=\"text-align: right;\"> Distance</th><th style=\"text-align: right;\"> TaxiIn</th><th style=\"text-align: right;\"> TaxiOut</th><th style=\"text-align: right;\"> Cancelled</th><th>CancellationCode </th><th style=\"text-align: right;\"> Diverted</th><th style=\"text-align: right;\"> CarrierDelay</th><th style=\"text-align: right;\"> WeatherDelay</th><th style=\"text-align: right;\"> NASDelay</th><th style=\"text-align: right;\"> SecurityDelay</th><th style=\"text-align: right;\"> LateAircraftDelay</th><th>IsArrDelayed </th><th>IsDepDelayed </th></tr>\n", | |
| "<tr><td style=\"text-align: right;\"> 1987</td><td style=\"text-align: right;\"> 10</td><td style=\"text-align: right;\"> 14</td><td style=\"text-align: right;\"> 3</td><td style=\"text-align: right;\"> 741</td><td style=\"text-align: right;\"> 730</td><td style=\"text-align: right;\"> 912</td><td style=\"text-align: right;\"> 849</td><td>PS </td><td style=\"text-align: right;\"> 1451</td><td>NA </td><td style=\"text-align: right;\"> 91</td><td style=\"text-align: right;\"> 79</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 23</td><td style=\"text-align: right;\"> 11</td><td>SAN </td><td>SFO </td><td style=\"text-align: right;\"> 447</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 0</td><td>NA </td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td>YES </td><td>YES </td></tr>\n", | |
| "<tr><td style=\"text-align: right;\"> 1987</td><td style=\"text-align: right;\"> 10</td><td style=\"text-align: right;\"> 15</td><td style=\"text-align: right;\"> 4</td><td style=\"text-align: right;\"> 729</td><td style=\"text-align: right;\"> 730</td><td style=\"text-align: right;\"> 903</td><td style=\"text-align: right;\"> 849</td><td>PS </td><td style=\"text-align: right;\"> 1451</td><td>NA </td><td style=\"text-align: right;\"> 94</td><td style=\"text-align: right;\"> 79</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 14</td><td style=\"text-align: right;\"> -1</td><td>SAN </td><td>SFO </td><td style=\"text-align: right;\"> 447</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 0</td><td>NA </td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td>YES </td><td>NO </td></tr>\n", | |
| "<tr><td style=\"text-align: right;\"> 1987</td><td style=\"text-align: right;\"> 10</td><td style=\"text-align: right;\"> 17</td><td style=\"text-align: right;\"> 6</td><td style=\"text-align: right;\"> 741</td><td style=\"text-align: right;\"> 730</td><td style=\"text-align: right;\"> 918</td><td style=\"text-align: right;\"> 849</td><td>PS </td><td style=\"text-align: right;\"> 1451</td><td>NA </td><td style=\"text-align: right;\"> 97</td><td style=\"text-align: right;\"> 79</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 29</td><td style=\"text-align: right;\"> 11</td><td>SAN </td><td>SFO </td><td style=\"text-align: right;\"> 447</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 0</td><td>NA </td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td>YES </td><td>YES </td></tr>\n", | |
| "<tr><td style=\"text-align: right;\"> 1987</td><td style=\"text-align: right;\"> 10</td><td style=\"text-align: right;\"> 18</td><td style=\"text-align: right;\"> 7</td><td style=\"text-align: right;\"> 729</td><td style=\"text-align: right;\"> 730</td><td style=\"text-align: right;\"> 847</td><td style=\"text-align: right;\"> 849</td><td>PS </td><td style=\"text-align: right;\"> 1451</td><td>NA </td><td style=\"text-align: right;\"> 78</td><td style=\"text-align: right;\"> 79</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> -2</td><td style=\"text-align: right;\"> -1</td><td>SAN </td><td>SFO </td><td style=\"text-align: right;\"> 447</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 0</td><td>NA </td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td>NO </td><td>NO </td></tr>\n", | |
| "<tr><td style=\"text-align: right;\"> 1987</td><td style=\"text-align: right;\"> 10</td><td style=\"text-align: right;\"> 19</td><td style=\"text-align: right;\"> 1</td><td style=\"text-align: right;\"> 749</td><td style=\"text-align: right;\"> 730</td><td style=\"text-align: right;\"> 922</td><td style=\"text-align: right;\"> 849</td><td>PS </td><td style=\"text-align: right;\"> 1451</td><td>NA </td><td style=\"text-align: right;\"> 93</td><td style=\"text-align: right;\"> 79</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 33</td><td style=\"text-align: right;\"> 19</td><td>SAN </td><td>SFO </td><td style=\"text-align: right;\"> 447</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 0</td><td>NA </td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td>YES </td><td>YES </td></tr>\n", | |
| "<tr><td style=\"text-align: right;\"> 1987</td><td style=\"text-align: right;\"> 10</td><td style=\"text-align: right;\"> 21</td><td style=\"text-align: right;\"> 3</td><td style=\"text-align: right;\"> 728</td><td style=\"text-align: right;\"> 730</td><td style=\"text-align: right;\"> 848</td><td style=\"text-align: right;\"> 849</td><td>PS </td><td style=\"text-align: right;\"> 1451</td><td>NA </td><td style=\"text-align: right;\"> 80</td><td style=\"text-align: right;\"> 79</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> -1</td><td style=\"text-align: right;\"> -2</td><td>SAN </td><td>SFO </td><td style=\"text-align: right;\"> 447</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 0</td><td>NA </td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td>NO </td><td>NO </td></tr>\n", | |
| "<tr><td style=\"text-align: right;\"> 1987</td><td style=\"text-align: right;\"> 10</td><td style=\"text-align: right;\"> 22</td><td style=\"text-align: right;\"> 4</td><td style=\"text-align: right;\"> 728</td><td style=\"text-align: right;\"> 730</td><td style=\"text-align: right;\"> 852</td><td style=\"text-align: right;\"> 849</td><td>PS </td><td style=\"text-align: right;\"> 1451</td><td>NA </td><td style=\"text-align: right;\"> 84</td><td style=\"text-align: right;\"> 79</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 3</td><td style=\"text-align: right;\"> -2</td><td>SAN </td><td>SFO </td><td style=\"text-align: right;\"> 447</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 0</td><td>NA </td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td>YES </td><td>NO </td></tr>\n", | |
| "<tr><td style=\"text-align: right;\"> 1987</td><td style=\"text-align: right;\"> 10</td><td style=\"text-align: right;\"> 23</td><td style=\"text-align: right;\"> 5</td><td style=\"text-align: right;\"> 731</td><td style=\"text-align: right;\"> 730</td><td style=\"text-align: right;\"> 902</td><td style=\"text-align: right;\"> 849</td><td>PS </td><td style=\"text-align: right;\"> 1451</td><td>NA </td><td style=\"text-align: right;\"> 91</td><td style=\"text-align: right;\"> 79</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 13</td><td style=\"text-align: right;\"> 1</td><td>SAN </td><td>SFO </td><td style=\"text-align: right;\"> 447</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 0</td><td>NA </td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td>YES </td><td>YES </td></tr>\n", | |
| "<tr><td style=\"text-align: right;\"> 1987</td><td style=\"text-align: right;\"> 10</td><td style=\"text-align: right;\"> 24</td><td style=\"text-align: right;\"> 6</td><td style=\"text-align: right;\"> 744</td><td style=\"text-align: right;\"> 730</td><td style=\"text-align: right;\"> 908</td><td style=\"text-align: right;\"> 849</td><td>PS </td><td style=\"text-align: right;\"> 1451</td><td>NA </td><td style=\"text-align: right;\"> 84</td><td style=\"text-align: right;\"> 79</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 19</td><td style=\"text-align: right;\"> 14</td><td>SAN </td><td>SFO </td><td style=\"text-align: right;\"> 447</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 0</td><td>NA </td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td>YES </td><td>YES </td></tr>\n", | |
| "<tr><td style=\"text-align: right;\"> 1987</td><td style=\"text-align: right;\"> 10</td><td style=\"text-align: right;\"> 25</td><td style=\"text-align: right;\"> 7</td><td style=\"text-align: right;\"> 729</td><td style=\"text-align: right;\"> 730</td><td style=\"text-align: right;\"> 851</td><td style=\"text-align: right;\"> 849</td><td>PS </td><td style=\"text-align: right;\"> 1451</td><td>NA </td><td style=\"text-align: right;\"> 82</td><td style=\"text-align: right;\"> 79</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 2</td><td style=\"text-align: right;\"> -1</td><td>SAN </td><td>SFO </td><td style=\"text-align: right;\"> 447</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 0</td><td>NA </td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td>YES </td><td>NO </td></tr>\n", | |
| "</table>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [] | |
| }, | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "airlines.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[u'Year',\n", | |
| " u'Month',\n", | |
| " u'DayofMonth',\n", | |
| " u'DayOfWeek',\n", | |
| " u'DepTime',\n", | |
| " u'CRSDepTime',\n", | |
| " u'ArrTime',\n", | |
| " u'CRSArrTime',\n", | |
| " u'UniqueCarrier',\n", | |
| " u'FlightNum',\n", | |
| " u'TailNum',\n", | |
| " u'ActualElapsedTime',\n", | |
| " u'CRSElapsedTime',\n", | |
| " u'AirTime',\n", | |
| " u'ArrDelay',\n", | |
| " u'DepDelay',\n", | |
| " u'Origin',\n", | |
| " u'Dest',\n", | |
| " u'Distance',\n", | |
| " u'TaxiIn',\n", | |
| " u'TaxiOut',\n", | |
| " u'Cancelled',\n", | |
| " u'CancellationCode',\n", | |
| " u'Diverted',\n", | |
| " u'CarrierDelay',\n", | |
| " u'WeatherDelay',\n", | |
| " u'NASDelay',\n", | |
| " u'SecurityDelay',\n", | |
| " u'LateAircraftDelay',\n", | |
| " u'IsArrDelayed',\n", | |
| " u'IsDepDelayed']" | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "airlines.columns" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# convert columns to factors\n", | |
| "airlines[\"Year\"] = airlines[\"Year\"].asfactor()\n", | |
| "airlines[\"Month\"] = airlines[\"Month\"].asfactor()\n", | |
| "airlines[\"DayOfWeek\"] = airlines[\"DayOfWeek\"].asfactor()\n", | |
| "airlines[\"Cancelled\"] = airlines[\"Cancelled\"].asfactor()\n", | |
| "airlines[\"FlightNum\"] = airlines[\"FlightNum\"].asfactor()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# set the predictor names and the response column name\n", | |
| "predictors = [\"Origin\",\"Dest\",\"Year\",\"Month\",\"UniqueCarrier\",\"DayOfWeek\",\"Distance\",\"FlightNum\"]\n", | |
| "response = \"IsDepDelayed\"" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# split into train and validation sets\n", | |
| "train, valid = airlines.split_frame(ratios= [.8], seed = 1234)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# candidate nbins_cats values to try; the number of factor levels ranges from 2 to 2439\n", | |
| "bin_num = [8,16,32,64,128,256,512,1024,2048,4096]\n", | |
| "label = [\"8\",\"16\",\"32\",\"64\",\"128\",\"256\",\"512\",\"1024\",\"2048\",\"4096\"]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "gbm Model Build progress: |███████████████████████████████████████████████| 100%\n", | |
| "gbm Model Build progress: |███████████████████████████████████████████████| 100%\n", | |
| "gbm Model Build progress: |███████████████████████████████████████████████| 100%\n", | |
| "gbm Model Build progress: |███████████████████████████████████████████████| 100%\n", | |
| "gbm Model Build progress: |███████████████████████████████████████████████| 100%\n", | |
| "gbm Model Build progress: |███████████████████████████████████████████████| 100%\n", | |
| "gbm Model Build progress: |███████████████████████████████████████████████| 100%\n", | |
| "gbm Model Build progress: |███████████████████████████████████████████████| 100%\n", | |
| "gbm Model Build progress: |███████████████████████████████████████████████| 100%\n", | |
| "gbm Model Build progress: |███████████████████████████████████████████████| 100%\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# train the models\n", | |
| "for key, num in enumerate(bin_num):\n", | |
| " #initialize the GBM estimator and set a seed for reproducibility\n", | |
| " airlines_gbm = H2OGradientBoostingEstimator(nbins_cats = num, seed = 1234)\n", | |
| " airlines_gbm.train(x = predictors, y = response, training_frame = train, validation_frame = valid)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "('4096', 'training score', 0.859260141447694)\n", | |
| "('4096', 'validation score', 0.7308873468586968)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# print AUC of the last model trained above (nbins_cats = 4096)\n", | |
| "print(label[key], \"training score\", airlines_gbm.auc(train = True))\n", | |
| "print(label[key], \"validation score\", airlines_gbm.auc(valid = True))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 2", | |
| "language": "python", | |
| "name": "python2" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 2 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython2", | |
| "version": "2.7.13" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment