Last active
July 16, 2017 02:22
-
-
Save northface/732ea069523e01987df0fe4a262a07e6 to your computer and use it in GitHub Desktop.
GBM Example by H2O.ai
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# GBM Example\n", | |
"\n", | |
"http://h2o-release.s3.amazonaws.com/h2o/rel-vajda/3/docs-website/h2o-docs/starting-h2o.html#from-python" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import h2o" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Checking whether there is an H2O instance running at http://localhost:54321..... not found.\n", | |
"Attempting to start a local H2O server...\n", | |
" Java Version: openjdk version \"1.8.0_121\"; OpenJDK Runtime Environment (Zulu 8.20.0.5-macosx) (build 1.8.0_121-b15); OpenJDK 64-Bit Server VM (Zulu 8.20.0.5-macosx) (build 25.121-b15, mixed mode)\n", | |
" Starting server from /Users/northface/anaconda/envs/py2/lib/python2.7/site-packages/h2o/backend/bin/h2o.jar\n", | |
" Ice root: /var/folders/p1/j6jk6yyx48710zss8fkcht580000gn/T/tmptcb7fX\n", | |
" JVM stdout: /var/folders/p1/j6jk6yyx48710zss8fkcht580000gn/T/tmptcb7fX/h2o_northface_started_from_python.out\n", | |
" JVM stderr: /var/folders/p1/j6jk6yyx48710zss8fkcht580000gn/T/tmptcb7fX/h2o_northface_started_from_python.err\n", | |
" Server is running at http://127.0.0.1:54321\n", | |
"Connecting to H2O server at http://127.0.0.1:54321... successful.\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td>H2O cluster uptime:</td>\n", | |
"<td>03 secs</td></tr>\n", | |
"<tr><td>H2O cluster version:</td>\n", | |
"<td>3.10.4.8</td></tr>\n", | |
"<tr><td>H2O cluster version age:</td>\n", | |
"<td>1 month and 25 days </td></tr>\n", | |
"<tr><td>H2O cluster name:</td>\n", | |
"<td>H2O_from_python_northface_xfu3tb</td></tr>\n", | |
"<tr><td>H2O cluster total nodes:</td>\n", | |
"<td>1</td></tr>\n", | |
"<tr><td>H2O cluster free memory:</td>\n", | |
"<td>3.556 Gb</td></tr>\n", | |
"<tr><td>H2O cluster total cores:</td>\n", | |
"<td>4</td></tr>\n", | |
"<tr><td>H2O cluster allowed cores:</td>\n", | |
"<td>4</td></tr>\n", | |
"<tr><td>H2O cluster status:</td>\n", | |
"<td>accepting new members, healthy</td></tr>\n", | |
"<tr><td>H2O connection url:</td>\n", | |
"<td>http://127.0.0.1:54321</td></tr>\n", | |
"<tr><td>H2O connection proxy:</td>\n", | |
"<td>None</td></tr>\n", | |
"<tr><td>H2O internal security:</td>\n", | |
"<td>False</td></tr>\n", | |
"<tr><td>Python version:</td>\n", | |
"<td>2.7.13 final</td></tr></table></div>" | |
], | |
"text/plain": [ | |
"-------------------------- --------------------------------\n", | |
"H2O cluster uptime: 03 secs\n", | |
"H2O cluster version: 3.10.4.8\n", | |
"H2O cluster version age: 1 month and 25 days\n", | |
"H2O cluster name: H2O_from_python_northface_xfu3tb\n", | |
"H2O cluster total nodes: 1\n", | |
"H2O cluster free memory: 3.556 Gb\n", | |
"H2O cluster total cores: 4\n", | |
"H2O cluster allowed cores: 4\n", | |
"H2O cluster status: accepting new members, healthy\n", | |
"H2O connection url: http://127.0.0.1:54321\n", | |
"H2O connection proxy:\n", | |
"H2O internal security: False\n", | |
"Python version: 2.7.13 final\n", | |
"-------------------------- --------------------------------" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"from h2o.estimators.gbm import H2OGradientBoostingEstimator\n", | |
"h2o.init()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Parse progress: |█████████████████████████████████████████████████████████| 100%\n" | |
] | |
} | |
], | |
"source": [ | |
"# Import the Airlines dataset\n", | |
"# This will be used to classify whether a flight is delayed\n", | |
"airlines = h2o.import_file(\"https://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table>\n", | |
"<tr><th style=\"text-align: right;\"> Year</th><th style=\"text-align: right;\"> Month</th><th style=\"text-align: right;\"> DayofMonth</th><th style=\"text-align: right;\"> DayOfWeek</th><th style=\"text-align: right;\"> DepTime</th><th style=\"text-align: right;\"> CRSDepTime</th><th style=\"text-align: right;\"> ArrTime</th><th style=\"text-align: right;\"> CRSArrTime</th><th>UniqueCarrier </th><th style=\"text-align: right;\"> FlightNum</th><th>TailNum </th><th style=\"text-align: right;\"> ActualElapsedTime</th><th style=\"text-align: right;\"> CRSElapsedTime</th><th style=\"text-align: right;\"> AirTime</th><th style=\"text-align: right;\"> ArrDelay</th><th style=\"text-align: right;\"> DepDelay</th><th>Origin </th><th>Dest </th><th style=\"text-align: right;\"> Distance</th><th style=\"text-align: right;\"> TaxiIn</th><th style=\"text-align: right;\"> TaxiOut</th><th style=\"text-align: right;\"> Cancelled</th><th>CancellationCode </th><th style=\"text-align: right;\"> Diverted</th><th style=\"text-align: right;\"> CarrierDelay</th><th style=\"text-align: right;\"> WeatherDelay</th><th style=\"text-align: right;\"> NASDelay</th><th style=\"text-align: right;\"> SecurityDelay</th><th style=\"text-align: right;\"> LateAircraftDelay</th><th>IsArrDelayed </th><th>IsDepDelayed </th></tr>\n", | |
"<tr><td style=\"text-align: right;\"> 1987</td><td style=\"text-align: right;\"> 10</td><td style=\"text-align: right;\"> 14</td><td style=\"text-align: right;\"> 3</td><td style=\"text-align: right;\"> 741</td><td style=\"text-align: right;\"> 730</td><td style=\"text-align: right;\"> 912</td><td style=\"text-align: right;\"> 849</td><td>PS </td><td style=\"text-align: right;\"> 1451</td><td>NA </td><td style=\"text-align: right;\"> 91</td><td style=\"text-align: right;\"> 79</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 23</td><td style=\"text-align: right;\"> 11</td><td>SAN </td><td>SFO </td><td style=\"text-align: right;\"> 447</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 0</td><td>NA </td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td>YES </td><td>YES </td></tr>\n", | |
"<tr><td style=\"text-align: right;\"> 1987</td><td style=\"text-align: right;\"> 10</td><td style=\"text-align: right;\"> 15</td><td style=\"text-align: right;\"> 4</td><td style=\"text-align: right;\"> 729</td><td style=\"text-align: right;\"> 730</td><td style=\"text-align: right;\"> 903</td><td style=\"text-align: right;\"> 849</td><td>PS </td><td style=\"text-align: right;\"> 1451</td><td>NA </td><td style=\"text-align: right;\"> 94</td><td style=\"text-align: right;\"> 79</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 14</td><td style=\"text-align: right;\"> -1</td><td>SAN </td><td>SFO </td><td style=\"text-align: right;\"> 447</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 0</td><td>NA </td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td>YES </td><td>NO </td></tr>\n", | |
"<tr><td style=\"text-align: right;\"> 1987</td><td style=\"text-align: right;\"> 10</td><td style=\"text-align: right;\"> 17</td><td style=\"text-align: right;\"> 6</td><td style=\"text-align: right;\"> 741</td><td style=\"text-align: right;\"> 730</td><td style=\"text-align: right;\"> 918</td><td style=\"text-align: right;\"> 849</td><td>PS </td><td style=\"text-align: right;\"> 1451</td><td>NA </td><td style=\"text-align: right;\"> 97</td><td style=\"text-align: right;\"> 79</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 29</td><td style=\"text-align: right;\"> 11</td><td>SAN </td><td>SFO </td><td style=\"text-align: right;\"> 447</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 0</td><td>NA </td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td>YES </td><td>YES </td></tr>\n", | |
"<tr><td style=\"text-align: right;\"> 1987</td><td style=\"text-align: right;\"> 10</td><td style=\"text-align: right;\"> 18</td><td style=\"text-align: right;\"> 7</td><td style=\"text-align: right;\"> 729</td><td style=\"text-align: right;\"> 730</td><td style=\"text-align: right;\"> 847</td><td style=\"text-align: right;\"> 849</td><td>PS </td><td style=\"text-align: right;\"> 1451</td><td>NA </td><td style=\"text-align: right;\"> 78</td><td style=\"text-align: right;\"> 79</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> -2</td><td style=\"text-align: right;\"> -1</td><td>SAN </td><td>SFO </td><td style=\"text-align: right;\"> 447</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 0</td><td>NA </td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td>NO </td><td>NO </td></tr>\n", | |
"<tr><td style=\"text-align: right;\"> 1987</td><td style=\"text-align: right;\"> 10</td><td style=\"text-align: right;\"> 19</td><td style=\"text-align: right;\"> 1</td><td style=\"text-align: right;\"> 749</td><td style=\"text-align: right;\"> 730</td><td style=\"text-align: right;\"> 922</td><td style=\"text-align: right;\"> 849</td><td>PS </td><td style=\"text-align: right;\"> 1451</td><td>NA </td><td style=\"text-align: right;\"> 93</td><td style=\"text-align: right;\"> 79</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 33</td><td style=\"text-align: right;\"> 19</td><td>SAN </td><td>SFO </td><td style=\"text-align: right;\"> 447</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 0</td><td>NA </td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td>YES </td><td>YES </td></tr>\n", | |
"<tr><td style=\"text-align: right;\"> 1987</td><td style=\"text-align: right;\"> 10</td><td style=\"text-align: right;\"> 21</td><td style=\"text-align: right;\"> 3</td><td style=\"text-align: right;\"> 728</td><td style=\"text-align: right;\"> 730</td><td style=\"text-align: right;\"> 848</td><td style=\"text-align: right;\"> 849</td><td>PS </td><td style=\"text-align: right;\"> 1451</td><td>NA </td><td style=\"text-align: right;\"> 80</td><td style=\"text-align: right;\"> 79</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> -1</td><td style=\"text-align: right;\"> -2</td><td>SAN </td><td>SFO </td><td style=\"text-align: right;\"> 447</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 0</td><td>NA </td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td>NO </td><td>NO </td></tr>\n", | |
"<tr><td style=\"text-align: right;\"> 1987</td><td style=\"text-align: right;\"> 10</td><td style=\"text-align: right;\"> 22</td><td style=\"text-align: right;\"> 4</td><td style=\"text-align: right;\"> 728</td><td style=\"text-align: right;\"> 730</td><td style=\"text-align: right;\"> 852</td><td style=\"text-align: right;\"> 849</td><td>PS </td><td style=\"text-align: right;\"> 1451</td><td>NA </td><td style=\"text-align: right;\"> 84</td><td style=\"text-align: right;\"> 79</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 3</td><td style=\"text-align: right;\"> -2</td><td>SAN </td><td>SFO </td><td style=\"text-align: right;\"> 447</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 0</td><td>NA </td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td>YES </td><td>NO </td></tr>\n", | |
"<tr><td style=\"text-align: right;\"> 1987</td><td style=\"text-align: right;\"> 10</td><td style=\"text-align: right;\"> 23</td><td style=\"text-align: right;\"> 5</td><td style=\"text-align: right;\"> 731</td><td style=\"text-align: right;\"> 730</td><td style=\"text-align: right;\"> 902</td><td style=\"text-align: right;\"> 849</td><td>PS </td><td style=\"text-align: right;\"> 1451</td><td>NA </td><td style=\"text-align: right;\"> 91</td><td style=\"text-align: right;\"> 79</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 13</td><td style=\"text-align: right;\"> 1</td><td>SAN </td><td>SFO </td><td style=\"text-align: right;\"> 447</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 0</td><td>NA </td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td>YES </td><td>YES </td></tr>\n", | |
"<tr><td style=\"text-align: right;\"> 1987</td><td style=\"text-align: right;\"> 10</td><td style=\"text-align: right;\"> 24</td><td style=\"text-align: right;\"> 6</td><td style=\"text-align: right;\"> 744</td><td style=\"text-align: right;\"> 730</td><td style=\"text-align: right;\"> 908</td><td style=\"text-align: right;\"> 849</td><td>PS </td><td style=\"text-align: right;\"> 1451</td><td>NA </td><td style=\"text-align: right;\"> 84</td><td style=\"text-align: right;\"> 79</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 19</td><td style=\"text-align: right;\"> 14</td><td>SAN </td><td>SFO </td><td style=\"text-align: right;\"> 447</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 0</td><td>NA </td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td>YES </td><td>YES </td></tr>\n", | |
"<tr><td style=\"text-align: right;\"> 1987</td><td style=\"text-align: right;\"> 10</td><td style=\"text-align: right;\"> 25</td><td style=\"text-align: right;\"> 7</td><td style=\"text-align: right;\"> 729</td><td style=\"text-align: right;\"> 730</td><td style=\"text-align: right;\"> 851</td><td style=\"text-align: right;\"> 849</td><td>PS </td><td style=\"text-align: right;\"> 1451</td><td>NA </td><td style=\"text-align: right;\"> 82</td><td style=\"text-align: right;\"> 79</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 2</td><td style=\"text-align: right;\"> -1</td><td>SAN </td><td>SFO </td><td style=\"text-align: right;\"> 447</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> 0</td><td>NA </td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td style=\"text-align: right;\"> nan</td><td>YES </td><td>NO </td></tr>\n", | |
"</table>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/plain": [] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"airlines.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[u'Year',\n", | |
" u'Month',\n", | |
" u'DayofMonth',\n", | |
" u'DayOfWeek',\n", | |
" u'DepTime',\n", | |
" u'CRSDepTime',\n", | |
" u'ArrTime',\n", | |
" u'CRSArrTime',\n", | |
" u'UniqueCarrier',\n", | |
" u'FlightNum',\n", | |
" u'TailNum',\n", | |
" u'ActualElapsedTime',\n", | |
" u'CRSElapsedTime',\n", | |
" u'AirTime',\n", | |
" u'ArrDelay',\n", | |
" u'DepDelay',\n", | |
" u'Origin',\n", | |
" u'Dest',\n", | |
" u'Distance',\n", | |
" u'TaxiIn',\n", | |
" u'TaxiOut',\n", | |
" u'Cancelled',\n", | |
" u'CancellationCode',\n", | |
" u'Diverted',\n", | |
" u'CarrierDelay',\n", | |
" u'WeatherDelay',\n", | |
" u'NASDelay',\n", | |
" u'SecurityDelay',\n", | |
" u'LateAircraftDelay',\n", | |
" u'IsArrDelayed',\n", | |
" u'IsDepDelayed']" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"airlines.columns" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# convert columns to factors, one asfactor() call per listed column\n", | |
"for column in [\"Year\", \"Month\", \"DayOfWeek\", \"Cancelled\", \"FlightNum\"]:\n", | |
"    airlines[column] = airlines[column].asfactor()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# set the predictor names and the response column name\n", | |
"predictors = [\"Origin\",\"Dest\",\"Year\",\"Month\",\"UniqueCarrier\",\"DayOfWeek\",\"Distance\",\"FlightNum\"]\n", | |
"response = \"IsDepDelayed\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# split into train and validation sets\n", | |
"train, valid = airlines.split_frame(ratios= [.8], seed = 1234)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# the number of factor levels ranges from 2 to 2439\n", | |
"bin_num = [8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096]\n", | |
"# display labels are the string form of each bin count, in the same order\n", | |
"label = list(map(str, bin_num))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"gbm Model Build progress: |███████████████████████████████████████████████| 100%\n", | |
"gbm Model Build progress: |███████████████████████████████████████████████| 100%\n", | |
"gbm Model Build progress: |███████████████████████████████████████████████| 100%\n", | |
"gbm Model Build progress: |███████████████████████████████████████████████| 100%\n", | |
"gbm Model Build progress: |███████████████████████████████████████████████| 100%\n", | |
"gbm Model Build progress: |███████████████████████████████████████████████| 100%\n", | |
"gbm Model Build progress: |███████████████████████████████████████████████| 100%\n", | |
"gbm Model Build progress: |███████████████████████████████████████████████| 100%\n", | |
"gbm Model Build progress: |███████████████████████████████████████████████| 100%\n", | |
"gbm Model Build progress: |███████████████████████████████████████████████| 100%\n" | |
] | |
} | |
], | |
"source": [ | |
"# train one GBM per nbins_cats setting and report its AUC so the settings can be compared\n", | |
"gbm_models = {}\n", | |
"for key, num in enumerate(bin_num):\n", | |
"    # initialize the GBM estimator and set a seed for reproducibility\n", | |
"    airlines_gbm = H2OGradientBoostingEstimator(nbins_cats = num, seed = 1234)\n", | |
"    airlines_gbm.train(x = predictors, y = response, training_frame = train, validation_frame = valid)\n", | |
"    # keep every fitted model: airlines_gbm is rebound on each pass, so without this\n", | |
"    # only the model for the last nbins_cats value would remain reachable\n", | |
"    gbm_models[label[key]] = airlines_gbm\n", | |
"    # %-formatting prints plain text on both Python 2 and Python 3\n", | |
"    print(\"nbins_cats=%s training AUC: %s, validation AUC: %s\" % (label[key], airlines_gbm.auc(train = True), airlines_gbm.auc(valid = True)))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"('4096', 'training score', 0.859260141447694)\n", | |
"('4096', 'validation score', 0.7308873468586968)\n" | |
] | |
} | |
], | |
"source": [ | |
"# print AUC of the most recently trained model only: key and airlines_gbm are\n", | |
"# whatever the training loop left bound after its final iteration\n", | |
"# %-formatting avoids printing a tuple under the Python 2 kernel\n", | |
"print(\"%s training score %s\" % (label[key], airlines_gbm.auc(train = True)))\n", | |
"print(\"%s validation score %s\" % (label[key], airlines_gbm.auc(valid = True)))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.13" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment