Last active
February 22, 2018 06:13
-
-
Save sinhrks/cc9a88f74074fc296e12 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'0.4'" | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%matplotlib inline\n", | |
"import numpy as np\n", | |
"import xgboost as xgb\n", | |
"from sklearn import datasets\n", | |
"\n", | |
"import matplotlib.pyplot as plt\n", | |
"plt.style.use('ggplot')\n", | |
"\n", | |
"xgb.__version__" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Create DMatrix from pandas.DataFrame" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>SepalLength</th>\n", | |
" <th>SepalWidth</th>\n", | |
" <th>PetalLength</th>\n", | |
" <th>PetalWidth</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>5.1</td>\n", | |
" <td>3.5</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>4.9</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>4.7</td>\n", | |
" <td>3.2</td>\n", | |
" <td>1.3</td>\n", | |
" <td>0.2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4.6</td>\n", | |
" <td>3.1</td>\n", | |
" <td>1.5</td>\n", | |
" <td>0.2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>146</th>\n", | |
" <td>6.3</td>\n", | |
" <td>2.5</td>\n", | |
" <td>5.0</td>\n", | |
" <td>1.9</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>147</th>\n", | |
" <td>6.5</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.2</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>148</th>\n", | |
" <td>6.2</td>\n", | |
" <td>3.4</td>\n", | |
" <td>5.4</td>\n", | |
" <td>2.3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>149</th>\n", | |
" <td>5.9</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.1</td>\n", | |
" <td>1.8</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>150 rows × 4 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" SepalLength SepalWidth PetalLength PetalWidth\n", | |
"0 5.1 3.5 1.4 0.2\n", | |
"1 4.9 3.0 1.4 0.2\n", | |
"2 4.7 3.2 1.3 0.2\n", | |
"3 4.6 3.1 1.5 0.2\n", | |
".. ... ... ... ...\n", | |
"146 6.3 2.5 5.0 1.9\n", | |
"147 6.5 3.0 5.2 2.0\n", | |
"148 6.2 3.4 5.4 2.3\n", | |
"149 5.9 3.0 5.1 1.8\n", | |
"\n", | |
"[150 rows x 4 columns]" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"iris = datasets.load_iris()\n", | |
"\n", | |
"import pandas as pd\n", | |
"pd.set_option('display.max_rows', 8)\n", | |
"\n", | |
"train = pd.DataFrame(iris.data, columns=['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth'])\n", | |
"train" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(150L, 4L)" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dm = xgb.DMatrix(train, label=iris.target)\n", | |
"dm.num_row(), dm.num_col()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dm.feature_names" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['q', 'q', 'q', 'q']" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dm.feature_types" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### cv now returns pandas.DataFrame or np.ndarray" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>test-mlogloss-mean</th>\n", | |
" <th>test-mlogloss-std</th>\n", | |
" <th>train-mlogloss-mean</th>\n", | |
" <th>train-mlogloss-std</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0.753459</td>\n", | |
" <td>0.027033</td>\n", | |
" <td>0.737631</td>\n", | |
" <td>0.003818</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0.552303</td>\n", | |
" <td>0.048738</td>\n", | |
" <td>0.526929</td>\n", | |
" <td>0.005102</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0.423481</td>\n", | |
" <td>0.066469</td>\n", | |
" <td>0.390115</td>\n", | |
" <td>0.005873</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0.339942</td>\n", | |
" <td>0.082163</td>\n", | |
" <td>0.295637</td>\n", | |
" <td>0.006148</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>0.219242</td>\n", | |
" <td>0.124195</td>\n", | |
" <td>0.143760</td>\n", | |
" <td>0.006318</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>0.200365</td>\n", | |
" <td>0.137163</td>\n", | |
" <td>0.116560</td>\n", | |
" <td>0.006130</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>0.187477</td>\n", | |
" <td>0.145066</td>\n", | |
" <td>0.096047</td>\n", | |
" <td>0.005444</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>0.181228</td>\n", | |
" <td>0.156536</td>\n", | |
" <td>0.080041</td>\n", | |
" <td>0.005265</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>10 rows × 4 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" test-mlogloss-mean test-mlogloss-std train-mlogloss-mean \\\n", | |
"0 0.753459 0.027033 0.737631 \n", | |
"1 0.552303 0.048738 0.526929 \n", | |
"2 0.423481 0.066469 0.390115 \n", | |
"3 0.339942 0.082163 0.295637 \n", | |
".. ... ... ... \n", | |
"6 0.219242 0.124195 0.143760 \n", | |
"7 0.200365 0.137163 0.116560 \n", | |
"8 0.187477 0.145066 0.096047 \n", | |
"9 0.181228 0.156536 0.080041 \n", | |
"\n", | |
" train-mlogloss-std \n", | |
"0 0.003818 \n", | |
"1 0.005102 \n", | |
"2 0.005873 \n", | |
"3 0.006148 \n", | |
".. ... \n", | |
"6 0.006318 \n", | |
"7 0.006130 \n", | |
"8 0.005444 \n", | |
"9 0.005265 \n", | |
"\n", | |
"[10 rows x 4 columns]" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"params={'objective': 'multi:softprob',\n", | |
" 'eval_metric': 'mlogloss',\n", | |
" 'eta': 0.3,\n", | |
" 'num_class': 3}\n", | |
"\n", | |
"# default (returns pd.DataFrame, progress report is disabled)\n", | |
"# if pandas is not installed, it behaves as if as_pandas=False were specified (see below)\n", | |
"xgb.cv(params, dm, num_boost_round=10, nfold=10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"[0]\tcv-test-mlogloss:0.7534586+0.0270330788894\tcv-train-mlogloss:0.7376308+0.00381774878168\n", | |
"[1]\tcv-test-mlogloss:0.5523035+0.0487375163775\tcv-train-mlogloss:0.5269287+0.00510218267117\n", | |
"[2]\tcv-test-mlogloss:0.4234808+0.0664692426297\tcv-train-mlogloss:0.3901153+0.00587297442613\n", | |
"[3]\tcv-test-mlogloss:0.3399421+0.082162847007\tcv-train-mlogloss:0.2956371+0.0061480195421\n", | |
"[4]\tcv-test-mlogloss:0.2822133+0.094546005664\tcv-train-mlogloss:0.2284948+0.00582542583508\n", | |
"[5]\tcv-test-mlogloss:0.2445561+0.108854130468\tcv-train-mlogloss:0.1798245+0.00614406771203\n", | |
"[6]\tcv-test-mlogloss:0.2192424+0.124194576109\tcv-train-mlogloss:0.1437596+0.0063180363595\n", | |
"[7]\tcv-test-mlogloss:0.2003654+0.137162644979\tcv-train-mlogloss:0.1165601+0.00612969169616\n", | |
"[8]\tcv-test-mlogloss:0.1874767+0.145066157809\tcv-train-mlogloss:0.0960466+0.00544370076694\n", | |
"[9]\tcv-test-mlogloss:0.1812277+0.156536125975\tcv-train-mlogloss:0.0800408+0.0052649908224\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>test-mlogloss-mean</th>\n", | |
" <th>test-mlogloss-std</th>\n", | |
" <th>train-mlogloss-mean</th>\n", | |
" <th>train-mlogloss-std</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0.753459</td>\n", | |
" <td>0.027033</td>\n", | |
" <td>0.737631</td>\n", | |
" <td>0.003818</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0.552303</td>\n", | |
" <td>0.048738</td>\n", | |
" <td>0.526929</td>\n", | |
" <td>0.005102</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0.423481</td>\n", | |
" <td>0.066469</td>\n", | |
" <td>0.390115</td>\n", | |
" <td>0.005873</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0.339942</td>\n", | |
" <td>0.082163</td>\n", | |
" <td>0.295637</td>\n", | |
" <td>0.006148</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>0.219242</td>\n", | |
" <td>0.124195</td>\n", | |
" <td>0.143760</td>\n", | |
" <td>0.006318</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>0.200365</td>\n", | |
" <td>0.137163</td>\n", | |
" <td>0.116560</td>\n", | |
" <td>0.006130</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>0.187477</td>\n", | |
" <td>0.145066</td>\n", | |
" <td>0.096047</td>\n", | |
" <td>0.005444</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>0.181228</td>\n", | |
" <td>0.156536</td>\n", | |
" <td>0.080041</td>\n", | |
" <td>0.005265</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>10 rows × 4 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" test-mlogloss-mean test-mlogloss-std train-mlogloss-mean \\\n", | |
"0 0.753459 0.027033 0.737631 \n", | |
"1 0.552303 0.048738 0.526929 \n", | |
"2 0.423481 0.066469 0.390115 \n", | |
"3 0.339942 0.082163 0.295637 \n", | |
".. ... ... ... \n", | |
"6 0.219242 0.124195 0.143760 \n", | |
"7 0.200365 0.137163 0.116560 \n", | |
"8 0.187477 0.145066 0.096047 \n", | |
"9 0.181228 0.156536 0.080041 \n", | |
"\n", | |
" train-mlogloss-std \n", | |
"0 0.003818 \n", | |
"1 0.005102 \n", | |
"2 0.005873 \n", | |
"3 0.006148 \n", | |
".. ... \n", | |
"6 0.006318 \n", | |
"7 0.006130 \n", | |
"8 0.005444 \n", | |
"9 0.005265 \n", | |
"\n", | |
"[10 rows x 4 columns]" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Specify show_progress=True explicitly to display progress\n", | |
"xgb.cv(params, dm, num_boost_round=10, nfold=10, show_progress=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"[0]\tcv-test-mlogloss:0.7534586\tcv-train-mlogloss:0.7376308\n", | |
"[1]\tcv-test-mlogloss:0.5523035\tcv-train-mlogloss:0.5269287\n", | |
"[2]\tcv-test-mlogloss:0.4234808\tcv-train-mlogloss:0.3901153\n", | |
"[3]\tcv-test-mlogloss:0.3399421\tcv-train-mlogloss:0.2956371\n", | |
"[4]\tcv-test-mlogloss:0.2822133\tcv-train-mlogloss:0.2284948\n", | |
"[5]\tcv-test-mlogloss:0.2445561\tcv-train-mlogloss:0.1798245\n", | |
"[6]\tcv-test-mlogloss:0.2192424\tcv-train-mlogloss:0.1437596\n", | |
"[7]\tcv-test-mlogloss:0.2003654\tcv-train-mlogloss:0.1165601\n", | |
"[8]\tcv-test-mlogloss:0.1874767\tcv-train-mlogloss:0.0960466\n", | |
"[9]\tcv-test-mlogloss:0.1812277\tcv-train-mlogloss:0.0800408\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>test-mlogloss-mean</th>\n", | |
" <th>test-mlogloss-std</th>\n", | |
" <th>train-mlogloss-mean</th>\n", | |
" <th>train-mlogloss-std</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0.753459</td>\n", | |
" <td>0.027033</td>\n", | |
" <td>0.737631</td>\n", | |
" <td>0.003818</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0.552303</td>\n", | |
" <td>0.048738</td>\n", | |
" <td>0.526929</td>\n", | |
" <td>0.005102</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0.423481</td>\n", | |
" <td>0.066469</td>\n", | |
" <td>0.390115</td>\n", | |
" <td>0.005873</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0.339942</td>\n", | |
" <td>0.082163</td>\n", | |
" <td>0.295637</td>\n", | |
" <td>0.006148</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>0.219242</td>\n", | |
" <td>0.124195</td>\n", | |
" <td>0.143760</td>\n", | |
" <td>0.006318</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>0.200365</td>\n", | |
" <td>0.137163</td>\n", | |
" <td>0.116560</td>\n", | |
" <td>0.006130</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>0.187477</td>\n", | |
" <td>0.145066</td>\n", | |
" <td>0.096047</td>\n", | |
" <td>0.005444</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>0.181228</td>\n", | |
" <td>0.156536</td>\n", | |
" <td>0.080041</td>\n", | |
" <td>0.005265</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>10 rows × 4 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" test-mlogloss-mean test-mlogloss-std train-mlogloss-mean \\\n", | |
"0 0.753459 0.027033 0.737631 \n", | |
"1 0.552303 0.048738 0.526929 \n", | |
"2 0.423481 0.066469 0.390115 \n", | |
"3 0.339942 0.082163 0.295637 \n", | |
".. ... ... ... \n", | |
"6 0.219242 0.124195 0.143760 \n", | |
"7 0.200365 0.137163 0.116560 \n", | |
"8 0.187477 0.145066 0.096047 \n", | |
"9 0.181228 0.156536 0.080041 \n", | |
"\n", | |
" train-mlogloss-std \n", | |
"0 0.003818 \n", | |
"1 0.005102 \n", | |
"2 0.005873 \n", | |
"3 0.006148 \n", | |
".. ... \n", | |
"6 0.006318 \n", | |
"7 0.006130 \n", | |
"8 0.005444 \n", | |
"9 0.005265 \n", | |
"\n", | |
"[10 rows x 4 columns]" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Specify show_stdv=False to hide the stdv from the progress report (for back-compat)\n", | |
"# Note that the result always contains the stdv\n", | |
"xgb.cv(params, dm, num_boost_round=10, nfold=10, show_progress=True, show_stdv=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"[0]\tcv-test-mlogloss:0.7534586+0.0270330788894\tcv-train-mlogloss:0.7376308+0.00381774878168\n", | |
"[1]\tcv-test-mlogloss:0.5523035+0.0487375163775\tcv-train-mlogloss:0.5269287+0.00510218267117\n", | |
"[2]\tcv-test-mlogloss:0.4234808+0.0664692426297\tcv-train-mlogloss:0.3901153+0.00587297442613\n", | |
"[3]\tcv-test-mlogloss:0.3399421+0.082162847007\tcv-train-mlogloss:0.2956371+0.0061480195421\n", | |
"[4]\tcv-test-mlogloss:0.2822133+0.094546005664\tcv-train-mlogloss:0.2284948+0.00582542583508\n", | |
"[5]\tcv-test-mlogloss:0.2445561+0.108854130468\tcv-train-mlogloss:0.1798245+0.00614406771203\n", | |
"[6]\tcv-test-mlogloss:0.2192424+0.124194576109\tcv-train-mlogloss:0.1437596+0.0063180363595\n", | |
"[7]\tcv-test-mlogloss:0.2003654+0.137162644979\tcv-train-mlogloss:0.1165601+0.00612969169616\n", | |
"[8]\tcv-test-mlogloss:0.1874767+0.145066157809\tcv-train-mlogloss:0.0960466+0.00544370076694\n", | |
"[9]\tcv-test-mlogloss:0.1812277+0.156536125975\tcv-train-mlogloss:0.0800408+0.0052649908224\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[ 0.7534586 , 0.02703308, 0.7376308 , 0.00381775],\n", | |
" [ 0.5523035 , 0.04873752, 0.5269287 , 0.00510218],\n", | |
" [ 0.4234808 , 0.06646924, 0.3901153 , 0.00587297],\n", | |
" [ 0.3399421 , 0.08216285, 0.2956371 , 0.00614802],\n", | |
" [ 0.2822133 , 0.09454601, 0.2284948 , 0.00582543],\n", | |
" [ 0.2445561 , 0.10885413, 0.1798245 , 0.00614407],\n", | |
" [ 0.2192424 , 0.12419458, 0.1437596 , 0.00631804],\n", | |
" [ 0.2003654 , 0.13716264, 0.1165601 , 0.00612969],\n", | |
" [ 0.1874767 , 0.14506616, 0.0960466 , 0.0054437 ],\n", | |
" [ 0.1812277 , 0.15653613, 0.0800408 , 0.00526499]])" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# specifying as_pandas=False returns np.ndarray\n", | |
"# progress report is enabled because the returned np.ndarray can't contain metadata\n", | |
"xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[ 0.7534586 , 0.02703308, 0.7376308 , 0.00381775],\n", | |
" [ 0.5523035 , 0.04873752, 0.5269287 , 0.00510218],\n", | |
" [ 0.4234808 , 0.06646924, 0.3901153 , 0.00587297],\n", | |
" [ 0.3399421 , 0.08216285, 0.2956371 , 0.00614802],\n", | |
" [ 0.2822133 , 0.09454601, 0.2284948 , 0.00582543],\n", | |
" [ 0.2445561 , 0.10885413, 0.1798245 , 0.00614407],\n", | |
" [ 0.2192424 , 0.12419458, 0.1437596 , 0.00631804],\n", | |
" [ 0.2003654 , 0.13716264, 0.1165601 , 0.00612969],\n", | |
" [ 0.1874767 , 0.14506616, 0.0960466 , 0.0054437 ],\n", | |
" [ 0.1812277 , 0.15653613, 0.0800408 , 0.00526499]])" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# we can hide progress explicitly\n", | |
"xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=False, show_progress=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>test-error-mean</th>\n", | |
" <th>test-error-std</th>\n", | |
" <th>train-error-mean</th>\n", | |
" <th>train-error-std</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0.046544</td>\n", | |
" <td>0.007774</td>\n", | |
" <td>0.046544</td>\n", | |
" <td>0.000864</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0.022273</td>\n", | |
" <td>0.004821</td>\n", | |
" <td>0.022273</td>\n", | |
" <td>0.000536</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0.007066</td>\n", | |
" <td>0.002678</td>\n", | |
" <td>0.007066</td>\n", | |
" <td>0.000298</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0.015207</td>\n", | |
" <td>0.003791</td>\n", | |
" <td>0.015207</td>\n", | |
" <td>0.000421</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>0.001229</td>\n", | |
" <td>0.001339</td>\n", | |
" <td>0.001229</td>\n", | |
" <td>0.000149</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>0.001229</td>\n", | |
" <td>0.001339</td>\n", | |
" <td>0.001229</td>\n", | |
" <td>0.000149</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>0.001229</td>\n", | |
" <td>0.001339</td>\n", | |
" <td>0.000956</td>\n", | |
" <td>0.000496</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>0.001229</td>\n", | |
" <td>0.001339</td>\n", | |
" <td>0.000683</td>\n", | |
" <td>0.000566</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>10 rows × 4 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" test-error-mean test-error-std train-error-mean train-error-std\n", | |
"0 0.046544 0.007774 0.046544 0.000864\n", | |
"1 0.022273 0.004821 0.022273 0.000536\n", | |
"2 0.007066 0.002678 0.007066 0.000298\n", | |
"3 0.015207 0.003791 0.015207 0.000421\n", | |
".. ... ... ... ...\n", | |
"6 0.001229 0.001339 0.001229 0.000149\n", | |
"7 0.001229 0.001339 0.001229 0.000149\n", | |
"8 0.001229 0.001339 0.000956 0.000496\n", | |
"9 0.001229 0.001339 0.000683 0.000566\n", | |
"\n", | |
"[10 rows x 4 columns]" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# column names change depending on the evaluation metric\n", | |
"\n", | |
"dpath = '/Users/sin/Documents/Git/xgboost/demo/data/'\n", | |
"dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')\n", | |
"param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }\n", | |
"# no eval_metric is given in param here; the result columns below are named after 'error'\n", | |
"xgb.cv(param, dtrain, num_boost_round=10, nfold=10)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.10" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment