Created
May 6, 2019 14:44
-
-
Save pplonski/da681470884a806662272c7d44fe6e45 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import openml\n", | |
"from sklearn.ensemble import RandomForestClassifier\n", | |
"from supervised.automl import AutoML\n", | |
"\n", | |
"import os\n", | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import sklearn.model_selection\n", | |
"from sklearn.metrics import log_loss, f1_score" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = pd.read_csv('https://raw.githubusercontent.com/pplonski/datasets-for-start/master/adult/data.csv', skipinitialspace=True)\n", | |
"x_cols = [c for c in df.columns if c != 'income']\n", | |
"X = df[x_cols]\n", | |
"y = df['income']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>age</th>\n", | |
" <th>workclass</th>\n", | |
" <th>fnlwgt</th>\n", | |
" <th>education</th>\n", | |
" <th>education-num</th>\n", | |
" <th>marital-status</th>\n", | |
" <th>occupation</th>\n", | |
" <th>relationship</th>\n", | |
" <th>race</th>\n", | |
" <th>sex</th>\n", | |
" <th>capital-gain</th>\n", | |
" <th>capital-loss</th>\n", | |
" <th>hours-per-week</th>\n", | |
" <th>native-country</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>39</td>\n", | |
" <td>State-gov</td>\n", | |
" <td>77516</td>\n", | |
" <td>Bachelors</td>\n", | |
" <td>13</td>\n", | |
" <td>Never-married</td>\n", | |
" <td>Adm-clerical</td>\n", | |
" <td>Not-in-family</td>\n", | |
" <td>White</td>\n", | |
" <td>Male</td>\n", | |
" <td>2174</td>\n", | |
" <td>0</td>\n", | |
" <td>40</td>\n", | |
" <td>United-States</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>50</td>\n", | |
" <td>Self-emp-not-inc</td>\n", | |
" <td>83311</td>\n", | |
" <td>Bachelors</td>\n", | |
" <td>13</td>\n", | |
" <td>Married-civ-spouse</td>\n", | |
" <td>Exec-managerial</td>\n", | |
" <td>Husband</td>\n", | |
" <td>White</td>\n", | |
" <td>Male</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>13</td>\n", | |
" <td>United-States</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>38</td>\n", | |
" <td>Private</td>\n", | |
" <td>215646</td>\n", | |
" <td>HS-grad</td>\n", | |
" <td>9</td>\n", | |
" <td>Divorced</td>\n", | |
" <td>Handlers-cleaners</td>\n", | |
" <td>Not-in-family</td>\n", | |
" <td>White</td>\n", | |
" <td>Male</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>40</td>\n", | |
" <td>United-States</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>53</td>\n", | |
" <td>Private</td>\n", | |
" <td>234721</td>\n", | |
" <td>11th</td>\n", | |
" <td>7</td>\n", | |
" <td>Married-civ-spouse</td>\n", | |
" <td>Handlers-cleaners</td>\n", | |
" <td>Husband</td>\n", | |
" <td>Black</td>\n", | |
" <td>Male</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>40</td>\n", | |
" <td>United-States</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>28</td>\n", | |
" <td>Private</td>\n", | |
" <td>338409</td>\n", | |
" <td>Bachelors</td>\n", | |
" <td>13</td>\n", | |
" <td>Married-civ-spouse</td>\n", | |
" <td>Prof-specialty</td>\n", | |
" <td>Wife</td>\n", | |
" <td>Black</td>\n", | |
" <td>Female</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>40</td>\n", | |
" <td>Cuba</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" age workclass fnlwgt education education-num \\\n", | |
"0 39 State-gov 77516 Bachelors 13 \n", | |
"1 50 Self-emp-not-inc 83311 Bachelors 13 \n", | |
"2 38 Private 215646 HS-grad 9 \n", | |
"3 53 Private 234721 11th 7 \n", | |
"4 28 Private 338409 Bachelors 13 \n", | |
"\n", | |
" marital-status occupation relationship race sex \\\n", | |
"0 Never-married Adm-clerical Not-in-family White Male \n", | |
"1 Married-civ-spouse Exec-managerial Husband White Male \n", | |
"2 Divorced Handlers-cleaners Not-in-family White Male \n", | |
"3 Married-civ-spouse Handlers-cleaners Husband Black Male \n", | |
"4 Married-civ-spouse Prof-specialty Wife Black Female \n", | |
"\n", | |
" capital-gain capital-loss hours-per-week native-country \n", | |
"0 2174 0 40 United-States \n", | |
"1 0 0 13 United-States \n", | |
"2 0 0 40 United-States \n", | |
"3 0 0 40 United-States \n", | |
"4 0 0 40 Cuba " | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"X.head(5)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"seed = 1234\n", | |
"X_train, X_test, y_train, y_test = \\\n", | |
" sklearn.model_selection.train_test_split(X, y, test_size = 0.3, random_state=seed)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"ename": "ValueError", | |
"evalue": "could not convert string to float: 'Private'", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-11-9005dc76e109>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mrf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mRandomForestClassifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_estimators\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mrf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/ensemble/forest.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 248\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 249\u001b[0m \u001b[0;31m# Validate or convert input data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 250\u001b[0;31m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"csc\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mDTYPE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 251\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'csc'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mensure_2d\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 252\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0msample_weight\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m 525\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 526\u001b[0m \u001b[0mwarnings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msimplefilter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'error'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mComplexWarning\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 527\u001b[0;31m \u001b[0marray\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 528\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mComplexWarning\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 529\u001b[0m raise ValueError(\"Complex data not supported\\n\"\n", | |
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/numpy/core/numeric.py\u001b[0m in \u001b[0;36masarray\u001b[0;34m(a, dtype, order)\u001b[0m\n\u001b[1;32m 536\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 537\u001b[0m \"\"\"\n\u001b[0;32m--> 538\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 539\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 540\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mValueError\u001b[0m: could not convert string to float: 'Private'" | |
] | |
} | |
], | |
"source": [ | |
"rf = RandomForestClassifier(n_estimators = 1000)\n", | |
"rf = rf.fit(X_train, y_train)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# The Random Forest does not work with categorical (string) features.\n", | |
"# Let's convert them to numbers with integer encoding." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "TypeError", | |
"evalue": "'<' not supported between instances of 'str' and 'float'", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-22-a05c0e4307e4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mcolumn\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'workclass'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'education'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'marital-status'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'occupation'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'relationship'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'race'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'sex'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'native-country'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mcategorical_convert\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mLabelEncoder\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mX_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcategorical_convert\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0mX_test\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcategorical_convert\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36mfit_transform\u001b[0;34m(self, y)\u001b[0m\n\u001b[1;32m 234\u001b[0m \"\"\"\n\u001b[1;32m 235\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcolumn_or_1d\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwarn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 236\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclasses_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_encode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 237\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 238\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36m_encode\u001b[0;34m(values, uniques, encode)\u001b[0m\n\u001b[1;32m 106\u001b[0m \"\"\"\n\u001b[1;32m 107\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 108\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_encode_python\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muniques\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 109\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_encode_numpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muniques\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36m_encode_python\u001b[0;34m(values, uniques, encode)\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0;31m# only used in _encode below, see docstring there for details\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0muniques\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0muniques\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msorted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m \u001b[0muniques\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muniques\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mencode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mTypeError\u001b[0m: '<' not supported between instances of 'str' and 'float'" | |
] | |
} | |
], | |
"source": [ | |
"from sklearn.preprocessing import LabelEncoder\n", | |
"\n", | |
"for column in ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex','native-country']:\n", | |
" categorical_convert = LabelEncoder()\n", | |
" X_train[column] = categorical_convert.fit_transform(X_train[column])\n", | |
" X_test[column] = categorical_convert.transform(X_test[column])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Still failing?\n", | |
"# It looks like we have missing values in the data." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"29700 Private\n", | |
"1529 Private\n", | |
"27477 Private\n", | |
"31950 Private\n", | |
"4732 Private\n", | |
"10858 Private\n", | |
"24518 Private\n", | |
"10035 NaN\n", | |
"1324 Private\n", | |
"26727 Private\n", | |
"10255 Self-emp-not-inc\n", | |
"25713 Private\n", | |
"32541 NaN\n", | |
"22242 Private\n", | |
"8743 State-gov\n", | |
"21018 Private\n", | |
"15462 Private\n", | |
"24948 Private\n", | |
"4630 Private\n", | |
"31931 Private\n", | |
"18589 Private\n", | |
"13984 Private\n", | |
"28047 Private\n", | |
"21419 Self-emp-not-inc\n", | |
"29795 Private\n", | |
"12366 Private\n", | |
"25362 Private\n", | |
"15778 NaN\n", | |
"18061 Private\n", | |
"16192 Private\n", | |
" ... \n", | |
"2558 State-gov\n", | |
"16630 State-gov\n", | |
"18815 Private\n", | |
"9161 Private\n", | |
"27984 Private\n", | |
"27973 Private\n", | |
"7644 Private\n", | |
"14192 State-gov\n", | |
"26744 Private\n", | |
"27066 Private\n", | |
"13686 Private\n", | |
"7962 Private\n", | |
"8060 Private\n", | |
"7916 Private\n", | |
"1182 Private\n", | |
"32427 Private\n", | |
"8222 NaN\n", | |
"23706 Private\n", | |
"9449 Private\n", | |
"8471 Private\n", | |
"23217 Private\n", | |
"32399 Private\n", | |
"22521 Private\n", | |
"17048 Federal-gov\n", | |
"23924 Private\n", | |
"27852 Private\n", | |
"23605 Private\n", | |
"1318 State-gov\n", | |
"25299 Private\n", | |
"27439 Private\n", | |
"Name: workclass, Length: 22792, dtype: object" | |
] | |
}, | |
"execution_count": 24, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"X_train[column]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [], | |
"source": [
"# Let's fill the missing values with the most frequent value (mode) of each column." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"train_mode = X_train.mode().iloc[0]\n", | |
"X_train = X_train.fillna(train_mode)\n", | |
"X_test = X_test.fillna(train_mode)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"29700 Private\n", | |
"1529 Private\n", | |
"27477 Private\n", | |
"31950 Private\n", | |
"4732 Private\n", | |
"10858 Private\n", | |
"24518 Private\n", | |
"10035 Private\n", | |
"1324 Private\n", | |
"26727 Private\n", | |
"10255 Self-emp-not-inc\n", | |
"25713 Private\n", | |
"32541 Private\n", | |
"22242 Private\n", | |
"8743 State-gov\n", | |
"21018 Private\n", | |
"15462 Private\n", | |
"24948 Private\n", | |
"4630 Private\n", | |
"31931 Private\n", | |
"18589 Private\n", | |
"13984 Private\n", | |
"28047 Private\n", | |
"21419 Self-emp-not-inc\n", | |
"29795 Private\n", | |
"12366 Private\n", | |
"25362 Private\n", | |
"15778 Private\n", | |
"18061 Private\n", | |
"16192 Private\n", | |
" ... \n", | |
"2558 State-gov\n", | |
"16630 State-gov\n", | |
"18815 Private\n", | |
"9161 Private\n", | |
"27984 Private\n", | |
"27973 Private\n", | |
"7644 Private\n", | |
"14192 State-gov\n", | |
"26744 Private\n", | |
"27066 Private\n", | |
"13686 Private\n", | |
"7962 Private\n", | |
"8060 Private\n", | |
"7916 Private\n", | |
"1182 Private\n", | |
"32427 Private\n", | |
"8222 Private\n", | |
"23706 Private\n", | |
"9449 Private\n", | |
"8471 Private\n", | |
"23217 Private\n", | |
"32399 Private\n", | |
"22521 Private\n", | |
"17048 Federal-gov\n", | |
"23924 Private\n", | |
"27852 Private\n", | |
"23605 Private\n", | |
"1318 State-gov\n", | |
"25299 Private\n", | |
"27439 Private\n", | |
"Name: workclass, Length: 22792, dtype: object" | |
] | |
}, | |
"execution_count": 30, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"X_train[column]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "ValueError", | |
"evalue": "could not convert string to float: 'Private'", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-31-9005dc76e109>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mrf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mRandomForestClassifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_estimators\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mrf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/ensemble/forest.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 248\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 249\u001b[0m \u001b[0;31m# Validate or convert input data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 250\u001b[0;31m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"csc\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mDTYPE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 251\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'csc'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mensure_2d\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 252\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0msample_weight\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m 525\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 526\u001b[0m \u001b[0mwarnings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msimplefilter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'error'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mComplexWarning\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 527\u001b[0;31m \u001b[0marray\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 528\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mComplexWarning\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 529\u001b[0m raise ValueError(\"Complex data not supported\\n\"\n", | |
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/numpy/core/numeric.py\u001b[0m in \u001b[0;36masarray\u001b[0;34m(a, dtype, order)\u001b[0m\n\u001b[1;32m 536\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 537\u001b[0m \"\"\"\n\u001b[0;32m--> 538\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 539\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 540\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mValueError\u001b[0m: could not convert string to float: 'Private'" | |
] | |
} | |
], | |
"source": [ | |
"rf = RandomForestClassifier(n_estimators = 1000)\n", | |
"rf = rf.fit(X_train, y_train)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# oh, yes! still need to convert!" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "ValueError", | |
"evalue": "y contains previously unseen labels: 'Holand-Netherlands'", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36m_encode_python\u001b[0;34m(values, uniques, encode)\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 68\u001b[0;31m \u001b[0mencoded\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtable\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 69\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 68\u001b[0;31m \u001b[0mencoded\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtable\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 69\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mKeyError\u001b[0m: 'Holand-Netherlands'", | |
"\nDuring handling of the above exception, another exception occurred:\n", | |
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-33-a05c0e4307e4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mcategorical_convert\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mLabelEncoder\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mX_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcategorical_convert\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mX_test\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcategorical_convert\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36mtransform\u001b[0;34m(self, y)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_encode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muniques\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclasses_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 258\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 259\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36m_encode\u001b[0;34m(values, uniques, encode)\u001b[0m\n\u001b[1;32m 106\u001b[0m \"\"\"\n\u001b[1;32m 107\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 108\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_encode_python\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muniques\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 109\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_encode_numpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muniques\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/label.py\u001b[0m in \u001b[0;36m_encode_python\u001b[0;34m(values, uniques, encode)\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 70\u001b[0m raise ValueError(\"y contains previously unseen labels: %s\"\n\u001b[0;32m---> 71\u001b[0;31m % str(e))\n\u001b[0m\u001b[1;32m 72\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0muniques\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoded\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mValueError\u001b[0m: y contains previously unseen labels: 'Holand-Netherlands'" | |
] | |
} | |
], | |
"source": [ | |
"from sklearn.preprocessing import LabelEncoder\n", | |
"\n", | |
"for column in ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex','native-country']:\n", | |
" categorical_convert = LabelEncoder()\n", | |
" X_train[column] = categorical_convert.fit_transform(X_train[column])\n", | |
" X_test[column] = categorical_convert.transform(X_test[column])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# what!?@^%$^%$^ ????????\n", | |
"# it looks like X_test contains labels that never appear in X_train ...\n", | |
"# which can happen in real life as well :) and will break your production model!\n", | |
"# what can we do in this situation?" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>age</th>\n", | |
" <th>workclass</th>\n", | |
" <th>fnlwgt</th>\n", | |
" <th>education</th>\n", | |
" <th>education-num</th>\n", | |
" <th>marital-status</th>\n", | |
" <th>occupation</th>\n", | |
" <th>relationship</th>\n", | |
" <th>race</th>\n", | |
" <th>sex</th>\n", | |
" <th>capital-gain</th>\n", | |
" <th>capital-loss</th>\n", | |
" <th>hours-per-week</th>\n", | |
" <th>native-country</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1055</th>\n", | |
" <td>32</td>\n", | |
" <td>3</td>\n", | |
" <td>87643</td>\n", | |
" <td>9</td>\n", | |
" <td>13</td>\n", | |
" <td>2</td>\n", | |
" <td>11</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>40</td>\n", | |
" <td>United-States</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>26305</th>\n", | |
" <td>27</td>\n", | |
" <td>3</td>\n", | |
" <td>207352</td>\n", | |
" <td>9</td>\n", | |
" <td>13</td>\n", | |
" <td>2</td>\n", | |
" <td>12</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>40</td>\n", | |
" <td>India</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9568</th>\n", | |
" <td>73</td>\n", | |
" <td>6</td>\n", | |
" <td>74040</td>\n", | |
" <td>5</td>\n", | |
" <td>4</td>\n", | |
" <td>0</td>\n", | |
" <td>7</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>40</td>\n", | |
" <td>United-States</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15071</th>\n", | |
" <td>39</td>\n", | |
" <td>3</td>\n", | |
" <td>174924</td>\n", | |
" <td>11</td>\n", | |
" <td>9</td>\n", | |
" <td>5</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>14344</td>\n", | |
" <td>0</td>\n", | |
" <td>40</td>\n", | |
" <td>United-States</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2012</th>\n", | |
" <td>31</td>\n", | |
" <td>3</td>\n", | |
" <td>264936</td>\n", | |
" <td>15</td>\n", | |
" <td>10</td>\n", | |
" <td>4</td>\n", | |
" <td>7</td>\n", | |
" <td>1</td>\n", | |
" <td>4</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>40</td>\n", | |
" <td>United-States</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" age workclass fnlwgt education education-num marital-status \\\n", | |
"1055 32 3 87643 9 13 2 \n", | |
"26305 27 3 207352 9 13 2 \n", | |
"9568 73 6 74040 5 4 0 \n", | |
"15071 39 3 174924 11 9 5 \n", | |
"2012 31 3 264936 15 10 4 \n", | |
"\n", | |
" occupation relationship race sex capital-gain capital-loss \\\n", | |
"1055 11 0 4 1 0 0 \n", | |
"26305 12 0 1 1 0 0 \n", | |
"9568 7 1 1 0 0 0 \n", | |
"15071 3 1 4 1 14344 0 \n", | |
"2012 7 1 4 0 0 0 \n", | |
"\n", | |
" hours-per-week native-country \n", | |
"1055 40 United-States \n", | |
"26305 40 India \n", | |
"9568 40 United-States \n", | |
"15071 40 United-States \n", | |
"2012 40 United-States " | |
] | |
}, | |
"execution_count": 38, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"X_test.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# we need to assign a new integer for this new label or remove this row from predictions\n", | |
"# let's hack it ..." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 45, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"\n", | |
"column = 'native-country'\n", | |
"all_values = np.unique(list(X_test[column].values))\n", | |
"diff = np.setdiff1d(all_values, categorical_convert.classes_)\n", | |
"categorical_convert.classes_ = np.concatenate((categorical_convert.classes_, diff))\n", | |
"X_test[column] = categorical_convert.transform(X_test[column])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 46, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", | |
" 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n", | |
" 34, 35, 36, 37, 38, 39])" | |
] | |
}, | |
"execution_count": 46, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"np.unique(X_train[column])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 47, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", | |
" 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n", | |
" 34, 35, 36, 37, 38, 39, 40])" | |
] | |
}, | |
"execution_count": 47, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"np.unique(X_test[column])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 48, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# can we train now?" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 49, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"rf = RandomForestClassifier(n_estimators = 1000)\n", | |
"rf = rf.fit(X_train, y_train)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 51, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# :), let's do predictions! and compute some metrics!" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 52, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.338857591429531" | |
] | |
}, | |
"execution_count": 52, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"log_loss(y_test, rf.predict_proba(X_test)[:,1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 53, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Please remember that for production you need to save:\n", | |
"# 1. values used for missing values imputation\n", | |
"# 2. encoders to properly convert categoricals to integers" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 54, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# and the AutoML way" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 55, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# let's re-read the data so that we start again from the raw, unencoded values" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 56, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = pd.read_csv('https://raw.githubusercontent.com/pplonski/datasets-for-start/master/adult/data.csv', skipinitialspace=True)\n", | |
"x_cols = [c for c in df.columns if c != 'income']\n", | |
"X = df[x_cols]\n", | |
"y = df['income']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 57, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"seed = 1234\n", | |
"X_train, X_test, y_train, y_test = \\\n", | |
" sklearn.model_selection.train_test_split(X, y, test_size = 0.3, random_state=seed)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 58, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "8da2d19d9f4d4096b315f2ca24f55960", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
"HBox(children=(IntProgress(value=0, description='MLJAR AutoML', max=80, style=ProgressStyle(description_width=…" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Learner CatBoost final loss 0.4962725171175057 time 9.11 seconds\n", | |
"Learner Xgboost final loss 0.4756562801743394 time 5.83 seconds\n", | |
"Learner RF final loss 0.3065861813563275 time 3.08 seconds\n", | |
"Learner RF final loss 0.305368676349097 time 2.88 seconds\n", | |
"Learner RF final loss 0.3094612291478825 time 2.89 seconds\n", | |
"Learner LightGBM final loss 0.530521322787529 time 31.48 seconds\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" return self.partial_fit(X, y)\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" return self.partial_fit(X, y)\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" return self.partial_fit(X, y)\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" return self.partial_fit(X, y)\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" return self.partial_fit(X, y)\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Learner NN final loss 0.4039734161034428 time 21.71 seconds\n", | |
"Learner Ensemble final loss 0.29935576492662624 time 2.54 seconds\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"automl = AutoML(total_time_limit=60)\n", | |
"automl.fit(X_train, y_train)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 61, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.3114670474446203" | |
] | |
}, | |
"execution_count": 61, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"log_loss(y_test, automl.predict(X_test)['p_>50K'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 62, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>uid</th>\n", | |
" <th>model_type</th>\n", | |
" <th>metric_type</th>\n", | |
" <th>metric_value</th>\n", | |
" <th>train_time</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>8495f070-fe32-4c22-82d2-3f03997b90c6</td>\n", | |
" <td>CatBoost</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.496273</td>\n", | |
" <td>9.114185</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>7abe66a3-844f-4add-9a88-674fe6fd9b73</td>\n", | |
" <td>Xgboost</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.475656</td>\n", | |
" <td>5.827809</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3756b039-2a98-459b-b4ed-c3967b0fee5c</td>\n", | |
" <td>RF</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.306586</td>\n", | |
" <td>3.079033</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>066e83f8-42f3-4f90-8956-a1c60caa1e55</td>\n", | |
" <td>RF</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.305369</td>\n", | |
" <td>2.877668</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>16a97fa8-4fa7-4545-b694-b74705acd759</td>\n", | |
" <td>RF</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.309461</td>\n", | |
" <td>2.892735</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>2eeabf78-4303-44da-bc0e-37a398d48ff3</td>\n", | |
" <td>LightGBM</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.530521</td>\n", | |
" <td>31.484354</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>0e5848b1-394b-449f-acf2-93eca2b7e123</td>\n", | |
" <td>NN</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.403973</td>\n", | |
" <td>21.710288</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>3c86db84-c765-46d4-96aa-35ff4f18153c</td>\n", | |
" <td>Ensemble</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.299356</td>\n", | |
" <td>2.535452</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" uid model_type metric_type metric_value \\\n", | |
"0 8495f070-fe32-4c22-82d2-3f03997b90c6 CatBoost logloss 0.496273 \n", | |
"1 7abe66a3-844f-4add-9a88-674fe6fd9b73 Xgboost logloss 0.475656 \n", | |
"2 3756b039-2a98-459b-b4ed-c3967b0fee5c RF logloss 0.306586 \n", | |
"3 066e83f8-42f3-4f90-8956-a1c60caa1e55 RF logloss 0.305369 \n", | |
"4 16a97fa8-4fa7-4545-b694-b74705acd759 RF logloss 0.309461 \n", | |
"5 2eeabf78-4303-44da-bc0e-37a398d48ff3 LightGBM logloss 0.530521 \n", | |
"6 0e5848b1-394b-449f-acf2-93eca2b7e123 NN logloss 0.403973 \n", | |
"7 3c86db84-c765-46d4-96aa-35ff4f18153c Ensemble logloss 0.299356 \n", | |
"\n", | |
" train_time \n", | |
"0 9.114185 \n", | |
"1 5.827809 \n", | |
"2 3.079033 \n", | |
"3 2.877668 \n", | |
"4 2.892735 \n", | |
"5 31.484354 \n", | |
"6 21.710288 \n", | |
"7 2.535452 " | |
] | |
}, | |
"execution_count": 62, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"automl.get_leaderboard()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 63, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'best_model': {'library_version': '0.1',\n", | |
" 'algorithm_name': 'Greedy Ensemble',\n", | |
" 'algorithm_short_name': 'Ensemble',\n", | |
" 'uid': '3c86db84-c765-46d4-96aa-35ff4f18153c',\n", | |
" 'models': [{'model': {'uid': '3756b039-2a98-459b-b4ed-c3967b0fee5c',\n", | |
" 'algorithm_short_name': 'RF',\n", | |
" 'framework_file': '3756b039-2a98-459b-b4ed-c3967b0fee5c.framework',\n", | |
" 'framework_file_path': '/tmp/3756b039-2a98-459b-b4ed-c3967b0fee5c.framework',\n", | |
" 'preprocessing': [{'missing_values': [{'fill_method': 'na_fill_median',\n", | |
" 'fill_params': {'workclass': 'Private',\n", | |
" 'occupation': 'Prof-specialty',\n", | |
" 'native-country': 'United-States'}}],\n", | |
" 'categorical': [{'convert_method': 'categorical_to_int',\n", | |
" 'convert_params': {'workclass': {'Federal-gov': 0,\n", | |
" 'Local-gov': 1,\n", | |
" 'Never-worked': 2,\n", | |
" 'Private': 3,\n", | |
" 'Self-emp-inc': 4,\n", | |
" 'Self-emp-not-inc': 5,\n", | |
" 'State-gov': 6,\n", | |
" 'Without-pay': 7},\n", | |
" 'education': {'10th': 0,\n", | |
" '11th': 1,\n", | |
" '12th': 2,\n", | |
" '1st-4th': 3,\n", | |
" '5th-6th': 4,\n", | |
" '7th-8th': 5,\n", | |
" '9th': 6,\n", | |
" 'Assoc-acdm': 7,\n", | |
" 'Assoc-voc': 8,\n", | |
" 'Bachelors': 9,\n", | |
" 'Doctorate': 10,\n", | |
" 'HS-grad': 11,\n", | |
" 'Masters': 12,\n", | |
" 'Preschool': 13,\n", | |
" 'Prof-school': 14,\n", | |
" 'Some-college': 15},\n", | |
" 'marital-status': {'Divorced': 0,\n", | |
" 'Married-AF-spouse': 1,\n", | |
" 'Married-civ-spouse': 2,\n", | |
" 'Married-spouse-absent': 3,\n", | |
" 'Never-married': 4,\n", | |
" 'Separated': 5,\n", | |
" 'Widowed': 6},\n", | |
" 'occupation': {'Adm-clerical': 0,\n", | |
" 'Armed-Forces': 1,\n", | |
" 'Craft-repair': 2,\n", | |
" 'Exec-managerial': 3,\n", | |
" 'Farming-fishing': 4,\n", | |
" 'Handlers-cleaners': 5,\n", | |
" 'Machine-op-inspct': 6,\n", | |
" 'Other-service': 7,\n", | |
" 'Priv-house-serv': 8,\n", | |
" 'Prof-specialty': 9,\n", | |
" 'Protective-serv': 10,\n", | |
" 'Sales': 11,\n", | |
" 'Tech-support': 12,\n", | |
" 'Transport-moving': 13},\n", | |
" 'relationship': {'Husband': 0,\n", | |
" 'Not-in-family': 1,\n", | |
" 'Other-relative': 2,\n", | |
" 'Own-child': 3,\n", | |
" 'Unmarried': 4,\n", | |
" 'Wife': 5},\n", | |
" 'race': {'Amer-Indian-Eskimo': 0,\n", | |
" 'Asian-Pac-Islander': 1,\n", | |
" 'Black': 2,\n", | |
" 'Other': 3,\n", | |
" 'White': 4},\n", | |
" 'sex': {'Female': 0, 'Male': 1},\n", | |
" 'native-country': {'Cambodia': 0,\n", | |
" 'Canada': 1,\n", | |
" 'China': 2,\n", | |
" 'Columbia': 3,\n", | |
" 'Cuba': 4,\n", | |
" 'Dominican-Republic': 5,\n", | |
" 'Ecuador': 6,\n", | |
" 'El-Salvador': 7,\n", | |
" 'England': 8,\n", | |
" 'France': 9,\n", | |
" 'Germany': 10,\n", | |
" 'Greece': 11,\n", | |
" 'Guatemala': 12,\n", | |
" 'Haiti': 13,\n", | |
" 'Honduras': 14,\n", | |
" 'Hong': 15,\n", | |
" 'Hungary': 16,\n", | |
" 'India': 17,\n", | |
" 'Iran': 18,\n", | |
" 'Ireland': 19,\n", | |
" 'Italy': 20,\n", | |
" 'Jamaica': 21,\n", | |
" 'Japan': 22,\n", | |
" 'Laos': 23,\n", | |
" 'Mexico': 24,\n", | |
" 'Nicaragua': 25,\n", | |
" 'Outlying-US(Guam-USVI-etc)': 26,\n", | |
" 'Peru': 27,\n", | |
" 'Philippines': 28,\n", | |
" 'Poland': 29,\n", | |
" 'Portugal': 30,\n", | |
" 'Puerto-Rico': 31,\n", | |
" 'Scotland': 32,\n", | |
" 'South': 33,\n", | |
" 'Taiwan': 34,\n", | |
" 'Thailand': 35,\n", | |
" 'Trinadad&Tobago': 36,\n", | |
" 'United-States': 37,\n", | |
" 'Vietnam': 38,\n", | |
" 'Yugoslavia': 39}}}],\n", | |
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n", | |
" {'missing_values': [{'fill_method': 'na_fill_median',\n", | |
" 'fill_params': {'workclass': 'Private',\n", | |
" 'occupation': 'Prof-specialty',\n", | |
" 'native-country': 'United-States'}}],\n", | |
" 'categorical': [{'convert_method': 'categorical_to_int',\n", | |
" 'convert_params': {'workclass': {'Federal-gov': 0,\n", | |
" 'Local-gov': 1,\n", | |
" 'Never-worked': 2,\n", | |
" 'Private': 3,\n", | |
" 'Self-emp-inc': 4,\n", | |
" 'Self-emp-not-inc': 5,\n", | |
" 'State-gov': 6,\n", | |
" 'Without-pay': 7},\n", | |
" 'education': {'10th': 0,\n", | |
" '11th': 1,\n", | |
" '12th': 2,\n", | |
" '1st-4th': 3,\n", | |
" '5th-6th': 4,\n", | |
" '7th-8th': 5,\n", | |
" '9th': 6,\n", | |
" 'Assoc-acdm': 7,\n", | |
" 'Assoc-voc': 8,\n", | |
" 'Bachelors': 9,\n", | |
" 'Doctorate': 10,\n", | |
" 'HS-grad': 11,\n", | |
" 'Masters': 12,\n", | |
" 'Preschool': 13,\n", | |
" 'Prof-school': 14,\n", | |
" 'Some-college': 15},\n", | |
" 'marital-status': {'Divorced': 0,\n", | |
" 'Married-AF-spouse': 1,\n", | |
" 'Married-civ-spouse': 2,\n", | |
" 'Married-spouse-absent': 3,\n", | |
" 'Never-married': 4,\n", | |
" 'Separated': 5,\n", | |
" 'Widowed': 6},\n", | |
" 'occupation': {'Adm-clerical': 0,\n", | |
" 'Armed-Forces': 1,\n", | |
" 'Craft-repair': 2,\n", | |
" 'Exec-managerial': 3,\n", | |
" 'Farming-fishing': 4,\n", | |
" 'Handlers-cleaners': 5,\n", | |
" 'Machine-op-inspct': 6,\n", | |
" 'Other-service': 7,\n", | |
" 'Priv-house-serv': 8,\n", | |
" 'Prof-specialty': 9,\n", | |
" 'Protective-serv': 10,\n", | |
" 'Sales': 11,\n", | |
" 'Tech-support': 12,\n", | |
" 'Transport-moving': 13},\n", | |
" 'relationship': {'Husband': 0,\n", | |
" 'Not-in-family': 1,\n", | |
" 'Other-relative': 2,\n", | |
" 'Own-child': 3,\n", | |
" 'Unmarried': 4,\n", | |
" 'Wife': 5},\n", | |
" 'race': {'Amer-Indian-Eskimo': 0,\n", | |
" 'Asian-Pac-Islander': 1,\n", | |
" 'Black': 2,\n", | |
" 'Other': 3,\n", | |
" 'White': 4},\n", | |
" 'sex': {'Female': 0, 'Male': 1},\n", | |
" 'native-country': {'Cambodia': 0,\n", | |
" 'Canada': 1,\n", | |
" 'China': 2,\n", | |
" 'Columbia': 3,\n", | |
" 'Cuba': 4,\n", | |
" 'Dominican-Republic': 5,\n", | |
" 'Ecuador': 6,\n", | |
" 'El-Salvador': 7,\n", | |
" 'England': 8,\n", | |
" 'France': 9,\n", | |
" 'Germany': 10,\n", | |
" 'Greece': 11,\n", | |
" 'Guatemala': 12,\n", | |
" 'Haiti': 13,\n", | |
" 'Honduras': 14,\n", | |
" 'Hong': 15,\n", | |
" 'Hungary': 16,\n", | |
" 'India': 17,\n", | |
" 'Iran': 18,\n", | |
" 'Ireland': 19,\n", | |
" 'Italy': 20,\n", | |
" 'Jamaica': 21,\n", | |
" 'Japan': 22,\n", | |
" 'Laos': 23,\n", | |
" 'Mexico': 24,\n", | |
" 'Nicaragua': 25,\n", | |
" 'Outlying-US(Guam-USVI-etc)': 26,\n", | |
" 'Peru': 27,\n", | |
" 'Philippines': 28,\n", | |
" 'Poland': 29,\n", | |
" 'Portugal': 30,\n", | |
" 'Puerto-Rico': 31,\n", | |
" 'Scotland': 32,\n", | |
" 'South': 33,\n", | |
" 'Taiwan': 34,\n", | |
" 'Thailand': 35,\n", | |
" 'Trinadad&Tobago': 36,\n", | |
" 'United-States': 37,\n", | |
" 'Vietnam': 38,\n", | |
" 'Yugoslavia': 39}}}],\n", | |
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n", | |
" {'missing_values': [{'fill_method': 'na_fill_median',\n", | |
" 'fill_params': {'workclass': 'Private',\n", | |
" 'occupation': 'Prof-specialty',\n", | |
" 'native-country': 'United-States'}}],\n", | |
" 'categorical': [{'convert_method': 'categorical_to_int',\n", | |
" 'convert_params': {'workclass': {'Federal-gov': 0,\n", | |
" 'Local-gov': 1,\n", | |
" 'Never-worked': 2,\n", | |
" 'Private': 3,\n", | |
" 'Self-emp-inc': 4,\n", | |
" 'Self-emp-not-inc': 5,\n", | |
" 'State-gov': 6,\n", | |
" 'Without-pay': 7},\n", | |
" 'education': {'10th': 0,\n", | |
" '11th': 1,\n", | |
" '12th': 2,\n", | |
" '1st-4th': 3,\n", | |
" '5th-6th': 4,\n", | |
" '7th-8th': 5,\n", | |
" '9th': 6,\n", | |
" 'Assoc-acdm': 7,\n", | |
" 'Assoc-voc': 8,\n", | |
" 'Bachelors': 9,\n", | |
" 'Doctorate': 10,\n", | |
" 'HS-grad': 11,\n", | |
" 'Masters': 12,\n", | |
" 'Preschool': 13,\n", | |
" 'Prof-school': 14,\n", | |
" 'Some-college': 15},\n", | |
" 'marital-status': {'Divorced': 0,\n", | |
" 'Married-AF-spouse': 1,\n", | |
" 'Married-civ-spouse': 2,\n", | |
" 'Married-spouse-absent': 3,\n", | |
" 'Never-married': 4,\n", | |
" 'Separated': 5,\n", | |
" 'Widowed': 6},\n", | |
" 'occupation': {'Adm-clerical': 0,\n", | |
" 'Armed-Forces': 1,\n", | |
" 'Craft-repair': 2,\n", | |
" 'Exec-managerial': 3,\n", | |
" 'Farming-fishing': 4,\n", | |
" 'Handlers-cleaners': 5,\n", | |
" 'Machine-op-inspct': 6,\n", | |
" 'Other-service': 7,\n", | |
" 'Priv-house-serv': 8,\n", | |
" 'Prof-specialty': 9,\n", | |
" 'Protective-serv': 10,\n", | |
" 'Sales': 11,\n", | |
" 'Tech-support': 12,\n", | |
" 'Transport-moving': 13},\n", | |
" 'relationship': {'Husband': 0,\n", | |
" 'Not-in-family': 1,\n", | |
" 'Other-relative': 2,\n", | |
" 'Own-child': 3,\n", | |
" 'Unmarried': 4,\n", | |
" 'Wife': 5},\n", | |
" 'race': {'Amer-Indian-Eskimo': 0,\n", | |
" 'Asian-Pac-Islander': 1,\n", | |
" 'Black': 2,\n", | |
" 'Other': 3,\n", | |
" 'White': 4},\n", | |
" 'sex': {'Female': 0, 'Male': 1},\n", | |
" 'native-country': {'Cambodia': 0,\n", | |
" 'Canada': 1,\n", | |
" 'China': 2,\n", | |
" 'Columbia': 3,\n", | |
" 'Cuba': 4,\n", | |
" 'Dominican-Republic': 5,\n", | |
" 'Ecuador': 6,\n", | |
" 'El-Salvador': 7,\n", | |
" 'England': 8,\n", | |
" 'France': 9,\n", | |
" 'Germany': 10,\n", | |
" 'Greece': 11,\n", | |
" 'Guatemala': 12,\n", | |
" 'Haiti': 13,\n", | |
" 'Honduras': 14,\n", | |
" 'Hong': 15,\n", | |
" 'Hungary': 16,\n", | |
" 'India': 17,\n", | |
" 'Iran': 18,\n", | |
" 'Ireland': 19,\n", | |
" 'Italy': 20,\n", | |
" 'Jamaica': 21,\n", | |
" 'Japan': 22,\n", | |
" 'Laos': 23,\n", | |
" 'Mexico': 24,\n", | |
" 'Nicaragua': 25,\n", | |
" 'Outlying-US(Guam-USVI-etc)': 26,\n", | |
" 'Peru': 27,\n", | |
" 'Philippines': 28,\n", | |
" 'Poland': 29,\n", | |
" 'Portugal': 30,\n", | |
" 'Puerto-Rico': 31,\n", | |
" 'Scotland': 32,\n", | |
" 'South': 33,\n", | |
" 'Taiwan': 34,\n", | |
" 'Thailand': 35,\n", | |
" 'Trinadad&Tobago': 36,\n", | |
" 'United-States': 37,\n", | |
" 'Vietnam': 38,\n", | |
" 'Yugoslavia': 39}}}],\n", | |
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n", | |
" {'missing_values': [{'fill_method': 'na_fill_median',\n", | |
" 'fill_params': {'workclass': 'Private',\n", | |
" 'occupation': 'Prof-specialty',\n", | |
" 'native-country': 'United-States'}}],\n", | |
" 'categorical': [{'convert_method': 'categorical_to_int',\n", | |
" 'convert_params': {'workclass': {'Federal-gov': 0,\n", | |
" 'Local-gov': 1,\n", | |
" 'Never-worked': 2,\n", | |
" 'Private': 3,\n", | |
" 'Self-emp-inc': 4,\n", | |
" 'Self-emp-not-inc': 5,\n", | |
" 'State-gov': 6,\n", | |
" 'Without-pay': 7},\n", | |
" 'education': {'10th': 0,\n", | |
" '11th': 1,\n", | |
" '12th': 2,\n", | |
" '1st-4th': 3,\n", | |
" '5th-6th': 4,\n", | |
" '7th-8th': 5,\n", | |
" '9th': 6,\n", | |
" 'Assoc-acdm': 7,\n", | |
" 'Assoc-voc': 8,\n", | |
" 'Bachelors': 9,\n", | |
" 'Doctorate': 10,\n", | |
" 'HS-grad': 11,\n", | |
" 'Masters': 12,\n", | |
" 'Preschool': 13,\n", | |
" 'Prof-school': 14,\n", | |
" 'Some-college': 15},\n", | |
" 'marital-status': {'Divorced': 0,\n", | |
" 'Married-AF-spouse': 1,\n", | |
" 'Married-civ-spouse': 2,\n", | |
" 'Married-spouse-absent': 3,\n", | |
" 'Never-married': 4,\n", | |
" 'Separated': 5,\n", | |
" 'Widowed': 6},\n", | |
" 'occupation': {'Adm-clerical': 0,\n", | |
" 'Armed-Forces': 1,\n", | |
" 'Craft-repair': 2,\n", | |
" 'Exec-managerial': 3,\n", | |
" 'Farming-fishing': 4,\n", | |
" 'Handlers-cleaners': 5,\n", | |
" 'Machine-op-inspct': 6,\n", | |
" 'Other-service': 7,\n", | |
" 'Priv-house-serv': 8,\n", | |
" 'Prof-specialty': 9,\n", | |
" 'Protective-serv': 10,\n", | |
" 'Sales': 11,\n", | |
" 'Tech-support': 12,\n", | |
" 'Transport-moving': 13},\n", | |
" 'relationship': {'Husband': 0,\n", | |
" 'Not-in-family': 1,\n", | |
" 'Other-relative': 2,\n", | |
" 'Own-child': 3,\n", | |
" 'Unmarried': 4,\n", | |
" 'Wife': 5},\n", | |
" 'race': {'Amer-Indian-Eskimo': 0,\n", | |
" 'Asian-Pac-Islander': 1,\n", | |
" 'Black': 2,\n", | |
" 'Other': 3,\n", | |
" 'White': 4},\n", | |
" 'sex': {'Female': 0, 'Male': 1},\n", | |
" 'native-country': {'Cambodia': 0,\n", | |
" 'Canada': 1,\n", | |
" 'China': 2,\n", | |
" 'Columbia': 3,\n", | |
" 'Cuba': 4,\n", | |
" 'Dominican-Republic': 5,\n", | |
" 'Ecuador': 6,\n", | |
" 'El-Salvador': 7,\n", | |
" 'England': 8,\n", | |
" 'France': 9,\n", | |
" 'Germany': 10,\n", | |
" 'Greece': 11,\n", | |
" 'Guatemala': 12,\n", | |
" 'Haiti': 13,\n", | |
" 'Honduras': 14,\n", | |
" 'Hong': 15,\n", | |
" 'Hungary': 16,\n", | |
" 'India': 17,\n", | |
" 'Iran': 18,\n", | |
" 'Ireland': 19,\n", | |
" 'Italy': 20,\n", | |
" 'Jamaica': 21,\n", | |
" 'Japan': 22,\n", | |
" 'Laos': 23,\n", | |
" 'Mexico': 24,\n", | |
" 'Nicaragua': 25,\n", | |
" 'Outlying-US(Guam-USVI-etc)': 26,\n", | |
" 'Peru': 27,\n", | |
" 'Philippines': 28,\n", | |
" 'Poland': 29,\n", | |
" 'Portugal': 30,\n", | |
" 'Puerto-Rico': 31,\n", | |
" 'Scotland': 32,\n", | |
" 'South': 33,\n", | |
" 'Taiwan': 34,\n", | |
" 'Thailand': 35,\n", | |
" 'Trinadad&Tobago': 36,\n", | |
" 'United-States': 37,\n", | |
" 'Vietnam': 38,\n", | |
" 'Yugoslavia': 39}}}],\n", | |
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n", | |
" {'missing_values': [{'fill_method': 'na_fill_median',\n", | |
" 'fill_params': {'workclass': 'Private',\n", | |
" 'occupation': 'Prof-specialty',\n", | |
" 'native-country': 'United-States'}}],\n", | |
" 'categorical': [{'convert_method': 'categorical_to_int',\n", | |
" 'convert_params': {'workclass': {'Federal-gov': 0,\n", | |
" 'Local-gov': 1,\n", | |
" 'Never-worked': 2,\n", | |
" 'Private': 3,\n", | |
" 'Self-emp-inc': 4,\n", | |
" 'Self-emp-not-inc': 5,\n", | |
" 'State-gov': 6,\n", | |
" 'Without-pay': 7},\n", | |
" 'education': {'10th': 0,\n", | |
" '11th': 1,\n", | |
" '12th': 2,\n", | |
" '1st-4th': 3,\n", | |
" '5th-6th': 4,\n", | |
" '7th-8th': 5,\n", | |
" '9th': 6,\n", | |
" 'Assoc-acdm': 7,\n", | |
" 'Assoc-voc': 8,\n", | |
" 'Bachelors': 9,\n", | |
" 'Doctorate': 10,\n", | |
" 'HS-grad': 11,\n", | |
" 'Masters': 12,\n", | |
" 'Preschool': 13,\n", | |
" 'Prof-school': 14,\n", | |
" 'Some-college': 15},\n", | |
" 'marital-status': {'Divorced': 0,\n", | |
" 'Married-AF-spouse': 1,\n", | |
" 'Married-civ-spouse': 2,\n", | |
" 'Married-spouse-absent': 3,\n", | |
" 'Never-married': 4,\n", | |
" 'Separated': 5,\n", | |
" 'Widowed': 6},\n", | |
" 'occupation': {'Adm-clerical': 0,\n", | |
" 'Armed-Forces': 1,\n", | |
" 'Craft-repair': 2,\n", | |
" 'Exec-managerial': 3,\n", | |
" 'Farming-fishing': 4,\n", | |
" 'Handlers-cleaners': 5,\n", | |
" 'Machine-op-inspct': 6,\n", | |
" 'Other-service': 7,\n", | |
" 'Priv-house-serv': 8,\n", | |
" 'Prof-specialty': 9,\n", | |
" 'Protective-serv': 10,\n", | |
" 'Sales': 11,\n", | |
" 'Tech-support': 12,\n", | |
" 'Transport-moving': 13},\n", | |
" 'relationship': {'Husband': 0,\n", | |
" 'Not-in-family': 1,\n", | |
" 'Other-relative': 2,\n", | |
" 'Own-child': 3,\n", | |
" 'Unmarried': 4,\n", | |
" 'Wife': 5},\n", | |
" 'race': {'Amer-Indian-Eskimo': 0,\n", | |
" 'Asian-Pac-Islander': 1,\n", | |
" 'Black': 2,\n", | |
" 'Other': 3,\n", | |
" 'White': 4},\n", | |
" 'sex': {'Female': 0, 'Male': 1},\n", | |
" 'native-country': {'Cambodia': 0,\n", | |
" 'Canada': 1,\n", | |
" 'China': 2,\n", | |
" 'Columbia': 3,\n", | |
" 'Cuba': 4,\n", | |
" 'Dominican-Republic': 5,\n", | |
" 'Ecuador': 6,\n", | |
" 'El-Salvador': 7,\n", | |
" 'England': 8,\n", | |
" 'France': 9,\n", | |
" 'Germany': 10,\n", | |
" 'Greece': 11,\n", | |
" 'Guatemala': 12,\n", | |
" 'Haiti': 13,\n", | |
" 'Honduras': 14,\n", | |
" 'Hong': 15,\n", | |
" 'Hungary': 16,\n", | |
" 'India': 17,\n", | |
" 'Iran': 18,\n", | |
" 'Ireland': 19,\n", | |
" 'Italy': 20,\n", | |
" 'Jamaica': 21,\n", | |
" 'Japan': 22,\n", | |
" 'Laos': 23,\n", | |
" 'Mexico': 24,\n", | |
" 'Nicaragua': 25,\n", | |
" 'Outlying-US(Guam-USVI-etc)': 26,\n", | |
" 'Peru': 27,\n", | |
" 'Philippines': 28,\n", | |
" 'Poland': 29,\n", | |
" 'Portugal': 30,\n", | |
" 'Puerto-Rico': 31,\n", | |
" 'Scotland': 32,\n", | |
" 'South': 33,\n", | |
" 'Taiwan': 34,\n", | |
" 'Thailand': 35,\n", | |
" 'Trinadad&Tobago': 36,\n", | |
" 'United-States': 37,\n", | |
" 'Vietnam': 38,\n", | |
" 'Yugoslavia': 39}}}],\n", | |
" 'categorical_y': {'<=50K': 0, '>50K': 1}}],\n", | |
" 'learners': [{'library_version': '0.20.3',\n", | |
" 'algorithm_name': 'Random Forest',\n", | |
" 'algorithm_short_name': 'RF',\n", | |
" 'uid': '6314141c-61a4-41a2-8ede-9f5ccfa443fd',\n", | |
" 'model_file': '6314141c-61a4-41a2-8ede-9f5ccfa443fd.rf.model',\n", | |
" 'model_file_path': '/tmp/6314141c-61a4-41a2-8ede-9f5ccfa443fd.rf.model',\n", | |
" 'params': {'model_type': 'RF',\n", | |
" 'seed': 3,\n", | |
" 'criterion': 'entropy',\n", | |
" 'max_features': 0.6,\n", | |
" 'min_samples_split': 8,\n", | |
" 'min_samples_leaf': 20}},\n", | |
" {'library_version': '0.20.3',\n", | |
" 'algorithm_name': 'Random Forest',\n", | |
" 'algorithm_short_name': 'RF',\n", | |
" 'uid': '80e35a7c-51ac-445a-937e-2afa4e178d82',\n", | |
" 'model_file': '80e35a7c-51ac-445a-937e-2afa4e178d82.rf.model',\n", | |
" 'model_file_path': '/tmp/80e35a7c-51ac-445a-937e-2afa4e178d82.rf.model',\n", | |
" 'params': {'model_type': 'RF',\n", | |
" 'seed': 3,\n", | |
" 'criterion': 'entropy',\n", | |
" 'max_features': 0.6,\n", | |
" 'min_samples_split': 8,\n", | |
" 'min_samples_leaf': 20}},\n", | |
" {'library_version': '0.20.3',\n", | |
" 'algorithm_name': 'Random Forest',\n", | |
" 'algorithm_short_name': 'RF',\n", | |
" 'uid': 'd4e24c6e-d7fc-44d2-92bd-f77ac1e945fe',\n", | |
" 'model_file': 'd4e24c6e-d7fc-44d2-92bd-f77ac1e945fe.rf.model',\n", | |
" 'model_file_path': '/tmp/d4e24c6e-d7fc-44d2-92bd-f77ac1e945fe.rf.model',\n", | |
" 'params': {'model_type': 'RF',\n", | |
" 'seed': 3,\n", | |
" 'criterion': 'entropy',\n", | |
" 'max_features': 0.6,\n", | |
" 'min_samples_split': 8,\n", | |
" 'min_samples_leaf': 20}},\n", | |
" {'library_version': '0.20.3',\n", | |
" 'algorithm_name': 'Random Forest',\n", | |
" 'algorithm_short_name': 'RF',\n", | |
" 'uid': 'dc6e330e-eb9a-40f0-a39c-2c72551f1046',\n", | |
" 'model_file': 'dc6e330e-eb9a-40f0-a39c-2c72551f1046.rf.model',\n", | |
" 'model_file_path': '/tmp/dc6e330e-eb9a-40f0-a39c-2c72551f1046.rf.model',\n", | |
" 'params': {'model_type': 'RF',\n", | |
" 'seed': 3,\n", | |
" 'criterion': 'entropy',\n", | |
" 'max_features': 0.6,\n", | |
" 'min_samples_split': 8,\n", | |
" 'min_samples_leaf': 20}},\n", | |
" {'library_version': '0.20.3',\n", | |
" 'algorithm_name': 'Random Forest',\n", | |
" 'algorithm_short_name': 'RF',\n", | |
" 'uid': '3867f64a-6a73-482b-bf45-dc33de6ff63c',\n", | |
" 'model_file': '3867f64a-6a73-482b-bf45-dc33de6ff63c.rf.model',\n", | |
" 'model_file_path': '/tmp/3867f64a-6a73-482b-bf45-dc33de6ff63c.rf.model',\n", | |
" 'params': {'model_type': 'RF',\n", | |
" 'seed': 3,\n", | |
" 'criterion': 'entropy',\n", | |
" 'max_features': 0.6,\n", | |
" 'min_samples_split': 8,\n", | |
" 'min_samples_leaf': 20}}],\n", | |
" 'params': {'additional': {'trees_in_step': 10,\n", | |
" 'train_cant_improve_limit': 5,\n", | |
" 'max_steps': 500,\n", | |
" 'max_rows_limit': None,\n", | |
" 'max_cols_limit': None},\n", | |
" 'preprocessing': {'columns_preprocessing': {'workclass': ['na_fill_median',\n", | |
" 'categorical_to_int'],\n", | |
" 'education': ['categorical_to_int'],\n", | |
" 'marital-status': ['categorical_to_int'],\n", | |
" 'occupation': ['na_fill_median', 'categorical_to_int'],\n", | |
" 'relationship': ['categorical_to_int'],\n", | |
" 'race': ['categorical_to_int'],\n", | |
" 'sex': ['categorical_to_int'],\n", | |
" 'native-country': ['na_fill_median', 'categorical_to_int']},\n", | |
" 'target_preprocessing': ['na_exclude', 'categorical_to_int']},\n", | |
" 'validation': {'validation_type': 'kfold',\n", | |
" 'k_folds': 5,\n", | |
" 'shuffle': True},\n", | |
" 'learner': {'model_type': 'RF',\n", | |
" 'seed': 3,\n", | |
" 'criterion': 'entropy',\n", | |
" 'max_features': 0.6,\n", | |
" 'min_samples_split': 8,\n", | |
" 'min_samples_leaf': 20}}},\n", | |
" 'repeat': 2},\n", | |
" {'model': {'uid': '066e83f8-42f3-4f90-8956-a1c60caa1e55',\n", | |
" 'algorithm_short_name': 'RF',\n", | |
" 'framework_file': '066e83f8-42f3-4f90-8956-a1c60caa1e55.framework',\n", | |
" 'framework_file_path': '/tmp/066e83f8-42f3-4f90-8956-a1c60caa1e55.framework',\n", | |
" 'preprocessing': [{'missing_values': [{'fill_method': 'na_fill_median',\n", | |
" 'fill_params': {'workclass': 'Private',\n", | |
" 'occupation': 'Prof-specialty',\n", | |
" 'native-country': 'United-States'}}],\n", | |
" 'categorical': [{'convert_method': 'categorical_to_int',\n", | |
" 'convert_params': {'workclass': {'Federal-gov': 0,\n", | |
" 'Local-gov': 1,\n", | |
" 'Never-worked': 2,\n", | |
" 'Private': 3,\n", | |
" 'Self-emp-inc': 4,\n", | |
" 'Self-emp-not-inc': 5,\n", | |
" 'State-gov': 6,\n", | |
" 'Without-pay': 7},\n", | |
" 'education': {'10th': 0,\n", | |
" '11th': 1,\n", | |
" '12th': 2,\n", | |
" '1st-4th': 3,\n", | |
" '5th-6th': 4,\n", | |
" '7th-8th': 5,\n", | |
" '9th': 6,\n", | |
" 'Assoc-acdm': 7,\n", | |
" 'Assoc-voc': 8,\n", | |
" 'Bachelors': 9,\n", | |
" 'Doctorate': 10,\n", | |
" 'HS-grad': 11,\n", | |
" 'Masters': 12,\n", | |
" 'Preschool': 13,\n", | |
" 'Prof-school': 14,\n", | |
" 'Some-college': 15},\n", | |
" 'marital-status': {'Divorced': 0,\n", | |
" 'Married-AF-spouse': 1,\n", | |
" 'Married-civ-spouse': 2,\n", | |
" 'Married-spouse-absent': 3,\n", | |
" 'Never-married': 4,\n", | |
" 'Separated': 5,\n", | |
" 'Widowed': 6},\n", | |
" 'occupation': {'Adm-clerical': 0,\n", | |
" 'Armed-Forces': 1,\n", | |
" 'Craft-repair': 2,\n", | |
" 'Exec-managerial': 3,\n", | |
" 'Farming-fishing': 4,\n", | |
" 'Handlers-cleaners': 5,\n", | |
" 'Machine-op-inspct': 6,\n", | |
" 'Other-service': 7,\n", | |
" 'Priv-house-serv': 8,\n", | |
" 'Prof-specialty': 9,\n", | |
" 'Protective-serv': 10,\n", | |
" 'Sales': 11,\n", | |
" 'Tech-support': 12,\n", | |
" 'Transport-moving': 13},\n", | |
" 'relationship': {'Husband': 0,\n", | |
" 'Not-in-family': 1,\n", | |
" 'Other-relative': 2,\n", | |
" 'Own-child': 3,\n", | |
" 'Unmarried': 4,\n", | |
" 'Wife': 5},\n", | |
" 'race': {'Amer-Indian-Eskimo': 0,\n", | |
" 'Asian-Pac-Islander': 1,\n", | |
" 'Black': 2,\n", | |
" 'Other': 3,\n", | |
" 'White': 4},\n", | |
" 'sex': {'Female': 0, 'Male': 1},\n", | |
" 'native-country': {'Cambodia': 0,\n", | |
" 'Canada': 1,\n", | |
" 'China': 2,\n", | |
" 'Columbia': 3,\n", | |
" 'Cuba': 4,\n", | |
" 'Dominican-Republic': 5,\n", | |
" 'Ecuador': 6,\n", | |
" 'El-Salvador': 7,\n", | |
" 'England': 8,\n", | |
" 'France': 9,\n", | |
" 'Germany': 10,\n", | |
" 'Greece': 11,\n", | |
" 'Guatemala': 12,\n", | |
" 'Haiti': 13,\n", | |
" 'Honduras': 14,\n", | |
" 'Hong': 15,\n", | |
" 'Hungary': 16,\n", | |
" 'India': 17,\n", | |
" 'Iran': 18,\n", | |
" 'Ireland': 19,\n", | |
" 'Italy': 20,\n", | |
" 'Jamaica': 21,\n", | |
" 'Japan': 22,\n", | |
" 'Laos': 23,\n", | |
" 'Mexico': 24,\n", | |
" 'Nicaragua': 25,\n", | |
" 'Outlying-US(Guam-USVI-etc)': 26,\n", | |
" 'Peru': 27,\n", | |
" 'Philippines': 28,\n", | |
" 'Poland': 29,\n", | |
" 'Portugal': 30,\n", | |
" 'Puerto-Rico': 31,\n", | |
" 'Scotland': 32,\n", | |
" 'South': 33,\n", | |
" 'Taiwan': 34,\n", | |
" 'Thailand': 35,\n", | |
" 'Trinadad&Tobago': 36,\n", | |
" 'United-States': 37,\n", | |
" 'Vietnam': 38,\n", | |
" 'Yugoslavia': 39}}}],\n", | |
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n", | |
" {'missing_values': [{'fill_method': 'na_fill_median',\n", | |
" 'fill_params': {'workclass': 'Private',\n", | |
" 'occupation': 'Prof-specialty',\n", | |
" 'native-country': 'United-States'}}],\n", | |
" 'categorical': [{'convert_method': 'categorical_to_int',\n", | |
" 'convert_params': {'workclass': {'Federal-gov': 0,\n", | |
" 'Local-gov': 1,\n", | |
" 'Never-worked': 2,\n", | |
" 'Private': 3,\n", | |
" 'Self-emp-inc': 4,\n", | |
" 'Self-emp-not-inc': 5,\n", | |
" 'State-gov': 6,\n", | |
" 'Without-pay': 7},\n", | |
" 'education': {'10th': 0,\n", | |
" '11th': 1,\n", | |
" '12th': 2,\n", | |
" '1st-4th': 3,\n", | |
" '5th-6th': 4,\n", | |
" '7th-8th': 5,\n", | |
" '9th': 6,\n", | |
" 'Assoc-acdm': 7,\n", | |
" 'Assoc-voc': 8,\n", | |
" 'Bachelors': 9,\n", | |
" 'Doctorate': 10,\n", | |
" 'HS-grad': 11,\n", | |
" 'Masters': 12,\n", | |
" 'Preschool': 13,\n", | |
" 'Prof-school': 14,\n", | |
" 'Some-college': 15},\n", | |
" 'marital-status': {'Divorced': 0,\n", | |
" 'Married-AF-spouse': 1,\n", | |
" 'Married-civ-spouse': 2,\n", | |
" 'Married-spouse-absent': 3,\n", | |
" 'Never-married': 4,\n", | |
" 'Separated': 5,\n", | |
" 'Widowed': 6},\n", | |
" 'occupation': {'Adm-clerical': 0,\n", | |
" 'Armed-Forces': 1,\n", | |
" 'Craft-repair': 2,\n", | |
" 'Exec-managerial': 3,\n", | |
" 'Farming-fishing': 4,\n", | |
" 'Handlers-cleaners': 5,\n", | |
" 'Machine-op-inspct': 6,\n", | |
" 'Other-service': 7,\n", | |
" 'Priv-house-serv': 8,\n", | |
" 'Prof-specialty': 9,\n", | |
" 'Protective-serv': 10,\n", | |
" 'Sales': 11,\n", | |
" 'Tech-support': 12,\n", | |
" 'Transport-moving': 13},\n", | |
" 'relationship': {'Husband': 0,\n", | |
" 'Not-in-family': 1,\n", | |
" 'Other-relative': 2,\n", | |
" 'Own-child': 3,\n", | |
" 'Unmarried': 4,\n", | |
" 'Wife': 5},\n", | |
" 'race': {'Amer-Indian-Eskimo': 0,\n", | |
" 'Asian-Pac-Islander': 1,\n", | |
" 'Black': 2,\n", | |
" 'Other': 3,\n", | |
" 'White': 4},\n", | |
" 'sex': {'Female': 0, 'Male': 1},\n", | |
" 'native-country': {'Cambodia': 0,\n", | |
" 'Canada': 1,\n", | |
" 'China': 2,\n", | |
" 'Columbia': 3,\n", | |
" 'Cuba': 4,\n", | |
" 'Dominican-Republic': 5,\n", | |
" 'Ecuador': 6,\n", | |
" 'El-Salvador': 7,\n", | |
" 'England': 8,\n", | |
" 'France': 9,\n", | |
" 'Germany': 10,\n", | |
" 'Greece': 11,\n", | |
" 'Guatemala': 12,\n", | |
" 'Haiti': 13,\n", | |
" 'Honduras': 14,\n", | |
" 'Hong': 15,\n", | |
" 'Hungary': 16,\n", | |
" 'India': 17,\n", | |
" 'Iran': 18,\n", | |
" 'Ireland': 19,\n", | |
" 'Italy': 20,\n", | |
" 'Jamaica': 21,\n", | |
" 'Japan': 22,\n", | |
" 'Laos': 23,\n", | |
" 'Mexico': 24,\n", | |
" 'Nicaragua': 25,\n", | |
" 'Outlying-US(Guam-USVI-etc)': 26,\n", | |
" 'Peru': 27,\n", | |
" 'Philippines': 28,\n", | |
" 'Poland': 29,\n", | |
" 'Portugal': 30,\n", | |
" 'Puerto-Rico': 31,\n", | |
" 'Scotland': 32,\n", | |
" 'South': 33,\n", | |
" 'Taiwan': 34,\n", | |
" 'Thailand': 35,\n", | |
" 'Trinadad&Tobago': 36,\n", | |
" 'United-States': 37,\n", | |
" 'Vietnam': 38,\n", | |
" 'Yugoslavia': 39}}}],\n", | |
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n", | |
" {'missing_values': [{'fill_method': 'na_fill_median',\n", | |
" 'fill_params': {'workclass': 'Private',\n", | |
" 'occupation': 'Prof-specialty',\n", | |
" 'native-country': 'United-States'}}],\n", | |
" 'categorical': [{'convert_method': 'categorical_to_int',\n", | |
" 'convert_params': {'workclass': {'Federal-gov': 0,\n", | |
" 'Local-gov': 1,\n", | |
" 'Never-worked': 2,\n", | |
" 'Private': 3,\n", | |
" 'Self-emp-inc': 4,\n", | |
" 'Self-emp-not-inc': 5,\n", | |
" 'State-gov': 6,\n", | |
" 'Without-pay': 7},\n", | |
" 'education': {'10th': 0,\n", | |
" '11th': 1,\n", | |
" '12th': 2,\n", | |
" '1st-4th': 3,\n", | |
" '5th-6th': 4,\n", | |
" '7th-8th': 5,\n", | |
" '9th': 6,\n", | |
" 'Assoc-acdm': 7,\n", | |
" 'Assoc-voc': 8,\n", | |
" 'Bachelors': 9,\n", | |
" 'Doctorate': 10,\n", | |
" 'HS-grad': 11,\n", | |
" 'Masters': 12,\n", | |
" 'Preschool': 13,\n", | |
" 'Prof-school': 14,\n", | |
" 'Some-college': 15},\n", | |
" 'marital-status': {'Divorced': 0,\n", | |
" 'Married-AF-spouse': 1,\n", | |
" 'Married-civ-spouse': 2,\n", | |
" 'Married-spouse-absent': 3,\n", | |
" 'Never-married': 4,\n", | |
" 'Separated': 5,\n", | |
" 'Widowed': 6},\n", | |
" 'occupation': {'Adm-clerical': 0,\n", | |
" 'Armed-Forces': 1,\n", | |
" 'Craft-repair': 2,\n", | |
" 'Exec-managerial': 3,\n", | |
" 'Farming-fishing': 4,\n", | |
" 'Handlers-cleaners': 5,\n", | |
" 'Machine-op-inspct': 6,\n", | |
" 'Other-service': 7,\n", | |
" 'Priv-house-serv': 8,\n", | |
" 'Prof-specialty': 9,\n", | |
" 'Protective-serv': 10,\n", | |
" 'Sales': 11,\n", | |
" 'Tech-support': 12,\n", | |
" 'Transport-moving': 13},\n", | |
" 'relationship': {'Husband': 0,\n", | |
" 'Not-in-family': 1,\n", | |
" 'Other-relative': 2,\n", | |
" 'Own-child': 3,\n", | |
" 'Unmarried': 4,\n", | |
" 'Wife': 5},\n", | |
" 'race': {'Amer-Indian-Eskimo': 0,\n", | |
" 'Asian-Pac-Islander': 1,\n", | |
" 'Black': 2,\n", | |
" 'Other': 3,\n", | |
" 'White': 4},\n", | |
" 'sex': {'Female': 0, 'Male': 1},\n", | |
" 'native-country': {'Cambodia': 0,\n", | |
" 'Canada': 1,\n", | |
" 'China': 2,\n", | |
" 'Columbia': 3,\n", | |
" 'Cuba': 4,\n", | |
" 'Dominican-Republic': 5,\n", | |
" 'Ecuador': 6,\n", | |
" 'El-Salvador': 7,\n", | |
" 'England': 8,\n", | |
" 'France': 9,\n", | |
" 'Germany': 10,\n", | |
" 'Greece': 11,\n", | |
" 'Guatemala': 12,\n", | |
" 'Haiti': 13,\n", | |
" 'Honduras': 14,\n", | |
" 'Hong': 15,\n", | |
" 'Hungary': 16,\n", | |
" 'India': 17,\n", | |
" 'Iran': 18,\n", | |
" 'Ireland': 19,\n", | |
" 'Italy': 20,\n", | |
" 'Jamaica': 21,\n", | |
" 'Japan': 22,\n", | |
" 'Laos': 23,\n", | |
" 'Mexico': 24,\n", | |
" 'Nicaragua': 25,\n", | |
" 'Outlying-US(Guam-USVI-etc)': 26,\n", | |
" 'Peru': 27,\n", | |
" 'Philippines': 28,\n", | |
" 'Poland': 29,\n", | |
" 'Portugal': 30,\n", | |
" 'Puerto-Rico': 31,\n", | |
" 'Scotland': 32,\n", | |
" 'South': 33,\n", | |
" 'Taiwan': 34,\n", | |
" 'Thailand': 35,\n", | |
" 'Trinadad&Tobago': 36,\n", | |
" 'United-States': 37,\n", | |
" 'Vietnam': 38,\n", | |
" 'Yugoslavia': 39}}}],\n", | |
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n", | |
" {'missing_values': [{'fill_method': 'na_fill_median',\n", | |
" 'fill_params': {'workclass': 'Private',\n", | |
" 'occupation': 'Prof-specialty',\n", | |
" 'native-country': 'United-States'}}],\n", | |
" 'categorical': [{'convert_method': 'categorical_to_int',\n", | |
" 'convert_params': {'workclass': {'Federal-gov': 0,\n", | |
" 'Local-gov': 1,\n", | |
" 'Never-worked': 2,\n", | |
" 'Private': 3,\n", | |
" 'Self-emp-inc': 4,\n", | |
" 'Self-emp-not-inc': 5,\n", | |
" 'State-gov': 6,\n", | |
" 'Without-pay': 7},\n", | |
" 'education': {'10th': 0,\n", | |
" '11th': 1,\n", | |
" '12th': 2,\n", | |
" '1st-4th': 3,\n", | |
" '5th-6th': 4,\n", | |
" '7th-8th': 5,\n", | |
" '9th': 6,\n", | |
" 'Assoc-acdm': 7,\n", | |
" 'Assoc-voc': 8,\n", | |
" 'Bachelors': 9,\n", | |
" 'Doctorate': 10,\n", | |
" 'HS-grad': 11,\n", | |
" 'Masters': 12,\n", | |
" 'Preschool': 13,\n", | |
" 'Prof-school': 14,\n", | |
" 'Some-college': 15},\n", | |
" 'marital-status': {'Divorced': 0,\n", | |
" 'Married-AF-spouse': 1,\n", | |
" 'Married-civ-spouse': 2,\n", | |
" 'Married-spouse-absent': 3,\n", | |
" 'Never-married': 4,\n", | |
" 'Separated': 5,\n", | |
" 'Widowed': 6},\n", | |
" 'occupation': {'Adm-clerical': 0,\n", | |
" 'Armed-Forces': 1,\n", | |
" 'Craft-repair': 2,\n", | |
" 'Exec-managerial': 3,\n", | |
" 'Farming-fishing': 4,\n", | |
" 'Handlers-cleaners': 5,\n", | |
" 'Machine-op-inspct': 6,\n", | |
" 'Other-service': 7,\n", | |
" 'Priv-house-serv': 8,\n", | |
" 'Prof-specialty': 9,\n", | |
" 'Protective-serv': 10,\n", | |
" 'Sales': 11,\n", | |
" 'Tech-support': 12,\n", | |
" 'Transport-moving': 13},\n", | |
" 'relationship': {'Husband': 0,\n", | |
" 'Not-in-family': 1,\n", | |
" 'Other-relative': 2,\n", | |
" 'Own-child': 3,\n", | |
" 'Unmarried': 4,\n", | |
" 'Wife': 5},\n", | |
" 'race': {'Amer-Indian-Eskimo': 0,\n", | |
" 'Asian-Pac-Islander': 1,\n", | |
" 'Black': 2,\n", | |
" 'Other': 3,\n", | |
" 'White': 4},\n", | |
" 'sex': {'Female': 0, 'Male': 1},\n", | |
" 'native-country': {'Cambodia': 0,\n", | |
" 'Canada': 1,\n", | |
" 'China': 2,\n", | |
" 'Columbia': 3,\n", | |
" 'Cuba': 4,\n", | |
" 'Dominican-Republic': 5,\n", | |
" 'Ecuador': 6,\n", | |
" 'El-Salvador': 7,\n", | |
" 'England': 8,\n", | |
" 'France': 9,\n", | |
" 'Germany': 10,\n", | |
" 'Greece': 11,\n", | |
" 'Guatemala': 12,\n", | |
" 'Haiti': 13,\n", | |
" 'Honduras': 14,\n", | |
" 'Hong': 15,\n", | |
" 'Hungary': 16,\n", | |
" 'India': 17,\n", | |
" 'Iran': 18,\n", | |
" 'Ireland': 19,\n", | |
" 'Italy': 20,\n", | |
" 'Jamaica': 21,\n", | |
" 'Japan': 22,\n", | |
" 'Laos': 23,\n", | |
" 'Mexico': 24,\n", | |
" 'Nicaragua': 25,\n", | |
" 'Outlying-US(Guam-USVI-etc)': 26,\n", | |
" 'Peru': 27,\n", | |
" 'Philippines': 28,\n", | |
" 'Poland': 29,\n", | |
" 'Portugal': 30,\n", | |
" 'Puerto-Rico': 31,\n", | |
" 'Scotland': 32,\n", | |
" 'South': 33,\n", | |
" 'Taiwan': 34,\n", | |
" 'Thailand': 35,\n", | |
" 'Trinadad&Tobago': 36,\n", | |
" 'United-States': 37,\n", | |
" 'Vietnam': 38,\n", | |
" 'Yugoslavia': 39}}}],\n", | |
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n", | |
" {'missing_values': [{'fill_method': 'na_fill_median',\n", | |
" 'fill_params': {'workclass': 'Private',\n", | |
" 'occupation': 'Prof-specialty',\n", | |
" 'native-country': 'United-States'}}],\n", | |
" 'categorical': [{'convert_method': 'categorical_to_int',\n", | |
" 'convert_params': {'workclass': {'Federal-gov': 0,\n", | |
" 'Local-gov': 1,\n", | |
" 'Never-worked': 2,\n", | |
" 'Private': 3,\n", | |
" 'Self-emp-inc': 4,\n", | |
" 'Self-emp-not-inc': 5,\n", | |
" 'State-gov': 6,\n", | |
" 'Without-pay': 7},\n", | |
" 'education': {'10th': 0,\n", | |
" '11th': 1,\n", | |
" '12th': 2,\n", | |
" '1st-4th': 3,\n", | |
" '5th-6th': 4,\n", | |
" '7th-8th': 5,\n", | |
" '9th': 6,\n", | |
" 'Assoc-acdm': 7,\n", | |
" 'Assoc-voc': 8,\n", | |
" 'Bachelors': 9,\n", | |
" 'Doctorate': 10,\n", | |
" 'HS-grad': 11,\n", | |
" 'Masters': 12,\n", | |
" 'Preschool': 13,\n", | |
" 'Prof-school': 14,\n", | |
" 'Some-college': 15},\n", | |
" 'marital-status': {'Divorced': 0,\n", | |
" 'Married-AF-spouse': 1,\n", | |
" 'Married-civ-spouse': 2,\n", | |
" 'Married-spouse-absent': 3,\n", | |
" 'Never-married': 4,\n", | |
" 'Separated': 5,\n", | |
" 'Widowed': 6},\n", | |
" 'occupation': {'Adm-clerical': 0,\n", | |
" 'Armed-Forces': 1,\n", | |
" 'Craft-repair': 2,\n", | |
" 'Exec-managerial': 3,\n", | |
" 'Farming-fishing': 4,\n", | |
" 'Handlers-cleaners': 5,\n", | |
" 'Machine-op-inspct': 6,\n", | |
" 'Other-service': 7,\n", | |
" 'Priv-house-serv': 8,\n", | |
" 'Prof-specialty': 9,\n", | |
" 'Protective-serv': 10,\n", | |
" 'Sales': 11,\n", | |
" 'Tech-support': 12,\n", | |
" 'Transport-moving': 13},\n", | |
" 'relationship': {'Husband': 0,\n", | |
" 'Not-in-family': 1,\n", | |
" 'Other-relative': 2,\n", | |
" 'Own-child': 3,\n", | |
" 'Unmarried': 4,\n", | |
" 'Wife': 5},\n", | |
" 'race': {'Amer-Indian-Eskimo': 0,\n", | |
" 'Asian-Pac-Islander': 1,\n", | |
" 'Black': 2,\n", | |
" 'Other': 3,\n", | |
" 'White': 4},\n", | |
" 'sex': {'Female': 0, 'Male': 1},\n", | |
" 'native-country': {'Cambodia': 0,\n", | |
" 'Canada': 1,\n", | |
" 'China': 2,\n", | |
" 'Columbia': 3,\n", | |
" 'Cuba': 4,\n", | |
" 'Dominican-Republic': 5,\n", | |
" 'Ecuador': 6,\n", | |
" 'El-Salvador': 7,\n", | |
" 'England': 8,\n", | |
" 'France': 9,\n", | |
" 'Germany': 10,\n", | |
" 'Greece': 11,\n", | |
" 'Guatemala': 12,\n", | |
" 'Haiti': 13,\n", | |
" 'Honduras': 14,\n", | |
" 'Hong': 15,\n", | |
" 'Hungary': 16,\n", | |
" 'India': 17,\n", | |
" 'Iran': 18,\n", | |
" 'Ireland': 19,\n", | |
" 'Italy': 20,\n", | |
" 'Jamaica': 21,\n", | |
" 'Japan': 22,\n", | |
" 'Laos': 23,\n", | |
" 'Mexico': 24,\n", | |
" 'Nicaragua': 25,\n", | |
" 'Outlying-US(Guam-USVI-etc)': 26,\n", | |
" 'Peru': 27,\n", | |
" 'Philippines': 28,\n", | |
" 'Poland': 29,\n", | |
" 'Portugal': 30,\n", | |
" 'Puerto-Rico': 31,\n", | |
" 'Scotland': 32,\n", | |
" 'South': 33,\n", | |
" 'Taiwan': 34,\n", | |
" 'Thailand': 35,\n", | |
" 'Trinadad&Tobago': 36,\n", | |
" 'United-States': 37,\n", | |
" 'Vietnam': 38,\n", | |
" 'Yugoslavia': 39}}}],\n", | |
" 'categorical_y': {'<=50K': 0, '>50K': 1}}],\n", | |
" 'learners': [{'library_version': '0.20.3',\n", | |
" 'algorithm_name': 'Random Forest',\n", | |
" 'algorithm_short_name': 'RF',\n", | |
" 'uid': '7c909c7e-a998-479c-a7d7-d02e584fc70e',\n", | |
" 'model_file': '7c909c7e-a998-479c-a7d7-d02e584fc70e.rf.model',\n", | |
" 'model_file_path': '/tmp/7c909c7e-a998-479c-a7d7-d02e584fc70e.rf.model',\n", | |
" 'params': {'model_type': 'RF',\n", | |
" 'seed': 4,\n", | |
" 'criterion': 'entropy',\n", | |
" 'max_features': 0.3,\n", | |
" 'min_samples_split': 40,\n", | |
" 'min_samples_leaf': 5}},\n", | |
" {'library_version': '0.20.3',\n", | |
" 'algorithm_name': 'Random Forest',\n", | |
" 'algorithm_short_name': 'RF',\n", | |
" 'uid': 'd2cc8bba-ca0f-4f84-87d8-feb53e399ab0',\n", | |
" 'model_file': 'd2cc8bba-ca0f-4f84-87d8-feb53e399ab0.rf.model',\n", | |
" 'model_file_path': '/tmp/d2cc8bba-ca0f-4f84-87d8-feb53e399ab0.rf.model',\n", | |
" 'params': {'model_type': 'RF',\n", | |
" 'seed': 4,\n", | |
" 'criterion': 'entropy',\n", | |
" 'max_features': 0.3,\n", | |
" 'min_samples_split': 40,\n", | |
" 'min_samples_leaf': 5}},\n", | |
" {'library_version': '0.20.3',\n", | |
" 'algorithm_name': 'Random Forest',\n", | |
" 'algorithm_short_name': 'RF',\n", | |
" 'uid': '6ddd7f19-3bcf-4482-a1b7-7c28211211d7',\n", | |
" 'model_file': '6ddd7f19-3bcf-4482-a1b7-7c28211211d7.rf.model',\n", | |
" 'model_file_path': '/tmp/6ddd7f19-3bcf-4482-a1b7-7c28211211d7.rf.model',\n", | |
" 'params': {'model_type': 'RF',\n", | |
" 'seed': 4,\n", | |
" 'criterion': 'entropy',\n", | |
" 'max_features': 0.3,\n", | |
" 'min_samples_split': 40,\n", | |
" 'min_samples_leaf': 5}},\n", | |
" {'library_version': '0.20.3',\n", | |
" 'algorithm_name': 'Random Forest',\n", | |
" 'algorithm_short_name': 'RF',\n", | |
" 'uid': 'b153e6ba-ad42-402c-a249-68eb50622fa5',\n", | |
" 'model_file': 'b153e6ba-ad42-402c-a249-68eb50622fa5.rf.model',\n", | |
" 'model_file_path': '/tmp/b153e6ba-ad42-402c-a249-68eb50622fa5.rf.model',\n", | |
" 'params': {'model_type': 'RF',\n", | |
" 'seed': 4,\n", | |
" 'criterion': 'entropy',\n", | |
" 'max_features': 0.3,\n", | |
" 'min_samples_split': 40,\n", | |
" 'min_samples_leaf': 5}},\n", | |
" {'library_version': '0.20.3',\n", | |
" 'algorithm_name': 'Random Forest',\n", | |
" 'algorithm_short_name': 'RF',\n", | |
" 'uid': 'd9939f94-22bf-40b2-8fcc-df6cf7a35dea',\n", | |
" 'model_file': 'd9939f94-22bf-40b2-8fcc-df6cf7a35dea.rf.model',\n", | |
" 'model_file_path': '/tmp/d9939f94-22bf-40b2-8fcc-df6cf7a35dea.rf.model',\n", | |
" 'params': {'model_type': 'RF',\n", | |
" 'seed': 4,\n", | |
" 'criterion': 'entropy',\n", | |
" 'max_features': 0.3,\n", | |
" 'min_samples_split': 40,\n", | |
" 'min_samples_leaf': 5}}],\n", | |
" 'params': {'additional': {'trees_in_step': 10,\n", | |
" 'train_cant_improve_limit': 5,\n", | |
" 'max_steps': 500,\n", | |
" 'max_rows_limit': None,\n", | |
" 'max_cols_limit': None},\n", | |
" 'preprocessing': {'columns_preprocessing': {'workclass': ['na_fill_median',\n", | |
" 'categorical_to_int'],\n", | |
" 'education': ['categorical_to_int'],\n", | |
" 'marital-status': ['categorical_to_int'],\n", | |
" 'occupation': ['na_fill_median', 'categorical_to_int'],\n", | |
" 'relationship': ['categorical_to_int'],\n", | |
" 'race': ['categorical_to_int'],\n", | |
" 'sex': ['categorical_to_int'],\n", | |
" 'native-country': ['na_fill_median', 'categorical_to_int']},\n", | |
" 'target_preprocessing': ['na_exclude', 'categorical_to_int']},\n", | |
" 'validation': {'validation_type': 'kfold',\n", | |
" 'k_folds': 5,\n", | |
" 'shuffle': True},\n", | |
" 'learner': {'model_type': 'RF',\n", | |
" 'seed': 4,\n", | |
" 'criterion': 'entropy',\n", | |
" 'max_features': 0.3,\n", | |
" 'min_samples_split': 40,\n", | |
" 'min_samples_leaf': 5}}},\n", | |
" 'repeat': 3},\n", | |
" {'model': {'uid': '16a97fa8-4fa7-4545-b694-b74705acd759',\n", | |
" 'algorithm_short_name': 'RF',\n", | |
" 'framework_file': '16a97fa8-4fa7-4545-b694-b74705acd759.framework',\n", | |
" 'framework_file_path': '/tmp/16a97fa8-4fa7-4545-b694-b74705acd759.framework',\n", | |
" 'preprocessing': [{'missing_values': [{'fill_method': 'na_fill_median',\n", | |
" 'fill_params': {'workclass': 'Private',\n", | |
" 'occupation': 'Prof-specialty',\n", | |
" 'native-country': 'United-States'}}],\n", | |
" 'categorical': [{'convert_method': 'categorical_to_int',\n", | |
" 'convert_params': {'workclass': {'Federal-gov': 0,\n", | |
" 'Local-gov': 1,\n", | |
" 'Never-worked': 2,\n", | |
" 'Private': 3,\n", | |
" 'Self-emp-inc': 4,\n", | |
" 'Self-emp-not-inc': 5,\n", | |
" 'State-gov': 6,\n", | |
" 'Without-pay': 7},\n", | |
" 'education': {'10th': 0,\n", | |
" '11th': 1,\n", | |
" '12th': 2,\n", | |
" '1st-4th': 3,\n", | |
" '5th-6th': 4,\n", | |
" '7th-8th': 5,\n", | |
" '9th': 6,\n", | |
" 'Assoc-acdm': 7,\n", | |
" 'Assoc-voc': 8,\n", | |
" 'Bachelors': 9,\n", | |
" 'Doctorate': 10,\n", | |
" 'HS-grad': 11,\n", | |
" 'Masters': 12,\n", | |
" 'Preschool': 13,\n", | |
" 'Prof-school': 14,\n", | |
" 'Some-college': 15},\n", | |
" 'marital-status': {'Divorced': 0,\n", | |
" 'Married-AF-spouse': 1,\n", | |
" 'Married-civ-spouse': 2,\n", | |
" 'Married-spouse-absent': 3,\n", | |
" 'Never-married': 4,\n", | |
" 'Separated': 5,\n", | |
" 'Widowed': 6},\n", | |
" 'occupation': {'Adm-clerical': 0,\n", | |
" 'Armed-Forces': 1,\n", | |
" 'Craft-repair': 2,\n", | |
" 'Exec-managerial': 3,\n", | |
" 'Farming-fishing': 4,\n", | |
" 'Handlers-cleaners': 5,\n", | |
" 'Machine-op-inspct': 6,\n", | |
" 'Other-service': 7,\n", | |
" 'Priv-house-serv': 8,\n", | |
" 'Prof-specialty': 9,\n", | |
" 'Protective-serv': 10,\n", | |
" 'Sales': 11,\n", | |
" 'Tech-support': 12,\n", | |
" 'Transport-moving': 13},\n", | |
" 'relationship': {'Husband': 0,\n", | |
" 'Not-in-family': 1,\n", | |
" 'Other-relative': 2,\n", | |
" 'Own-child': 3,\n", | |
" 'Unmarried': 4,\n", | |
" 'Wife': 5},\n", | |
" 'race': {'Amer-Indian-Eskimo': 0,\n", | |
" 'Asian-Pac-Islander': 1,\n", | |
" 'Black': 2,\n", | |
" 'Other': 3,\n", | |
" 'White': 4},\n", | |
" 'sex': {'Female': 0, 'Male': 1},\n", | |
" 'native-country': {'Cambodia': 0,\n", | |
" 'Canada': 1,\n", | |
" 'China': 2,\n", | |
" 'Columbia': 3,\n", | |
" 'Cuba': 4,\n", | |
" 'Dominican-Republic': 5,\n", | |
" 'Ecuador': 6,\n", | |
" 'El-Salvador': 7,\n", | |
" 'England': 8,\n", | |
" 'France': 9,\n", | |
" 'Germany': 10,\n", | |
" 'Greece': 11,\n", | |
" 'Guatemala': 12,\n", | |
" 'Haiti': 13,\n", | |
" 'Honduras': 14,\n", | |
" 'Hong': 15,\n", | |
" 'Hungary': 16,\n", | |
" 'India': 17,\n", | |
" 'Iran': 18,\n", | |
" 'Ireland': 19,\n", | |
" 'Italy': 20,\n", | |
" 'Jamaica': 21,\n", | |
" 'Japan': 22,\n", | |
" 'Laos': 23,\n", | |
" 'Mexico': 24,\n", | |
" 'Nicaragua': 25,\n", | |
" 'Outlying-US(Guam-USVI-etc)': 26,\n", | |
" 'Peru': 27,\n", | |
" 'Philippines': 28,\n", | |
" 'Poland': 29,\n", | |
" 'Portugal': 30,\n", | |
" 'Puerto-Rico': 31,\n", | |
" 'Scotland': 32,\n", | |
" 'South': 33,\n", | |
" 'Taiwan': 34,\n", | |
" 'Thailand': 35,\n", | |
" 'Trinadad&Tobago': 36,\n", | |
" 'United-States': 37,\n", | |
" 'Vietnam': 38,\n", | |
" 'Yugoslavia': 39}}}],\n", | |
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n", | |
" {'missing_values': [{'fill_method': 'na_fill_median',\n", | |
" 'fill_params': {'workclass': 'Private',\n", | |
" 'occupation': 'Prof-specialty',\n", | |
" 'native-country': 'United-States'}}],\n", | |
" 'categorical': [{'convert_method': 'categorical_to_int',\n", | |
" 'convert_params': {'workclass': {'Federal-gov': 0,\n", | |
" 'Local-gov': 1,\n", | |
" 'Never-worked': 2,\n", | |
" 'Private': 3,\n", | |
" 'Self-emp-inc': 4,\n", | |
" 'Self-emp-not-inc': 5,\n", | |
" 'State-gov': 6,\n", | |
" 'Without-pay': 7},\n", | |
" 'education': {'10th': 0,\n", | |
" '11th': 1,\n", | |
" '12th': 2,\n", | |
" '1st-4th': 3,\n", | |
" '5th-6th': 4,\n", | |
" '7th-8th': 5,\n", | |
" '9th': 6,\n", | |
" 'Assoc-acdm': 7,\n", | |
" 'Assoc-voc': 8,\n", | |
" 'Bachelors': 9,\n", | |
" 'Doctorate': 10,\n", | |
" 'HS-grad': 11,\n", | |
" 'Masters': 12,\n", | |
" 'Preschool': 13,\n", | |
" 'Prof-school': 14,\n", | |
" 'Some-college': 15},\n", | |
" 'marital-status': {'Divorced': 0,\n", | |
" 'Married-AF-spouse': 1,\n", | |
" 'Married-civ-spouse': 2,\n", | |
" 'Married-spouse-absent': 3,\n", | |
" 'Never-married': 4,\n", | |
" 'Separated': 5,\n", | |
" 'Widowed': 6},\n", | |
" 'occupation': {'Adm-clerical': 0,\n", | |
" 'Armed-Forces': 1,\n", | |
" 'Craft-repair': 2,\n", | |
" 'Exec-managerial': 3,\n", | |
" 'Farming-fishing': 4,\n", | |
" 'Handlers-cleaners': 5,\n", | |
" 'Machine-op-inspct': 6,\n", | |
" 'Other-service': 7,\n", | |
" 'Priv-house-serv': 8,\n", | |
" 'Prof-specialty': 9,\n", | |
" 'Protective-serv': 10,\n", | |
" 'Sales': 11,\n", | |
" 'Tech-support': 12,\n", | |
" 'Transport-moving': 13},\n", | |
" 'relationship': {'Husband': 0,\n", | |
" 'Not-in-family': 1,\n", | |
" 'Other-relative': 2,\n", | |
" 'Own-child': 3,\n", | |
" 'Unmarried': 4,\n", | |
" 'Wife': 5},\n", | |
" 'race': {'Amer-Indian-Eskimo': 0,\n", | |
" 'Asian-Pac-Islander': 1,\n", | |
" 'Black': 2,\n", | |
" 'Other': 3,\n", | |
" 'White': 4},\n", | |
" 'sex': {'Female': 0, 'Male': 1},\n", | |
" 'native-country': {'Cambodia': 0,\n", | |
" 'Canada': 1,\n", | |
" 'China': 2,\n", | |
" 'Columbia': 3,\n", | |
" 'Cuba': 4,\n", | |
" 'Dominican-Republic': 5,\n", | |
" 'Ecuador': 6,\n", | |
" 'El-Salvador': 7,\n", | |
" 'England': 8,\n", | |
" 'France': 9,\n", | |
" 'Germany': 10,\n", | |
" 'Greece': 11,\n", | |
" 'Guatemala': 12,\n", | |
" 'Haiti': 13,\n", | |
" 'Honduras': 14,\n", | |
" 'Hong': 15,\n", | |
" 'Hungary': 16,\n", | |
" 'India': 17,\n", | |
" 'Iran': 18,\n", | |
" 'Ireland': 19,\n", | |
" 'Italy': 20,\n", | |
" 'Jamaica': 21,\n", | |
" 'Japan': 22,\n", | |
" 'Laos': 23,\n", | |
" 'Mexico': 24,\n", | |
" 'Nicaragua': 25,\n", | |
" 'Outlying-US(Guam-USVI-etc)': 26,\n", | |
" 'Peru': 27,\n", | |
" 'Philippines': 28,\n", | |
" 'Poland': 29,\n", | |
" 'Portugal': 30,\n", | |
" 'Puerto-Rico': 31,\n", | |
" 'Scotland': 32,\n", | |
" 'South': 33,\n", | |
" 'Taiwan': 34,\n", | |
" 'Thailand': 35,\n", | |
" 'Trinadad&Tobago': 36,\n", | |
" 'United-States': 37,\n", | |
" 'Vietnam': 38,\n", | |
" 'Yugoslavia': 39}}}],\n", | |
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n", | |
" {'missing_values': [{'fill_method': 'na_fill_median',\n", | |
" 'fill_params': {'workclass': 'Private',\n", | |
" 'occupation': 'Prof-specialty',\n", | |
" 'native-country': 'United-States'}}],\n", | |
" 'categorical': [{'convert_method': 'categorical_to_int',\n", | |
" 'convert_params': {'workclass': {'Federal-gov': 0,\n", | |
" 'Local-gov': 1,\n", | |
" 'Never-worked': 2,\n", | |
" 'Private': 3,\n", | |
" 'Self-emp-inc': 4,\n", | |
" 'Self-emp-not-inc': 5,\n", | |
" 'State-gov': 6,\n", | |
" 'Without-pay': 7},\n", | |
" 'education': {'10th': 0,\n", | |
" '11th': 1,\n", | |
" '12th': 2,\n", | |
" '1st-4th': 3,\n", | |
" '5th-6th': 4,\n", | |
" '7th-8th': 5,\n", | |
" '9th': 6,\n", | |
" 'Assoc-acdm': 7,\n", | |
" 'Assoc-voc': 8,\n", | |
" 'Bachelors': 9,\n", | |
" 'Doctorate': 10,\n", | |
" 'HS-grad': 11,\n", | |
" 'Masters': 12,\n", | |
" 'Preschool': 13,\n", | |
" 'Prof-school': 14,\n", | |
" 'Some-college': 15},\n", | |
" 'marital-status': {'Divorced': 0,\n", | |
" 'Married-AF-spouse': 1,\n", | |
" 'Married-civ-spouse': 2,\n", | |
" 'Married-spouse-absent': 3,\n", | |
" 'Never-married': 4,\n", | |
" 'Separated': 5,\n", | |
" 'Widowed': 6},\n", | |
" 'occupation': {'Adm-clerical': 0,\n", | |
" 'Armed-Forces': 1,\n", | |
" 'Craft-repair': 2,\n", | |
" 'Exec-managerial': 3,\n", | |
" 'Farming-fishing': 4,\n", | |
" 'Handlers-cleaners': 5,\n", | |
" 'Machine-op-inspct': 6,\n", | |
" 'Other-service': 7,\n", | |
" 'Priv-house-serv': 8,\n", | |
" 'Prof-specialty': 9,\n", | |
" 'Protective-serv': 10,\n", | |
" 'Sales': 11,\n", | |
" 'Tech-support': 12,\n", | |
" 'Transport-moving': 13},\n", | |
" 'relationship': {'Husband': 0,\n", | |
" 'Not-in-family': 1,\n", | |
" 'Other-relative': 2,\n", | |
" 'Own-child': 3,\n", | |
" 'Unmarried': 4,\n", | |
" 'Wife': 5},\n", | |
" 'race': {'Amer-Indian-Eskimo': 0,\n", | |
" 'Asian-Pac-Islander': 1,\n", | |
" 'Black': 2,\n", | |
" 'Other': 3,\n", | |
" 'White': 4},\n", | |
" 'sex': {'Female': 0, 'Male': 1},\n", | |
" 'native-country': {'Cambodia': 0,\n", | |
" 'Canada': 1,\n", | |
" 'China': 2,\n", | |
" 'Columbia': 3,\n", | |
" 'Cuba': 4,\n", | |
" 'Dominican-Republic': 5,\n", | |
" 'Ecuador': 6,\n", | |
" 'El-Salvador': 7,\n", | |
" 'England': 8,\n", | |
" 'France': 9,\n", | |
" 'Germany': 10,\n", | |
" 'Greece': 11,\n", | |
" 'Guatemala': 12,\n", | |
" 'Haiti': 13,\n", | |
" 'Honduras': 14,\n", | |
" 'Hong': 15,\n", | |
" 'Hungary': 16,\n", | |
" 'India': 17,\n", | |
" 'Iran': 18,\n", | |
" 'Ireland': 19,\n", | |
" 'Italy': 20,\n", | |
" 'Jamaica': 21,\n", | |
" 'Japan': 22,\n", | |
" 'Laos': 23,\n", | |
" 'Mexico': 24,\n", | |
" 'Nicaragua': 25,\n", | |
" 'Outlying-US(Guam-USVI-etc)': 26,\n", | |
" 'Peru': 27,\n", | |
" 'Philippines': 28,\n", | |
" 'Poland': 29,\n", | |
" 'Portugal': 30,\n", | |
" 'Puerto-Rico': 31,\n", | |
" 'Scotland': 32,\n", | |
" 'South': 33,\n", | |
" 'Taiwan': 34,\n", | |
" 'Thailand': 35,\n", | |
" 'Trinadad&Tobago': 36,\n", | |
" 'United-States': 37,\n", | |
" 'Vietnam': 38,\n", | |
" 'Yugoslavia': 39}}}],\n", | |
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n", | |
" {'missing_values': [{'fill_method': 'na_fill_median',\n", | |
" 'fill_params': {'workclass': 'Private',\n", | |
" 'occupation': 'Prof-specialty',\n", | |
" 'native-country': 'United-States'}}],\n", | |
" 'categorical': [{'convert_method': 'categorical_to_int',\n", | |
" 'convert_params': {'workclass': {'Federal-gov': 0,\n", | |
" 'Local-gov': 1,\n", | |
" 'Never-worked': 2,\n", | |
" 'Private': 3,\n", | |
" 'Self-emp-inc': 4,\n", | |
" 'Self-emp-not-inc': 5,\n", | |
" 'State-gov': 6,\n", | |
" 'Without-pay': 7},\n", | |
" 'education': {'10th': 0,\n", | |
" '11th': 1,\n", | |
" '12th': 2,\n", | |
" '1st-4th': 3,\n", | |
" '5th-6th': 4,\n", | |
" '7th-8th': 5,\n", | |
" '9th': 6,\n", | |
" 'Assoc-acdm': 7,\n", | |
" 'Assoc-voc': 8,\n", | |
" 'Bachelors': 9,\n", | |
" 'Doctorate': 10,\n", | |
" 'HS-grad': 11,\n", | |
" 'Masters': 12,\n", | |
" 'Preschool': 13,\n", | |
" 'Prof-school': 14,\n", | |
" 'Some-college': 15},\n", | |
" 'marital-status': {'Divorced': 0,\n", | |
" 'Married-AF-spouse': 1,\n", | |
" 'Married-civ-spouse': 2,\n", | |
" 'Married-spouse-absent': 3,\n", | |
" 'Never-married': 4,\n", | |
" 'Separated': 5,\n", | |
" 'Widowed': 6},\n", | |
" 'occupation': {'Adm-clerical': 0,\n", | |
" 'Armed-Forces': 1,\n", | |
" 'Craft-repair': 2,\n", | |
" 'Exec-managerial': 3,\n", | |
" 'Farming-fishing': 4,\n", | |
" 'Handlers-cleaners': 5,\n", | |
" 'Machine-op-inspct': 6,\n", | |
" 'Other-service': 7,\n", | |
" 'Priv-house-serv': 8,\n", | |
" 'Prof-specialty': 9,\n", | |
" 'Protective-serv': 10,\n", | |
" 'Sales': 11,\n", | |
" 'Tech-support': 12,\n", | |
" 'Transport-moving': 13},\n", | |
" 'relationship': {'Husband': 0,\n", | |
" 'Not-in-family': 1,\n", | |
" 'Other-relative': 2,\n", | |
" 'Own-child': 3,\n", | |
" 'Unmarried': 4,\n", | |
" 'Wife': 5},\n", | |
" 'race': {'Amer-Indian-Eskimo': 0,\n", | |
" 'Asian-Pac-Islander': 1,\n", | |
" 'Black': 2,\n", | |
" 'Other': 3,\n", | |
" 'White': 4},\n", | |
" 'sex': {'Female': 0, 'Male': 1},\n", | |
" 'native-country': {'Cambodia': 0,\n", | |
" 'Canada': 1,\n", | |
" 'China': 2,\n", | |
" 'Columbia': 3,\n", | |
" 'Cuba': 4,\n", | |
" 'Dominican-Republic': 5,\n", | |
" 'Ecuador': 6,\n", | |
" 'El-Salvador': 7,\n", | |
" 'England': 8,\n", | |
" 'France': 9,\n", | |
" 'Germany': 10,\n", | |
" 'Greece': 11,\n", | |
" 'Guatemala': 12,\n", | |
" 'Haiti': 13,\n", | |
" 'Honduras': 14,\n", | |
" 'Hong': 15,\n", | |
" 'Hungary': 16,\n", | |
" 'India': 17,\n", | |
" 'Iran': 18,\n", | |
" 'Ireland': 19,\n", | |
" 'Italy': 20,\n", | |
" 'Jamaica': 21,\n", | |
" 'Japan': 22,\n", | |
" 'Laos': 23,\n", | |
" 'Mexico': 24,\n", | |
" 'Nicaragua': 25,\n", | |
" 'Outlying-US(Guam-USVI-etc)': 26,\n", | |
" 'Peru': 27,\n", | |
" 'Philippines': 28,\n", | |
" 'Poland': 29,\n", | |
" 'Portugal': 30,\n", | |
" 'Puerto-Rico': 31,\n", | |
" 'Scotland': 32,\n", | |
" 'South': 33,\n", | |
" 'Taiwan': 34,\n", | |
" 'Thailand': 35,\n", | |
" 'Trinadad&Tobago': 36,\n", | |
" 'United-States': 37,\n", | |
" 'Vietnam': 38,\n", | |
" 'Yugoslavia': 39}}}],\n", | |
" 'categorical_y': {'<=50K': 0, '>50K': 1}},\n", | |
" {'missing_values': [{'fill_method': 'na_fill_median',\n", | |
" 'fill_params': {'workclass': 'Private',\n", | |
" 'occupation': 'Prof-specialty',\n", | |
" 'native-country': 'United-States'}}],\n", | |
" 'categorical': [{'convert_method': 'categorical_to_int',\n", | |
" 'convert_params': {'workclass': {'Federal-gov': 0,\n", | |
" 'Local-gov': 1,\n", | |
" 'Never-worked': 2,\n", | |
" 'Private': 3,\n", | |
" 'Self-emp-inc': 4,\n", | |
" 'Self-emp-not-inc': 5,\n", | |
" 'State-gov': 6,\n", | |
" 'Without-pay': 7},\n", | |
" 'education': {'10th': 0,\n", | |
" '11th': 1,\n", | |
" '12th': 2,\n", | |
" '1st-4th': 3,\n", | |
" '5th-6th': 4,\n", | |
" '7th-8th': 5,\n", | |
" '9th': 6,\n", | |
" 'Assoc-acdm': 7,\n", | |
" 'Assoc-voc': 8,\n", | |
" 'Bachelors': 9,\n", | |
" 'Doctorate': 10,\n", | |
" 'HS-grad': 11,\n", | |
" 'Masters': 12,\n", | |
" 'Preschool': 13,\n", | |
" 'Prof-school': 14,\n", | |
" 'Some-college': 15},\n", | |
" 'marital-status': {'Divorced': 0,\n", | |
" 'Married-AF-spouse': 1,\n", | |
" 'Married-civ-spouse': 2,\n", | |
" 'Married-spouse-absent': 3,\n", | |
" 'Never-married': 4,\n", | |
" 'Separated': 5,\n", | |
" 'Widowed': 6},\n", | |
" 'occupation': {'Adm-clerical': 0,\n", | |
" 'Armed-Forces': 1,\n", | |
" 'Craft-repair': 2,\n", | |
" 'Exec-managerial': 3,\n", | |
" 'Farming-fishing': 4,\n", | |
" 'Handlers-cleaners': 5,\n", | |
" 'Machine-op-inspct': 6,\n", | |
" 'Other-service': 7,\n", | |
" 'Priv-house-serv': 8,\n", | |
" 'Prof-specialty': 9,\n", | |
" 'Protective-serv': 10,\n", | |
" 'Sales': 11,\n", | |
" 'Tech-support': 12,\n", | |
" 'Transport-moving': 13},\n", | |
" 'relationship': {'Husband': 0,\n", | |
" 'Not-in-family': 1,\n", | |
" 'Other-relative': 2,\n", | |
" 'Own-child': 3,\n", | |
" 'Unmarried': 4,\n", | |
" 'Wife': 5},\n", | |
" 'race': {'Amer-Indian-Eskimo': 0,\n", | |
" 'Asian-Pac-Islander': 1,\n", | |
" 'Black': 2,\n", | |
" 'Other': 3,\n", | |
" 'White': 4},\n", | |
" 'sex': {'Female': 0, 'Male': 1},\n", | |
" 'native-country': {'Cambodia': 0,\n", | |
" 'Canada': 1,\n", | |
" 'China': 2,\n", | |
" 'Columbia': 3,\n", | |
" 'Cuba': 4,\n", | |
" 'Dominican-Republic': 5,\n", | |
" 'Ecuador': 6,\n", | |
" 'El-Salvador': 7,\n", | |
" 'England': 8,\n", | |
" 'France': 9,\n", | |
" 'Germany': 10,\n", | |
" 'Greece': 11,\n", | |
" 'Guatemala': 12,\n", | |
" 'Haiti': 13,\n", | |
" 'Honduras': 14,\n", | |
" 'Hong': 15,\n", | |
" 'Hungary': 16,\n", | |
" 'India': 17,\n", | |
" 'Iran': 18,\n", | |
" 'Ireland': 19,\n", | |
" 'Italy': 20,\n", | |
" 'Jamaica': 21,\n", | |
" 'Japan': 22,\n", | |
" 'Laos': 23,\n", | |
" 'Mexico': 24,\n", | |
" 'Nicaragua': 25,\n", | |
" 'Outlying-US(Guam-USVI-etc)': 26,\n", | |
" 'Peru': 27,\n", | |
" 'Philippines': 28,\n", | |
" 'Poland': 29,\n", | |
" 'Portugal': 30,\n", | |
" 'Puerto-Rico': 31,\n", | |
" 'Scotland': 32,\n", | |
" 'South': 33,\n", | |
" 'Taiwan': 34,\n", | |
" 'Thailand': 35,\n", | |
" 'Trinadad&Tobago': 36,\n", | |
" 'United-States': 37,\n", | |
" 'Vietnam': 38,\n", | |
" 'Yugoslavia': 39}}}],\n", | |
" 'categorical_y': {'<=50K': 0, '>50K': 1}}],\n", | |
" 'learners': [{'library_version': '0.20.3',\n", | |
" 'algorithm_name': 'Random Forest',\n", | |
" 'algorithm_short_name': 'RF',\n", | |
" 'uid': '46ecdf0b-a48a-43fa-87c0-47679a360e06',\n", | |
" 'model_file': '46ecdf0b-a48a-43fa-87c0-47679a360e06.rf.model',\n", | |
" 'model_file_path': '/tmp/46ecdf0b-a48a-43fa-87c0-47679a360e06.rf.model',\n", | |
" 'params': {'model_type': 'RF',\n", | |
" 'seed': 5,\n", | |
" 'criterion': 'gini',\n", | |
" 'max_features': 0.5,\n", | |
" 'min_samples_split': 20,\n", | |
" 'min_samples_leaf': 16}},\n", | |
" {'library_version': '0.20.3',\n", | |
" 'algorithm_name': 'Random Forest',\n", | |
" 'algorithm_short_name': 'RF',\n", | |
" 'uid': 'cd981d70-2064-4d7f-b184-fee61cd23ff0',\n", | |
" 'model_file': 'cd981d70-2064-4d7f-b184-fee61cd23ff0.rf.model',\n", | |
" 'model_file_path': '/tmp/cd981d70-2064-4d7f-b184-fee61cd23ff0.rf.model',\n", | |
" 'params': {'model_type': 'RF',\n", | |
" 'seed': 5,\n", | |
" 'criterion': 'gini',\n", | |
" 'max_features': 0.5,\n", | |
" 'min_samples_split': 20,\n", | |
" 'min_samples_leaf': 16}},\n", | |
" {'library_version': '0.20.3',\n", | |
" 'algorithm_name': 'Random Forest',\n", | |
" 'algorithm_short_name': 'RF',\n", | |
" 'uid': '3ea42453-4bcf-4a93-8009-9ac09598bc29',\n", | |
" 'model_file': '3ea42453-4bcf-4a93-8009-9ac09598bc29.rf.model',\n", | |
" 'model_file_path': '/tmp/3ea42453-4bcf-4a93-8009-9ac09598bc29.rf.model',\n", | |
" 'params': {'model_type': 'RF',\n", | |
" 'seed': 5,\n", | |
" 'criterion': 'gini',\n", | |
" 'max_features': 0.5,\n", | |
" 'min_samples_split': 20,\n", | |
" 'min_samples_leaf': 16}},\n", | |
" {'library_version': '0.20.3',\n", | |
" 'algorithm_name': 'Random Forest',\n", | |
" 'algorithm_short_name': 'RF',\n", | |
" 'uid': 'fb9a8c23-d45b-4509-be2a-2f4f39dab67b',\n", | |
" 'model_file': 'fb9a8c23-d45b-4509-be2a-2f4f39dab67b.rf.model',\n", | |
" 'model_file_path': '/tmp/fb9a8c23-d45b-4509-be2a-2f4f39dab67b.rf.model',\n", | |
" 'params': {'model_type': 'RF',\n", | |
" 'seed': 5,\n", | |
" 'criterion': 'gini',\n", | |
" 'max_features': 0.5,\n", | |
" 'min_samples_split': 20,\n", | |
" 'min_samples_leaf': 16}},\n", | |
" {'library_version': '0.20.3',\n", | |
" 'algorithm_name': 'Random Forest',\n", | |
" 'algorithm_short_name': 'RF',\n", | |
" 'uid': '493f0185-e331-4310-91c0-9a8cec5179ed',\n", | |
" 'model_file': '493f0185-e331-4310-91c0-9a8cec5179ed.rf.model',\n", | |
" 'model_file_path': '/tmp/493f0185-e331-4310-91c0-9a8cec5179ed.rf.model',\n", | |
" 'params': {'model_type': 'RF',\n", | |
" 'seed': 5,\n", | |
" 'criterion': 'gini',\n", | |
" 'max_features': 0.5,\n", | |
" 'min_samples_split': 20,\n", | |
" 'min_samples_leaf': 16}}],\n", | |
" 'params': {'additional': {'trees_in_step': 10,\n", | |
" 'train_cant_improve_limit': 5,\n", | |
" 'max_steps': 500,\n", | |
" 'max_rows_limit': None,\n", | |
" 'max_cols_limit': None},\n", | |
" 'preprocessing': {'columns_preprocessing': {'workclass': ['na_fill_median',\n", | |
" 'categorical_to_int'],\n", | |
" 'education': ['categorical_to_int'],\n", | |
" 'marital-status': ['categorical_to_int'],\n", | |
" 'occupation': ['na_fill_median', 'categorical_to_int'],\n", | |
" 'relationship': ['categorical_to_int'],\n", | |
" 'race': ['categorical_to_int'],\n", | |
" 'sex': ['categorical_to_int'],\n", | |
" 'native-country': ['na_fill_median', 'categorical_to_int']},\n", | |
" 'target_preprocessing': ['na_exclude', 'categorical_to_int']},\n", | |
" 'validation': {'validation_type': 'kfold',\n", | |
" 'k_folds': 5,\n", | |
" 'shuffle': True},\n", | |
" 'learner': {'model_type': 'RF',\n", | |
" 'seed': 5,\n", | |
" 'criterion': 'gini',\n", | |
" 'max_features': 0.5,\n", | |
" 'min_samples_split': 20,\n", | |
" 'min_samples_leaf': 16}}},\n", | |
" 'repeat': 1}]},\n", | |
" 'threshold': 0.3792094447282226}" | |
] | |
}, | |
"execution_count": 63, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"automl.to_json()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 64, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "a8482f483fec443fa3bf6fff4a68f5ae", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
"HBox(children=(IntProgress(value=0, description='MLJAR AutoML', max=80, style=ProgressStyle(description_width=…" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Learner CatBoost final loss 0.4962725171175057 time 8.7 seconds\n", | |
"Learner CatBoost final loss 0.298971902901766 time 7.78 seconds\n", | |
"Learner CatBoost final loss 0.3687963242745068 time 6.72 seconds\n", | |
"Learner CatBoost final loss 0.5241412727811743 time 6.98 seconds\n", | |
"Learner CatBoost final loss 0.320242187891504 time 8.47 seconds\n", | |
"Learner CatBoost final loss 0.3276007892517636 time 6.67 seconds\n", | |
"Learner CatBoost final loss 0.31286838781837084 time 8.78 seconds\n", | |
"Learner CatBoost final loss 0.3194302661081719 time 8.27 seconds\n", | |
"Learner CatBoost final loss 0.308917492043316 time 8.73 seconds\n", | |
"Learner CatBoost final loss 0.5221297190975681 time 7.52 seconds\n", | |
"Learner Xgboost final loss 0.311372604774252 time 19.98 seconds\n", | |
"Learner Xgboost final loss 0.2842308795115343 time 44.59 seconds\n", | |
"Learner Xgboost final loss 0.5124198237210352 time 6.05 seconds\n", | |
"Learner Xgboost final loss 0.31983838345814286 time 28.92 seconds\n", | |
"Learner RF final loss 0.32201007412896604 time 6.64 seconds\n", | |
"Learner RF final loss 0.30181666534456947 time 6.41 seconds\n", | |
"Learner RF final loss 0.31178228545519937 time 6.59 seconds\n", | |
"Learner RF final loss 0.3077256287675534 time 6.34 seconds\n", | |
"Learner RF final loss 0.3037423508551022 time 6.51 seconds\n", | |
"Learner RF final loss 0.3022910292393144 time 6.39 seconds\n", | |
"Learner RF final loss 0.3024787249684428 time 6.36 seconds\n", | |
"Learner RF final loss 0.3051025531513369 time 6.43 seconds\n", | |
"Learner RF final loss 0.3009434792047708 time 6.39 seconds\n", | |
"Learner RF final loss 0.3059040438520827 time 6.35 seconds\n", | |
"Learner LightGBM final loss 0.52685725579385 time 74.56 seconds\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" return self.partial_fit(X, y)\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" return self.partial_fit(X, y)\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" return self.partial_fit(X, y)\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" return self.partial_fit(X, y)\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" return self.partial_fit(X, y)\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\r", | |
"Learner NN final loss 0.4404172010190013 time 37.23 seconds\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" return self.partial_fit(X, y)\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" return self.partial_fit(X, y)\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" return self.partial_fit(X, y)\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" return self.partial_fit(X, y)\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/sklearn/preprocessing/data.py:645: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" return self.partial_fit(X, y)\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n", | |
"/home/piotr/sandbox/rf/automl-rf/venv/lib/python3.6/site-packages/supervised/preprocessing/preprocessing_scale.py:26: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", | |
" X.loc[:, self.columns] = self.scale.transform(X[self.columns])\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Learner NN final loss 0.44313672512751584 time 49.54 seconds\n", | |
"Learner CatBoost final loss 0.2924718431766818 time 7.46 seconds\n", | |
"Learner CatBoost final loss 0.3008517882859747 time 7.93 seconds\n", | |
"Learner RF final loss 0.30141657521244086 time 6.42 seconds\n", | |
"Learner RF final loss 0.30108077775213377 time 6.44 seconds\n", | |
"Learner RF final loss 0.3029979984348817 time 6.36 seconds\n", | |
"Learner RF final loss 0.30137187809404953 time 6.37 seconds\n", | |
"Learner RF final loss 0.3039241119152016 time 6.36 seconds\n", | |
"Learner CatBoost final loss 0.29292902314568803 time 7.38 seconds\n", | |
"Learner Ensemble final loss 0.2841549706146483 time 63.89 seconds\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"automl = AutoML(total_time_limit=10*60) # let's go crazy and train for 10 minutes :-D\n", | |
"automl.fit(X_train, y_train)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 65, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.2975106252933293" | |
] | |
}, | |
"execution_count": 65, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"log_loss(y_test, automl.predict(X_test)['p_>50K'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 66, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>uid</th>\n", | |
" <th>model_type</th>\n", | |
" <th>metric_type</th>\n", | |
" <th>metric_value</th>\n", | |
" <th>train_time</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>82cab950-7ce3-42f4-bd7d-886ad5553643</td>\n", | |
" <td>CatBoost</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.496273</td>\n", | |
" <td>8.698081</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>fc5ea6f6-a92e-4272-a2fa-3a27f12fcc7c</td>\n", | |
" <td>CatBoost</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.298972</td>\n", | |
" <td>7.781434</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3adba1ba-fb1b-43ae-afcf-969a2c6e7375</td>\n", | |
" <td>CatBoost</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.368796</td>\n", | |
" <td>6.723236</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>f8dea4a9-abf1-4218-8361-63d3b795a752</td>\n", | |
" <td>CatBoost</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.524141</td>\n", | |
" <td>6.980789</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>74bc7a7d-09b4-4ece-9035-686a33612bed</td>\n", | |
" <td>CatBoost</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.320242</td>\n", | |
" <td>8.472449</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>c1a40c3a-0fe6-466e-9bda-e0b356254f71</td>\n", | |
" <td>CatBoost</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.327601</td>\n", | |
" <td>6.673680</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>fd61e2ab-7c75-4e9f-a1b7-a4b2dfd08a79</td>\n", | |
" <td>CatBoost</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.312868</td>\n", | |
" <td>8.777036</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>6b67440e-6001-4acc-bbef-bd0cf1052598</td>\n", | |
" <td>CatBoost</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.319430</td>\n", | |
" <td>8.268195</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>0c8cb41a-cd92-4ac6-ac83-db1ec779b4e2</td>\n", | |
" <td>CatBoost</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.308917</td>\n", | |
" <td>8.730249</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>fe348a66-22c2-4ecd-9bd5-9293ace74ed3</td>\n", | |
" <td>CatBoost</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.522130</td>\n", | |
" <td>7.523533</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>bee786ab-c29d-4404-b904-c1937330cca5</td>\n", | |
" <td>Xgboost</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.311373</td>\n", | |
" <td>19.979386</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>a0ff281d-efc9-4593-ae0a-3973acc7badb</td>\n", | |
" <td>Xgboost</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.284231</td>\n", | |
" <td>44.590009</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>ed0bdf42-6b57-42e3-aec7-105522f83f68</td>\n", | |
" <td>Xgboost</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.512420</td>\n", | |
" <td>6.050704</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>277ca639-5908-4347-87f8-9ecabd1ad4a3</td>\n", | |
" <td>Xgboost</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.319838</td>\n", | |
" <td>28.923516</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>30c6b1e4-16b2-4590-87ca-91814a83c091</td>\n", | |
" <td>RF</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.322010</td>\n", | |
" <td>6.638074</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>cc9ed647-bbca-4ede-a85a-ce9ef001741d</td>\n", | |
" <td>RF</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.301817</td>\n", | |
" <td>6.411106</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>eadcee3f-7982-48c6-8b82-11ad9cfdefe7</td>\n", | |
" <td>RF</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.311782</td>\n", | |
" <td>6.589697</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>54aac109-abc7-4ae7-b613-601df85ea44e</td>\n", | |
" <td>RF</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.307726</td>\n", | |
" <td>6.337984</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>0f8846d9-244a-43b1-881e-bf8503bdb181</td>\n", | |
" <td>RF</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.303742</td>\n", | |
" <td>6.505825</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>402ea7a2-6737-46cc-aab5-f743e4bdea4f</td>\n", | |
" <td>RF</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.302291</td>\n", | |
" <td>6.394600</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>20</th>\n", | |
" <td>b7ab3204-44a4-4d25-b8ae-a6b500689a55</td>\n", | |
" <td>RF</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.302479</td>\n", | |
" <td>6.355931</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>21</th>\n", | |
" <td>c3a0e608-1ff3-4d35-9eeb-84003d8ae3be</td>\n", | |
" <td>RF</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.305103</td>\n", | |
" <td>6.428447</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>22</th>\n", | |
" <td>d7d81758-0ec0-4dac-90fb-57c74f8ab348</td>\n", | |
" <td>RF</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.300943</td>\n", | |
" <td>6.389448</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>23</th>\n", | |
" <td>7b661734-ac7d-45ba-bf71-dbe1618f654a</td>\n", | |
" <td>RF</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.305904</td>\n", | |
" <td>6.351137</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>24</th>\n", | |
" <td>7d7fbf29-a00b-4501-95a1-b63a8bb15457</td>\n", | |
" <td>LightGBM</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.526857</td>\n", | |
" <td>74.563801</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25</th>\n", | |
" <td>baf6f37a-41fb-4d9b-a13c-30ef9310f217</td>\n", | |
" <td>NN</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.440417</td>\n", | |
" <td>37.227938</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>26</th>\n", | |
" <td>b4cacc4f-4d1a-48e9-ae08-9ec12e2a838f</td>\n", | |
" <td>NN</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.443137</td>\n", | |
" <td>49.542672</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>27</th>\n", | |
" <td>1d9ec79a-38eb-4dfc-945a-80f9e31beb1b</td>\n", | |
" <td>CatBoost</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.292472</td>\n", | |
" <td>7.463734</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>28</th>\n", | |
" <td>06799b1e-972d-4eb5-9271-fd8413d489ec</td>\n", | |
" <td>CatBoost</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.300852</td>\n", | |
" <td>7.930739</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>29</th>\n", | |
" <td>21ad6589-1c25-470a-8436-e344c00320c4</td>\n", | |
" <td>RF</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.301417</td>\n", | |
" <td>6.422292</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>30</th>\n", | |
" <td>e07d516d-33c1-41d0-9ee5-b581616c4edb</td>\n", | |
" <td>RF</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.301081</td>\n", | |
" <td>6.438730</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31</th>\n", | |
" <td>83879b2c-b0fb-43f3-a282-be9419c48ee3</td>\n", | |
" <td>RF</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.302998</td>\n", | |
" <td>6.355017</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>32</th>\n", | |
" <td>d8aa6b74-2894-4deb-bae8-b0d1439f71a1</td>\n", | |
" <td>RF</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.301372</td>\n", | |
" <td>6.374434</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>33</th>\n", | |
" <td>08e4a0b8-01fd-4b47-8c8f-e593d7ee37ea</td>\n", | |
" <td>RF</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.303924</td>\n", | |
" <td>6.363737</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>34</th>\n", | |
" <td>2491de8b-1bc0-4964-be8d-1287b9090e29</td>\n", | |
" <td>CatBoost</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.292929</td>\n", | |
" <td>7.383213</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>35</th>\n", | |
" <td>12f08b01-6f52-4014-bb5e-305780191a96</td>\n", | |
" <td>Ensemble</td>\n", | |
" <td>logloss</td>\n", | |
" <td>0.284155</td>\n", | |
" <td>63.892297</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" uid model_type metric_type metric_value \\\n", | |
"0 82cab950-7ce3-42f4-bd7d-886ad5553643 CatBoost logloss 0.496273 \n", | |
"1 fc5ea6f6-a92e-4272-a2fa-3a27f12fcc7c CatBoost logloss 0.298972 \n", | |
"2 3adba1ba-fb1b-43ae-afcf-969a2c6e7375 CatBoost logloss 0.368796 \n", | |
"3 f8dea4a9-abf1-4218-8361-63d3b795a752 CatBoost logloss 0.524141 \n", | |
"4 74bc7a7d-09b4-4ece-9035-686a33612bed CatBoost logloss 0.320242 \n", | |
"5 c1a40c3a-0fe6-466e-9bda-e0b356254f71 CatBoost logloss 0.327601 \n", | |
"6 fd61e2ab-7c75-4e9f-a1b7-a4b2dfd08a79 CatBoost logloss 0.312868 \n", | |
"7 6b67440e-6001-4acc-bbef-bd0cf1052598 CatBoost logloss 0.319430 \n", | |
"8 0c8cb41a-cd92-4ac6-ac83-db1ec779b4e2 CatBoost logloss 0.308917 \n", | |
"9 fe348a66-22c2-4ecd-9bd5-9293ace74ed3 CatBoost logloss 0.522130 \n", | |
"10 bee786ab-c29d-4404-b904-c1937330cca5 Xgboost logloss 0.311373 \n", | |
"11 a0ff281d-efc9-4593-ae0a-3973acc7badb Xgboost logloss 0.284231 \n", | |
"12 ed0bdf42-6b57-42e3-aec7-105522f83f68 Xgboost logloss 0.512420 \n", | |
"13 277ca639-5908-4347-87f8-9ecabd1ad4a3 Xgboost logloss 0.319838 \n", | |
"14 30c6b1e4-16b2-4590-87ca-91814a83c091 RF logloss 0.322010 \n", | |
"15 cc9ed647-bbca-4ede-a85a-ce9ef001741d RF logloss 0.301817 \n", | |
"16 eadcee3f-7982-48c6-8b82-11ad9cfdefe7 RF logloss 0.311782 \n", | |
"17 54aac109-abc7-4ae7-b613-601df85ea44e RF logloss 0.307726 \n", | |
"18 0f8846d9-244a-43b1-881e-bf8503bdb181 RF logloss 0.303742 \n", | |
"19 402ea7a2-6737-46cc-aab5-f743e4bdea4f RF logloss 0.302291 \n", | |
"20 b7ab3204-44a4-4d25-b8ae-a6b500689a55 RF logloss 0.302479 \n", | |
"21 c3a0e608-1ff3-4d35-9eeb-84003d8ae3be RF logloss 0.305103 \n", | |
"22 d7d81758-0ec0-4dac-90fb-57c74f8ab348 RF logloss 0.300943 \n", | |
"23 7b661734-ac7d-45ba-bf71-dbe1618f654a RF logloss 0.305904 \n", | |
"24 7d7fbf29-a00b-4501-95a1-b63a8bb15457 LightGBM logloss 0.526857 \n", | |
"25 baf6f37a-41fb-4d9b-a13c-30ef9310f217 NN logloss 0.440417 \n", | |
"26 b4cacc4f-4d1a-48e9-ae08-9ec12e2a838f NN logloss 0.443137 \n", | |
"27 1d9ec79a-38eb-4dfc-945a-80f9e31beb1b CatBoost logloss 0.292472 \n", | |
"28 06799b1e-972d-4eb5-9271-fd8413d489ec CatBoost logloss 0.300852 \n", | |
"29 21ad6589-1c25-470a-8436-e344c00320c4 RF logloss 0.301417 \n", | |
"30 e07d516d-33c1-41d0-9ee5-b581616c4edb RF logloss 0.301081 \n", | |
"31 83879b2c-b0fb-43f3-a282-be9419c48ee3 RF logloss 0.302998 \n", | |
"32 d8aa6b74-2894-4deb-bae8-b0d1439f71a1 RF logloss 0.301372 \n", | |
"33 08e4a0b8-01fd-4b47-8c8f-e593d7ee37ea RF logloss 0.303924 \n", | |
"34 2491de8b-1bc0-4964-be8d-1287b9090e29 CatBoost logloss 0.292929 \n", | |
"35 12f08b01-6f52-4014-bb5e-305780191a96 Ensemble logloss 0.284155 \n", | |
"\n", | |
" train_time \n", | |
"0 8.698081 \n", | |
"1 7.781434 \n", | |
"2 6.723236 \n", | |
"3 6.980789 \n", | |
"4 8.472449 \n", | |
"5 6.673680 \n", | |
"6 8.777036 \n", | |
"7 8.268195 \n", | |
"8 8.730249 \n", | |
"9 7.523533 \n", | |
"10 19.979386 \n", | |
"11 44.590009 \n", | |
"12 6.050704 \n", | |
"13 28.923516 \n", | |
"14 6.638074 \n", | |
"15 6.411106 \n", | |
"16 6.589697 \n", | |
"17 6.337984 \n", | |
"18 6.505825 \n", | |
"19 6.394600 \n", | |
"20 6.355931 \n", | |
"21 6.428447 \n", | |
"22 6.389448 \n", | |
"23 6.351137 \n", | |
"24 74.563801 \n", | |
"25 37.227938 \n", | |
"26 49.542672 \n", | |
"27 7.463734 \n", | |
"28 7.930739 \n", | |
"29 6.422292 \n", | |
"30 6.438730 \n", | |
"31 6.355017 \n", | |
"32 6.374434 \n", | |
"33 6.363737 \n", | |
"34 7.383213 \n", | |
"35 63.892297 " | |
] | |
}, | |
"execution_count": 66, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"automl.get_leaderboard()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# 1. new missing value\n", | |
"# 2. new categorical value\n", | |
"# 3. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "venv", | |
"language": "python", | |
"name": "venv" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.7" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment