Skip to content

Instantly share code, notes, and snippets.

@amirziai
Last active January 19, 2018 04:49
Show Gist options
  • Select an option

  • Save amirziai/038a679383fc6de68080582bd5fad1d5 to your computer and use it in GitHub Desktop.

Select an option

Save amirziai/038a679383fc6de68080582bd5fad1d5 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.datasets import load_iris\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"\n",
"def get_x_y_and_names(data):\n",
" return [data[x] for x in ['data', 'target', 'feature_names']]\n",
"\n",
"def print_feature_importance(rf, names):\n",
" print(sorted(zip(map(lambda x: round(x, 4),\n",
" rf.feature_importances_),\n",
" names), reverse=True))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[(0.65780000000000005, 'petal width (cm)'), (0.29389999999999999, 'petal length (cm)'), (0.036200000000000003, 'sepal length (cm)'), (0.0121, 'sepal width (cm)')]\n"
]
}
],
"source": [
"iris = load_iris()\n",
"x, y, names = get_x_y_and_names(iris)\n",
"\n",
"rf = RandomForestClassifier()\n",
"rf.fit(x, y)\n",
"print_feature_importance(rf, names)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[(0.44650000000000001, 'LSTAT'), (0.37959999999999999, 'RM'), (0.055800000000000002, 'DIS'), (0.032099999999999997, 'CRIM'), (0.021399999999999999, 'NOX'), (0.0147, 'AGE'), (0.013599999999999999, 'TAX'), (0.012999999999999999, 'PTRATIO'), (0.012999999999999999, 'B'), (0.0057000000000000002, 'INDUS'), (0.0025000000000000001, 'RAD'), (0.0011999999999999999, 'CHAS'), (0.001, 'ZN')]\n"
]
}
],
"source": [
"from sklearn.datasets import load_boston\n",
"from sklearn.ensemble import RandomForestRegressor\n",
"\n",
"boston = load_boston()\n",
"x, y, names = get_x_y_and_names(boston)\n",
"\n",
"rf = RandomForestRegressor()\n",
"rf.fit(x, y)\n",
"print_feature_importance(rf, names)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>CRIM</th>\n",
" <th>ZN</th>\n",
" <th>INDUS</th>\n",
" <th>CHAS</th>\n",
" <th>NOX</th>\n",
" <th>RM</th>\n",
" <th>AGE</th>\n",
" <th>DIS</th>\n",
" <th>RAD</th>\n",
" <th>TAX</th>\n",
" <th>PTRATIO</th>\n",
" <th>B</th>\n",
" <th>LSTAT</th>\n",
" <th>y</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.00632</td>\n",
" <td>18.0</td>\n",
" <td>2.31</td>\n",
" <td>0.0</td>\n",
" <td>0.538</td>\n",
" <td>6.575</td>\n",
" <td>65.2</td>\n",
" <td>4.0900</td>\n",
" <td>1.0</td>\n",
" <td>296.0</td>\n",
" <td>15.3</td>\n",
" <td>396.90</td>\n",
" <td>4.98</td>\n",
" <td>24.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.02731</td>\n",
" <td>0.0</td>\n",
" <td>7.07</td>\n",
" <td>0.0</td>\n",
" <td>0.469</td>\n",
" <td>6.421</td>\n",
" <td>78.9</td>\n",
" <td>4.9671</td>\n",
" <td>2.0</td>\n",
" <td>242.0</td>\n",
" <td>17.8</td>\n",
" <td>396.90</td>\n",
" <td>9.14</td>\n",
" <td>21.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.02729</td>\n",
" <td>0.0</td>\n",
" <td>7.07</td>\n",
" <td>0.0</td>\n",
" <td>0.469</td>\n",
" <td>7.185</td>\n",
" <td>61.1</td>\n",
" <td>4.9671</td>\n",
" <td>2.0</td>\n",
" <td>242.0</td>\n",
" <td>17.8</td>\n",
" <td>392.83</td>\n",
" <td>4.03</td>\n",
" <td>34.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.03237</td>\n",
" <td>0.0</td>\n",
" <td>2.18</td>\n",
" <td>0.0</td>\n",
" <td>0.458</td>\n",
" <td>6.998</td>\n",
" <td>45.8</td>\n",
" <td>6.0622</td>\n",
" <td>3.0</td>\n",
" <td>222.0</td>\n",
" <td>18.7</td>\n",
" <td>394.63</td>\n",
" <td>2.94</td>\n",
" <td>33.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.06905</td>\n",
" <td>0.0</td>\n",
" <td>2.18</td>\n",
" <td>0.0</td>\n",
" <td>0.458</td>\n",
" <td>7.147</td>\n",
" <td>54.2</td>\n",
" <td>6.0622</td>\n",
" <td>3.0</td>\n",
" <td>222.0</td>\n",
" <td>18.7</td>\n",
" <td>396.90</td>\n",
" <td>5.33</td>\n",
" <td>36.2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX \\\n",
"0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 \n",
"1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 \n",
"2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 \n",
"3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 \n",
"4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 \n",
"\n",
" PTRATIO B LSTAT y \n",
"0 15.3 396.90 4.98 24.0 \n",
"1 17.8 396.90 9.14 21.6 \n",
"2 17.8 392.83 4.03 34.7 \n",
"3 18.7 394.63 2.94 33.4 \n",
"4 18.7 396.90 5.33 36.2 "
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame(x, columns=names)\n",
"df['y'] = y\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment