amirziai · January 19, 2018 04:49
diff --git a/rf-feature-importance.ipynb b/rf-feature-importance.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.datasets import load_iris\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "\n",
    "def get_x_y_and_names(data):\n",
    "    return [data[x] for x in ['data', 'target', 'feature_names']]\n",
    "\n",
    "def print_feature_importance(rf, names):\n",
    "    print(sorted(zip(map(lambda x: round(x, 4),\n",
    "                         rf.feature_importances_),\n",
    "                     names), reverse=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[(0.65780000000000005, 'petal width (cm)'), (0.29389999999999999, 'petal length (cm)'), (0.036200000000000003, 'sepal length (cm)'), (0.0121, 'sepal width (cm)')]\n"
     ]
    }
   ],
   "source": [
    "iris = load_iris()\n",
    "x, y, names = get_x_y_and_names(iris)\n",
    "\n",
    "rf = RandomForestClassifier()\n",
    "rf.fit(x, y)\n",
    "print_feature_importance(rf, names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[(0.44650000000000001, 'LSTAT'), (0.37959999999999999, 'RM'), (0.055800000000000002, 'DIS'), (0.032099999999999997, 'CRIM'), (0.021399999999999999, 'NOX'), (0.0147, 'AGE'), (0.013599999999999999, 'TAX'), (0.012999999999999999, 'PTRATIO'), (0.012999999999999999, 'B'), (0.0057000000000000002, 'INDUS'), (0.0025000000000000001, 'RAD'), (0.0011999999999999999, 'CHAS'), (0.001, 'ZN')]\n"
     ]
    }
   ],
   "source": [
    "from sklearn.datasets import load_boston\n",
    "from sklearn.ensemble import RandomForestRegressor\n",
    "\n",
    "boston = load_boston()\n",
    "x, y, names = get_x_y_and_names(boston)\n",
    "\n",
    "rf = RandomForestRegressor()\n",
    "rf.fit(x, y)\n",
    "print_feature_importance(rf, names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CRIM</th>\n",
       "      <th>ZN</th>\n",
       "      <th>INDUS</th>\n",
       "      <th>CHAS</th>\n",
       "      <th>NOX</th>\n",
       "      <th>RM</th>\n",
       "      <th>AGE</th>\n",
       "      <th>DIS</th>\n",
       "      <th>RAD</th>\n",
       "      <th>TAX</th>\n",
       "      <th>PTRATIO</th>\n",
       "      <th>B</th>\n",
       "      <th>LSTAT</th>\n",
       "      <th>y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.00632</td>\n",
       "      <td>18.0</td>\n",
       "      <td>2.31</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.538</td>\n",
       "      <td>6.575</td>\n",
       "      <td>65.2</td>\n",
       "      <td>4.0900</td>\n",
       "      <td>1.0</td>\n",
       "      <td>296.0</td>\n",
       "      <td>15.3</td>\n",
       "      <td>396.90</td>\n",
       "      <td>4.98</td>\n",
       "      <td>24.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.02731</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.07</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.469</td>\n",
       "      <td>6.421</td>\n",
       "      <td>78.9</td>\n",
       "      <td>4.9671</td>\n",
       "      <td>2.0</td>\n",
       "      <td>242.0</td>\n",
       "      <td>17.8</td>\n",
       "      <td>396.90</td>\n",
       "      <td>9.14</td>\n",
       "      <td>21.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.02729</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.07</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.469</td>\n",
       "      <td>7.185</td>\n",
       "      <td>61.1</td>\n",
       "      <td>4.9671</td>\n",
       "      <td>2.0</td>\n",
       "      <td>242.0</td>\n",
       "      <td>17.8</td>\n",
       "      <td>392.83</td>\n",
       "      <td>4.03</td>\n",
       "      <td>34.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.03237</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.18</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.458</td>\n",
       "      <td>6.998</td>\n",
       "      <td>45.8</td>\n",
       "      <td>6.0622</td>\n",
       "      <td>3.0</td>\n",
       "      <td>222.0</td>\n",
       "      <td>18.7</td>\n",
       "      <td>394.63</td>\n",
       "      <td>2.94</td>\n",
       "      <td>33.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.06905</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.18</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.458</td>\n",
       "      <td>7.147</td>\n",
       "      <td>54.2</td>\n",
       "      <td>6.0622</td>\n",
       "      <td>3.0</td>\n",
       "      <td>222.0</td>\n",
       "      <td>18.7</td>\n",
       "      <td>396.90</td>\n",
       "      <td>5.33</td>\n",
       "      <td>36.2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      CRIM    ZN  INDUS  CHAS    NOX     RM   AGE     DIS  RAD    TAX  \\\n",
       "0  0.00632  18.0   2.31   0.0  0.538  6.575  65.2  4.0900  1.0  296.0   \n",
       "1  0.02731   0.0   7.07   0.0  0.469  6.421  78.9  4.9671  2.0  242.0   \n",
       "2  0.02729   0.0   7.07   0.0  0.469  7.185  61.1  4.9671  2.0  242.0   \n",
       "3  0.03237   0.0   2.18   0.0  0.458  6.998  45.8  6.0622  3.0  222.0   \n",
       "4  0.06905   0.0   2.18   0.0  0.458  7.147  54.2  6.0622  3.0  222.0   \n",
       "\n",
       "   PTRATIO       B  LSTAT     y  \n",
       "0     15.3  396.90   4.98  24.0  \n",
       "1     17.8  396.90   9.14  21.6  \n",
       "2     17.8  392.83   4.03  34.7  \n",
       "3     18.7  394.63   2.94  33.4  \n",
       "4     18.7  396.90   5.33  36.2  "
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame(x, columns=names)\n",
    "df['y'] = y\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"from sklearn.datasets import load_iris\n",
	"from sklearn.ensemble import RandomForestClassifier\n",
	"\n",
	"def get_x_y_and_names(data):\n",
	" return [data[x] for x in ['data', 'target', 'feature_names']]\n",
	"\n",
	"def print_feature_importance(rf, names):\n",
	" print(sorted(zip(map(lambda x: round(x, 4),\n",
	" rf.feature_importances_),\n",
	" names), reverse=True))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[(0.65780000000000005, 'petal width (cm)'), (0.29389999999999999, 'petal length (cm)'), (0.036200000000000003, 'sepal length (cm)'), (0.0121, 'sepal width (cm)')]\n"
	]
	}
	],
	"source": [
	"iris = load_iris()\n",
	"x, y, names = get_x_y_and_names(iris)\n",
	"\n",
	"rf = RandomForestClassifier()\n",
	"rf.fit(x, y)\n",
	"print_feature_importance(rf, names)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[(0.44650000000000001, 'LSTAT'), (0.37959999999999999, 'RM'), (0.055800000000000002, 'DIS'), (0.032099999999999997, 'CRIM'), (0.021399999999999999, 'NOX'), (0.0147, 'AGE'), (0.013599999999999999, 'TAX'), (0.012999999999999999, 'PTRATIO'), (0.012999999999999999, 'B'), (0.0057000000000000002, 'INDUS'), (0.0025000000000000001, 'RAD'), (0.0011999999999999999, 'CHAS'), (0.001, 'ZN')]\n"
	]
	}
	],
	"source": [
	"from sklearn.datasets import load_boston\n",
	"from sklearn.ensemble import RandomForestRegressor\n",
	"\n",
	"boston = load_boston()\n",
	"x, y, names = get_x_y_and_names(boston)\n",
	"\n",
	"rf = RandomForestRegressor()\n",
	"rf.fit(x, y)\n",
	"print_feature_importance(rf, names)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>CRIM</th>\n",
	" <th>ZN</th>\n",
	" <th>INDUS</th>\n",
	" <th>CHAS</th>\n",
	" <th>NOX</th>\n",
	" <th>RM</th>\n",
	" <th>AGE</th>\n",
	" <th>DIS</th>\n",
	" <th>RAD</th>\n",
	" <th>TAX</th>\n",
	" <th>PTRATIO</th>\n",
	" <th>B</th>\n",
	" <th>LSTAT</th>\n",
	" <th>y</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>0.00632</td>\n",
	" <td>18.0</td>\n",
	" <td>2.31</td>\n",
	" <td>0.0</td>\n",
	" <td>0.538</td>\n",
	" <td>6.575</td>\n",
	" <td>65.2</td>\n",
	" <td>4.0900</td>\n",
	" <td>1.0</td>\n",
	" <td>296.0</td>\n",
	" <td>15.3</td>\n",
	" <td>396.90</td>\n",
	" <td>4.98</td>\n",
	" <td>24.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>0.02731</td>\n",
	" <td>0.0</td>\n",
	" <td>7.07</td>\n",
	" <td>0.0</td>\n",
	" <td>0.469</td>\n",
	" <td>6.421</td>\n",
	" <td>78.9</td>\n",
	" <td>4.9671</td>\n",
	" <td>2.0</td>\n",
	" <td>242.0</td>\n",
	" <td>17.8</td>\n",
	" <td>396.90</td>\n",
	" <td>9.14</td>\n",
	" <td>21.6</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>0.02729</td>\n",
	" <td>0.0</td>\n",
	" <td>7.07</td>\n",
	" <td>0.0</td>\n",
	" <td>0.469</td>\n",
	" <td>7.185</td>\n",
	" <td>61.1</td>\n",
	" <td>4.9671</td>\n",
	" <td>2.0</td>\n",
	" <td>242.0</td>\n",
	" <td>17.8</td>\n",
	" <td>392.83</td>\n",
	" <td>4.03</td>\n",
	" <td>34.7</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>0.03237</td>\n",
	" <td>0.0</td>\n",
	" <td>2.18</td>\n",
	" <td>0.0</td>\n",
	" <td>0.458</td>\n",
	" <td>6.998</td>\n",
	" <td>45.8</td>\n",
	" <td>6.0622</td>\n",
	" <td>3.0</td>\n",
	" <td>222.0</td>\n",
	" <td>18.7</td>\n",
	" <td>394.63</td>\n",
	" <td>2.94</td>\n",
	" <td>33.4</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>0.06905</td>\n",
	" <td>0.0</td>\n",
	" <td>2.18</td>\n",
	" <td>0.0</td>\n",
	" <td>0.458</td>\n",
	" <td>7.147</td>\n",
	" <td>54.2</td>\n",
	" <td>6.0622</td>\n",
	" <td>3.0</td>\n",
	" <td>222.0</td>\n",
	" <td>18.7</td>\n",
	" <td>396.90</td>\n",
	" <td>5.33</td>\n",
	" <td>36.2</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX \\\n",
	"0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 \n",
	"1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 \n",
	"2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 \n",
	"3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 \n",
	"4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 \n",
	"\n",
	" PTRATIO B LSTAT y \n",
	"0 15.3 396.90 4.98 24.0 \n",
	"1 17.8 396.90 9.14 21.6 \n",
	"2 17.8 392.83 4.03 34.7 \n",
	"3 18.7 394.63 2.94 33.4 \n",
	"4 18.7 396.90 5.33 36.2 "
	]
	},
	"execution_count": 19,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df = pd.DataFrame(x, columns=names)\n",
	"df['y'] = y\n",
	"df.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"anaconda-cloud": {},
	"kernelspec": {
	"display_name": "Python [default]",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.4"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}
No results found