hirokiky · August 1, 2017 09:06
diff --git a/wine.ipynb b/wine.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "button": false,
    "collapsed": true,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "df = pd.read_csv('http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv', delimiter=';')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "button": false,
    "collapsed": false,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>fixed acidity</th>\n",
       "      <th>volatile acidity</th>\n",
       "      <th>citric acid</th>\n",
       "      <th>residual sugar</th>\n",
       "      <th>chlorides</th>\n",
       "      <th>free sulfur dioxide</th>\n",
       "      <th>total sulfur dioxide</th>\n",
       "      <th>density</th>\n",
       "      <th>pH</th>\n",
       "      <th>sulphates</th>\n",
       "      <th>alcohol</th>\n",
       "      <th>quality</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>7.4</td>\n",
       "      <td>0.70</td>\n",
       "      <td>0.00</td>\n",
       "      <td>1.9</td>\n",
       "      <td>0.076</td>\n",
       "      <td>11.0</td>\n",
       "      <td>34.0</td>\n",
       "      <td>0.9978</td>\n",
       "      <td>3.51</td>\n",
       "      <td>0.56</td>\n",
       "      <td>9.4</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7.8</td>\n",
       "      <td>0.88</td>\n",
       "      <td>0.00</td>\n",
       "      <td>2.6</td>\n",
       "      <td>0.098</td>\n",
       "      <td>25.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>0.9968</td>\n",
       "      <td>3.20</td>\n",
       "      <td>0.68</td>\n",
       "      <td>9.8</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>7.8</td>\n",
       "      <td>0.76</td>\n",
       "      <td>0.04</td>\n",
       "      <td>2.3</td>\n",
       "      <td>0.092</td>\n",
       "      <td>15.0</td>\n",
       "      <td>54.0</td>\n",
       "      <td>0.9970</td>\n",
       "      <td>3.26</td>\n",
       "      <td>0.65</td>\n",
       "      <td>9.8</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>11.2</td>\n",
       "      <td>0.28</td>\n",
       "      <td>0.56</td>\n",
       "      <td>1.9</td>\n",
       "      <td>0.075</td>\n",
       "      <td>17.0</td>\n",
       "      <td>60.0</td>\n",
       "      <td>0.9980</td>\n",
       "      <td>3.16</td>\n",
       "      <td>0.58</td>\n",
       "      <td>9.8</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>7.4</td>\n",
       "      <td>0.70</td>\n",
       "      <td>0.00</td>\n",
       "      <td>1.9</td>\n",
       "      <td>0.076</td>\n",
       "      <td>11.0</td>\n",
       "      <td>34.0</td>\n",
       "      <td>0.9978</td>\n",
       "      <td>3.51</td>\n",
       "      <td>0.56</td>\n",
       "      <td>9.4</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \\\n",
       "0            7.4              0.70         0.00             1.9      0.076   \n",
       "1            7.8              0.88         0.00             2.6      0.098   \n",
       "2            7.8              0.76         0.04             2.3      0.092   \n",
       "3           11.2              0.28         0.56             1.9      0.075   \n",
       "4            7.4              0.70         0.00             1.9      0.076   \n",
       "\n",
       "   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \\\n",
       "0                 11.0                  34.0   0.9978  3.51       0.56   \n",
       "1                 25.0                  67.0   0.9968  3.20       0.68   \n",
       "2                 15.0                  54.0   0.9970  3.26       0.65   \n",
       "3                 17.0                  60.0   0.9980  3.16       0.58   \n",
       "4                 11.0                  34.0   0.9978  3.51       0.56   \n",
       "\n",
       "   alcohol  quality  \n",
       "0      9.4        5  \n",
       "1      9.8        5  \n",
       "2      9.8        5  \n",
       "3      9.8        6  \n",
       "4      9.4        5  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "button": false,
    "collapsed": true,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [],
   "source": [
    "X = df.iloc[:, :-1].values\n",
    "y = df.iloc[:, -1].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "button": false,
    "collapsed": true,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [],
   "source": [
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.grid_search import GridSearchCV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "button": false,
    "collapsed": true,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [],
   "source": [
    "gs = GridSearchCV(RandomForestClassifier(),\n",
    "            param_grid=[{'n_estimators': [3, 10, 20, 50],\n",
    "                         'max_depth': [3, 5, 10, 20, 30]}])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "button": false,
    "collapsed": true,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "button": false,
    "collapsed": false,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=None, error_score='raise',\n",
       "       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
       "            max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
       "            min_impurity_split=1e-07, min_samples_leaf=1,\n",
       "            min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
       "            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,\n",
       "            verbose=0, warm_start=False),\n",
       "       fit_params={}, iid=True, n_jobs=1,\n",
       "       param_grid=[{'max_depth': [3, 5, 10, 20, 30], 'n_estimators': [3, 10, 20, 50]}],\n",
       "       pre_dispatch='2*n_jobs', refit=True, scoring=None, verbose=0)"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gs.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "button": false,
    "collapsed": false,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'max_depth': 10, 'n_estimators': 50}"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gs.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "button": false,
    "collapsed": false,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.6875"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gs.score(X_test, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "button": false,
    "collapsed": false,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
       "            max_depth=10, max_features='auto', max_leaf_nodes=None,\n",
       "            min_impurity_split=1e-07, min_samples_leaf=1,\n",
       "            min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
       "            n_estimators=50, n_jobs=1, oob_score=False, random_state=None,\n",
       "            verbose=0, warm_start=False)"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gs.best_estimator_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "button": false,
    "collapsed": false,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.69374999999999998"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "es = gs.best_estimator_\n",
    "es.fit(X_train, y_train)\n",
    "es.score(X_test, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "button": false,
    "collapsed": false,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.07894648,  0.10711908,  0.0643799 ,  0.0656545 ,  0.07742105,\n",
       "        0.06366092,  0.10172709,  0.09202301,  0.067935  ,  0.11266989,\n",
       "        0.16846308])"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "es.feature_importances_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "button": false,
    "collapsed": true,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [],
   "source": [
    "from sklearn.svm import SVC\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "pipe = Pipeline([('sv', StandardScaler()), ('clf', SVC())])\n",
    "gs_svm = GridSearchCV(pipe,\n",
    "            param_grid=[{'clf__gamma': [0.01, 0.1, 1, 10, 100],\n",
    "                         'clf__C': [0.01, 0.1, 1, 10, 100]}])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "button": false,
    "collapsed": false,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=None, error_score='raise',\n",
       "       estimator=Pipeline(steps=[('sv', StandardScaler(copy=True, with_mean=True, with_std=True)), ('clf', SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n",
       "  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',\n",
       "  max_iter=-1, probability=False, random_state=None, shrinking=True,\n",
       "  tol=0.001, verbose=False))]),\n",
       "       fit_params={}, iid=True, n_jobs=1,\n",
       "       param_grid=[{'clf__gamma': [0.01, 0.1, 1, 10, 100], 'clf__C': [0.01, 0.1, 1, 10, 100]}],\n",
       "       pre_dispatch='2*n_jobs', refit=True, scoring=None, verbose=0)"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gs_svm.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "button": false,
    "collapsed": false,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.66041666666666665"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gs_svm.score(X_test, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "button": false,
    "collapsed": false,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'clf__C': 1, 'clf__gamma': 1}"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gs_svm.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "button": false,
    "collapsed": true,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "pipe = Pipeline([('sv', StandardScaler()), ('clf', LogisticRegression())])\n",
    "gs_lr = GridSearchCV(pipe,\n",
    "            param_grid=[{'clf__C': [0.01, 0.1, 1, 10, 100]}])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "button": false,
    "collapsed": false,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=None, error_score='raise',\n",
       "       estimator=Pipeline(steps=[('sv', StandardScaler(copy=True, with_mean=True, with_std=True)), ('clf', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
       "          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n",
       "          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n",
       "          verbose=0, warm_start=False))]),\n",
       "       fit_params={}, iid=True, n_jobs=1,\n",
       "       param_grid=[{'clf__C': [0.01, 0.1, 1, 10, 100]}],\n",
       "       pre_dispatch='2*n_jobs', refit=True, scoring=None, verbose=0)"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gs_lr.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "button": false,
    "collapsed": false,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.58333333333333337"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gs_lr.score(X_test, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {
    "button": false,
    "collapsed": true,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "y_s = np.array([1 if a >= 7 else 0 for a in y])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "button": false,
    "collapsed": true,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [],
   "source": [
    "X_s_train, X_s_test, y_s_train, y_s_test = train_test_split(X, y_s, test_size=0.3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {
    "button": false,
    "collapsed": true,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [],
   "source": [
    "gs = GridSearchCV(RandomForestClassifier(),\n",
    "            param_grid=[{'n_estimators': [3, 10, 20, 50],\n",
    "                         'max_depth': [3, 5, 10, 20, 30]}])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {
    "button": false,
    "collapsed": false,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=None, error_score='raise',\n",
       "       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
       "            max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
       "            min_impurity_split=1e-07, min_samples_leaf=1,\n",
       "            min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
       "            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,\n",
       "            verbose=0, warm_start=False),\n",
       "       fit_params={}, iid=True, n_jobs=1,\n",
       "       param_grid=[{'max_depth': [3, 5, 10, 20, 30], 'n_estimators': [3, 10, 20, 50]}],\n",
       "       pre_dispatch='2*n_jobs', refit=True, scoring=None, verbose=0)"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gs.fit(X_s_train, y_s_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {
    "button": false,
    "collapsed": false,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'max_depth': 10, 'n_estimators': 20}"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gs.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {
    "button": false,
    "collapsed": false,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.90416666666666667"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "e = gs.best_estimator_\n",
    "e.fit(X_s_train, y_s_train)\n",
    "e.score(X_s_test, y_s_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {
    "button": false,
    "collapsed": false,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.06236372,  0.11460061,  0.08839113,  0.06540987,  0.07850471,\n",
       "        0.05851349,  0.07462395,  0.09557788,  0.0689122 ,  0.12739927,\n",
       "        0.16570318])"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "e.feature_importances_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "button": false,
    "collapsed": true,
    "deletable": true,
    "new_sheet": false,
    "run_control": {
     "read_only": false
    }
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"button": false,
	"collapsed": true,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"df = pd.read_csv('http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv', delimiter=';')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {
	"button": false,
	"collapsed": false,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style>\n",
	" .dataframe thead tr:only-child th {\n",
	" text-align: right;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: left;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>fixed acidity</th>\n",
	" <th>volatile acidity</th>\n",
	" <th>citric acid</th>\n",
	" <th>residual sugar</th>\n",
	" <th>chlorides</th>\n",
	" <th>free sulfur dioxide</th>\n",
	" <th>total sulfur dioxide</th>\n",
	" <th>density</th>\n",
	" <th>pH</th>\n",
	" <th>sulphates</th>\n",
	" <th>alcohol</th>\n",
	" <th>quality</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>7.4</td>\n",
	" <td>0.70</td>\n",
	" <td>0.00</td>\n",
	" <td>1.9</td>\n",
	" <td>0.076</td>\n",
	" <td>11.0</td>\n",
	" <td>34.0</td>\n",
	" <td>0.9978</td>\n",
	" <td>3.51</td>\n",
	" <td>0.56</td>\n",
	" <td>9.4</td>\n",
	" <td>5</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>7.8</td>\n",
	" <td>0.88</td>\n",
	" <td>0.00</td>\n",
	" <td>2.6</td>\n",
	" <td>0.098</td>\n",
	" <td>25.0</td>\n",
	" <td>67.0</td>\n",
	" <td>0.9968</td>\n",
	" <td>3.20</td>\n",
	" <td>0.68</td>\n",
	" <td>9.8</td>\n",
	" <td>5</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>7.8</td>\n",
	" <td>0.76</td>\n",
	" <td>0.04</td>\n",
	" <td>2.3</td>\n",
	" <td>0.092</td>\n",
	" <td>15.0</td>\n",
	" <td>54.0</td>\n",
	" <td>0.9970</td>\n",
	" <td>3.26</td>\n",
	" <td>0.65</td>\n",
	" <td>9.8</td>\n",
	" <td>5</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>11.2</td>\n",
	" <td>0.28</td>\n",
	" <td>0.56</td>\n",
	" <td>1.9</td>\n",
	" <td>0.075</td>\n",
	" <td>17.0</td>\n",
	" <td>60.0</td>\n",
	" <td>0.9980</td>\n",
	" <td>3.16</td>\n",
	" <td>0.58</td>\n",
	" <td>9.8</td>\n",
	" <td>6</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>7.4</td>\n",
	" <td>0.70</td>\n",
	" <td>0.00</td>\n",
	" <td>1.9</td>\n",
	" <td>0.076</td>\n",
	" <td>11.0</td>\n",
	" <td>34.0</td>\n",
	" <td>0.9978</td>\n",
	" <td>3.51</td>\n",
	" <td>0.56</td>\n",
	" <td>9.4</td>\n",
	" <td>5</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" fixed acidity volatile acidity citric acid residual sugar chlorides \\\n",
	"0 7.4 0.70 0.00 1.9 0.076 \n",
	"1 7.8 0.88 0.00 2.6 0.098 \n",
	"2 7.8 0.76 0.04 2.3 0.092 \n",
	"3 11.2 0.28 0.56 1.9 0.075 \n",
	"4 7.4 0.70 0.00 1.9 0.076 \n",
	"\n",
	" free sulfur dioxide total sulfur dioxide density pH sulphates \\\n",
	"0 11.0 34.0 0.9978 3.51 0.56 \n",
	"1 25.0 67.0 0.9968 3.20 0.68 \n",
	"2 15.0 54.0 0.9970 3.26 0.65 \n",
	"3 17.0 60.0 0.9980 3.16 0.58 \n",
	"4 11.0 34.0 0.9978 3.51 0.56 \n",
	"\n",
	" alcohol quality \n",
	"0 9.4 5 \n",
	"1 9.8 5 \n",
	"2 9.8 5 \n",
	"3 9.8 6 \n",
	"4 9.4 5 "
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {
	"button": false,
	"collapsed": true,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [],
	"source": [
	"X = df.iloc[:, :-1].values\n",
	"y = df.iloc[:, -1].values"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {
	"button": false,
	"collapsed": true,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [],
	"source": [
	"from sklearn.ensemble import RandomForestClassifier\n",
	"from sklearn.grid_search import GridSearchCV"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 22,
	"metadata": {
	"button": false,
	"collapsed": true,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [],
	"source": [
	"gs = GridSearchCV(RandomForestClassifier(),\n",
	" param_grid=[{'n_estimators': [3, 10, 20, 50],\n",
	" 'max_depth': [3, 5, 10, 20, 30]}])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 23,
	"metadata": {
	"button": false,
	"collapsed": true,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [],
	"source": [
	"from sklearn.model_selection import train_test_split\n",
	"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 24,
	"metadata": {
	"button": false,
	"collapsed": false,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"GridSearchCV(cv=None, error_score='raise',\n",
	" estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
	" max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
	" min_impurity_split=1e-07, min_samples_leaf=1,\n",
	" min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
	" n_estimators=10, n_jobs=1, oob_score=False, random_state=None,\n",
	" verbose=0, warm_start=False),\n",
	" fit_params={}, iid=True, n_jobs=1,\n",
	" param_grid=[{'max_depth': [3, 5, 10, 20, 30], 'n_estimators': [3, 10, 20, 50]}],\n",
	" pre_dispatch='2*n_jobs', refit=True, scoring=None, verbose=0)"
	]
	},
	"execution_count": 24,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"gs.fit(X_train, y_train)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 25,
	"metadata": {
	"button": false,
	"collapsed": false,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"{'max_depth': 10, 'n_estimators': 50}"
	]
	},
	"execution_count": 25,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"gs.best_params_"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 26,
	"metadata": {
	"button": false,
	"collapsed": false,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0.6875"
	]
	},
	"execution_count": 26,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"gs.score(X_test, y_test)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 27,
	"metadata": {
	"button": false,
	"collapsed": false,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
	" max_depth=10, max_features='auto', max_leaf_nodes=None,\n",
	" min_impurity_split=1e-07, min_samples_leaf=1,\n",
	" min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
	" n_estimators=50, n_jobs=1, oob_score=False, random_state=None,\n",
	" verbose=0, warm_start=False)"
	]
	},
	"execution_count": 27,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"gs.best_estimator_"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 28,
	"metadata": {
	"button": false,
	"collapsed": false,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0.69374999999999998"
	]
	},
	"execution_count": 28,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"es = gs.best_estimator_\n",
	"es.fit(X_train, y_train)\n",
	"es.score(X_test, y_test)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 29,
	"metadata": {
	"button": false,
	"collapsed": false,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([ 0.07894648, 0.10711908, 0.0643799 , 0.0656545 , 0.07742105,\n",
	" 0.06366092, 0.10172709, 0.09202301, 0.067935 , 0.11266989,\n",
	" 0.16846308])"
	]
	},
	"execution_count": 29,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"es.feature_importances_"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 31,
	"metadata": {
	"button": false,
	"collapsed": true,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [],
	"source": [
	"from sklearn.svm import SVC\n",
	"from sklearn.pipeline import Pipeline\n",
	"from sklearn.preprocessing import StandardScaler\n",
	"pipe = Pipeline([('sv', StandardScaler()), ('clf', SVC())])\n",
	"gs_svm = GridSearchCV(pipe,\n",
	" param_grid=[{'clf__gamma': [0.01, 0.1, 1, 10, 100],\n",
	" 'clf__C': [0.01, 0.1, 1, 10, 100]}])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 36,
	"metadata": {
	"button": false,
	"collapsed": false,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"GridSearchCV(cv=None, error_score='raise',\n",
	" estimator=Pipeline(steps=[('sv', StandardScaler(copy=True, with_mean=True, with_std=True)), ('clf', SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n",
	" decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',\n",
	" max_iter=-1, probability=False, random_state=None, shrinking=True,\n",
	" tol=0.001, verbose=False))]),\n",
	" fit_params={}, iid=True, n_jobs=1,\n",
	" param_grid=[{'clf__gamma': [0.01, 0.1, 1, 10, 100], 'clf__C': [0.01, 0.1, 1, 10, 100]}],\n",
	" pre_dispatch='2*n_jobs', refit=True, scoring=None, verbose=0)"
	]
	},
	"execution_count": 36,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"gs_svm.fit(X_train, y_train)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 37,
	"metadata": {
	"button": false,
	"collapsed": false,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0.66041666666666665"
	]
	},
	"execution_count": 37,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"gs_svm.score(X_test, y_test)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 38,
	"metadata": {
	"button": false,
	"collapsed": false,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"{'clf__C': 1, 'clf__gamma': 1}"
	]
	},
	"execution_count": 38,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"gs_svm.best_params_"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 39,
	"metadata": {
	"button": false,
	"collapsed": true,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [],
	"source": [
	"from sklearn.linear_model import LogisticRegression\n",
	"pipe = Pipeline([('sv', StandardScaler()), ('clf', LogisticRegression())])\n",
	"gs_lr = GridSearchCV(pipe,\n",
	" param_grid=[{'clf__C': [0.01, 0.1, 1, 10, 100]}])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 41,
	"metadata": {
	"button": false,
	"collapsed": false,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"GridSearchCV(cv=None, error_score='raise',\n",
	" estimator=Pipeline(steps=[('sv', StandardScaler(copy=True, with_mean=True, with_std=True)), ('clf', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
	" intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n",
	" penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n",
	" verbose=0, warm_start=False))]),\n",
	" fit_params={}, iid=True, n_jobs=1,\n",
	" param_grid=[{'clf__C': [0.01, 0.1, 1, 10, 100]}],\n",
	" pre_dispatch='2*n_jobs', refit=True, scoring=None, verbose=0)"
	]
	},
	"execution_count": 41,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"gs_lr.fit(X_train, y_train)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 42,
	"metadata": {
	"button": false,
	"collapsed": false,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0.58333333333333337"
	]
	},
	"execution_count": 42,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"gs_lr.score(X_test, y_test)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 67,
	"metadata": {
	"button": false,
	"collapsed": true,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [],
	"source": [
	"import numpy as np\n",
	"y_s = np.array([1 if a >= 7 else 0 for a in y])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 68,
	"metadata": {
	"button": false,
	"collapsed": true,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [],
	"source": [
	"X_s_train, X_s_test, y_s_train, y_s_test = train_test_split(X, y_s, test_size=0.3)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 69,
	"metadata": {
	"button": false,
	"collapsed": true,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [],
	"source": [
	"gs = GridSearchCV(RandomForestClassifier(),\n",
	" param_grid=[{'n_estimators': [3, 10, 20, 50],\n",
	" 'max_depth': [3, 5, 10, 20, 30]}])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 70,
	"metadata": {
	"button": false,
	"collapsed": false,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"GridSearchCV(cv=None, error_score='raise',\n",
	" estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
	" max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
	" min_impurity_split=1e-07, min_samples_leaf=1,\n",
	" min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
	" n_estimators=10, n_jobs=1, oob_score=False, random_state=None,\n",
	" verbose=0, warm_start=False),\n",
	" fit_params={}, iid=True, n_jobs=1,\n",
	" param_grid=[{'max_depth': [3, 5, 10, 20, 30], 'n_estimators': [3, 10, 20, 50]}],\n",
	" pre_dispatch='2*n_jobs', refit=True, scoring=None, verbose=0)"
	]
	},
	"execution_count": 70,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"gs.fit(X_s_train, y_s_train)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 71,
	"metadata": {
	"button": false,
	"collapsed": false,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"{'max_depth': 10, 'n_estimators': 20}"
	]
	},
	"execution_count": 71,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"gs.best_params_"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 72,
	"metadata": {
	"button": false,
	"collapsed": false,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0.90416666666666667"
	]
	},
	"execution_count": 72,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"e = gs.best_estimator_\n",
	"e.fit(X_s_train, y_s_train)\n",
	"e.score(X_s_test, y_s_test)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 73,
	"metadata": {
	"button": false,
	"collapsed": false,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([ 0.06236372, 0.11460061, 0.08839113, 0.06540987, 0.07850471,\n",
	" 0.05851349, 0.07462395, 0.09557788, 0.0689122 , 0.12739927,\n",
	" 0.16570318])"
	]
	},
	"execution_count": 73,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"e.feature_importances_"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"button": false,
	"collapsed": true,
	"deletable": true,
	"new_sheet": false,
	"run_control": {
	"read_only": false
	}
	},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.0"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}