Created
October 4, 2016 20:57
-
-
Save vinceallenvince/06c83b0a1c62a10b296cac79f249e394 to your computer and use it in GitHub Desktop.
Titanic Kaggle competition - Feature EDA - Sex
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from __future__ import division\n", | |
"import operator\n", | |
"import math\n", | |
"\n", | |
"import pandas as pd\n", | |
"from pandas import Series, DataFrame\n", | |
"import numpy as np\n", | |
"\n", | |
"import matplotlib.pyplot as plt\n", | |
"import seaborn as sns\n", | |
"sns.set_style('whitegrid')\n", | |
"%matplotlib inline\n", | |
"\n", | |
"from sklearn.cross_validation import cross_val_score\n", | |
"from sklearn.linear_model import LogisticRegression\n", | |
"from sklearn.svm import SVC, LinearSVC\n", | |
"from sklearn.ensemble import RandomForestClassifier\n", | |
"from sklearn.neighbors import KNeighborsClassifier\n", | |
"from sklearn.naive_bayes import GaussianNB" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def check_classifiers(X_train, Y_train, cv=5):\n", | |
"    \"\"\"Cross-validate a fixed set of baseline classifiers and rank them.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    X_train : feature matrix handed to ``cross_val_score``.\n", | |
"    Y_train : target labels handed to ``cross_val_score``.\n", | |
"    cv : number of cross-validation folds (default 5).\n", | |
"\n", | |
"    Returns\n", | |
"    -------\n", | |
"    list of ``(classifier name, mean CV score)`` tuples, best score first.\n", | |
"    \"\"\"\n", | |
"    classifiers = [\n", | |
"        ('LogisticRegression', LogisticRegression()),\n", | |
"        ('KNeighborsClassifier', KNeighborsClassifier()),\n", | |
"        ('RandomForestClassifier', RandomForestClassifier()),\n", | |
"        ('SVC', SVC()),\n", | |
"        ('GaussianNB', GaussianNB()),\n", | |
"    ]\n", | |
"\n", | |
"    # score the data passed in as arguments rather than notebook-level\n", | |
"    # globals, so the function works on any feature matrix / target pair\n", | |
"    classifier_score = [\n", | |
"        (name, cross_val_score(clf, X_train, Y_train, cv=cv).mean())\n", | |
"        for name, clf in classifiers\n", | |
"    ]\n", | |
"\n", | |
"    # sorted() is stable, so tied scores keep the listing order above\n", | |
"    return sorted(classifier_score, key=operator.itemgetter(1), reverse=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# load the training data (Age read as float64) and pull out the target column\n", | |
"train_path = 'data/train.csv'\n", | |
"X_train = pd.read_csv(train_path, dtype={'Age': np.float64})\n", | |
"y_train = X_train.Survived" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAskAAAFgCAYAAABJzuRWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmYXFWd//F3JwRECGgGFBwQBeGLC0TpyBIZCEsEWSTA\nIGZYNLKDOAyDYkBFmWFRNCIgIATZZFEy4EYAl8iSKPlpywgR/AIi4ICCCCQsIsT0749zG4pLlk7o\n6uruer+eJ0/qLnX7W5XOqU+de+65Hd3d3UiSJEl6ybBWFyBJkiQNNIZkSZIkqcaQLEmSJNUYkiVJ\nkqQaQ7IkSZJUY0iWJEmSapZrdQEavCJic+BkYBQwHHgQ+GRm3tlHxz8EWDUzv7QMz90aOCszN+qL\nWpZFRNwATMzMx2vrTwD+KTM/8SqO/VHg45T3fTngF8AxmTlv2SuWNNBExHKUtvV/M3OnVtfTahGx\nK7BdZh7VB8f6J+AvmfmKDsOIWACsVm+/l+LYawCnA+8AuoFngVMy8/uvomT1M3uStUwiYnngB8B/\nZOa7qzB6OTA9Ijr64mdk5jeWJSA3aPUk4OObcdCIGAN8lvJB8R5gY+AfwNnN+HmSWmp34DdAZ0RE\nq4tptcz8QV8E5EoHi/6ceLWfH1OBX2Tmxpk5GvgYcKH/hoOLPclaVq8FVgVG9qzIzMsiYi4wPCLe\nR0NPbmPPbtWTugWwBvBb4F+ACZn562rfK4Abq+2rAd8HvpKZG1fbVwX+ALy1eu5kYATwBuCSzPzc\n4gqPiI8BRwPzgceAj2TmQxFxMHBktf4R4OOZeW9EXAjckZlTque/uBwRfwAuArYD1ga+nZmfjohv\nVj/uZxGxU2Y+VCvjHRFxE6UX/tfA4cBo4MrMfHP1c1YE7gfemZmPNTx3TUrjvjIwNzO7I+KzwDsb\nXuNxwB6UL8L3V8d/EvhV9e9wbvU+/DuwWWY+t7j3TFLLHA5cAdwDHAUcFhGXAb/OzK/Ai2fdxmXm\nxKqn9XhKm/gs5QzT7IZ2d01K6D4G+Aal3VwDeAD4UGY+FhGbAl+vjnEfsA6lQ+TmhRz/k5l5a2PB\nETEcOBN4H/B8dYxJwOrAnMwcWe23Ts9yRHwEOIDy2TIPWIHS7l9d7XtKdfjfAf9KacN/DqyZmfMj\nYlj1GsYDDwNfA95V1fnTqs4FEbEH8N/AM5T2cFE6gJMj4r3V489k5vSI+BHwncycWtV1HOXM4H/W\nnr8msGJEdGRmd2beFREfBJ6onrdhVWPPmdgzMvOiiNgfOAHYqPq5vwROzsxvLaZWNYk9yVommfkk\n8Cnghoi4NyIuiYhJwE8zc361W/2beOPym4H3ZOY+wAWUBpSIeD2wPaVXGqA7M38MrBQRm1TrJgI/\nzMy5wH8A+2fmppQPgMkRMWpRdUfExsCpwPsz892UAH58RGxD+dDYuuqdvQL4Xi/fjpUycyvKB8In\nImKdzPxYtW3cQgIywHrA7tWXiGGUBvjnwGMRsWO1z4eBn9QCMsB1lA+H+yOiKyLOBDbNzJuq17gf\npYHdNDM3qfa/oArCHwZOjIgPUD4o9jQgSwNTRLwD2Az4NnAJsF/VRp4PfKRh10nAeRHxNuAk4AOZ\n2QkcAlxTfeGG0u6+OzP3p7QFP8/M92XmesDfquMPB6YBx1dt5BmUL/As4vhXNxy/xxaUtm90Zr6X\nEpI3rrYt7nPhHZQ2eNvqNfZ8LgwD9qX0zkL5XLgHmAN8sFq3A/CHzPwd8FXgV9XP3oQSzo+OiDdQ\nPm92r7Y9sJC3vdG91evcD7ikGp7xdeCgqq4O4EDgnIU89z8pnS6PRMR3I+KYqr5HG97jY6s6xgGf\njIhNM/MSSvt+GiVE32RAbh1DspZZZp5O6YX4BOWb+7HAryNi5GKfWNyamT2N44XAXtXYu4nADzLz\nqdr+3wQ+Wj2exEuN5QeBMRHxOWBKtW6lxfzc
7YDrM/Ph6jWckZmHAztSeoEfr9ZfDLyp6ulYku9V\nz3kYeJTSM9BjUUNPrm4Y63YhLw3NOJuqAaZ8AL2i8c3M+Zm5L6Xn+suUnpKLqh54gF0oH6xdEXEb\nZezy+tVz5wAnAj+k9DDd24vXJ6k1DgWuzcy5mfkrylmhQzLzRmCFiNgkIt5OGTv7M0o7sgbw0+r/\n/mWUM2Nvq473YrubmWcAv4iI/4iIsylnolamfMHuzswfVfvdSAmj9OL4Pe4A5kfE7Ig4kdLe3cqS\n3Z6Zz1SPvwNsXgXbHYF7MvP3tf2n8tLnwkcpwRpKG3hIVWMX8N7qdW1Z/Yys9vvGEuo5FyAzfwvc\nSQn/PwDeGBEbUYL5fQtrR6v3bW1gAnArsCtwV0R0AhtQOkq+WdV4E/Aa4D3V0w+rXvNmlLN9ahGH\nW2iZRMRYYGxmfhmYThmLfBylMR1PGcbQGBCXrx3i6Z4HmflgRPya0oh8lIU3ChdSAvgFlIv5bo6I\n1wK3AVcDt1CC9AQWHUyhNOgv9lxExGsopxIX9oVxGCWAdi/htfyt4XF930X5R8PjDuCF6vFlwEkR\nMY7SQz2z/sSqx/6xzPwBpcf7iog4CfhDRBxBOXX3xcz8RrX/CF4e3N8F/JnS4F+OpAGnat/2B/4W\nEfdR2omRwBERcRqlR/QjwN+rx1D+7/80Myc2HGctSifGHjS0uxHxRWAMpd2cQWnrOihtZL09XLCE\n47/sbFlmzo2IdwNjgW2Bb0fE14Brasde3OfCsxFxFbAPpa06n1eaBkyphi5sVb1fVD9jr54wHBGr\nVOu34+Xtc2M7XNfd8LqpnvdCNWTjXMrQkDdRBelGEbE68HnKkL2fU3qGT42InjMA5wFPVGf6ep7z\nBsqQOChfRF5DeX/eRPlypBawJ1nL6i+UYQpjG9b9M2U82R3V9jdHxGrVKakJSzjeVEpP9IqZ+Yv6\nxqqX9v9Rvvn39CKvT/nQ+ExmXks5ZbU8pSFflJ8B20fEG6vlQ4EvAtcDe0fEavCyIHpv9VrGVOtX\no4yD7o35lA+ehflgRKxanXY7mDIkgsz8GyUof5OFn8KD0nCfGhH/3LAuKA3pE8ANwIENPfr/TTlV\nSzUeb2vKqc8dqjFykgaefSkzL6yZmetm5lspvY8jgQ8BF1POpP0rpRMBSth9f0S5OCwidqKMP15h\nIcd/P3B6Zl5G6dQYT2k77wKei4j3V8fYlKp3eTHHf03jgSNiZ8o44F9k5omU9mc0JQSOqEItlOC+\nOD09xVsA/1PfmJl/pwxFuQj4n2oZSht4dFXLCpTe3yMonSnvrHqB4aVe6IXp6NleDfV7GzC72nYB\n5YLKTSjBv+5xyrDBFzt8qi89b6b0bCflPd6n2rY2pYOpszqjejnl4uwvAFdWnxNqAUOylkk1HmwC\ncEo1JnkOcCVwUGbek5l3UQJtF+Vb9MNLOOT3KT26Uxezz/nAuykfDgC3U4YNZET8inKK7U5eeeqv\nse45wCcpY6lvo3xQHJqZP6WMY5sREXdQxqDtUj3tTMrQi7uASylBu8fixtddA8ysxhXW3QlcS/mA\neYIyTrrHhZQxdJcu4jVcXNU0PSLuiog7KUMqdqxOpU6lvC+3Vq/lXcBHqh6fs4H9MvOvVD0aEfGm\nhf0cSS11KPCVxhXVdRhnAEdl5iOUi35/k5l/rrbfSfnSfWXVvn0B2LX68l13IvCViPglpUf2FuBt\nmfkPSvD+QkR0Ua77+BPw7FIc/zpK6JtTHX8L4PNZpqj8FHB9RMxm8T25ZLmY+wVgWmY+v4jdzqcM\np2jsaf534LVV+/e/lHb2S9X1Hf8GXF59ZixuOF03sG51lvM8YO/qWhwy8y+Ui/6uqN6vet3/oHy2\njI2I+yLidso0nddn5sWZ+QKwG6Uz4zeUTprjqw6ik4E/ZeY3q4sDH6OMA1cLdHR3t3qWLEmNIuLT\nwNqZeUSr
a5HUfiLiS8BpmfmX6sv1/wLrpvOwAy+eUZwNbLWIC7M1RDR9THL1TXRutfgHyrekiyin\njOf0BIGIOIjyDfUF4KTq9LnUVqqxh4/y0hXbUp+ohj2dTTnt/RxwYGbe17B9H16aGvHCzHzFWEu1\njQcoZ9V6rpU4wIBcRMSBlJ7dkwzIQ19Te5KrsUA/r6ZQ6Vn3PeDLmXlLRJxDOc1wK/Bjyvie1wIz\ngc7qlIQk6VWKiN0pp8Y/FhGbAZMzc0LD9oeBt1Pmvr0TGFOd3pekttTsnuTRlPltb6BcEHA8sElm\n3lJtv44ybmcBMDPL/LrzIuIeyoVFXU2uT5LaxZaUTgmy3FxiTG37b4DX89K4esfiSWprzb5w71nK\nuKYdKPP+XcbLp195CliFcrVuY4/F05S7uUmS+sYqvLyd7blLWY/fUjom7qDcrMfT65LaWrN7ku8G\n7oUyG0JE/JUypKLHSMqUMPMoDXh9/SJ1dXXZyyFpSOjs7OzN3Nqv1jwabiMPDMvMBQDVlFg7U672\nfwa4LCL2zMxXTLvVwzZY0lCysHa42SH5Y5T5FY+opplaBfhRRGyd5Ra6H6DMu/hLyg0UlgdWBDbk\npTv8LFJnZ+eSdhkyurq62ur1avDyd3XpdHX126iyWZRpDadFxOaUHuMecyln/v6emd0R8Shl6MVi\n+e+8dPy/of7g79nSW1Q73OyQfAFwYUTcQhl3/FHgr8DU6i5gd1HmP+yOiDMoF+x1AMctZk5ESdLS\nuwYYHxGzquVJETGRcmfHqRFxHmVe778Dv6fMQiRJbaupIbmanWLfhWwat5B9L+ClW2tKkvpQdaOZ\nw2qr727Y/g3KDYAkSXjHPUmSJOkVDMmSJElSjSFZkiRJqjEkS5IkSTWGZEmSJKmm2VPASWpXn+/j\nm2Z+fu6S96k8//zz7LjjjowfP55JkyaxxhprLPE5Rx99NBMnTuS9733vq6lSkjREGJIlDTnd3d10\ndHQwefLkVpciSRqkDMmShoRnn32WY445hqeeeoq1116b7u5u9ttvP0488URWX311jjvuOObOLb3R\nn/nMZ1h//fW57LLLmDZtGquvvjqPP/54i1+BJGkgMSRLGhKuvPJKNthgA4466ihuv/12br31Vjo6\nOgA499xzGTt2LB/+8Id54IEHmDx5MmeeeSaXXHIJ1157LQB77rlnK8uXJA0whmRJQ8L999/PuHHj\nANh4440ZMWLEi9vuvvtuZs+ezfTp0+nu7mbevHk8+OCDbLDBBiy3XGkGN9poo1aULUkaoAzJkoaE\n9dZbj9tuu41tt92WO++8kxdeeOFl2971rnex88478/jjjzNt2jTWWWcd7rnnHp5//nmGDx/OnXfe\nyW677dbCVyBJGkgMyZKGhIkTJ/KpT32KffbZh3XXXZcVVljhxW2HHHIIxx9/PFdeeSXPPPMMRx55\nJKNGjeKggw5i7733ZtSoUay00kotrF6SNNAYkiU1x1JM2dYXll9+eU4//fRFbv/617/+inV77rmn\nY5ElSQvlzUQkSZKkGkOyJEmSVGNIliRJkmoG95jkvr7t7UC264xWVyBJktQ27EmWJEmSagZ3T7Ik\nSRqyrrn1kVaXMAitxYO+b0vlzSMWvt6QLKkp3vLpa/v0ePefunOfHm9Rfve73zFjxgwOP/zwZXr+\nfffdxwknnMCll17ax5VJkvqTIVmSGmy44YZsuOGGr+oYHR0dfVSNJKlVDMmShoT777+fyZMns9xy\ny9Hd3c1ee+3FTTfdxJQpUwDYcsstmTlzJpMnT+aJJ55g7ty5vPWtb2XTTTdlwoQJPPbYYxx88MF8\n+tOf5sorr2SXXXbhxz/+MaeccgoAe+yxB1OnTmX27NlcfPHFDB8+nM7OTo4++mj+8pe/cMwxxwCw\n2mqrtew9kCT1HS/ckzQkzJo1i9GjR3PRRRfx8Y9/nKeffnqRPbpbbLEFV1
xxBQcccADXXHMNAN/7\n3vdevPteR0cH48aN4ze/+Q3PPfccd9xxB2uvvTbDhw/nrLPO4uKLL+ayyy7jz3/+Mz//+c8599xz\n2WWXXbj44ovZbrvt+u01S5Kax5AsaUjYa6+9WHnllTnggAO4/PLLGT58+CL3fetb3wrAeuutx4IF\nC3j44YeZPn06u+2224v7DBs2jB122IEbbriBq6++mg996EM88MADPP744xx00EHst99+/P73v+eP\nf/wj999/PxtvvDEAnZ2dzX2hkqR+YUiWNCT85Cc/YcyYMVx00UXssMMOTJ8+nUcffRSAhx56iCef\nfPLFfYcNe6np23PPPTnttNNYf/31WXnllV92zD333JPvf//73H777bzvfe9jrbXWYs011+TCCy/k\n0ksvZd9992X06NG87W1v47bbbgPg9ttv74dXK0lqNsckSxoSNtpoI4499ljOOeccFixYwKc+9SnO\nPfdc9t57b9Zdd13WXnvthT5vxx135OSTT+acc855xba11loLgO233x6AUaNGMWnSJPbZZx8WLFjA\nWmutxU477cShhx7KMcccw/Tp0198jiRpcOvo7u5udQ3LpKurq7vzB9u2uox+07XrDE/jalDo6ury\nd3UpVO/XoJsOo6urq9t/56Xj/42l5zzJ6g9vHvF/C22HHW4hSZIk1RiSJUmSpBpDsiRJklTjhXuS\n1AYiogM4GxgNPAccmJn3VdveCFwJdAMdwLuBYzPzvBaVK0ktZ0iWpPYwAVghM8dGxGbAlGodmfkI\nsA1ARGwO/DdwfqsKlaSBwOEWktQetgSuB8jM2cCYRex3JnBoZg7OqY8kqY8YkiWpPawCzG1Ynh8R\nL/sMiIhdgTmZeW+/ViZJA5DDLSSpPcwDRjYsD8vMBbV99gVO7+0Bu7q6+qKutuJ7trS8OY9ax5As\nSe1hFrALMK0ad3zHQvYZk5m/6O0BvTHG0vFmIkvvQW8mohYyJEtSe7gGGB8Rs6rlSRExEVgpM6dG\nxGq8fDiGJLU1Q7IktYHqQrzDaqvvbtj+GLBJvxYlSQOYF+5JkiRJNYZkSZIkqcaQLEmSJNUYkiVJ\nkqQaQ7IkSZJUY0iWJEmSagzJkiRJUo0hWZIkSaoxJEuSJEk1hmRJkiSpxpAsSZIk1RiSJUmSpBpD\nsiRJklRjSJYkSZJqDMmSJElSjSFZkiRJqlmu2T8gIt4A/ArYHvgHcBGwAJiTmUdU+xwEHAy8AJyU\nmdc2uy5JkiRpUZrakxwRywHnAs9Wq6YAx2Xm1sCwiNgtIt4IHAlsAewInBIRI5pZlyRJkrQ4zR5u\n8WXgHOBhoAPYJDNvqbZdB4wHNgVmZub8zJwH3ANs3OS6JEmSpEVqWkiOiI8Cj2bmjykBuf7zngJW\nAUYCcxvWPw2s2qy6JEmSpCVp5pjkScCCiBgPjAYuAVZv2D4SeBKYRwnL9fWq6erqanUJUq/4uypJ\nGuyaFpKrcccARMQM4FDgtIjYKjNvBj4AzAB+CZwUEcsDKwIbAnOaVddg1tnZ2eoSpCXq6uryd3Up\n+IVCkgamps9uUXMMcH51Yd5dwLTM7I6IM4CZlGEZx2Xm8/1clyRJkvSifgnJmbltw+K4hWy/ALig\nP2qRJEmSlsSbiUiSJEk1hmRJkiSpxpAsSZIk1RiSJUmSpBpDsiRJklRjSJYkSZJqDMmSJElSTX/f\nTESS1AIR0QGcDYwGngMOzMz7Gra/F/hKtfhnYF9v7CSpndmTLEntYQKwQmaOBSYDU2rbzwM+mplb\nAdcD6/RzfZI0oBiSJak9bEkJv2TmbGBMz4aI2AD4K3B0RNwIjMrMe1pRpCQNFIZkSWoPqwBzG5bn\nR0TPZ8BqwBbAGcD2wPYRMa5/y5OkgcUxyZLUHuYBIxuWh2XmgurxX4F7M/NugIi4ntLTfOPiDtjV\n1dWEMoc237OltVarC1AbMyRLUnuYBe
wCTIuIzYE7GrbdB6wcEetWF/P9CzB1SQfs7OxsSqFDVVdX\nl+/ZUnrw1kdaXYLamCFZktrDNcD4iJhVLU+KiInASpk5NSIOAK6ICICfZ+Z1rSpUkgYCQ7IktYHM\n7AYOq62+u2H7jcBm/VmTJA1kXrgnSZIk1RiSJUmSpBpDsiRJklRjSJYkSZJqDMmSJElSjSFZkiRJ\nqjEkS5IkSTWGZEmSJKnGkCxJkiTVGJIlSZKkGkOyJEmSVGNIliRJkmoMyZIkSVKNIVmSJEmqMSRL\nkiRJNYZkSZIkqcaQLEmSJNUYkiVJkqQaQ7IkSZJUY0iWJEmSagzJkiRJUo0hWZIkSaoxJEuSJEk1\nhmRJkiSpxpAsSZIk1RiSJUmSpBpDsiRJklRjSJYkSZJqDMmSJElSjSFZkiRJqjEkS5IkSTXLtboA\nSVLzRUQHcDYwGngOODAz72vYfhRwIPBoteqQzLyn3wuVpAHCkCxJ7WECsEJmjo2IzYAp1boencB+\nmXlbS6qTpAHG4RaS1B62BK4HyMzZwJja9k5gckTcEhGf7u/iJGmgMSRLUntYBZjbsDw/Iho/A64A\nDgW2AbaMiJ36szhJGmgcbiFJ7WEeMLJheVhmLmhY/lpmzgOIiGuB9wDTF3fArq6uPi9yqPM9W1pr\ntboAtTFDsiS1h1nALsC0iNgcuKNnQ0SsAsyJiA2BvwHbAhcs6YCdnZ1NKnVo6urq8j1bSg/e+kir\nS1AbMyRL0iASEStThkSsDywA7gV+kpnPLeGp1wDjI2JWtTwpIiYCK2Xm1IiYDNxImfnip5l5fVNe\ngCQNEk0NydV4t/OBoDTmhwJ/By6qludk5hHVvgcBBwMvACdl5rXNrE2SBpOIeC1wArAHcDvwAKW9\nHAt8NSKuBv4rM59e2PMzsxs4rLb67obtlwGXNaF0SRqUmt2TvCvQnZlbRsTWwMlAB3BcZt4SEedE\nxG7ArcCRwCbAa4GZEfGjzHyhyfVJ0mDxLeA8YHJtLHFPh8Qu1T4TFvJcSdJSampIzszvRcQPqsV1\ngCeA7TPzlmrddcD7Kb3KMzNzPjAvIu4BNga8wkGSij2r3uBXqELz9xvaW0nSq9T0McmZuSAiLqL0\nbuwFjG/Y/BRlWqKRvHxqoqeBVZtdmyQNIp+NiEVuzMwTFxWiJUlLr18u3MvMj0bEG4BfAis2bBoJ\nPEmZmmiVhaxXA6cO0mDh72pTdFR/b0qZF+sqYD6wO3B/i2qSpCGr2Rfu7QuslZmnUq6Y/gfwq4jY\nOjNvAj4AzKCE55MiYnlKiN4QmNPM2gYjpw7SYOA0V0unt18oMvMLANXsFFtk5rPV8unAz5pWoCS1\nqWb3JF8NXBgRN1U/6xPA74CpETECuAuYlpndEXEGMJOXLux7vsm1SdJgtDrQOKxiBDCqRbVI0pDV\n7Av3ngX2XsimcQvZ9wJ6MXm9JLW58yln5KYDPbNanN7akiRp6BnW6gIkSb2XmacB+wN/Bh4CPpSZ\n57S2KkkaegzJkjT4BGWIxTeA0S2uRZKGJEOyJA0iEXEqsBPlznvDKbeX/kprq5KkoceQLEmDyw7A\nfsBzmTmPMvf8B1pbkiQNPYZkSRpcem5J3TPDxQoN6yRJfcSQLEmDy3eAbwOjIuIo4Gbg8taWJElD\nT7/ccU+S1Dcy84sRsQPwAPBm4ITM/GGLy5KkIceQLEmDSER8F/gWcLw3XZKk5nG4hSQNLucDE4Df\nR8TUiBjX4nokaUgyJEvSIJKZ12bmvsAGwPXAVyLigRaXJUlDjsMtJGmQiYh3AB8G9gL+iLellqQ+\nZ0iWpEEkIu4A5lPGJW+bmX9qcUmSNCQZkiVpcPm3zLyj1UVI0lBnSJakQSAizsvMg4EzIqK7vj0z\nt21BWZI0ZPUqJEfEOzPzt7V1m2fmrc0pS5JU843q78+3sghJaheLDckR8T5gODA1Ig4AOhqedy7l\n6m
pJUpNlZlf18GjgUuD7zpMsSc2zpJ7k8cDWwJrAiQ3r5/NSr4Ykqf+cB0wEvhoRNwDfyswbW1uS\nJA09iw3Jmfl5gIjYLzMv7ZeKJEmLlJnXAtdGxIrAzpR5klfLzHVaXJokDSm9vXDv5og4DRjFS0Mu\nyMyPNaUqSdIiOU+yJDVfb0Pyd4Bbqj+vuKpaktQ/GuZJvhTnSZakpultSB6Rmcc0tRJJUm+cl5ln\ntroISRrqhvVyv5kRsWtELN/UaiRJS3JIqwuQpHbQ257kfwU+DhARPeu6M3N4M4qSJC3SHyNiBjAb\n+FvPysw8cdFPkSQtrV6F5Mx8U7MLkST1SuNNnDoWuVdNRHQAZwOjgeeAAzPzvoXs9w3gr5l53Kst\nVJIGs97ece9zC1tvz4Uk9a/M/MIyPnUCsEJmjo2IzYAp1boXRcQhwLuAm15dlZI0+PV2uEVjb8UI\nYEfKqT5JUj+KiAW8cpahhzNz7SU8dUvgeoDMnB0RY2rH3QJ4L+VGURv2UbmSNGj1drjFy3ouIuK/\ngB81pSJJ0iJl5osXXEfECEpv8Ba9eOoqwNyG5fkRMSwzF0TEGsAJ1bH27st6JWmw6m1Pct3KwJv7\nshBJ0tLJzBeAqyLi+F7sPg8Y2bA8LDMXVI/3Av4JmA6sCawYEb/LzEsWd8Curq5lqLq9+Z4trbVa\nXYDaWG/HJP+Bl07vDQNeB5zWrKIkSQsXEfs3LHYA7wSe78VTZwG7ANMiYnPgjp4N1bzLZ1bH/wgQ\nSwrIAJ2dnUtRubq6unzPltKDtz7S6hLUxnrbkzyu4XE38GRmzuv7ciRJS7BNw+Nu4DF6N0TiGmB8\nRMyqlidFxERgpcyc2sc1StKg19uQ/CBwKLBd9ZwZEXFWw6k6SVI/yMxJy/i8buCw2uq7F7Lfxcty\nfEkaanobkr8ErA98k3J6bxKwLnBUk+qSJDWIiNcCJwLfycz/FxFTgIOA24CJmflQSwuUpCGmtyH5\n/cB7enqOI+JaGsazSZKa7nRgPnB/ROwE7AO8hzKv8VnA7i2sTZKGnN6G5OWqP883LP+jKRVJkhZm\ni8zcCCAidqP0KN8L3BsRJ7e2NEkaenobki8DboyIK6rlicDlzSlJkrQQjR0T44BPNSwv37+lSNLQ\nt8SQHBGvB86njHvbtvpzemZe2uTaJEkv+WtEbAqsBPwz8BOAiBgH/F8L65KkIWnY4jZGxHuAO4HO\nzLwuMz8J3ACcGhEb90eBkiQA/gO4CJgGHJ6Zz0TEZ4DvAJ9sZWGSNBQtqSf5y5Srpm/sWZGZx0XE\nTcAUYPuuoCUpAAAO00lEQVQm1iZJqmTm7cA7aquvBM7MzLkLeYok6VVYUkh+fWNA7pGZN0TEF5tT\nkqTBbM+r/gxXXdvqMvrF/afu3G8/KyJOAU5tDMTVhXs920cBx2bmsf1WlCQNYUsKySMiYlj9piER\nMQwvFJGk/vQd4HsR8TBwM2Uc8nxgHcq1Im/Cueslqc8sKSTfBJxQ/Wn0GeBXTalIkvQKmXkbMC4i\ntgE+COwCLAB+D3wjM2e0sj5JGmqWFJInA9MjYh/gl5S77W0CPEpppCVJ/Sgzfwb8rNV1SNJQt9iQ\nnJlPRcRWwDaUOzstAL6embf0R3GSpJeLiB2A/wZGUTouAMjMdVtWlCQNQUucJzkzu4EZ1R9JUmud\nCRwNzAG6W1yLJA1Zvb3jniRpYHgsM3/Y6iIkaagzJEvS4HJLREwBrgee61mZmTe3riRJGnoMyZI0\nuGxa/f2ehnXdlGngJEl9xJAsSYNIZm7T6hokqR0YkiVpEImILYFPAitTZrcYDqyTmW9pZV2SNNQM\na3UBkqSlMhX4LqWT4+vAPcA1La1IkoYgQ7IkDS5/y8wLgRuBJ4CDgK1bWpEkDUGGZEkaXJ6LiFFA\nAptXc9mv1OKaJGnIMSRL0uAyBfg28ANg/4j4LfCr1pYkSUOPIVmS
BpHMvAp4f2Y+BXQC+wL7tbYq\nSRp6mja7RUQsB3wTeAuwPHAScCdwEbAAmJOZR1T7HgQcDLwAnJSZ1zarLkkazCLi9cCXImI9YC/g\nSOA/KeOTJUl9pJk9yftSbp+6FbAjcBblNOFxmbk1MCwidouIN1Ia+S2q/U6JiBFNrEuSBrPzgV8C\n/wQ8BfwJ+FZLK5KkIaiZIfk7wGerx8OB+cAmmXlLte46YDzl7lEzM3N+Zs6jTGe0cRPrkqTB7K2Z\neR6wIDOfz8zjgbVaXZQkDTVNG26Rmc8CRMRI4CrgeODLDbs8BawCjATmNqx/Gli1WXVJ0iA3PyJW\npdyKmohYnzKETZLUh5p6x72IWBu4GjgrM6+MiC81bB4JPAnMo4Tl+nrVdHV1tboESQ1a9H/yBMoc\nyWtHxHcpQ9U+1opCJGkoa+aFe28EbgCOyMyfVatvi4itMvNm4APADMrYupMiYnlgRWBDYE6z6hrM\nOjs7W12CtGRXtc91t33xf3IZgnYX5Q57uwJvpnREdALt88ZLUj9oZk/yZOB1wGcj4nOUU4P/DpxZ\nXZh3FzAtM7sj4gxgJtBBubDv+SbWJUmD2XTgduCHDes6WlSLJA1ZzRyTfBRw1EI2jVvIvhcAFzSr\nFkkaSjLzgFbXIElDXVPHJEuS+tx3I+JAynC1+T0rM/PBxT0pIjqAs4HRwHPAgZl5X8P2PYFjKRcB\nXp6ZZzShdkkaNLzjniQNLqsCXwV+CtxU/bmxF8+bAKyQmWMpw+Gm9GyIiGHAycC2wFjg8IgY1bdl\nS9LgYk+yJA0uewJvyMy/LeXztgSuB8jM2RExpmdDZi6IiLdXf7+B0oHitSGS2po9yZI0uNwHvH4Z\nnrcKL5+Tfn7Vgwy8GJR3B/6X0jP9zKspUpIGO3uSJWlw6QbujIg5NPT2Zua2S3jePMo89D2GZebL\nbkKSmdcA10TExcD+wMWLO6Bzty8937Ol5c0k1TqGZEkaXE5axufNAnYBpkXE5sAdPRuqO6P+AHh/\nNQXnM/TiLn7O3b50urq6fM+W0oO3PtLqEtTGDMmSNIhk5k3L+NRrgPERMatanhQRE4GVMnNqRHwL\nuDkinqfMw/ytPihXkgYtQ7IktYHM7AYOq62+u2H7VGBqvxYlSQOYF+5JkiRJNYZkSZIkqcaQLEmS\nJNUYkiVJkqQaQ7IkSZJUY0iWJEmSagzJkiRJUo0hWZIkSaoxJEuSJEk1hmRJkiSpxpAsSZIk1RiS\nJUmSpBpDsiRJklRjSJYkSZJqDMmSJElSjSFZkiRJqjEkS5IkSTWGZEmSJKnGkCxJkiTVGJIlSZKk\nGkOyJEmSVGNIliRJkmoMyZIkSVKNIVmSJEmqMSRLkiRJNYZkSZIkqcaQLEmSJNUYkiVJkqQaQ7Ik\nSZJUY0iWJEmSapZrdQGSpOaLiA7gbGA08BxwYGbe17B9IvDvwAvAHZl5eEsKlaQBwp5kSWoPE4AV\nMnMsMBmY0rMhIl4DnAhsnZn/ArwuInZpTZmSNDAYkiWpPWwJXA+QmbOBMQ3b/g6Mzcy/V8vLUXqb\nJaltGZIlqT2sAsxtWJ4fEcMAMrM7M/8CEBFHAitl5k9aUKMkDRiOSZak9jAPGNmwPCwzF/QsVGOW\nvwSsD+zRz7VJ0oBjSJak9jAL2AWYFhGbA3fUtp8H/C0zJ/T2gF1dXX1YXnvwPVtaa7W6ALUxQ7Ik\ntYdrgPERMatanlTNaLES0AVMAm6JiJ8B3cDXMvN7iztgZ2dnM+sdcrq6unzPltKDtz7S6hLUxgzJ\nktQGMrMbOKy2+u6Gx34eSFIDL9yTJEmSauw5kCQtk7d8+tpWlzD4XOV7tjS+OmHMkneSmsSeZEmS\nJKnGkCxJkiTVGJIlSZKkGkOyJEmSVOOFe4PEnlf9ua0u+Lj/1J1bXYIkSWpj9iRLkiRJNU3vSY6I\nzYBTM3ObiFgPuAhYAMzJzCOq
fQ4CDgZeAE7KzPbpMpUkSdKA09Se5Ij4JHA+sEK1agpwXGZuDQyL\niN0i4o3AkcAWwI7AKRExopl1SZIkSYvT7OEW9wK7Nyx3ZuYt1ePrgPHApsDMzJyfmfOAe4CNm1yX\nJEmStEhNDcmZeQ0wv2FVR8Pjp4BVgJHA3Ib1TwOrNrMuSZIkaXH6e3aLBQ2PRwJPAvMoYbm+Xm2s\nq6ur1SVIS+TvqSQNXf0dkn8dEVtl5s3AB4AZwC+BkyJieWBFYENgTj/XpQGms7Oz1SVoWbXRVIV9\n8Xtq0Jakgam/Q/IxwPnVhXl3AdMyszsizgBmUoZjHJeZz/dzXZIkSdKLmh6SM/MBYGz1+B5g3EL2\nuQC4oNm1SJIkSb3hzUQkSZKkGkOyJEmSVGNIliRJkmoMyZIkSVKNIVmSJEmqMSRLkiRJNYZkSZIk\nqcaQLEmSJNX09x33pPbz+VVbXUE/u7zVBUiS9KrZkyxJkiTVGJIlSZKkGkOyJEmSVGNIliRJkmoM\nyZIkSVKNIVmSJEmqcQo4SWoDEdEBnA2MBp4DDszM+2r7vBb4EfCxzLy7/6uUpIHDnmRJag8TgBUy\ncywwGZjSuDEiOoGbgHVbUJskDTiGZElqD1sC1wNk5mxgTG378pQg/bt+rkuSBiRDsiS1h1WAuQ3L\n8yPixc+AzPxFZj4EdPR7ZZI0ABmSJak9zANGNiwPy8wFrSpGkgY6Q7IktYdZwE4AEbE5cEdry5Gk\ngc3ZLSSpPVwDjI+IWdXypIiYCKyUmVMb9uvu/9IkaeAxJEtSG8jMbuCw2upXTPOWmdv2T0WSNLA5\n3EKSJEmqMSRLkiRJNYZkSZIkqcaQLEmSJNUYkiVJkqQaQ7IkSZJUY0iWJEmSagzJkiRJUo0hWZIk\nSaoxJEuSJEk1hmRJkiSpxpAsSZIk1RiSJUmSpBpDsiRJklRjSJYkSZJqDMmSJElSjSFZkiRJqjEk\nS5IkSTWGZEmSJKnGkCxJkiTVGJIlSZKkGkOyJEmSVGNIliRJkmoMyZIkSVKNIVmSJEmqMSRLkiRJ\nNYZkSZIkqcaQLEmSJNUYkiVJkqQaQ7IkSZJUY0iWJEmSapZrdQE9IqIDOBsYDTwHHJiZ97W2Kkka\nGpbUxkbErsBngReACzNzaksKlaQBYiD1JE8AVsjMscBkYEqL65GkoWSRbWxELFctbw+MAw6OiNVb\nUaQkDRQDKSRvCVwPkJmzgTGtLUeShpTFtbFvB+7JzHmZ+QIwE9iq/0uUpIFjIIXkVYC5DcvzI2Ig\n1SdJg9ni2tj6tqeAVfurMEkaiAbMmGRgHjCyYXlYZi5Y3BO6dp3R3IoGkP9pdQH9rKurq9Ul9J02\n+j2F9vpdHWS/p4trY+dRgnKPkcCTSzrg/+y1Rt9VJy3U/7W6ALWxgRSSZwG7ANMiYnPgjsXt3NnZ\n2dEvVUnS0LC4NvYu4G0R8TrgWcpQi9MWdzDbYElDXUd3d3erawBeduX1xtWqSZl5dwtLkqQhY2Ft\nLNAJrJSZUyNiZ+AEoAO4IDPPbU2lkjQwDJiQLEmSJA0UXhgnSZIk1RiSJUmSpBpDsiRJklQzkGa3\nUC9FxEeADTNzcqtr0dAUEcOBnwAjgJ0zc+4SntLb4/4pM9fsi2NJg5ntuBbHNnhgMCQPXl5xqWb6\nZ2DlzHxvHx/X31vpJf5/0KLYBg8AhuQWq3oTdgVWBNYAzgB2A94JfBJYG9gDeC3wGLB77fkfB/4N\nWABcmZln9VvxGsrOAdaPiG9Sbiwxqlr/icz8bUTcQ5l3dwNgBuXubJsCmZn7R8Q7gSmUIV2rAYdl\n5q09B4+IjYCvVYt/BT6WmU/1w+uS+pztuJrANngAcEzywLByZu4MfAk4NDP3AA4BDgBGZeZ2mbkF\n5bTLi98qI+LtwN7A+yiT/+8eEev3e/Uaig6n3GDiEeAnmbkd5XeyZ+7ctwDHU37vPgGclZmbAV
tG\nxCqUcHB0Zo6n/F5Pqh3/PODwzNwWuA44trkvR2o623H1JdvgAcCe5IHhturvJyn/KQCeAJYHXoiI\nK4BnKKdfRjQ8713AOsBPKTcAeB2wPnBPP9Ss9rAxsG1E7E35HXt9tf6vmfkQQEQ8nZlZrX8SeA3w\nEPC5iHiWcrvj+ni6twNnRwSU32l/ZzXY2Y6rGWyDW8ie5IFhUWOElgd2y8yJwJHAcMp/kh4JzMnM\nbTNzG+Bi4PamVqp2cxfw1aq34UPAt6r1jb+zHbXHHZTTzZ/LzEmU2x931Pb9HbB/ddxjgR82p3yp\n39iOqxlsg1vInuSB7QXgmYiYWS0/DLypZ2Nm3h4RM6rtKwCzKd8epb7QDZwEfDMiDqGMi/t8wzYW\n8bgbuBSYFhGPA/9HGRPXuO/hwKURsRxlHOYBzXgB0gBgO65lZRvcYt6WWpIkSapxuIUkSZJUY0iW\nJEmSagzJkiRJUo0hWZIkSaoxJEuSJEk1hmRJkiSpxpAsSZIk1RiSJUmSpJr/D62wcmT6crERAAAA\nAElFTkSuQmCC\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x119c9ff10>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"# Survival by Sex: absolute counts (left panel) and survival rate (right panel)\n", | |
"fig, (axis1, axis2) = plt.subplots(1, 2, figsize=(10, 5))\n", | |
"\n", | |
"# filter each group once and reuse its Survived column\n", | |
"male_outcomes = X_train.loc[X_train.Sex == 'male', 'Survived']\n", | |
"female_outcomes = X_train.loc[X_train.Sex == 'female', 'Survived']\n", | |
"\n", | |
"total_male = male_outcomes.shape[0]\n", | |
"total_female = female_outcomes.shape[0]\n", | |
"\n", | |
"survived_male = male_outcomes.sum()\n", | |
"survived_female = female_outcomes.sum()\n", | |
"died_male = total_male - survived_male\n", | |
"died_female = total_female - survived_female\n", | |
"\n", | |
"colors = ['#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78', '#2ca02c', '#98df8a']\n", | |
"N = 2\n", | |
"d = (died_male, died_female)\n", | |
"s = (survived_male, survived_female)\n", | |
"\n", | |
"ind = np.arange(N) # the x locations for the groups\n", | |
"width = 0.35 # the width of the bars\n", | |
"\n", | |
"# align='edge' anchors each bar's left edge at its x value, which is what the\n", | |
"# tick positions below assume (matplotlib 2.0 changed the default to 'center')\n", | |
"rects1 = axis1.bar(ind, d, width, color=colors[2], lw=0.0, align='edge')\n", | |
"rects2 = axis1.bar(ind + width, s, width, color=colors[0], lw=0.0, align='edge')\n", | |
"\n", | |
"axis1.set_ylabel('Count')\n", | |
"axis1.set_title('Survival count by Sex')\n", | |
"# each tick sits on the boundary between a group's died and survived bars\n", | |
"axis1.set_xticks(ind + width)\n", | |
"axis1.set_xticklabels(('male', 'female'))\n", | |
"\n", | |
"axis1.legend((rects1[0], rects2[0]), ('died', 'survived'))\n", | |
"\n", | |
"##\n", | |
"\n", | |
"N = 1\n", | |
"ind = np.arange(N)\n", | |
"# true division: relies on `from __future__ import division` in the imports cell\n", | |
"m = (survived_male/total_male)\n", | |
"f = (survived_female/total_female)\n", | |
"\n", | |
"rects1 = axis2.bar(ind, m, width, color=colors[0], lw=0.0, align='edge')\n", | |
"rects2 = axis2.bar(ind + width, f, width, color=colors[1], lw=0.0, align='edge')\n", | |
"\n", | |
"axis2.set_ylabel('mean(Survived)')\n", | |
"axis2.set_title('Average survived by Sex')\n", | |
"# ticks at the centre of each edge-aligned bar\n", | |
"axis2.set_xticks((width/2, width*1.5))\n", | |
"axis2.set_xticklabels(('male', 'female'))\n", | |
"\n", | |
"plt.tight_layout()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# create dummy variables\n", | |
"_feature = 'Sex'\n", | |
"\n", | |
"# train\n", | |
"dummies = pd.get_dummies(X_train[_feature])\n", | |
"# drop dummy columns left over from an earlier run of this cell, so re-running\n", | |
"# it does not fail in join() on overlapping column names\n", | |
"stale = [col for col in dummies.columns if col in X_train.columns]\n", | |
"X_train = X_train.drop(stale, axis=1).join(dummies)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Model on the 'female' dummy only; 'male' is dropped as it has the lower\n", | |
"# average of survived passengers.\n", | |
"# NOTE(review): if Sex only takes the values male/female, the two dummies are\n", | |
"# complementary and one column carries all the information - confirm.\n", | |
"features = ['female']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[('RandomForestClassifier', 0.78671502492918788),\n", | |
" ('LogisticRegression', 0.78671502492918788),\n", | |
" ('SVC', 0.78671502492918788),\n", | |
" ('GaussianNB', 0.78671502492918788),\n", | |
" ('KNeighborsClassifier', 0.78671502492918788)]" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# feature matrix and target for the Sex-only model, then rank the classifiers\n", | |
"y = y_train\n", | |
"X = DataFrame(X_train[features])\n", | |
"\n", | |
"scores = check_classifiers(X_train=X, Y_train=y)\n", | |
"scores" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Features</th>\n", | |
" <th>Coefficient Estimate</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>female</td>\n", | |
" <td>2.430709</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Features Coefficient Estimate\n", | |
"0 female 2.430709" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# fit a logistic regression and tabulate its coefficient for each feature\n", | |
"classifier = LogisticRegression()\n", | |
"fitted = classifier.fit(X, y)\n", | |
"\n", | |
"coeff_df = DataFrame(X.columns)\n", | |
"coeff_df.columns = ['Features']\n", | |
"coeff_df['Coefficient Estimate'] = pd.Series(fitted.coef_[0])\n", | |
"\n", | |
"# show the resulting table\n", | |
"coeff_df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Female passengers were 11.37 times more likely to survive.\n" | |
] | |
} | |
], | |
"source": [ | |
"# read the fitted coefficient from coeff_df instead of a hard-coded copy of it\n", | |
"female_coef = coeff_df.loc[coeff_df['Features'] == 'female', 'Coefficient Estimate'].iloc[0]\n", | |
"\n", | |
"# NOTE: exp() of a logistic-regression coefficient is an odds ratio, so the\n", | |
"# figure printed below compares odds of survival, not survival probabilities\n", | |
"print('Female passengers were %.2f times more likely to survive.' % math.exp(female_coef))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment