Skip to content

Instantly share code, notes, and snippets.

@kellrott
Created March 2, 2017 17:27
Show Gist options
  • Save kellrott/ca7aa26fbbe003ee7d3216331e0e6374 to your computer and use it in GitHub Desktop.
Save kellrott/ca7aa26fbbe003ee7d3216331e0e6374 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# Turn on Graphics"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Download the data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"import urllib\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"('wdbc.data', <httplib.HTTPMessage instance at 0x1093e4a70>)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"urllib.urlretrieve(\"http://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data\", \"wdbc.data\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Load the Data"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"import pandas"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"names = [\n",
" \"ID\",\n",
" \"Diagnosis\",\n",
" \"radius\",\n",
" \"texture\",\n",
" \"perimeter\",\n",
" \"area\",\n",
" \"smoothness\",\n",
" \"compactness\",\n",
" \"concavity\",\n",
" \"concave_points\",\n",
" \"symmetry\",\n",
" \"fractal_dimension\"\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"m = pandas.read_csv(\"wdbc.data\", names=names, usecols=names, header=None, index_col=0)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"labels_named = m[\"Diagnosis\"]\n",
"features = m[names[2:]]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"labels = labels_named.map( {\"M\" : 1.0, \"B\" : 0.0} )"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Do a simple linear regression"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
"from sklearn.metrics import roc_curve, auc"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python2.7/site-packages/scipy/linalg/basic.py:884: RuntimeWarning: internal gelsd driver lwork query error, required iwork dimension not returned. This is likely the result of LAPACK bug 0038, fixed in LAPACK 3.2.2 (released July 21, 2010). Falling back to 'gelss' driver.\n",
" warnings.warn(mesg, RuntimeWarning)\n"
]
},
{
"data": {
"text/plain": [
"LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lg = LinearRegression()\n",
"lg.fit(features, labels)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"pred = lg.predict(features)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.collections.PathCollection at 0x10cd30f90>"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAD8CAYAAACfF6SlAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAD/lJREFUeJzt3XFsnPV9x/HPJ46hrtbVrPFaYhIMWvDKSrcwC6gmbVTQ\nmaIqiSibSFUVKrpIbGzSNlkimgSITWsra5tUFY1lbQRUGmRDUeZpqawOUjFtC8UsQAirVy+jTRw2\n3ARnqnBL4nz3xz2JjLmz73zPPY/vfu+XdMpz3/vpfr9fnHzu8e957nkcEQIApGVN2QMAABSP8AeA\nBBH+AJAgwh8AEkT4A0CCCH8ASBDhDwAJIvwBIEGEPwAkaG3ZA6hl3bp1MTAwUPYwAKCtvPDCCz+M\niL7l2q3a8B8YGNDExETZwwCAtmL7+/W0Y9kHABJE+ANAggh/AEgQ4Q8ACSL8ASBBhD8AJIjwB4AE\nrdrz/AEgBfsOTWt0fFInZue0vrdHI8OD2ra5v+X9Ev4AUJJ9h6a1c+9hzZ2ZlyRNz85p597DktTy\nDwCWfQCgJKPjkxeC/7y5M/MaHZ9sed+EPwCU5MTsXEP1PBH+AFCS9b09DdXzRPgDQElGhgfV0931\njlpPd5dGhgdb3jcHfAGgJOcP6nK2DwAkZtvm/kLCfjGWfQAgQYQ/ACSI8AeABBH+AJAgDvgCQIm4\ntg8AJKbtr+1je7ftN2y/UuP1G22ftv1i9rg/j34BoJ2VeW2fvPb8H5X0VUmPL9HmnyPiUzn1BwBt\nb7rGNXxq1fOUy55/RDwr6VQe7wUAqVjjxuq59t36Li74mO2XbH/T9i8U2C8ArErnorF6noo64Pvv\nki6PiB/ZvlXSPkmbFjeyvUPSDknauHFjQUMDgPQUsucfEf8XET/KtvdL6ra9rkq7XRExFBFDfX19\nRQwNAErT29PdUD1PhYS/7Q/ZdrZ9XdbvySL6BoDV6lO/eGlD9Tzlsuxj+wlJN0paZ/u4pAckdUtS\nRDwi6XZJ99g+K2lO0h0RUcCqFgCsXge+O9NQPU+5hH9EbF/m9a+qciooACDT9qd6AgAa5xqndNaq\n54nwB4CS1Fr8LmJRnPAHgAQR/gBQku4aCVyrnifCHwBKcvZcY/U8Ef4AUJJaS/tFnAdP+ANAggh/\nACjJe2ss7teq54nwB4CS/OltH33X5ZvXuFJvNcIfAEqybXO/PnP9RnVl3+rqsvWZ6zcWcg9fwh8A\nSrLv0LT2fOeY5rNvdc1HaM93jmnfoemW9034A0BJHhw7ojOL7txy5lzowbEjLe+b8AeAkszOnWmo\nnifCHwASRPgDQIIIfwBIEOEPAAki/AEgQYQ/ACSI8AeABBH+AJAgwh8AEkT4A0CCCH8ASBDhDwAJ\nIvwBIEGEPwAkiPAHgJLYjdXzRPgDQEkiGqvnifAHgJLU2sEvYMef8AeAstTawS9gx5/wB4AUEf4A\nkCDCHwASRPgDQIIIfwBIEOEPAAnKJfxt77b9hu1Xarxu21+xPWX7ZdvX5tEvAGBl8trzf1TSLUu8\n/klJm7LHDkl/mVO/AIAVyCX8I+JZSaeWaLJV0uNRcVBSr+1L8+gbANC4otb8+yUdW/D8eFYDAJRg\nVR3wtb3D9oTtiZmZmbKHAwAdq6jwn5a0YcHzy7LaO0TErogYioihvr6+goYGAOkpKvzHJH0uO+vn\nBkmnI+L1gvoGACyyNo83sf2EpBslrbN9XNIDkrolKSIekbRf0q2SpiS9JenzefQLAFiZXMI/IrYv\n83pI+p08+gIANG9VHfAFABSD8AeABBH+AJAgwh8AEkT4A0CCCH8ASBDhDwAJIvwBIEGEPwAkiPAH\ngAQR/gCQIMIfABJE+ANAggh/AEgQ4Q8ACSL8ASBBhD8AJIjwB4AEEf4AkCDCHwASRPgDQIIIfwBI\n0NqyB9Aq+w5Na3R8Uidm57S+t0cjw4Patrm/
7GEBwKrQkeG/79C0du49rLkz85Kk6dk57dx7WJL4\nAAAAdeiyz+j45IXgP2/uzLxGxydLGhEArC4dGf4nZucaqgNAajoy/Nf39jRUB4DUdGT4jwwPqqe7\n6x21nu4ujQwPljQiAFhdOvKA7/mDupztAwDVdWT4S5UPAMIeAKrryGUfAMDSOnbPny95AUBtHRn+\nfMkLAJbWkcs+fMkLAJbWkeE/XePLXLXqAJCajgz/NW6sDgCp6cjwPxeN1QEgNbmEv+1bbE/anrJ9\nX5XX77I9Y/vF7PGFPPoFAKxM02f72O6S9LCkT0g6Lul522MR8eqipnsi4t5m+wMANC+PPf/rJE1F\nxNGIeFvSk5K25vC+AIAWySP8+yUdW/D8eFZb7NO2X7b9lO0N1d7I9g7bE7YnZmZmchgaAKCaog74\n/oOkgYj4qKRvSXqsWqOI2BURQxEx1NfXV9DQACA9eYT/tKSFe/KXZbULIuJkRPwke/o1Sb+cQ78A\ngBXKI/yfl7TJ9hW2L5J0h6SxhQ1sX7rg6RZJ/5FDvwCAFWr6bJ+IOGv7Xknjkrok7Y6II7YfkjQR\nEWOSfs/2FklnJZ2SdFez/QIAVi6XC7tFxH5J+xfV7l+wvVPSzjz6AgA0ryO/4QsAWBrhDwAJIvwB\nIEGEPwAkiPAHgAQR/gCQIMIfABJE+ANAggh/AEgQ4Q8ACSL8ASBBhD8AlGSNG6vn2nfruwAAVHMu\nGqvnifAHgJJ0ufoufq16ngh/ACjJfFTfxa9VzxPhDwAJIvwBIEGEPwCUhDV/AEjQDVde0lA9T4Q/\nAJTktZNzDdXzRPgDQElOzFYP+Vr1PBH+AFCS9b09DdXzRPgDQEk+/vN9DdXzRPgDQEkOfHemoXqe\nCH8AKAlr/gCQINb8ASBBI8OD6unueketp7tLI8ODLe97bct7AABUtW1zvyRpdHxSJ2bntL63RyPD\ngxfqrUT4A0CJtm3uLyTsF2PZBwASRPgDQIIIfwBIEOEPAAki/AEgQYQ/ACSI8AeABBH+AJCgXMLf\n9i22J21P2b6vyusX296Tvf6c7YE8+gUArEzT4W+7S9LDkj4p6WpJ221fvajZ3ZLejIifk/QXkr7c\nbL8AgJXLY8//OklTEXE0It6W9KSkrYvabJX0WLb9lKSb7AJuTw8AqCqP8O+XdGzB8+NZrWqbiDgr\n6bSkDyx+I9s7bE/YnpiZaf3NDAAgVavqgG9E7IqIoYgY6utr/W3MACBVeYT/tKQNC55fltWqtrG9\nVtL7JZ3MoW8AwArkEf7PS9pk+wrbF0m6Q9LYojZjku7Mtm+X9ExERA59AwBWoOnr+UfEWdv3ShqX\n1CVpd0Qcsf2QpImIGJP0dUnfsD0l6ZQqHxAAgJLkcjOXiNgvaf+i2v0Ltn8s6Tfy6AsA0LxVdcAX\nAFAMwh8AEkT4A0CCCH8ASBDhDwAJIvwBIEGEPwAkiPAHgAQR/gCQIMIfABJE+ANAggh/AEgQ4Q8A\nCSL8ASBBhD8AJIjwB4AEEf4AkCDCHwASRPgDQIIIfwBIEOEPAAki/AEgQYQ/ACSI8AeABBH+AJAg\nwh8AEkT4A0CCOjL8u+yG6gCQmo4M/+3Xb2ioDgCpWVv2AFrhT7ZdI0l64rljmo9Ql63t12+4UAeA\n1Dkiyh5DVUNDQzExMVH2MACgrdh+ISKGlmvXkcs+AIClEf4AkCDCHwAS1JEHfCVp36FpjY5P6sTs\nnNb39mhkeFDbNveXPSwAWBU6Mvz3HZrWzr2HNXdmXpI0PTunnXsPSxIfAACgDl32GR2fvBD8582d\nmdfo+GRJIwKA1aWp8Lf9M7a/Zft72Z+X1Gg3b/vF7DHWTJ/1ODE711AdAFLT7J7/fZKejohNkp7O\nnlczFxG/lD22NNnnstb39jRUB4DUNBv+WyU9lm0/Jmlbk++Xi5HhQfV0d72j1tPdpZHhwZJGBACr\nS7Ph/8GI
eD3b/h9JH6zR7j22J2wftF3zA8L2jqzdxMzMzIoHtW1zv7542zXq7+2RJfX39uiLt13D\nwV4AyCx7eQfb/yTpQ1Ve+iNJj0VE74K2b0bEu9b9bfdHxLTtKyU9I+mmiPivpfrl8g4A0Lh6L++w\n7KmeEXHzEp38r+1LI+J125dKeqPGe0xnfx61/W1JmyUtGf4AgNZpdtlnTNKd2fadkv5+cQPbl9i+\nONteJ+lXJL3aZL8AgCY0G/5fkvQJ29+TdHP2XLaHbH8ta/NhSRO2X5J0QNKXIoLwB4ASNfUN34g4\nKemmKvUJSV/Itv9VEhfSB4BVpCO/4QsAWBrhDwAJWrV38rI9I+n7ObzVOkk/zOF92gXz7WzMt3Pl\nNdfLI6JvuUarNvzzYnuinnNeOwXz7WzMt3MVPVeWfQAgQYQ/ACQohfDfVfYACsZ8Oxvz7VyFzrXj\n1/wBAO+Wwp4/AGCRjgl/27fYnrQ9ZftdN5WxfbHtPdnrz9keKH6U+aljvn9g+1XbL9t+2vblZYwz\nL8vNd0G7T9sO2217hkg9c7X9m9nP94jtvyl6jHmq49/yRtsHbB/K/j3fWsY482J7t+03bL9S43Xb\n/kr29/Gy7WtbMpCIaPuHpC5VrhJ6paSLJL0k6epFbX5b0iPZ9h2S9pQ97hbP9+OS3ptt39Pp883a\nvU/Ss5IOShoqe9wt/NluknRI0iXZ858te9wtnu8uSfdk21dLeq3scTc551+VdK2kV2q8fqukb0qy\npBskPdeKcXTKnv91kqYi4mhEvC3pSVXuMrbQwruOPSXpJtsucIx5Wna+EXEgIt7Knh6UdFnBY8xT\nPT9fSfpjSV+W9OMiB5ezeub6W5Iejog3JSkiql5KvU3UM9+Q9NPZ9vslnShwfLmLiGclnVqiyVZJ\nj0fFQUm92SXzc9Up4d8v6diC58ezWtU2EXFW0mlJHyhkdPmrZ74L3a3KnkS7Wna+2a/GGyLiH4sc\nWAvU87O9StJVtv8luzveLYWNLn/1zPdBSZ+1fVzSfkm/W8zQStPo/+8Vaeqqnlj9bH9W0pCkXyt7\nLK1ie42kP5d0V8lDKcpaVZZ+blTlN7pnbV8TEbOljqp1tkt6NCL+zPbHJH3D9kci4lzZA2tnnbLn\nPy1pw4Lnl2W1qm1sr1Xl18eThYwuf/XMV7ZvVuV2m1si4icFja0Vlpvv+yR9RNK3bb+myjrpWJse\n9K3nZ3tc0lhEnImI/5b0n6p8GLSjeuZ7t6S/laSI+DdJ71HlOjidqq7/383qlPB/XtIm21fYvkiV\nA7pji9osvOvY7ZKeiezoShtadr62N0v6K1WCv53XhKVl5hsRpyNiXUQMRMSAKsc4tkTlvhLtpp5/\ny/tU2es/f3e8qyQdLXKQOapnvj9Qdt8Q2x9WJfxnCh1lscYkfS476+cGSacj4vW8O+mIZZ+IOGv7\nXknjqpw9sDsijth+SNJERIxJ+roqvy5OqXKw5Y7yRtycOuc7KumnJP1ddlz7BxGxpbRBN6HO+XaE\nOuc6LunXbb8qaV7SSFRurNR26pzvH0r6a9u/r8rB37vaeMdNtp9Q5cN7XXYc4wFJ3ZIUEY+oclzj\nVklTkt6S9PmWjKON/w4BACvUKcs+AIAGEP4AkCDCHwASRPgDQIIIfwBIEOEPAAki/AEgQYQ/ACTo\n/wFUtY9oA9QtmAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x10ce1c1d0>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.scatter( labels, pred )"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Build the ROC curve and calculate the AUC"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"fpr, tpr, _ = roc_curve(labels, pred)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.985967972094\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAADxBJREFUeJzt3XGIZWd9xvHvY7aptE206Y4Qd3fclW7AUYuGIasINcVY\nNgF3KbaSiLSW4FbbSEEppLVEiVCwpRaERV2oWIUYo1AdcCWlNhIQd7sridHdEBk3muwamtXE+Ido\nDP31j3u3uZns7D2zc2bu3He+Hxi455x3z/m9e2eeeec959yTqkKS1JYXTLoASVL/DHdJapDhLkkN\nMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSg7ZM6sBbt26tnTt3TurwkjSVvvWtb/24qmbGtZtY\nuO/cuZPjx49P6vCSNJWS/LBLO6dlJKlBhrskNchwl6QGGe6S1CDDXZIaNDbck3wqyeNJvrvM9iT5\nWJLFJA8kubr/MiVJK9Fl5P5pYO8Ftl8P7B5+HQA+vvqyJEmrMfY696q6N8nOCzTZD3ymBs/rO5Lk\nxUmurKrHeqpRU+yOo4/w5fvPTLoMaUOZe+nlfPAtr1zTY/RxE9M24NGR5dPDdc8L9yQHGIzumZ2d\n7eHQWo31CN6jDz8BwJ5dV6zpcSQ917reoVpVh4BDAPPz8z6Z+yL0GcjrEbx7dl3B/tds4+17/GUu\nrac+wv0MsGNkeftwndbAl+8/w8nHfsbclZevel8Gr9SuPsJ9AbglyZ3AHuAp59v7NTpaPxfsn/+L\n10+4Kkkb2dhwT/I54Fpga5LTwAeBXwOoqk8Ah4EbgEXg58Cfr1WxrRo31TI6fTJ35eXsf8229SpN\n0pTqcrXMTWO2F/BXvVXUiJXMjY+b+3b6RNJKTewjf1s0GugrOVlpeEvqm+F+kc43Mh8NdANb0iQZ\n7h2MC/JzDHRJG4Xhfh5Lw9wglzRtDPcl7jj6CH/3798Bng1zg1zStDHclzg3Yv+HP3q1YS5pahnu\nQ+emYk4+9jP27LrCYJc01XxYx9Dobf3eJCRp2m26kftyNxd5W7+klmy6kfu5EfpSjtgltWTTjNxH\n59QdoUtq3aYI96WXNzpCl9S6TRHuXt4oabNpfs79jqOPcPThJ7y8UdKm0uTI/XyfzuhUjKTNpLlw\nXzq/7kcHSNqMmgt359clqbE5d+fXJWmgqXA/N2p3fl3SZtdMuDtql6RnNRHuoydRHbVLUiPh7klU\nSXquqQ93p2Mk6fmmPtw9iSpJzzf14Q44apekJZoId0nScxnuktQgw12SGmS4S1KDpvaDw5Y+Nk+S\n9KxOI/cke5M8lGQxya3n2T6b5J4k9yV5IMkN/Zf6rHN3pB59+AkfbC1J5zF25J7kEuAg8GbgNHAs\nyUJVnRxp9vfAXVX18SRzwGFg5xrUC3hHqiSN02Xkfg2wWFWnqupp4E5g/5I2BZybG3kR8KP+Sjw/\nr22XpOV1mXPfBjw6snwa2LOkzYeA/0jyXuA3get6qU6SdFH6ulrmJuDTVbUduAH4bJLn7TvJgSTH\nkxw/e/ZsT4eWJC3VJdzPADtGlrcP1426GbgLoKq+CbwQ2Lp0R1V1qKrmq2p+Zmbm4iqWJI3VJdyP\nAbuT7EpyKXAjsLCkzSPAmwCSvIJBuDs0l6QJGRvuVfUMcAtwN/Agg6tiTiS5Pcm+YbP3A+9K8m3g\nc8A7q6rWqmhJ0oV1uompqg4zuLxxdN1tI69PAm/otzRJ0sXy4wckqUGGuyQ1yHCXpAYZ7pLUIMNd\nkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ2aunC/\n4+gjHH34iUmXIUkb2tSF+5fvHzybe/9rtk24EknauKYu3AH27LqCt++ZnXQZkrRhTWW4S5IuzHCX\npAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLc
JalBhrskNchwl6QGGe6S1CDDXZIa1Cnck+xN8lCS\nxSS3LtPmbUlOJjmR5I5+y5QkrcSWcQ2SXAIcBN4MnAaOJVmoqpMjbXYDfwu8oaqeTPKStSpYkjRe\nl5H7NcBiVZ2qqqeBO4H9S9q8CzhYVU8CVNXj/ZYpSVqJLuG+DXh0ZPn0cN2oq4CrknwjyZEke/sq\nUJK0cmOnZVawn93AtcB24N4kr66qn442SnIAOAAwO+vDNiRprXQZuZ8Bdowsbx+uG3UaWKiqX1XV\nw8D3GIT9c1TVoaqar6r5mZmZi61ZkjRGl3A/BuxOsivJpcCNwMKSNl9iMGonyVYG0zSneqxTkrQC\nY8O9qp4BbgHuBh4E7qqqE0luT7Jv2Oxu4CdJTgL3AH9TVT9Zq6IlSRfWac69qg4Dh5esu23kdQHv\nG35JkibMO1QlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QG\nGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDh\nLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWpQp3BPsjfJQ0kWk9x6gXZvTVJJ5vsr\nUZK0UmPDPcklwEHgemAOuCnJ3HnaXQb8NXC07yIlSSvTZeR+DbBYVaeq6mngTmD/edp9GPgI8Ise\n65MkXYQu4b4NeHRk+fRw3f9LcjWwo6q+cqEdJTmQ5HiS42fPnl1xsZKkblZ9QjXJC4CPAu8f17aq\nDlXVfFXNz8zMrPbQkqRldAn3M8COkeXtw3XnXAa8Cvh6kh8ArwMWPKkqSZPTJdyPAbuT7EpyKXAj\nsHBuY1U9VVVbq2pnVe0EjgD7qur4mlQsSRprbLhX1TPALcDdwIPAXVV1IsntSfatdYGSpJXb0qVR\nVR0GDi9Zd9syba9dfVmSpNXwDlVJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3\nSWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJek\nBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUoE7hnmRvkoeSLCa59Tzb\n35fkZJIHknwtycv6L1WS1NXYcE9yCXAQuB6YA25KMrek2X3AfFX9HvBF4B/7LlSS1F2Xkfs1wGJV\nnaqqp4E7gf2jDarqnqr6+XDxCLC93zIlSSvRJdy3AY+OLJ8erlvOzcBXV1OUJGl1tvS5syTvAOaB\nNy6z/QBwAGB2drbPQ0uSRnQZuZ8Bdowsbx+ue44k1wEfAPZV1S/Pt6OqOlRV81U1PzMzczH1SpI6\n6BLux4DdSXYluRS4EVgYbZDktcAnGQT74/2XKUlaibHhXlXPALcAdwMPAndV1YkktyfZN2z2T8Bv\nAV9Icn+ShWV2J0laB53m3KvqMHB4ybrbRl5f13NdkqRV8A5VSWqQ4S5JDTLcJalBhrskNchwl6QG\nGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDh\nLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S\n1CDDXZIa1Cnck+xN8lCSxSS3nmf7ryf5/HD70SQ7+y5UktTd2HBPcglwELgemANuSjK3pNnNwJNV\n9bvAvwAf6btQSVJ3XUbu1wCLVXWqqp4G7gT2L2mzH/i34esvAm9Kkv7KlCStxJYObbYBj44snwb2\nLNemqp5J8hTwO8CP+yhy1NxLL+97l5LUnC7h3pskB4ADALOzsxe1jw++5ZV9liRJTeoyLXMG2DGy\nvH247rxt
kmwBXgT8ZOmOqupQVc1X1fzMzMzFVSxJGqtLuB8DdifZleRS4EZgYUmbBeDPhq//GPiv\nqqr+ypQkrcTYaZnhHPotwN3AJcCnqupEktuB41W1APwr8Nkki8ATDH4BSJImpNOce1UdBg4vWXfb\nyOtfAH/Sb2mSpIvlHaqS1CDDXZIaZLhLUoMMd0lqUCZ1xWKSs8APL/Kfb2UN7n7d4Ozz5mCfN4fV\n9PllVTX2RqGJhftqJDleVfOTrmM92efNwT5vDuvRZ6dlJKlBhrskNWhaw/3QpAuYAPu8OdjnzWHN\n+zyVc+6SpAub1pG7JOkCNnS4b8Znt3bo8/uSnEzyQJKvJXnZJOrs07g+j7R7a5JKMvVXVnTpc5K3\nDd/rE0nuWO8a+9bhe3s2yT1J7ht+f98wiTr7kuRTSR5P8t1ltifJx4b/Hw8kubrXAqpqQ34x+ATK\n7wMvBy4Fvg3MLWnzl8Anhq9vBD4/6brXoc9/APzG8PV7NkOfh+0uA+4FjgDzk657Hd7n3cB9wG8P\nl18y6brXoc+HgPcMX88BP5h03avs8+8DVwPfXWb7DcBXgQCvA472efyNPHLfjM9uHdvnqrqnqn4+\nXDzC4OEp06zL+wzwYQYPXv/Feha3Rrr0+V3Awap6EqCqHl/nGvvWpc8FnHuO5ouAH61jfb2rqnsZ\nfAT6cvYDn6mBI8CLk1zZ1/E3crif79mt25ZrU1XPAOee3TqtuvR51M0MfvNPs7F9Hv65uqOqvrKe\nha2hLu/zVcBVSb6R5EiSvetW3dro0ucPAe9IcprBR4y/d31Km5iV/ryvyLo+Q1X9SfIOYB5446Rr\nWUtJXgB8FHjnhEtZb1sYTM1cy+Cvs3uTvLqqfjrRqtbWTcCnq+qfk7yewQOAXlVV/zvpwqbRRh65\n9/bs1inSpc8kuQ74ALCvqn65TrWtlXF9vgx4FfD1JD9gMDe5MOUnVbu8z6eBhar6VVU9DHyPQdhP\nqy59vhm4C6Cqvgm8kMFnsLSq08/7xdrI4b4Zn906ts9JXgt8kkGwT/s8LIzpc1U9VVVbq2pnVe1k\ncJ5hX1Udn0y5vejyvf0lBqN2kmxlME1zaj2L7FmXPj8CvAkgySsYhPvZda1yfS0Afzq8auZ1wFNV\n9Vhve5/0GeUxZ5tvYDBi+T7wgeG62xn8cMPgzf8CsAj8N/DySde8Dn3+T+B/gPuHXwuTrnmt+7yk\n7deZ8qtlOr7PYTAddRL4DnDjpGtehz7PAd9gcCXN/cAfTrrmVfb3c8BjwK8Y/CV2M/Bu4N0j7/HB\n4f/Hd/r+vvYOVUlq0EaelpEkXSTDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBv0fk17E\nWKmJCx0AAAAASUVORK5CYII=\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x10cee7b10>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.plot(fpr, tpr)\n",
"print auc(fpr, tpr)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Try K-Fold Cross-Validation"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python2.7/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
" \"This module will be removed in 0.20.\", DeprecationWarning)\n"
]
}
],
"source": [
"from sklearn.cross_validation import KFold"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"kf = KFold(features.shape[0], n_folds=10)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true,
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.990118577075\n",
"0.980519480519\n",
"0.964285714286\n",
"0.991379310345\n",
"0.991379310345\n",
"0.983333333333\n",
"0.995426829268\n",
"1.0\n",
"0.993006993007\n",
"0.98747763864\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAExJJREFUeJzt3X+QXWV9x/H3l8QFIQENuxTMb6ZhYxSm4A6iziit2IZo\nSa1WQodaM4xBLU47Mu3QsUXFf9SO7dRORknVKM4IAabYnXGFWoXBIj+yiCYmuBgDmIQfCQSCMQQS\n+PaPexcua3bv3d27e/c++37NZHLPOc+e8312k0+ePOdXZCaSpLIc1eoCJEnNZ7hLUoEMd0kqkOEu\nSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCjSzVQfu7OzMRYsWterwktSW7r333icys6teu5aF+6JF\ni+jv72/V4SWpLUXEw420c1pGkgpkuEtSgQx3SSqQ4S5JBTLcJalAdcM9Ir4eEbsj4ufDbI+I+FJE\nbIuITRFxVvPLlCSNRiMj928Ay0fYfj6wpPprDfDl8ZclSRqPute5Z+btEbFohCYrgWuy8r6+uyLi\nNRFxSmY+2qQaX6G/v5/NmzdPxK554TfP8+L+Q69YN/ukTczuHHhpOTOhoTcTJiO9wfD2o9/KPR09\nYytUUlube+AJrv2zj07oMZox5z4X2FGzvLO67ndExJqI6I+I/j179ozpYJs3b+axxx4b09fW8+L+\nQ+TzL7xi3ezOATqOrak1GTG0X2pWp809HT3smHHEb5Mkjduk3qGameuAdQA9PT1jfjP3ySefzOrV\nq5tW16DdV28C4KRLz3hp3b0/+TEwlzed9W0AbvriTwB47+Ujn1p4+K8+CMDCb11zxO3r7/slJwI3\nnfnWunVt+MwVAFz4qc8N32j9uyu/r/5u3f0NuvDqOwE4duG6yi6Wr2/4ayVNbc0Yue8C5tcsz6uu\nkyS1SDPCvRf4YPWqmXOAfRM13y5JakzdaZmIuBY4F+iMiJ3Ap4BXAWTmV4A+YAWwDTgANH++pIV2\nP3OQJ/Y/xxc2VqYwznj0OQCuq05pDOdDjz4DwD8M027rvA4ALrxn5P0ALH2ksq8LRzjmlU/uA+Cq\nOnW9ooZHn2HZKcc33F5S+2jkapmL6mxP4G+aVtEU88T+5zgw5CRrKZadcjwr/2Au33+q1ZVIaraW\nPfJ3Ktnyo108cM/jdD/9LAB3VE+aAsxakMziKFbtPxqAPQd/y+zndnPx/9084j4P7nuEY5YuZcOl\nbzni9vfe90sANrx7Sd36NnzmvwH4zDD7AmD9CZW2q0doM4zvj9wVSW3Ixw8AD9zzOE/s3N9Q29nP\n7eaknT+u2+6YpUs5/j3vGW9pkjQmjtyrOufNonNW5dux7NIzoH89bL6R21/1IABvn/PPADz80CMA\nLPyj19Xf6YFvwvpvHnnbyZdUfl//d/X3M3h6evByxyN5bDOcfHr9fUmaFgz34Wy+sRKYrz3uiJtv\nYD998dsx736Ayp2wq+Pxum0Xx5xq273DNzrlJDjuRbh59OezB/YO0D2ne9RfJ2nqMtxHcvLpPDz4\nHRq8OeiHlZuT+k6ZMb5QfNVxLx2jro6nq20n5o7W7jndrDh1xYTsW1JrGO7j0D2ne8x3dQ6eUF1/\nZv2v33B35Q7VK5ePcIeqJNVou3B/evcBePYQt336R03b5/xnK09C2PdksKsDPn71nS9dN37ghFkc\n2zGjaceSpMnQflfLPHuIjhcnZte7OuC+4175LTm2Ywads46emANK0gRpu5H7zBcOwwsvsHjHfzZt\nnz8+pvK4+rcevJnFwJ8DPFY5WXrqwt/CvpcfBHbwF7/gmKVLm3ZsSZoIbRfuA0thU9di/uvFhU3b\n5+GofBu+mjUnFfNPAHixA446NIOjz6+eAD0fZpx4Ir+aWblyZXDufLS27H+WN8x69diLlqQRtF24\nb+payM6Zr2P+4eY9eDKz8niBw0cdVbuy8vsL
Mzj8wtE89+qaqZkD+znm8GEA9jzz4JiOeRKwYOvD\nbOj9Wt22ex56kK5Fi8d0HEnTU9uFO8C8w4/wsTt/Ur9hg57YUbk7tXP+rJdXPlp929MpR75UcWBv\n5e1Mk3F9eNeixbz+bedO+HEklaMtwx3qvLiijhseuIG+7X0vLXc//y4A+t/8/ZcbPVa9uejkk464\nj4G9j9M9p9vLEyVNSe13tUwT9G3ve2nkPVbe+CNpKmvbkft41d6AdNOWyhTPFcv/8uUGg89x8dVz\nktpQ24b7TV8c+5x7997KNMxgqD+xcz+d82aN9CWS1Fam5bTMUJ3zZnHa2b/X6jIkqWnaduT+3svP\nGvPXrr75P4Ah0zCSVBBH7pJUIMNdkgpkuEtSgdp2zn1E1VfkDWvw7Ue+tk5SococuQ++Im88Tj4d\nTn9/c+qRpElW5sgdKuE8+Gq8oQbfM+oNSpIKVebIXZKmOcNdkgpkuEtSgQx3SSqQ4S5JBTLcJalA\n7XsppDcgSdKwGhq5R8TyiBiIiG0RccURti+IiFsj4r6I2BQRrX1FkTcgSZrm6o7cI2IGsBZ4F7AT\n2BgRvZm5tabZPwHXZ+aXI2IZ0AcsmoB6XzbcDUqSpIZG7mcD2zJze2Y+D1wHrBzSJoHjq59PAB5p\nXomSpNFqZM59LrCjZnkn8OYhbT4N/E9EfBw4DjivKdVJksakWVfLXAR8IzPnASuAb0XE7+w7ItZE\nRH9E9O/Zs6dJh5YkDdVIuO8C5tcsz6uuq3UJcD1AZt4JHAN0Dt1RZq7LzJ7M7Onq6hpbxZKkuhoJ\n943AkohYHBEdwCqgd0ibXwPvBIiI11MJd4fmktQidcM9Mw8DlwG3APdTuSpmS0RcFREXVJtdDnw4\nIn4GXAt8KDNzooqWJI2soZuYMrOPyuWNteuurPm8FXhbc0uTJI2Vjx+QpAIZ7pJUIMNdkgpkuEtS\ngQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXI\ncJekAhnuklQgw12SCmS4S1KBDHdJKtDMVhcwEW544Ab6tvcNu31g7wDdc7onsSJJmlxFjtz7tvcx\nsHdg2O3dc7pZceqKSaxIkiZXkSN3qAT4+uXrW12GJLVEkSN3SZruDHdJKpDhLkkFMtwlqUCGuyQV\nyHCXpAIZ7pJUoIbCPSKWR8RARGyLiCuGafOBiNgaEVsi4tvNLVOSNBp1b2KKiBnAWuBdwE5gY0T0\nZubWmjZLgH8E3paZT0XESRNVsCSpvkZG7mcD2zJze2Y+D1wHrBzS5sPA2sx8CiAzdze3TEnSaDQS\n7nOBHTXLO6vrap0GnBYRd0TEXRGxvFkFSpJGr1nPlpkJLAHOBeYBt0fE6Zn5dG2jiFgDrAFYsGBB\nkw4tSRqqkZH7LmB+zfK86rpaO4HezDyUmQ8CD1AJ+1fIzHWZ2ZOZPV1dXWOtWZJURyPhvhFYEhGL\nI6IDWAX0DmnzHSqjdiKik8o0zfYm1ilJGoW64Z6Zh4HLgFuA+4HrM3NLRFwVERdUm90CPBkRW4Fb\ngb/PzCcnqmhJ0sgamnPPzD6gb8i6K2s+J/CJ6i9JUot5h6okFchwl6QCGe6SVCDDXZIKZLhLUoEM\nd0kqkOEuSQUy3CWpQIa7JBWoWU+FnHSrb1497LaBvQN0z+mexGokaWopcuTePaebFaeuaHUZktQy\nbTtyX798fatLkKQpq8iRuyRNd4a7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCG\nuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVKCGwj0ilkfE\nQERsi4grRmj3vojIiOhpXomSpNGqG+4RMQNYC5wPLAMuiohlR2g3G/hb4O5mFylJGp1GRu5nA9sy\nc3tmPg9c
B6w8QrvPAp8HDjaxPknSGDQS7nOBHTXLO6vrXhIRZwHzM/O7I+0oItZERH9E9O/Zs2fU\nxUqSGjPuE6oRcRTwr8Dl9dpm5rrM7MnMnq6urvEeWpI0jEbCfRcwv2Z5XnXdoNnAG4HbIuIh4Byg\n15OqktQ6jYT7RmBJRCyOiA5gFdA7uDEz92VmZ2YuysxFwF3ABZnZPyEVS5LqqhvumXkYuAy4Bbgf\nuD4zt0TEVRFxwUQXKEkavZmNNMrMPqBvyLorh2l77vjLkiSNh3eoSlKBDHdJKpDhLkkFMtwlqUCG\nuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhL\nUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQV\nyHCXpAIZ7pJUoIbCPSKWR8RARGyLiCuOsP0TEbE1IjZFxA8iYmHzS5UkNapuuEfEDGAtcD6wDLgo\nIpYNaXYf0JOZZwA3Al9odqGSpMY1MnI/G9iWmdsz83ngOmBlbYPMvDUzD1QX7wLmNbdMSdJoNBLu\nc4EdNcs7q+uGcwnwvfEUJUkan5nN3FlEXAz0AO8YZvsaYA3AggULmnloSVKNRkbuu4D5Ncvzqute\nISLOAz4JXJCZzx1pR5m5LjN7MrOnq6trLPVKkhrQSLhvBJZExOKI6ABWAb21DSLiTOBqKsG+u/ll\nSpJGo264Z+Zh4DLgFuB+4PrM3BIRV0XEBdVm/wLMAm6IiJ9GRO8wu5MkTYKG5twzsw/oG7LuyprP\n5zW5LknSOHiHqiQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6S\nVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCtTQC7KnknnPPtXqEiRp\nymu7cL/itgWVDytbW4ckTWVOy0hSgdpu5P6bwwdbXYIkTXltF+6/eOZxAN7W4jokaSpzWkaSCtR2\nI/fIaHUJkjTltV24ZxzT6hIkacpru3A/fJQnVCWpnrYL9x+eczoAl7a4DkmayjyhKkkFaruR+7LX\nHd/qEiRpymso3CNiOfDvwAzgq5n5uSHbjwauAd4EPAlcmJkPNbfUik/96RsmYreSVJS60zIRMQNY\nC5wPLAMuiohlQ5pdAjyVmb8P/Bvw+WYXKklqXCNz7mcD2zJze2Y+D1zH7z62ayXwzernG4F3RoQX\npEtSizQS7nOBHTXLO6vrjtgmMw8D+4ATh+4oItZERH9E9O/Zs2dsFUuS6prUq2Uyc11m9mRmT1dX\n12QeWpKmlUbCfRcwv2Z5XnXdEdtExEzgBConViVJLdBIuG8ElkTE4ojoAFYBvUPa9AJ/Xf38fuCH\nmZnNK1OSNBp1L4XMzMMRcRlwC5VLIb+emVsi4iqgPzN7ga8B34qIbcBeKv8ASJJapKHr3DOzD+gb\nsu7Kms8Hgb9obmmSpLGKVs2eRMQe4OExfnkn8EQTy2kH9nl6sM/Tw3j6vDAz616R0rJwH4+I6M/M\nnlbXMZns8/Rgn6eHyeizDw6TpAIZ7pJUoHYN93WtLqAF7PP0YJ+nhwnvc1vOuUuSRtauI3dJ0gim\ndLhHxPKIGIiIbRFxxRG2Hx0RG6rb746IRZNfZXM10OdPRMTWiNgUET+IiIWtqLOZ6vW5pt37IiIj\nou2vrGikzxHxgerPektEfHuya2y2Bv5sL4iIWyPivuqf7xWtqLNZIuLrEbE7In4+zPaIiC9Vvx+b\nIuKsphaQmVPyF5W7YX8FnAp0AD8Dlg1p8zHgK9XPq4ANra57Evr8h8Cx1c8fnQ59rrabDdwO3AX0\ntLruSfg5LwHuA15bXT6p1XVPQp/XAR+tfl4GPNTqusfZ57cDZwE/H2b7Cu
B7QADnAHc38/hTeeQ+\nHZ8jX7fPmXlrZh6oLt5F5UFu7ayRnzPAZ6m8BObgZBY3QRrp84eBtZn5FEBm7p7kGputkT4nMPge\nzROARyaxvqbLzNupPI5lOCuBa7LiLuA1EXFKs44/lcO9ac+RbyON9LnWJVT+5W9ndftc/e/q/Mz8\n7mQWNoEa+TmfBpwWEXdExF3VV122s0b6/Gng4ojYSeVxJx+fnNJaZrR/30el7V6QrYqIuBjoAd7R\n6lomUkQcBfwr8KEWlzLZZlKZmjmXyv/Obo+I0zPz6ZZWNbEuAr6RmV+MiLdQeRjhGzPzxVYX1o6m\n8sh9Oj5HvpE+ExHnAZ8ELsjM5yaptolSr8+zgTcCt0XEQ1TmJnvb/KRqIz/nnUBvZh7KzAeBB6iE\nfbtqpM+XANcDZOadwDFUnsFSqob+vo/VVA736fgc+bp9jogzgaupBHu7z8NCnT5n5r7M7MzMRZm5\niMp5hgsys7815TZFI3+2v0Nl1E5EdFKZptk+mUU2WSN9/jXwToCIeD2VcC/5fZy9wAerV82cA+zL\nzEebtvdWn1Guc7Z5BZURy6+AT1bXXUXlLzdUfvg3ANuAe4BTW13zJPT5f4HHgZ9Wf/W2uuaJ7vOQ\ntrfR5lfLNPhzDirTUVuBzcCqVtc8CX1eBtxB5UqanwJ/3Oqax9nfa4FHgUNU/id2CfAR4CM1P+O1\n1e/H5mb/ufYOVUkq0FSelpEkjZHhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgf4fAYJt\nq7PQ0sMAAAAASUVORK5CYII=\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x10cf34110>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"for train_index, test_index in kf:\n",
" train_features = features.loc[features.index[train_index]]\n",
" test_features = features.loc[features.index[test_index]]\n",
" train_labels = labels.loc[labels.index[train_index]]\n",
" test_labels = labels.loc[labels.index[test_index]]\n",
" lg = LinearRegression()\n",
" lg.fit(train_features, train_labels)\n",
" pred = lg.predict(test_features)\n",
" fpr, tpr, _ = roc_curve(test_labels, pred)\n",
" plt.plot(fpr, tpr)\n",
" print auc(fpr, tpr)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Try a harder dataset"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"('wpbc.data', <httplib.HTTPMessage instance at 0x10d00e098>)"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"urllib.urlretrieve(\"http://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wpbc.data\", \"wpbc.data\")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"wpbc_names = [\n",
" \"ID\",\n",
" \"Outcome\",\n",
" \"Time\",\n",
" \"radius\",\n",
" \"texture\",\n",
" \"perimeter\",\n",
" \"area\",\n",
" \"smoothness\",\n",
" \"compactness\",\n",
" \"concavity\",\n",
" \"concave_points\",\n",
" \"symmetry\",\n",
" \"fractal_dimension\"\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"wpbc = pandas.read_csv(\"wpbc.data\", names=wpbc_names, usecols=wpbc_names, header=None, index_col=0)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"labels_named = wpbc[\"Outcome\"]\n",
"features = wpbc[wpbc_names[3:]]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"labels = labels_named.map({\"N\" : 0, \"R\" : 1})"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"lg = LinearRegression()\n",
"lg.fit(features, labels)\n",
"pred = lg.predict(features)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.collections.PathCollection at 0x10d054250>"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEnxJREFUeJzt3X+IXXeZx/H3k+m0jrvVqBnB/KipbhpN7UrcoSqFXUvd\nTVo0CdiVRMoqlBZdKgtKoMWlluo/7rAuLGRZCyu6ov2hlDBgZNjVSEFM7Ui0Ma0jMVqTiWtj2ykL\nHe1k+uwf96Y7md47c+/knLlzzn2/IHDvc7/Meb6ZySdnvudXZCaSpHpZ0+sGJEnFM9wlqYYMd0mq\nIcNdkmrIcJekGjLcJamGDHdJqiHDXZJqyHCXpBq6pFcbXrduXW7evLlXm5ekSvrxj3/8+8wcXmpc\nz8J98+bNTExM9GrzklRJEfFUJ+NclpGkGjLcJamGDHdJqiHDXZJqyHCXpBoy3CWphgx3Saohw12S\naqhnFzFJUj84eHSK0fFJzkzPsH7tEPt3bGXP9g2lb7ejPfeI2BkRkxFxIiLubPH5FRFxOCKORsTj\nEXFT8a1KUrUcPDrFXQ8fY2p6hgSmpme46+FjHDw6Vfq2lwz3iBgADgA3AtuAfRGxbcGwfwQeyszt\nwF7g34puVJKqZnR8kpnZuQtqM7NzjI5Plr7tTvbcrwVOZObJzHwReADYvWBMAq9pvn4tcKa4FiWp\nms5Mz3RVL1In4b4BODXv/elmbb57gFsi4jRwCPhkqy8UEbdHxERETJw9e3YZ7UpSdaxfO9RVvUhF\nnS2zD/hKZm4EbgK+FhGv+NqZeV9mjmTmyPDwkneslKRK279jK0ODAxfUhgYH2L9ja+nb7uRsmSlg\n07z3G5u1+W4FdgJk5g8j4lXAOuDpIpqUpCo6f1ZML86W6STcHwO2RMSVNEJ9L/CRBWN+A9wAfCUi\n3g68CnDdRVLf27N9w4qE+UJLLstk5jngDmAceJLGWTHHI+LeiNjVHPZp4LaI+ClwP/CxzMyympYk\nLa6ji5gy8xCNA6Xza3fPe/0EcF2xrUmSlsvbD0hSDRnuklRDhrsk1ZDhLkk15F0hJalEvborpOEu\nSSU5f1fI8zcPO39XSKD0gHdZRpJKstrvCilJWobVfldISdIy1OGukJKkBa5/W+u737arF8lwl6SS\nHP556/sntqsXyXCXpJK45i5JNeSauyTV0Gp/EpMkaRlW+5OYJEnLtGqfxCRJqh7DXZJqyHCXpBoy\n3CWphgx3Saohw12Sashwl6QaMtwlqYYMd0mqIcNdkmrIcJekGjLcJamGDHdJqiHDXZJqyHCXpBoy\n3CWphgx3Saohw12SaqijcI+InRExGREnIuLONmM+HBFPRMTxiPhGsW1Kkrqx5DNUI2IAOAD8NXAa\neCwixjLziXljtgB3Addl5nMR8cayGpYkLa2TPfdrgROZeTIzXwQeAHYvGHMbcCAznwPIzKeLbVOS\n1I1Own0DcGre+9PN2nxXAVdFxA8i4khE7CyqQUlS95Zcluni62wB3gdsBB6JiGsyc3r+oIi4Hbgd\n4IorrrioDR48OsXo+CRnpmdYv3aI/Tu2smf7wv9zJKk/dbLnPgVsmvd+Y7M232lgLDNnM/NXwC9o\nhP0FMvO+zBzJzJHh4eHl9szBo1Pc9fAxpqZnSGBqeoa7Hj7GwaML25Kk/tRJuD8GbImIKyPiUmAv\nMLZgzEEae+1ExDoayzQnC+zzAqPjk8zMzl1Qm5mdY3R8sqxNSlKlLBnumXkOuAMYB54EHsrM4xFx\nb0Tsag4bB56JiCeAw8D+zHymrKbPTM90VZekftPRmntmHgIOLajdPe91Ap9q/ind+rVDTLUI8vVr\nh1Zi85K06lXyCtXr39Z6vb5dXZL6TSXD/fDPz3ZVl6R+U8lwd81dkhZXyXBvt7bumrskNVQy3Pfv\n2MrQ4MAFtaHBAfbv2NqjjiRpdSnqCtUVdf5K
VK9QlaTWKhnu0Ah4w1ySWqtsuEtSFfTqPliGuySV\n5Px9sM7fLuX8fbCA0gO+kgdUJakKenkfLMNdkkrSy2tyDHdJKkkvr8kx3CWpJL28JscDqpJUkl5e\nk2O4S1KJenVNjssyklRDld1z9wHZktReJcO9lxcGSFIVVHJZxgdkS9LiKrnn7sM6JFVFr5aQK7nn\n7sM6JFXB+SXkqekZkv9fQj54dKr0bVcy3H1Yh6Qq6OUSciWXZXxYh6Qq6OUSciXDHXxYh6TVb/3a\nIaZaBLn3lpGkCvPeMsvgRUySVrs92zcw8dSz3P/oKeYyGYjgQ3+xMqsOldxz7+URaEnq1MGjUzz4\no0awA8xl8uCPTnm2TDtexCSpCu4ZO87sS3lBbfal5J6x46Vvu5Lh7kVMkqpgema2q3qRKhnuXsQk\nSYurZLjv37GVwTVxQW1wTXgRk6RV5XWvHuyqXqRKhjsAscR7Seqxz37wagYHFuyIDgSf/eDVpW+7\nkuE+Oj7J7NyCgxRz6QFVSavKnu0bGL35nWxYO0QAG9YOMXrzOz0Vsh0PqErS4ip5EdPaVw/y3Auv\nPNq8dgXWsSSpU718sFBHe+4RsTMiJiPiRETcuci4D0VERsRIcS2+UmZ3dUnqhV5ek7NkuEfEAHAA\nuBHYBuyLiG0txl0O/APwaNFNLvR8m3NE29UlqRda3TRssXqROtlzvxY4kZknM/NF4AFgd4txnwO+\nAPyhwP5a8jx3SVUwEK1P42tXL1In4b4BODXv/elm7WUR8S5gU2Z+u8De2rr+bcNd1SWpF+barBW3\nqxfpos+WiYg1wBeBT3cw9vaImIiIibNnzy57m99+/Ldd1SWp33QS7lPApnnvNzZr510OvAP4fkT8\nGngPMNbqoGpm3peZI5k5Mjy8/L3sVmfKLFaXpH7TSbg/BmyJiCsj4lJgLzB2/sPMfD4z12Xm5szc\nDBwBdmXmRCkdS5KWtGS4Z+Y54A5gHHgSeCgzj0fEvRGxq+wGJUnd6+gipsw8BBxaULu7zdj3XXxb\niwug1eEIby8jSQ2VvP1AuxA33CWtJt4VsksvdVmXpF7o5dX0lQx3SaoCn8QkSSqU4S5JNVTJcP+T\nSwe6qktSv6lkuA8OtG67XV2S+k0l07CXBykkqVOr/a6QkqRlqPRdISVJrW1o84yJdvUiGe6SVJL9\nO7YyuObCJZjBNcH+HVtL33Ylw93bD0iqjIXBtEJBVclwl6QqGB2fZHbuwvX12blcHQ/IXo3aHYpY\ngds1SFLHVvsDsiVJy9DLJWTDXZJK0stVBsNdkmrIcJekGjLcJamGDHdJKsnQYOuIbVcvkuEuSSWZ\nmW398M929SIZ7pJUQ4a7JNWQ4S5JNWS4S1JJ1rS5FLVdvdBtl78JSepPH3n3FV3Vi2S4S1JJ/uv4\n/3RVL5LhLkkl+d3/vthVvUiGuyTVkOEuSTVkuEtSSV5z2UBX9SIZ7pJUkoGB1hHbrl4kw12SSvLc\nC7Nd1YtkuEtSDRnuklRDHYV7ROyMiMmIOBERd7b4/FMR8UREPB4R342INxffqiSpU0uGe0QMAAeA\nG4FtwL6I2LZg2FFgJDP/HPgW8E9FNypJ6lwne+7XAicy82Rmvgg8AOyePyAzD2fmC823R4CNxbYp\nSepGJ+G+ATg17/3pZq2dW4HvXExTkqSLU+gB1Yi4BRgBRtt8fntETETExNmzZ4vctCStOte99fVd\n1YvUSbhPAZvmvd/YrF0gIt4PfAbYlZl/bPWFMvO+zBzJzJHh4eHl9CtJlfH12977iiC/7q2v5+u3\nvbf0bV/SwZjHgC0RcSWNUN8LfGT+gIjYDnwJ2JmZTxfepSRV1EoEeStL7rln5jngDmAceBJ4KDOP\nR8S9EbGrOWwU+FPgmxHxk4gYK61jSdKSOtlzJzMPAYcW1O6e9/r9BfclSboIXqEqSTVkuEtSDRnu\nklRDhrsk
1ZDhLkk1ZLhLUg0Z7pJUQ4a7JNWQ4S5JNdTRFaqSpOU5eHSK0fFJzkzPsH7tEPt3bGXP\n9sXuml4Mw12SSnLw6BR3PXyMmdk5AKamZ7jr4WMApQe8yzKSVJLR8cmXg/28mdk5RscnS9+24S5J\nJZmanumqXiTDXZJqyHCXpBoy3CWphgx3Saohw12SSjLYJmHb1YtkuEtSSc5ld/UiVTLc2zVdyclI\nqq1sE+Lt6kWqZB6+1GVdknphIKKrepEqGe6SVAX73r2pq3qRvLeMJJXk83uuAeD+R08xl8lABPve\nvenlepkMd0kq0ef3XLMiYb6QyzKSVEOGuyTVkOEuSTVUyXBfOzTYVV2S+k0lw/2eXVd3VZekflPJ\ncJ946tmu6pLUbyoZ7l9/9Ddd1SWp31Qy3Ht5vwZJqoJKhrskaXGGuyTVUCXD3VMhJWlxHYV7ROyM\niMmIOBERd7b4/LKIeLD5+aMRsbnoRuf7wDvf1FVdkvrNkuEeEQPAAeBGYBuwLyK2LRh2K/BcZv4Z\n8C/AF4pudL7DPz/bVV2S+k0ne+7XAicy82Rmvgg8AOxeMGY38NXm628BN0SUdzf6M9MzXdUlqd90\nEu4bgFPz3p9u1lqOycxzwPPAG4posJX1a4e6qktSv1nRA6oRcXtETETExNmzy19C2fyG1iHeri5J\n/aaTcJ8C5j8TamOz1nJMRFwCvBZ4ZuEXysz7MnMkM0eGh4eX1zFw5ORzXdUlqd90Eu6PAVsi4sqI\nuBTYC4wtGDMGfLT5+mbge5nlXS861+ZLt6tLUr9Z8jF7mXkuIu4AxoEB4MuZeTwi7gUmMnMM+A/g\naxFxAniWxn8ApQmgVYyX/zxxSaqGjp6hmpmHgEMLanfPe/0H4G+Lba29gTXBuZdeGe8Da4x3SYKK\nXqHaKtgXq0tSv6lkuEuSFme4S1INVTLcL7ukddvt6pLUbyqZhn8891JXdUnqN5UMd0nS4ioZ7t7P\nXZIWV8lwv2fX1a9ofE2zLkmqaLgDDAzEou8lqZ9VMtxHxyeZnbvwgqXZuWR0fLJHHUnS6lLJcPdh\nHZK0uEqGuw/rkKTFVTLc9+/YytDgwAW1ocEB9u/Y2qOOJGl16eiukKvNnu2Np/yNjk9yZnqG9WuH\n2L9j68t1Sep3lQx3aAS8YS5JrVVyWUaStDjDXZJqyHCXpBoy3CWphgx3Saohw12Sashwl6QaMtwl\nqYYiM5ceVcaGI84CTxXwpdYBvy/g61SF862vfporON/lenNmDi81qGfhXpSImMjMkV73sVKcb331\n01zB+ZbNZRlJqiHDXZJqqA7hfl+vG1hhzre++mmu4HxLVfk1d0nSK9Vhz12StEBlwj0idkbEZESc\niIg7W3x+WUQ82Pz80YjYvPJdFqODuX4qIp6IiMcj4rsR8eZe9FmUpeY7b9yHIiIjotJnWHQy34j4\ncPN7fDwivrHSPRapg5/nKyLicEQcbf5M39SLPosQEV+OiKcj4mdtPo+I+Nfm38XjEfGu0prJzFX/\nBxgAfgm8BbgU+CmwbcGYvwf+vfl6L/Bgr/suca7XA69uvv5EVefa6Xyb4y4HHgGOACO97rvk7+8W\n4Cjwuub7N/a675Lnex/wiebrbcCve933Rcz3L4F3AT9r8/lNwHeAAN4DPFpWL1XZc78WOJGZJzPz\nReABYPeCMbuBrzZffwu4ISJiBXssypJzzczDmflC8+0RYOMK91ikTr63AJ8DvgD8YSWbK0En870N\nOJCZzwFk5tMr3GOROplvAq9pvn4tcGYF+ytUZj4CPLvIkN3Af2bDEWBtRLypjF6qEu4bgFPz3p9u\n1lqOycxzwPPAG1aku2J1Mtf5bqWxJ1BVS863+avrpsz89ko2VpJOvr9XAVdFxA8i4khE7Fyx7orX\nyXzvAW6JiNPAIeCTK9NaT3T773vZKvsMVUFE3AKMAH/V617KEhFrgC8CH+
txKyvpEhpLM++j8VvZ\nIxFxTWZO97Sr8uwDvpKZ/xwR7wW+FhHvyMyXet1YlVVlz30K2DTv/cZmreWYiLiExq93z6xId8Xq\nZK5ExPuBzwC7MvOPK9RbGZaa7+XAO4DvR8SvaaxTjlX4oGon39/TwFhmzmbmr4Bf0Aj7KupkvrcC\nDwFk5g+BV9G4D0sddfTvuwhVCffHgC0RcWVEXErjgOnYgjFjwEebr28GvpfNIxgVs+RcI2I78CUa\nwV7l9VhYYr6Z+XxmrsvMzZm5mcYxhl2ZOdGbdi9aJz/LB2nstRMR62gs05xcySYL1Ml8fwPcABAR\nb6cR7mdXtMuVMwb8XfOsmfcAz2fmb0vZUq+PLndxFPomGnswvwQ+06zdS+MfOjR+IL4JnAB+BLyl\n1z2XONf/Bn4H/KT5Z6zXPZc53wVjv0+Fz5bp8PsbNJaingCOAXt73XPJ890G/IDGmTQ/Af6m1z1f\nxFzvB34LzNL4DexW4OPAx+d9bw80/y6Olfmz7BWqklRDVVmWkSR1wXCXpBoy3CWphgx3Saohw12S\nashwl6QaMtwlqYYMd0mqof8DF3FadyY47V4AAAAASUVORK5CYII=\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x10d1136d0>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.scatter( labels, pred )"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"fpr, tpr, _ = roc_curve(labels, pred)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.684514583627\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAD9FJREFUeJzt3XGIZWd5x/Hvz02jtE20dFeI2V03pRtw1FLTIY0INcW0\nbAJm/7DVTRFrCW61jZTaCimWKPGf2lJLhW112warEGP0Dx1wJaU2ISAmzUjSaFYia4zJrqFZbYx/\niMbQp3/cu/V6M7v3zMyZufe+9/uBgXvOfTP3eTOzzz77nPO+J1WFJKktz5t2AJKk/pncJalBJndJ\napDJXZIaZHKXpAaZ3CWpQSZ3SWqQyV2SGmRyl6QGnTetD965c2ft27dvWh8vSXPpy1/+8neqatek\ncVNL7vv27WN1dXVaHy9JcynJt7qMsy0jSQ0yuUtSg0zuktQgk7skNcjkLkkNmpjck9yS5MkkXz3L\n+0nyoSQnkjyY5LL+w5QkrUeXyv2jwIFzvH81sH/4dRj4x82HJUnajIn3uVfV3Un2nWPIQeBjNXhe\n3z1JXpTkoqp6oqcYJemn3HrvY3z2gVPTDmPDll5yIe99/cu39DP66LlfDDw+cnxyeO45khxOsppk\n9fTp0z18tKRF9NkHTnH8ie9PO4yZtq0rVKvqKHAUYHl52SdzS9qwpYsu5JN/+OpphzGz+qjcTwF7\nRo53D89Jkqakj+S+ArxleNfMFcDT9tslabomtmWSfAK4EtiZ5CTwXuBnAKrqw8Ax4BrgBPAD4A+2\nKlhJUjdd7pa5bsL7BfxxbxFJkjbNFaqS1CCTuyQ1aGoP65CkrsYXLR1/4vssXXThFCOafVbukmbe\n+KKlpYsu5OCvrrlWUkNW7pLmgouW1sfKXZIaZOUuaWad6bXbY18/K3dJM2s0sdtjXx8rd0kzzV77\nxli5S1KDrNwlzQzvZ++PlbukmeH97P2xcpc0U+yx98PKXZIaZHKXpAaZ3CWpQSZ3SWqQyV2SGmRy\nl6QGeSukpKlzg7D+WblLmjo3COuflbukmeDipX5ZuUtSg6zcpSkY3yBr0dlr75+VuzQF4xtkLTp7\n7f2zcpemxB6ztpKVuyQ1yOQuSQ0yuUtSg0zuktQgk7skNahTck9yIMnDSU4kuXGN9/cmuTPJ/Uke\nTHJN/6FKkrqamNyT7ACOAFcDS8B1SZbGhv0lcHtVvQo4BPxD34FKkrrrUrlfDpyoqkeq6hngNuDg\n2JgCziwveyHw7f5ClCStV5dFTBcDj48cnwR+fWzM+4B/S/JO4OeAq3qJTpK0IX1dUL0O+GhV7Qau\nAT6e5DnfO8nhJKtJVk+fPt3TR0uSxnWp3E8Be0aOdw/PjboeOABQVV9K8gJgJ/Dk6KCqOgocBVhe\nXq4NxizNjbNtEOZGWdpqXSr3+4D9SS5Jcj6DC6YrY2MeA14HkORlwAsAS3MtvLNtEOZGWdpqEyv3\nqno2yQ3AHcAO4JaqeijJzcBqVa0Afwb8U5I/ZXBx9a1VZWUu4QZhmo5Ou0JW1THg2Ni5m0ZeHwde\n029okqSNcoWqJDXI/dyloa14OpIXTjUtVu7S0FY8HckLp5oWK3dphBc/1Qord0lqkMldkhpkcpek\nBpncJalBJndJapB3y2jhnbm/3XvS1RIrdy280cTuPelqhZW7hPe3qz1W7pLUIJO7JDXI5C5JDTK5\nS1KDTO6S1CCTuyQ1yFshNfO24iEao1y8pBZZuWvmbcVDNEa5eEktsnLXXHCRkbQ+Vu6S1CCTuyQ1\nyOQuSQ0yuUtSg0zuktQgk7skNcjkLkkNMrlLUoNM7pLUIJO7JDWoU3JPciDJw0lOJLnxLGPemOR4\nkoeS3NpvmJKk9Zi4t0ySHcAR4LeAk8B9SVaq6vjImP3AXwCvqaqnkrx4qwKWJE3WpXK/HDhRVY9U\n1TPAbcDBsTFvA45U1VMAVfVkv2FKktajS3K/
GHh85Pjk8NyoS4FLk3wxyT1JDvQVoCRp/fra8vc8\nYD9wJbAbuDvJK6vqe6ODkhwGDgPs3bu3p4+WJI3rUrmfAvaMHO8enht1Elipqh9X1TeBrzNI9j+l\nqo5W1XJVLe/atWujMUuSJuiS3O8D9ie5JMn5wCFgZWzMZxhU7STZyaBN80iPcUqS1mFicq+qZ4Eb\ngDuArwG3V9VDSW5Ocu1w2B3Ad5McB+4E3l1V392qoCVJ59ap515Vx4BjY+duGnldwLuGX5ozW/0A\n6s3yAdbS+rlCVVv+AOrN8gHW0vr5gGwBPoBaao2VuyQ1yOQuSQ0yuUtSg0zuktQgk7skNcjkLkkN\n8lbIBXZm8ZKLhKT2WLkvsNHE7iIhqS1W7gvOxUtSm6zcJalBVu5zbLMbftlrl9pl5T7HNrvhl712\nqV1W7nPOnrmktVi5S1KDrNxn2KSeuj1zSWdj5T7DJvXU7ZlLOhsr9xlnT13SRli5S1KDTO6S1CDb\nMjPIDb0kbZaV+wxyQy9Jm2XlPqO8kCppM6zcJalBVu5T4OIkSVvNyn0KXJwkaatZuU+JPXVJW8nK\nXZIaZOW+jbx/XdJ2sXLfRt6/Lmm7WLlvM3vtkrZDp8o9yYEkDyc5keTGc4x7Q5JKstxfiJKk9ZqY\n3JPsAI4AVwNLwHVJltYYdwHwJ8C9fQcpSVqfLm2Zy4ETVfUIQJLbgIPA8bFx7wc+ALy71wjniIuT\nJM2KLm2Zi4HHR45PDs/9vySXAXuq6nPn+kZJDidZTbJ6+vTpdQc761ycJGlWbPqCapLnAR8E3jpp\nbFUdBY4CLC8v12Y/exZ5wVTSLOhSuZ8C9owc7x6eO+MC4BXAXUkeBa4AVryoKknT06Vyvw/Yn+QS\nBkn9EPB7Z96sqqeBnWeOk9wF/HlVrfYb6uw4W2/dnrqkWTGxcq+qZ4EbgDuArwG3V9VDSW5Ocu1W\nBziLztZbt6cuaVZ06rlX1THg2Ni5m84y9srNhzX77K1LmmVuPyBJDXL7gXVw4y9J88LKfR3c+EvS\nvLByXyd77ZLmgZW7JDXIyr0De+2S5o2Vewf22iXNGyv3juy1S5onVu6S1CCTuyQ1yOQuSQ0yuUtS\ng0zuktQgk7skNchbIces9SAOFy9JmjdW7mPWehCHi5ckzRsr9zW4YEnSvLNyl6QGLXzlPt5jt78u\nqQULX7mP99jtr0tqwcJX7mCPXVJ7Fr5yl6QWmdwlqUEL05ZZa3ESeAFVUpsWpnJfa3ESeAFVUpsW\npnIHL5xKWhwLU7lL0iIxuUtSg0zuktQgk7skNcjkLkkN6pTckxxI8nCSE0luXOP9dyU5nuTBJF9I\n8tL+Q5UkdTUxuSfZARwBrgaWgOuSLI0Nux9YrqpfAT4N/HXfgUqSuutSuV8OnKiqR6rqGeA24ODo\ngKq6s6p+MDy8B9jdb5iSpPXoktwvBh4fOT45PHc21wOf30xQkqTN6XWFapI3A8vAa8/y/mHgMMDe\nvXv7/GhJ0ogulfspYM/I8e7huZ+S5CrgPcC1VfWjtb5RVR2tquWqWt61a9dG4l23W+99jDd95Etr\n7isjSa3qktzvA/YnuSTJ+cAhYGV0QJJXAR9hkNif7D/MjTuzYZgbhElaJBPbMlX1bJIbgDuAHcAt\nVfVQkpuB1apaAf4G+HngU0kAHquqa7cw7nVxwzBJi6ZTz72qjgHHxs7dNPL6qp7jkiRtgitUJalB\nJndJapDJXZIaZHKXpAY195i98Qdh+wBsSYuoucp9/EHY3t8uaRE1V7mD97VLUnOVuySpocr9TK/d\nHrskNVS5u4eMJP1EM5U72GuXpDOaqdwlST9hcpekBpncJalBJndJapDJXZIaZHKXpAbN/a2QLl6S\npOea+8rdxUuS9FxzX7mDi5ckadzcV+6SpOeau8rdh3FI0mRzV7n7MA5JmmzuKnewxy5Jk8xd5S5J\nmszkLkkN
MrlLUoNM7pLUIJO7JDXI5C5JDTK5S1KD5u4+96WXuBpVkibplNyTHAD+HtgB/HNV/dXY\n+88HPgb8GvBd4E1V9Wi/oQ689/Uv34pvK0lNmdiWSbIDOAJcDSwB1yVZGht2PfBUVf0y8HfAB/oO\nVJLUXZee++XAiap6pKqeAW4DDo6NOQj86/D1p4HXJUl/YUqS1qNLcr8YeHzk+OTw3JpjqupZ4Gng\nF8e/UZLDSVaTrJ4+fXpjEUuSJtrWu2Wq6mhVLVfV8q5du7bzoyVpoXRJ7qeAPSPHu4fn1hyT5Dzg\nhQwurEqSpqBLcr8P2J/kkiTnA4eAlbExK8DvD1//DvAfVVX9hSlJWo+Jt0JW1bNJbgDuYHAr5C1V\n9VCSm4HVqloB/gX4eJITwP8w+AtAkjQlne5zr6pjwLGxczeNvP4h8Lv9hiZJ2qhMq3uS5DTwrQ3+\n5zuB7/QYzjxwzovBOS+Gzcz5pVU18Y6UqSX3zUiyWlXL045jOznnxeCcF8N2zNmNwySpQSZ3SWrQ\nvCb3o9MOYAqc82Jwzothy+c8lz13SdK5zWvlLkk6h5lO7kkOJHk4yYkkN67x/vOTfHL4/r1J9m1/\nlP3qMOd3JTme5MEkX0jy0mnE2adJcx4Z94YklWTu76zoMuckbxz+rB9Kcut2x9i3Dr/be5PcmeT+\n4e/3NdOIsy9JbknyZJKvnuX9JPnQ8P/Hg0ku6zWAqprJLwarYb8B/BJwPvBfwNLYmD8CPjx8fQj4\n5LTj3oY5/ybws8PX71iEOQ/HXQDcDdwDLE877m34Oe8H7gd+YXj84mnHvQ1zPgq8Y/h6CXh02nFv\ncs6/AVwGfPUs718DfB4IcAVwb5+fP8uV+yLuIz9xzlV1Z1X9YHh4D4ON3OZZl58zwPsZPATmh9sZ\n3BbpMue3AUeq6imAqnpym2PsW5c5F3DmOZovBL69jfH1rqruZrAdy9kcBD5WA/cAL0pyUV+fP8vJ\nvbd95OdIlzmPup7B3/zzbOKch/9c3VNVn9vOwLZQl5/zpcClSb6Y5J7hoy7nWZc5vw94c5KTDLY7\neef2hDY16/3zvi5z94BsDSR5M7AMvHbasWylJM8DPgi8dcqhbLfzGLRmrmTwr7O7k7yyqr431ai2\n1nXAR6vqb5O8msFmhK+oqv+ddmDzaJYr90XcR77LnElyFfAe4Nqq+tE2xbZVJs35AuAVwF1JHmXQ\nm1yZ84uqXX7OJ4GVqvpxVX0T+DqDZD+vusz5euB2gKr6EvACBnuwtKrTn/eNmuXkvoj7yE+cc5JX\nAR9hkNjnvQ8LE+ZcVU9X1c6q2ldV+xhcZ7i2qlanE24vuvxuf4ZB1U6SnQzaNI9sZ5A96zLnx4DX\nASR5GYPk3vLzOFeAtwzvmrkCeLqqnujtu0/7ivKEq83XMKhYvgG8Z3juZgZ/uGHww/8UcAL4T+CX\nph3zNsz534H/Bh4Yfq1MO+atnvPY2LuY87tlOv6cw6AddRz4CnBo2jFvw5yXgC8yuJPmAeC3px3z\nJuf7CeAJ4McM/iV2PfB24O0jP+Mjw/8fX+n799oVqpLUoFluy0iSNsjkLkkNMrlLUoNM7pLUIJO7\nJDXI5C5JDTK5S1KDTO6S1KD/AxzAhRcBKmafAAAAAElFTkSuQmCC\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x10d372dd0>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.plot(fpr, tpr)\n",
"print auc(fpr, tpr)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.602201537025\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAF4tJREFUeJzt3X9wVeWdx/H3NySRhgCWJmgKSLCLYFodxQzVcWZrt3Yb\n0Urd/kCZ6jbjVOrWTmfqbJed7mC1/2i3ulO37BZmtunqrIp1qs1Sqjttddg6Sg1IofxqKYiCQS4G\noSFAAvnuH/fChpDknuSec0/uk89rBrj33Oc+z/fkJh+enJ/m7oiISFjK0i5ARETip3AXEQmQwl1E\nJEAKdxGRACncRUQCpHAXEQmQwl1EJEAKdxGRACncRUQCVJ7WwDU1NV5fX5/W8CIiJWn9+vUH3b02\nX7vUwr2+vp62tra0hhcRKUlmtidKO22WEREJkMJdRCRACncRkQAp3EVEAqRwFxEJUN5wN7MfmdkB\nM/v9IK+bmT1qZjvNbJOZzYu/TBERGY4oM/cfA01DvH4DMDv35y7g3wsvS0RECpH3OHd3X2tm9UM0\nWQg85tn79b1qZuebWZ27t8dUY+wef/sgP33nUNplyBj25z93cvRoZ6JjnOo5Se+pk4mOEYX76b8i\ntD3z19DKrIwyG3puWl7ZRXnlsUjjFtu0roM8+Zm7Ex0jjm3u04C3+jzfm1t2DjO7y8zazKwtk8nE\nMPTI/PSdQ2zpHJ0fuowNR4920t3dnegYvadO0tvbm+gYkbhHyetc2zN/DanMyjBsyDbllccoG9cT\ndeTgFPUMVXdfCawEaGxsTPXO3B+ufh/PXjk7zRJkDGtp+Q0Azc3NiY2x6v6lACy678HExoji2Yc3\nAHDLvfl3x+25/Q4AZj7+2JDtDqzYBMDUJZezaMUrAKxacs1ZbdZvWAzAVfOeiFRn8/PZz6KlqSVS\n+xFruTH3YPTP3PcBM/o8n55bJiIiKYkj3FuBO3JHzVwNHB7N29tFRMaCvJtlzOxJ4Dqgxsz2AvcB\nFQDu/kNgDbAA2Al0Acn9nikiIpFEOVrmtjyvO/DV2CoSEZGC6QxVEZEAKdxFRAKkcBcRCZDCXUQk\nQAp3EZEAKdxFRAKkcBcRCZDCXUQkQAp3EZEAKdxFRAKkcBcRCZDCXUQkQAp3EZEAKdxFRAKkcBcR\nCZDCXUQkQAp3EZEAKdxFRAKkcBcRCZDCXUQkQAp3EZEAladdgEgp61zXTtfGzLDf1/PuUQAOrNgU\nd0lnfOTE1YmPEcWc945FrqOs5sZIbXvaO6moqy68uIBp5i5SgK6NGXraO9MuY8ypqKum6oratMsY\n1TRzFylQRV01U5dcPrz3tKwHYGrz8N43HC/e/wQAi5Z8JrExonj54Q0ANET4Gu25/XsATF2yKNGa\nxgLN3EVEAqRwFxEJkDbLiMg5nlj3Jj/buC+Wvi5vPwHAUyteydv2S+1HAPhmhLanbW0/QkPdpJEV\nFzDN3EXkHD/buI+tuaAd7RrqJrHwimlplzHqaOYuIgNqqJvEqiXXFNzPs7kdqt9aMi9v2z2/yc7A\n4xh3rNPMXUQkQJHC3cyazGyHme00s6UDvH6Rmb1oZq+b2SYzWxB/qSIiElXecDezccBy4AagAbjN\nzBr6Nfsn4Gl3vxK4Ffi3uAsVEZHooszc5wM73X2Xu3cDTwEL+7Vx4PTu6snA2/GVKCIiwxVlh+o0\n4K0+z/cCH+3X5tvA/5jZ14AJwPWxVCciIiMS1w7V24Afu/t0YAHwuJmd07eZ3WVmbWbWlskM/2JL\nIiISTZRw3wfM6PN8em5ZX3cCTwO4+yvAeKCmf0fuvtLdG929sbZWF/0REUlKlHB/DZhtZrPMrJLs\nDtPWfm3eBD4BYGaXkg13Tc1FRFKSN9zd/SRwD/ACsI3sUTFbzOwBM7s51+xe4Mtm9jvgSeBL7u5J\nFS0iIkOLdIaqu68B1vRbtqzP463AtfGWJiIi
I6UzVEVEAqRwFxEJkMJdRCRACncRkQDpkr+BO7Tq\naY6sXp12GSVjx8SJ7JpYHf0NFR/I/vsP/zWscToqK5nS3c2e2+8Y1vuG488n3qWnt4cXbpw/7Pfe\n2n0KgBdaxxVcR0ftXdm+bvxK3rZT93VxYFoV336+ueBxP1m+HYAfROxrR8cO5kyZU/C4o4Vm7oE7\nsno1x7dvT7uMkrFrYjUdlZWJjzOlu5uL/9yZ6Bg9vT30em+iY8TtwLQqtl11zvmPRTFnyhwWXBzO\nBW01cx8Dxs+dy8zHH0u7jJJwXksLdUBzc7TZ3oEVmwCYuuTyBKsamczXbwHgq9//5bDfuyh3m7s4\nb9bxqXt/G6n9pwoeMWv9hsUALJ7XElOPpUUzdxGRACncRUQCpHAXEQmQwl1EJEDaoRqQznXtdG08\n+2KcZTU3Av+/40+G1vPuUSD616unvZOKumEcOilSJJq5B6RrY4ae9mQPr5OzVdRVU3WF7k0go49m\n7oGpqKs+67C8Pbd/D4CpSxalVVJJqWhZD8DU5tF3aKPIcGjmLiISIIW7iEiAFO4iIgFSuIuIBEjh\nLiISIIW7iEiAdCikjC1tLbD5mcFf35+7nnfLjQUPta/6PfZPOAxA96leek55wX0Ox4evyc7d1j53\n6bDfe3etU1ZmrF9T+PXcqy6aCsD6NQcK7ms4OitPUN19XiyfZaz2b4YLL0t8GM3cZWzZ/Ez2h6sI\n9k84TGflCQB6Tjm9vcUN90KUlRkV4yztMgpS3X0eFx6dnHYZ57rwMrjsc4kPo5m7jD0XXgbNPx/4\ntZbctb+bHyl8nA2LqQaumvdErNdHj+qB3PXcl33/2aKNOZDT13O/6t55qdYx1mjmLiISIIW7iEiA\nFO4iIgFSuIuIBEjhLiISIB0tI7Foa2tj8+ZkDzHMHMvQcayjsE66L87++/DSAV+u7Kqku6qb5ueb\nCxsH+GT5dgB+8Hwzb1QeAaD5+UkF9xvVBT1dVFVUFW08GV00c5dYbN68mf379yc6RsexDrpOdiU6\nRndVN0drjiY6RrFUVVQxZfyUtMuQlGjmLrG58MILaW4ufMY7mNOz6UebHh15J6fPVhzsOPcYrd+w\nGIDF81rOHOfe0lS849xXrRv4txMZGyLN3M2sycx2mNlOMxvwO8bMvmBmW81si5k9EW+ZIiIyHHln\n7mY2DlgOfBLYC7xmZq3uvrVPm9nAPwLXuvshM5uaVMEiIpJflM0y84Gd7r4LwMyeAhYCW/u0+TKw\n3N0PAbh7ca8QJMHZ9Mvn2fbyS2ctm9XxHlDg5ob23L9vJr/JYvJluwBY9d9Lmft2dofqqvt/lvi4\np2Xe2E1t/ayijSejS5TNMtOAt/o835tb1tclwCVm9rKZvWpmTQN1ZGZ3mVmbmbVlMpmRVSxjwraX\nXyLzxu60yyhptfWzuPTa69IuQ1IS1w7VcmA2cB0wHVhrZpe5+3t9G7n7SmAlQGNjY+lcIk9SUVs/\ni0X3PXjm+ekdqsuaHhzsLfmd2aFaQB8Rnd6h2vQ3D57ZoXp/ES8cJmNblJn7PmBGn+fTc8v62gu0\nunuPu+8G/kA27EVEJAVRwv01YLaZzTKzSuBWoLVfm+fIztoxsxqym2l2xViniIgMQ95wd/eTwD3A\nC8A24Gl332JmD5jZzblmLwDvmtlW4EXg79393aSKFhGRoUXa5u7ua4A1/ZYt6/PYgW/k/sggOte1\n07UxuR3JPe2dVNRVJ9a/iJQOXX6giLo2Zuhp70ys/4q6aqquqE2sfxEpHbr8QJFV1FUzdcnlaZch\nIoHTzF1EJEAKdxGRACncRUQCpHAXEQmQwl1EJEAKdxGRACncRUQCpHAXEQmQwl1EJEAKdxGRACnc\nRUQCpGvLjEKHVj3NkdWrY+nr+PbtjJ87N5a+hpI5lqHjWMeZuyX1deDICQ4ePTGs/q5r7wbgkZbP\nnll23N5i
vM84c1ejkVj27mEAHiigj6humpYd67uvvcLW9iM01E1KfMy+tvzvPv7w23eKOuZADu7t\npGa6rlZabJq5j0JHVq/m+PbtsfQ1fu5cJt10Uyx9DaXjWAddJ7sGfO3g0RN0nThZ8BjjfQaTT80v\nuJ80NNRNYuEV/W89nKw//PYdDu5N7iqkUdVMr+aS+RekXcaYo5n7KDV+7lxmPv5Y2mUMS1V5FY82\nPXrO8kUrXgGDVc3R7x+66s2lADwU971OWyZn+x9GLSO1fkN2rNtvSu++qTXTq7nl3nmpjS/p0cxd\nRCRACncRkQAp3EVEAqRwFxEJkMJdRCRACncRkQAp3EVEAqRwFxEJkMJdRCRACncRkQAp3EVEAqRr\ny/TTua6dro2ZRPruae+kok5XxxOR5Gnm3k/Xxgw97clcSa+irpqqK2oT6VtEpC/N3AdQUVfN1CWX\np12GiMiIRZq5m1mTme0ws51mtnSIdp81MzezxvhKFBGR4cob7mY2DlgO3AA0ALeZWcMA7SYCXwfW\nxV2kiIgMT5SZ+3xgp7vvcvdu4Clg4QDtvgM8BByPsT4RERmBKOE+DXirz/O9uWVnmNk8YIa7/zzG\n2kREZIQKPlrGzMqAR4B7I7S9y8zazKwtk0nmcEMREYkW7vuAGX2eT88tO20i8BHgJTN7A7gaaB1o\np6q7r3T3RndvrK3VIYEiIkmJEu6vAbPNbJaZVQK3Aq2nX3T3w+5e4+717l4PvArc7O5tiVQsIiJ5\n5Q13dz8J3AO8AGwDnnb3LWb2gJndnHSBIiIyfJFOYnL3NcCafsuWDdL2usLLGhtWrFnB7m27z1k+\ntXYyAAceHvSUglFn3NFxnJpwKu0yRCRHlx9I0e5tuxl3dFzaZcTi1IRTzLp0VtpliEiOLj+QslMT\nTvHgvQ+etWzP7XcAMPORf02jJBEJgGbuIiIBUriLiARI4S4iEiCFu4hIgBTuIiIBKrmjZVrXfpPD\nR7YU1Mc74++k3E+y9rlzz8EaV1MHwPbn2gsaI4qrPpS95d7a5y49a7l9/CQAe/otL1V31zplZcb6\nNdEP+5w8J/vv+jWr4y3m/UehcgJsWBxvvwPo7NxKdfU5V8cWKYqSm7nvynTS1V34yTLlnGR877EY\nKpJ8ysqMinGWdhlZlRNgQnGua1Rd3cCFF3y6KGOJ9FdyM/dXDt4OwKol14y4j5++/kfYv5n5hz8M\nzWdfpfjAik0ARbnN3tLcGaiDHuf++GOJ1zBarbo/+7Vpuu/BPC1FZCAlN3MXEZH8FO4iIgFSuIuI\nBEjhLiISIIW7iEiAFO4iIgEquUMhrz7Sy5VHe88csjgSPdN7ofsCDry5GPr109PeSUVddaFlioik\nquRm7lce7WVad3L9V9RVU3WFbt4tIqWt5GbuAPsqYXYBJxlVvP5H2L+LqRc9cc5JTCIiISi5mbuI\niOSncBcRCZDCXUQkQAp3EZEAldwO1SnvHaCy58SZKyeOxIkbPg/d3ex54m349cD97Jg4kV0Tkz0k\nckJFORO6jpyzLse3b2f83LmJji0iYdPMfRC7JlbTUVmZ6BgTuo5Q0/H2OcvHz53LpJtuSnRsEQlb\nyc3cO86fCsB1y0d+rfPzctdzn7n4g9A8cD/ntbRQBzQ3N494nHyan28GJnFHU0tiY4jI2KSZu4hI\ngBTuIiIBUriLiARI4S4iEiCFu4hIgCKFu5k1mdkOM9tpZksHeP0bZrbVzDaZ2a/MbGb8pYqISFR5\nw93MxgHLgRuABuA2M2vo1+x1oNHdLweeAb4bd6EiIhJdlOPc5wM73X0XgJk9BSwEtp5u4O4v9mn/\nKvDFOIvsa3zXIcpOdbPq/nN+gYgsc/lfQTesWge8OXA/Gc+ewFTIOPnM6ngvO8a65MYoVZk3dlNb\nPyvtMkRKVpTNMtOAt/o835tbNpg7gV8M9IKZ3WVmbWbWlslkolcpY05t/S
wuvfa6tMsQKVmxnqFq\nZl8EGoGPDfS6u68EVgI0Njb6SMY4XvV+ABbd9+DIigSeyp2huuijQPPA/bS0ZM8aXZT4GaqwrGnk\n6yIiMpAo4b4PmNHn+fTcsrOY2fXAt4CPufuJeMoTEZGRiLJZ5jVgtpnNMrNK4FagtW8DM7sSWAHc\n7O4H4i9TRESGI2+4u/tJ4B7gBWAb8LS7bzGzB8zs5lyzfwaqgZ+Y2UYzax2kOxERKYJI29zdfQ2w\npt+yZX0eXx9zXSIiUgCdoSoiEqCSu577hMqSK1lEpOhKLilnfmBC2iWIiIx62iwjIhIghbuISIAU\n7iIiASq5be6VH9Q2dxGRfEou3M//9IfSLkFEZNQruXAvlsyxDB3HOs5c3CsJOzp2MGfKnMT6F5Gx\nS9vcB9FxrIOuk12JjjFnyhwWXLwg0TFEZGzSzH0IVeVVPNr0aNpliIgMm2buIiIBUriLiARI4S4i\nEiCFu4hIgBTuIiIBUriLiARI4S4iEiCFu4hIgBTuIiIBUriLiARI4S4iEiCFu4hIgBTuIiIBUriL\niARI4S4iEiCFu4hIgBTuIiIBUriLiARI4S4iEqBI4W5mTWa2w8x2mtnSAV4/z8xW5V5fZ2b1cRcq\nIiLR5Q13MxsHLAduABqA28ysoV+zO4FD7v4XwL8AD8VdqIiIRBdl5j4f2Onuu9y9G3gKWNivzULg\nP3OPnwE+YWYWX5kiIjIc5RHaTAPe6vN8L/DRwdq4+0kzOwx8ADgYR5F9NT3+LAcmTCqoj4PVk6g5\nNpn7d10Lywb+JeM8ex9lPp5nH95Q0FgiaTm4t5Oa6dVplyEpKeoOVTO7y8zazKwtk8kUc+iz1HQe\nYfY7b8MQv1yU+XgmldcVsSqReNVMr+aS+RekXYakJMrMfR8wo8/z6bllA7XZa2blwGTg3f4duftK\nYCVAY2Ojj6Tg52+/ZSRvExEZU6LM3F8DZpvZLDOrBG4FWvu1aQX+Nvf4c8Cv3X1E4S0iIoXLO3PP\nbUO/B3gBGAf8yN23mNkDQJu7twL/ATxuZjuBDrL/AYiISEqibJbB3dcAa/otW9bn8XHg8/GWJiIi\nI6UzVEVEAqRwFxEJkMJdRCRACncRkQAp3EVEAmRpHY5uZhlgzwjfXkMClzYY5bTOY4PWeWwoZJ1n\nunttvkaphXshzKzN3RvTrqOYtM5jg9Z5bCjGOmuzjIhIgBTuIiIBKtVwX5l2ASnQOo8NWuexIfF1\nLslt7iIiMrRSnbmLiMgQRnW4j8Ubc0dY52+Y2VYz22RmvzKzmWnUGad869yn3WfNzM2s5I+siLLO\nZvaF3Ge9xcyeKHaNcYvwvX2Rmb1oZq/nvr8XpFFnXMzsR2Z2wMx+P8jrZmaP5r4em8xsXqwFuPuo\n/EP28sJ/Ai4GKoHfAQ392vwd8MPc41uBVWnXXYR1/jhQlXt891hY51y7icBa4FWgMe26i/A5zwZe\nB96fez417bqLsM4rgbtzjxuAN9Kuu8B1/ktgHvD7QV5fAPwCMOBqYF2c44/mmftYvDF33nV29xfd\nvSv39FWyd8YqZVE+Z4DvAA8Bx4tZXEKirPOXgeXufgjA3Q8Uuca4RVlnB07fIHky8HYR64udu68l\ne3+LwSwEHvOsV4HzzSy2e3uO5nAf6Mbc0wZr4+4ngdM35i5VUda5rzvJ/s9fyvKuc+7X1Rnu/vNi\nFpagKJ/zJcAlZvaymb1qZk1Fqy4ZUdb528AXzWwv2ftHfK04paVmuD/vwxLpZh0y+pjZF4FG4GNp\n15IkMysDHgG+lHIpxVZOdtPMdWR/O1trZpe5+3upVpWs24Afu/vDZnYN2bu7fcTde9MurBSN5pn7\ncG7MzVA35i4hUdYZM7se+BZws7ufKFJtScm3zhOBjwAvmdkbZLdNtpb4TtUon/NeoNXde9x9N/AH\nsmFfqqKs853A0wDu/gownuw1WEIV6e
d9pEZzuI/FG3PnXWczuxJYQTbYS307LORZZ3c/7O417l7v\n7vVk9zPc7O5t6ZQbiyjf28+RnbVjZjVkN9PsKmaRMYuyzm8CnwAws0vJhnumqFUWVytwR+6omauB\nw+7eHlvvae9RzrO3eQHZGcufgG/llj1A9ocbsh/+T4CdwG+Bi9OuuQjr/EvgHWBj7k9r2jUnvc79\n2r5EiR8tE/FzNrKbo7YCm4Fb0665COvcALxM9kiajcBfp11zgev7JNAO9JD9TexO4CvAV/p8xstz\nX4/NcX9f6wxVEZEAjebNMiIiMkIKdxGRACncRUQCpHAXEQmQwl1EJEAKdxGRACncRUQCpHAXEQnQ\n/wFEBbJ3oVzBWQAAAABJRU5ErkJggg==\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x10d1fb0d0>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"kf = KFold(features.shape[0], n_folds=10)\n",
"a = []\n",
"for train_index, test_index in kf:\n",
" train_features = features.loc[features.index[train_index]]\n",
" test_features = features.loc[features.index[test_index]]\n",
" train_labels = labels.loc[labels.index[train_index]]\n",
" test_labels = labels.loc[labels.index[test_index]]\n",
" lg = LinearRegression()\n",
" lg.fit(train_features, train_labels)\n",
" pred = lg.predict(test_features)\n",
" fpr, tpr, _ = roc_curve(test_labels, pred)\n",
" plt.plot(fpr, tpr)\n",
" a.append(auc(fpr, tpr))\n",
"print np.mean(a)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.616472742943\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAF1RJREFUeJzt3X9wVeWdx/H3NyERQ0BLE0wKSnCLYFYctRmrdaelW7cT\n0RVn2op21DbrVOquznba6S4dd7TamR37w3amO+wWdtrblRkVcaummqrTFsauo9SoFAqIpREUTOQC\nVgzhRwLf/ePe6DUm5CQ5557cJ5/XDMO95z73eb4nCR+enHvOc8zdERGRsJSlXYCIiMRP4S4iEiCF\nu4hIgBTuIiIBUriLiARI4S4iEiCFu4hIgBTuIiIBUriLiARoUloD19TUeENDQ1rDi4iUpBdeeGGv\nu9cO1y61cG9oaKC9vT2t4UVESpKZ7YzSTodlREQCpHAXEQmQwl1EJEAKdxGRACncRUQCNGy4m9nP\nzGyPmf1xiNfNzH5sZtvNbKOZXRB/mSIiMhJRZu4/B5pP8PplwNz8n5uA/xp7WSIiMhbDnufu7k+b\nWcMJmiwG7vXc/fqeM7NTzaze3TtjqjFYq97Yyy/efCvtMkrDO11wMJv4MH3dxzh28Fji45SiQ2Vl\nHLHytMtITF9FJccmVSQ+zuQphzjjSBf3X3VzouPEccx9JvB6wfNd+W0fYGY3mVm7mbVns8n/Qx3v\nfvHmW2zuPpR2GaXhYBaOHkx8mGMHj3G893ji45SiI1ZOn1naZSTm2KQKvCz5/7zKy/sorzyS+DhF\nvULV3VcCKwGampp0Z27gr6tP5uHz56ZdxviX+Vru75bHEx1m5/U3ADB71b2JjlOKVt+5DIAld9yd\nciXJyGQyALS0tACwZMWzAKxeenGs46x7qP8o99di7XegOGbuu4HTC57Pym8TEZGUxBHurcAN+bNm\nLgLe1vF2EZF0DXtYxszuBxYCNWa2C7gDqABw958AbcAiYDvQA7QkVayIiEQT5WyZa4d53YF/iq0i\nEREZM12hKiISIIW7iEiAFO4iIgFSuIuIBEjhLiISIIW7iEiAFO4iIgFSuIuIBEjhLiISIIW7iEiA\nFO4iIgFSuIuIBEjhLiISIIW7iEiAFO4iIgFSuIuIBEjhLiISIIW7iEiAhr3NnoiUpjWvrKGtoy22\n/ubs/wsALU+EeZvk0/afBry3fzsqD+SfT4t1nC/F2tvQNHMXCVRbRxvb9m9LuwwZhFny0auZu0jA\n5k2fR6Y5E0tfq9cvA+D25rtj6W+8yXTmvk535/dvyYpnc9ubL451nN/97+Wx9jcUhbuIxK57fSc9\nG7JplzEivfsOArBnxUYAbu7se9/zuJR92DluFmufg46T+AgiMuH0bMjS29mddhnj0nEzesuSD3fN\n3EUkERX11cxYem7aZURWkXkBgBktuZpvzR+WWR3zPrz0aHHm1Jq5i4gESOEuIhIgHZYRkaK4b/1r\nPLphd9plDOmMfbnz2vvPktnSeYDG+njPcS8mzdxFpCge3bCbLZ0H0i4jssb6aSw+b2baZYyaZu4i\nUjSN9dNYvTTe88bjksm8DMD3W8ZnfSOlmbuISIAihbuZNZvZNjPbbmbLBnn9DDNba2YvmdlGM1sU\nf6kiIhLVsOFuZuXAcuAyoBG41swaBzT7N+BBdz8fuAb4z7gLFRGR6KLM3C8Etrt7h7sfBR4AFg9o\n40D/x8qnAG/EV6KIiIxUlA9UZwKvFzzfBXx8QJtvA0+Z2a3AFODSWKqTkrHx10+w9Zl1yQ3Qmf/7\ntQ8cFYzV4d7c2RyT70x2nGLoX6K3f8GvscrueJXahjmx9CXJi+sD1WuBn7v7LGARsMoGWdPSzG4y\ns3Yza89mS2tRITmxrc+sI7vj1bTLkATVNszh7EsWpl2GRBRl5r4bOL3g+az8tkI3As0A7v6smU0G\naoA9hY3cfSWwEqCpqclHWbOMU7UNc1hyR0LL
wWbyy6S2JLvc7M7rbwBgdlL7UUT9N50IdYleObEo\nM/fngblmNsfMKsl9YNo6oM1rwGcAzOxsYDKgqbmISEqGDXd37wNuAZ4EtpI7K2azmd1lZlfmm30D\n+IqZ/QG4H/iyu2tmLiKSkkhXqLp7G9A2YNvtBY+3AJfEW5qIiIyWrlAVEQmQwl1EJEAKdxGRACnc\nRUQCpCV/Y/LW6gc58NhjI3rPkcu+AMDOH34niZKKqv/Kzv7zxGPXlV/R4rcJ9Z93+OWXmTx/fqJj\nAKx5ZQ1tHW3DNxyDbfu3MW/6vETHAGhvb2fTpk3v29a77yDw3n1J4b2bYfQvrTvedHV1UVdXl3YZ\nsdHMPSYHHnuMwy+Pzx9aiW7y/PlMu+KKxMdp62hj2/5tiY4xb/o8Fp2Z/AKtmzZtoqurK/FxklZX\nV8eCBQvSLiM2mrnHaPL8+cxedW/k9ie99CcAZn8x+nvGq/61WBK7svPdK1RL/2vVb970eWSaM2mX\nEYu6ujpaWlrefb5nxUYAZrSc++62/tvXhXIzjPFOM3cRkQAp3EVEAqRwFxEJkMJdRCRA+kBVIule\n30nPhqEX+jznyEXAex+kxa7ri7m/k+q/yP5h/2UA7NlZ+vvTf9pj4fe+t7ObivrqtEoSNHOXiHo2\nZOnt7E67DCkRFfXVVJ1Xm3YZE5pm7hJZRX01M5aeO+hra++8D4AlS69KZvDMt3J/tzyeTP9F9q9P\n/AiATPPVKVcydv0XKhWe9ijp08xdRCRACncRkQAp3EVEAqRwFxEJkMJdRCRACncRkQDpVMjxoj0D\nmx5Ku4qh9V9E1H9K4kCd+b/7V2+MffxNUJf8cqy7d99P15u/jL3fPQcOs7f7yLvPP1H2OgCrHkvo\n61VE1R96B4BVj5345/dva49RVVnOCy+eUoyyxq3yk3dw7FBD4uNo5j5ebHooF2AyuLoFsODziQ/T\n9eYv6e7eEnu/e7uP0HP0WOz9lpKqynJqqk9Ku4zUHTvUQO9bf5P4OJq5jyd1C8bvRTr9l5YPVd9r\ny/KvJ7SeexFVVzfysQvui7XP7z2fW8t89dLcWuYtT+TWPs8sKv313DOZ3D4UrucuQ3t47YtFGUcz\ndxGRACncRUQCpHAXEQmQwl1EJEAKdxGRAOlsGYlFtifL/sP73z0LJAnVb1YzZe+UxPoH+NhHOwBY\ns3ZZrP3OONoHwLJ7HgVgat9UqiZVkeks/bNlurq6qKurS7sMGUAzd4nF/sP76entSXSMKXunUNlT\nmegYxVI1qYrpJ09Pu4xY1NXVsWBB8heYycho5i6xqaqoItOc3Ew005mB6cmeT/3Ci7krcb+wON7z\n9ZeseP957iJJizRzN7NmM9tmZtvNbNDfV83sajPbYmabzSzeK0BERGREhp25m1k5sBz4O2AX8LyZ\ntbr7loI2c4FvAZe4+1tmNiOpgkVEZHhRZu4XAtvdvcPdjwIPAIsHtPkKsNzd3wJw9z3xlikiIiMR\nJdxnAq8XPN+V31boLOAsM3vGzJ4zs+bBOjKzm8ys3czas9ns6CoWEZFhxXW2zCRgLrAQuBb4bzM7\ndWAjd1/p7k3u3lRbWxvT0CIiMlCUcN8NnF7wfFZ+W6FdQKu797r7q8Ar5MJeRERSECXcnwfmmtkc\nM6sErgFaB7R5hNysHTOrIXeYpiPGOkVEZASGDXd37wNuAZ4EtgIPuvtmM7vLzK7MN3sS2GdmW4C1\nwDfdfV9SRYuIyIlFuojJ3duAtgHbbi947MDX839kCN3rO+nZ8N4Hyb2zjgOw5/cb37uNXf9NMcaZ\n3s5uKuqr0y5DRCLS8gNF1LMhS29nd9pljEpFfTVV5+lDcJFSoeUHiqyivpoZS8/NPX7pTwDMuHzu\nezeeHq+32RORkqKZu4hIgBTuIiIBUriLiARI4S4iEiCFu4hIgBTuIiIBUriLiARI4S4iEiCFu4hI\ngBTuIiIB
UriLiARIa8sEbs0ra2jraBu+4Rid1ttDVUVV4uMkrevNbo529/HEfeti7fejR/qoOmkS\nD9/zYqz9SunZu6ubmlnJr7CqmXvg2jra2LZ/W+LjVFVUMX3y9MTHSdrR7j7Kjnns/VadNIma6srY\n+5XSUzOrmrMuPC3xcTRznwDmTZ9HpjmT6Bir1y9LtP9iOl5u3PaDhWmXITImmrmLiARI4S4iEiCF\nu4hIgBTuIiIBUriLiARI4S4iEiCFu4hIgBTuIiIBUriLiARI4S4iEiCFu4hIgLS2zADd6zvp2ZAd\n8fvKai4HYM+KjUO26e3spqI++dXgREQ0cx+gZ0OW3s7uRPquqK+m6rzaRPoWESmkmfsgKuqrmbH0\n3BG9Z+f1PwBgxtIlSZQkIjIikWbuZtZsZtvMbLuZDbm2q5l9zszczJriK1FEREZq2HA3s3JgOXAZ\n0Ahca2aNg7SbCvwzsD7uIkVEZGSizNwvBLa7e4e7HwUeABYP0u47wHeBwzHWJyIioxDlmPtM4PWC\n57uAjxc2MLMLgNPd/XEz+2aM9UkCNv76CbY+sy7WPrM7XqW2YU6sfYrI6I35bBkzKwN+CHwjQtub\nzKzdzNqz2ZGfbijx2PrMOrI7Xo21z9qGOZx9ycJY+xSR0Ysyc98NnF7wfFZ+W7+pwDnAOjMDqANa\nzexKd28v7MjdVwIrAZqamuK/C7FEVtswhyV33J12GSKSkCgz9+eBuWY2x8wqgWuA1v4X3f1td69x\n9wZ3bwCeAz4Q7CIiUjzDhru79wG3AE8CW4EH3X2zmd1lZlcmXaCIiIxcpIuY3L0NaBuw7fYh2i4c\ne1kTQ+e+Tg71HGbZb38KR8/MbbxnyMsIRmVq31SqJlWR6cy8uy3rlQBkMpmh3jYudXV1UVdXl3YZ\nIiVByw+k6FDPYexYsmNUTapi+snTkx2kSOrq6liwYEHaZYiUBC0/kDIvh7u/cTdkcguP0fJ44mOu\nvjP328GSlpbExxKRdGjmLiISIIW7iEiAFO4iIgFSuIuIBEjhLiISoJI7W+aVp5p5p29X7P0ePXac\n3mNOeU09AC8/0jmi99un+wDY+cjZkd8zdcq3AXj6kbOZfMohDpedzM7HLh/RuKNRNe9NAFYVYaxS\nUzv1NQ69c0baZYiMmWbueb3HnOPH01vu5nDZyRwoOzW18SXn0DtncHjfJ9IuQ2TMSm7mftZnn0ik\n3yUrngXgP5gCMIrb7N0AwOxV90Z+zzutTwHwyau2jmisseo/z/16LRz2AQ/f8yKT0y5CJAaauYuI\nBEjhLiISIIW7iEiAFO4iIgFSuIuIBEjhLiISoJI7FbJ7fSc9G+K/ufbNnbmLkHrppqK+Ovb+RUSK\nqeRm7j0bsvR2difWf0V9NVXn1SbWv4hIMZTczB1yATzSi4yGc2v+IqbVMfcrIpKGkpu5i4jI8BTu\nIiIBUriLiARI4S4iEqCS+0D16Gs78Z4edl7/g1j7/XLnAQB2/t+0Ub3/8MsvM3n+/DhLEhEZNc3c\nYzJ5/nymXXFF2mWIiAAlOHOvPGM2ADOWLom1339591TIi2PtV0QkDZq5i4gESOEuIhIghbuISIAU\n7iIiAVK4i4gEKFK4m1mzmW0zs+1mtmyQ179uZlvMbKOZ/cbMZsdfqoiIRDVsuJtZObAcuAxoBK41\ns8YBzV4Cmtz9XOAh4HtxFyoiItFFOc/9QmC7u3cAmNkDwGJgS38Dd19b0P454Lo4iyz0l6436D1y\nmLV33hdrv/PfyF2huvrOR2Pt90TKFnw6P+YHfhlKVHbHq9Q2zCnqmCJSXFEOy8wEXi94viu/bSg3\nAr8a7AUzu8nM2s2sPZuN/25KEk1twxzOvmRh2mWISIJivULVzK4DmoBPDfa6u68EVgI0NTX5aMY4\nte4jACxZetXoihzCkvwVqncW8QrVf299Kjf2HXcXbUwRmRiihPtu4PSC57
Py297HzC4FbgM+5e5H\n4ilPRERGI8phmeeBuWY2x8wqgWuA1sIGZnY+sAK40t33xF+miIiMxLDh7u59wC3Ak8BW4EF332xm\nd5nZlflm3weqgTVmtsHMWofoTkREiiDSMXd3bwPaBmy7veDxpTHXJSIiY1ByS/7u3HeQg0f7uDX/\nAWhctnQeoLF+dDfqEBEZb7T8QF5j/TQWn3eiMzxFREpHyc3cZ394CgCrl56bciUiIuOXZu4iIgFS\nuIuIBEjhLiISoJI75l75kSlplyAiMu6VXLif+vd/lXYJIiLjXsmFe7GseWUNbR1twzccg+N+NWVW\nnugYIjIx6Zj7ENo62ti2f1uiY5RZORVl+v9VROKnZDmBedPnkWnOJNb/xb/6XWJ9i8jEppm7iEiA\nFO4iIgFSuIuIBEjhLiISIIW7iEiAFO4iIgFSuIuIBEjhLiISIIW7iEiAFO4iIgFSuIuIBEjhLiIS\nIIW7iEiAFO4iIgFSuIuIBEjhLiISIIW7iEiAFO4iIgFSuIuIBChSuJtZs5ltM7PtZrZskNdPMrPV\n+dfXm1lD3IWKiEh0w4a7mZUDy4HLgEbgWjNrHNDsRuAtd/8o8CPgu3EXKiIi0UWZuV8IbHf3Dnc/\nCjwALB7QZjHwP/nHDwGfMTOLr0wRERmJSRHazAReL3i+C/j4UG3cvc/M3gY+DOyNo8hCzfc9xp7J\nVXF3+wHOdRhG0yPrEhtjz5QpzDh4kIfveTGxMWRk9u7qpmZWddpliIxZUT9QNbObzKzdzNqz2Wwx\nhx4xw0j6l48ZBw/ysX29iY4hI1Mzq5qzLjwt7TJExizKzH03cHrB81n5bYO12WVmk4BTgH0DO3L3\nlcBKgKamJh9NwU988YrRvE1EZEKJMnN/HphrZnPMrBK4Bmgd0KYV+FL+8eeB37r7qMJbRETGbtiZ\ne/4Y+i3Ak0A58DN332xmdwHt7t4K/BRYZWbbgf3k/gMQEZGURDksg7u3AW0Dtt1e8Pgw8IV4SxMR\nkdHSFaoiIgFSuIuIBEjhLiISIIW7iEiAFO4iIgGytE5HN7MssHOUb68hgaUNxjnt88SgfZ4YxrLP\ns929drhGqYX7WJhZu7s3pV1HMWmfJwbt88RQjH3WYRkRkQAp3EVEAlSq4b4y7QJSoH2eGLTPE0Pi\n+1ySx9xFROTESnXmLiIiJzCuw30i3pg7wj5/3cy2mNlGM/uNmc1Oo844DbfPBe0+Z2ZuZiV/ZkWU\nfTazq/Pf681mdl+xa4xbhJ/tM8xsrZm9lP/5XpRGnXExs5+Z2R4z++MQr5uZ/Tj/9dhoZhfEWoC7\nj8s/5JYX/jNwJlAJ/AFoHNDmH4Gf5B9fA6xOu+4i7POngar845snwj7n200FngaeA5rSrrsI3+e5\nwEvAh/LPZ6RddxH2eSVwc/5xI7Aj7brHuM+fBC4A/jjE64uAXwEGXASsj3P88Txzn4g35h52n919\nrbv35J8+R+7OWKUsyvcZ4DvAd4HDxSwuIVH2+SvAcnd/C8Dd9xS5xrhF2WcHpuUfnwK8UcT6Yufu\nT5O7v8VQFgP3es5zwKlmVh/X+OM53Ae7MffModq4ex/Qf2PuUhVlnwvdSO5//lI27D7nf1093d0f\nL2ZhCYryfT4LOMvMnjGz58ysuWjVJSPKPn8buM7MdpG7f8StxSktNSP99z4ikW7WIeOPmV0HNAGf\nSruWJJlZGfBD4Mspl1Jsk8gdmllI7rezp81sgbv/JdWqknUt8HN3v8fMLiZ3d7dz3P142oWVovE8\ncx/Jjbk50Y25S0iUfcbMLgVuA6509yNFqi0pw+3zVOAcYJ2Z7SB3bLK1xD9UjfJ93gW0unuvu78K\nvEIu7EtVlH2+EXgQwN2fBSaTW4MlVJH+vY/WeA73iXhj7mH32czOB1aQC/ZSPw4Lw+yzu7/t7jXu\n3uDuDeQ+Z7jS3dvTKTcWUX62HyE3a8
fMasgdpukoZpExi7LPrwGfATCzs8mFe7aoVRZXK3BD/qyZ\ni4C33b0ztt7T/kR5mE+bF5GbsfwZuC2/7S5y/7gh981fA2wHfg+cmXbNRdjnXwNvAhvyf1rTrjnp\nfR7Qdh0lfrZMxO+zkTsctQXYBFyTds1F2OdG4BlyZ9JsAD6bds1j3N/7gU6gl9xvYjcCXwW+WvA9\nXp7/emyK++daV6iKiARoPB+WERGRUVK4i4gESOEuIhIghbuISIAU7iIiAVK4i4gESOEuIhIghbuI\nSID+H2o8i8+4ogC6AAAAAElFTkSuQmCC\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x1071a3390>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"kf = KFold(features.shape[0], n_folds=10)\n",
"a = []\n",
"for train_index, test_index in kf:\n",
" train_features = features.loc[features.index[train_index]]\n",
" test_features = features.loc[features.index[test_index]]\n",
" train_labels = labels.loc[labels.index[train_index]]\n",
" test_labels = labels.loc[labels.index[test_index]]\n",
" lg = LogisticRegression()\n",
" lg.fit(train_features, train_labels)\n",
" pred = lg.predict_proba(test_features)[:,1]\n",
" fpr, tpr, _ = roc_curve(test_labels, pred)\n",
" plt.plot(fpr, tpr)\n",
" a.append(auc(fpr, tpr))\n",
"print np.mean(a)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Decision Trees and Random Forests"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.metrics import accuracy_score"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n",
" max_features=None, max_leaf_nodes=None,\n",
" min_impurity_split=1e-07, min_samples_leaf=1,\n",
" min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
" presort=False, random_state=None, splitter='best')"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dt = DecisionTreeClassifier()\n",
"dt.fit(features, labels)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n",
" max_features=None, max_leaf_nodes=None,\n",
" min_impurity_split=1e-07, min_samples_leaf=1,\n",
" min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
" presort=False, random_state=None, splitter='best')\n"
]
}
],
"source": [
"print dt"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"1.0"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pred = dt.predict(features)\n",
"accuracy_score(labels, pred)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.65\n",
"0.8\n",
"0.55\n",
"0.8\n",
"0.7\n",
"0.6\n",
"0.6\n",
"0.55\n",
"0.526315789474\n",
"0.736842105263\n",
"0.651315789474\n"
]
}
],
"source": [
"kf = KFold(features.shape[0], n_folds=10)\n",
"a = []\n",
"for train_index, test_index in kf:\n",
" train_features = features.loc[features.index[train_index]]\n",
" test_features = features.loc[features.index[test_index]]\n",
" train_labels = labels.loc[labels.index[train_index]]\n",
" test_labels = labels.loc[labels.index[test_index]]\n",
" dt = DecisionTreeClassifier()\n",
" dt.fit(train_features, train_labels)\n",
" pred = dt.predict(test_features)\n",
" score = accuracy_score(test_labels, pred)\n",
" print score\n",
" a.append(score)\n",
"print np.mean(a)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from sklearn.ensemble import RandomForestClassifier"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.75\n",
"0.8\n",
"0.65\n",
"0.85\n",
"0.85\n",
"0.75\n",
"0.65\n",
"0.75\n",
"0.736842105263\n",
"0.947368421053\n",
"0.773421052632\n"
]
}
],
"source": [
"kf = KFold(features.shape[0], n_folds=10)\n",
"a = []\n",
"for train_index, test_index in kf:\n",
" train_features = features.loc[features.index[train_index]]\n",
" test_features = features.loc[features.index[test_index]]\n",
" train_labels = labels.loc[labels.index[train_index]]\n",
" test_labels = labels.loc[labels.index[test_index]]\n",
" dt = RandomForestClassifier(10)\n",
" dt.fit(train_features, train_labels)\n",
" pred = dt.predict(test_features)\n",
" score = accuracy_score(test_labels, pred)\n",
" print score\n",
" a.append(score)\n",
"print np.mean(a)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Boosting"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from sklearn.ensemble import AdaBoostClassifier"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.7\n",
"0.8\n",
"0.6\n",
"0.65\n",
"0.75\n",
"0.75\n",
"0.5\n",
"0.7\n",
"0.789473684211\n",
"0.789473684211\n",
"0.702894736842\n"
]
}
],
"source": [
"kf = KFold(features.shape[0], n_folds=10)\n",
"a = []\n",
"for train_index, test_index in kf:\n",
" train_features = features.loc[features.index[train_index]]\n",
" test_features = features.loc[features.index[test_index]]\n",
" train_labels = labels.loc[labels.index[train_index]]\n",
" test_labels = labels.loc[labels.index[test_index]]\n",
" dt = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),\n",
" algorithm=\"SAMME\",\n",
" n_estimators=200)\n",
" dt.fit(train_features, train_labels)\n",
" pred = dt.predict(test_features)\n",
" score = accuracy_score(test_labels, pred)\n",
" print score\n",
" a.append(score)\n",
"print np.mean(a)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.13"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment