Created
January 6, 2017 11:47
-
-
Save decisionstats/c12702d0e49ba504248471cf1d2eff53 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# To be continued" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import matplotlib.pyplot as plt\n", | |
"from sklearn import datasets\n", | |
"from sklearn.cluster import KMeans\n", | |
"import sklearn.metrics as sm\n", | |
" \n", | |
"import pandas as pd\n", | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"wine=pd.read_csv(\"http://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data\",header=None)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" <th>3</th>\n", | |
" <th>4</th>\n", | |
" <th>5</th>\n", | |
" <th>6</th>\n", | |
" <th>7</th>\n", | |
" <th>8</th>\n", | |
" <th>9</th>\n", | |
" <th>10</th>\n", | |
" <th>11</th>\n", | |
" <th>12</th>\n", | |
" <th>13</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>14.23</td>\n", | |
" <td>1.71</td>\n", | |
" <td>2.43</td>\n", | |
" <td>15.6</td>\n", | |
" <td>127</td>\n", | |
" <td>2.80</td>\n", | |
" <td>3.06</td>\n", | |
" <td>0.28</td>\n", | |
" <td>2.29</td>\n", | |
" <td>5.64</td>\n", | |
" <td>1.04</td>\n", | |
" <td>3.92</td>\n", | |
" <td>1065</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>13.20</td>\n", | |
" <td>1.78</td>\n", | |
" <td>2.14</td>\n", | |
" <td>11.2</td>\n", | |
" <td>100</td>\n", | |
" <td>2.65</td>\n", | |
" <td>2.76</td>\n", | |
" <td>0.26</td>\n", | |
" <td>1.28</td>\n", | |
" <td>4.38</td>\n", | |
" <td>1.05</td>\n", | |
" <td>3.40</td>\n", | |
" <td>1050</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>13.16</td>\n", | |
" <td>2.36</td>\n", | |
" <td>2.67</td>\n", | |
" <td>18.6</td>\n", | |
" <td>101</td>\n", | |
" <td>2.80</td>\n", | |
" <td>3.24</td>\n", | |
" <td>0.30</td>\n", | |
" <td>2.81</td>\n", | |
" <td>5.68</td>\n", | |
" <td>1.03</td>\n", | |
" <td>3.17</td>\n", | |
" <td>1185</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>14.37</td>\n", | |
" <td>1.95</td>\n", | |
" <td>2.50</td>\n", | |
" <td>16.8</td>\n", | |
" <td>113</td>\n", | |
" <td>3.85</td>\n", | |
" <td>3.49</td>\n", | |
" <td>0.24</td>\n", | |
" <td>2.18</td>\n", | |
" <td>7.80</td>\n", | |
" <td>0.86</td>\n", | |
" <td>3.45</td>\n", | |
" <td>1480</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>13.24</td>\n", | |
" <td>2.59</td>\n", | |
" <td>2.87</td>\n", | |
" <td>21.0</td>\n", | |
" <td>118</td>\n", | |
" <td>2.80</td>\n", | |
" <td>2.69</td>\n", | |
" <td>0.39</td>\n", | |
" <td>1.82</td>\n", | |
" <td>4.32</td>\n", | |
" <td>1.04</td>\n", | |
" <td>2.93</td>\n", | |
" <td>735</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 0 1 2 3 4 5 6 7 8 9 10 11 12 \\\n", | |
"0 1 14.23 1.71 2.43 15.6 127 2.80 3.06 0.28 2.29 5.64 1.04 3.92 \n", | |
"1 1 13.20 1.78 2.14 11.2 100 2.65 2.76 0.26 1.28 4.38 1.05 3.40 \n", | |
"2 1 13.16 2.36 2.67 18.6 101 2.80 3.24 0.30 2.81 5.68 1.03 3.17 \n", | |
"3 1 14.37 1.95 2.50 16.8 113 3.85 3.49 0.24 2.18 7.80 0.86 3.45 \n", | |
"4 1 13.24 2.59 2.87 21.0 118 2.80 2.69 0.39 1.82 4.32 1.04 2.93 \n", | |
"\n", | |
" 13 \n", | |
"0 1065 \n", | |
"1 1050 \n", | |
"2 1185 \n", | |
"3 1480 \n", | |
"4 735 " | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"wine.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"The column names are taken from the dataset description at http://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.names" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"wine.columns=['class','Alcohol','Malic acid','Ash','Alcalinity of ash','Magnesium','Total phenols','Flavanoids','Nonflavanoid phenols','Proanthocyanins','Color intensity','Hue','OD280/OD315 of diluted wines','Proline']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>class</th>\n", | |
" <th>Alcohol</th>\n", | |
" <th>Malic acid</th>\n", | |
" <th>Ash</th>\n", | |
" <th>Alcalinity of ash</th>\n", | |
" <th>Magnesium</th>\n", | |
" <th>Total phenols</th>\n", | |
" <th>Flavanoids</th>\n", | |
" <th>Nonflavanoid phenols</th>\n", | |
" <th>Proanthocyanins</th>\n", | |
" <th>Color intensity</th>\n", | |
" <th>Hue</th>\n", | |
" <th>OD280/OD315 of diluted wines</th>\n", | |
" <th>Proline</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>14.23</td>\n", | |
" <td>1.71</td>\n", | |
" <td>2.43</td>\n", | |
" <td>15.6</td>\n", | |
" <td>127</td>\n", | |
" <td>2.80</td>\n", | |
" <td>3.06</td>\n", | |
" <td>0.28</td>\n", | |
" <td>2.29</td>\n", | |
" <td>5.64</td>\n", | |
" <td>1.04</td>\n", | |
" <td>3.92</td>\n", | |
" <td>1065</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>13.20</td>\n", | |
" <td>1.78</td>\n", | |
" <td>2.14</td>\n", | |
" <td>11.2</td>\n", | |
" <td>100</td>\n", | |
" <td>2.65</td>\n", | |
" <td>2.76</td>\n", | |
" <td>0.26</td>\n", | |
" <td>1.28</td>\n", | |
" <td>4.38</td>\n", | |
" <td>1.05</td>\n", | |
" <td>3.40</td>\n", | |
" <td>1050</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>13.16</td>\n", | |
" <td>2.36</td>\n", | |
" <td>2.67</td>\n", | |
" <td>18.6</td>\n", | |
" <td>101</td>\n", | |
" <td>2.80</td>\n", | |
" <td>3.24</td>\n", | |
" <td>0.30</td>\n", | |
" <td>2.81</td>\n", | |
" <td>5.68</td>\n", | |
" <td>1.03</td>\n", | |
" <td>3.17</td>\n", | |
" <td>1185</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>14.37</td>\n", | |
" <td>1.95</td>\n", | |
" <td>2.50</td>\n", | |
" <td>16.8</td>\n", | |
" <td>113</td>\n", | |
" <td>3.85</td>\n", | |
" <td>3.49</td>\n", | |
" <td>0.24</td>\n", | |
" <td>2.18</td>\n", | |
" <td>7.80</td>\n", | |
" <td>0.86</td>\n", | |
" <td>3.45</td>\n", | |
" <td>1480</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>13.24</td>\n", | |
" <td>2.59</td>\n", | |
" <td>2.87</td>\n", | |
" <td>21.0</td>\n", | |
" <td>118</td>\n", | |
" <td>2.80</td>\n", | |
" <td>2.69</td>\n", | |
" <td>0.39</td>\n", | |
" <td>1.82</td>\n", | |
" <td>4.32</td>\n", | |
" <td>1.04</td>\n", | |
" <td>2.93</td>\n", | |
" <td>735</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" class Alcohol Malic acid Ash Alcalinity of ash Magnesium \\\n", | |
"0 1 14.23 1.71 2.43 15.6 127 \n", | |
"1 1 13.20 1.78 2.14 11.2 100 \n", | |
"2 1 13.16 2.36 2.67 18.6 101 \n", | |
"3 1 14.37 1.95 2.50 16.8 113 \n", | |
"4 1 13.24 2.59 2.87 21.0 118 \n", | |
"\n", | |
" Total phenols Flavanoids Nonflavanoid phenols Proanthocyanins \\\n", | |
"0 2.80 3.06 0.28 2.29 \n", | |
"1 2.65 2.76 0.26 1.28 \n", | |
"2 2.80 3.24 0.30 2.81 \n", | |
"3 3.85 3.49 0.24 2.18 \n", | |
"4 2.80 2.69 0.39 1.82 \n", | |
"\n", | |
" Color intensity Hue OD280/OD315 of diluted wines Proline \n", | |
"0 5.64 1.04 3.92 1065 \n", | |
"1 4.38 1.05 3.40 1050 \n", | |
"2 5.68 1.03 3.17 1185 \n", | |
"3 7.80 0.86 3.45 1480 \n", | |
"4 4.32 1.04 2.93 735 " | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"wine.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"RangeIndex: 178 entries, 0 to 177\n", | |
"Data columns (total 14 columns):\n", | |
"class 178 non-null int64\n", | |
"Alcohol 178 non-null float64\n", | |
"Malic acid 178 non-null float64\n", | |
"Ash 178 non-null float64\n", | |
"Alcalinity of ash 178 non-null float64\n", | |
"Magnesium 178 non-null int64\n", | |
"Total phenols 178 non-null float64\n", | |
"Flavanoids 178 non-null float64\n", | |
"Nonflavanoid phenols 178 non-null float64\n", | |
"Proanthocyanins 178 non-null float64\n", | |
"Color intensity 178 non-null float64\n", | |
"Hue 178 non-null float64\n", | |
"OD280/OD315 of diluted wines 178 non-null float64\n", | |
"Proline 178 non-null int64\n", | |
"dtypes: float64(11), int64(3)\n", | |
"memory usage: 19.5 KB\n" | |
] | |
} | |
], | |
"source": [ | |
"wine.info()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>class</th>\n", | |
" <th>Alcohol</th>\n", | |
" <th>Malic acid</th>\n", | |
" <th>Ash</th>\n", | |
" <th>Alcalinity of ash</th>\n", | |
" <th>Magnesium</th>\n", | |
" <th>Total phenols</th>\n", | |
" <th>Flavanoids</th>\n", | |
" <th>Nonflavanoid phenols</th>\n", | |
" <th>Proanthocyanins</th>\n", | |
" <th>Color intensity</th>\n", | |
" <th>Hue</th>\n", | |
" <th>OD280/OD315 of diluted wines</th>\n", | |
" <th>Proline</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>count</th>\n", | |
" <td>178.000000</td>\n", | |
" <td>178.000000</td>\n", | |
" <td>178.000000</td>\n", | |
" <td>178.000000</td>\n", | |
" <td>178.000000</td>\n", | |
" <td>178.000000</td>\n", | |
" <td>178.000000</td>\n", | |
" <td>178.000000</td>\n", | |
" <td>178.000000</td>\n", | |
" <td>178.000000</td>\n", | |
" <td>178.000000</td>\n", | |
" <td>178.000000</td>\n", | |
" <td>178.000000</td>\n", | |
" <td>178.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td>1.938202</td>\n", | |
" <td>13.000618</td>\n", | |
" <td>2.336348</td>\n", | |
" <td>2.366517</td>\n", | |
" <td>19.494944</td>\n", | |
" <td>99.741573</td>\n", | |
" <td>2.295112</td>\n", | |
" <td>2.029270</td>\n", | |
" <td>0.361854</td>\n", | |
" <td>1.590899</td>\n", | |
" <td>5.058090</td>\n", | |
" <td>0.957449</td>\n", | |
" <td>2.611685</td>\n", | |
" <td>746.893258</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td>0.775035</td>\n", | |
" <td>0.811827</td>\n", | |
" <td>1.117146</td>\n", | |
" <td>0.274344</td>\n", | |
" <td>3.339564</td>\n", | |
" <td>14.282484</td>\n", | |
" <td>0.625851</td>\n", | |
" <td>0.998859</td>\n", | |
" <td>0.124453</td>\n", | |
" <td>0.572359</td>\n", | |
" <td>2.318286</td>\n", | |
" <td>0.228572</td>\n", | |
" <td>0.709990</td>\n", | |
" <td>314.907474</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td>1.000000</td>\n", | |
" <td>11.030000</td>\n", | |
" <td>0.740000</td>\n", | |
" <td>1.360000</td>\n", | |
" <td>10.600000</td>\n", | |
" <td>70.000000</td>\n", | |
" <td>0.980000</td>\n", | |
" <td>0.340000</td>\n", | |
" <td>0.130000</td>\n", | |
" <td>0.410000</td>\n", | |
" <td>1.280000</td>\n", | |
" <td>0.480000</td>\n", | |
" <td>1.270000</td>\n", | |
" <td>278.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td>1.000000</td>\n", | |
" <td>12.362500</td>\n", | |
" <td>1.602500</td>\n", | |
" <td>2.210000</td>\n", | |
" <td>17.200000</td>\n", | |
" <td>88.000000</td>\n", | |
" <td>1.742500</td>\n", | |
" <td>1.205000</td>\n", | |
" <td>0.270000</td>\n", | |
" <td>1.250000</td>\n", | |
" <td>3.220000</td>\n", | |
" <td>0.782500</td>\n", | |
" <td>1.937500</td>\n", | |
" <td>500.500000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td>2.000000</td>\n", | |
" <td>13.050000</td>\n", | |
" <td>1.865000</td>\n", | |
" <td>2.360000</td>\n", | |
" <td>19.500000</td>\n", | |
" <td>98.000000</td>\n", | |
" <td>2.355000</td>\n", | |
" <td>2.135000</td>\n", | |
" <td>0.340000</td>\n", | |
" <td>1.555000</td>\n", | |
" <td>4.690000</td>\n", | |
" <td>0.965000</td>\n", | |
" <td>2.780000</td>\n", | |
" <td>673.500000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td>3.000000</td>\n", | |
" <td>13.677500</td>\n", | |
" <td>3.082500</td>\n", | |
" <td>2.557500</td>\n", | |
" <td>21.500000</td>\n", | |
" <td>107.000000</td>\n", | |
" <td>2.800000</td>\n", | |
" <td>2.875000</td>\n", | |
" <td>0.437500</td>\n", | |
" <td>1.950000</td>\n", | |
" <td>6.200000</td>\n", | |
" <td>1.120000</td>\n", | |
" <td>3.170000</td>\n", | |
" <td>985.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td>3.000000</td>\n", | |
" <td>14.830000</td>\n", | |
" <td>5.800000</td>\n", | |
" <td>3.230000</td>\n", | |
" <td>30.000000</td>\n", | |
" <td>162.000000</td>\n", | |
" <td>3.880000</td>\n", | |
" <td>5.080000</td>\n", | |
" <td>0.660000</td>\n", | |
" <td>3.580000</td>\n", | |
" <td>13.000000</td>\n", | |
" <td>1.710000</td>\n", | |
" <td>4.000000</td>\n", | |
" <td>1680.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" class Alcohol Malic acid Ash Alcalinity of ash \\\n", | |
"count 178.000000 178.000000 178.000000 178.000000 178.000000 \n", | |
"mean 1.938202 13.000618 2.336348 2.366517 19.494944 \n", | |
"std 0.775035 0.811827 1.117146 0.274344 3.339564 \n", | |
"min 1.000000 11.030000 0.740000 1.360000 10.600000 \n", | |
"25% 1.000000 12.362500 1.602500 2.210000 17.200000 \n", | |
"50% 2.000000 13.050000 1.865000 2.360000 19.500000 \n", | |
"75% 3.000000 13.677500 3.082500 2.557500 21.500000 \n", | |
"max 3.000000 14.830000 5.800000 3.230000 30.000000 \n", | |
"\n", | |
" Magnesium Total phenols Flavanoids Nonflavanoid phenols \\\n", | |
"count 178.000000 178.000000 178.000000 178.000000 \n", | |
"mean 99.741573 2.295112 2.029270 0.361854 \n", | |
"std 14.282484 0.625851 0.998859 0.124453 \n", | |
"min 70.000000 0.980000 0.340000 0.130000 \n", | |
"25% 88.000000 1.742500 1.205000 0.270000 \n", | |
"50% 98.000000 2.355000 2.135000 0.340000 \n", | |
"75% 107.000000 2.800000 2.875000 0.437500 \n", | |
"max 162.000000 3.880000 5.080000 0.660000 \n", | |
"\n", | |
" Proanthocyanins Color intensity Hue \\\n", | |
"count 178.000000 178.000000 178.000000 \n", | |
"mean 1.590899 5.058090 0.957449 \n", | |
"std 0.572359 2.318286 0.228572 \n", | |
"min 0.410000 1.280000 0.480000 \n", | |
"25% 1.250000 3.220000 0.782500 \n", | |
"50% 1.555000 4.690000 0.965000 \n", | |
"75% 1.950000 6.200000 1.120000 \n", | |
"max 3.580000 13.000000 1.710000 \n", | |
"\n", | |
" OD280/OD315 of diluted wines Proline \n", | |
"count 178.000000 178.000000 \n", | |
"mean 2.611685 746.893258 \n", | |
"std 0.709990 314.907474 \n", | |
"min 1.270000 278.000000 \n", | |
"25% 1.937500 500.500000 \n", | |
"50% 2.780000 673.500000 \n", | |
"75% 3.170000 985.000000 \n", | |
"max 4.000000 1680.000000 " | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"wine.describe()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"2 71\n", | |
"1 59\n", | |
"3 48\n", | |
"Name: class, dtype: int64" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"wine['class'].value_counts()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"An R solution, for comparison, is at https://rstudio-pubs-static.s3.amazonaws.com/33876_1d7794d9a86647ca90c4f182df93f0e8.html" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 44, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"x=wine.iloc[:,1:14]\n", | |
"y=wine.iloc[:,:1]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 45, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Index(['Alcohol', 'Malic acid', 'Ash', 'Alcalinity of ash', 'Magnesium',\n", | |
" 'Total phenols', 'Flavanoids', 'Nonflavanoid phenols',\n", | |
" 'Proanthocyanins', 'Color intensity', 'Hue',\n", | |
" 'OD280/OD315 of diluted wines', 'Proline'],\n", | |
" dtype='object')" | |
] | |
}, | |
"execution_count": 45, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x.columns" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 46, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Index(['class'], dtype='object')" | |
] | |
}, | |
"execution_count": 46, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y.columns" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 47, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Alcohol</th>\n", | |
" <th>Malic acid</th>\n", | |
" <th>Ash</th>\n", | |
" <th>Alcalinity of ash</th>\n", | |
" <th>Magnesium</th>\n", | |
" <th>Total phenols</th>\n", | |
" <th>Flavanoids</th>\n", | |
" <th>Nonflavanoid phenols</th>\n", | |
" <th>Proanthocyanins</th>\n", | |
" <th>Color intensity</th>\n", | |
" <th>Hue</th>\n", | |
" <th>OD280/OD315 of diluted wines</th>\n", | |
" <th>Proline</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>14.23</td>\n", | |
" <td>1.71</td>\n", | |
" <td>2.43</td>\n", | |
" <td>15.6</td>\n", | |
" <td>127</td>\n", | |
" <td>2.80</td>\n", | |
" <td>3.06</td>\n", | |
" <td>0.28</td>\n", | |
" <td>2.29</td>\n", | |
" <td>5.64</td>\n", | |
" <td>1.04</td>\n", | |
" <td>3.92</td>\n", | |
" <td>1065</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>13.20</td>\n", | |
" <td>1.78</td>\n", | |
" <td>2.14</td>\n", | |
" <td>11.2</td>\n", | |
" <td>100</td>\n", | |
" <td>2.65</td>\n", | |
" <td>2.76</td>\n", | |
" <td>0.26</td>\n", | |
" <td>1.28</td>\n", | |
" <td>4.38</td>\n", | |
" <td>1.05</td>\n", | |
" <td>3.40</td>\n", | |
" <td>1050</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>13.16</td>\n", | |
" <td>2.36</td>\n", | |
" <td>2.67</td>\n", | |
" <td>18.6</td>\n", | |
" <td>101</td>\n", | |
" <td>2.80</td>\n", | |
" <td>3.24</td>\n", | |
" <td>0.30</td>\n", | |
" <td>2.81</td>\n", | |
" <td>5.68</td>\n", | |
" <td>1.03</td>\n", | |
" <td>3.17</td>\n", | |
" <td>1185</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>14.37</td>\n", | |
" <td>1.95</td>\n", | |
" <td>2.50</td>\n", | |
" <td>16.8</td>\n", | |
" <td>113</td>\n", | |
" <td>3.85</td>\n", | |
" <td>3.49</td>\n", | |
" <td>0.24</td>\n", | |
" <td>2.18</td>\n", | |
" <td>7.80</td>\n", | |
" <td>0.86</td>\n", | |
" <td>3.45</td>\n", | |
" <td>1480</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>13.24</td>\n", | |
" <td>2.59</td>\n", | |
" <td>2.87</td>\n", | |
" <td>21.0</td>\n", | |
" <td>118</td>\n", | |
" <td>2.80</td>\n", | |
" <td>2.69</td>\n", | |
" <td>0.39</td>\n", | |
" <td>1.82</td>\n", | |
" <td>4.32</td>\n", | |
" <td>1.04</td>\n", | |
" <td>2.93</td>\n", | |
" <td>735</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Alcohol Malic acid Ash Alcalinity of ash Magnesium Total phenols \\\n", | |
"0 14.23 1.71 2.43 15.6 127 2.80 \n", | |
"1 13.20 1.78 2.14 11.2 100 2.65 \n", | |
"2 13.16 2.36 2.67 18.6 101 2.80 \n", | |
"3 14.37 1.95 2.50 16.8 113 3.85 \n", | |
"4 13.24 2.59 2.87 21.0 118 2.80 \n", | |
"\n", | |
" Flavanoids Nonflavanoid phenols Proanthocyanins Color intensity Hue \\\n", | |
"0 3.06 0.28 2.29 5.64 1.04 \n", | |
"1 2.76 0.26 1.28 4.38 1.05 \n", | |
"2 3.24 0.30 2.81 5.68 1.03 \n", | |
"3 3.49 0.24 2.18 7.80 0.86 \n", | |
"4 2.69 0.39 1.82 4.32 1.04 \n", | |
"\n", | |
" OD280/OD315 of diluted wines Proline \n", | |
"0 3.92 1065 \n", | |
"1 3.40 1050 \n", | |
"2 3.17 1185 \n", | |
"3 3.45 1480 \n", | |
"4 2.93 735 " | |
] | |
}, | |
"execution_count": 47, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 48, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>class</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" class\n", | |
"0 1\n", | |
"1 1\n", | |
"2 1\n", | |
"3 1\n", | |
"4 1" | |
] | |
}, | |
"execution_count": 48, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 49, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"KMeans(copy_x=True, init='k-means++', max_iter=300, n_clusters=3, n_init=10,\n", | |
" n_jobs=1, precompute_distances='auto', random_state=None, tol=0.0001,\n", | |
" verbose=0)" | |
] | |
}, | |
"execution_count": 49, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# K Means Cluster\n", | |
"model = KMeans(n_clusters=3)\n", | |
"model.fit(x)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 50, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0,\n", | |
" 0, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 0,\n", | |
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 2, 1, 1, 2, 1, 1, 2,\n", | |
" 2, 2, 1, 1, 0, 2, 1, 1, 1, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 1,\n", | |
" 1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1,\n", | |
" 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 2, 2, 2, 1,\n", | |
" 1, 1, 2, 2, 1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1,\n", | |
" 2, 1, 2, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1], dtype=int32)" | |
] | |
}, | |
"execution_count": 50, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"model.labels_" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 51, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1 69\n", | |
"2 62\n", | |
"0 47\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 51, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pd.Series(model.labels_).value_counts()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 54, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"2 71\n", | |
"1 59\n", | |
"3 48\n", | |
"Name: class, dtype: int64" | |
] | |
}, | |
"execution_count": 54, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y['class'].value_counts()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 61, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# KMeans cluster ids are arbitrary (and can change between runs, since no random_state is set),\n", | |
"# so map each cluster to the class that occurs most often among its members\n", | |
"# instead of hard-coding the correspondence.\n", | |
"cluster_to_class = pd.crosstab(model.labels_, y['class']).idxmax(axis=1)\n", | |
"predY = cluster_to_class.loc[model.labels_].values.astype(np.int64)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 66, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0 1\n", | |
"1 1\n", | |
"2 1\n", | |
"3 1\n", | |
"4 1\n", | |
"5 1\n", | |
"6 1\n", | |
"7 1\n", | |
"8 1\n", | |
"9 1\n", | |
"10 1\n", | |
"11 1\n", | |
"12 1\n", | |
"13 1\n", | |
"14 1\n", | |
"15 1\n", | |
"16 1\n", | |
"17 1\n", | |
"18 1\n", | |
"19 1\n", | |
"20 1\n", | |
"21 1\n", | |
"22 1\n", | |
"23 1\n", | |
"24 1\n", | |
"25 1\n", | |
"26 1\n", | |
"27 1\n", | |
"28 1\n", | |
"29 1\n", | |
" ..\n", | |
"148 3\n", | |
"149 3\n", | |
"150 3\n", | |
"151 3\n", | |
"152 3\n", | |
"153 3\n", | |
"154 3\n", | |
"155 3\n", | |
"156 3\n", | |
"157 3\n", | |
"158 3\n", | |
"159 3\n", | |
"160 3\n", | |
"161 3\n", | |
"162 3\n", | |
"163 3\n", | |
"164 3\n", | |
"165 3\n", | |
"166 3\n", | |
"167 3\n", | |
"168 3\n", | |
"169 3\n", | |
"170 3\n", | |
"171 3\n", | |
"172 3\n", | |
"173 3\n", | |
"174 3\n", | |
"175 3\n", | |
"176 3\n", | |
"177 3\n", | |
"Name: class, dtype: int64\n", | |
"[0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 0 0 2 2 0 0 2 0 0 0 0 0 0 2 2\n", | |
" 0 0 2 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 1 2 1 1 2 1 1 2 2 2 1 1 0\n", | |
" 2 1 1 1 2 1 1 2 2 1 1 1 1 1 2 2 1 1 1 1 1 2 2 1 2 1 2 1 1 1 2 1 1 1 1 2 1\n", | |
" 1 2 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 1 2 2 2 2 1 1 1 2 2 1 1 2 2 1 2\n", | |
" 2 1 1 1 1 2 2 2 1 2 2 2 1 2 1 2 2 1 2 2 2 2 1 1 2 2 2 2 2 1]\n", | |
"[1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 3 3 1 1 3 3 1 1 3 1 1 1 1 1 1 3 3\n", | |
" 1 1 3 3 1 1 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 2 3 2 2 3 2 2 3 3 3 2 2 1\n", | |
" 3 2 2 2 3 2 2 3 3 2 2 2 2 2 3 3 2 2 2 2 2 3 3 2 3 2 3 2 2 2 3 2 2 2 2 3 2\n", | |
" 2 3 2 2 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 3 2 2 3 3 3 3 2 2 2 3 3 2 2 3 3 2 3\n", | |
" 3 2 2 2 2 3 3 3 2 3 3 3 2 3 2 3 3 2 3 3 3 3 2 2 3 3 3 3 3 2]\n" | |
] | |
} | |
], | |
"source": [ | |
"print (y['class'])\n", | |
"print (model.labels_)\n", | |
"print (predY)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 63, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.702247191011236" | |
] | |
}, | |
"execution_count": 63, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Performance Metrics\n", | |
"sm.accuracy_score(y, predY)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 65, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[46, 0, 13],\n", | |
" [ 1, 50, 20],\n", | |
" [ 0, 19, 29]])" | |
] | |
}, | |
"execution_count": 65, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Confusion Matrix\n", | |
"sm.confusion_matrix(y, predY)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"anaconda-cloud": {}, | |
"kernelspec": { | |
"display_name": "Python [Root]", | |
"language": "python", | |
"name": "Python [Root]" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment