Skip to content

Instantly share code, notes, and snippets.

@decisionstats
Created January 6, 2017 11:47
Show Gist options
  • Save decisionstats/c12702d0e49ba504248471cf1d2eff53 to your computer and use it in GitHub Desktop.
Save decisionstats/c12702d0e49ba504248471cf1d2eff53 to your computer and use it in GitHub Desktop.
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#to be continued"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"from sklearn import datasets\n",
"from sklearn.cluster import KMeans\n",
"import sklearn.metrics as sm\n",
" \n",
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"wine=pd.read_csv(\"http://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data\",header=None)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" <th>10</th>\n",
" <th>11</th>\n",
" <th>12</th>\n",
" <th>13</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>14.23</td>\n",
" <td>1.71</td>\n",
" <td>2.43</td>\n",
" <td>15.6</td>\n",
" <td>127</td>\n",
" <td>2.80</td>\n",
" <td>3.06</td>\n",
" <td>0.28</td>\n",
" <td>2.29</td>\n",
" <td>5.64</td>\n",
" <td>1.04</td>\n",
" <td>3.92</td>\n",
" <td>1065</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>13.20</td>\n",
" <td>1.78</td>\n",
" <td>2.14</td>\n",
" <td>11.2</td>\n",
" <td>100</td>\n",
" <td>2.65</td>\n",
" <td>2.76</td>\n",
" <td>0.26</td>\n",
" <td>1.28</td>\n",
" <td>4.38</td>\n",
" <td>1.05</td>\n",
" <td>3.40</td>\n",
" <td>1050</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>13.16</td>\n",
" <td>2.36</td>\n",
" <td>2.67</td>\n",
" <td>18.6</td>\n",
" <td>101</td>\n",
" <td>2.80</td>\n",
" <td>3.24</td>\n",
" <td>0.30</td>\n",
" <td>2.81</td>\n",
" <td>5.68</td>\n",
" <td>1.03</td>\n",
" <td>3.17</td>\n",
" <td>1185</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>14.37</td>\n",
" <td>1.95</td>\n",
" <td>2.50</td>\n",
" <td>16.8</td>\n",
" <td>113</td>\n",
" <td>3.85</td>\n",
" <td>3.49</td>\n",
" <td>0.24</td>\n",
" <td>2.18</td>\n",
" <td>7.80</td>\n",
" <td>0.86</td>\n",
" <td>3.45</td>\n",
" <td>1480</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>13.24</td>\n",
" <td>2.59</td>\n",
" <td>2.87</td>\n",
" <td>21.0</td>\n",
" <td>118</td>\n",
" <td>2.80</td>\n",
" <td>2.69</td>\n",
" <td>0.39</td>\n",
" <td>1.82</td>\n",
" <td>4.32</td>\n",
" <td>1.04</td>\n",
" <td>2.93</td>\n",
" <td>735</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3 4 5 6 7 8 9 10 11 12 \\\n",
"0 1 14.23 1.71 2.43 15.6 127 2.80 3.06 0.28 2.29 5.64 1.04 3.92 \n",
"1 1 13.20 1.78 2.14 11.2 100 2.65 2.76 0.26 1.28 4.38 1.05 3.40 \n",
"2 1 13.16 2.36 2.67 18.6 101 2.80 3.24 0.30 2.81 5.68 1.03 3.17 \n",
"3 1 14.37 1.95 2.50 16.8 113 3.85 3.49 0.24 2.18 7.80 0.86 3.45 \n",
"4 1 13.24 2.59 2.87 21.0 118 2.80 2.69 0.39 1.82 4.32 1.04 2.93 \n",
"\n",
" 13 \n",
"0 1065 \n",
"1 1050 \n",
"2 1185 \n",
"3 1480 \n",
"4 735 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"From http://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.names we get the column names"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"wine.columns=['class','Alcohol','Malic acid','Ash','Alcalinity of ash','Magnesium','Total phenols','Flavanoids','Nonflavanoid phenols','Proanthocyanins','Color intensity','Hue','OD280/OD315 of diluted wines','Proline']"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>class</th>\n",
" <th>Alcohol</th>\n",
" <th>Malic acid</th>\n",
" <th>Ash</th>\n",
" <th>Alcalinity of ash</th>\n",
" <th>Magnesium</th>\n",
" <th>Total phenols</th>\n",
" <th>Flavanoids</th>\n",
" <th>Nonflavanoid phenols</th>\n",
" <th>Proanthocyanins</th>\n",
" <th>Color intensity</th>\n",
" <th>Hue</th>\n",
" <th>OD280/OD315 of diluted wines</th>\n",
" <th>Proline</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>14.23</td>\n",
" <td>1.71</td>\n",
" <td>2.43</td>\n",
" <td>15.6</td>\n",
" <td>127</td>\n",
" <td>2.80</td>\n",
" <td>3.06</td>\n",
" <td>0.28</td>\n",
" <td>2.29</td>\n",
" <td>5.64</td>\n",
" <td>1.04</td>\n",
" <td>3.92</td>\n",
" <td>1065</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>13.20</td>\n",
" <td>1.78</td>\n",
" <td>2.14</td>\n",
" <td>11.2</td>\n",
" <td>100</td>\n",
" <td>2.65</td>\n",
" <td>2.76</td>\n",
" <td>0.26</td>\n",
" <td>1.28</td>\n",
" <td>4.38</td>\n",
" <td>1.05</td>\n",
" <td>3.40</td>\n",
" <td>1050</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>13.16</td>\n",
" <td>2.36</td>\n",
" <td>2.67</td>\n",
" <td>18.6</td>\n",
" <td>101</td>\n",
" <td>2.80</td>\n",
" <td>3.24</td>\n",
" <td>0.30</td>\n",
" <td>2.81</td>\n",
" <td>5.68</td>\n",
" <td>1.03</td>\n",
" <td>3.17</td>\n",
" <td>1185</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>14.37</td>\n",
" <td>1.95</td>\n",
" <td>2.50</td>\n",
" <td>16.8</td>\n",
" <td>113</td>\n",
" <td>3.85</td>\n",
" <td>3.49</td>\n",
" <td>0.24</td>\n",
" <td>2.18</td>\n",
" <td>7.80</td>\n",
" <td>0.86</td>\n",
" <td>3.45</td>\n",
" <td>1480</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>13.24</td>\n",
" <td>2.59</td>\n",
" <td>2.87</td>\n",
" <td>21.0</td>\n",
" <td>118</td>\n",
" <td>2.80</td>\n",
" <td>2.69</td>\n",
" <td>0.39</td>\n",
" <td>1.82</td>\n",
" <td>4.32</td>\n",
" <td>1.04</td>\n",
" <td>2.93</td>\n",
" <td>735</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" class Alcohol Malic acid Ash Alcalinity of ash Magnesium \\\n",
"0 1 14.23 1.71 2.43 15.6 127 \n",
"1 1 13.20 1.78 2.14 11.2 100 \n",
"2 1 13.16 2.36 2.67 18.6 101 \n",
"3 1 14.37 1.95 2.50 16.8 113 \n",
"4 1 13.24 2.59 2.87 21.0 118 \n",
"\n",
" Total phenols Flavanoids Nonflavanoid phenols Proanthocyanins \\\n",
"0 2.80 3.06 0.28 2.29 \n",
"1 2.65 2.76 0.26 1.28 \n",
"2 2.80 3.24 0.30 2.81 \n",
"3 3.85 3.49 0.24 2.18 \n",
"4 2.80 2.69 0.39 1.82 \n",
"\n",
" Color intensity Hue OD280/OD315 of diluted wines Proline \n",
"0 5.64 1.04 3.92 1065 \n",
"1 4.38 1.05 3.40 1050 \n",
"2 5.68 1.03 3.17 1185 \n",
"3 7.80 0.86 3.45 1480 \n",
"4 4.32 1.04 2.93 735 "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 178 entries, 0 to 177\n",
"Data columns (total 14 columns):\n",
"class 178 non-null int64\n",
"Alcohol 178 non-null float64\n",
"Malic acid 178 non-null float64\n",
"Ash 178 non-null float64\n",
"Alcalinity of ash 178 non-null float64\n",
"Magnesium 178 non-null int64\n",
"Total phenols 178 non-null float64\n",
"Flavanoids 178 non-null float64\n",
"Nonflavanoid phenols 178 non-null float64\n",
"Proanthocyanins 178 non-null float64\n",
"Color intensity 178 non-null float64\n",
"Hue 178 non-null float64\n",
"OD280/OD315 of diluted wines 178 non-null float64\n",
"Proline 178 non-null int64\n",
"dtypes: float64(11), int64(3)\n",
"memory usage: 19.5 KB\n"
]
}
],
"source": [
"wine.info()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>class</th>\n",
" <th>Alcohol</th>\n",
" <th>Malic acid</th>\n",
" <th>Ash</th>\n",
" <th>Alcalinity of ash</th>\n",
" <th>Magnesium</th>\n",
" <th>Total phenols</th>\n",
" <th>Flavanoids</th>\n",
" <th>Nonflavanoid phenols</th>\n",
" <th>Proanthocyanins</th>\n",
" <th>Color intensity</th>\n",
" <th>Hue</th>\n",
" <th>OD280/OD315 of diluted wines</th>\n",
" <th>Proline</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>178.000000</td>\n",
" <td>178.000000</td>\n",
" <td>178.000000</td>\n",
" <td>178.000000</td>\n",
" <td>178.000000</td>\n",
" <td>178.000000</td>\n",
" <td>178.000000</td>\n",
" <td>178.000000</td>\n",
" <td>178.000000</td>\n",
" <td>178.000000</td>\n",
" <td>178.000000</td>\n",
" <td>178.000000</td>\n",
" <td>178.000000</td>\n",
" <td>178.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>1.938202</td>\n",
" <td>13.000618</td>\n",
" <td>2.336348</td>\n",
" <td>2.366517</td>\n",
" <td>19.494944</td>\n",
" <td>99.741573</td>\n",
" <td>2.295112</td>\n",
" <td>2.029270</td>\n",
" <td>0.361854</td>\n",
" <td>1.590899</td>\n",
" <td>5.058090</td>\n",
" <td>0.957449</td>\n",
" <td>2.611685</td>\n",
" <td>746.893258</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.775035</td>\n",
" <td>0.811827</td>\n",
" <td>1.117146</td>\n",
" <td>0.274344</td>\n",
" <td>3.339564</td>\n",
" <td>14.282484</td>\n",
" <td>0.625851</td>\n",
" <td>0.998859</td>\n",
" <td>0.124453</td>\n",
" <td>0.572359</td>\n",
" <td>2.318286</td>\n",
" <td>0.228572</td>\n",
" <td>0.709990</td>\n",
" <td>314.907474</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000</td>\n",
" <td>11.030000</td>\n",
" <td>0.740000</td>\n",
" <td>1.360000</td>\n",
" <td>10.600000</td>\n",
" <td>70.000000</td>\n",
" <td>0.980000</td>\n",
" <td>0.340000</td>\n",
" <td>0.130000</td>\n",
" <td>0.410000</td>\n",
" <td>1.280000</td>\n",
" <td>0.480000</td>\n",
" <td>1.270000</td>\n",
" <td>278.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>1.000000</td>\n",
" <td>12.362500</td>\n",
" <td>1.602500</td>\n",
" <td>2.210000</td>\n",
" <td>17.200000</td>\n",
" <td>88.000000</td>\n",
" <td>1.742500</td>\n",
" <td>1.205000</td>\n",
" <td>0.270000</td>\n",
" <td>1.250000</td>\n",
" <td>3.220000</td>\n",
" <td>0.782500</td>\n",
" <td>1.937500</td>\n",
" <td>500.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>2.000000</td>\n",
" <td>13.050000</td>\n",
" <td>1.865000</td>\n",
" <td>2.360000</td>\n",
" <td>19.500000</td>\n",
" <td>98.000000</td>\n",
" <td>2.355000</td>\n",
" <td>2.135000</td>\n",
" <td>0.340000</td>\n",
" <td>1.555000</td>\n",
" <td>4.690000</td>\n",
" <td>0.965000</td>\n",
" <td>2.780000</td>\n",
" <td>673.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>3.000000</td>\n",
" <td>13.677500</td>\n",
" <td>3.082500</td>\n",
" <td>2.557500</td>\n",
" <td>21.500000</td>\n",
" <td>107.000000</td>\n",
" <td>2.800000</td>\n",
" <td>2.875000</td>\n",
" <td>0.437500</td>\n",
" <td>1.950000</td>\n",
" <td>6.200000</td>\n",
" <td>1.120000</td>\n",
" <td>3.170000</td>\n",
" <td>985.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>3.000000</td>\n",
" <td>14.830000</td>\n",
" <td>5.800000</td>\n",
" <td>3.230000</td>\n",
" <td>30.000000</td>\n",
" <td>162.000000</td>\n",
" <td>3.880000</td>\n",
" <td>5.080000</td>\n",
" <td>0.660000</td>\n",
" <td>3.580000</td>\n",
" <td>13.000000</td>\n",
" <td>1.710000</td>\n",
" <td>4.000000</td>\n",
" <td>1680.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" class Alcohol Malic acid Ash Alcalinity of ash \\\n",
"count 178.000000 178.000000 178.000000 178.000000 178.000000 \n",
"mean 1.938202 13.000618 2.336348 2.366517 19.494944 \n",
"std 0.775035 0.811827 1.117146 0.274344 3.339564 \n",
"min 1.000000 11.030000 0.740000 1.360000 10.600000 \n",
"25% 1.000000 12.362500 1.602500 2.210000 17.200000 \n",
"50% 2.000000 13.050000 1.865000 2.360000 19.500000 \n",
"75% 3.000000 13.677500 3.082500 2.557500 21.500000 \n",
"max 3.000000 14.830000 5.800000 3.230000 30.000000 \n",
"\n",
" Magnesium Total phenols Flavanoids Nonflavanoid phenols \\\n",
"count 178.000000 178.000000 178.000000 178.000000 \n",
"mean 99.741573 2.295112 2.029270 0.361854 \n",
"std 14.282484 0.625851 0.998859 0.124453 \n",
"min 70.000000 0.980000 0.340000 0.130000 \n",
"25% 88.000000 1.742500 1.205000 0.270000 \n",
"50% 98.000000 2.355000 2.135000 0.340000 \n",
"75% 107.000000 2.800000 2.875000 0.437500 \n",
"max 162.000000 3.880000 5.080000 0.660000 \n",
"\n",
" Proanthocyanins Color intensity Hue \\\n",
"count 178.000000 178.000000 178.000000 \n",
"mean 1.590899 5.058090 0.957449 \n",
"std 0.572359 2.318286 0.228572 \n",
"min 0.410000 1.280000 0.480000 \n",
"25% 1.250000 3.220000 0.782500 \n",
"50% 1.555000 4.690000 0.965000 \n",
"75% 1.950000 6.200000 1.120000 \n",
"max 3.580000 13.000000 1.710000 \n",
"\n",
" OD280/OD315 of diluted wines Proline \n",
"count 178.000000 178.000000 \n",
"mean 2.611685 746.893258 \n",
"std 0.709990 314.907474 \n",
"min 1.270000 278.000000 \n",
"25% 1.937500 500.500000 \n",
"50% 2.780000 673.500000 \n",
"75% 3.170000 985.000000 \n",
"max 4.000000 1680.000000 "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine.describe()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"2 71\n",
"1 59\n",
"3 48\n",
"Name: class, dtype: int64"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.value_counts(wine['class'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"R solution is https://rstudio-pubs-static.s3.amazonaws.com/33876_1d7794d9a86647ca90c4f182df93f0e8.html"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"x=wine.ix[:,1:14]\n",
"y=wine.ix[:,:1]"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Alcohol', 'Malic acid', 'Ash', 'Alcalinity of ash', 'Magnesium',\n",
" 'Total phenols', 'Flavanoids', 'Nonflavanoid phenols',\n",
" 'Proanthocyanins', 'Color intensity', 'Hue',\n",
" 'OD280/OD315 of diluted wines', 'Proline'],\n",
" dtype='object')"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x.columns"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['class'], dtype='object')"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y.columns"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Alcohol</th>\n",
" <th>Malic acid</th>\n",
" <th>Ash</th>\n",
" <th>Alcalinity of ash</th>\n",
" <th>Magnesium</th>\n",
" <th>Total phenols</th>\n",
" <th>Flavanoids</th>\n",
" <th>Nonflavanoid phenols</th>\n",
" <th>Proanthocyanins</th>\n",
" <th>Color intensity</th>\n",
" <th>Hue</th>\n",
" <th>OD280/OD315 of diluted wines</th>\n",
" <th>Proline</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>14.23</td>\n",
" <td>1.71</td>\n",
" <td>2.43</td>\n",
" <td>15.6</td>\n",
" <td>127</td>\n",
" <td>2.80</td>\n",
" <td>3.06</td>\n",
" <td>0.28</td>\n",
" <td>2.29</td>\n",
" <td>5.64</td>\n",
" <td>1.04</td>\n",
" <td>3.92</td>\n",
" <td>1065</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>13.20</td>\n",
" <td>1.78</td>\n",
" <td>2.14</td>\n",
" <td>11.2</td>\n",
" <td>100</td>\n",
" <td>2.65</td>\n",
" <td>2.76</td>\n",
" <td>0.26</td>\n",
" <td>1.28</td>\n",
" <td>4.38</td>\n",
" <td>1.05</td>\n",
" <td>3.40</td>\n",
" <td>1050</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13.16</td>\n",
" <td>2.36</td>\n",
" <td>2.67</td>\n",
" <td>18.6</td>\n",
" <td>101</td>\n",
" <td>2.80</td>\n",
" <td>3.24</td>\n",
" <td>0.30</td>\n",
" <td>2.81</td>\n",
" <td>5.68</td>\n",
" <td>1.03</td>\n",
" <td>3.17</td>\n",
" <td>1185</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>14.37</td>\n",
" <td>1.95</td>\n",
" <td>2.50</td>\n",
" <td>16.8</td>\n",
" <td>113</td>\n",
" <td>3.85</td>\n",
" <td>3.49</td>\n",
" <td>0.24</td>\n",
" <td>2.18</td>\n",
" <td>7.80</td>\n",
" <td>0.86</td>\n",
" <td>3.45</td>\n",
" <td>1480</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>13.24</td>\n",
" <td>2.59</td>\n",
" <td>2.87</td>\n",
" <td>21.0</td>\n",
" <td>118</td>\n",
" <td>2.80</td>\n",
" <td>2.69</td>\n",
" <td>0.39</td>\n",
" <td>1.82</td>\n",
" <td>4.32</td>\n",
" <td>1.04</td>\n",
" <td>2.93</td>\n",
" <td>735</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Alcohol Malic acid Ash Alcalinity of ash Magnesium Total phenols \\\n",
"0 14.23 1.71 2.43 15.6 127 2.80 \n",
"1 13.20 1.78 2.14 11.2 100 2.65 \n",
"2 13.16 2.36 2.67 18.6 101 2.80 \n",
"3 14.37 1.95 2.50 16.8 113 3.85 \n",
"4 13.24 2.59 2.87 21.0 118 2.80 \n",
"\n",
" Flavanoids Nonflavanoid phenols Proanthocyanins Color intensity Hue \\\n",
"0 3.06 0.28 2.29 5.64 1.04 \n",
"1 2.76 0.26 1.28 4.38 1.05 \n",
"2 3.24 0.30 2.81 5.68 1.03 \n",
"3 3.49 0.24 2.18 7.80 0.86 \n",
"4 2.69 0.39 1.82 4.32 1.04 \n",
"\n",
" OD280/OD315 of diluted wines Proline \n",
"0 3.92 1065 \n",
"1 3.40 1050 \n",
"2 3.17 1185 \n",
"3 3.45 1480 \n",
"4 2.93 735 "
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x.head()"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" class\n",
"0 1\n",
"1 1\n",
"2 1\n",
"3 1\n",
"4 1"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y.head()"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"KMeans(copy_x=True, init='k-means++', max_iter=300, n_clusters=3, n_init=10,\n",
" n_jobs=1, precompute_distances='auto', random_state=None, tol=0.0001,\n",
" verbose=0)"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# K Means Cluster\n",
"model = KMeans(n_clusters=3)\n",
"model.fit(x)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0,\n",
" 0, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 2, 1, 1, 2, 1, 1, 2,\n",
" 2, 2, 1, 1, 0, 2, 1, 1, 1, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 1,\n",
" 1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1,\n",
" 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 2, 2, 2, 1,\n",
" 1, 1, 2, 2, 1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1,\n",
" 2, 1, 2, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1], dtype=int32)"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.labels_"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"1 69\n",
"2 62\n",
"0 47\n",
"dtype: int64"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.value_counts(model.labels_)"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"2 71\n",
"1 59\n",
"3 48\n",
"Name: class, dtype: int64"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.value_counts(y['class'])"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# We convert all the 1s to 0s and 0s to 1s.\n",
"predY = np.choose(model.labels_, [2, 1, 3]).astype(np.int64)"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 1\n",
"1 1\n",
"2 1\n",
"3 1\n",
"4 1\n",
"5 1\n",
"6 1\n",
"7 1\n",
"8 1\n",
"9 1\n",
"10 1\n",
"11 1\n",
"12 1\n",
"13 1\n",
"14 1\n",
"15 1\n",
"16 1\n",
"17 1\n",
"18 1\n",
"19 1\n",
"20 1\n",
"21 1\n",
"22 1\n",
"23 1\n",
"24 1\n",
"25 1\n",
"26 1\n",
"27 1\n",
"28 1\n",
"29 1\n",
" ..\n",
"148 3\n",
"149 3\n",
"150 3\n",
"151 3\n",
"152 3\n",
"153 3\n",
"154 3\n",
"155 3\n",
"156 3\n",
"157 3\n",
"158 3\n",
"159 3\n",
"160 3\n",
"161 3\n",
"162 3\n",
"163 3\n",
"164 3\n",
"165 3\n",
"166 3\n",
"167 3\n",
"168 3\n",
"169 3\n",
"170 3\n",
"171 3\n",
"172 3\n",
"173 3\n",
"174 3\n",
"175 3\n",
"176 3\n",
"177 3\n",
"Name: class, dtype: int64\n",
"[0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 0 0 2 2 0 0 2 0 0 0 0 0 0 2 2\n",
" 0 0 2 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 1 2 1 1 2 1 1 2 2 2 1 1 0\n",
" 2 1 1 1 2 1 1 2 2 1 1 1 1 1 2 2 1 1 1 1 1 2 2 1 2 1 2 1 1 1 2 1 1 1 1 2 1\n",
" 1 2 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 1 2 2 2 2 1 1 1 2 2 1 1 2 2 1 2\n",
" 2 1 1 1 1 2 2 2 1 2 2 2 1 2 1 2 2 1 2 2 2 2 1 1 2 2 2 2 2 1]\n",
"[2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 2 2 3 3 2 2 3 2 2 2 2 2 2 3 3\n",
" 2 2 3 3 2 2 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 3 1 3 1 1 3 1 1 3 3 3 1 1 2\n",
" 3 1 1 1 3 1 1 3 3 1 1 1 1 1 3 3 1 1 1 1 1 3 3 1 3 1 3 1 1 1 3 1 1 1 1 3 1\n",
" 1 3 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 3 1 1 3 3 3 3 1 1 1 3 3 1 1 3 3 1 3\n",
" 3 1 1 1 1 3 3 3 1 3 3 3 1 3 1 3 3 1 3 3 3 3 1 1 3 3 3 3 3 1]\n"
]
}
],
"source": [
"print (y['class'])\n",
"print (model.labels_)\n",
"print (predY)"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.16853932584269662"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Performance Metrics\n",
"sm.accuracy_score(y, predY)"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0, 46, 13],\n",
" [50, 1, 20],\n",
" [19, 0, 29]])"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Confusion Matrix\n",
"sm.confusion_matrix(y, predY)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [Root]",
"language": "python",
"name": "Python [Root]"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment