executed · December 14, 2021 09:57
diff --git a/lab2_ml.ipynb b/lab2_ml.ipynb
 {
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "lab2_ML.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "authorship_tag": "ABX9TyPjUzHROzAv1kxW6FzZDrLc",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/executed/80bd8ad250e460dc07eb8caeed8d4d16/lab2_ml.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Importing the Libraries"
      ],
      "metadata": {
        "id": "TIugzd00K2sQ"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "import pandas as pd\n",
        "from sklearn.metrics import confusion_matrix\n",
        "from sklearn.metrics import accuracy_score \n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.naive_bayes import GaussianNB\n",
        "from sklearn.preprocessing import StandardScaler"
      ],
      "metadata": {
        "id": "ZybPdN33K1BC"
      },
      "execution_count": 33,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Naive Bayes Classification on Iris dataset"
      ],
      "metadata": {
        "id": "JCnY0xK0KrvL"
      }
    },
    {
      "cell_type": "code",
      "execution_count": 34,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "id": "D_44JWCSKjYL",
        "outputId": "cb50981f-0493-47ab-9356-e8ff0d70e4d1"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>sepal_length</th>\n",
              "      <th>sepal_width</th>\n",
              "      <th>petal_length</th>\n",
              "      <th>petal_width</th>\n",
              "      <th>species</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>5.1</td>\n",
              "      <td>3.5</td>\n",
              "      <td>1.4</td>\n",
              "      <td>0.2</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>4.9</td>\n",
              "      <td>3.0</td>\n",
              "      <td>1.4</td>\n",
              "      <td>0.2</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>4.7</td>\n",
              "      <td>3.2</td>\n",
              "      <td>1.3</td>\n",
              "      <td>0.2</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>4.6</td>\n",
              "      <td>3.1</td>\n",
              "      <td>1.5</td>\n",
              "      <td>0.2</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>5.0</td>\n",
              "      <td>3.6</td>\n",
              "      <td>1.4</td>\n",
              "      <td>0.2</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "   sepal_length  sepal_width  petal_length  petal_width species\n",
              "0           5.1          3.5           1.4          0.2  setosa\n",
              "1           4.9          3.0           1.4          0.2  setosa\n",
              "2           4.7          3.2           1.3          0.2  setosa\n",
              "3           4.6          3.1           1.5          0.2  setosa\n",
              "4           5.0          3.6           1.4          0.2  setosa"
            ]
          },
          "metadata": {},
          "execution_count": 34
        }
      ],
      "source": [
        "dataset = pd.read_csv('https://raw.githubusercontent.com/mk-gurucharan/Classification/master/IrisDataset.csv')\n",
        "\n",
        "X = dataset.iloc[:,:4].values\n",
        "y = dataset['species'].values\n",
        "\n",
        "dataset.head(5)"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Splitting the dataset into the Training set and Test set"
      ],
      "metadata": {
        "id": "p5XnMpc_LAsp"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "\n",
        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)"
      ],
      "metadata": {
        "id": "ye1gWl5aLCM8"
      },
      "execution_count": 35,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Feature Scaling"
      ],
      "metadata": {
        "id": "cilubRzpLFir"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "\n",
        "sc = StandardScaler()\n",
        "X_train = sc.fit_transform(X_train)\n",
        "X_test = sc.transform(X_test)"
      ],
      "metadata": {
        "id": "Cej4uIYzLHAU"
      },
      "execution_count": 36,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Training the Naive Bayes Classification model on the Training Set"
      ],
      "metadata": {
        "id": "6LItoI0HLL3q"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "\n",
        "classifier = GaussianNB()\n",
        "classifier.fit(X_train, y_train)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "kKbDSNH3LOLu",
        "outputId": "3cd647ea-4d53-4b3b-e4e6-d684e15f70cd"
      },
      "execution_count": 37,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "GaussianNB()"
            ]
          },
          "metadata": {},
          "execution_count": 37
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Predicting the Test set results"
      ],
      "metadata": {
        "id": "drasHvdPLQTo"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "y_pred = classifier.predict(X_test) \n",
        "y_pred"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "LdFuzce5LSSG",
        "outputId": "89d86c70-e1d7-48a1-eca1-3e405a447409"
      },
      "execution_count": 38,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array(['virginica', 'virginica', 'versicolor', 'setosa', 'versicolor',\n",
              "       'versicolor', 'versicolor', 'setosa', 'versicolor', 'virginica',\n",
              "       'setosa', 'versicolor', 'versicolor', 'virginica', 'versicolor',\n",
              "       'versicolor', 'virginica', 'virginica', 'setosa', 'setosa',\n",
              "       'versicolor', 'versicolor', 'setosa', 'virginica', 'virginica',\n",
              "       'versicolor', 'setosa', 'setosa', 'versicolor', 'setosa'],\n",
              "      dtype='<U10')"
            ]
          },
          "metadata": {},
          "execution_count": 38
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Confusion Matrix and Accuracy"
      ],
      "metadata": {
        "id": "O4qEPDsLLWwh"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "cm = confusion_matrix(y_test, y_pred)\n",
        "\n",
        "print (\"Accuracy : \", accuracy_score(y_test, y_pred))\n",
        "cm"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "POZWELk4LXMI",
        "outputId": "81bf9715-3b78-43d1-bb9f-289678f3cdc0"
      },
      "execution_count": 39,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Accuracy :  0.8666666666666667\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array([[ 9,  0,  0],\n",
              "       [ 0, 11,  2],\n",
              "       [ 0,  2,  6]])"
            ]
          },
          "metadata": {},
          "execution_count": 39
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Comparing the Real Values with Predicted Values"
      ],
      "metadata": {
        "id": "nMU65x8jLpgW"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "df = pd.DataFrame({'Real Values':y_test, 'Predicted Values':y_pred})\n",
        "df"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 990
        },
        "id": "AmM4bJS5Lp94",
        "outputId": "af6edc49-41bc-42d6-8698-f717f559c7e2"
      },
      "execution_count": 40,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Real Values</th>\n",
              "      <th>Predicted Values</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>virginica</td>\n",
              "      <td>virginica</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>virginica</td>\n",
              "      <td>virginica</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>versicolor</td>\n",
              "      <td>versicolor</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>setosa</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>virginica</td>\n",
              "      <td>versicolor</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>versicolor</td>\n",
              "      <td>versicolor</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>versicolor</td>\n",
              "      <td>versicolor</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>7</th>\n",
              "      <td>setosa</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8</th>\n",
              "      <td>virginica</td>\n",
              "      <td>versicolor</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>9</th>\n",
              "      <td>virginica</td>\n",
              "      <td>virginica</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>10</th>\n",
              "      <td>setosa</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>11</th>\n",
              "      <td>versicolor</td>\n",
              "      <td>versicolor</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>12</th>\n",
              "      <td>versicolor</td>\n",
              "      <td>versicolor</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>13</th>\n",
              "      <td>virginica</td>\n",
              "      <td>virginica</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>14</th>\n",
              "      <td>versicolor</td>\n",
              "      <td>versicolor</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>15</th>\n",
              "      <td>versicolor</td>\n",
              "      <td>versicolor</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>16</th>\n",
              "      <td>virginica</td>\n",
              "      <td>virginica</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>17</th>\n",
              "      <td>versicolor</td>\n",
              "      <td>virginica</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>18</th>\n",
              "      <td>setosa</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>19</th>\n",
              "      <td>setosa</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>20</th>\n",
              "      <td>versicolor</td>\n",
              "      <td>versicolor</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>21</th>\n",
              "      <td>versicolor</td>\n",
              "      <td>versicolor</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>22</th>\n",
              "      <td>setosa</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>23</th>\n",
              "      <td>virginica</td>\n",
              "      <td>virginica</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>24</th>\n",
              "      <td>versicolor</td>\n",
              "      <td>virginica</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>25</th>\n",
              "      <td>versicolor</td>\n",
              "      <td>versicolor</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>26</th>\n",
              "      <td>setosa</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>27</th>\n",
              "      <td>setosa</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>28</th>\n",
              "      <td>versicolor</td>\n",
              "      <td>versicolor</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>29</th>\n",
              "      <td>setosa</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "   Real Values Predicted Values\n",
              "0    virginica        virginica\n",
              "1    virginica        virginica\n",
              "2   versicolor       versicolor\n",
              "3       setosa           setosa\n",
              "4    virginica       versicolor\n",
              "5   versicolor       versicolor\n",
              "6   versicolor       versicolor\n",
              "7       setosa           setosa\n",
              "8    virginica       versicolor\n",
              "9    virginica        virginica\n",
              "10      setosa           setosa\n",
              "11  versicolor       versicolor\n",
              "12  versicolor       versicolor\n",
              "13   virginica        virginica\n",
              "14  versicolor       versicolor\n",
              "15  versicolor       versicolor\n",
              "16   virginica        virginica\n",
              "17  versicolor        virginica\n",
              "18      setosa           setosa\n",
              "19      setosa           setosa\n",
              "20  versicolor       versicolor\n",
              "21  versicolor       versicolor\n",
              "22      setosa           setosa\n",
              "23   virginica        virginica\n",
              "24  versicolor        virginica\n",
              "25  versicolor       versicolor\n",
              "26      setosa           setosa\n",
              "27      setosa           setosa\n",
              "28  versicolor       versicolor\n",
              "29      setosa           setosa"
            ]
          },
          "metadata": {},
          "execution_count": 40
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Pima dataset"
      ],
      "metadata": {
        "id": "9N6LtQTESUP6"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "dataset = pd.read_csv('https://drive.google.com/uc?id=1aQWzKF2sn8DSy2c9B66C-FFlichp9sv8', header=0)\n",
        "\n",
        "dataset.head()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "id": "BN1EgOeCSWPY",
        "outputId": "eb8c9841-e179-43f2-c4ab-af5902b138e4"
      },
      "execution_count": 41,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>preg</th>\n",
              "      <th>plasma</th>\n",
              "      <th>bpress</th>\n",
              "      <th>triceps_thick</th>\n",
              "      <th>insulin</th>\n",
              "      <th>BMI</th>\n",
              "      <th>pedigree_fun</th>\n",
              "      <th>age</th>\n",
              "      <th>class</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>6</td>\n",
              "      <td>148</td>\n",
              "      <td>72</td>\n",
              "      <td>35</td>\n",
              "      <td>0</td>\n",
              "      <td>33.6</td>\n",
              "      <td>0.627</td>\n",
              "      <td>50</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1</td>\n",
              "      <td>85</td>\n",
              "      <td>66</td>\n",
              "      <td>29</td>\n",
              "      <td>0</td>\n",
              "      <td>26.6</td>\n",
              "      <td>0.351</td>\n",
              "      <td>31</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>8</td>\n",
              "      <td>183</td>\n",
              "      <td>64</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>23.3</td>\n",
              "      <td>0.672</td>\n",
              "      <td>32</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>1</td>\n",
              "      <td>89</td>\n",
              "      <td>66</td>\n",
              "      <td>23</td>\n",
              "      <td>94</td>\n",
              "      <td>28.1</td>\n",
              "      <td>0.167</td>\n",
              "      <td>21</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>0</td>\n",
              "      <td>137</td>\n",
              "      <td>40</td>\n",
              "      <td>35</td>\n",
              "      <td>168</td>\n",
              "      <td>43.1</td>\n",
              "      <td>2.288</td>\n",
              "      <td>33</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "   preg  plasma  bpress  triceps_thick  insulin   BMI  pedigree_fun  age  class\n",
              "0     6     148      72             35        0  33.6         0.627   50      1\n",
              "1     1      85      66             29        0  26.6         0.351   31      0\n",
              "2     8     183      64              0        0  23.3         0.672   32      1\n",
              "3     1      89      66             23       94  28.1         0.167   21      0\n",
              "4     0     137      40             35      168  43.1         2.288   33      1"
            ]
          },
          "metadata": {},
          "execution_count": 41
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Describe data"
      ],
      "metadata": {
        "id": "7asDfrIMTzy0"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "dataset.describe()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 300
        },
        "id": "999s1jhzT1HR",
        "outputId": "0ab7f951-9242-47fd-801b-7f873dd81a71"
      },
      "execution_count": 42,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>preg</th>\n",
              "      <th>plasma</th>\n",
              "      <th>bpress</th>\n",
              "      <th>triceps_thick</th>\n",
              "      <th>insulin</th>\n",
              "      <th>BMI</th>\n",
              "      <th>pedigree_fun</th>\n",
              "      <th>age</th>\n",
              "      <th>class</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>count</th>\n",
              "      <td>768.000000</td>\n",
              "      <td>768.000000</td>\n",
              "      <td>768.000000</td>\n",
              "      <td>768.000000</td>\n",
              "      <td>768.000000</td>\n",
              "      <td>768.000000</td>\n",
              "      <td>768.000000</td>\n",
              "      <td>768.000000</td>\n",
              "      <td>768.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>mean</th>\n",
              "      <td>3.845052</td>\n",
              "      <td>120.894531</td>\n",
              "      <td>69.105469</td>\n",
              "      <td>20.536458</td>\n",
              "      <td>79.799479</td>\n",
              "      <td>31.992578</td>\n",
              "      <td>0.471876</td>\n",
              "      <td>33.240885</td>\n",
              "      <td>0.348958</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>std</th>\n",
              "      <td>3.369578</td>\n",
              "      <td>31.972618</td>\n",
              "      <td>19.355807</td>\n",
              "      <td>15.952218</td>\n",
              "      <td>115.244002</td>\n",
              "      <td>7.884160</td>\n",
              "      <td>0.331329</td>\n",
              "      <td>11.760232</td>\n",
              "      <td>0.476951</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>min</th>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.078000</td>\n",
              "      <td>21.000000</td>\n",
              "      <td>0.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>25%</th>\n",
              "      <td>1.000000</td>\n",
              "      <td>99.000000</td>\n",
              "      <td>62.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>27.300000</td>\n",
              "      <td>0.243750</td>\n",
              "      <td>24.000000</td>\n",
              "      <td>0.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>50%</th>\n",
              "      <td>3.000000</td>\n",
              "      <td>117.000000</td>\n",
              "      <td>72.000000</td>\n",
              "      <td>23.000000</td>\n",
              "      <td>30.500000</td>\n",
              "      <td>32.000000</td>\n",
              "      <td>0.372500</td>\n",
              "      <td>29.000000</td>\n",
              "      <td>0.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>75%</th>\n",
              "      <td>6.000000</td>\n",
              "      <td>140.250000</td>\n",
              "      <td>80.000000</td>\n",
              "      <td>32.000000</td>\n",
              "      <td>127.250000</td>\n",
              "      <td>36.600000</td>\n",
              "      <td>0.626250</td>\n",
              "      <td>41.000000</td>\n",
              "      <td>1.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>max</th>\n",
              "      <td>17.000000</td>\n",
              "      <td>199.000000</td>\n",
              "      <td>122.000000</td>\n",
              "      <td>99.000000</td>\n",
              "      <td>846.000000</td>\n",
              "      <td>67.100000</td>\n",
              "      <td>2.420000</td>\n",
              "      <td>81.000000</td>\n",
              "      <td>1.000000</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "             preg      plasma      bpress  ...  pedigree_fun         age       class\n",
              "count  768.000000  768.000000  768.000000  ...    768.000000  768.000000  768.000000\n",
              "mean     3.845052  120.894531   69.105469  ...      0.471876   33.240885    0.348958\n",
              "std      3.369578   31.972618   19.355807  ...      0.331329   11.760232    0.476951\n",
              "min      0.000000    0.000000    0.000000  ...      0.078000   21.000000    0.000000\n",
              "25%      1.000000   99.000000   62.000000  ...      0.243750   24.000000    0.000000\n",
              "50%      3.000000  117.000000   72.000000  ...      0.372500   29.000000    0.000000\n",
              "75%      6.000000  140.250000   80.000000  ...      0.626250   41.000000    1.000000\n",
              "max     17.000000  199.000000  122.000000  ...      2.420000   81.000000    1.000000\n",
              "\n",
              "[8 rows x 9 columns]"
            ]
          },
          "metadata": {},
          "execution_count": 42
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Split data"
      ],
      "metadata": {
        "id": "yO9trkCoT82t"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "X = dataset.drop(\"class\", axis=1)\n",
        "y = dataset[[\"class\"]]\n",
        "\n",
        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=1)"
      ],
      "metadata": {
        "id": "fRDDKG-iT_aW"
      },
      "execution_count": 43,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Model"
      ],
      "metadata": {
        "id": "dPmrHAUKUrtd"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "model = GaussianNB()\n",
        "\n",
        "model.fit(X_train, y_train)\n",
        "\n",
        "y_pred = model.predict(X_test)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "DqulwUdAUsfy",
        "outputId": "b651b982-1eb9-460c-9e2e-f9695f0c7ba6"
      },
      "execution_count": 48,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.7/dist-packages/sklearn/utils/validation.py:985: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
            "  y = column_or_1d(y, warn=True)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Evaluation"
      ],
      "metadata": {
        "id": "1eL1vGyNU1bp"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "print(\"Train accuracy: \", accuracy_score(y_train, model.predict(X_train)))\n",
        "\n",
        "print(\"Test accuracy: \", accuracy_score(y_test, y_pred))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "2b2lSxpEU3K8",
        "outputId": "2b36215f-f493-40a0-a635-bbe5982ee040"
      },
      "execution_count": 49,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Train accuracy:  0.7616387337057728\n",
            "Test accuracy:  0.7835497835497836\n"
          ]
        }
      ]
    }
  ]
 }
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"name": "lab2_ML.ipynb",
	"provenance": [],
	"collapsed_sections": [],
	"authorship_tag": "ABX9TyPjUzHROzAv1kxW6FzZDrLc",
	"include_colab_link": true
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	},
	"language_info": {
	"name": "python"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/gist/executed/80bd8ad250e460dc07eb8caeed8d4d16/lab2_ml.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "markdown",
	"source": [
	"Importing the Libraries"
	],
	"metadata": {
	"id": "TIugzd00K2sQ"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"import numpy as np\n",
	"import matplotlib.pyplot as plt\n",
	"import pandas as pd\n",
	"from sklearn.metrics import confusion_matrix\n",
	"from sklearn.metrics import accuracy_score \n",
	"from sklearn.model_selection import train_test_split\n",
	"from sklearn.naive_bayes import GaussianNB\n",
	"from sklearn.preprocessing import StandardScaler"
	],
	"metadata": {
	"id": "ZybPdN33K1BC"
	},
	"execution_count": 33,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"source": [
	"Naive Bayes Classification on Iris dataset"
	],
	"metadata": {
	"id": "JCnY0xK0KrvL"
	}
	},
	{
	"cell_type": "code",
	"execution_count": 34,
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 206
	},
	"id": "D_44JWCSKjYL",
	"outputId": "cb50981f-0493-47ab-9356-e8ff0d70e4d1"
	},
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>sepal_length</th>\n",
	" <th>sepal_width</th>\n",
	" <th>petal_length</th>\n",
	" <th>petal_width</th>\n",
	" <th>species</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>5.1</td>\n",
	" <td>3.5</td>\n",
	" <td>1.4</td>\n",
	" <td>0.2</td>\n",
	" <td>setosa</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>4.9</td>\n",
	" <td>3.0</td>\n",
	" <td>1.4</td>\n",
	" <td>0.2</td>\n",
	" <td>setosa</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>4.7</td>\n",
	" <td>3.2</td>\n",
	" <td>1.3</td>\n",
	" <td>0.2</td>\n",
	" <td>setosa</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>4.6</td>\n",
	" <td>3.1</td>\n",
	" <td>1.5</td>\n",
	" <td>0.2</td>\n",
	" <td>setosa</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>5.0</td>\n",
	" <td>3.6</td>\n",
	" <td>1.4</td>\n",
	" <td>0.2</td>\n",
	" <td>setosa</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" sepal_length sepal_width petal_length petal_width species\n",
	"0 5.1 3.5 1.4 0.2 setosa\n",
	"1 4.9 3.0 1.4 0.2 setosa\n",
	"2 4.7 3.2 1.3 0.2 setosa\n",
	"3 4.6 3.1 1.5 0.2 setosa\n",
	"4 5.0 3.6 1.4 0.2 setosa"
	]
	},
	"metadata": {},
	"execution_count": 34
	}
	],
	"source": [
	"dataset = pd.read_csv('https://raw.githubusercontent.com/mk-gurucharan/Classification/master/IrisDataset.csv')\n",
	"\n",
	"X = dataset.iloc[:,:4].values\n",
	"y = dataset['species'].values\n",
	"\n",
	"dataset.head(5)"
	]
	},
	{
	"cell_type": "markdown",
	"source": [
	"Splitting the dataset into the Training set and Test set"
	],
	"metadata": {
	"id": "p5XnMpc_LAsp"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"\n",
	"\n",
	"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)"
	],
	"metadata": {
	"id": "ye1gWl5aLCM8"
	},
	"execution_count": 35,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"source": [
	"Feature Scaling"
	],
	"metadata": {
	"id": "cilubRzpLFir"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"\n",
	"\n",
	"sc = StandardScaler()\n",
	"X_train = sc.fit_transform(X_train)\n",
	"X_test = sc.transform(X_test)"
	],
	"metadata": {
	"id": "Cej4uIYzLHAU"
	},
	"execution_count": 36,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"source": [
	"Training the Naive Bayes Classification model on the Training Set"
	],
	"metadata": {
	"id": "6LItoI0HLL3q"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"\n",
	"\n",
	"classifier = GaussianNB()\n",
	"classifier.fit(X_train, y_train)"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "kKbDSNH3LOLu",
	"outputId": "3cd647ea-4d53-4b3b-e4e6-d684e15f70cd"
	},
	"execution_count": 37,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"GaussianNB()"
	]
	},
	"metadata": {},
	"execution_count": 37
	}
	]
	},
	{
	"cell_type": "markdown",
	"source": [
	"Predicting the Test set results"
	],
	"metadata": {
	"id": "drasHvdPLQTo"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"y_pred = classifier.predict(X_test) \n",
	"y_pred"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "LdFuzce5LSSG",
	"outputId": "89d86c70-e1d7-48a1-eca1-3e405a447409"
	},
	"execution_count": 38,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"array(['virginica', 'virginica', 'versicolor', 'setosa', 'versicolor',\n",
	" 'versicolor', 'versicolor', 'setosa', 'versicolor', 'virginica',\n",
	" 'setosa', 'versicolor', 'versicolor', 'virginica', 'versicolor',\n",
	" 'versicolor', 'virginica', 'virginica', 'setosa', 'setosa',\n",
	" 'versicolor', 'versicolor', 'setosa', 'virginica', 'virginica',\n",
	" 'versicolor', 'setosa', 'setosa', 'versicolor', 'setosa'],\n",
	" dtype='<U10')"
	]
	},
	"metadata": {},
	"execution_count": 38
	}
	]
	},
	{
	"cell_type": "markdown",
	"source": [
	"Confusion Matrix and Accuracy"
	],
	"metadata": {
	"id": "O4qEPDsLLWwh"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"cm = confusion_matrix(y_test, y_pred)\n",
	"\n",
	"print (\"Accuracy : \", accuracy_score(y_test, y_pred))\n",
	"cm"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "POZWELk4LXMI",
	"outputId": "81bf9715-3b78-43d1-bb9f-289678f3cdc0"
	},
	"execution_count": 39,
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"text": [
	"Accuracy : 0.8666666666666667\n"
	]
	},
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"array([[ 9, 0, 0],\n",
	" [ 0, 11, 2],\n",
	" [ 0, 2, 6]])"
	]
	},
	"metadata": {},
	"execution_count": 39
	}
	]
	},
	{
	"cell_type": "markdown",
	"source": [
	"Comparing the Real Values with Predicted Values"
	],
	"metadata": {
	"id": "nMU65x8jLpgW"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"df = pd.DataFrame({'Real Values':y_test, 'Predicted Values':y_pred})\n",
	"df"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 990
	},
	"id": "AmM4bJS5Lp94",
	"outputId": "af6edc49-41bc-42d6-8698-f717f559c7e2"
	},
	"execution_count": 40,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>Real Values</th>\n",
	" <th>Predicted Values</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>virginica</td>\n",
	" <td>virginica</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>virginica</td>\n",
	" <td>virginica</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>versicolor</td>\n",
	" <td>versicolor</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>setosa</td>\n",
	" <td>setosa</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>virginica</td>\n",
	" <td>versicolor</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>5</th>\n",
	" <td>versicolor</td>\n",
	" <td>versicolor</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>6</th>\n",
	" <td>versicolor</td>\n",
	" <td>versicolor</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>7</th>\n",
	" <td>setosa</td>\n",
	" <td>setosa</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>8</th>\n",
	" <td>virginica</td>\n",
	" <td>versicolor</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>9</th>\n",
	" <td>virginica</td>\n",
	" <td>virginica</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>10</th>\n",
	" <td>setosa</td>\n",
	" <td>setosa</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>11</th>\n",
	" <td>versicolor</td>\n",
	" <td>versicolor</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>12</th>\n",
	" <td>versicolor</td>\n",
	" <td>versicolor</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>13</th>\n",
	" <td>virginica</td>\n",
	" <td>virginica</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>14</th>\n",
	" <td>versicolor</td>\n",
	" <td>versicolor</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>15</th>\n",
	" <td>versicolor</td>\n",
	" <td>versicolor</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>16</th>\n",
	" <td>virginica</td>\n",
	" <td>virginica</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>17</th>\n",
	" <td>versicolor</td>\n",
	" <td>virginica</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>18</th>\n",
	" <td>setosa</td>\n",
	" <td>setosa</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>19</th>\n",
	" <td>setosa</td>\n",
	" <td>setosa</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>20</th>\n",
	" <td>versicolor</td>\n",
	" <td>versicolor</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>21</th>\n",
	" <td>versicolor</td>\n",
	" <td>versicolor</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>22</th>\n",
	" <td>setosa</td>\n",
	" <td>setosa</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>23</th>\n",
	" <td>virginica</td>\n",
	" <td>virginica</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>24</th>\n",
	" <td>versicolor</td>\n",
	" <td>virginica</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>25</th>\n",
	" <td>versicolor</td>\n",
	" <td>versicolor</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>26</th>\n",
	" <td>setosa</td>\n",
	" <td>setosa</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>27</th>\n",
	" <td>setosa</td>\n",
	" <td>setosa</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>28</th>\n",
	" <td>versicolor</td>\n",
	" <td>versicolor</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>29</th>\n",
	" <td>setosa</td>\n",
	" <td>setosa</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" Real Values Predicted Values\n",
	"0 virginica virginica\n",
	"1 virginica virginica\n",
	"2 versicolor versicolor\n",
	"3 setosa setosa\n",
	"4 virginica versicolor\n",
	"5 versicolor versicolor\n",
	"6 versicolor versicolor\n",
	"7 setosa setosa\n",
	"8 virginica versicolor\n",
	"9 virginica virginica\n",
	"10 setosa setosa\n",
	"11 versicolor versicolor\n",
	"12 versicolor versicolor\n",
	"13 virginica virginica\n",
	"14 versicolor versicolor\n",
	"15 versicolor versicolor\n",
	"16 virginica virginica\n",
	"17 versicolor virginica\n",
	"18 setosa setosa\n",
	"19 setosa setosa\n",
	"20 versicolor versicolor\n",
	"21 versicolor versicolor\n",
	"22 setosa setosa\n",
	"23 virginica virginica\n",
	"24 versicolor virginica\n",
	"25 versicolor versicolor\n",
	"26 setosa setosa\n",
	"27 setosa setosa\n",
	"28 versicolor versicolor\n",
	"29 setosa setosa"
	]
	},
	"metadata": {},
	"execution_count": 40
	}
	]
	},
	{
	"cell_type": "markdown",
	"source": [
	"# Pima dataset"
	],
	"metadata": {
	"id": "9N6LtQTESUP6"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"dataset = pd.read_csv('https://drive.google.com/uc?id=1aQWzKF2sn8DSy2c9B66C-FFlichp9sv8', header=0)\n",
	"\n",
	"dataset.head()"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 206
	},
	"id": "BN1EgOeCSWPY",
	"outputId": "eb8c9841-e179-43f2-c4ab-af5902b138e4"
	},
	"execution_count": 41,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>preg</th>\n",
	" <th>plasma</th>\n",
	" <th>bpress</th>\n",
	" <th>triceps_thick</th>\n",
	" <th>insulin</th>\n",
	" <th>BMI</th>\n",
	" <th>pedigree_fun</th>\n",
	" <th>age</th>\n",
	" <th>class</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>6</td>\n",
	" <td>148</td>\n",
	" <td>72</td>\n",
	" <td>35</td>\n",
	" <td>0</td>\n",
	" <td>33.6</td>\n",
	" <td>0.627</td>\n",
	" <td>50</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>1</td>\n",
	" <td>85</td>\n",
	" <td>66</td>\n",
	" <td>29</td>\n",
	" <td>0</td>\n",
	" <td>26.6</td>\n",
	" <td>0.351</td>\n",
	" <td>31</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>8</td>\n",
	" <td>183</td>\n",
	" <td>64</td>\n",
	" <td>0</td>\n",
	" <td>0</td>\n",
	" <td>23.3</td>\n",
	" <td>0.672</td>\n",
	" <td>32</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>1</td>\n",
	" <td>89</td>\n",
	" <td>66</td>\n",
	" <td>23</td>\n",
	" <td>94</td>\n",
	" <td>28.1</td>\n",
	" <td>0.167</td>\n",
	" <td>21</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>0</td>\n",
	" <td>137</td>\n",
	" <td>40</td>\n",
	" <td>35</td>\n",
	" <td>168</td>\n",
	" <td>43.1</td>\n",
	" <td>2.288</td>\n",
	" <td>33</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" preg plasma bpress triceps_thick insulin BMI pedigree_fun age class\n",
	"0 6 148 72 35 0 33.6 0.627 50 1\n",
	"1 1 85 66 29 0 26.6 0.351 31 0\n",
	"2 8 183 64 0 0 23.3 0.672 32 1\n",
	"3 1 89 66 23 94 28.1 0.167 21 0\n",
	"4 0 137 40 35 168 43.1 2.288 33 1"
	]
	},
	"metadata": {},
	"execution_count": 41
	}
	]
	},
	{
	"cell_type": "markdown",
	"source": [
	"Describe data"
	],
	"metadata": {
	"id": "7asDfrIMTzy0"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"dataset.describe()"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 300
	},
	"id": "999s1jhzT1HR",
	"outputId": "0ab7f951-9242-47fd-801b-7f873dd81a71"
	},
	"execution_count": 42,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>preg</th>\n",
	" <th>plasma</th>\n",
	" <th>bpress</th>\n",
	" <th>triceps_thick</th>\n",
	" <th>insulin</th>\n",
	" <th>BMI</th>\n",
	" <th>pedigree_fun</th>\n",
	" <th>age</th>\n",
	" <th>class</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>count</th>\n",
	" <td>768.000000</td>\n",
	" <td>768.000000</td>\n",
	" <td>768.000000</td>\n",
	" <td>768.000000</td>\n",
	" <td>768.000000</td>\n",
	" <td>768.000000</td>\n",
	" <td>768.000000</td>\n",
	" <td>768.000000</td>\n",
	" <td>768.000000</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>mean</th>\n",
	" <td>3.845052</td>\n",
	" <td>120.894531</td>\n",
	" <td>69.105469</td>\n",
	" <td>20.536458</td>\n",
	" <td>79.799479</td>\n",
	" <td>31.992578</td>\n",
	" <td>0.471876</td>\n",
	" <td>33.240885</td>\n",
	" <td>0.348958</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>std</th>\n",
	" <td>3.369578</td>\n",
	" <td>31.972618</td>\n",
	" <td>19.355807</td>\n",
	" <td>15.952218</td>\n",
	" <td>115.244002</td>\n",
	" <td>7.884160</td>\n",
	" <td>0.331329</td>\n",
	" <td>11.760232</td>\n",
	" <td>0.476951</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>min</th>\n",
	" <td>0.000000</td>\n",
	" <td>0.000000</td>\n",
	" <td>0.000000</td>\n",
	" <td>0.000000</td>\n",
	" <td>0.000000</td>\n",
	" <td>0.000000</td>\n",
	" <td>0.078000</td>\n",
	" <td>21.000000</td>\n",
	" <td>0.000000</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>25%</th>\n",
	" <td>1.000000</td>\n",
	" <td>99.000000</td>\n",
	" <td>62.000000</td>\n",
	" <td>0.000000</td>\n",
	" <td>0.000000</td>\n",
	" <td>27.300000</td>\n",
	" <td>0.243750</td>\n",
	" <td>24.000000</td>\n",
	" <td>0.000000</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>50%</th>\n",
	" <td>3.000000</td>\n",
	" <td>117.000000</td>\n",
	" <td>72.000000</td>\n",
	" <td>23.000000</td>\n",
	" <td>30.500000</td>\n",
	" <td>32.000000</td>\n",
	" <td>0.372500</td>\n",
	" <td>29.000000</td>\n",
	" <td>0.000000</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>75%</th>\n",
	" <td>6.000000</td>\n",
	" <td>140.250000</td>\n",
	" <td>80.000000</td>\n",
	" <td>32.000000</td>\n",
	" <td>127.250000</td>\n",
	" <td>36.600000</td>\n",
	" <td>0.626250</td>\n",
	" <td>41.000000</td>\n",
	" <td>1.000000</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>max</th>\n",
	" <td>17.000000</td>\n",
	" <td>199.000000</td>\n",
	" <td>122.000000</td>\n",
	" <td>99.000000</td>\n",
	" <td>846.000000</td>\n",
	" <td>67.100000</td>\n",
	" <td>2.420000</td>\n",
	" <td>81.000000</td>\n",
	" <td>1.000000</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" preg plasma bpress ... pedigree_fun age class\n",
	"count 768.000000 768.000000 768.000000 ... 768.000000 768.000000 768.000000\n",
	"mean 3.845052 120.894531 69.105469 ... 0.471876 33.240885 0.348958\n",
	"std 3.369578 31.972618 19.355807 ... 0.331329 11.760232 0.476951\n",
	"min 0.000000 0.000000 0.000000 ... 0.078000 21.000000 0.000000\n",
	"25% 1.000000 99.000000 62.000000 ... 0.243750 24.000000 0.000000\n",
	"50% 3.000000 117.000000 72.000000 ... 0.372500 29.000000 0.000000\n",
	"75% 6.000000 140.250000 80.000000 ... 0.626250 41.000000 1.000000\n",
	"max 17.000000 199.000000 122.000000 ... 2.420000 81.000000 1.000000\n",
	"\n",
	"[8 rows x 9 columns]"
	]
	},
	"metadata": {},
	"execution_count": 42
	}
	]
	},
	{
	"cell_type": "markdown",
	"source": [
	"Split data"
	],
	"metadata": {
	"id": "yO9trkCoT82t"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"X = dataset.drop(\"class\", axis=1)\n",
	"y = dataset[[\"class\"]]\n",
	"\n",
	"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=1)"
	],
	"metadata": {
	"id": "fRDDKG-iT_aW"
	},
	"execution_count": 43,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"source": [
	"Model"
	],
	"metadata": {
	"id": "dPmrHAUKUrtd"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"model = GaussianNB()\n",
	"\n",
	"model.fit(X_train, y_train)\n",
	"\n",
	"y_pred = model.predict(X_test)"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "DqulwUdAUsfy",
	"outputId": "b651b982-1eb9-460c-9e2e-f9695f0c7ba6"
	},
	"execution_count": 48,
	"outputs": [
	{
	"output_type": "stream",
	"name": "stderr",
	"text": [
	"/usr/local/lib/python3.7/dist-packages/sklearn/utils/validation.py:985: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
	" y = column_or_1d(y, warn=True)\n"
	]
	}
	]
	},
	{
	"cell_type": "markdown",
	"source": [
	"Evaluation"
	],
	"metadata": {
	"id": "1eL1vGyNU1bp"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"print(\"Train accuracy: \", accuracy_score(y_train, model.predict(X_train)))\n",
	"\n",
	"print(\"Test accuracy: \", accuracy_score(y_test, y_pred))"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "2b2lSxpEU3K8",
	"outputId": "2b36215f-f493-40a0-a635-bbe5982ee040"
	},
	"execution_count": 49,
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"text": [
	"Train accuracy: 0.7616387337057728\n",
	"Test accuracy: 0.7835497835497836\n"
	]
	}
	]
	}
	]
	}