Created
June 4, 2024 21:13
-
-
Save alonsosilvaallende/8ebba7981dcf03c5e446f6532590a308 to your computer and use it in GitHub Desktop.
homework_xai_2024-06.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"authorship_tag": "ABX9TyMfpJtAKfgHArMiVYVR5iXV", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/alonsosilvaallende/8ebba7981dcf03c5e446f6532590a308/homework_xai_2024-06.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"%pip install --quiet shap" | |
], | |
"metadata": { | |
"id": "z_c_2ODBgoU-" | |
}, | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"id": "Hmk5CBr_e9iL" | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"# Remote URL of the white-wine quality CSV and the local filename used to cache it\n", | |
"url = 'https://archive.ics.uci.edu/ml/' \\\n", | |
"'machine-learning-databases/' \\\n", | |
"'wine-quality/winequality-white.csv'\n", | |
"file_name = 'wine.csv'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"try:\n", | |
" wine = pd.read_csv(file_name)\n", | |
"except FileNotFoundError:\n", | |
" print(f'Downloading {file_name} from {url}...')\n", | |
" wine = pd.read_csv(url, sep=\";\")\n", | |
" wine.to_csv(file_name, index=False)\n", | |
" print('Download complete!')" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "aiFeYP00faga", | |
"outputId": "17ebff03-f6fd-47f7-eb5f-16073a1aec09" | |
}, | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Downloading wine.csv from https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv...\n", | |
"Download complete!\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"wine.head()" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 223 | |
}, | |
"id": "9qripkm70rCg", | |
"outputId": "952efcd6-383f-4427-c4b2-8e70724b74f1" | |
}, | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
" fixed acidity volatile acidity citric acid residual sugar chlorides \\\n", | |
"0 7.0 0.27 0.36 20.7 0.045 \n", | |
"1 6.3 0.30 0.34 1.6 0.049 \n", | |
"2 8.1 0.28 0.40 6.9 0.050 \n", | |
"3 7.2 0.23 0.32 8.5 0.058 \n", | |
"4 7.2 0.23 0.32 8.5 0.058 \n", | |
"\n", | |
" free sulfur dioxide total sulfur dioxide density pH sulphates \\\n", | |
"0 45.0 170.0 1.0010 3.00 0.45 \n", | |
"1 14.0 132.0 0.9940 3.30 0.49 \n", | |
"2 30.0 97.0 0.9951 3.26 0.44 \n", | |
"3 47.0 186.0 0.9956 3.19 0.40 \n", | |
"4 47.0 186.0 0.9956 3.19 0.40 \n", | |
"\n", | |
" alcohol quality \n", | |
"0 8.8 6 \n", | |
"1 9.5 6 \n", | |
"2 10.1 6 \n", | |
"3 9.9 6 \n", | |
"4 9.9 6 " | |
], | |
"text/html": [ | |
"\n", | |
" <div id=\"df-58fe29bd-8a10-4c4f-a588-c14b9ecccbe0\" class=\"colab-df-container\">\n", | |
" <div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>fixed acidity</th>\n", | |
" <th>volatile acidity</th>\n", | |
" <th>citric acid</th>\n", | |
" <th>residual sugar</th>\n", | |
" <th>chlorides</th>\n", | |
" <th>free sulfur dioxide</th>\n", | |
" <th>total sulfur dioxide</th>\n", | |
" <th>density</th>\n", | |
" <th>pH</th>\n", | |
" <th>sulphates</th>\n", | |
" <th>alcohol</th>\n", | |
" <th>quality</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>7.0</td>\n", | |
" <td>0.27</td>\n", | |
" <td>0.36</td>\n", | |
" <td>20.7</td>\n", | |
" <td>0.045</td>\n", | |
" <td>45.0</td>\n", | |
" <td>170.0</td>\n", | |
" <td>1.0010</td>\n", | |
" <td>3.00</td>\n", | |
" <td>0.45</td>\n", | |
" <td>8.8</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>6.3</td>\n", | |
" <td>0.30</td>\n", | |
" <td>0.34</td>\n", | |
" <td>1.6</td>\n", | |
" <td>0.049</td>\n", | |
" <td>14.0</td>\n", | |
" <td>132.0</td>\n", | |
" <td>0.9940</td>\n", | |
" <td>3.30</td>\n", | |
" <td>0.49</td>\n", | |
" <td>9.5</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>8.1</td>\n", | |
" <td>0.28</td>\n", | |
" <td>0.40</td>\n", | |
" <td>6.9</td>\n", | |
" <td>0.050</td>\n", | |
" <td>30.0</td>\n", | |
" <td>97.0</td>\n", | |
" <td>0.9951</td>\n", | |
" <td>3.26</td>\n", | |
" <td>0.44</td>\n", | |
" <td>10.1</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>7.2</td>\n", | |
" <td>0.23</td>\n", | |
" <td>0.32</td>\n", | |
" <td>8.5</td>\n", | |
" <td>0.058</td>\n", | |
" <td>47.0</td>\n", | |
" <td>186.0</td>\n", | |
" <td>0.9956</td>\n", | |
" <td>3.19</td>\n", | |
" <td>0.40</td>\n", | |
" <td>9.9</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>7.2</td>\n", | |
" <td>0.23</td>\n", | |
" <td>0.32</td>\n", | |
" <td>8.5</td>\n", | |
" <td>0.058</td>\n", | |
" <td>47.0</td>\n", | |
" <td>186.0</td>\n", | |
" <td>0.9956</td>\n", | |
" <td>3.19</td>\n", | |
" <td>0.40</td>\n", | |
" <td>9.9</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>\n", | |
" <div class=\"colab-df-buttons\">\n", | |
"\n", | |
" <div class=\"colab-df-container\">\n", | |
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-58fe29bd-8a10-4c4f-a588-c14b9ecccbe0')\"\n", | |
" title=\"Convert this dataframe to an interactive table.\"\n", | |
" style=\"display:none;\">\n", | |
"\n", | |
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", | |
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n", | |
" </svg>\n", | |
" </button>\n", | |
"\n", | |
" <style>\n", | |
" .colab-df-container {\n", | |
" display:flex;\n", | |
" gap: 12px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert {\n", | |
" background-color: #E8F0FE;\n", | |
" border: none;\n", | |
" border-radius: 50%;\n", | |
" cursor: pointer;\n", | |
" display: none;\n", | |
" fill: #1967D2;\n", | |
" height: 32px;\n", | |
" padding: 0 0 0 0;\n", | |
" width: 32px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert:hover {\n", | |
" background-color: #E2EBFA;\n", | |
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
" fill: #174EA6;\n", | |
" }\n", | |
"\n", | |
" .colab-df-buttons div {\n", | |
" margin-bottom: 4px;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert {\n", | |
" background-color: #3B4455;\n", | |
" fill: #D2E3FC;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert:hover {\n", | |
" background-color: #434B5C;\n", | |
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
" fill: #FFFFFF;\n", | |
" }\n", | |
" </style>\n", | |
"\n", | |
" <script>\n", | |
" const buttonEl =\n", | |
" document.querySelector('#df-58fe29bd-8a10-4c4f-a588-c14b9ecccbe0 button.colab-df-convert');\n", | |
" buttonEl.style.display =\n", | |
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
"\n", | |
" async function convertToInteractive(key) {\n", | |
" const element = document.querySelector('#df-58fe29bd-8a10-4c4f-a588-c14b9ecccbe0');\n", | |
" const dataTable =\n", | |
" await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
" [key], {});\n", | |
" if (!dataTable) return;\n", | |
"\n", | |
" const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
" + ' to learn more about interactive tables.';\n", | |
" element.innerHTML = '';\n", | |
" dataTable['output_type'] = 'display_data';\n", | |
" await google.colab.output.renderOutput(dataTable, element);\n", | |
" const docLink = document.createElement('div');\n", | |
" docLink.innerHTML = docLinkHtml;\n", | |
" element.appendChild(docLink);\n", | |
" }\n", | |
" </script>\n", | |
" </div>\n", | |
"\n", | |
"\n", | |
"<div id=\"df-0642b497-460d-4c89-882b-36674f633424\">\n", | |
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-0642b497-460d-4c89-882b-36674f633424')\"\n", | |
" title=\"Suggest charts\"\n", | |
" style=\"display:none;\">\n", | |
"\n", | |
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
" width=\"24px\">\n", | |
" <g>\n", | |
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n", | |
" </g>\n", | |
"</svg>\n", | |
" </button>\n", | |
"\n", | |
"<style>\n", | |
" .colab-df-quickchart {\n", | |
" --bg-color: #E8F0FE;\n", | |
" --fill-color: #1967D2;\n", | |
" --hover-bg-color: #E2EBFA;\n", | |
" --hover-fill-color: #174EA6;\n", | |
" --disabled-fill-color: #AAA;\n", | |
" --disabled-bg-color: #DDD;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-quickchart {\n", | |
" --bg-color: #3B4455;\n", | |
" --fill-color: #D2E3FC;\n", | |
" --hover-bg-color: #434B5C;\n", | |
" --hover-fill-color: #FFFFFF;\n", | |
" --disabled-bg-color: #3B4455;\n", | |
" --disabled-fill-color: #666;\n", | |
" }\n", | |
"\n", | |
" .colab-df-quickchart {\n", | |
" background-color: var(--bg-color);\n", | |
" border: none;\n", | |
" border-radius: 50%;\n", | |
" cursor: pointer;\n", | |
" display: none;\n", | |
" fill: var(--fill-color);\n", | |
" height: 32px;\n", | |
" padding: 0;\n", | |
" width: 32px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-quickchart:hover {\n", | |
" background-color: var(--hover-bg-color);\n", | |
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
" fill: var(--button-hover-fill-color);\n", | |
" }\n", | |
"\n", | |
" .colab-df-quickchart-complete:disabled,\n", | |
" .colab-df-quickchart-complete:disabled:hover {\n", | |
" background-color: var(--disabled-bg-color);\n", | |
" fill: var(--disabled-fill-color);\n", | |
" box-shadow: none;\n", | |
" }\n", | |
"\n", | |
" .colab-df-spinner {\n", | |
" border: 2px solid var(--fill-color);\n", | |
" border-color: transparent;\n", | |
" border-bottom-color: var(--fill-color);\n", | |
" animation:\n", | |
" spin 1s steps(1) infinite;\n", | |
" }\n", | |
"\n", | |
" @keyframes spin {\n", | |
" 0% {\n", | |
" border-color: transparent;\n", | |
" border-bottom-color: var(--fill-color);\n", | |
" border-left-color: var(--fill-color);\n", | |
" }\n", | |
" 20% {\n", | |
" border-color: transparent;\n", | |
" border-left-color: var(--fill-color);\n", | |
" border-top-color: var(--fill-color);\n", | |
" }\n", | |
" 30% {\n", | |
" border-color: transparent;\n", | |
" border-left-color: var(--fill-color);\n", | |
" border-top-color: var(--fill-color);\n", | |
" border-right-color: var(--fill-color);\n", | |
" }\n", | |
" 40% {\n", | |
" border-color: transparent;\n", | |
" border-right-color: var(--fill-color);\n", | |
" border-top-color: var(--fill-color);\n", | |
" }\n", | |
" 60% {\n", | |
" border-color: transparent;\n", | |
" border-right-color: var(--fill-color);\n", | |
" }\n", | |
" 80% {\n", | |
" border-color: transparent;\n", | |
" border-right-color: var(--fill-color);\n", | |
" border-bottom-color: var(--fill-color);\n", | |
" }\n", | |
" 90% {\n", | |
" border-color: transparent;\n", | |
" border-bottom-color: var(--fill-color);\n", | |
" }\n", | |
" }\n", | |
"</style>\n", | |
"\n", | |
" <script>\n", | |
" async function quickchart(key) {\n", | |
" const quickchartButtonEl =\n", | |
" document.querySelector('#' + key + ' button');\n", | |
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n", | |
" quickchartButtonEl.classList.add('colab-df-spinner');\n", | |
" try {\n", | |
" const charts = await google.colab.kernel.invokeFunction(\n", | |
" 'suggestCharts', [key], {});\n", | |
" } catch (error) {\n", | |
" console.error('Error during call to suggestCharts:', error);\n", | |
" }\n", | |
" quickchartButtonEl.classList.remove('colab-df-spinner');\n", | |
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n", | |
" }\n", | |
" (() => {\n", | |
" let quickchartButtonEl =\n", | |
" document.querySelector('#df-0642b497-460d-4c89-882b-36674f633424 button');\n", | |
" quickchartButtonEl.style.display =\n", | |
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
" })();\n", | |
" </script>\n", | |
"</div>\n", | |
" </div>\n", | |
" </div>\n" | |
], | |
"application/vnd.google.colaboratory.intrinsic+json": { | |
"type": "dataframe", | |
"variable_name": "wine", | |
"summary": "{\n \"name\": \"wine\",\n \"rows\": 4898,\n \"fields\": [\n {\n \"column\": \"fixed acidity\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.843868227687513,\n \"min\": 3.8,\n \"max\": 14.2,\n \"num_unique_values\": 68,\n \"samples\": [\n 10.3,\n 5.8,\n 6.2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"volatile acidity\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.10079454842486534,\n \"min\": 0.08,\n \"max\": 1.1,\n \"num_unique_values\": 125,\n \"samples\": [\n 0.14,\n 0.595,\n 0.13\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"citric acid\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.12101980420298249,\n \"min\": 0.0,\n \"max\": 1.66,\n \"num_unique_values\": 87,\n \"samples\": [\n 0.64,\n 0.36,\n 0.24\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"residual sugar\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.072057784014881,\n \"min\": 0.6,\n \"max\": 65.8,\n \"num_unique_values\": 310,\n \"samples\": [\n 15.5,\n 19.25,\n 3.3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"chlorides\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.021847968093728798,\n \"min\": 0.009,\n \"max\": 0.346,\n \"num_unique_values\": 160,\n \"samples\": [\n 0.167,\n 0.133,\n 0.015\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"free sulfur dioxide\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17.00713732523259,\n \"min\": 2.0,\n \"max\": 289.0,\n \"num_unique_values\": 132,\n \"samples\": [\n 24.0,\n 122.5,\n 7.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total sulfur dioxide\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 42.49806455414291,\n \"min\": 9.0,\n \"max\": 440.0,\n \"num_unique_values\": 251,\n \"samples\": [\n 260.0,\n 63.0,\n 70.0\n ],\n 
\"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"density\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.0029909069169369337,\n \"min\": 0.98711,\n \"max\": 1.03898,\n \"num_unique_values\": 890,\n \"samples\": [\n 0.99362,\n 0.99388,\n 0.9929\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pH\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.1510005996150668,\n \"min\": 2.72,\n \"max\": 3.82,\n \"num_unique_values\": 103,\n \"samples\": [\n 3.34,\n 3.41,\n 3.49\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sulphates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.1141258339488323,\n \"min\": 0.22,\n \"max\": 1.08,\n \"num_unique_values\": 79,\n \"samples\": [\n 0.41,\n 0.45,\n 0.46\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"alcohol\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.230620567757318,\n \"min\": 8.0,\n \"max\": 14.2,\n \"num_unique_values\": 103,\n \"samples\": [\n 12.6,\n 11.3666666666667,\n 10.0333333333333\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"quality\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 3,\n \"max\": 9,\n \"num_unique_values\": 7,\n \"samples\": [\n 6,\n 5,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" | |
} | |
}, | |
"metadata": {}, | |
"execution_count": 4 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"wine[\"quality\"].unique()" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "-hiYGT3874Pm", | |
"outputId": "3de32a82-08fb-4c39-967c-3d133b0bf1cd" | |
}, | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"array([6, 5, 7, 8, 4, 3, 9])" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 5 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"wine.describe().transpose().round(2).drop(columns=\"count\")" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 425 | |
}, | |
"id": "H103r4Sv36r0", | |
"outputId": "d8a079d9-1576-4f83-eddb-6770451e799f" | |
}, | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
" mean std min 25% 50% 75% max\n", | |
"fixed acidity 6.85 0.84 3.80 6.30 6.80 7.30 14.20\n", | |
"volatile acidity 0.28 0.10 0.08 0.21 0.26 0.32 1.10\n", | |
"citric acid 0.33 0.12 0.00 0.27 0.32 0.39 1.66\n", | |
"residual sugar 6.39 5.07 0.60 1.70 5.20 9.90 65.80\n", | |
"chlorides 0.05 0.02 0.01 0.04 0.04 0.05 0.35\n", | |
"free sulfur dioxide 35.31 17.01 2.00 23.00 34.00 46.00 289.00\n", | |
"total sulfur dioxide 138.36 42.50 9.00 108.00 134.00 167.00 440.00\n", | |
"density 0.99 0.00 0.99 0.99 0.99 1.00 1.04\n", | |
"pH 3.19 0.15 2.72 3.09 3.18 3.28 3.82\n", | |
"sulphates 0.49 0.11 0.22 0.41 0.47 0.55 1.08\n", | |
"alcohol 10.51 1.23 8.00 9.50 10.40 11.40 14.20\n", | |
"quality 5.88 0.89 3.00 5.00 6.00 6.00 9.00" | |
], | |
"text/html": [ | |
"\n", | |
" <div id=\"df-d37f35ce-05fd-4af3-97fc-070431dcf4af\" class=\"colab-df-container\">\n", | |
" <div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>mean</th>\n", | |
" <th>std</th>\n", | |
" <th>min</th>\n", | |
" <th>25%</th>\n", | |
" <th>50%</th>\n", | |
" <th>75%</th>\n", | |
" <th>max</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>fixed acidity</th>\n", | |
" <td>6.85</td>\n", | |
" <td>0.84</td>\n", | |
" <td>3.80</td>\n", | |
" <td>6.30</td>\n", | |
" <td>6.80</td>\n", | |
" <td>7.30</td>\n", | |
" <td>14.20</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>volatile acidity</th>\n", | |
" <td>0.28</td>\n", | |
" <td>0.10</td>\n", | |
" <td>0.08</td>\n", | |
" <td>0.21</td>\n", | |
" <td>0.26</td>\n", | |
" <td>0.32</td>\n", | |
" <td>1.10</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>citric acid</th>\n", | |
" <td>0.33</td>\n", | |
" <td>0.12</td>\n", | |
" <td>0.00</td>\n", | |
" <td>0.27</td>\n", | |
" <td>0.32</td>\n", | |
" <td>0.39</td>\n", | |
" <td>1.66</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>residual sugar</th>\n", | |
" <td>6.39</td>\n", | |
" <td>5.07</td>\n", | |
" <td>0.60</td>\n", | |
" <td>1.70</td>\n", | |
" <td>5.20</td>\n", | |
" <td>9.90</td>\n", | |
" <td>65.80</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>chlorides</th>\n", | |
" <td>0.05</td>\n", | |
" <td>0.02</td>\n", | |
" <td>0.01</td>\n", | |
" <td>0.04</td>\n", | |
" <td>0.04</td>\n", | |
" <td>0.05</td>\n", | |
" <td>0.35</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>free sulfur dioxide</th>\n", | |
" <td>35.31</td>\n", | |
" <td>17.01</td>\n", | |
" <td>2.00</td>\n", | |
" <td>23.00</td>\n", | |
" <td>34.00</td>\n", | |
" <td>46.00</td>\n", | |
" <td>289.00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>total sulfur dioxide</th>\n", | |
" <td>138.36</td>\n", | |
" <td>42.50</td>\n", | |
" <td>9.00</td>\n", | |
" <td>108.00</td>\n", | |
" <td>134.00</td>\n", | |
" <td>167.00</td>\n", | |
" <td>440.00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>density</th>\n", | |
" <td>0.99</td>\n", | |
" <td>0.00</td>\n", | |
" <td>0.99</td>\n", | |
" <td>0.99</td>\n", | |
" <td>0.99</td>\n", | |
" <td>1.00</td>\n", | |
" <td>1.04</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>pH</th>\n", | |
" <td>3.19</td>\n", | |
" <td>0.15</td>\n", | |
" <td>2.72</td>\n", | |
" <td>3.09</td>\n", | |
" <td>3.18</td>\n", | |
" <td>3.28</td>\n", | |
" <td>3.82</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>sulphates</th>\n", | |
" <td>0.49</td>\n", | |
" <td>0.11</td>\n", | |
" <td>0.22</td>\n", | |
" <td>0.41</td>\n", | |
" <td>0.47</td>\n", | |
" <td>0.55</td>\n", | |
" <td>1.08</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>alcohol</th>\n", | |
" <td>10.51</td>\n", | |
" <td>1.23</td>\n", | |
" <td>8.00</td>\n", | |
" <td>9.50</td>\n", | |
" <td>10.40</td>\n", | |
" <td>11.40</td>\n", | |
" <td>14.20</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>quality</th>\n", | |
" <td>5.88</td>\n", | |
" <td>0.89</td>\n", | |
" <td>3.00</td>\n", | |
" <td>5.00</td>\n", | |
" <td>6.00</td>\n", | |
" <td>6.00</td>\n", | |
" <td>9.00</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>\n", | |
" <div class=\"colab-df-buttons\">\n", | |
"\n", | |
" <div class=\"colab-df-container\">\n", | |
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-d37f35ce-05fd-4af3-97fc-070431dcf4af')\"\n", | |
" title=\"Convert this dataframe to an interactive table.\"\n", | |
" style=\"display:none;\">\n", | |
"\n", | |
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", | |
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n", | |
" </svg>\n", | |
" </button>\n", | |
"\n", | |
" <style>\n", | |
" .colab-df-container {\n", | |
" display:flex;\n", | |
" gap: 12px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert {\n", | |
" background-color: #E8F0FE;\n", | |
" border: none;\n", | |
" border-radius: 50%;\n", | |
" cursor: pointer;\n", | |
" display: none;\n", | |
" fill: #1967D2;\n", | |
" height: 32px;\n", | |
" padding: 0 0 0 0;\n", | |
" width: 32px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert:hover {\n", | |
" background-color: #E2EBFA;\n", | |
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
" fill: #174EA6;\n", | |
" }\n", | |
"\n", | |
" .colab-df-buttons div {\n", | |
" margin-bottom: 4px;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert {\n", | |
" background-color: #3B4455;\n", | |
" fill: #D2E3FC;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert:hover {\n", | |
" background-color: #434B5C;\n", | |
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
" fill: #FFFFFF;\n", | |
" }\n", | |
" </style>\n", | |
"\n", | |
" <script>\n", | |
" const buttonEl =\n", | |
" document.querySelector('#df-d37f35ce-05fd-4af3-97fc-070431dcf4af button.colab-df-convert');\n", | |
" buttonEl.style.display =\n", | |
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
"\n", | |
" async function convertToInteractive(key) {\n", | |
" const element = document.querySelector('#df-d37f35ce-05fd-4af3-97fc-070431dcf4af');\n", | |
" const dataTable =\n", | |
" await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
" [key], {});\n", | |
" if (!dataTable) return;\n", | |
"\n", | |
" const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
" + ' to learn more about interactive tables.';\n", | |
" element.innerHTML = '';\n", | |
" dataTable['output_type'] = 'display_data';\n", | |
" await google.colab.output.renderOutput(dataTable, element);\n", | |
" const docLink = document.createElement('div');\n", | |
" docLink.innerHTML = docLinkHtml;\n", | |
" element.appendChild(docLink);\n", | |
" }\n", | |
" </script>\n", | |
" </div>\n", | |
"\n", | |
"\n", | |
"<div id=\"df-a441b647-864f-4519-addd-bfede96ab66e\">\n", | |
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-a441b647-864f-4519-addd-bfede96ab66e')\"\n", | |
" title=\"Suggest charts\"\n", | |
" style=\"display:none;\">\n", | |
"\n", | |
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
" width=\"24px\">\n", | |
" <g>\n", | |
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n", | |
" </g>\n", | |
"</svg>\n", | |
" </button>\n", | |
"\n", | |
"<style>\n", | |
" .colab-df-quickchart {\n", | |
" --bg-color: #E8F0FE;\n", | |
" --fill-color: #1967D2;\n", | |
" --hover-bg-color: #E2EBFA;\n", | |
" --hover-fill-color: #174EA6;\n", | |
" --disabled-fill-color: #AAA;\n", | |
" --disabled-bg-color: #DDD;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-quickchart {\n", | |
" --bg-color: #3B4455;\n", | |
" --fill-color: #D2E3FC;\n", | |
" --hover-bg-color: #434B5C;\n", | |
" --hover-fill-color: #FFFFFF;\n", | |
" --disabled-bg-color: #3B4455;\n", | |
" --disabled-fill-color: #666;\n", | |
" }\n", | |
"\n", | |
" .colab-df-quickchart {\n", | |
" background-color: var(--bg-color);\n", | |
" border: none;\n", | |
" border-radius: 50%;\n", | |
" cursor: pointer;\n", | |
" display: none;\n", | |
" fill: var(--fill-color);\n", | |
" height: 32px;\n", | |
" padding: 0;\n", | |
" width: 32px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-quickchart:hover {\n", | |
" background-color: var(--hover-bg-color);\n", | |
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
" fill: var(--button-hover-fill-color);\n", | |
" }\n", | |
"\n", | |
" .colab-df-quickchart-complete:disabled,\n", | |
" .colab-df-quickchart-complete:disabled:hover {\n", | |
" background-color: var(--disabled-bg-color);\n", | |
" fill: var(--disabled-fill-color);\n", | |
" box-shadow: none;\n", | |
" }\n", | |
"\n", | |
" .colab-df-spinner {\n", | |
" border: 2px solid var(--fill-color);\n", | |
" border-color: transparent;\n", | |
" border-bottom-color: var(--fill-color);\n", | |
" animation:\n", | |
" spin 1s steps(1) infinite;\n", | |
" }\n", | |
"\n", | |
" @keyframes spin {\n", | |
" 0% {\n", | |
" border-color: transparent;\n", | |
" border-bottom-color: var(--fill-color);\n", | |
" border-left-color: var(--fill-color);\n", | |
" }\n", | |
" 20% {\n", | |
" border-color: transparent;\n", | |
" border-left-color: var(--fill-color);\n", | |
" border-top-color: var(--fill-color);\n", | |
" }\n", | |
" 30% {\n", | |
" border-color: transparent;\n", | |
" border-left-color: var(--fill-color);\n", | |
" border-top-color: var(--fill-color);\n", | |
" border-right-color: var(--fill-color);\n", | |
" }\n", | |
" 40% {\n", | |
" border-color: transparent;\n", | |
" border-right-color: var(--fill-color);\n", | |
" border-top-color: var(--fill-color);\n", | |
" }\n", | |
" 60% {\n", | |
" border-color: transparent;\n", | |
" border-right-color: var(--fill-color);\n", | |
" }\n", | |
" 80% {\n", | |
" border-color: transparent;\n", | |
" border-right-color: var(--fill-color);\n", | |
" border-bottom-color: var(--fill-color);\n", | |
" }\n", | |
" 90% {\n", | |
" border-color: transparent;\n", | |
" border-bottom-color: var(--fill-color);\n", | |
" }\n", | |
" }\n", | |
"</style>\n", | |
"\n", | |
" <script>\n", | |
" async function quickchart(key) {\n", | |
" const quickchartButtonEl =\n", | |
" document.querySelector('#' + key + ' button');\n", | |
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n", | |
" quickchartButtonEl.classList.add('colab-df-spinner');\n", | |
" try {\n", | |
" const charts = await google.colab.kernel.invokeFunction(\n", | |
" 'suggestCharts', [key], {});\n", | |
" } catch (error) {\n", | |
" console.error('Error during call to suggestCharts:', error);\n", | |
" }\n", | |
" quickchartButtonEl.classList.remove('colab-df-spinner');\n", | |
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n", | |
" }\n", | |
" (() => {\n", | |
" let quickchartButtonEl =\n", | |
" document.querySelector('#df-a441b647-864f-4519-addd-bfede96ab66e button');\n", | |
" quickchartButtonEl.style.display =\n", | |
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
" })();\n", | |
" </script>\n", | |
"</div>\n", | |
" </div>\n", | |
" </div>\n" | |
], | |
"application/vnd.google.colaboratory.intrinsic+json": { | |
"type": "dataframe", | |
"summary": "{\n \"name\": \"wine\",\n \"rows\": 12,\n \"fields\": [\n {\n \"column\": \"mean\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 39.320887916159165,\n \"min\": 0.05,\n \"max\": 138.36,\n \"num_unique_values\": 12,\n \"samples\": [\n 10.51,\n 0.49,\n 6.85\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"std\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 12.571895785296807,\n \"min\": 0.0,\n \"max\": 42.5,\n \"num_unique_values\": 12,\n \"samples\": [\n 1.23,\n 0.11,\n 0.84\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"min\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3.076578678278253,\n \"min\": 0.0,\n \"max\": 9.0,\n \"num_unique_values\": 12,\n \"samples\": [\n 8.0,\n 0.22,\n 3.8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"25%\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 30.55227992038128,\n \"min\": 0.04,\n \"max\": 108.0,\n \"num_unique_values\": 12,\n \"samples\": [\n 9.5,\n 0.41,\n 6.3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"50%\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 38.08128710105171,\n \"min\": 0.04,\n \"max\": 134.0,\n \"num_unique_values\": 12,\n \"samples\": [\n 10.4,\n 0.47,\n 6.8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"75%\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 47.665958781033154,\n \"min\": 0.05,\n \"max\": 167.0,\n \"num_unique_values\": 12,\n \"samples\": [\n 11.4,\n 0.55,\n 7.3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"max\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 142.37617212915762,\n \"min\": 0.35,\n \"max\": 440.0,\n \"num_unique_values\": 11,\n \"samples\": [\n 289.0,\n 14.2,\n 1.08\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" | |
} | |
}, | |
"metadata": {}, | |
"execution_count": 6 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from sklearn.model_selection import train_test_split\n", | |
"# Separate the target (wine quality) from the features, then hold out 20% of the rows as a test set\n", | |
"y = wine['quality']\n", | |
"X = wine.drop('quality', axis=1)\n", | |
"X_train, X_test, y_train, y_test = train_test_split(\n", | |
" X, y, test_size=0.2, random_state=42\n", | |
")" | |
], | |
"metadata": { | |
"id": "CFBayYQ-fzHG" | |
}, | |
"execution_count": 7, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment