Created
May 27, 2023 03:07
-
-
Save SiddheshKukade/ec63222f4b78406e70031b89bbdf0569 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "8dc82e34", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import matplotlib.pyplot as plt\n", | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "a4b5bd5b", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>diagnosis</th>\n", | |
" <th>radius_mean</th>\n", | |
" <th>texture_mean</th>\n", | |
" <th>perimeter_mean</th>\n", | |
" <th>area_mean</th>\n", | |
" <th>smoothness_mean</th>\n", | |
" <th>compactness_mean</th>\n", | |
" <th>concavity_mean</th>\n", | |
" <th>concave points_mean</th>\n", | |
" <th>...</th>\n", | |
" <th>texture_worst</th>\n", | |
" <th>perimeter_worst</th>\n", | |
" <th>area_worst</th>\n", | |
" <th>smoothness_worst</th>\n", | |
" <th>compactness_worst</th>\n", | |
" <th>concavity_worst</th>\n", | |
" <th>concave points_worst</th>\n", | |
" <th>symmetry_worst</th>\n", | |
" <th>fractal_dimension_worst</th>\n", | |
" <th>Unnamed: 32</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>842302</td>\n", | |
" <td>M</td>\n", | |
" <td>17.99</td>\n", | |
" <td>10.38</td>\n", | |
" <td>122.80</td>\n", | |
" <td>1001.0</td>\n", | |
" <td>0.11840</td>\n", | |
" <td>0.27760</td>\n", | |
" <td>0.30010</td>\n", | |
" <td>0.14710</td>\n", | |
" <td>...</td>\n", | |
" <td>17.33</td>\n", | |
" <td>184.60</td>\n", | |
" <td>2019.0</td>\n", | |
" <td>0.16220</td>\n", | |
" <td>0.66560</td>\n", | |
" <td>0.7119</td>\n", | |
" <td>0.2654</td>\n", | |
" <td>0.4601</td>\n", | |
" <td>0.11890</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>842517</td>\n", | |
" <td>M</td>\n", | |
" <td>20.57</td>\n", | |
" <td>17.77</td>\n", | |
" <td>132.90</td>\n", | |
" <td>1326.0</td>\n", | |
" <td>0.08474</td>\n", | |
" <td>0.07864</td>\n", | |
" <td>0.08690</td>\n", | |
" <td>0.07017</td>\n", | |
" <td>...</td>\n", | |
" <td>23.41</td>\n", | |
" <td>158.80</td>\n", | |
" <td>1956.0</td>\n", | |
" <td>0.12380</td>\n", | |
" <td>0.18660</td>\n", | |
" <td>0.2416</td>\n", | |
" <td>0.1860</td>\n", | |
" <td>0.2750</td>\n", | |
" <td>0.08902</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>84300903</td>\n", | |
" <td>M</td>\n", | |
" <td>19.69</td>\n", | |
" <td>21.25</td>\n", | |
" <td>130.00</td>\n", | |
" <td>1203.0</td>\n", | |
" <td>0.10960</td>\n", | |
" <td>0.15990</td>\n", | |
" <td>0.19740</td>\n", | |
" <td>0.12790</td>\n", | |
" <td>...</td>\n", | |
" <td>25.53</td>\n", | |
" <td>152.50</td>\n", | |
" <td>1709.0</td>\n", | |
" <td>0.14440</td>\n", | |
" <td>0.42450</td>\n", | |
" <td>0.4504</td>\n", | |
" <td>0.2430</td>\n", | |
" <td>0.3613</td>\n", | |
" <td>0.08758</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>84348301</td>\n", | |
" <td>M</td>\n", | |
" <td>11.42</td>\n", | |
" <td>20.38</td>\n", | |
" <td>77.58</td>\n", | |
" <td>386.1</td>\n", | |
" <td>0.14250</td>\n", | |
" <td>0.28390</td>\n", | |
" <td>0.24140</td>\n", | |
" <td>0.10520</td>\n", | |
" <td>...</td>\n", | |
" <td>26.50</td>\n", | |
" <td>98.87</td>\n", | |
" <td>567.7</td>\n", | |
" <td>0.20980</td>\n", | |
" <td>0.86630</td>\n", | |
" <td>0.6869</td>\n", | |
" <td>0.2575</td>\n", | |
" <td>0.6638</td>\n", | |
" <td>0.17300</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>84358402</td>\n", | |
" <td>M</td>\n", | |
" <td>20.29</td>\n", | |
" <td>14.34</td>\n", | |
" <td>135.10</td>\n", | |
" <td>1297.0</td>\n", | |
" <td>0.10030</td>\n", | |
" <td>0.13280</td>\n", | |
" <td>0.19800</td>\n", | |
" <td>0.10430</td>\n", | |
" <td>...</td>\n", | |
" <td>16.67</td>\n", | |
" <td>152.20</td>\n", | |
" <td>1575.0</td>\n", | |
" <td>0.13740</td>\n", | |
" <td>0.20500</td>\n", | |
" <td>0.4000</td>\n", | |
" <td>0.1625</td>\n", | |
" <td>0.2364</td>\n", | |
" <td>0.07678</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>564</th>\n", | |
" <td>926424</td>\n", | |
" <td>M</td>\n", | |
" <td>21.56</td>\n", | |
" <td>22.39</td>\n", | |
" <td>142.00</td>\n", | |
" <td>1479.0</td>\n", | |
" <td>0.11100</td>\n", | |
" <td>0.11590</td>\n", | |
" <td>0.24390</td>\n", | |
" <td>0.13890</td>\n", | |
" <td>...</td>\n", | |
" <td>26.40</td>\n", | |
" <td>166.10</td>\n", | |
" <td>2027.0</td>\n", | |
" <td>0.14100</td>\n", | |
" <td>0.21130</td>\n", | |
" <td>0.4107</td>\n", | |
" <td>0.2216</td>\n", | |
" <td>0.2060</td>\n", | |
" <td>0.07115</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>565</th>\n", | |
" <td>926682</td>\n", | |
" <td>M</td>\n", | |
" <td>20.13</td>\n", | |
" <td>28.25</td>\n", | |
" <td>131.20</td>\n", | |
" <td>1261.0</td>\n", | |
" <td>0.09780</td>\n", | |
" <td>0.10340</td>\n", | |
" <td>0.14400</td>\n", | |
" <td>0.09791</td>\n", | |
" <td>...</td>\n", | |
" <td>38.25</td>\n", | |
" <td>155.00</td>\n", | |
" <td>1731.0</td>\n", | |
" <td>0.11660</td>\n", | |
" <td>0.19220</td>\n", | |
" <td>0.3215</td>\n", | |
" <td>0.1628</td>\n", | |
" <td>0.2572</td>\n", | |
" <td>0.06637</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>566</th>\n", | |
" <td>926954</td>\n", | |
" <td>M</td>\n", | |
" <td>16.60</td>\n", | |
" <td>28.08</td>\n", | |
" <td>108.30</td>\n", | |
" <td>858.1</td>\n", | |
" <td>0.08455</td>\n", | |
" <td>0.10230</td>\n", | |
" <td>0.09251</td>\n", | |
" <td>0.05302</td>\n", | |
" <td>...</td>\n", | |
" <td>34.12</td>\n", | |
" <td>126.70</td>\n", | |
" <td>1124.0</td>\n", | |
" <td>0.11390</td>\n", | |
" <td>0.30940</td>\n", | |
" <td>0.3403</td>\n", | |
" <td>0.1418</td>\n", | |
" <td>0.2218</td>\n", | |
" <td>0.07820</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>567</th>\n", | |
" <td>927241</td>\n", | |
" <td>M</td>\n", | |
" <td>20.60</td>\n", | |
" <td>29.33</td>\n", | |
" <td>140.10</td>\n", | |
" <td>1265.0</td>\n", | |
" <td>0.11780</td>\n", | |
" <td>0.27700</td>\n", | |
" <td>0.35140</td>\n", | |
" <td>0.15200</td>\n", | |
" <td>...</td>\n", | |
" <td>39.42</td>\n", | |
" <td>184.60</td>\n", | |
" <td>1821.0</td>\n", | |
" <td>0.16500</td>\n", | |
" <td>0.86810</td>\n", | |
" <td>0.9387</td>\n", | |
" <td>0.2650</td>\n", | |
" <td>0.4087</td>\n", | |
" <td>0.12400</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>568</th>\n", | |
" <td>92751</td>\n", | |
" <td>B</td>\n", | |
" <td>7.76</td>\n", | |
" <td>24.54</td>\n", | |
" <td>47.92</td>\n", | |
" <td>181.0</td>\n", | |
" <td>0.05263</td>\n", | |
" <td>0.04362</td>\n", | |
" <td>0.00000</td>\n", | |
" <td>0.00000</td>\n", | |
" <td>...</td>\n", | |
" <td>30.37</td>\n", | |
" <td>59.16</td>\n", | |
" <td>268.6</td>\n", | |
" <td>0.08996</td>\n", | |
" <td>0.06444</td>\n", | |
" <td>0.0000</td>\n", | |
" <td>0.0000</td>\n", | |
" <td>0.2871</td>\n", | |
" <td>0.07039</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>569 rows × 33 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id diagnosis radius_mean texture_mean perimeter_mean area_mean \n", | |
"0 842302 M 17.99 10.38 122.80 1001.0 \\\n", | |
"1 842517 M 20.57 17.77 132.90 1326.0 \n", | |
"2 84300903 M 19.69 21.25 130.00 1203.0 \n", | |
"3 84348301 M 11.42 20.38 77.58 386.1 \n", | |
"4 84358402 M 20.29 14.34 135.10 1297.0 \n", | |
".. ... ... ... ... ... ... \n", | |
"564 926424 M 21.56 22.39 142.00 1479.0 \n", | |
"565 926682 M 20.13 28.25 131.20 1261.0 \n", | |
"566 926954 M 16.60 28.08 108.30 858.1 \n", | |
"567 927241 M 20.60 29.33 140.10 1265.0 \n", | |
"568 92751 B 7.76 24.54 47.92 181.0 \n", | |
"\n", | |
" smoothness_mean compactness_mean concavity_mean concave points_mean \n", | |
"0 0.11840 0.27760 0.30010 0.14710 \\\n", | |
"1 0.08474 0.07864 0.08690 0.07017 \n", | |
"2 0.10960 0.15990 0.19740 0.12790 \n", | |
"3 0.14250 0.28390 0.24140 0.10520 \n", | |
"4 0.10030 0.13280 0.19800 0.10430 \n", | |
".. ... ... ... ... \n", | |
"564 0.11100 0.11590 0.24390 0.13890 \n", | |
"565 0.09780 0.10340 0.14400 0.09791 \n", | |
"566 0.08455 0.10230 0.09251 0.05302 \n", | |
"567 0.11780 0.27700 0.35140 0.15200 \n", | |
"568 0.05263 0.04362 0.00000 0.00000 \n", | |
"\n", | |
" ... texture_worst perimeter_worst area_worst smoothness_worst \n", | |
"0 ... 17.33 184.60 2019.0 0.16220 \\\n", | |
"1 ... 23.41 158.80 1956.0 0.12380 \n", | |
"2 ... 25.53 152.50 1709.0 0.14440 \n", | |
"3 ... 26.50 98.87 567.7 0.20980 \n", | |
"4 ... 16.67 152.20 1575.0 0.13740 \n", | |
".. ... ... ... ... ... \n", | |
"564 ... 26.40 166.10 2027.0 0.14100 \n", | |
"565 ... 38.25 155.00 1731.0 0.11660 \n", | |
"566 ... 34.12 126.70 1124.0 0.11390 \n", | |
"567 ... 39.42 184.60 1821.0 0.16500 \n", | |
"568 ... 30.37 59.16 268.6 0.08996 \n", | |
"\n", | |
" compactness_worst concavity_worst concave points_worst symmetry_worst \n", | |
"0 0.66560 0.7119 0.2654 0.4601 \\\n", | |
"1 0.18660 0.2416 0.1860 0.2750 \n", | |
"2 0.42450 0.4504 0.2430 0.3613 \n", | |
"3 0.86630 0.6869 0.2575 0.6638 \n", | |
"4 0.20500 0.4000 0.1625 0.2364 \n", | |
".. ... ... ... ... \n", | |
"564 0.21130 0.4107 0.2216 0.2060 \n", | |
"565 0.19220 0.3215 0.1628 0.2572 \n", | |
"566 0.30940 0.3403 0.1418 0.2218 \n", | |
"567 0.86810 0.9387 0.2650 0.4087 \n", | |
"568 0.06444 0.0000 0.0000 0.2871 \n", | |
"\n", | |
" fractal_dimension_worst Unnamed: 32 \n", | |
"0 0.11890 NaN \n", | |
"1 0.08902 NaN \n", | |
"2 0.08758 NaN \n", | |
"3 0.17300 NaN \n", | |
"4 0.07678 NaN \n", | |
".. ... ... \n", | |
"564 0.07115 NaN \n", | |
"565 0.06637 NaN \n", | |
"566 0.07820 NaN \n", | |
"567 0.12400 NaN \n", | |
"568 0.07039 NaN \n", | |
"\n", | |
"[569 rows x 33 columns]" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df = pd.read_csv('cancer_data_set_batch_3.csv')\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "350cb03e", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(569, 33)" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "589c5f5e", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Index(['id', 'diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',\n", | |
" 'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',\n", | |
" 'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',\n", | |
" 'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',\n", | |
" 'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',\n", | |
" 'fractal_dimension_se', 'radius_worst', 'texture_worst',\n", | |
" 'perimeter_worst', 'area_worst', 'smoothness_worst',\n", | |
" 'compactness_worst', 'concavity_worst', 'concave points_worst',\n", | |
" 'symmetry_worst', 'fractal_dimension_worst', 'Unnamed: 32'],\n", | |
" dtype='object')" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.columns" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "ca45cc7b", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# ********************************* 1. Data Cleaning ******************************************" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "d8eef388", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"id 0\n", | |
"diagnosis 0\n", | |
"radius_mean 0\n", | |
"texture_mean 0\n", | |
"perimeter_mean 0\n", | |
"area_mean 0\n", | |
"smoothness_mean 0\n", | |
"compactness_mean 0\n", | |
"concavity_mean 0\n", | |
"concave points_mean 0\n", | |
"symmetry_mean 0\n", | |
"fractal_dimension_mean 0\n", | |
"radius_se 0\n", | |
"texture_se 0\n", | |
"perimeter_se 0\n", | |
"area_se 0\n", | |
"smoothness_se 0\n", | |
"compactness_se 0\n", | |
"concavity_se 0\n", | |
"concave points_se 0\n", | |
"symmetry_se 0\n", | |
"fractal_dimension_se 0\n", | |
"radius_worst 0\n", | |
"texture_worst 0\n", | |
"perimeter_worst 0\n", | |
"area_worst 0\n", | |
"smoothness_worst 0\n", | |
"compactness_worst 0\n", | |
"concavity_worst 0\n", | |
"concave points_worst 0\n", | |
"symmetry_worst 0\n", | |
"fractal_dimension_worst 0\n", | |
"Unnamed: 32 569\n", | |
"dtype: int64\n" | |
] | |
} | |
], | |
"source": [ | |
"print(df.isna().sum())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "f5a68d7a", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#Dropping the Unnamed column\n", | |
"df = df.drop([\"Unnamed: 32\"],axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "1fa408c7", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>diagnosis</th>\n", | |
" <th>radius_mean</th>\n", | |
" <th>texture_mean</th>\n", | |
" <th>perimeter_mean</th>\n", | |
" <th>area_mean</th>\n", | |
" <th>smoothness_mean</th>\n", | |
" <th>compactness_mean</th>\n", | |
" <th>concavity_mean</th>\n", | |
" <th>concave points_mean</th>\n", | |
" <th>...</th>\n", | |
" <th>radius_worst</th>\n", | |
" <th>texture_worst</th>\n", | |
" <th>perimeter_worst</th>\n", | |
" <th>area_worst</th>\n", | |
" <th>smoothness_worst</th>\n", | |
" <th>compactness_worst</th>\n", | |
" <th>concavity_worst</th>\n", | |
" <th>concave points_worst</th>\n", | |
" <th>symmetry_worst</th>\n", | |
" <th>fractal_dimension_worst</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>842302</td>\n", | |
" <td>M</td>\n", | |
" <td>17.99</td>\n", | |
" <td>10.38</td>\n", | |
" <td>122.80</td>\n", | |
" <td>1001.0</td>\n", | |
" <td>0.11840</td>\n", | |
" <td>0.27760</td>\n", | |
" <td>0.30010</td>\n", | |
" <td>0.14710</td>\n", | |
" <td>...</td>\n", | |
" <td>25.380</td>\n", | |
" <td>17.33</td>\n", | |
" <td>184.60</td>\n", | |
" <td>2019.0</td>\n", | |
" <td>0.16220</td>\n", | |
" <td>0.66560</td>\n", | |
" <td>0.7119</td>\n", | |
" <td>0.2654</td>\n", | |
" <td>0.4601</td>\n", | |
" <td>0.11890</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>842517</td>\n", | |
" <td>M</td>\n", | |
" <td>20.57</td>\n", | |
" <td>17.77</td>\n", | |
" <td>132.90</td>\n", | |
" <td>1326.0</td>\n", | |
" <td>0.08474</td>\n", | |
" <td>0.07864</td>\n", | |
" <td>0.08690</td>\n", | |
" <td>0.07017</td>\n", | |
" <td>...</td>\n", | |
" <td>24.990</td>\n", | |
" <td>23.41</td>\n", | |
" <td>158.80</td>\n", | |
" <td>1956.0</td>\n", | |
" <td>0.12380</td>\n", | |
" <td>0.18660</td>\n", | |
" <td>0.2416</td>\n", | |
" <td>0.1860</td>\n", | |
" <td>0.2750</td>\n", | |
" <td>0.08902</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>84300903</td>\n", | |
" <td>M</td>\n", | |
" <td>19.69</td>\n", | |
" <td>21.25</td>\n", | |
" <td>130.00</td>\n", | |
" <td>1203.0</td>\n", | |
" <td>0.10960</td>\n", | |
" <td>0.15990</td>\n", | |
" <td>0.19740</td>\n", | |
" <td>0.12790</td>\n", | |
" <td>...</td>\n", | |
" <td>23.570</td>\n", | |
" <td>25.53</td>\n", | |
" <td>152.50</td>\n", | |
" <td>1709.0</td>\n", | |
" <td>0.14440</td>\n", | |
" <td>0.42450</td>\n", | |
" <td>0.4504</td>\n", | |
" <td>0.2430</td>\n", | |
" <td>0.3613</td>\n", | |
" <td>0.08758</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>84348301</td>\n", | |
" <td>M</td>\n", | |
" <td>11.42</td>\n", | |
" <td>20.38</td>\n", | |
" <td>77.58</td>\n", | |
" <td>386.1</td>\n", | |
" <td>0.14250</td>\n", | |
" <td>0.28390</td>\n", | |
" <td>0.24140</td>\n", | |
" <td>0.10520</td>\n", | |
" <td>...</td>\n", | |
" <td>14.910</td>\n", | |
" <td>26.50</td>\n", | |
" <td>98.87</td>\n", | |
" <td>567.7</td>\n", | |
" <td>0.20980</td>\n", | |
" <td>0.86630</td>\n", | |
" <td>0.6869</td>\n", | |
" <td>0.2575</td>\n", | |
" <td>0.6638</td>\n", | |
" <td>0.17300</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>84358402</td>\n", | |
" <td>M</td>\n", | |
" <td>20.29</td>\n", | |
" <td>14.34</td>\n", | |
" <td>135.10</td>\n", | |
" <td>1297.0</td>\n", | |
" <td>0.10030</td>\n", | |
" <td>0.13280</td>\n", | |
" <td>0.19800</td>\n", | |
" <td>0.10430</td>\n", | |
" <td>...</td>\n", | |
" <td>22.540</td>\n", | |
" <td>16.67</td>\n", | |
" <td>152.20</td>\n", | |
" <td>1575.0</td>\n", | |
" <td>0.13740</td>\n", | |
" <td>0.20500</td>\n", | |
" <td>0.4000</td>\n", | |
" <td>0.1625</td>\n", | |
" <td>0.2364</td>\n", | |
" <td>0.07678</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>564</th>\n", | |
" <td>926424</td>\n", | |
" <td>M</td>\n", | |
" <td>21.56</td>\n", | |
" <td>22.39</td>\n", | |
" <td>142.00</td>\n", | |
" <td>1479.0</td>\n", | |
" <td>0.11100</td>\n", | |
" <td>0.11590</td>\n", | |
" <td>0.24390</td>\n", | |
" <td>0.13890</td>\n", | |
" <td>...</td>\n", | |
" <td>25.450</td>\n", | |
" <td>26.40</td>\n", | |
" <td>166.10</td>\n", | |
" <td>2027.0</td>\n", | |
" <td>0.14100</td>\n", | |
" <td>0.21130</td>\n", | |
" <td>0.4107</td>\n", | |
" <td>0.2216</td>\n", | |
" <td>0.2060</td>\n", | |
" <td>0.07115</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>565</th>\n", | |
" <td>926682</td>\n", | |
" <td>M</td>\n", | |
" <td>20.13</td>\n", | |
" <td>28.25</td>\n", | |
" <td>131.20</td>\n", | |
" <td>1261.0</td>\n", | |
" <td>0.09780</td>\n", | |
" <td>0.10340</td>\n", | |
" <td>0.14400</td>\n", | |
" <td>0.09791</td>\n", | |
" <td>...</td>\n", | |
" <td>23.690</td>\n", | |
" <td>38.25</td>\n", | |
" <td>155.00</td>\n", | |
" <td>1731.0</td>\n", | |
" <td>0.11660</td>\n", | |
" <td>0.19220</td>\n", | |
" <td>0.3215</td>\n", | |
" <td>0.1628</td>\n", | |
" <td>0.2572</td>\n", | |
" <td>0.06637</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>566</th>\n", | |
" <td>926954</td>\n", | |
" <td>M</td>\n", | |
" <td>16.60</td>\n", | |
" <td>28.08</td>\n", | |
" <td>108.30</td>\n", | |
" <td>858.1</td>\n", | |
" <td>0.08455</td>\n", | |
" <td>0.10230</td>\n", | |
" <td>0.09251</td>\n", | |
" <td>0.05302</td>\n", | |
" <td>...</td>\n", | |
" <td>18.980</td>\n", | |
" <td>34.12</td>\n", | |
" <td>126.70</td>\n", | |
" <td>1124.0</td>\n", | |
" <td>0.11390</td>\n", | |
" <td>0.30940</td>\n", | |
" <td>0.3403</td>\n", | |
" <td>0.1418</td>\n", | |
" <td>0.2218</td>\n", | |
" <td>0.07820</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>567</th>\n", | |
" <td>927241</td>\n", | |
" <td>M</td>\n", | |
" <td>20.60</td>\n", | |
" <td>29.33</td>\n", | |
" <td>140.10</td>\n", | |
" <td>1265.0</td>\n", | |
" <td>0.11780</td>\n", | |
" <td>0.27700</td>\n", | |
" <td>0.35140</td>\n", | |
" <td>0.15200</td>\n", | |
" <td>...</td>\n", | |
" <td>25.740</td>\n", | |
" <td>39.42</td>\n", | |
" <td>184.60</td>\n", | |
" <td>1821.0</td>\n", | |
" <td>0.16500</td>\n", | |
" <td>0.86810</td>\n", | |
" <td>0.9387</td>\n", | |
" <td>0.2650</td>\n", | |
" <td>0.4087</td>\n", | |
" <td>0.12400</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>568</th>\n", | |
" <td>92751</td>\n", | |
" <td>B</td>\n", | |
" <td>7.76</td>\n", | |
" <td>24.54</td>\n", | |
" <td>47.92</td>\n", | |
" <td>181.0</td>\n", | |
" <td>0.05263</td>\n", | |
" <td>0.04362</td>\n", | |
" <td>0.00000</td>\n", | |
" <td>0.00000</td>\n", | |
" <td>...</td>\n", | |
" <td>9.456</td>\n", | |
" <td>30.37</td>\n", | |
" <td>59.16</td>\n", | |
" <td>268.6</td>\n", | |
" <td>0.08996</td>\n", | |
" <td>0.06444</td>\n", | |
" <td>0.0000</td>\n", | |
" <td>0.0000</td>\n", | |
" <td>0.2871</td>\n", | |
" <td>0.07039</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>569 rows × 32 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id diagnosis radius_mean texture_mean perimeter_mean area_mean \n", | |
"0 842302 M 17.99 10.38 122.80 1001.0 \\\n", | |
"1 842517 M 20.57 17.77 132.90 1326.0 \n", | |
"2 84300903 M 19.69 21.25 130.00 1203.0 \n", | |
"3 84348301 M 11.42 20.38 77.58 386.1 \n", | |
"4 84358402 M 20.29 14.34 135.10 1297.0 \n", | |
".. ... ... ... ... ... ... \n", | |
"564 926424 M 21.56 22.39 142.00 1479.0 \n", | |
"565 926682 M 20.13 28.25 131.20 1261.0 \n", | |
"566 926954 M 16.60 28.08 108.30 858.1 \n", | |
"567 927241 M 20.60 29.33 140.10 1265.0 \n", | |
"568 92751 B 7.76 24.54 47.92 181.0 \n", | |
"\n", | |
" smoothness_mean compactness_mean concavity_mean concave points_mean \n", | |
"0 0.11840 0.27760 0.30010 0.14710 \\\n", | |
"1 0.08474 0.07864 0.08690 0.07017 \n", | |
"2 0.10960 0.15990 0.19740 0.12790 \n", | |
"3 0.14250 0.28390 0.24140 0.10520 \n", | |
"4 0.10030 0.13280 0.19800 0.10430 \n", | |
".. ... ... ... ... \n", | |
"564 0.11100 0.11590 0.24390 0.13890 \n", | |
"565 0.09780 0.10340 0.14400 0.09791 \n", | |
"566 0.08455 0.10230 0.09251 0.05302 \n", | |
"567 0.11780 0.27700 0.35140 0.15200 \n", | |
"568 0.05263 0.04362 0.00000 0.00000 \n", | |
"\n", | |
" ... radius_worst texture_worst perimeter_worst area_worst \n", | |
"0 ... 25.380 17.33 184.60 2019.0 \\\n", | |
"1 ... 24.990 23.41 158.80 1956.0 \n", | |
"2 ... 23.570 25.53 152.50 1709.0 \n", | |
"3 ... 14.910 26.50 98.87 567.7 \n", | |
"4 ... 22.540 16.67 152.20 1575.0 \n", | |
".. ... ... ... ... ... \n", | |
"564 ... 25.450 26.40 166.10 2027.0 \n", | |
"565 ... 23.690 38.25 155.00 1731.0 \n", | |
"566 ... 18.980 34.12 126.70 1124.0 \n", | |
"567 ... 25.740 39.42 184.60 1821.0 \n", | |
"568 ... 9.456 30.37 59.16 268.6 \n", | |
"\n", | |
" smoothness_worst compactness_worst concavity_worst \n", | |
"0 0.16220 0.66560 0.7119 \\\n", | |
"1 0.12380 0.18660 0.2416 \n", | |
"2 0.14440 0.42450 0.4504 \n", | |
"3 0.20980 0.86630 0.6869 \n", | |
"4 0.13740 0.20500 0.4000 \n", | |
".. ... ... ... \n", | |
"564 0.14100 0.21130 0.4107 \n", | |
"565 0.11660 0.19220 0.3215 \n", | |
"566 0.11390 0.30940 0.3403 \n", | |
"567 0.16500 0.86810 0.9387 \n", | |
"568 0.08996 0.06444 0.0000 \n", | |
"\n", | |
" concave points_worst symmetry_worst fractal_dimension_worst \n", | |
"0 0.2654 0.4601 0.11890 \n", | |
"1 0.1860 0.2750 0.08902 \n", | |
"2 0.2430 0.3613 0.08758 \n", | |
"3 0.2575 0.6638 0.17300 \n", | |
"4 0.1625 0.2364 0.07678 \n", | |
".. ... ... ... \n", | |
"564 0.2216 0.2060 0.07115 \n", | |
"565 0.1628 0.2572 0.06637 \n", | |
"566 0.1418 0.2218 0.07820 \n", | |
"567 0.2650 0.4087 0.12400 \n", | |
"568 0.0000 0.2871 0.07039 \n", | |
"\n", | |
"[569 rows x 32 columns]" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "14377433", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>diagnosis</th>\n", | |
" <th>radius_mean</th>\n", | |
" <th>texture_mean</th>\n", | |
" <th>perimeter_mean</th>\n", | |
" <th>area_mean</th>\n", | |
" <th>smoothness_mean</th>\n", | |
" <th>compactness_mean</th>\n", | |
" <th>concavity_mean</th>\n", | |
" <th>concave points_mean</th>\n", | |
" <th>...</th>\n", | |
" <th>radius_worst</th>\n", | |
" <th>texture_worst</th>\n", | |
" <th>perimeter_worst</th>\n", | |
" <th>area_worst</th>\n", | |
" <th>smoothness_worst</th>\n", | |
" <th>compactness_worst</th>\n", | |
" <th>concavity_worst</th>\n", | |
" <th>concave points_worst</th>\n", | |
" <th>symmetry_worst</th>\n", | |
" <th>fractal_dimension_worst</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>842302</td>\n", | |
" <td>M</td>\n", | |
" <td>17.99</td>\n", | |
" <td>10.38</td>\n", | |
" <td>122.80</td>\n", | |
" <td>1001.0</td>\n", | |
" <td>0.11840</td>\n", | |
" <td>0.27760</td>\n", | |
" <td>0.30010</td>\n", | |
" <td>0.14710</td>\n", | |
" <td>...</td>\n", | |
" <td>25.380</td>\n", | |
" <td>17.33</td>\n", | |
" <td>184.60</td>\n", | |
" <td>2019.0</td>\n", | |
" <td>0.16220</td>\n", | |
" <td>0.66560</td>\n", | |
" <td>0.7119</td>\n", | |
" <td>0.2654</td>\n", | |
" <td>0.4601</td>\n", | |
" <td>0.11890</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>842517</td>\n", | |
" <td>M</td>\n", | |
" <td>20.57</td>\n", | |
" <td>17.77</td>\n", | |
" <td>132.90</td>\n", | |
" <td>1326.0</td>\n", | |
" <td>0.08474</td>\n", | |
" <td>0.07864</td>\n", | |
" <td>0.08690</td>\n", | |
" <td>0.07017</td>\n", | |
" <td>...</td>\n", | |
" <td>24.990</td>\n", | |
" <td>23.41</td>\n", | |
" <td>158.80</td>\n", | |
" <td>1956.0</td>\n", | |
" <td>0.12380</td>\n", | |
" <td>0.18660</td>\n", | |
" <td>0.2416</td>\n", | |
" <td>0.1860</td>\n", | |
" <td>0.2750</td>\n", | |
" <td>0.08902</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>84300903</td>\n", | |
" <td>M</td>\n", | |
" <td>19.69</td>\n", | |
" <td>21.25</td>\n", | |
" <td>130.00</td>\n", | |
" <td>1203.0</td>\n", | |
" <td>0.10960</td>\n", | |
" <td>0.15990</td>\n", | |
" <td>0.19740</td>\n", | |
" <td>0.12790</td>\n", | |
" <td>...</td>\n", | |
" <td>23.570</td>\n", | |
" <td>25.53</td>\n", | |
" <td>152.50</td>\n", | |
" <td>1709.0</td>\n", | |
" <td>0.14440</td>\n", | |
" <td>0.42450</td>\n", | |
" <td>0.4504</td>\n", | |
" <td>0.2430</td>\n", | |
" <td>0.3613</td>\n", | |
" <td>0.08758</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>84348301</td>\n", | |
" <td>M</td>\n", | |
" <td>11.42</td>\n", | |
" <td>20.38</td>\n", | |
" <td>77.58</td>\n", | |
" <td>386.1</td>\n", | |
" <td>0.14250</td>\n", | |
" <td>0.28390</td>\n", | |
" <td>0.24140</td>\n", | |
" <td>0.10520</td>\n", | |
" <td>...</td>\n", | |
" <td>14.910</td>\n", | |
" <td>26.50</td>\n", | |
" <td>98.87</td>\n", | |
" <td>567.7</td>\n", | |
" <td>0.20980</td>\n", | |
" <td>0.86630</td>\n", | |
" <td>0.6869</td>\n", | |
" <td>0.2575</td>\n", | |
" <td>0.6638</td>\n", | |
" <td>0.17300</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>84358402</td>\n", | |
" <td>M</td>\n", | |
" <td>20.29</td>\n", | |
" <td>14.34</td>\n", | |
" <td>135.10</td>\n", | |
" <td>1297.0</td>\n", | |
" <td>0.10030</td>\n", | |
" <td>0.13280</td>\n", | |
" <td>0.19800</td>\n", | |
" <td>0.10430</td>\n", | |
" <td>...</td>\n", | |
" <td>22.540</td>\n", | |
" <td>16.67</td>\n", | |
" <td>152.20</td>\n", | |
" <td>1575.0</td>\n", | |
" <td>0.13740</td>\n", | |
" <td>0.20500</td>\n", | |
" <td>0.4000</td>\n", | |
" <td>0.1625</td>\n", | |
" <td>0.2364</td>\n", | |
" <td>0.07678</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>564</th>\n", | |
" <td>926424</td>\n", | |
" <td>M</td>\n", | |
" <td>21.56</td>\n", | |
" <td>22.39</td>\n", | |
" <td>142.00</td>\n", | |
" <td>1479.0</td>\n", | |
" <td>0.11100</td>\n", | |
" <td>0.11590</td>\n", | |
" <td>0.24390</td>\n", | |
" <td>0.13890</td>\n", | |
" <td>...</td>\n", | |
" <td>25.450</td>\n", | |
" <td>26.40</td>\n", | |
" <td>166.10</td>\n", | |
" <td>2027.0</td>\n", | |
" <td>0.14100</td>\n", | |
" <td>0.21130</td>\n", | |
" <td>0.4107</td>\n", | |
" <td>0.2216</td>\n", | |
" <td>0.2060</td>\n", | |
" <td>0.07115</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>565</th>\n", | |
" <td>926682</td>\n", | |
" <td>M</td>\n", | |
" <td>20.13</td>\n", | |
" <td>28.25</td>\n", | |
" <td>131.20</td>\n", | |
" <td>1261.0</td>\n", | |
" <td>0.09780</td>\n", | |
" <td>0.10340</td>\n", | |
" <td>0.14400</td>\n", | |
" <td>0.09791</td>\n", | |
" <td>...</td>\n", | |
" <td>23.690</td>\n", | |
" <td>38.25</td>\n", | |
" <td>155.00</td>\n", | |
" <td>1731.0</td>\n", | |
" <td>0.11660</td>\n", | |
" <td>0.19220</td>\n", | |
" <td>0.3215</td>\n", | |
" <td>0.1628</td>\n", | |
" <td>0.2572</td>\n", | |
" <td>0.06637</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>566</th>\n", | |
" <td>926954</td>\n", | |
" <td>M</td>\n", | |
" <td>16.60</td>\n", | |
" <td>28.08</td>\n", | |
" <td>108.30</td>\n", | |
" <td>858.1</td>\n", | |
" <td>0.08455</td>\n", | |
" <td>0.10230</td>\n", | |
" <td>0.09251</td>\n", | |
" <td>0.05302</td>\n", | |
" <td>...</td>\n", | |
" <td>18.980</td>\n", | |
" <td>34.12</td>\n", | |
" <td>126.70</td>\n", | |
" <td>1124.0</td>\n", | |
" <td>0.11390</td>\n", | |
" <td>0.30940</td>\n", | |
" <td>0.3403</td>\n", | |
" <td>0.1418</td>\n", | |
" <td>0.2218</td>\n", | |
" <td>0.07820</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>567</th>\n", | |
" <td>927241</td>\n", | |
" <td>M</td>\n", | |
" <td>20.60</td>\n", | |
" <td>29.33</td>\n", | |
" <td>140.10</td>\n", | |
" <td>1265.0</td>\n", | |
" <td>0.11780</td>\n", | |
" <td>0.27700</td>\n", | |
" <td>0.35140</td>\n", | |
" <td>0.15200</td>\n", | |
" <td>...</td>\n", | |
" <td>25.740</td>\n", | |
" <td>39.42</td>\n", | |
" <td>184.60</td>\n", | |
" <td>1821.0</td>\n", | |
" <td>0.16500</td>\n", | |
" <td>0.86810</td>\n", | |
" <td>0.9387</td>\n", | |
" <td>0.2650</td>\n", | |
" <td>0.4087</td>\n", | |
" <td>0.12400</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>568</th>\n", | |
" <td>92751</td>\n", | |
" <td>B</td>\n", | |
" <td>7.76</td>\n", | |
" <td>24.54</td>\n", | |
" <td>47.92</td>\n", | |
" <td>181.0</td>\n", | |
" <td>0.05263</td>\n", | |
" <td>0.04362</td>\n", | |
" <td>0.00000</td>\n", | |
" <td>0.00000</td>\n", | |
" <td>...</td>\n", | |
" <td>9.456</td>\n", | |
" <td>30.37</td>\n", | |
" <td>59.16</td>\n", | |
" <td>268.6</td>\n", | |
" <td>0.08996</td>\n", | |
" <td>0.06444</td>\n", | |
" <td>0.0000</td>\n", | |
" <td>0.0000</td>\n", | |
" <td>0.2871</td>\n", | |
" <td>0.07039</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>569 rows × 32 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id diagnosis radius_mean texture_mean perimeter_mean area_mean \n", | |
"0 842302 M 17.99 10.38 122.80 1001.0 \\\n", | |
"1 842517 M 20.57 17.77 132.90 1326.0 \n", | |
"2 84300903 M 19.69 21.25 130.00 1203.0 \n", | |
"3 84348301 M 11.42 20.38 77.58 386.1 \n", | |
"4 84358402 M 20.29 14.34 135.10 1297.0 \n", | |
".. ... ... ... ... ... ... \n", | |
"564 926424 M 21.56 22.39 142.00 1479.0 \n", | |
"565 926682 M 20.13 28.25 131.20 1261.0 \n", | |
"566 926954 M 16.60 28.08 108.30 858.1 \n", | |
"567 927241 M 20.60 29.33 140.10 1265.0 \n", | |
"568 92751 B 7.76 24.54 47.92 181.0 \n", | |
"\n", | |
" smoothness_mean compactness_mean concavity_mean concave points_mean \n", | |
"0 0.11840 0.27760 0.30010 0.14710 \\\n", | |
"1 0.08474 0.07864 0.08690 0.07017 \n", | |
"2 0.10960 0.15990 0.19740 0.12790 \n", | |
"3 0.14250 0.28390 0.24140 0.10520 \n", | |
"4 0.10030 0.13280 0.19800 0.10430 \n", | |
".. ... ... ... ... \n", | |
"564 0.11100 0.11590 0.24390 0.13890 \n", | |
"565 0.09780 0.10340 0.14400 0.09791 \n", | |
"566 0.08455 0.10230 0.09251 0.05302 \n", | |
"567 0.11780 0.27700 0.35140 0.15200 \n", | |
"568 0.05263 0.04362 0.00000 0.00000 \n", | |
"\n", | |
" ... radius_worst texture_worst perimeter_worst area_worst \n", | |
"0 ... 25.380 17.33 184.60 2019.0 \\\n", | |
"1 ... 24.990 23.41 158.80 1956.0 \n", | |
"2 ... 23.570 25.53 152.50 1709.0 \n", | |
"3 ... 14.910 26.50 98.87 567.7 \n", | |
"4 ... 22.540 16.67 152.20 1575.0 \n", | |
".. ... ... ... ... ... \n", | |
"564 ... 25.450 26.40 166.10 2027.0 \n", | |
"565 ... 23.690 38.25 155.00 1731.0 \n", | |
"566 ... 18.980 34.12 126.70 1124.0 \n", | |
"567 ... 25.740 39.42 184.60 1821.0 \n", | |
"568 ... 9.456 30.37 59.16 268.6 \n", | |
"\n", | |
" smoothness_worst compactness_worst concavity_worst \n", | |
"0 0.16220 0.66560 0.7119 \\\n", | |
"1 0.12380 0.18660 0.2416 \n", | |
"2 0.14440 0.42450 0.4504 \n", | |
"3 0.20980 0.86630 0.6869 \n", | |
"4 0.13740 0.20500 0.4000 \n", | |
".. ... ... ... \n", | |
"564 0.14100 0.21130 0.4107 \n", | |
"565 0.11660 0.19220 0.3215 \n", | |
"566 0.11390 0.30940 0.3403 \n", | |
"567 0.16500 0.86810 0.9387 \n", | |
"568 0.08996 0.06444 0.0000 \n", | |
"\n", | |
" concave points_worst symmetry_worst fractal_dimension_worst \n", | |
"0 0.2654 0.4601 0.11890 \n", | |
"1 0.1860 0.2750 0.08902 \n", | |
"2 0.2430 0.3613 0.08758 \n", | |
"3 0.2575 0.6638 0.17300 \n", | |
"4 0.1625 0.2364 0.07678 \n", | |
".. ... ... ... \n", | |
"564 0.2216 0.2060 0.07115 \n", | |
"565 0.1628 0.2572 0.06637 \n", | |
"566 0.1418 0.2218 0.07820 \n", | |
"567 0.2650 0.4087 0.12400 \n", | |
"568 0.0000 0.2871 0.07039 \n", | |
"\n", | |
"[569 rows x 32 columns]" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Removing the columns that contain null values\n", | |
"df = df.dropna(axis=1)\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 45, | |
"id": "a826fdbe", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>diagnosis</th>\n", | |
" <th>radius_mean</th>\n", | |
" <th>texture_mean</th>\n", | |
" <th>perimeter_mean</th>\n", | |
" <th>area_mean</th>\n", | |
" <th>smoothness_mean</th>\n", | |
" <th>compactness_mean</th>\n", | |
" <th>concavity_mean</th>\n", | |
" <th>concave points_mean</th>\n", | |
" <th>...</th>\n", | |
" <th>radius_worst</th>\n", | |
" <th>texture_worst</th>\n", | |
" <th>perimeter_worst</th>\n", | |
" <th>area_worst</th>\n", | |
" <th>smoothness_worst</th>\n", | |
" <th>compactness_worst</th>\n", | |
" <th>concavity_worst</th>\n", | |
" <th>concave points_worst</th>\n", | |
" <th>symmetry_worst</th>\n", | |
" <th>fractal_dimension_worst</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>842302</td>\n", | |
" <td>1</td>\n", | |
" <td>17.99</td>\n", | |
" <td>10.38</td>\n", | |
" <td>122.80</td>\n", | |
" <td>1001.0</td>\n", | |
" <td>0.11840</td>\n", | |
" <td>0.27760</td>\n", | |
" <td>0.30010</td>\n", | |
" <td>0.14710</td>\n", | |
" <td>...</td>\n", | |
" <td>25.380</td>\n", | |
" <td>17.33</td>\n", | |
" <td>184.60</td>\n", | |
" <td>2019.0</td>\n", | |
" <td>0.16220</td>\n", | |
" <td>0.66560</td>\n", | |
" <td>0.7119</td>\n", | |
" <td>0.2654</td>\n", | |
" <td>0.4601</td>\n", | |
" <td>0.11890</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>842517</td>\n", | |
" <td>1</td>\n", | |
" <td>20.57</td>\n", | |
" <td>17.77</td>\n", | |
" <td>132.90</td>\n", | |
" <td>1326.0</td>\n", | |
" <td>0.08474</td>\n", | |
" <td>0.07864</td>\n", | |
" <td>0.08690</td>\n", | |
" <td>0.07017</td>\n", | |
" <td>...</td>\n", | |
" <td>24.990</td>\n", | |
" <td>23.41</td>\n", | |
" <td>158.80</td>\n", | |
" <td>1956.0</td>\n", | |
" <td>0.12380</td>\n", | |
" <td>0.18660</td>\n", | |
" <td>0.2416</td>\n", | |
" <td>0.1860</td>\n", | |
" <td>0.2750</td>\n", | |
" <td>0.08902</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>84300903</td>\n", | |
" <td>1</td>\n", | |
" <td>19.69</td>\n", | |
" <td>21.25</td>\n", | |
" <td>130.00</td>\n", | |
" <td>1203.0</td>\n", | |
" <td>0.10960</td>\n", | |
" <td>0.15990</td>\n", | |
" <td>0.19740</td>\n", | |
" <td>0.12790</td>\n", | |
" <td>...</td>\n", | |
" <td>23.570</td>\n", | |
" <td>25.53</td>\n", | |
" <td>152.50</td>\n", | |
" <td>1709.0</td>\n", | |
" <td>0.14440</td>\n", | |
" <td>0.42450</td>\n", | |
" <td>0.4504</td>\n", | |
" <td>0.2430</td>\n", | |
" <td>0.3613</td>\n", | |
" <td>0.08758</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>84348301</td>\n", | |
" <td>1</td>\n", | |
" <td>11.42</td>\n", | |
" <td>20.38</td>\n", | |
" <td>77.58</td>\n", | |
" <td>386.1</td>\n", | |
" <td>0.14250</td>\n", | |
" <td>0.28390</td>\n", | |
" <td>0.24140</td>\n", | |
" <td>0.10520</td>\n", | |
" <td>...</td>\n", | |
" <td>14.910</td>\n", | |
" <td>26.50</td>\n", | |
" <td>98.87</td>\n", | |
" <td>567.7</td>\n", | |
" <td>0.20980</td>\n", | |
" <td>0.86630</td>\n", | |
" <td>0.6869</td>\n", | |
" <td>0.2575</td>\n", | |
" <td>0.6638</td>\n", | |
" <td>0.17300</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>84358402</td>\n", | |
" <td>1</td>\n", | |
" <td>20.29</td>\n", | |
" <td>14.34</td>\n", | |
" <td>135.10</td>\n", | |
" <td>1297.0</td>\n", | |
" <td>0.10030</td>\n", | |
" <td>0.13280</td>\n", | |
" <td>0.19800</td>\n", | |
" <td>0.10430</td>\n", | |
" <td>...</td>\n", | |
" <td>22.540</td>\n", | |
" <td>16.67</td>\n", | |
" <td>152.20</td>\n", | |
" <td>1575.0</td>\n", | |
" <td>0.13740</td>\n", | |
" <td>0.20500</td>\n", | |
" <td>0.4000</td>\n", | |
" <td>0.1625</td>\n", | |
" <td>0.2364</td>\n", | |
" <td>0.07678</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>564</th>\n", | |
" <td>926424</td>\n", | |
" <td>1</td>\n", | |
" <td>21.56</td>\n", | |
" <td>22.39</td>\n", | |
" <td>142.00</td>\n", | |
" <td>1479.0</td>\n", | |
" <td>0.11100</td>\n", | |
" <td>0.11590</td>\n", | |
" <td>0.24390</td>\n", | |
" <td>0.13890</td>\n", | |
" <td>...</td>\n", | |
" <td>25.450</td>\n", | |
" <td>26.40</td>\n", | |
" <td>166.10</td>\n", | |
" <td>2027.0</td>\n", | |
" <td>0.14100</td>\n", | |
" <td>0.21130</td>\n", | |
" <td>0.4107</td>\n", | |
" <td>0.2216</td>\n", | |
" <td>0.2060</td>\n", | |
" <td>0.07115</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>565</th>\n", | |
" <td>926682</td>\n", | |
" <td>1</td>\n", | |
" <td>20.13</td>\n", | |
" <td>28.25</td>\n", | |
" <td>131.20</td>\n", | |
" <td>1261.0</td>\n", | |
" <td>0.09780</td>\n", | |
" <td>0.10340</td>\n", | |
" <td>0.14400</td>\n", | |
" <td>0.09791</td>\n", | |
" <td>...</td>\n", | |
" <td>23.690</td>\n", | |
" <td>38.25</td>\n", | |
" <td>155.00</td>\n", | |
" <td>1731.0</td>\n", | |
" <td>0.11660</td>\n", | |
" <td>0.19220</td>\n", | |
" <td>0.3215</td>\n", | |
" <td>0.1628</td>\n", | |
" <td>0.2572</td>\n", | |
" <td>0.06637</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>566</th>\n", | |
" <td>926954</td>\n", | |
" <td>1</td>\n", | |
" <td>16.60</td>\n", | |
" <td>28.08</td>\n", | |
" <td>108.30</td>\n", | |
" <td>858.1</td>\n", | |
" <td>0.08455</td>\n", | |
" <td>0.10230</td>\n", | |
" <td>0.09251</td>\n", | |
" <td>0.05302</td>\n", | |
" <td>...</td>\n", | |
" <td>18.980</td>\n", | |
" <td>34.12</td>\n", | |
" <td>126.70</td>\n", | |
" <td>1124.0</td>\n", | |
" <td>0.11390</td>\n", | |
" <td>0.30940</td>\n", | |
" <td>0.3403</td>\n", | |
" <td>0.1418</td>\n", | |
" <td>0.2218</td>\n", | |
" <td>0.07820</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>567</th>\n", | |
" <td>927241</td>\n", | |
" <td>1</td>\n", | |
" <td>20.60</td>\n", | |
" <td>29.33</td>\n", | |
" <td>140.10</td>\n", | |
" <td>1265.0</td>\n", | |
" <td>0.11780</td>\n", | |
" <td>0.27700</td>\n", | |
" <td>0.35140</td>\n", | |
" <td>0.15200</td>\n", | |
" <td>...</td>\n", | |
" <td>25.740</td>\n", | |
" <td>39.42</td>\n", | |
" <td>184.60</td>\n", | |
" <td>1821.0</td>\n", | |
" <td>0.16500</td>\n", | |
" <td>0.86810</td>\n", | |
" <td>0.9387</td>\n", | |
" <td>0.2650</td>\n", | |
" <td>0.4087</td>\n", | |
" <td>0.12400</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>568</th>\n", | |
" <td>92751</td>\n", | |
" <td>0</td>\n", | |
" <td>7.76</td>\n", | |
" <td>24.54</td>\n", | |
" <td>47.92</td>\n", | |
" <td>181.0</td>\n", | |
" <td>0.05263</td>\n", | |
" <td>0.04362</td>\n", | |
" <td>0.00000</td>\n", | |
" <td>0.00000</td>\n", | |
" <td>...</td>\n", | |
" <td>9.456</td>\n", | |
" <td>30.37</td>\n", | |
" <td>59.16</td>\n", | |
" <td>268.6</td>\n", | |
" <td>0.08996</td>\n", | |
" <td>0.06444</td>\n", | |
" <td>0.0000</td>\n", | |
" <td>0.0000</td>\n", | |
" <td>0.2871</td>\n", | |
" <td>0.07039</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>569 rows × 32 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id diagnosis radius_mean texture_mean perimeter_mean \n", | |
"0 842302 1 17.99 10.38 122.80 \\\n", | |
"1 842517 1 20.57 17.77 132.90 \n", | |
"2 84300903 1 19.69 21.25 130.00 \n", | |
"3 84348301 1 11.42 20.38 77.58 \n", | |
"4 84358402 1 20.29 14.34 135.10 \n", | |
".. ... ... ... ... ... \n", | |
"564 926424 1 21.56 22.39 142.00 \n", | |
"565 926682 1 20.13 28.25 131.20 \n", | |
"566 926954 1 16.60 28.08 108.30 \n", | |
"567 927241 1 20.60 29.33 140.10 \n", | |
"568 92751 0 7.76 24.54 47.92 \n", | |
"\n", | |
" area_mean smoothness_mean compactness_mean concavity_mean \n", | |
"0 1001.0 0.11840 0.27760 0.30010 \\\n", | |
"1 1326.0 0.08474 0.07864 0.08690 \n", | |
"2 1203.0 0.10960 0.15990 0.19740 \n", | |
"3 386.1 0.14250 0.28390 0.24140 \n", | |
"4 1297.0 0.10030 0.13280 0.19800 \n", | |
".. ... ... ... ... \n", | |
"564 1479.0 0.11100 0.11590 0.24390 \n", | |
"565 1261.0 0.09780 0.10340 0.14400 \n", | |
"566 858.1 0.08455 0.10230 0.09251 \n", | |
"567 1265.0 0.11780 0.27700 0.35140 \n", | |
"568 181.0 0.05263 0.04362 0.00000 \n", | |
"\n", | |
" concave points_mean ... radius_worst texture_worst perimeter_worst \n", | |
"0 0.14710 ... 25.380 17.33 184.60 \\\n", | |
"1 0.07017 ... 24.990 23.41 158.80 \n", | |
"2 0.12790 ... 23.570 25.53 152.50 \n", | |
"3 0.10520 ... 14.910 26.50 98.87 \n", | |
"4 0.10430 ... 22.540 16.67 152.20 \n", | |
".. ... ... ... ... ... \n", | |
"564 0.13890 ... 25.450 26.40 166.10 \n", | |
"565 0.09791 ... 23.690 38.25 155.00 \n", | |
"566 0.05302 ... 18.980 34.12 126.70 \n", | |
"567 0.15200 ... 25.740 39.42 184.60 \n", | |
"568 0.00000 ... 9.456 30.37 59.16 \n", | |
"\n", | |
" area_worst smoothness_worst compactness_worst concavity_worst \n", | |
"0 2019.0 0.16220 0.66560 0.7119 \\\n", | |
"1 1956.0 0.12380 0.18660 0.2416 \n", | |
"2 1709.0 0.14440 0.42450 0.4504 \n", | |
"3 567.7 0.20980 0.86630 0.6869 \n", | |
"4 1575.0 0.13740 0.20500 0.4000 \n", | |
".. ... ... ... ... \n", | |
"564 2027.0 0.14100 0.21130 0.4107 \n", | |
"565 1731.0 0.11660 0.19220 0.3215 \n", | |
"566 1124.0 0.11390 0.30940 0.3403 \n", | |
"567 1821.0 0.16500 0.86810 0.9387 \n", | |
"568 268.6 0.08996 0.06444 0.0000 \n", | |
"\n", | |
" concave points_worst symmetry_worst fractal_dimension_worst \n", | |
"0 0.2654 0.4601 0.11890 \n", | |
"1 0.1860 0.2750 0.08902 \n", | |
"2 0.2430 0.3613 0.08758 \n", | |
"3 0.2575 0.6638 0.17300 \n", | |
"4 0.1625 0.2364 0.07678 \n", | |
".. ... ... ... \n", | |
"564 0.2216 0.2060 0.07115 \n", | |
"565 0.1628 0.2572 0.06637 \n", | |
"566 0.1418 0.2218 0.07820 \n", | |
"567 0.2650 0.4087 0.12400 \n", | |
"568 0.0000 0.2871 0.07039 \n", | |
"\n", | |
"[569 rows x 32 columns]" | |
] | |
}, | |
"execution_count": 45, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#Encoding categorical data \n", | |
"# LabelEncoder assigns codes in sorted label order: B (benign) -> 0, M (malignant) -> 1\n", | |
"from sklearn import preprocessing\n", | |
"le = preprocessing.LabelEncoder()\n", | |
"\n", | |
"# Encode labels in column 'diagnosis'.\n", | |
"df['diagnosis']= le.fit_transform(df['diagnosis'])\n", | |
" \n", | |
"df['diagnosis'].unique()\n", | |
"df\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 46, | |
"id": "ab60d42a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>diagnosis</th>\n", | |
" <th>radius_mean</th>\n", | |
" <th>texture_mean</th>\n", | |
" <th>perimeter_mean</th>\n", | |
" <th>area_mean</th>\n", | |
" <th>smoothness_mean</th>\n", | |
" <th>compactness_mean</th>\n", | |
" <th>concavity_mean</th>\n", | |
" <th>concave points_mean</th>\n", | |
" <th>...</th>\n", | |
" <th>radius_worst</th>\n", | |
" <th>texture_worst</th>\n", | |
" <th>perimeter_worst</th>\n", | |
" <th>area_worst</th>\n", | |
" <th>smoothness_worst</th>\n", | |
" <th>compactness_worst</th>\n", | |
" <th>concavity_worst</th>\n", | |
" <th>concave points_worst</th>\n", | |
" <th>symmetry_worst</th>\n", | |
" <th>fractal_dimension_worst</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>count</th>\n", | |
" <td>5.690000e+02</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td>3.037183e+07</td>\n", | |
" <td>0.372583</td>\n", | |
" <td>14.127292</td>\n", | |
" <td>19.289649</td>\n", | |
" <td>91.969033</td>\n", | |
" <td>654.889104</td>\n", | |
" <td>0.096360</td>\n", | |
" <td>0.104341</td>\n", | |
" <td>0.088799</td>\n", | |
" <td>0.048919</td>\n", | |
" <td>...</td>\n", | |
" <td>16.269190</td>\n", | |
" <td>25.677223</td>\n", | |
" <td>107.261213</td>\n", | |
" <td>880.583128</td>\n", | |
" <td>0.132369</td>\n", | |
" <td>0.254265</td>\n", | |
" <td>0.272188</td>\n", | |
" <td>0.114606</td>\n", | |
" <td>0.290076</td>\n", | |
" <td>0.083946</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td>1.250206e+08</td>\n", | |
" <td>0.483918</td>\n", | |
" <td>3.524049</td>\n", | |
" <td>4.301036</td>\n", | |
" <td>24.298981</td>\n", | |
" <td>351.914129</td>\n", | |
" <td>0.014064</td>\n", | |
" <td>0.052813</td>\n", | |
" <td>0.079720</td>\n", | |
" <td>0.038803</td>\n", | |
" <td>...</td>\n", | |
" <td>4.833242</td>\n", | |
" <td>6.146258</td>\n", | |
" <td>33.602542</td>\n", | |
" <td>569.356993</td>\n", | |
" <td>0.022832</td>\n", | |
" <td>0.157336</td>\n", | |
" <td>0.208624</td>\n", | |
" <td>0.065732</td>\n", | |
" <td>0.061867</td>\n", | |
" <td>0.018061</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td>8.670000e+03</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>6.981000</td>\n", | |
" <td>9.710000</td>\n", | |
" <td>43.790000</td>\n", | |
" <td>143.500000</td>\n", | |
" <td>0.052630</td>\n", | |
" <td>0.019380</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>7.930000</td>\n", | |
" <td>12.020000</td>\n", | |
" <td>50.410000</td>\n", | |
" <td>185.200000</td>\n", | |
" <td>0.071170</td>\n", | |
" <td>0.027290</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.156500</td>\n", | |
" <td>0.055040</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td>8.692180e+05</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>11.700000</td>\n", | |
" <td>16.170000</td>\n", | |
" <td>75.170000</td>\n", | |
" <td>420.300000</td>\n", | |
" <td>0.086370</td>\n", | |
" <td>0.064920</td>\n", | |
" <td>0.029560</td>\n", | |
" <td>0.020310</td>\n", | |
" <td>...</td>\n", | |
" <td>13.010000</td>\n", | |
" <td>21.080000</td>\n", | |
" <td>84.110000</td>\n", | |
" <td>515.300000</td>\n", | |
" <td>0.116600</td>\n", | |
" <td>0.147200</td>\n", | |
" <td>0.114500</td>\n", | |
" <td>0.064930</td>\n", | |
" <td>0.250400</td>\n", | |
" <td>0.071460</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td>9.060240e+05</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>13.370000</td>\n", | |
" <td>18.840000</td>\n", | |
" <td>86.240000</td>\n", | |
" <td>551.100000</td>\n", | |
" <td>0.095870</td>\n", | |
" <td>0.092630</td>\n", | |
" <td>0.061540</td>\n", | |
" <td>0.033500</td>\n", | |
" <td>...</td>\n", | |
" <td>14.970000</td>\n", | |
" <td>25.410000</td>\n", | |
" <td>97.660000</td>\n", | |
" <td>686.500000</td>\n", | |
" <td>0.131300</td>\n", | |
" <td>0.211900</td>\n", | |
" <td>0.226700</td>\n", | |
" <td>0.099930</td>\n", | |
" <td>0.282200</td>\n", | |
" <td>0.080040</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td>8.813129e+06</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>15.780000</td>\n", | |
" <td>21.800000</td>\n", | |
" <td>104.100000</td>\n", | |
" <td>782.700000</td>\n", | |
" <td>0.105300</td>\n", | |
" <td>0.130400</td>\n", | |
" <td>0.130700</td>\n", | |
" <td>0.074000</td>\n", | |
" <td>...</td>\n", | |
" <td>18.790000</td>\n", | |
" <td>29.720000</td>\n", | |
" <td>125.400000</td>\n", | |
" <td>1084.000000</td>\n", | |
" <td>0.146000</td>\n", | |
" <td>0.339100</td>\n", | |
" <td>0.382900</td>\n", | |
" <td>0.161400</td>\n", | |
" <td>0.317900</td>\n", | |
" <td>0.092080</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td>9.113205e+08</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>28.110000</td>\n", | |
" <td>39.280000</td>\n", | |
" <td>188.500000</td>\n", | |
" <td>2501.000000</td>\n", | |
" <td>0.163400</td>\n", | |
" <td>0.345400</td>\n", | |
" <td>0.426800</td>\n", | |
" <td>0.201200</td>\n", | |
" <td>...</td>\n", | |
" <td>36.040000</td>\n", | |
" <td>49.540000</td>\n", | |
" <td>251.200000</td>\n", | |
" <td>4254.000000</td>\n", | |
" <td>0.222600</td>\n", | |
" <td>1.058000</td>\n", | |
" <td>1.252000</td>\n", | |
" <td>0.291000</td>\n", | |
" <td>0.663800</td>\n", | |
" <td>0.207500</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>8 rows × 32 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id diagnosis radius_mean texture_mean perimeter_mean \n", | |
"count 5.690000e+02 569.000000 569.000000 569.000000 569.000000 \\\n", | |
"mean 3.037183e+07 0.372583 14.127292 19.289649 91.969033 \n", | |
"std 1.250206e+08 0.483918 3.524049 4.301036 24.298981 \n", | |
"min 8.670000e+03 0.000000 6.981000 9.710000 43.790000 \n", | |
"25% 8.692180e+05 0.000000 11.700000 16.170000 75.170000 \n", | |
"50% 9.060240e+05 0.000000 13.370000 18.840000 86.240000 \n", | |
"75% 8.813129e+06 1.000000 15.780000 21.800000 104.100000 \n", | |
"max 9.113205e+08 1.000000 28.110000 39.280000 188.500000 \n", | |
"\n", | |
" area_mean smoothness_mean compactness_mean concavity_mean \n", | |
"count 569.000000 569.000000 569.000000 569.000000 \\\n", | |
"mean 654.889104 0.096360 0.104341 0.088799 \n", | |
"std 351.914129 0.014064 0.052813 0.079720 \n", | |
"min 143.500000 0.052630 0.019380 0.000000 \n", | |
"25% 420.300000 0.086370 0.064920 0.029560 \n", | |
"50% 551.100000 0.095870 0.092630 0.061540 \n", | |
"75% 782.700000 0.105300 0.130400 0.130700 \n", | |
"max 2501.000000 0.163400 0.345400 0.426800 \n", | |
"\n", | |
" concave points_mean ... radius_worst texture_worst perimeter_worst \n", | |
"count 569.000000 ... 569.000000 569.000000 569.000000 \\\n", | |
"mean 0.048919 ... 16.269190 25.677223 107.261213 \n", | |
"std 0.038803 ... 4.833242 6.146258 33.602542 \n", | |
"min 0.000000 ... 7.930000 12.020000 50.410000 \n", | |
"25% 0.020310 ... 13.010000 21.080000 84.110000 \n", | |
"50% 0.033500 ... 14.970000 25.410000 97.660000 \n", | |
"75% 0.074000 ... 18.790000 29.720000 125.400000 \n", | |
"max 0.201200 ... 36.040000 49.540000 251.200000 \n", | |
"\n", | |
" area_worst smoothness_worst compactness_worst concavity_worst \n", | |
"count 569.000000 569.000000 569.000000 569.000000 \\\n", | |
"mean 880.583128 0.132369 0.254265 0.272188 \n", | |
"std 569.356993 0.022832 0.157336 0.208624 \n", | |
"min 185.200000 0.071170 0.027290 0.000000 \n", | |
"25% 515.300000 0.116600 0.147200 0.114500 \n", | |
"50% 686.500000 0.131300 0.211900 0.226700 \n", | |
"75% 1084.000000 0.146000 0.339100 0.382900 \n", | |
"max 4254.000000 0.222600 1.058000 1.252000 \n", | |
"\n", | |
" concave points_worst symmetry_worst fractal_dimension_worst \n", | |
"count 569.000000 569.000000 569.000000 \n", | |
"mean 0.114606 0.290076 0.083946 \n", | |
"std 0.065732 0.061867 0.018061 \n", | |
"min 0.000000 0.156500 0.055040 \n", | |
"25% 0.064930 0.250400 0.071460 \n", | |
"50% 0.099930 0.282200 0.080040 \n", | |
"75% 0.161400 0.317900 0.092080 \n", | |
"max 0.291000 0.663800 0.207500 \n", | |
"\n", | |
"[8 rows x 32 columns]" | |
] | |
}, | |
"execution_count": 46, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.describe()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 47, | |
"id": "e23bcfd5", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# ***************************** 2. Data Integration *******************************" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 48, | |
"id": "240bebaf", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>diagnosis</th>\n", | |
" <th>radius_mean</th>\n", | |
" <th>texture_mean</th>\n", | |
" <th>perimeter_mean</th>\n", | |
" <th>area_mean</th>\n", | |
" <th>smoothness_mean</th>\n", | |
" <th>compactness_mean</th>\n", | |
" <th>concavity_mean</th>\n", | |
" <th>concave points_mean</th>\n", | |
" <th>...</th>\n", | |
" <th>radius_worst</th>\n", | |
" <th>texture_worst</th>\n", | |
" <th>perimeter_worst</th>\n", | |
" <th>area_worst</th>\n", | |
" <th>smoothness_worst</th>\n", | |
" <th>compactness_worst</th>\n", | |
" <th>concavity_worst</th>\n", | |
" <th>concave points_worst</th>\n", | |
" <th>symmetry_worst</th>\n", | |
" <th>fractal_dimension_worst</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>842302</td>\n", | |
" <td>1</td>\n", | |
" <td>17.99</td>\n", | |
" <td>10.38</td>\n", | |
" <td>122.80</td>\n", | |
" <td>1001.0</td>\n", | |
" <td>0.11840</td>\n", | |
" <td>0.27760</td>\n", | |
" <td>0.30010</td>\n", | |
" <td>0.14710</td>\n", | |
" <td>...</td>\n", | |
" <td>25.380</td>\n", | |
" <td>17.33</td>\n", | |
" <td>184.60</td>\n", | |
" <td>2019.0</td>\n", | |
" <td>0.16220</td>\n", | |
" <td>0.66560</td>\n", | |
" <td>0.7119</td>\n", | |
" <td>0.2654</td>\n", | |
" <td>0.4601</td>\n", | |
" <td>0.11890</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>842517</td>\n", | |
" <td>1</td>\n", | |
" <td>20.57</td>\n", | |
" <td>17.77</td>\n", | |
" <td>132.90</td>\n", | |
" <td>1326.0</td>\n", | |
" <td>0.08474</td>\n", | |
" <td>0.07864</td>\n", | |
" <td>0.08690</td>\n", | |
" <td>0.07017</td>\n", | |
" <td>...</td>\n", | |
" <td>24.990</td>\n", | |
" <td>23.41</td>\n", | |
" <td>158.80</td>\n", | |
" <td>1956.0</td>\n", | |
" <td>0.12380</td>\n", | |
" <td>0.18660</td>\n", | |
" <td>0.2416</td>\n", | |
" <td>0.1860</td>\n", | |
" <td>0.2750</td>\n", | |
" <td>0.08902</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>84300903</td>\n", | |
" <td>1</td>\n", | |
" <td>19.69</td>\n", | |
" <td>21.25</td>\n", | |
" <td>130.00</td>\n", | |
" <td>1203.0</td>\n", | |
" <td>0.10960</td>\n", | |
" <td>0.15990</td>\n", | |
" <td>0.19740</td>\n", | |
" <td>0.12790</td>\n", | |
" <td>...</td>\n", | |
" <td>23.570</td>\n", | |
" <td>25.53</td>\n", | |
" <td>152.50</td>\n", | |
" <td>1709.0</td>\n", | |
" <td>0.14440</td>\n", | |
" <td>0.42450</td>\n", | |
" <td>0.4504</td>\n", | |
" <td>0.2430</td>\n", | |
" <td>0.3613</td>\n", | |
" <td>0.08758</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>84348301</td>\n", | |
" <td>1</td>\n", | |
" <td>11.42</td>\n", | |
" <td>20.38</td>\n", | |
" <td>77.58</td>\n", | |
" <td>386.1</td>\n", | |
" <td>0.14250</td>\n", | |
" <td>0.28390</td>\n", | |
" <td>0.24140</td>\n", | |
" <td>0.10520</td>\n", | |
" <td>...</td>\n", | |
" <td>14.910</td>\n", | |
" <td>26.50</td>\n", | |
" <td>98.87</td>\n", | |
" <td>567.7</td>\n", | |
" <td>0.20980</td>\n", | |
" <td>0.86630</td>\n", | |
" <td>0.6869</td>\n", | |
" <td>0.2575</td>\n", | |
" <td>0.6638</td>\n", | |
" <td>0.17300</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>84358402</td>\n", | |
" <td>1</td>\n", | |
" <td>20.29</td>\n", | |
" <td>14.34</td>\n", | |
" <td>135.10</td>\n", | |
" <td>1297.0</td>\n", | |
" <td>0.10030</td>\n", | |
" <td>0.13280</td>\n", | |
" <td>0.19800</td>\n", | |
" <td>0.10430</td>\n", | |
" <td>...</td>\n", | |
" <td>22.540</td>\n", | |
" <td>16.67</td>\n", | |
" <td>152.20</td>\n", | |
" <td>1575.0</td>\n", | |
" <td>0.13740</td>\n", | |
" <td>0.20500</td>\n", | |
" <td>0.4000</td>\n", | |
" <td>0.1625</td>\n", | |
" <td>0.2364</td>\n", | |
" <td>0.07678</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>564</th>\n", | |
" <td>926424</td>\n", | |
" <td>1</td>\n", | |
" <td>21.56</td>\n", | |
" <td>22.39</td>\n", | |
" <td>142.00</td>\n", | |
" <td>1479.0</td>\n", | |
" <td>0.11100</td>\n", | |
" <td>0.11590</td>\n", | |
" <td>0.24390</td>\n", | |
" <td>0.13890</td>\n", | |
" <td>...</td>\n", | |
" <td>25.450</td>\n", | |
" <td>26.40</td>\n", | |
" <td>166.10</td>\n", | |
" <td>2027.0</td>\n", | |
" <td>0.14100</td>\n", | |
" <td>0.21130</td>\n", | |
" <td>0.4107</td>\n", | |
" <td>0.2216</td>\n", | |
" <td>0.2060</td>\n", | |
" <td>0.07115</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>565</th>\n", | |
" <td>926682</td>\n", | |
" <td>1</td>\n", | |
" <td>20.13</td>\n", | |
" <td>28.25</td>\n", | |
" <td>131.20</td>\n", | |
" <td>1261.0</td>\n", | |
" <td>0.09780</td>\n", | |
" <td>0.10340</td>\n", | |
" <td>0.14400</td>\n", | |
" <td>0.09791</td>\n", | |
" <td>...</td>\n", | |
" <td>23.690</td>\n", | |
" <td>38.25</td>\n", | |
" <td>155.00</td>\n", | |
" <td>1731.0</td>\n", | |
" <td>0.11660</td>\n", | |
" <td>0.19220</td>\n", | |
" <td>0.3215</td>\n", | |
" <td>0.1628</td>\n", | |
" <td>0.2572</td>\n", | |
" <td>0.06637</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>566</th>\n", | |
" <td>926954</td>\n", | |
" <td>1</td>\n", | |
" <td>16.60</td>\n", | |
" <td>28.08</td>\n", | |
" <td>108.30</td>\n", | |
" <td>858.1</td>\n", | |
" <td>0.08455</td>\n", | |
" <td>0.10230</td>\n", | |
" <td>0.09251</td>\n", | |
" <td>0.05302</td>\n", | |
" <td>...</td>\n", | |
" <td>18.980</td>\n", | |
" <td>34.12</td>\n", | |
" <td>126.70</td>\n", | |
" <td>1124.0</td>\n", | |
" <td>0.11390</td>\n", | |
" <td>0.30940</td>\n", | |
" <td>0.3403</td>\n", | |
" <td>0.1418</td>\n", | |
" <td>0.2218</td>\n", | |
" <td>0.07820</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>567</th>\n", | |
" <td>927241</td>\n", | |
" <td>1</td>\n", | |
" <td>20.60</td>\n", | |
" <td>29.33</td>\n", | |
" <td>140.10</td>\n", | |
" <td>1265.0</td>\n", | |
" <td>0.11780</td>\n", | |
" <td>0.27700</td>\n", | |
" <td>0.35140</td>\n", | |
" <td>0.15200</td>\n", | |
" <td>...</td>\n", | |
" <td>25.740</td>\n", | |
" <td>39.42</td>\n", | |
" <td>184.60</td>\n", | |
" <td>1821.0</td>\n", | |
" <td>0.16500</td>\n", | |
" <td>0.86810</td>\n", | |
" <td>0.9387</td>\n", | |
" <td>0.2650</td>\n", | |
" <td>0.4087</td>\n", | |
" <td>0.12400</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>568</th>\n", | |
" <td>92751</td>\n", | |
" <td>0</td>\n", | |
" <td>7.76</td>\n", | |
" <td>24.54</td>\n", | |
" <td>47.92</td>\n", | |
" <td>181.0</td>\n", | |
" <td>0.05263</td>\n", | |
" <td>0.04362</td>\n", | |
" <td>0.00000</td>\n", | |
" <td>0.00000</td>\n", | |
" <td>...</td>\n", | |
" <td>9.456</td>\n", | |
" <td>30.37</td>\n", | |
" <td>59.16</td>\n", | |
" <td>268.6</td>\n", | |
" <td>0.08996</td>\n", | |
" <td>0.06444</td>\n", | |
" <td>0.0000</td>\n", | |
" <td>0.0000</td>\n", | |
" <td>0.2871</td>\n", | |
" <td>0.07039</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>569 rows × 32 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id diagnosis radius_mean texture_mean perimeter_mean \n", | |
"0 842302 1 17.99 10.38 122.80 \\\n", | |
"1 842517 1 20.57 17.77 132.90 \n", | |
"2 84300903 1 19.69 21.25 130.00 \n", | |
"3 84348301 1 11.42 20.38 77.58 \n", | |
"4 84358402 1 20.29 14.34 135.10 \n", | |
".. ... ... ... ... ... \n", | |
"564 926424 1 21.56 22.39 142.00 \n", | |
"565 926682 1 20.13 28.25 131.20 \n", | |
"566 926954 1 16.60 28.08 108.30 \n", | |
"567 927241 1 20.60 29.33 140.10 \n", | |
"568 92751 0 7.76 24.54 47.92 \n", | |
"\n", | |
" area_mean smoothness_mean compactness_mean concavity_mean \n", | |
"0 1001.0 0.11840 0.27760 0.30010 \\\n", | |
"1 1326.0 0.08474 0.07864 0.08690 \n", | |
"2 1203.0 0.10960 0.15990 0.19740 \n", | |
"3 386.1 0.14250 0.28390 0.24140 \n", | |
"4 1297.0 0.10030 0.13280 0.19800 \n", | |
".. ... ... ... ... \n", | |
"564 1479.0 0.11100 0.11590 0.24390 \n", | |
"565 1261.0 0.09780 0.10340 0.14400 \n", | |
"566 858.1 0.08455 0.10230 0.09251 \n", | |
"567 1265.0 0.11780 0.27700 0.35140 \n", | |
"568 181.0 0.05263 0.04362 0.00000 \n", | |
"\n", | |
" concave points_mean ... radius_worst texture_worst perimeter_worst \n", | |
"0 0.14710 ... 25.380 17.33 184.60 \\\n", | |
"1 0.07017 ... 24.990 23.41 158.80 \n", | |
"2 0.12790 ... 23.570 25.53 152.50 \n", | |
"3 0.10520 ... 14.910 26.50 98.87 \n", | |
"4 0.10430 ... 22.540 16.67 152.20 \n", | |
".. ... ... ... ... ... \n", | |
"564 0.13890 ... 25.450 26.40 166.10 \n", | |
"565 0.09791 ... 23.690 38.25 155.00 \n", | |
"566 0.05302 ... 18.980 34.12 126.70 \n", | |
"567 0.15200 ... 25.740 39.42 184.60 \n", | |
"568 0.00000 ... 9.456 30.37 59.16 \n", | |
"\n", | |
" area_worst smoothness_worst compactness_worst concavity_worst \n", | |
"0 2019.0 0.16220 0.66560 0.7119 \\\n", | |
"1 1956.0 0.12380 0.18660 0.2416 \n", | |
"2 1709.0 0.14440 0.42450 0.4504 \n", | |
"3 567.7 0.20980 0.86630 0.6869 \n", | |
"4 1575.0 0.13740 0.20500 0.4000 \n", | |
".. ... ... ... ... \n", | |
"564 2027.0 0.14100 0.21130 0.4107 \n", | |
"565 1731.0 0.11660 0.19220 0.3215 \n", | |
"566 1124.0 0.11390 0.30940 0.3403 \n", | |
"567 1821.0 0.16500 0.86810 0.9387 \n", | |
"568 268.6 0.08996 0.06444 0.0000 \n", | |
"\n", | |
" concave points_worst symmetry_worst fractal_dimension_worst \n", | |
"0 0.2654 0.4601 0.11890 \n", | |
"1 0.1860 0.2750 0.08902 \n", | |
"2 0.2430 0.3613 0.08758 \n", | |
"3 0.2575 0.6638 0.17300 \n", | |
"4 0.1625 0.2364 0.07678 \n", | |
".. ... ... ... \n", | |
"564 0.2216 0.2060 0.07115 \n", | |
"565 0.1628 0.2572 0.06637 \n", | |
"566 0.1418 0.2218 0.07820 \n", | |
"567 0.2650 0.4087 0.12400 \n", | |
"568 0.0000 0.2871 0.07039 \n", | |
"\n", | |
"[569 rows x 32 columns]" | |
] | |
}, | |
"execution_count": 48, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 49, | |
"id": "c6ff5d71", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>diagnosis</th>\n", | |
" <th>radius_mean</th>\n", | |
" <th>texture_mean</th>\n", | |
" <th>perimeter_mean</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>842302</td>\n", | |
" <td>1</td>\n", | |
" <td>17.99</td>\n", | |
" <td>10.38</td>\n", | |
" <td>122.80</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>842517</td>\n", | |
" <td>1</td>\n", | |
" <td>20.57</td>\n", | |
" <td>17.77</td>\n", | |
" <td>132.90</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>84300903</td>\n", | |
" <td>1</td>\n", | |
" <td>19.69</td>\n", | |
" <td>21.25</td>\n", | |
" <td>130.00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>84348301</td>\n", | |
" <td>1</td>\n", | |
" <td>11.42</td>\n", | |
" <td>20.38</td>\n", | |
" <td>77.58</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>84358402</td>\n", | |
" <td>1</td>\n", | |
" <td>20.29</td>\n", | |
" <td>14.34</td>\n", | |
" <td>135.10</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>564</th>\n", | |
" <td>926424</td>\n", | |
" <td>1</td>\n", | |
" <td>21.56</td>\n", | |
" <td>22.39</td>\n", | |
" <td>142.00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>565</th>\n", | |
" <td>926682</td>\n", | |
" <td>1</td>\n", | |
" <td>20.13</td>\n", | |
" <td>28.25</td>\n", | |
" <td>131.20</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>566</th>\n", | |
" <td>926954</td>\n", | |
" <td>1</td>\n", | |
" <td>16.60</td>\n", | |
" <td>28.08</td>\n", | |
" <td>108.30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>567</th>\n", | |
" <td>927241</td>\n", | |
" <td>1</td>\n", | |
" <td>20.60</td>\n", | |
" <td>29.33</td>\n", | |
" <td>140.10</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>568</th>\n", | |
" <td>92751</td>\n", | |
" <td>0</td>\n", | |
" <td>7.76</td>\n", | |
" <td>24.54</td>\n", | |
" <td>47.92</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>569 rows × 5 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id diagnosis radius_mean texture_mean perimeter_mean\n", | |
"0 842302 1 17.99 10.38 122.80\n", | |
"1 842517 1 20.57 17.77 132.90\n", | |
"2 84300903 1 19.69 21.25 130.00\n", | |
"3 84348301 1 11.42 20.38 77.58\n", | |
"4 84358402 1 20.29 14.34 135.10\n", | |
".. ... ... ... ... ...\n", | |
"564 926424 1 21.56 22.39 142.00\n", | |
"565 926682 1 20.13 28.25 131.20\n", | |
"566 926954 1 16.60 28.08 108.30\n", | |
"567 927241 1 20.60 29.33 140.10\n", | |
"568 92751 0 7.76 24.54 47.92\n", | |
"\n", | |
"[569 rows x 5 columns]" | |
] | |
}, | |
"execution_count": 49, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data1 = df.iloc[:,0:5]\n", | |
"data1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 50, | |
"id": "2b600a9a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>perimeter_mean</th>\n", | |
" <th>area_mean</th>\n", | |
" <th>smoothness_mean</th>\n", | |
" <th>compactness_mean</th>\n", | |
" <th>concavity_mean</th>\n", | |
" <th>concave points_mean</th>\n", | |
" <th>symmetry_mean</th>\n", | |
" <th>fractal_dimension_mean</th>\n", | |
" <th>radius_se</th>\n", | |
" <th>texture_se</th>\n", | |
" <th>...</th>\n", | |
" <th>radius_worst</th>\n", | |
" <th>texture_worst</th>\n", | |
" <th>perimeter_worst</th>\n", | |
" <th>area_worst</th>\n", | |
" <th>smoothness_worst</th>\n", | |
" <th>compactness_worst</th>\n", | |
" <th>concavity_worst</th>\n", | |
" <th>concave points_worst</th>\n", | |
" <th>symmetry_worst</th>\n", | |
" <th>fractal_dimension_worst</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>122.80</td>\n", | |
" <td>1001.0</td>\n", | |
" <td>0.11840</td>\n", | |
" <td>0.27760</td>\n", | |
" <td>0.30010</td>\n", | |
" <td>0.14710</td>\n", | |
" <td>0.2419</td>\n", | |
" <td>0.07871</td>\n", | |
" <td>1.0950</td>\n", | |
" <td>0.9053</td>\n", | |
" <td>...</td>\n", | |
" <td>25.380</td>\n", | |
" <td>17.33</td>\n", | |
" <td>184.60</td>\n", | |
" <td>2019.0</td>\n", | |
" <td>0.16220</td>\n", | |
" <td>0.66560</td>\n", | |
" <td>0.7119</td>\n", | |
" <td>0.2654</td>\n", | |
" <td>0.4601</td>\n", | |
" <td>0.11890</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>132.90</td>\n", | |
" <td>1326.0</td>\n", | |
" <td>0.08474</td>\n", | |
" <td>0.07864</td>\n", | |
" <td>0.08690</td>\n", | |
" <td>0.07017</td>\n", | |
" <td>0.1812</td>\n", | |
" <td>0.05667</td>\n", | |
" <td>0.5435</td>\n", | |
" <td>0.7339</td>\n", | |
" <td>...</td>\n", | |
" <td>24.990</td>\n", | |
" <td>23.41</td>\n", | |
" <td>158.80</td>\n", | |
" <td>1956.0</td>\n", | |
" <td>0.12380</td>\n", | |
" <td>0.18660</td>\n", | |
" <td>0.2416</td>\n", | |
" <td>0.1860</td>\n", | |
" <td>0.2750</td>\n", | |
" <td>0.08902</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>130.00</td>\n", | |
" <td>1203.0</td>\n", | |
" <td>0.10960</td>\n", | |
" <td>0.15990</td>\n", | |
" <td>0.19740</td>\n", | |
" <td>0.12790</td>\n", | |
" <td>0.2069</td>\n", | |
" <td>0.05999</td>\n", | |
" <td>0.7456</td>\n", | |
" <td>0.7869</td>\n", | |
" <td>...</td>\n", | |
" <td>23.570</td>\n", | |
" <td>25.53</td>\n", | |
" <td>152.50</td>\n", | |
" <td>1709.0</td>\n", | |
" <td>0.14440</td>\n", | |
" <td>0.42450</td>\n", | |
" <td>0.4504</td>\n", | |
" <td>0.2430</td>\n", | |
" <td>0.3613</td>\n", | |
" <td>0.08758</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>77.58</td>\n", | |
" <td>386.1</td>\n", | |
" <td>0.14250</td>\n", | |
" <td>0.28390</td>\n", | |
" <td>0.24140</td>\n", | |
" <td>0.10520</td>\n", | |
" <td>0.2597</td>\n", | |
" <td>0.09744</td>\n", | |
" <td>0.4956</td>\n", | |
" <td>1.1560</td>\n", | |
" <td>...</td>\n", | |
" <td>14.910</td>\n", | |
" <td>26.50</td>\n", | |
" <td>98.87</td>\n", | |
" <td>567.7</td>\n", | |
" <td>0.20980</td>\n", | |
" <td>0.86630</td>\n", | |
" <td>0.6869</td>\n", | |
" <td>0.2575</td>\n", | |
" <td>0.6638</td>\n", | |
" <td>0.17300</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>135.10</td>\n", | |
" <td>1297.0</td>\n", | |
" <td>0.10030</td>\n", | |
" <td>0.13280</td>\n", | |
" <td>0.19800</td>\n", | |
" <td>0.10430</td>\n", | |
" <td>0.1809</td>\n", | |
" <td>0.05883</td>\n", | |
" <td>0.7572</td>\n", | |
" <td>0.7813</td>\n", | |
" <td>...</td>\n", | |
" <td>22.540</td>\n", | |
" <td>16.67</td>\n", | |
" <td>152.20</td>\n", | |
" <td>1575.0</td>\n", | |
" <td>0.13740</td>\n", | |
" <td>0.20500</td>\n", | |
" <td>0.4000</td>\n", | |
" <td>0.1625</td>\n", | |
" <td>0.2364</td>\n", | |
" <td>0.07678</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>564</th>\n", | |
" <td>142.00</td>\n", | |
" <td>1479.0</td>\n", | |
" <td>0.11100</td>\n", | |
" <td>0.11590</td>\n", | |
" <td>0.24390</td>\n", | |
" <td>0.13890</td>\n", | |
" <td>0.1726</td>\n", | |
" <td>0.05623</td>\n", | |
" <td>1.1760</td>\n", | |
" <td>1.2560</td>\n", | |
" <td>...</td>\n", | |
" <td>25.450</td>\n", | |
" <td>26.40</td>\n", | |
" <td>166.10</td>\n", | |
" <td>2027.0</td>\n", | |
" <td>0.14100</td>\n", | |
" <td>0.21130</td>\n", | |
" <td>0.4107</td>\n", | |
" <td>0.2216</td>\n", | |
" <td>0.2060</td>\n", | |
" <td>0.07115</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>565</th>\n", | |
" <td>131.20</td>\n", | |
" <td>1261.0</td>\n", | |
" <td>0.09780</td>\n", | |
" <td>0.10340</td>\n", | |
" <td>0.14400</td>\n", | |
" <td>0.09791</td>\n", | |
" <td>0.1752</td>\n", | |
" <td>0.05533</td>\n", | |
" <td>0.7655</td>\n", | |
" <td>2.4630</td>\n", | |
" <td>...</td>\n", | |
" <td>23.690</td>\n", | |
" <td>38.25</td>\n", | |
" <td>155.00</td>\n", | |
" <td>1731.0</td>\n", | |
" <td>0.11660</td>\n", | |
" <td>0.19220</td>\n", | |
" <td>0.3215</td>\n", | |
" <td>0.1628</td>\n", | |
" <td>0.2572</td>\n", | |
" <td>0.06637</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>566</th>\n", | |
" <td>108.30</td>\n", | |
" <td>858.1</td>\n", | |
" <td>0.08455</td>\n", | |
" <td>0.10230</td>\n", | |
" <td>0.09251</td>\n", | |
" <td>0.05302</td>\n", | |
" <td>0.1590</td>\n", | |
" <td>0.05648</td>\n", | |
" <td>0.4564</td>\n", | |
" <td>1.0750</td>\n", | |
" <td>...</td>\n", | |
" <td>18.980</td>\n", | |
" <td>34.12</td>\n", | |
" <td>126.70</td>\n", | |
" <td>1124.0</td>\n", | |
" <td>0.11390</td>\n", | |
" <td>0.30940</td>\n", | |
" <td>0.3403</td>\n", | |
" <td>0.1418</td>\n", | |
" <td>0.2218</td>\n", | |
" <td>0.07820</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>567</th>\n", | |
" <td>140.10</td>\n", | |
" <td>1265.0</td>\n", | |
" <td>0.11780</td>\n", | |
" <td>0.27700</td>\n", | |
" <td>0.35140</td>\n", | |
" <td>0.15200</td>\n", | |
" <td>0.2397</td>\n", | |
" <td>0.07016</td>\n", | |
" <td>0.7260</td>\n", | |
" <td>1.5950</td>\n", | |
" <td>...</td>\n", | |
" <td>25.740</td>\n", | |
" <td>39.42</td>\n", | |
" <td>184.60</td>\n", | |
" <td>1821.0</td>\n", | |
" <td>0.16500</td>\n", | |
" <td>0.86810</td>\n", | |
" <td>0.9387</td>\n", | |
" <td>0.2650</td>\n", | |
" <td>0.4087</td>\n", | |
" <td>0.12400</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>568</th>\n", | |
" <td>47.92</td>\n", | |
" <td>181.0</td>\n", | |
" <td>0.05263</td>\n", | |
" <td>0.04362</td>\n", | |
" <td>0.00000</td>\n", | |
" <td>0.00000</td>\n", | |
" <td>0.1587</td>\n", | |
" <td>0.05884</td>\n", | |
" <td>0.3857</td>\n", | |
" <td>1.4280</td>\n", | |
" <td>...</td>\n", | |
" <td>9.456</td>\n", | |
" <td>30.37</td>\n", | |
" <td>59.16</td>\n", | |
" <td>268.6</td>\n", | |
" <td>0.08996</td>\n", | |
" <td>0.06444</td>\n", | |
" <td>0.0000</td>\n", | |
" <td>0.0000</td>\n", | |
" <td>0.2871</td>\n", | |
" <td>0.07039</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>569 rows × 28 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" perimeter_mean area_mean smoothness_mean compactness_mean \n", | |
"0 122.80 1001.0 0.11840 0.27760 \\\n", | |
"1 132.90 1326.0 0.08474 0.07864 \n", | |
"2 130.00 1203.0 0.10960 0.15990 \n", | |
"3 77.58 386.1 0.14250 0.28390 \n", | |
"4 135.10 1297.0 0.10030 0.13280 \n", | |
".. ... ... ... ... \n", | |
"564 142.00 1479.0 0.11100 0.11590 \n", | |
"565 131.20 1261.0 0.09780 0.10340 \n", | |
"566 108.30 858.1 0.08455 0.10230 \n", | |
"567 140.10 1265.0 0.11780 0.27700 \n", | |
"568 47.92 181.0 0.05263 0.04362 \n", | |
"\n", | |
" concavity_mean concave points_mean symmetry_mean \n", | |
"0 0.30010 0.14710 0.2419 \\\n", | |
"1 0.08690 0.07017 0.1812 \n", | |
"2 0.19740 0.12790 0.2069 \n", | |
"3 0.24140 0.10520 0.2597 \n", | |
"4 0.19800 0.10430 0.1809 \n", | |
".. ... ... ... \n", | |
"564 0.24390 0.13890 0.1726 \n", | |
"565 0.14400 0.09791 0.1752 \n", | |
"566 0.09251 0.05302 0.1590 \n", | |
"567 0.35140 0.15200 0.2397 \n", | |
"568 0.00000 0.00000 0.1587 \n", | |
"\n", | |
" fractal_dimension_mean radius_se texture_se ... radius_worst \n", | |
"0 0.07871 1.0950 0.9053 ... 25.380 \\\n", | |
"1 0.05667 0.5435 0.7339 ... 24.990 \n", | |
"2 0.05999 0.7456 0.7869 ... 23.570 \n", | |
"3 0.09744 0.4956 1.1560 ... 14.910 \n", | |
"4 0.05883 0.7572 0.7813 ... 22.540 \n", | |
".. ... ... ... ... ... \n", | |
"564 0.05623 1.1760 1.2560 ... 25.450 \n", | |
"565 0.05533 0.7655 2.4630 ... 23.690 \n", | |
"566 0.05648 0.4564 1.0750 ... 18.980 \n", | |
"567 0.07016 0.7260 1.5950 ... 25.740 \n", | |
"568 0.05884 0.3857 1.4280 ... 9.456 \n", | |
"\n", | |
" texture_worst perimeter_worst area_worst smoothness_worst \n", | |
"0 17.33 184.60 2019.0 0.16220 \\\n", | |
"1 23.41 158.80 1956.0 0.12380 \n", | |
"2 25.53 152.50 1709.0 0.14440 \n", | |
"3 26.50 98.87 567.7 0.20980 \n", | |
"4 16.67 152.20 1575.0 0.13740 \n", | |
".. ... ... ... ... \n", | |
"564 26.40 166.10 2027.0 0.14100 \n", | |
"565 38.25 155.00 1731.0 0.11660 \n", | |
"566 34.12 126.70 1124.0 0.11390 \n", | |
"567 39.42 184.60 1821.0 0.16500 \n", | |
"568 30.37 59.16 268.6 0.08996 \n", | |
"\n", | |
" compactness_worst concavity_worst concave points_worst symmetry_worst \n", | |
"0 0.66560 0.7119 0.2654 0.4601 \\\n", | |
"1 0.18660 0.2416 0.1860 0.2750 \n", | |
"2 0.42450 0.4504 0.2430 0.3613 \n", | |
"3 0.86630 0.6869 0.2575 0.6638 \n", | |
"4 0.20500 0.4000 0.1625 0.2364 \n", | |
".. ... ... ... ... \n", | |
"564 0.21130 0.4107 0.2216 0.2060 \n", | |
"565 0.19220 0.3215 0.1628 0.2572 \n", | |
"566 0.30940 0.3403 0.1418 0.2218 \n", | |
"567 0.86810 0.9387 0.2650 0.4087 \n", | |
"568 0.06444 0.0000 0.0000 0.2871 \n", | |
"\n", | |
" fractal_dimension_worst \n", | |
"0 0.11890 \n", | |
"1 0.08902 \n", | |
"2 0.08758 \n", | |
"3 0.17300 \n", | |
"4 0.07678 \n", | |
".. ... \n", | |
"564 0.07115 \n", | |
"565 0.06637 \n", | |
"566 0.07820 \n", | |
"567 0.12400 \n", | |
"568 0.07039 \n", | |
"\n", | |
"[569 rows x 28 columns]" | |
] | |
}, | |
"execution_count": 50, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data2 = df.iloc[:,4:]\n", | |
"data2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 51, | |
"id": "29a8953f", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>diagnosis</th>\n", | |
" <th>radius_mean</th>\n", | |
" <th>texture_mean</th>\n", | |
" <th>perimeter_mean</th>\n", | |
" <th>area_mean</th>\n", | |
" <th>smoothness_mean</th>\n", | |
" <th>compactness_mean</th>\n", | |
" <th>concavity_mean</th>\n", | |
" <th>concave points_mean</th>\n", | |
" <th>...</th>\n", | |
" <th>radius_worst</th>\n", | |
" <th>texture_worst</th>\n", | |
" <th>perimeter_worst</th>\n", | |
" <th>area_worst</th>\n", | |
" <th>smoothness_worst</th>\n", | |
" <th>compactness_worst</th>\n", | |
" <th>concavity_worst</th>\n", | |
" <th>concave points_worst</th>\n", | |
" <th>symmetry_worst</th>\n", | |
" <th>fractal_dimension_worst</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>842302</td>\n", | |
" <td>1</td>\n", | |
" <td>17.99</td>\n", | |
" <td>10.38</td>\n", | |
" <td>122.80</td>\n", | |
" <td>1001.0</td>\n", | |
" <td>0.11840</td>\n", | |
" <td>0.27760</td>\n", | |
" <td>0.30010</td>\n", | |
" <td>0.14710</td>\n", | |
" <td>...</td>\n", | |
" <td>25.380</td>\n", | |
" <td>17.33</td>\n", | |
" <td>184.60</td>\n", | |
" <td>2019.0</td>\n", | |
" <td>0.16220</td>\n", | |
" <td>0.66560</td>\n", | |
" <td>0.7119</td>\n", | |
" <td>0.2654</td>\n", | |
" <td>0.4601</td>\n", | |
" <td>0.11890</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>842517</td>\n", | |
" <td>1</td>\n", | |
" <td>20.57</td>\n", | |
" <td>17.77</td>\n", | |
" <td>132.90</td>\n", | |
" <td>1326.0</td>\n", | |
" <td>0.08474</td>\n", | |
" <td>0.07864</td>\n", | |
" <td>0.08690</td>\n", | |
" <td>0.07017</td>\n", | |
" <td>...</td>\n", | |
" <td>24.990</td>\n", | |
" <td>23.41</td>\n", | |
" <td>158.80</td>\n", | |
" <td>1956.0</td>\n", | |
" <td>0.12380</td>\n", | |
" <td>0.18660</td>\n", | |
" <td>0.2416</td>\n", | |
" <td>0.1860</td>\n", | |
" <td>0.2750</td>\n", | |
" <td>0.08902</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>842517</td>\n", | |
" <td>1</td>\n", | |
" <td>20.57</td>\n", | |
" <td>17.77</td>\n", | |
" <td>132.90</td>\n", | |
" <td>1288.0</td>\n", | |
" <td>0.10000</td>\n", | |
" <td>0.10880</td>\n", | |
" <td>0.15190</td>\n", | |
" <td>0.09333</td>\n", | |
" <td>...</td>\n", | |
" <td>24.330</td>\n", | |
" <td>39.16</td>\n", | |
" <td>162.30</td>\n", | |
" <td>1844.0</td>\n", | |
" <td>0.15220</td>\n", | |
" <td>0.29450</td>\n", | |
" <td>0.3788</td>\n", | |
" <td>0.1697</td>\n", | |
" <td>0.3151</td>\n", | |
" <td>0.07999</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>887549</td>\n", | |
" <td>1</td>\n", | |
" <td>20.31</td>\n", | |
" <td>27.06</td>\n", | |
" <td>132.90</td>\n", | |
" <td>1326.0</td>\n", | |
" <td>0.08474</td>\n", | |
" <td>0.07864</td>\n", | |
" <td>0.08690</td>\n", | |
" <td>0.07017</td>\n", | |
" <td>...</td>\n", | |
" <td>24.990</td>\n", | |
" <td>23.41</td>\n", | |
" <td>158.80</td>\n", | |
" <td>1956.0</td>\n", | |
" <td>0.12380</td>\n", | |
" <td>0.18660</td>\n", | |
" <td>0.2416</td>\n", | |
" <td>0.1860</td>\n", | |
" <td>0.2750</td>\n", | |
" <td>0.08902</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>887549</td>\n", | |
" <td>1</td>\n", | |
" <td>20.31</td>\n", | |
" <td>27.06</td>\n", | |
" <td>132.90</td>\n", | |
" <td>1288.0</td>\n", | |
" <td>0.10000</td>\n", | |
" <td>0.10880</td>\n", | |
" <td>0.15190</td>\n", | |
" <td>0.09333</td>\n", | |
" <td>...</td>\n", | |
" <td>24.330</td>\n", | |
" <td>39.16</td>\n", | |
" <td>162.30</td>\n", | |
" <td>1844.0</td>\n", | |
" <td>0.15220</td>\n", | |
" <td>0.29450</td>\n", | |
" <td>0.3788</td>\n", | |
" <td>0.1697</td>\n", | |
" <td>0.3151</td>\n", | |
" <td>0.07999</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>664</th>\n", | |
" <td>926424</td>\n", | |
" <td>1</td>\n", | |
" <td>21.56</td>\n", | |
" <td>22.39</td>\n", | |
" <td>142.00</td>\n", | |
" <td>1479.0</td>\n", | |
" <td>0.11100</td>\n", | |
" <td>0.11590</td>\n", | |
" <td>0.24390</td>\n", | |
" <td>0.13890</td>\n", | |
" <td>...</td>\n", | |
" <td>25.450</td>\n", | |
" <td>26.40</td>\n", | |
" <td>166.10</td>\n", | |
" <td>2027.0</td>\n", | |
" <td>0.14100</td>\n", | |
" <td>0.21130</td>\n", | |
" <td>0.4107</td>\n", | |
" <td>0.2216</td>\n", | |
" <td>0.2060</td>\n", | |
" <td>0.07115</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>665</th>\n", | |
" <td>926682</td>\n", | |
" <td>1</td>\n", | |
" <td>20.13</td>\n", | |
" <td>28.25</td>\n", | |
" <td>131.20</td>\n", | |
" <td>1261.0</td>\n", | |
" <td>0.09780</td>\n", | |
" <td>0.10340</td>\n", | |
" <td>0.14400</td>\n", | |
" <td>0.09791</td>\n", | |
" <td>...</td>\n", | |
" <td>23.690</td>\n", | |
" <td>38.25</td>\n", | |
" <td>155.00</td>\n", | |
" <td>1731.0</td>\n", | |
" <td>0.11660</td>\n", | |
" <td>0.19220</td>\n", | |
" <td>0.3215</td>\n", | |
" <td>0.1628</td>\n", | |
" <td>0.2572</td>\n", | |
" <td>0.06637</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>666</th>\n", | |
" <td>926954</td>\n", | |
" <td>1</td>\n", | |
" <td>16.60</td>\n", | |
" <td>28.08</td>\n", | |
" <td>108.30</td>\n", | |
" <td>858.1</td>\n", | |
" <td>0.08455</td>\n", | |
" <td>0.10230</td>\n", | |
" <td>0.09251</td>\n", | |
" <td>0.05302</td>\n", | |
" <td>...</td>\n", | |
" <td>18.980</td>\n", | |
" <td>34.12</td>\n", | |
" <td>126.70</td>\n", | |
" <td>1124.0</td>\n", | |
" <td>0.11390</td>\n", | |
" <td>0.30940</td>\n", | |
" <td>0.3403</td>\n", | |
" <td>0.1418</td>\n", | |
" <td>0.2218</td>\n", | |
" <td>0.07820</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>667</th>\n", | |
" <td>927241</td>\n", | |
" <td>1</td>\n", | |
" <td>20.60</td>\n", | |
" <td>29.33</td>\n", | |
" <td>140.10</td>\n", | |
" <td>1265.0</td>\n", | |
" <td>0.11780</td>\n", | |
" <td>0.27700</td>\n", | |
" <td>0.35140</td>\n", | |
" <td>0.15200</td>\n", | |
" <td>...</td>\n", | |
" <td>25.740</td>\n", | |
" <td>39.42</td>\n", | |
" <td>184.60</td>\n", | |
" <td>1821.0</td>\n", | |
" <td>0.16500</td>\n", | |
" <td>0.86810</td>\n", | |
" <td>0.9387</td>\n", | |
" <td>0.2650</td>\n", | |
" <td>0.4087</td>\n", | |
" <td>0.12400</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>668</th>\n", | |
" <td>92751</td>\n", | |
" <td>0</td>\n", | |
" <td>7.76</td>\n", | |
" <td>24.54</td>\n", | |
" <td>47.92</td>\n", | |
" <td>181.0</td>\n", | |
" <td>0.05263</td>\n", | |
" <td>0.04362</td>\n", | |
" <td>0.00000</td>\n", | |
" <td>0.00000</td>\n", | |
" <td>...</td>\n", | |
" <td>9.456</td>\n", | |
" <td>30.37</td>\n", | |
" <td>59.16</td>\n", | |
" <td>268.6</td>\n", | |
" <td>0.08996</td>\n", | |
" <td>0.06444</td>\n", | |
" <td>0.0000</td>\n", | |
" <td>0.0000</td>\n", | |
" <td>0.2871</td>\n", | |
" <td>0.07039</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>669 rows × 32 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id diagnosis radius_mean texture_mean perimeter_mean area_mean \n", | |
"0 842302 1 17.99 10.38 122.80 1001.0 \\\n", | |
"1 842517 1 20.57 17.77 132.90 1326.0 \n", | |
"2 842517 1 20.57 17.77 132.90 1288.0 \n", | |
"3 887549 1 20.31 27.06 132.90 1326.0 \n", | |
"4 887549 1 20.31 27.06 132.90 1288.0 \n", | |
".. ... ... ... ... ... ... \n", | |
"664 926424 1 21.56 22.39 142.00 1479.0 \n", | |
"665 926682 1 20.13 28.25 131.20 1261.0 \n", | |
"666 926954 1 16.60 28.08 108.30 858.1 \n", | |
"667 927241 1 20.60 29.33 140.10 1265.0 \n", | |
"668 92751 0 7.76 24.54 47.92 181.0 \n", | |
"\n", | |
" smoothness_mean compactness_mean concavity_mean concave points_mean \n", | |
"0 0.11840 0.27760 0.30010 0.14710 \\\n", | |
"1 0.08474 0.07864 0.08690 0.07017 \n", | |
"2 0.10000 0.10880 0.15190 0.09333 \n", | |
"3 0.08474 0.07864 0.08690 0.07017 \n", | |
"4 0.10000 0.10880 0.15190 0.09333 \n", | |
".. ... ... ... ... \n", | |
"664 0.11100 0.11590 0.24390 0.13890 \n", | |
"665 0.09780 0.10340 0.14400 0.09791 \n", | |
"666 0.08455 0.10230 0.09251 0.05302 \n", | |
"667 0.11780 0.27700 0.35140 0.15200 \n", | |
"668 0.05263 0.04362 0.00000 0.00000 \n", | |
"\n", | |
" ... radius_worst texture_worst perimeter_worst area_worst \n", | |
"0 ... 25.380 17.33 184.60 2019.0 \\\n", | |
"1 ... 24.990 23.41 158.80 1956.0 \n", | |
"2 ... 24.330 39.16 162.30 1844.0 \n", | |
"3 ... 24.990 23.41 158.80 1956.0 \n", | |
"4 ... 24.330 39.16 162.30 1844.0 \n", | |
".. ... ... ... ... ... \n", | |
"664 ... 25.450 26.40 166.10 2027.0 \n", | |
"665 ... 23.690 38.25 155.00 1731.0 \n", | |
"666 ... 18.980 34.12 126.70 1124.0 \n", | |
"667 ... 25.740 39.42 184.60 1821.0 \n", | |
"668 ... 9.456 30.37 59.16 268.6 \n", | |
"\n", | |
" smoothness_worst compactness_worst concavity_worst \n", | |
"0 0.16220 0.66560 0.7119 \\\n", | |
"1 0.12380 0.18660 0.2416 \n", | |
"2 0.15220 0.29450 0.3788 \n", | |
"3 0.12380 0.18660 0.2416 \n", | |
"4 0.15220 0.29450 0.3788 \n", | |
".. ... ... ... \n", | |
"664 0.14100 0.21130 0.4107 \n", | |
"665 0.11660 0.19220 0.3215 \n", | |
"666 0.11390 0.30940 0.3403 \n", | |
"667 0.16500 0.86810 0.9387 \n", | |
"668 0.08996 0.06444 0.0000 \n", | |
"\n", | |
" concave points_worst symmetry_worst fractal_dimension_worst \n", | |
"0 0.2654 0.4601 0.11890 \n", | |
"1 0.1860 0.2750 0.08902 \n", | |
"2 0.1697 0.3151 0.07999 \n", | |
"3 0.1860 0.2750 0.08902 \n", | |
"4 0.1697 0.3151 0.07999 \n", | |
".. ... ... ... \n", | |
"664 0.2216 0.2060 0.07115 \n", | |
"665 0.1628 0.2572 0.06637 \n", | |
"666 0.1418 0.2218 0.07820 \n", | |
"667 0.2650 0.4087 0.12400 \n", | |
"668 0.0000 0.2871 0.07039 \n", | |
"\n", | |
"[669 rows x 32 columns]" | |
] | |
}, | |
"execution_count": 51, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Data mearging \n", | |
"# It overrides the perimeter_mean \n", | |
"\n", | |
"data = pd.merge(data1,data2)\n", | |
"data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 52, | |
"id": "f06d6cd0", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# ************************************ 3. Data Transformation *****************************" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 53, | |
"id": "27d92a0d", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"new_row = {'id':670, 'diagnosis':1, 'radius_mean':20, 'texture_mean':50, 'perimeter_mean':50,\n", | |
" 'area_mean':50, 'smoothness_mean':90, 'compactness_mean':90, 'concavity_mean':90,\n", | |
" 'concave points_mean':90, 'symmetry_mean':90, 'fractal_dimension_mean':90,\n", | |
" 'radius_se':90, 'texture_se':90, 'perimeter_se':90, 'area_se':90, 'smoothness_se':90,\n", | |
" 'compactness_se':90, 'concavity_se':90, 'concave points_se':90, 'symmetry_se':90,\n", | |
" 'fractal_dimension_se':90, 'radius_worst':90, 'texture_worst':90,\n", | |
" 'perimeter_worst':90, 'area_worst':90, 'smoothness_worst':90,\n", | |
" 'compactness_worst':90, 'concavity_worst':90, 'concave points_worst':90,\n", | |
" 'symmetry_worst':90, 'fractal_dimension_worst':90}\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 54, | |
"id": "fbf41fd4", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'id': 670,\n", | |
" 'diagnosis': 1,\n", | |
" 'radius_mean': 20,\n", | |
" 'texture_mean': 50,\n", | |
" 'perimeter_mean': 50,\n", | |
" 'area_mean': 50,\n", | |
" 'smoothness_mean': 90,\n", | |
" 'compactness_mean': 90,\n", | |
" 'concavity_mean': 90,\n", | |
" 'concave points_mean': 90,\n", | |
" 'symmetry_mean': 90,\n", | |
" 'fractal_dimension_mean': 90,\n", | |
" 'radius_se': 90,\n", | |
" 'texture_se': 90,\n", | |
" 'perimeter_se': 90,\n", | |
" 'area_se': 90,\n", | |
" 'smoothness_se': 90,\n", | |
" 'compactness_se': 90,\n", | |
" 'concavity_se': 90,\n", | |
" 'concave points_se': 90,\n", | |
" 'symmetry_se': 90,\n", | |
" 'fractal_dimension_se': 90,\n", | |
" 'radius_worst': 90,\n", | |
" 'texture_worst': 90,\n", | |
" 'perimeter_worst': 90,\n", | |
" 'area_worst': 90,\n", | |
" 'smoothness_worst': 90,\n", | |
" 'compactness_worst': 90,\n", | |
" 'concavity_worst': 90,\n", | |
" 'concave points_worst': 90,\n", | |
" 'symmetry_worst': 90,\n", | |
" 'fractal_dimension_worst': 90}" | |
] | |
}, | |
"execution_count": 54, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"new_row" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 55, | |
"id": "3fdb3140", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>diagnosis</th>\n", | |
" <th>radius_mean</th>\n", | |
" <th>texture_mean</th>\n", | |
" <th>perimeter_mean</th>\n", | |
" <th>area_mean</th>\n", | |
" <th>smoothness_mean</th>\n", | |
" <th>compactness_mean</th>\n", | |
" <th>concavity_mean</th>\n", | |
" <th>concave points_mean</th>\n", | |
" <th>...</th>\n", | |
" <th>radius_worst</th>\n", | |
" <th>texture_worst</th>\n", | |
" <th>perimeter_worst</th>\n", | |
" <th>area_worst</th>\n", | |
" <th>smoothness_worst</th>\n", | |
" <th>compactness_worst</th>\n", | |
" <th>concavity_worst</th>\n", | |
" <th>concave points_worst</th>\n", | |
" <th>symmetry_worst</th>\n", | |
" <th>fractal_dimension_worst</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>842302</td>\n", | |
" <td>1</td>\n", | |
" <td>17.99</td>\n", | |
" <td>10.38</td>\n", | |
" <td>122.80</td>\n", | |
" <td>1001.0</td>\n", | |
" <td>0.11840</td>\n", | |
" <td>0.27760</td>\n", | |
" <td>0.30010</td>\n", | |
" <td>0.14710</td>\n", | |
" <td>...</td>\n", | |
" <td>25.380</td>\n", | |
" <td>17.33</td>\n", | |
" <td>184.60</td>\n", | |
" <td>2019.0</td>\n", | |
" <td>0.16220</td>\n", | |
" <td>0.66560</td>\n", | |
" <td>0.7119</td>\n", | |
" <td>0.2654</td>\n", | |
" <td>0.4601</td>\n", | |
" <td>0.11890</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>842517</td>\n", | |
" <td>1</td>\n", | |
" <td>20.57</td>\n", | |
" <td>17.77</td>\n", | |
" <td>132.90</td>\n", | |
" <td>1326.0</td>\n", | |
" <td>0.08474</td>\n", | |
" <td>0.07864</td>\n", | |
" <td>0.08690</td>\n", | |
" <td>0.07017</td>\n", | |
" <td>...</td>\n", | |
" <td>24.990</td>\n", | |
" <td>23.41</td>\n", | |
" <td>158.80</td>\n", | |
" <td>1956.0</td>\n", | |
" <td>0.12380</td>\n", | |
" <td>0.18660</td>\n", | |
" <td>0.2416</td>\n", | |
" <td>0.1860</td>\n", | |
" <td>0.2750</td>\n", | |
" <td>0.08902</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>842517</td>\n", | |
" <td>1</td>\n", | |
" <td>20.57</td>\n", | |
" <td>17.77</td>\n", | |
" <td>132.90</td>\n", | |
" <td>1288.0</td>\n", | |
" <td>0.10000</td>\n", | |
" <td>0.10880</td>\n", | |
" <td>0.15190</td>\n", | |
" <td>0.09333</td>\n", | |
" <td>...</td>\n", | |
" <td>24.330</td>\n", | |
" <td>39.16</td>\n", | |
" <td>162.30</td>\n", | |
" <td>1844.0</td>\n", | |
" <td>0.15220</td>\n", | |
" <td>0.29450</td>\n", | |
" <td>0.3788</td>\n", | |
" <td>0.1697</td>\n", | |
" <td>0.3151</td>\n", | |
" <td>0.07999</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>887549</td>\n", | |
" <td>1</td>\n", | |
" <td>20.31</td>\n", | |
" <td>27.06</td>\n", | |
" <td>132.90</td>\n", | |
" <td>1326.0</td>\n", | |
" <td>0.08474</td>\n", | |
" <td>0.07864</td>\n", | |
" <td>0.08690</td>\n", | |
" <td>0.07017</td>\n", | |
" <td>...</td>\n", | |
" <td>24.990</td>\n", | |
" <td>23.41</td>\n", | |
" <td>158.80</td>\n", | |
" <td>1956.0</td>\n", | |
" <td>0.12380</td>\n", | |
" <td>0.18660</td>\n", | |
" <td>0.2416</td>\n", | |
" <td>0.1860</td>\n", | |
" <td>0.2750</td>\n", | |
" <td>0.08902</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>887549</td>\n", | |
" <td>1</td>\n", | |
" <td>20.31</td>\n", | |
" <td>27.06</td>\n", | |
" <td>132.90</td>\n", | |
" <td>1288.0</td>\n", | |
" <td>0.10000</td>\n", | |
" <td>0.10880</td>\n", | |
" <td>0.15190</td>\n", | |
" <td>0.09333</td>\n", | |
" <td>...</td>\n", | |
" <td>24.330</td>\n", | |
" <td>39.16</td>\n", | |
" <td>162.30</td>\n", | |
" <td>1844.0</td>\n", | |
" <td>0.15220</td>\n", | |
" <td>0.29450</td>\n", | |
" <td>0.3788</td>\n", | |
" <td>0.1697</td>\n", | |
" <td>0.3151</td>\n", | |
" <td>0.07999</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>665</th>\n", | |
" <td>926682</td>\n", | |
" <td>1</td>\n", | |
" <td>20.13</td>\n", | |
" <td>28.25</td>\n", | |
" <td>131.20</td>\n", | |
" <td>1261.0</td>\n", | |
" <td>0.09780</td>\n", | |
" <td>0.10340</td>\n", | |
" <td>0.14400</td>\n", | |
" <td>0.09791</td>\n", | |
" <td>...</td>\n", | |
" <td>23.690</td>\n", | |
" <td>38.25</td>\n", | |
" <td>155.00</td>\n", | |
" <td>1731.0</td>\n", | |
" <td>0.11660</td>\n", | |
" <td>0.19220</td>\n", | |
" <td>0.3215</td>\n", | |
" <td>0.1628</td>\n", | |
" <td>0.2572</td>\n", | |
" <td>0.06637</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>666</th>\n", | |
" <td>926954</td>\n", | |
" <td>1</td>\n", | |
" <td>16.60</td>\n", | |
" <td>28.08</td>\n", | |
" <td>108.30</td>\n", | |
" <td>858.1</td>\n", | |
" <td>0.08455</td>\n", | |
" <td>0.10230</td>\n", | |
" <td>0.09251</td>\n", | |
" <td>0.05302</td>\n", | |
" <td>...</td>\n", | |
" <td>18.980</td>\n", | |
" <td>34.12</td>\n", | |
" <td>126.70</td>\n", | |
" <td>1124.0</td>\n", | |
" <td>0.11390</td>\n", | |
" <td>0.30940</td>\n", | |
" <td>0.3403</td>\n", | |
" <td>0.1418</td>\n", | |
" <td>0.2218</td>\n", | |
" <td>0.07820</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>667</th>\n", | |
" <td>927241</td>\n", | |
" <td>1</td>\n", | |
" <td>20.60</td>\n", | |
" <td>29.33</td>\n", | |
" <td>140.10</td>\n", | |
" <td>1265.0</td>\n", | |
" <td>0.11780</td>\n", | |
" <td>0.27700</td>\n", | |
" <td>0.35140</td>\n", | |
" <td>0.15200</td>\n", | |
" <td>...</td>\n", | |
" <td>25.740</td>\n", | |
" <td>39.42</td>\n", | |
" <td>184.60</td>\n", | |
" <td>1821.0</td>\n", | |
" <td>0.16500</td>\n", | |
" <td>0.86810</td>\n", | |
" <td>0.9387</td>\n", | |
" <td>0.2650</td>\n", | |
" <td>0.4087</td>\n", | |
" <td>0.12400</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>668</th>\n", | |
" <td>92751</td>\n", | |
" <td>0</td>\n", | |
" <td>7.76</td>\n", | |
" <td>24.54</td>\n", | |
" <td>47.92</td>\n", | |
" <td>181.0</td>\n", | |
" <td>0.05263</td>\n", | |
" <td>0.04362</td>\n", | |
" <td>0.00000</td>\n", | |
" <td>0.00000</td>\n", | |
" <td>...</td>\n", | |
" <td>9.456</td>\n", | |
" <td>30.37</td>\n", | |
" <td>59.16</td>\n", | |
" <td>268.6</td>\n", | |
" <td>0.08996</td>\n", | |
" <td>0.06444</td>\n", | |
" <td>0.0000</td>\n", | |
" <td>0.0000</td>\n", | |
" <td>0.2871</td>\n", | |
" <td>0.07039</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>669</th>\n", | |
" <td>670</td>\n", | |
" <td>1</td>\n", | |
" <td>20.00</td>\n", | |
" <td>50.00</td>\n", | |
" <td>50.00</td>\n", | |
" <td>50.0</td>\n", | |
" <td>90.00000</td>\n", | |
" <td>90.00000</td>\n", | |
" <td>90.00000</td>\n", | |
" <td>90.00000</td>\n", | |
" <td>...</td>\n", | |
" <td>90.000</td>\n", | |
" <td>90.00</td>\n", | |
" <td>90.00</td>\n", | |
" <td>90.0</td>\n", | |
" <td>90.00000</td>\n", | |
" <td>90.00000</td>\n", | |
" <td>90.0000</td>\n", | |
" <td>90.0000</td>\n", | |
" <td>90.0000</td>\n", | |
" <td>90.00000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>670 rows × 32 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id diagnosis radius_mean texture_mean perimeter_mean area_mean \n", | |
"0 842302 1 17.99 10.38 122.80 1001.0 \\\n", | |
"1 842517 1 20.57 17.77 132.90 1326.0 \n", | |
"2 842517 1 20.57 17.77 132.90 1288.0 \n", | |
"3 887549 1 20.31 27.06 132.90 1326.0 \n", | |
"4 887549 1 20.31 27.06 132.90 1288.0 \n", | |
".. ... ... ... ... ... ... \n", | |
"665 926682 1 20.13 28.25 131.20 1261.0 \n", | |
"666 926954 1 16.60 28.08 108.30 858.1 \n", | |
"667 927241 1 20.60 29.33 140.10 1265.0 \n", | |
"668 92751 0 7.76 24.54 47.92 181.0 \n", | |
"669 670 1 20.00 50.00 50.00 50.0 \n", | |
"\n", | |
" smoothness_mean compactness_mean concavity_mean concave points_mean \n", | |
"0 0.11840 0.27760 0.30010 0.14710 \\\n", | |
"1 0.08474 0.07864 0.08690 0.07017 \n", | |
"2 0.10000 0.10880 0.15190 0.09333 \n", | |
"3 0.08474 0.07864 0.08690 0.07017 \n", | |
"4 0.10000 0.10880 0.15190 0.09333 \n", | |
".. ... ... ... ... \n", | |
"665 0.09780 0.10340 0.14400 0.09791 \n", | |
"666 0.08455 0.10230 0.09251 0.05302 \n", | |
"667 0.11780 0.27700 0.35140 0.15200 \n", | |
"668 0.05263 0.04362 0.00000 0.00000 \n", | |
"669 90.00000 90.00000 90.00000 90.00000 \n", | |
"\n", | |
" ... radius_worst texture_worst perimeter_worst area_worst \n", | |
"0 ... 25.380 17.33 184.60 2019.0 \\\n", | |
"1 ... 24.990 23.41 158.80 1956.0 \n", | |
"2 ... 24.330 39.16 162.30 1844.0 \n", | |
"3 ... 24.990 23.41 158.80 1956.0 \n", | |
"4 ... 24.330 39.16 162.30 1844.0 \n", | |
".. ... ... ... ... ... \n", | |
"665 ... 23.690 38.25 155.00 1731.0 \n", | |
"666 ... 18.980 34.12 126.70 1124.0 \n", | |
"667 ... 25.740 39.42 184.60 1821.0 \n", | |
"668 ... 9.456 30.37 59.16 268.6 \n", | |
"669 ... 90.000 90.00 90.00 90.0 \n", | |
"\n", | |
" smoothness_worst compactness_worst concavity_worst \n", | |
"0 0.16220 0.66560 0.7119 \\\n", | |
"1 0.12380 0.18660 0.2416 \n", | |
"2 0.15220 0.29450 0.3788 \n", | |
"3 0.12380 0.18660 0.2416 \n", | |
"4 0.15220 0.29450 0.3788 \n", | |
".. ... ... ... \n", | |
"665 0.11660 0.19220 0.3215 \n", | |
"666 0.11390 0.30940 0.3403 \n", | |
"667 0.16500 0.86810 0.9387 \n", | |
"668 0.08996 0.06444 0.0000 \n", | |
"669 90.00000 90.00000 90.0000 \n", | |
"\n", | |
" concave points_worst symmetry_worst fractal_dimension_worst \n", | |
"0 0.2654 0.4601 0.11890 \n", | |
"1 0.1860 0.2750 0.08902 \n", | |
"2 0.1697 0.3151 0.07999 \n", | |
"3 0.1860 0.2750 0.08902 \n", | |
"4 0.1697 0.3151 0.07999 \n", | |
".. ... ... ... \n", | |
"665 0.1628 0.2572 0.06637 \n", | |
"666 0.1418 0.2218 0.07820 \n", | |
"667 0.2650 0.4087 0.12400 \n", | |
"668 0.0000 0.2871 0.07039 \n", | |
"669 90.0000 90.0000 90.00000 \n", | |
"\n", | |
"[670 rows x 32 columns]" | |
] | |
}, | |
"execution_count": 55, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data = pd.concat([data, pd.DataFrame([new_row])], ignore_index=True)\n", | |
"data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 56, | |
"id": "399ab7c8", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# ************************* 4. Error Detection ************************************************" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 57, | |
"id": "67fca29e", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[<matplotlib.lines.Line2D at 0x7f9810559d60>]" | |
] | |
}, | |
"execution_count": 57, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"data": { | |
"image/png": "", | |
"text/plain": [ | |
"<Figure size 640x480 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"plt.plot(data['radius_mean'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "b3600dd3", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 58, | |
"id": "776d4a31", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[<matplotlib.lines.Line2D at 0x7f98104b5250>]" | |
] | |
}, | |
"execution_count": 58, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"data": { | |
"image/png": "", | |
"text/plain": [ | |
"<Figure size 640x480 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"plt.plot(data['texture_mean'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 59, | |
"id": "a6ba30ed", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# ****************************** 5. Model Building ********************************" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 60, | |
"id": "8078fd4b", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from sklearn import linear_model" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 61, | |
"id": "c33594f3", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>radius_mean</th>\n", | |
" <th>texture_mean</th>\n", | |
" <th>perimeter_mean</th>\n", | |
" <th>area_mean</th>\n", | |
" <th>smoothness_mean</th>\n", | |
" <th>compactness_mean</th>\n", | |
" <th>concavity_mean</th>\n", | |
" <th>concave points_mean</th>\n", | |
" <th>symmetry_mean</th>\n", | |
" <th>fractal_dimension_mean</th>\n", | |
" <th>...</th>\n", | |
" <th>radius_worst</th>\n", | |
" <th>texture_worst</th>\n", | |
" <th>perimeter_worst</th>\n", | |
" <th>area_worst</th>\n", | |
" <th>smoothness_worst</th>\n", | |
" <th>compactness_worst</th>\n", | |
" <th>concavity_worst</th>\n", | |
" <th>concave points_worst</th>\n", | |
" <th>symmetry_worst</th>\n", | |
" <th>fractal_dimension_worst</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>17.99</td>\n", | |
" <td>10.38</td>\n", | |
" <td>122.80</td>\n", | |
" <td>1001.0</td>\n", | |
" <td>0.11840</td>\n", | |
" <td>0.27760</td>\n", | |
" <td>0.30010</td>\n", | |
" <td>0.14710</td>\n", | |
" <td>0.2419</td>\n", | |
" <td>0.07871</td>\n", | |
" <td>...</td>\n", | |
" <td>25.380</td>\n", | |
" <td>17.33</td>\n", | |
" <td>184.60</td>\n", | |
" <td>2019.0</td>\n", | |
" <td>0.16220</td>\n", | |
" <td>0.66560</td>\n", | |
" <td>0.7119</td>\n", | |
" <td>0.2654</td>\n", | |
" <td>0.4601</td>\n", | |
" <td>0.11890</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>20.57</td>\n", | |
" <td>17.77</td>\n", | |
" <td>132.90</td>\n", | |
" <td>1326.0</td>\n", | |
" <td>0.08474</td>\n", | |
" <td>0.07864</td>\n", | |
" <td>0.08690</td>\n", | |
" <td>0.07017</td>\n", | |
" <td>0.1812</td>\n", | |
" <td>0.05667</td>\n", | |
" <td>...</td>\n", | |
" <td>24.990</td>\n", | |
" <td>23.41</td>\n", | |
" <td>158.80</td>\n", | |
" <td>1956.0</td>\n", | |
" <td>0.12380</td>\n", | |
" <td>0.18660</td>\n", | |
" <td>0.2416</td>\n", | |
" <td>0.1860</td>\n", | |
" <td>0.2750</td>\n", | |
" <td>0.08902</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>20.57</td>\n", | |
" <td>17.77</td>\n", | |
" <td>132.90</td>\n", | |
" <td>1288.0</td>\n", | |
" <td>0.10000</td>\n", | |
" <td>0.10880</td>\n", | |
" <td>0.15190</td>\n", | |
" <td>0.09333</td>\n", | |
" <td>0.1814</td>\n", | |
" <td>0.05572</td>\n", | |
" <td>...</td>\n", | |
" <td>24.330</td>\n", | |
" <td>39.16</td>\n", | |
" <td>162.30</td>\n", | |
" <td>1844.0</td>\n", | |
" <td>0.15220</td>\n", | |
" <td>0.29450</td>\n", | |
" <td>0.3788</td>\n", | |
" <td>0.1697</td>\n", | |
" <td>0.3151</td>\n", | |
" <td>0.07999</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>20.31</td>\n", | |
" <td>27.06</td>\n", | |
" <td>132.90</td>\n", | |
" <td>1326.0</td>\n", | |
" <td>0.08474</td>\n", | |
" <td>0.07864</td>\n", | |
" <td>0.08690</td>\n", | |
" <td>0.07017</td>\n", | |
" <td>0.1812</td>\n", | |
" <td>0.05667</td>\n", | |
" <td>...</td>\n", | |
" <td>24.990</td>\n", | |
" <td>23.41</td>\n", | |
" <td>158.80</td>\n", | |
" <td>1956.0</td>\n", | |
" <td>0.12380</td>\n", | |
" <td>0.18660</td>\n", | |
" <td>0.2416</td>\n", | |
" <td>0.1860</td>\n", | |
" <td>0.2750</td>\n", | |
" <td>0.08902</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>20.31</td>\n", | |
" <td>27.06</td>\n", | |
" <td>132.90</td>\n", | |
" <td>1288.0</td>\n", | |
" <td>0.10000</td>\n", | |
" <td>0.10880</td>\n", | |
" <td>0.15190</td>\n", | |
" <td>0.09333</td>\n", | |
" <td>0.1814</td>\n", | |
" <td>0.05572</td>\n", | |
" <td>...</td>\n", | |
" <td>24.330</td>\n", | |
" <td>39.16</td>\n", | |
" <td>162.30</td>\n", | |
" <td>1844.0</td>\n", | |
" <td>0.15220</td>\n", | |
" <td>0.29450</td>\n", | |
" <td>0.3788</td>\n", | |
" <td>0.1697</td>\n", | |
" <td>0.3151</td>\n", | |
" <td>0.07999</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>665</th>\n", | |
" <td>20.13</td>\n", | |
" <td>28.25</td>\n", | |
" <td>131.20</td>\n", | |
" <td>1261.0</td>\n", | |
" <td>0.09780</td>\n", | |
" <td>0.10340</td>\n", | |
" <td>0.14400</td>\n", | |
" <td>0.09791</td>\n", | |
" <td>0.1752</td>\n", | |
" <td>0.05533</td>\n", | |
" <td>...</td>\n", | |
" <td>23.690</td>\n", | |
" <td>38.25</td>\n", | |
" <td>155.00</td>\n", | |
" <td>1731.0</td>\n", | |
" <td>0.11660</td>\n", | |
" <td>0.19220</td>\n", | |
" <td>0.3215</td>\n", | |
" <td>0.1628</td>\n", | |
" <td>0.2572</td>\n", | |
" <td>0.06637</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>666</th>\n", | |
" <td>16.60</td>\n", | |
" <td>28.08</td>\n", | |
" <td>108.30</td>\n", | |
" <td>858.1</td>\n", | |
" <td>0.08455</td>\n", | |
" <td>0.10230</td>\n", | |
" <td>0.09251</td>\n", | |
" <td>0.05302</td>\n", | |
" <td>0.1590</td>\n", | |
" <td>0.05648</td>\n", | |
" <td>...</td>\n", | |
" <td>18.980</td>\n", | |
" <td>34.12</td>\n", | |
" <td>126.70</td>\n", | |
" <td>1124.0</td>\n", | |
" <td>0.11390</td>\n", | |
" <td>0.30940</td>\n", | |
" <td>0.3403</td>\n", | |
" <td>0.1418</td>\n", | |
" <td>0.2218</td>\n", | |
" <td>0.07820</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>667</th>\n", | |
" <td>20.60</td>\n", | |
" <td>29.33</td>\n", | |
" <td>140.10</td>\n", | |
" <td>1265.0</td>\n", | |
" <td>0.11780</td>\n", | |
" <td>0.27700</td>\n", | |
" <td>0.35140</td>\n", | |
" <td>0.15200</td>\n", | |
" <td>0.2397</td>\n", | |
" <td>0.07016</td>\n", | |
" <td>...</td>\n", | |
" <td>25.740</td>\n", | |
" <td>39.42</td>\n", | |
" <td>184.60</td>\n", | |
" <td>1821.0</td>\n", | |
" <td>0.16500</td>\n", | |
" <td>0.86810</td>\n", | |
" <td>0.9387</td>\n", | |
" <td>0.2650</td>\n", | |
" <td>0.4087</td>\n", | |
" <td>0.12400</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>668</th>\n", | |
" <td>7.76</td>\n", | |
" <td>24.54</td>\n", | |
" <td>47.92</td>\n", | |
" <td>181.0</td>\n", | |
" <td>0.05263</td>\n", | |
" <td>0.04362</td>\n", | |
" <td>0.00000</td>\n", | |
" <td>0.00000</td>\n", | |
" <td>0.1587</td>\n", | |
" <td>0.05884</td>\n", | |
" <td>...</td>\n", | |
" <td>9.456</td>\n", | |
" <td>30.37</td>\n", | |
" <td>59.16</td>\n", | |
" <td>268.6</td>\n", | |
" <td>0.08996</td>\n", | |
" <td>0.06444</td>\n", | |
" <td>0.0000</td>\n", | |
" <td>0.0000</td>\n", | |
" <td>0.2871</td>\n", | |
" <td>0.07039</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>669</th>\n", | |
" <td>20.00</td>\n", | |
" <td>50.00</td>\n", | |
" <td>50.00</td>\n", | |
" <td>50.0</td>\n", | |
" <td>90.00000</td>\n", | |
" <td>90.00000</td>\n", | |
" <td>90.00000</td>\n", | |
" <td>90.00000</td>\n", | |
" <td>90.0000</td>\n", | |
" <td>90.00000</td>\n", | |
" <td>...</td>\n", | |
" <td>90.000</td>\n", | |
" <td>90.00</td>\n", | |
" <td>90.00</td>\n", | |
" <td>90.0</td>\n", | |
" <td>90.00000</td>\n", | |
" <td>90.00000</td>\n", | |
" <td>90.0000</td>\n", | |
" <td>90.0000</td>\n", | |
" <td>90.0000</td>\n", | |
" <td>90.00000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>670 rows × 30 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" radius_mean texture_mean perimeter_mean area_mean smoothness_mean \n", | |
"0 17.99 10.38 122.80 1001.0 0.11840 \\\n", | |
"1 20.57 17.77 132.90 1326.0 0.08474 \n", | |
"2 20.57 17.77 132.90 1288.0 0.10000 \n", | |
"3 20.31 27.06 132.90 1326.0 0.08474 \n", | |
"4 20.31 27.06 132.90 1288.0 0.10000 \n", | |
".. ... ... ... ... ... \n", | |
"665 20.13 28.25 131.20 1261.0 0.09780 \n", | |
"666 16.60 28.08 108.30 858.1 0.08455 \n", | |
"667 20.60 29.33 140.10 1265.0 0.11780 \n", | |
"668 7.76 24.54 47.92 181.0 0.05263 \n", | |
"669 20.00 50.00 50.00 50.0 90.00000 \n", | |
"\n", | |
" compactness_mean concavity_mean concave points_mean symmetry_mean \n", | |
"0 0.27760 0.30010 0.14710 0.2419 \\\n", | |
"1 0.07864 0.08690 0.07017 0.1812 \n", | |
"2 0.10880 0.15190 0.09333 0.1814 \n", | |
"3 0.07864 0.08690 0.07017 0.1812 \n", | |
"4 0.10880 0.15190 0.09333 0.1814 \n", | |
".. ... ... ... ... \n", | |
"665 0.10340 0.14400 0.09791 0.1752 \n", | |
"666 0.10230 0.09251 0.05302 0.1590 \n", | |
"667 0.27700 0.35140 0.15200 0.2397 \n", | |
"668 0.04362 0.00000 0.00000 0.1587 \n", | |
"669 90.00000 90.00000 90.00000 90.0000 \n", | |
"\n", | |
" fractal_dimension_mean ... radius_worst texture_worst \n", | |
"0 0.07871 ... 25.380 17.33 \\\n", | |
"1 0.05667 ... 24.990 23.41 \n", | |
"2 0.05572 ... 24.330 39.16 \n", | |
"3 0.05667 ... 24.990 23.41 \n", | |
"4 0.05572 ... 24.330 39.16 \n", | |
".. ... ... ... ... \n", | |
"665 0.05533 ... 23.690 38.25 \n", | |
"666 0.05648 ... 18.980 34.12 \n", | |
"667 0.07016 ... 25.740 39.42 \n", | |
"668 0.05884 ... 9.456 30.37 \n", | |
"669 90.00000 ... 90.000 90.00 \n", | |
"\n", | |
" perimeter_worst area_worst smoothness_worst compactness_worst \n", | |
"0 184.60 2019.0 0.16220 0.66560 \\\n", | |
"1 158.80 1956.0 0.12380 0.18660 \n", | |
"2 162.30 1844.0 0.15220 0.29450 \n", | |
"3 158.80 1956.0 0.12380 0.18660 \n", | |
"4 162.30 1844.0 0.15220 0.29450 \n", | |
".. ... ... ... ... \n", | |
"665 155.00 1731.0 0.11660 0.19220 \n", | |
"666 126.70 1124.0 0.11390 0.30940 \n", | |
"667 184.60 1821.0 0.16500 0.86810 \n", | |
"668 59.16 268.6 0.08996 0.06444 \n", | |
"669 90.00 90.0 90.00000 90.00000 \n", | |
"\n", | |
" concavity_worst concave points_worst symmetry_worst \n", | |
"0 0.7119 0.2654 0.4601 \\\n", | |
"1 0.2416 0.1860 0.2750 \n", | |
"2 0.3788 0.1697 0.3151 \n", | |
"3 0.2416 0.1860 0.2750 \n", | |
"4 0.3788 0.1697 0.3151 \n", | |
".. ... ... ... \n", | |
"665 0.3215 0.1628 0.2572 \n", | |
"666 0.3403 0.1418 0.2218 \n", | |
"667 0.9387 0.2650 0.4087 \n", | |
"668 0.0000 0.0000 0.2871 \n", | |
"669 90.0000 90.0000 90.0000 \n", | |
"\n", | |
" fractal_dimension_worst \n", | |
"0 0.11890 \n", | |
"1 0.08902 \n", | |
"2 0.07999 \n", | |
"3 0.08902 \n", | |
"4 0.07999 \n", | |
".. ... \n", | |
"665 0.06637 \n", | |
"666 0.07820 \n", | |
"667 0.12400 \n", | |
"668 0.07039 \n", | |
"669 90.00000 \n", | |
"\n", | |
"[670 rows x 30 columns]" | |
] | |
}, | |
"execution_count": 61, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#Defining feature set\n", | |
"X = data.iloc[:,2:]\n", | |
"X" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 62, | |
"id": "89287f14", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>diagnosis</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>665</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>666</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>667</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>668</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>669</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>670 rows × 1 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" diagnosis\n", | |
"0 1\n", | |
"1 1\n", | |
"2 1\n", | |
"3 1\n", | |
"4 1\n", | |
".. ...\n", | |
"665 1\n", | |
"666 1\n", | |
"667 1\n", | |
"668 0\n", | |
"669 1\n", | |
"\n", | |
"[670 rows x 1 columns]" | |
] | |
}, | |
"execution_count": 62, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#Label\n", | |
"y = data.iloc[:,1:2]\n", | |
"y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 63, | |
"id": "0c7551a1", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from sklearn.model_selection import train_test_split\n", | |
"X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 64, | |
"id": "ff45c27d", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>radius_mean</th>\n", | |
" <th>texture_mean</th>\n", | |
" <th>perimeter_mean</th>\n", | |
" <th>area_mean</th>\n", | |
" <th>smoothness_mean</th>\n", | |
" <th>compactness_mean</th>\n", | |
" <th>concavity_mean</th>\n", | |
" <th>concave points_mean</th>\n", | |
" <th>symmetry_mean</th>\n", | |
" <th>fractal_dimension_mean</th>\n", | |
" <th>...</th>\n", | |
" <th>radius_worst</th>\n", | |
" <th>texture_worst</th>\n", | |
" <th>perimeter_worst</th>\n", | |
" <th>area_worst</th>\n", | |
" <th>smoothness_worst</th>\n", | |
" <th>compactness_worst</th>\n", | |
" <th>concavity_worst</th>\n", | |
" <th>concave points_worst</th>\n", | |
" <th>symmetry_worst</th>\n", | |
" <th>fractal_dimension_worst</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>187</th>\n", | |
" <td>13.61</td>\n", | |
" <td>24.69</td>\n", | |
" <td>87.76</td>\n", | |
" <td>572.6</td>\n", | |
" <td>0.09258</td>\n", | |
" <td>0.07862</td>\n", | |
" <td>0.05285</td>\n", | |
" <td>0.03085</td>\n", | |
" <td>0.1761</td>\n", | |
" <td>0.06130</td>\n", | |
" <td>...</td>\n", | |
" <td>16.89</td>\n", | |
" <td>35.64</td>\n", | |
" <td>113.20</td>\n", | |
" <td>848.7</td>\n", | |
" <td>0.1471</td>\n", | |
" <td>0.2884</td>\n", | |
" <td>0.37960</td>\n", | |
" <td>0.13290</td>\n", | |
" <td>0.3470</td>\n", | |
" <td>0.07900</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>326</th>\n", | |
" <td>17.05</td>\n", | |
" <td>19.08</td>\n", | |
" <td>113.40</td>\n", | |
" <td>895.0</td>\n", | |
" <td>0.11410</td>\n", | |
" <td>0.15720</td>\n", | |
" <td>0.19100</td>\n", | |
" <td>0.10900</td>\n", | |
" <td>0.2131</td>\n", | |
" <td>0.06325</td>\n", | |
" <td>...</td>\n", | |
" <td>19.59</td>\n", | |
" <td>24.89</td>\n", | |
" <td>133.50</td>\n", | |
" <td>1189.0</td>\n", | |
" <td>0.1703</td>\n", | |
" <td>0.3934</td>\n", | |
" <td>0.50180</td>\n", | |
" <td>0.25430</td>\n", | |
" <td>0.3109</td>\n", | |
" <td>0.09061</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>353</th>\n", | |
" <td>19.45</td>\n", | |
" <td>19.33</td>\n", | |
" <td>126.50</td>\n", | |
" <td>1169.0</td>\n", | |
" <td>0.10350</td>\n", | |
" <td>0.11880</td>\n", | |
" <td>0.13790</td>\n", | |
" <td>0.08591</td>\n", | |
" <td>0.1776</td>\n", | |
" <td>0.05647</td>\n", | |
" <td>...</td>\n", | |
" <td>25.70</td>\n", | |
" <td>24.57</td>\n", | |
" <td>163.10</td>\n", | |
" <td>1972.0</td>\n", | |
" <td>0.1497</td>\n", | |
" <td>0.3161</td>\n", | |
" <td>0.43170</td>\n", | |
" <td>0.19990</td>\n", | |
" <td>0.3379</td>\n", | |
" <td>0.08950</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>192</th>\n", | |
" <td>13.70</td>\n", | |
" <td>17.64</td>\n", | |
" <td>87.76</td>\n", | |
" <td>575.5</td>\n", | |
" <td>0.09277</td>\n", | |
" <td>0.07255</td>\n", | |
" <td>0.01752</td>\n", | |
" <td>0.01880</td>\n", | |
" <td>0.1631</td>\n", | |
" <td>0.06155</td>\n", | |
" <td>...</td>\n", | |
" <td>15.85</td>\n", | |
" <td>20.20</td>\n", | |
" <td>101.60</td>\n", | |
" <td>773.4</td>\n", | |
" <td>0.1264</td>\n", | |
" <td>0.1564</td>\n", | |
" <td>0.12060</td>\n", | |
" <td>0.08704</td>\n", | |
" <td>0.2806</td>\n", | |
" <td>0.07782</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>424</th>\n", | |
" <td>12.67</td>\n", | |
" <td>17.30</td>\n", | |
" <td>81.25</td>\n", | |
" <td>476.3</td>\n", | |
" <td>0.11580</td>\n", | |
" <td>0.10850</td>\n", | |
" <td>0.05928</td>\n", | |
" <td>0.03279</td>\n", | |
" <td>0.1943</td>\n", | |
" <td>0.06612</td>\n", | |
" <td>...</td>\n", | |
" <td>13.57</td>\n", | |
" <td>21.40</td>\n", | |
" <td>86.67</td>\n", | |
" <td>552.0</td>\n", | |
" <td>0.1580</td>\n", | |
" <td>0.1751</td>\n", | |
" <td>0.18890</td>\n", | |
" <td>0.08411</td>\n", | |
" <td>0.3155</td>\n", | |
" <td>0.07538</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>422</th>\n", | |
" <td>12.20</td>\n", | |
" <td>15.21</td>\n", | |
" <td>78.01</td>\n", | |
" <td>457.9</td>\n", | |
" <td>0.08673</td>\n", | |
" <td>0.06545</td>\n", | |
" <td>0.01994</td>\n", | |
" <td>0.01692</td>\n", | |
" <td>0.1638</td>\n", | |
" <td>0.06129</td>\n", | |
" <td>...</td>\n", | |
" <td>13.75</td>\n", | |
" <td>21.38</td>\n", | |
" <td>91.11</td>\n", | |
" <td>583.1</td>\n", | |
" <td>0.1256</td>\n", | |
" <td>0.1928</td>\n", | |
" <td>0.11670</td>\n", | |
" <td>0.05556</td>\n", | |
" <td>0.2661</td>\n", | |
" <td>0.07961</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>595</th>\n", | |
" <td>13.71</td>\n", | |
" <td>18.68</td>\n", | |
" <td>88.73</td>\n", | |
" <td>571.0</td>\n", | |
" <td>0.09916</td>\n", | |
" <td>0.10700</td>\n", | |
" <td>0.05385</td>\n", | |
" <td>0.03783</td>\n", | |
" <td>0.1714</td>\n", | |
" <td>0.06843</td>\n", | |
" <td>...</td>\n", | |
" <td>15.11</td>\n", | |
" <td>25.63</td>\n", | |
" <td>99.43</td>\n", | |
" <td>701.9</td>\n", | |
" <td>0.1425</td>\n", | |
" <td>0.2566</td>\n", | |
" <td>0.19350</td>\n", | |
" <td>0.12840</td>\n", | |
" <td>0.2849</td>\n", | |
" <td>0.09031</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>27</th>\n", | |
" <td>14.54</td>\n", | |
" <td>27.54</td>\n", | |
" <td>96.73</td>\n", | |
" <td>658.8</td>\n", | |
" <td>0.11390</td>\n", | |
" <td>0.15950</td>\n", | |
" <td>0.16390</td>\n", | |
" <td>0.07364</td>\n", | |
" <td>0.2303</td>\n", | |
" <td>0.07077</td>\n", | |
" <td>...</td>\n", | |
" <td>17.46</td>\n", | |
" <td>37.13</td>\n", | |
" <td>124.10</td>\n", | |
" <td>943.2</td>\n", | |
" <td>0.1678</td>\n", | |
" <td>0.6577</td>\n", | |
" <td>0.70260</td>\n", | |
" <td>0.17120</td>\n", | |
" <td>0.4218</td>\n", | |
" <td>0.13410</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>504</th>\n", | |
" <td>10.26</td>\n", | |
" <td>12.22</td>\n", | |
" <td>65.75</td>\n", | |
" <td>321.6</td>\n", | |
" <td>0.09996</td>\n", | |
" <td>0.07542</td>\n", | |
" <td>0.01923</td>\n", | |
" <td>0.01968</td>\n", | |
" <td>0.1800</td>\n", | |
" <td>0.06569</td>\n", | |
" <td>...</td>\n", | |
" <td>11.38</td>\n", | |
" <td>15.65</td>\n", | |
" <td>73.23</td>\n", | |
" <td>394.5</td>\n", | |
" <td>0.1343</td>\n", | |
" <td>0.1650</td>\n", | |
" <td>0.08615</td>\n", | |
" <td>0.06696</td>\n", | |
" <td>0.2937</td>\n", | |
" <td>0.07722</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>566</th>\n", | |
" <td>13.38</td>\n", | |
" <td>30.72</td>\n", | |
" <td>86.34</td>\n", | |
" <td>557.2</td>\n", | |
" <td>0.09245</td>\n", | |
" <td>0.07426</td>\n", | |
" <td>0.02819</td>\n", | |
" <td>0.03264</td>\n", | |
" <td>0.1375</td>\n", | |
" <td>0.06016</td>\n", | |
" <td>...</td>\n", | |
" <td>15.05</td>\n", | |
" <td>41.61</td>\n", | |
" <td>96.69</td>\n", | |
" <td>705.6</td>\n", | |
" <td>0.1172</td>\n", | |
" <td>0.1421</td>\n", | |
" <td>0.07003</td>\n", | |
" <td>0.07763</td>\n", | |
" <td>0.2196</td>\n", | |
" <td>0.07675</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>502 rows × 30 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" radius_mean texture_mean perimeter_mean area_mean smoothness_mean \n", | |
"187 13.61 24.69 87.76 572.6 0.09258 \\\n", | |
"326 17.05 19.08 113.40 895.0 0.11410 \n", | |
"353 19.45 19.33 126.50 1169.0 0.10350 \n", | |
"192 13.70 17.64 87.76 575.5 0.09277 \n", | |
"424 12.67 17.30 81.25 476.3 0.11580 \n", | |
".. ... ... ... ... ... \n", | |
"422 12.20 15.21 78.01 457.9 0.08673 \n", | |
"595 13.71 18.68 88.73 571.0 0.09916 \n", | |
"27 14.54 27.54 96.73 658.8 0.11390 \n", | |
"504 10.26 12.22 65.75 321.6 0.09996 \n", | |
"566 13.38 30.72 86.34 557.2 0.09245 \n", | |
"\n", | |
" compactness_mean concavity_mean concave points_mean symmetry_mean \n", | |
"187 0.07862 0.05285 0.03085 0.1761 \\\n", | |
"326 0.15720 0.19100 0.10900 0.2131 \n", | |
"353 0.11880 0.13790 0.08591 0.1776 \n", | |
"192 0.07255 0.01752 0.01880 0.1631 \n", | |
"424 0.10850 0.05928 0.03279 0.1943 \n", | |
".. ... ... ... ... \n", | |
"422 0.06545 0.01994 0.01692 0.1638 \n", | |
"595 0.10700 0.05385 0.03783 0.1714 \n", | |
"27 0.15950 0.16390 0.07364 0.2303 \n", | |
"504 0.07542 0.01923 0.01968 0.1800 \n", | |
"566 0.07426 0.02819 0.03264 0.1375 \n", | |
"\n", | |
" fractal_dimension_mean ... radius_worst texture_worst \n", | |
"187 0.06130 ... 16.89 35.64 \\\n", | |
"326 0.06325 ... 19.59 24.89 \n", | |
"353 0.05647 ... 25.70 24.57 \n", | |
"192 0.06155 ... 15.85 20.20 \n", | |
"424 0.06612 ... 13.57 21.40 \n", | |
".. ... ... ... ... \n", | |
"422 0.06129 ... 13.75 21.38 \n", | |
"595 0.06843 ... 15.11 25.63 \n", | |
"27 0.07077 ... 17.46 37.13 \n", | |
"504 0.06569 ... 11.38 15.65 \n", | |
"566 0.06016 ... 15.05 41.61 \n", | |
"\n", | |
" perimeter_worst area_worst smoothness_worst compactness_worst \n", | |
"187 113.20 848.7 0.1471 0.2884 \\\n", | |
"326 133.50 1189.0 0.1703 0.3934 \n", | |
"353 163.10 1972.0 0.1497 0.3161 \n", | |
"192 101.60 773.4 0.1264 0.1564 \n", | |
"424 86.67 552.0 0.1580 0.1751 \n", | |
".. ... ... ... ... \n", | |
"422 91.11 583.1 0.1256 0.1928 \n", | |
"595 99.43 701.9 0.1425 0.2566 \n", | |
"27 124.10 943.2 0.1678 0.6577 \n", | |
"504 73.23 394.5 0.1343 0.1650 \n", | |
"566 96.69 705.6 0.1172 0.1421 \n", | |
"\n", | |
" concavity_worst concave points_worst symmetry_worst \n", | |
"187 0.37960 0.13290 0.3470 \\\n", | |
"326 0.50180 0.25430 0.3109 \n", | |
"353 0.43170 0.19990 0.3379 \n", | |
"192 0.12060 0.08704 0.2806 \n", | |
"424 0.18890 0.08411 0.3155 \n", | |
".. ... ... ... \n", | |
"422 0.11670 0.05556 0.2661 \n", | |
"595 0.19350 0.12840 0.2849 \n", | |
"27 0.70260 0.17120 0.4218 \n", | |
"504 0.08615 0.06696 0.2937 \n", | |
"566 0.07003 0.07763 0.2196 \n", | |
"\n", | |
" fractal_dimension_worst \n", | |
"187 0.07900 \n", | |
"326 0.09061 \n", | |
"353 0.08950 \n", | |
"192 0.07782 \n", | |
"424 0.07538 \n", | |
".. ... \n", | |
"422 0.07961 \n", | |
"595 0.09031 \n", | |
"27 0.13410 \n", | |
"504 0.07722 \n", | |
"566 0.07675 \n", | |
"\n", | |
"[502 rows x 30 columns]" | |
] | |
}, | |
"execution_count": 64, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"X_train" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 65, | |
"id": "978b866f", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>diagnosis</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>187</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>326</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>353</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>192</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>424</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>422</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>595</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>27</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>504</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>566</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>502 rows × 1 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" diagnosis\n", | |
"187 1\n", | |
"326 1\n", | |
"353 1\n", | |
"192 0\n", | |
"424 0\n", | |
".. ...\n", | |
"422 0\n", | |
"595 0\n", | |
"27 1\n", | |
"504 0\n", | |
"566 0\n", | |
"\n", | |
"[502 rows x 1 columns]" | |
] | |
}, | |
"execution_count": 65, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y_train" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 66, | |
"id": "ac419b8a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>radius_mean</th>\n", | |
" <th>texture_mean</th>\n", | |
" <th>perimeter_mean</th>\n", | |
" <th>area_mean</th>\n", | |
" <th>smoothness_mean</th>\n", | |
" <th>compactness_mean</th>\n", | |
" <th>concavity_mean</th>\n", | |
" <th>concave points_mean</th>\n", | |
" <th>symmetry_mean</th>\n", | |
" <th>fractal_dimension_mean</th>\n", | |
" <th>...</th>\n", | |
" <th>radius_worst</th>\n", | |
" <th>texture_worst</th>\n", | |
" <th>perimeter_worst</th>\n", | |
" <th>area_worst</th>\n", | |
" <th>smoothness_worst</th>\n", | |
" <th>compactness_worst</th>\n", | |
" <th>concavity_worst</th>\n", | |
" <th>concave points_worst</th>\n", | |
" <th>symmetry_worst</th>\n", | |
" <th>fractal_dimension_worst</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>613</th>\n", | |
" <td>13.820</td>\n", | |
" <td>24.49</td>\n", | |
" <td>92.33</td>\n", | |
" <td>595.9</td>\n", | |
" <td>0.11620</td>\n", | |
" <td>0.16810</td>\n", | |
" <td>0.13570</td>\n", | |
" <td>0.06759</td>\n", | |
" <td>0.2275</td>\n", | |
" <td>0.07237</td>\n", | |
" <td>...</td>\n", | |
" <td>16.010</td>\n", | |
" <td>32.94</td>\n", | |
" <td>106.00</td>\n", | |
" <td>788.0</td>\n", | |
" <td>0.1794</td>\n", | |
" <td>0.3966</td>\n", | |
" <td>0.33810</td>\n", | |
" <td>0.15210</td>\n", | |
" <td>0.3651</td>\n", | |
" <td>0.11830</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>423</th>\n", | |
" <td>12.670</td>\n", | |
" <td>17.30</td>\n", | |
" <td>81.25</td>\n", | |
" <td>489.9</td>\n", | |
" <td>0.10280</td>\n", | |
" <td>0.07664</td>\n", | |
" <td>0.03193</td>\n", | |
" <td>0.02107</td>\n", | |
" <td>0.1707</td>\n", | |
" <td>0.05984</td>\n", | |
" <td>...</td>\n", | |
" <td>13.710</td>\n", | |
" <td>21.10</td>\n", | |
" <td>88.70</td>\n", | |
" <td>574.4</td>\n", | |
" <td>0.1384</td>\n", | |
" <td>0.1212</td>\n", | |
" <td>0.10200</td>\n", | |
" <td>0.05602</td>\n", | |
" <td>0.2688</td>\n", | |
" <td>0.06888</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>28</th>\n", | |
" <td>14.680</td>\n", | |
" <td>20.13</td>\n", | |
" <td>94.74</td>\n", | |
" <td>684.5</td>\n", | |
" <td>0.09867</td>\n", | |
" <td>0.07200</td>\n", | |
" <td>0.07395</td>\n", | |
" <td>0.05259</td>\n", | |
" <td>0.1586</td>\n", | |
" <td>0.05922</td>\n", | |
" <td>...</td>\n", | |
" <td>19.070</td>\n", | |
" <td>30.88</td>\n", | |
" <td>123.40</td>\n", | |
" <td>1138.0</td>\n", | |
" <td>0.1464</td>\n", | |
" <td>0.1871</td>\n", | |
" <td>0.29140</td>\n", | |
" <td>0.16090</td>\n", | |
" <td>0.3029</td>\n", | |
" <td>0.08216</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>489</th>\n", | |
" <td>11.270</td>\n", | |
" <td>12.96</td>\n", | |
" <td>73.16</td>\n", | |
" <td>386.3</td>\n", | |
" <td>0.12370</td>\n", | |
" <td>0.11110</td>\n", | |
" <td>0.07900</td>\n", | |
" <td>0.05550</td>\n", | |
" <td>0.2018</td>\n", | |
" <td>0.06914</td>\n", | |
" <td>...</td>\n", | |
" <td>12.840</td>\n", | |
" <td>20.53</td>\n", | |
" <td>84.93</td>\n", | |
" <td>476.1</td>\n", | |
" <td>0.1610</td>\n", | |
" <td>0.2429</td>\n", | |
" <td>0.22470</td>\n", | |
" <td>0.13180</td>\n", | |
" <td>0.3343</td>\n", | |
" <td>0.09215</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>289</th>\n", | |
" <td>12.470</td>\n", | |
" <td>18.60</td>\n", | |
" <td>81.09</td>\n", | |
" <td>481.9</td>\n", | |
" <td>0.09965</td>\n", | |
" <td>0.10580</td>\n", | |
" <td>0.08005</td>\n", | |
" <td>0.03821</td>\n", | |
" <td>0.1925</td>\n", | |
" <td>0.06373</td>\n", | |
" <td>...</td>\n", | |
" <td>14.970</td>\n", | |
" <td>24.64</td>\n", | |
" <td>96.05</td>\n", | |
" <td>677.9</td>\n", | |
" <td>0.1426</td>\n", | |
" <td>0.2378</td>\n", | |
" <td>0.26710</td>\n", | |
" <td>0.10150</td>\n", | |
" <td>0.3014</td>\n", | |
" <td>0.08750</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>206</th>\n", | |
" <td>11.280</td>\n", | |
" <td>13.39</td>\n", | |
" <td>73.00</td>\n", | |
" <td>384.8</td>\n", | |
" <td>0.11640</td>\n", | |
" <td>0.11360</td>\n", | |
" <td>0.04635</td>\n", | |
" <td>0.04796</td>\n", | |
" <td>0.1771</td>\n", | |
" <td>0.06072</td>\n", | |
" <td>...</td>\n", | |
" <td>11.920</td>\n", | |
" <td>15.77</td>\n", | |
" <td>76.53</td>\n", | |
" <td>434.0</td>\n", | |
" <td>0.1367</td>\n", | |
" <td>0.1822</td>\n", | |
" <td>0.08669</td>\n", | |
" <td>0.08611</td>\n", | |
" <td>0.2102</td>\n", | |
" <td>0.06784</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>332</th>\n", | |
" <td>11.220</td>\n", | |
" <td>33.81</td>\n", | |
" <td>70.79</td>\n", | |
" <td>365.6</td>\n", | |
" <td>0.09687</td>\n", | |
" <td>0.09752</td>\n", | |
" <td>0.05263</td>\n", | |
" <td>0.02788</td>\n", | |
" <td>0.1619</td>\n", | |
" <td>0.06408</td>\n", | |
" <td>...</td>\n", | |
" <td>11.620</td>\n", | |
" <td>26.51</td>\n", | |
" <td>76.43</td>\n", | |
" <td>407.5</td>\n", | |
" <td>0.1428</td>\n", | |
" <td>0.2510</td>\n", | |
" <td>0.21230</td>\n", | |
" <td>0.09861</td>\n", | |
" <td>0.2289</td>\n", | |
" <td>0.08278</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>19.170</td>\n", | |
" <td>24.80</td>\n", | |
" <td>132.40</td>\n", | |
" <td>1123.0</td>\n", | |
" <td>0.09740</td>\n", | |
" <td>0.24580</td>\n", | |
" <td>0.20650</td>\n", | |
" <td>0.11180</td>\n", | |
" <td>0.2397</td>\n", | |
" <td>0.07800</td>\n", | |
" <td>...</td>\n", | |
" <td>20.960</td>\n", | |
" <td>29.94</td>\n", | |
" <td>151.70</td>\n", | |
" <td>1332.0</td>\n", | |
" <td>0.1037</td>\n", | |
" <td>0.3903</td>\n", | |
" <td>0.36390</td>\n", | |
" <td>0.17670</td>\n", | |
" <td>0.3176</td>\n", | |
" <td>0.10230</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>177</th>\n", | |
" <td>8.726</td>\n", | |
" <td>15.83</td>\n", | |
" <td>55.84</td>\n", | |
" <td>230.9</td>\n", | |
" <td>0.11500</td>\n", | |
" <td>0.08201</td>\n", | |
" <td>0.04132</td>\n", | |
" <td>0.01924</td>\n", | |
" <td>0.1649</td>\n", | |
" <td>0.07633</td>\n", | |
" <td>...</td>\n", | |
" <td>9.628</td>\n", | |
" <td>19.62</td>\n", | |
" <td>64.48</td>\n", | |
" <td>284.4</td>\n", | |
" <td>0.1724</td>\n", | |
" <td>0.2364</td>\n", | |
" <td>0.24560</td>\n", | |
" <td>0.10500</td>\n", | |
" <td>0.2926</td>\n", | |
" <td>0.10170</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>127</th>\n", | |
" <td>12.310</td>\n", | |
" <td>16.52</td>\n", | |
" <td>79.19</td>\n", | |
" <td>470.9</td>\n", | |
" <td>0.09172</td>\n", | |
" <td>0.06829</td>\n", | |
" <td>0.03372</td>\n", | |
" <td>0.02272</td>\n", | |
" <td>0.1720</td>\n", | |
" <td>0.05914</td>\n", | |
" <td>...</td>\n", | |
" <td>14.110</td>\n", | |
" <td>23.21</td>\n", | |
" <td>89.71</td>\n", | |
" <td>611.1</td>\n", | |
" <td>0.1176</td>\n", | |
" <td>0.1843</td>\n", | |
" <td>0.17030</td>\n", | |
" <td>0.08660</td>\n", | |
" <td>0.2618</td>\n", | |
" <td>0.07609</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>168 rows × 30 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" radius_mean texture_mean perimeter_mean area_mean smoothness_mean \n", | |
"613 13.820 24.49 92.33 595.9 0.11620 \\\n", | |
"423 12.670 17.30 81.25 489.9 0.10280 \n", | |
"28 14.680 20.13 94.74 684.5 0.09867 \n", | |
"489 11.270 12.96 73.16 386.3 0.12370 \n", | |
"289 12.470 18.60 81.09 481.9 0.09965 \n", | |
".. ... ... ... ... ... \n", | |
"206 11.280 13.39 73.00 384.8 0.11640 \n", | |
"332 11.220 33.81 70.79 365.6 0.09687 \n", | |
"18 19.170 24.80 132.40 1123.0 0.09740 \n", | |
"177 8.726 15.83 55.84 230.9 0.11500 \n", | |
"127 12.310 16.52 79.19 470.9 0.09172 \n", | |
"\n", | |
" compactness_mean concavity_mean concave points_mean symmetry_mean \n", | |
"613 0.16810 0.13570 0.06759 0.2275 \\\n", | |
"423 0.07664 0.03193 0.02107 0.1707 \n", | |
"28 0.07200 0.07395 0.05259 0.1586 \n", | |
"489 0.11110 0.07900 0.05550 0.2018 \n", | |
"289 0.10580 0.08005 0.03821 0.1925 \n", | |
".. ... ... ... ... \n", | |
"206 0.11360 0.04635 0.04796 0.1771 \n", | |
"332 0.09752 0.05263 0.02788 0.1619 \n", | |
"18 0.24580 0.20650 0.11180 0.2397 \n", | |
"177 0.08201 0.04132 0.01924 0.1649 \n", | |
"127 0.06829 0.03372 0.02272 0.1720 \n", | |
"\n", | |
" fractal_dimension_mean ... radius_worst texture_worst \n", | |
"613 0.07237 ... 16.010 32.94 \\\n", | |
"423 0.05984 ... 13.710 21.10 \n", | |
"28 0.05922 ... 19.070 30.88 \n", | |
"489 0.06914 ... 12.840 20.53 \n", | |
"289 0.06373 ... 14.970 24.64 \n", | |
".. ... ... ... ... \n", | |
"206 0.06072 ... 11.920 15.77 \n", | |
"332 0.06408 ... 11.620 26.51 \n", | |
"18 0.07800 ... 20.960 29.94 \n", | |
"177 0.07633 ... 9.628 19.62 \n", | |
"127 0.05914 ... 14.110 23.21 \n", | |
"\n", | |
" perimeter_worst area_worst smoothness_worst compactness_worst \n", | |
"613 106.00 788.0 0.1794 0.3966 \\\n", | |
"423 88.70 574.4 0.1384 0.1212 \n", | |
"28 123.40 1138.0 0.1464 0.1871 \n", | |
"489 84.93 476.1 0.1610 0.2429 \n", | |
"289 96.05 677.9 0.1426 0.2378 \n", | |
".. ... ... ... ... \n", | |
"206 76.53 434.0 0.1367 0.1822 \n", | |
"332 76.43 407.5 0.1428 0.2510 \n", | |
"18 151.70 1332.0 0.1037 0.3903 \n", | |
"177 64.48 284.4 0.1724 0.2364 \n", | |
"127 89.71 611.1 0.1176 0.1843 \n", | |
"\n", | |
" concavity_worst concave points_worst symmetry_worst \n", | |
"613 0.33810 0.15210 0.3651 \\\n", | |
"423 0.10200 0.05602 0.2688 \n", | |
"28 0.29140 0.16090 0.3029 \n", | |
"489 0.22470 0.13180 0.3343 \n", | |
"289 0.26710 0.10150 0.3014 \n", | |
".. ... ... ... \n", | |
"206 0.08669 0.08611 0.2102 \n", | |
"332 0.21230 0.09861 0.2289 \n", | |
"18 0.36390 0.17670 0.3176 \n", | |
"177 0.24560 0.10500 0.2926 \n", | |
"127 0.17030 0.08660 0.2618 \n", | |
"\n", | |
" fractal_dimension_worst \n", | |
"613 0.11830 \n", | |
"423 0.06888 \n", | |
"28 0.08216 \n", | |
"489 0.09215 \n", | |
"289 0.08750 \n", | |
".. ... \n", | |
"206 0.06784 \n", | |
"332 0.08278 \n", | |
"18 0.10230 \n", | |
"177 0.10170 \n", | |
"127 0.07609 \n", | |
"\n", | |
"[168 rows x 30 columns]" | |
] | |
}, | |
"execution_count": 66, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"X_test" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 67, | |
"id": "564ac922", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>diagnosis</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>613</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>423</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>28</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>489</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>289</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>206</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>332</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>177</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>127</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>168 rows × 1 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" diagnosis\n", | |
"613 1\n", | |
"423 0\n", | |
"28 1\n", | |
"489 0\n", | |
"289 0\n", | |
".. ...\n", | |
"206 0\n", | |
"332 0\n", | |
"18 1\n", | |
"177 0\n", | |
"127 0\n", | |
"\n", | |
"[168 rows x 1 columns]" | |
] | |
}, | |
"execution_count": 67, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y_test" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 68, | |
"id": "b49b4ff5", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>DecisionTreeClassifier(criterion='entropy')</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">DecisionTreeClassifier</label><div class=\"sk-toggleable__content\"><pre>DecisionTreeClassifier(criterion='entropy')</pre></div></div></div></div></div>" | |
], | |
"text/plain": [ | |
"DecisionTreeClassifier(criterion='entropy')" | |
] | |
}, | |
"execution_count": 68, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#Using tree classifier for model Building \n", | |
"from sklearn import tree\n", | |
"tree_clas = tree.DecisionTreeClassifier(criterion='entropy')\n", | |
"tree_clas.fit(X_train,y_train)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 69, | |
"id": "537be80a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1,\n", | |
" 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,\n", | |
" 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1,\n", | |
" 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0,\n", | |
" 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0,\n", | |
" 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0,\n", | |
" 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0])" | |
] | |
}, | |
"execution_count": 69, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y_test_predict = tree_clas.predict(X_test)\n", | |
"y_test_predict" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 70, | |
"id": "89c29a02", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0]])" | |
] | |
}, | |
"execution_count": 70, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"np.array(y_test)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 71, | |
"id": "0cb26c4d", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0,\n", | |
" 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,\n", | |
" 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1,\n", | |
" 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1,\n", | |
" 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,\n", | |
" 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1,\n", | |
" 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0,\n", | |
" 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0,\n", | |
" 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1,\n", | |
" 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1,\n", | |
" 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1,\n", | |
" 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", | |
" 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0,\n", | |
" 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1,\n", | |
" 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0,\n", | |
" 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1,\n", | |
" 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0,\n", | |
" 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1,\n", | |
" 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0,\n", | |
" 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1,\n", | |
" 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0,\n", | |
" 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0])" | |
] | |
}, | |
"execution_count": 71, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#Prediction on training dataset\n", | |
"y_train_predict = tree_clas.predict(X_train)\n", | |
"y_train_predict" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 72, | |
"id": "75b8c8f0", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [1],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0],\n", | |
" [0],\n", | |
" [1],\n", | |
" [0],\n", | |
" [0]])" | |
] | |
}, | |
"execution_count": 72, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"np.array(y_train)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 73, | |
"id": "3a2f8b4b", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0.9166666666666666\n" | |
] | |
} | |
], | |
"source": [ | |
"#Calculating the accuracy of prediction on the test set\n", | |
"from sklearn.metrics import accuracy_score\n", | |
"\n", | |
"test_accuracy = accuracy_score(y_test,y_test_predict)\n", | |
"print(test_accuracy)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 74, | |
"id": "fe0c404a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1.0\n" | |
] | |
} | |
], | |
"source": [ | |
"train_accuracy = accuracy_score(y_train,y_train_predict)\n", | |
"print(train_accuracy)\n", | |
"#Training accuracy of 1.0 is well above the test accuracy, so the tree is overfitting the training data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 75, | |
"id": "b2717d8b", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[90, 2],\n", | |
" [12, 64]])" | |
] | |
}, | |
"execution_count": 75, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from sklearn.metrics import confusion_matrix\n", | |
"confusion_matrix(y_test,y_test_predict)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 76, | |
"id": "2b8e519e", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[Text(0.6614583333333334, 0.9375, 'x[22] <= 116.05\\nentropy = 0.956\\nsamples = 502\\nvalue = [313, 189]'),\n", | |
" Text(0.40625, 0.8125, 'x[27] <= 0.111\\nentropy = 0.465\\nsamples = 344\\nvalue = [310, 34]'),\n", | |
" Text(0.20833333333333334, 0.6875, 'x[3] <= 694.1\\nentropy = 0.112\\nsamples = 268\\nvalue = [264, 4]'),\n", | |
" Text(0.125, 0.5625, 'x[18] <= 0.011\\nentropy = 0.036\\nsamples = 263\\nvalue = [262, 1]'),\n", | |
" Text(0.08333333333333333, 0.4375, 'x[1] <= 21.165\\nentropy = 0.918\\nsamples = 3\\nvalue = [2, 1]'),\n", | |
" Text(0.041666666666666664, 0.3125, 'entropy = 0.0\\nsamples = 2\\nvalue = [2, 0]'),\n", | |
" Text(0.125, 0.3125, 'entropy = 0.0\\nsamples = 1\\nvalue = [0, 1]'),\n", | |
" Text(0.16666666666666666, 0.4375, 'entropy = 0.0\\nsamples = 260\\nvalue = [260, 0]'),\n", | |
" Text(0.2916666666666667, 0.5625, 'x[10] <= 0.222\\nentropy = 0.971\\nsamples = 5\\nvalue = [2, 3]'),\n", | |
" Text(0.25, 0.4375, 'entropy = 0.0\\nsamples = 2\\nvalue = [2, 0]'),\n", | |
" Text(0.3333333333333333, 0.4375, 'entropy = 0.0\\nsamples = 3\\nvalue = [0, 3]'),\n", | |
" Text(0.6041666666666666, 0.6875, 'x[1] <= 19.71\\nentropy = 0.968\\nsamples = 76\\nvalue = [46, 30]'),\n", | |
" Text(0.4583333333333333, 0.5625, 'x[21] <= 25.43\\nentropy = 0.469\\nsamples = 40\\nvalue = [36, 4]'),\n", | |
" Text(0.4166666666666667, 0.4375, 'entropy = 0.0\\nsamples = 22\\nvalue = [22, 0]'),\n", | |
" Text(0.5, 0.4375, 'x[28] <= 0.355\\nentropy = 0.764\\nsamples = 18\\nvalue = [14, 4]'),\n", | |
" Text(0.4583333333333333, 0.3125, 'x[27] <= 0.12\\nentropy = 0.544\\nsamples = 16\\nvalue = [14, 2]'),\n", | |
" Text(0.4166666666666667, 0.1875, 'x[11] <= 1.38\\nentropy = 0.918\\nsamples = 3\\nvalue = [1, 2]'),\n", | |
" Text(0.375, 0.0625, 'entropy = 0.0\\nsamples = 2\\nvalue = [0, 2]'),\n", | |
" Text(0.4583333333333333, 0.0625, 'entropy = 0.0\\nsamples = 1\\nvalue = [1, 0]'),\n", | |
" Text(0.5, 0.1875, 'entropy = 0.0\\nsamples = 13\\nvalue = [13, 0]'),\n", | |
" Text(0.5416666666666666, 0.3125, 'entropy = 0.0\\nsamples = 2\\nvalue = [0, 2]'),\n", | |
" Text(0.75, 0.5625, 'x[7] <= 0.045\\nentropy = 0.852\\nsamples = 36\\nvalue = [10, 26]'),\n", | |
" Text(0.6666666666666666, 0.4375, 'x[18] <= 0.013\\nentropy = 0.684\\nsamples = 11\\nvalue = [9, 2]'),\n", | |
" Text(0.625, 0.3125, 'entropy = 0.0\\nsamples = 2\\nvalue = [0, 2]'),\n", | |
" Text(0.7083333333333334, 0.3125, 'entropy = 0.0\\nsamples = 9\\nvalue = [9, 0]'),\n", | |
" Text(0.8333333333333334, 0.4375, 'x[10] <= 0.193\\nentropy = 0.242\\nsamples = 25\\nvalue = [1, 24]'),\n", | |
" Text(0.7916666666666666, 0.3125, 'x[8] <= 0.181\\nentropy = 1.0\\nsamples = 2\\nvalue = [1, 1]'),\n", | |
" Text(0.75, 0.1875, 'entropy = 0.0\\nsamples = 1\\nvalue = [0, 1]'),\n", | |
" Text(0.8333333333333334, 0.1875, 'entropy = 0.0\\nsamples = 1\\nvalue = [1, 0]'),\n", | |
" Text(0.875, 0.3125, 'entropy = 0.0\\nsamples = 23\\nvalue = [0, 23]'),\n", | |
" Text(0.9166666666666666, 0.8125, 'x[27] <= 0.092\\nentropy = 0.136\\nsamples = 158\\nvalue = [3, 155]'),\n", | |
" Text(0.875, 0.6875, 'x[6] <= 0.04\\nentropy = 0.971\\nsamples = 5\\nvalue = [3, 2]'),\n", | |
" Text(0.8333333333333334, 0.5625, 'entropy = 0.0\\nsamples = 2\\nvalue = [0, 2]'),\n", | |
" Text(0.9166666666666666, 0.5625, 'entropy = 0.0\\nsamples = 3\\nvalue = [3, 0]'),\n", | |
" Text(0.9583333333333334, 0.6875, 'entropy = 0.0\\nsamples = 153\\nvalue = [0, 153]')]" | |
] | |
}, | |
"execution_count": 76, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"data": { | |
"image/png": "", | |
"text/plain": [ | |
"<Figure size 640x480 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"tree.plot_tree(tree_clas)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "f3095f82", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "2927d682", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "b6343aa1", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.10" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment