Created
August 23, 2017 01:06
-
-
Save emmafreeman/14f3b0a28ee9725e31c2244be87c5961 to your computer and use it in GitHub Desktop.
GA DSI Instructor Challenge
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Part 1: Modeling" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"import matplotlib.pyplot as plt\n", | |
"%matplotlib inline\n", | |
"import seaborn as sns\n", | |
"import requests\n", | |
"import statistics\n", | |
"\n", | |
"from sklearn.preprocessing import StandardScaler\n", | |
"from sklearn.feature_selection import f_classif, SelectKBest, SelectFromModel\n", | |
"from sklearn.model_selection import train_test_split, GridSearchCV, ShuffleSplit\n", | |
"from sklearn.pipeline import Pipeline\n", | |
"from sklearn.ensemble import RandomForestClassifier\n", | |
"from sklearn.linear_model import Lasso, Ridge, LogisticRegression" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# URL of the raw breast-cancer CSV (read below without a header row)\n", | |
"url = 'https://gist.githubusercontent.com/jeff-boykin/b5c536467c30d66ab97cd1f5c9a3497d/raw/5233c792af49c9b78f20c35d5cd729e1307a7df7/breast-cancer.csv'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# header=None: the first CSV line is data, so pandas labels the columns 0, 1, 2, ...\n", | |
"bc_data = pd.read_csv(url, header=None)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" <th>3</th>\n", | |
" <th>4</th>\n", | |
" <th>5</th>\n", | |
" <th>6</th>\n", | |
" <th>7</th>\n", | |
" <th>8</th>\n", | |
" <th>9</th>\n", | |
" <th>...</th>\n", | |
" <th>22</th>\n", | |
" <th>23</th>\n", | |
" <th>24</th>\n", | |
" <th>25</th>\n", | |
" <th>26</th>\n", | |
" <th>27</th>\n", | |
" <th>28</th>\n", | |
" <th>29</th>\n", | |
" <th>30</th>\n", | |
" <th>31</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>842302</td>\n", | |
" <td>M</td>\n", | |
" <td>17.99</td>\n", | |
" <td>10.38</td>\n", | |
" <td>122.80</td>\n", | |
" <td>1001.0</td>\n", | |
" <td>0.11840</td>\n", | |
" <td>0.27760</td>\n", | |
" <td>0.3001</td>\n", | |
" <td>0.14710</td>\n", | |
" <td>...</td>\n", | |
" <td>25.38</td>\n", | |
" <td>17.33</td>\n", | |
" <td>184.60</td>\n", | |
" <td>2019.0</td>\n", | |
" <td>0.1622</td>\n", | |
" <td>0.6656</td>\n", | |
" <td>0.7119</td>\n", | |
" <td>0.2654</td>\n", | |
" <td>0.4601</td>\n", | |
" <td>0.11890</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>842517</td>\n", | |
" <td>M</td>\n", | |
" <td>20.57</td>\n", | |
" <td>17.77</td>\n", | |
" <td>132.90</td>\n", | |
" <td>1326.0</td>\n", | |
" <td>0.08474</td>\n", | |
" <td>0.07864</td>\n", | |
" <td>0.0869</td>\n", | |
" <td>0.07017</td>\n", | |
" <td>...</td>\n", | |
" <td>24.99</td>\n", | |
" <td>23.41</td>\n", | |
" <td>158.80</td>\n", | |
" <td>1956.0</td>\n", | |
" <td>0.1238</td>\n", | |
" <td>0.1866</td>\n", | |
" <td>0.2416</td>\n", | |
" <td>0.1860</td>\n", | |
" <td>0.2750</td>\n", | |
" <td>0.08902</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>84300903</td>\n", | |
" <td>M</td>\n", | |
" <td>19.69</td>\n", | |
" <td>21.25</td>\n", | |
" <td>130.00</td>\n", | |
" <td>1203.0</td>\n", | |
" <td>0.10960</td>\n", | |
" <td>0.15990</td>\n", | |
" <td>0.1974</td>\n", | |
" <td>0.12790</td>\n", | |
" <td>...</td>\n", | |
" <td>23.57</td>\n", | |
" <td>25.53</td>\n", | |
" <td>152.50</td>\n", | |
" <td>1709.0</td>\n", | |
" <td>0.1444</td>\n", | |
" <td>0.4245</td>\n", | |
" <td>0.4504</td>\n", | |
" <td>0.2430</td>\n", | |
" <td>0.3613</td>\n", | |
" <td>0.08758</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>84348301</td>\n", | |
" <td>M</td>\n", | |
" <td>11.42</td>\n", | |
" <td>20.38</td>\n", | |
" <td>77.58</td>\n", | |
" <td>386.1</td>\n", | |
" <td>0.14250</td>\n", | |
" <td>0.28390</td>\n", | |
" <td>0.2414</td>\n", | |
" <td>0.10520</td>\n", | |
" <td>...</td>\n", | |
" <td>14.91</td>\n", | |
" <td>26.50</td>\n", | |
" <td>98.87</td>\n", | |
" <td>567.7</td>\n", | |
" <td>0.2098</td>\n", | |
" <td>0.8663</td>\n", | |
" <td>0.6869</td>\n", | |
" <td>0.2575</td>\n", | |
" <td>0.6638</td>\n", | |
" <td>0.17300</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>84358402</td>\n", | |
" <td>M</td>\n", | |
" <td>20.29</td>\n", | |
" <td>14.34</td>\n", | |
" <td>135.10</td>\n", | |
" <td>1297.0</td>\n", | |
" <td>0.10030</td>\n", | |
" <td>0.13280</td>\n", | |
" <td>0.1980</td>\n", | |
" <td>0.10430</td>\n", | |
" <td>...</td>\n", | |
" <td>22.54</td>\n", | |
" <td>16.67</td>\n", | |
" <td>152.20</td>\n", | |
" <td>1575.0</td>\n", | |
" <td>0.1374</td>\n", | |
" <td>0.2050</td>\n", | |
" <td>0.4000</td>\n", | |
" <td>0.1625</td>\n", | |
" <td>0.2364</td>\n", | |
" <td>0.07678</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5 rows × 32 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 0 1 2 3 4 5 6 7 8 \\\n", | |
"0 842302 M 17.99 10.38 122.80 1001.0 0.11840 0.27760 0.3001 \n", | |
"1 842517 M 20.57 17.77 132.90 1326.0 0.08474 0.07864 0.0869 \n", | |
"2 84300903 M 19.69 21.25 130.00 1203.0 0.10960 0.15990 0.1974 \n", | |
"3 84348301 M 11.42 20.38 77.58 386.1 0.14250 0.28390 0.2414 \n", | |
"4 84358402 M 20.29 14.34 135.10 1297.0 0.10030 0.13280 0.1980 \n", | |
"\n", | |
" 9 ... 22 23 24 25 26 27 28 \\\n", | |
"0 0.14710 ... 25.38 17.33 184.60 2019.0 0.1622 0.6656 0.7119 \n", | |
"1 0.07017 ... 24.99 23.41 158.80 1956.0 0.1238 0.1866 0.2416 \n", | |
"2 0.12790 ... 23.57 25.53 152.50 1709.0 0.1444 0.4245 0.4504 \n", | |
"3 0.10520 ... 14.91 26.50 98.87 567.7 0.2098 0.8663 0.6869 \n", | |
"4 0.10430 ... 22.54 16.67 152.20 1575.0 0.1374 0.2050 0.4000 \n", | |
"\n", | |
" 29 30 31 \n", | |
"0 0.2654 0.4601 0.11890 \n", | |
"1 0.1860 0.2750 0.08902 \n", | |
"2 0.2430 0.3613 0.08758 \n", | |
"3 0.2575 0.6638 0.17300 \n", | |
"4 0.1625 0.2364 0.07678 \n", | |
"\n", | |
"[5 rows x 32 columns]" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# preview the first five rows while the columns still carry integer labels\n", | |
"bc_data.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"B 357\n", | |
"M 212\n", | |
"Name: 1, dtype: int64" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# class counts for column 1, which holds the B/M diagnosis letter\n", | |
"bc_data[1].value_counts()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Note: the classes are imbalanced (357 benign vs. 212 malignant samples) - this could affect our predictive model" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# read in header file\n", | |
"# timeout: do not let a stalled connection hang the notebook indefinitely\n", | |
"header = requests.get(\n", | |
"    'https://gist.githubusercontent.com/jeff-boykin/b5c536467c30d66ab97cd1f5c9a3497d/raw/5233c792af49c9b78f20c35d5cd729e1307a7df7/field_names.txt',\n", | |
"    timeout=30)\n", | |
"# fail loudly on an HTTP error instead of treating an error page as field names\n", | |
"header.raise_for_status()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# split the response body into a list of strings, one per line\n", | |
"# NOTE: this rebinds `header` from the Response object to a list, so re-running\n", | |
"# this cell without first re-running the request cell raises AttributeError\n", | |
"header = header.text.splitlines()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# assign headers to dataframe (names are applied positionally, in list order)\n", | |
"# NOTE(review): several displayed values look implausible for their labels, e.g.\n", | |
"# row 0 shows radius_mean=17.99 but radius_worst=122.80 and texture_mean=1001.0;\n", | |
"# confirm that the order of the field names matches the CSV column order\n", | |
"bc_data.columns=header" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>ID</th>\n", | |
" <th>diagnosis</th>\n", | |
" <th>radius_mean</th>\n", | |
" <th>radius_sd_error</th>\n", | |
" <th>radius_worst</th>\n", | |
" <th>texture_mean</th>\n", | |
" <th>texture_sd_error</th>\n", | |
" <th>texture_worst</th>\n", | |
" <th>perimeter_mean</th>\n", | |
" <th>perimeter_sd_error</th>\n", | |
" <th>...</th>\n", | |
" <th>concavity_worst</th>\n", | |
" <th>concave_points_mean</th>\n", | |
" <th>concave_points_sd_error</th>\n", | |
" <th>concave_points_worst</th>\n", | |
" <th>symmetry_mean</th>\n", | |
" <th>symmetry_sd_error</th>\n", | |
" <th>symmetry_worst</th>\n", | |
" <th>fractal_dimension_mean</th>\n", | |
" <th>fractal_dimension_sd_error</th>\n", | |
" <th>fractal_dimension_worst</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>842302</td>\n", | |
" <td>M</td>\n", | |
" <td>17.99</td>\n", | |
" <td>10.38</td>\n", | |
" <td>122.80</td>\n", | |
" <td>1001.0</td>\n", | |
" <td>0.11840</td>\n", | |
" <td>0.27760</td>\n", | |
" <td>0.3001</td>\n", | |
" <td>0.14710</td>\n", | |
" <td>...</td>\n", | |
" <td>25.38</td>\n", | |
" <td>17.33</td>\n", | |
" <td>184.60</td>\n", | |
" <td>2019.0</td>\n", | |
" <td>0.1622</td>\n", | |
" <td>0.6656</td>\n", | |
" <td>0.7119</td>\n", | |
" <td>0.2654</td>\n", | |
" <td>0.4601</td>\n", | |
" <td>0.11890</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>842517</td>\n", | |
" <td>M</td>\n", | |
" <td>20.57</td>\n", | |
" <td>17.77</td>\n", | |
" <td>132.90</td>\n", | |
" <td>1326.0</td>\n", | |
" <td>0.08474</td>\n", | |
" <td>0.07864</td>\n", | |
" <td>0.0869</td>\n", | |
" <td>0.07017</td>\n", | |
" <td>...</td>\n", | |
" <td>24.99</td>\n", | |
" <td>23.41</td>\n", | |
" <td>158.80</td>\n", | |
" <td>1956.0</td>\n", | |
" <td>0.1238</td>\n", | |
" <td>0.1866</td>\n", | |
" <td>0.2416</td>\n", | |
" <td>0.1860</td>\n", | |
" <td>0.2750</td>\n", | |
" <td>0.08902</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>84300903</td>\n", | |
" <td>M</td>\n", | |
" <td>19.69</td>\n", | |
" <td>21.25</td>\n", | |
" <td>130.00</td>\n", | |
" <td>1203.0</td>\n", | |
" <td>0.10960</td>\n", | |
" <td>0.15990</td>\n", | |
" <td>0.1974</td>\n", | |
" <td>0.12790</td>\n", | |
" <td>...</td>\n", | |
" <td>23.57</td>\n", | |
" <td>25.53</td>\n", | |
" <td>152.50</td>\n", | |
" <td>1709.0</td>\n", | |
" <td>0.1444</td>\n", | |
" <td>0.4245</td>\n", | |
" <td>0.4504</td>\n", | |
" <td>0.2430</td>\n", | |
" <td>0.3613</td>\n", | |
" <td>0.08758</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>84348301</td>\n", | |
" <td>M</td>\n", | |
" <td>11.42</td>\n", | |
" <td>20.38</td>\n", | |
" <td>77.58</td>\n", | |
" <td>386.1</td>\n", | |
" <td>0.14250</td>\n", | |
" <td>0.28390</td>\n", | |
" <td>0.2414</td>\n", | |
" <td>0.10520</td>\n", | |
" <td>...</td>\n", | |
" <td>14.91</td>\n", | |
" <td>26.50</td>\n", | |
" <td>98.87</td>\n", | |
" <td>567.7</td>\n", | |
" <td>0.2098</td>\n", | |
" <td>0.8663</td>\n", | |
" <td>0.6869</td>\n", | |
" <td>0.2575</td>\n", | |
" <td>0.6638</td>\n", | |
" <td>0.17300</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>84358402</td>\n", | |
" <td>M</td>\n", | |
" <td>20.29</td>\n", | |
" <td>14.34</td>\n", | |
" <td>135.10</td>\n", | |
" <td>1297.0</td>\n", | |
" <td>0.10030</td>\n", | |
" <td>0.13280</td>\n", | |
" <td>0.1980</td>\n", | |
" <td>0.10430</td>\n", | |
" <td>...</td>\n", | |
" <td>22.54</td>\n", | |
" <td>16.67</td>\n", | |
" <td>152.20</td>\n", | |
" <td>1575.0</td>\n", | |
" <td>0.1374</td>\n", | |
" <td>0.2050</td>\n", | |
" <td>0.4000</td>\n", | |
" <td>0.1625</td>\n", | |
" <td>0.2364</td>\n", | |
" <td>0.07678</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5 rows × 32 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" ID diagnosis radius_mean radius_sd_error radius_worst \\\n", | |
"0 842302 M 17.99 10.38 122.80 \n", | |
"1 842517 M 20.57 17.77 132.90 \n", | |
"2 84300903 M 19.69 21.25 130.00 \n", | |
"3 84348301 M 11.42 20.38 77.58 \n", | |
"4 84358402 M 20.29 14.34 135.10 \n", | |
"\n", | |
" texture_mean texture_sd_error texture_worst perimeter_mean \\\n", | |
"0 1001.0 0.11840 0.27760 0.3001 \n", | |
"1 1326.0 0.08474 0.07864 0.0869 \n", | |
"2 1203.0 0.10960 0.15990 0.1974 \n", | |
"3 386.1 0.14250 0.28390 0.2414 \n", | |
"4 1297.0 0.10030 0.13280 0.1980 \n", | |
"\n", | |
" perimeter_sd_error ... concavity_worst \\\n", | |
"0 0.14710 ... 25.38 \n", | |
"1 0.07017 ... 24.99 \n", | |
"2 0.12790 ... 23.57 \n", | |
"3 0.10520 ... 14.91 \n", | |
"4 0.10430 ... 22.54 \n", | |
"\n", | |
" concave_points_mean concave_points_sd_error concave_points_worst \\\n", | |
"0 17.33 184.60 2019.0 \n", | |
"1 23.41 158.80 1956.0 \n", | |
"2 25.53 152.50 1709.0 \n", | |
"3 26.50 98.87 567.7 \n", | |
"4 16.67 152.20 1575.0 \n", | |
"\n", | |
" symmetry_mean symmetry_sd_error symmetry_worst fractal_dimension_mean \\\n", | |
"0 0.1622 0.6656 0.7119 0.2654 \n", | |
"1 0.1238 0.1866 0.2416 0.1860 \n", | |
"2 0.1444 0.4245 0.4504 0.2430 \n", | |
"3 0.2098 0.8663 0.6869 0.2575 \n", | |
"4 0.1374 0.2050 0.4000 0.1625 \n", | |
"\n", | |
" fractal_dimension_sd_error fractal_dimension_worst \n", | |
"0 0.4601 0.11890 \n", | |
"1 0.2750 0.08902 \n", | |
"2 0.3613 0.08758 \n", | |
"3 0.6638 0.17300 \n", | |
"4 0.2364 0.07678 \n", | |
"\n", | |
"[5 rows x 32 columns]" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# confirm that the field names now appear as the column labels\n", | |
"bc_data.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"RangeIndex: 569 entries, 0 to 568\n", | |
"Data columns (total 32 columns):\n", | |
"ID 569 non-null int64\n", | |
"diagnosis 569 non-null object\n", | |
"radius_mean 569 non-null float64\n", | |
"radius_sd_error 569 non-null float64\n", | |
"radius_worst 569 non-null float64\n", | |
"texture_mean 569 non-null float64\n", | |
"texture_sd_error 569 non-null float64\n", | |
"texture_worst 569 non-null float64\n", | |
"perimeter_mean 569 non-null float64\n", | |
"perimeter_sd_error 569 non-null float64\n", | |
"perimeter_worst 569 non-null float64\n", | |
"area_mean 569 non-null float64\n", | |
"area_sd_error 569 non-null float64\n", | |
"area_worst 569 non-null float64\n", | |
"smoothness_mean 569 non-null float64\n", | |
"smoothness_sd_error 569 non-null float64\n", | |
"smoothness_worst 569 non-null float64\n", | |
"compactness_mean 569 non-null float64\n", | |
"compactness_sd_error 569 non-null float64\n", | |
"compactness_worst 569 non-null float64\n", | |
"concavity_mean 569 non-null float64\n", | |
"concavity_sd_error 569 non-null float64\n", | |
"concavity_worst 569 non-null float64\n", | |
"concave_points_mean 569 non-null float64\n", | |
"concave_points_sd_error 569 non-null float64\n", | |
"concave_points_worst 569 non-null float64\n", | |
"symmetry_mean 569 non-null float64\n", | |
"symmetry_sd_error 569 non-null float64\n", | |
"symmetry_worst 569 non-null float64\n", | |
"fractal_dimension_mean 569 non-null float64\n", | |
"fractal_dimension_sd_error 569 non-null float64\n", | |
"fractal_dimension_worst 569 non-null float64\n", | |
"dtypes: float64(30), int64(1), object(1)\n", | |
"memory usage: 142.3+ KB\n" | |
] | |
} | |
], | |
"source": [ | |
"# check for any null values and look at types of columns\n", | |
"# (info() prints the non-null count and dtype of every column)\n", | |
"bc_data.info()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# encode 'diagnosis' column as integer category codes\n", | |
"# (the inferred categories are sorted, so B -> 0 and M -> 1)\n", | |
"# NOTE: this overwrites the original B/M letters in bc_data\n", | |
"bc_data['diagnosis'] = bc_data['diagnosis'].astype('category').cat.codes" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>ID</th>\n", | |
" <th>diagnosis</th>\n", | |
" <th>radius_mean</th>\n", | |
" <th>radius_sd_error</th>\n", | |
" <th>radius_worst</th>\n", | |
" <th>texture_mean</th>\n", | |
" <th>texture_sd_error</th>\n", | |
" <th>texture_worst</th>\n", | |
" <th>perimeter_mean</th>\n", | |
" <th>perimeter_sd_error</th>\n", | |
" <th>...</th>\n", | |
" <th>concavity_worst</th>\n", | |
" <th>concave_points_mean</th>\n", | |
" <th>concave_points_sd_error</th>\n", | |
" <th>concave_points_worst</th>\n", | |
" <th>symmetry_mean</th>\n", | |
" <th>symmetry_sd_error</th>\n", | |
" <th>symmetry_worst</th>\n", | |
" <th>fractal_dimension_mean</th>\n", | |
" <th>fractal_dimension_sd_error</th>\n", | |
" <th>fractal_dimension_worst</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>count</th>\n", | |
" <td>5.690000e+02</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" <td>569.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td>3.037183e+07</td>\n", | |
" <td>0.372583</td>\n", | |
" <td>14.127292</td>\n", | |
" <td>19.289649</td>\n", | |
" <td>91.969033</td>\n", | |
" <td>654.889104</td>\n", | |
" <td>0.096360</td>\n", | |
" <td>0.104341</td>\n", | |
" <td>0.088799</td>\n", | |
" <td>0.048919</td>\n", | |
" <td>...</td>\n", | |
" <td>16.269190</td>\n", | |
" <td>25.677223</td>\n", | |
" <td>107.261213</td>\n", | |
" <td>880.583128</td>\n", | |
" <td>0.132369</td>\n", | |
" <td>0.254265</td>\n", | |
" <td>0.272188</td>\n", | |
" <td>0.114606</td>\n", | |
" <td>0.290076</td>\n", | |
" <td>0.083946</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td>1.250206e+08</td>\n", | |
" <td>0.483918</td>\n", | |
" <td>3.524049</td>\n", | |
" <td>4.301036</td>\n", | |
" <td>24.298981</td>\n", | |
" <td>351.914129</td>\n", | |
" <td>0.014064</td>\n", | |
" <td>0.052813</td>\n", | |
" <td>0.079720</td>\n", | |
" <td>0.038803</td>\n", | |
" <td>...</td>\n", | |
" <td>4.833242</td>\n", | |
" <td>6.146258</td>\n", | |
" <td>33.602542</td>\n", | |
" <td>569.356993</td>\n", | |
" <td>0.022832</td>\n", | |
" <td>0.157336</td>\n", | |
" <td>0.208624</td>\n", | |
" <td>0.065732</td>\n", | |
" <td>0.061867</td>\n", | |
" <td>0.018061</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td>8.670000e+03</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>6.981000</td>\n", | |
" <td>9.710000</td>\n", | |
" <td>43.790000</td>\n", | |
" <td>143.500000</td>\n", | |
" <td>0.052630</td>\n", | |
" <td>0.019380</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>7.930000</td>\n", | |
" <td>12.020000</td>\n", | |
" <td>50.410000</td>\n", | |
" <td>185.200000</td>\n", | |
" <td>0.071170</td>\n", | |
" <td>0.027290</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.156500</td>\n", | |
" <td>0.055040</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td>8.692180e+05</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>11.700000</td>\n", | |
" <td>16.170000</td>\n", | |
" <td>75.170000</td>\n", | |
" <td>420.300000</td>\n", | |
" <td>0.086370</td>\n", | |
" <td>0.064920</td>\n", | |
" <td>0.029560</td>\n", | |
" <td>0.020310</td>\n", | |
" <td>...</td>\n", | |
" <td>13.010000</td>\n", | |
" <td>21.080000</td>\n", | |
" <td>84.110000</td>\n", | |
" <td>515.300000</td>\n", | |
" <td>0.116600</td>\n", | |
" <td>0.147200</td>\n", | |
" <td>0.114500</td>\n", | |
" <td>0.064930</td>\n", | |
" <td>0.250400</td>\n", | |
" <td>0.071460</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td>9.060240e+05</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>13.370000</td>\n", | |
" <td>18.840000</td>\n", | |
" <td>86.240000</td>\n", | |
" <td>551.100000</td>\n", | |
" <td>0.095870</td>\n", | |
" <td>0.092630</td>\n", | |
" <td>0.061540</td>\n", | |
" <td>0.033500</td>\n", | |
" <td>...</td>\n", | |
" <td>14.970000</td>\n", | |
" <td>25.410000</td>\n", | |
" <td>97.660000</td>\n", | |
" <td>686.500000</td>\n", | |
" <td>0.131300</td>\n", | |
" <td>0.211900</td>\n", | |
" <td>0.226700</td>\n", | |
" <td>0.099930</td>\n", | |
" <td>0.282200</td>\n", | |
" <td>0.080040</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td>8.813129e+06</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>15.780000</td>\n", | |
" <td>21.800000</td>\n", | |
" <td>104.100000</td>\n", | |
" <td>782.700000</td>\n", | |
" <td>0.105300</td>\n", | |
" <td>0.130400</td>\n", | |
" <td>0.130700</td>\n", | |
" <td>0.074000</td>\n", | |
" <td>...</td>\n", | |
" <td>18.790000</td>\n", | |
" <td>29.720000</td>\n", | |
" <td>125.400000</td>\n", | |
" <td>1084.000000</td>\n", | |
" <td>0.146000</td>\n", | |
" <td>0.339100</td>\n", | |
" <td>0.382900</td>\n", | |
" <td>0.161400</td>\n", | |
" <td>0.317900</td>\n", | |
" <td>0.092080</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td>9.113205e+08</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>28.110000</td>\n", | |
" <td>39.280000</td>\n", | |
" <td>188.500000</td>\n", | |
" <td>2501.000000</td>\n", | |
" <td>0.163400</td>\n", | |
" <td>0.345400</td>\n", | |
" <td>0.426800</td>\n", | |
" <td>0.201200</td>\n", | |
" <td>...</td>\n", | |
" <td>36.040000</td>\n", | |
" <td>49.540000</td>\n", | |
" <td>251.200000</td>\n", | |
" <td>4254.000000</td>\n", | |
" <td>0.222600</td>\n", | |
" <td>1.058000</td>\n", | |
" <td>1.252000</td>\n", | |
" <td>0.291000</td>\n", | |
" <td>0.663800</td>\n", | |
" <td>0.207500</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>8 rows × 32 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" ID diagnosis radius_mean radius_sd_error radius_worst \\\n", | |
"count 5.690000e+02 569.000000 569.000000 569.000000 569.000000 \n", | |
"mean 3.037183e+07 0.372583 14.127292 19.289649 91.969033 \n", | |
"std 1.250206e+08 0.483918 3.524049 4.301036 24.298981 \n", | |
"min 8.670000e+03 0.000000 6.981000 9.710000 43.790000 \n", | |
"25% 8.692180e+05 0.000000 11.700000 16.170000 75.170000 \n", | |
"50% 9.060240e+05 0.000000 13.370000 18.840000 86.240000 \n", | |
"75% 8.813129e+06 1.000000 15.780000 21.800000 104.100000 \n", | |
"max 9.113205e+08 1.000000 28.110000 39.280000 188.500000 \n", | |
"\n", | |
" texture_mean texture_sd_error texture_worst perimeter_mean \\\n", | |
"count 569.000000 569.000000 569.000000 569.000000 \n", | |
"mean 654.889104 0.096360 0.104341 0.088799 \n", | |
"std 351.914129 0.014064 0.052813 0.079720 \n", | |
"min 143.500000 0.052630 0.019380 0.000000 \n", | |
"25% 420.300000 0.086370 0.064920 0.029560 \n", | |
"50% 551.100000 0.095870 0.092630 0.061540 \n", | |
"75% 782.700000 0.105300 0.130400 0.130700 \n", | |
"max 2501.000000 0.163400 0.345400 0.426800 \n", | |
"\n", | |
" perimeter_sd_error ... concavity_worst \\\n", | |
"count 569.000000 ... 569.000000 \n", | |
"mean 0.048919 ... 16.269190 \n", | |
"std 0.038803 ... 4.833242 \n", | |
"min 0.000000 ... 7.930000 \n", | |
"25% 0.020310 ... 13.010000 \n", | |
"50% 0.033500 ... 14.970000 \n", | |
"75% 0.074000 ... 18.790000 \n", | |
"max 0.201200 ... 36.040000 \n", | |
"\n", | |
" concave_points_mean concave_points_sd_error concave_points_worst \\\n", | |
"count 569.000000 569.000000 569.000000 \n", | |
"mean 25.677223 107.261213 880.583128 \n", | |
"std 6.146258 33.602542 569.356993 \n", | |
"min 12.020000 50.410000 185.200000 \n", | |
"25% 21.080000 84.110000 515.300000 \n", | |
"50% 25.410000 97.660000 686.500000 \n", | |
"75% 29.720000 125.400000 1084.000000 \n", | |
"max 49.540000 251.200000 4254.000000 \n", | |
"\n", | |
" symmetry_mean symmetry_sd_error symmetry_worst \\\n", | |
"count 569.000000 569.000000 569.000000 \n", | |
"mean 0.132369 0.254265 0.272188 \n", | |
"std 0.022832 0.157336 0.208624 \n", | |
"min 0.071170 0.027290 0.000000 \n", | |
"25% 0.116600 0.147200 0.114500 \n", | |
"50% 0.131300 0.211900 0.226700 \n", | |
"75% 0.146000 0.339100 0.382900 \n", | |
"max 0.222600 1.058000 1.252000 \n", | |
"\n", | |
" fractal_dimension_mean fractal_dimension_sd_error \\\n", | |
"count 569.000000 569.000000 \n", | |
"mean 0.114606 0.290076 \n", | |
"std 0.065732 0.061867 \n", | |
"min 0.000000 0.156500 \n", | |
"25% 0.064930 0.250400 \n", | |
"50% 0.099930 0.282200 \n", | |
"75% 0.161400 0.317900 \n", | |
"max 0.291000 0.663800 \n", | |
"\n", | |
" fractal_dimension_worst \n", | |
"count 569.000000 \n", | |
"mean 0.083946 \n", | |
"std 0.018061 \n", | |
"min 0.055040 \n", | |
"25% 0.071460 \n", | |
"50% 0.080040 \n", | |
"75% 0.092080 \n", | |
"max 0.207500 \n", | |
"\n", | |
"[8 rows x 32 columns]" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# look at some summary statistics for the dataset\n", | |
"# (ID and the encoded diagnosis are numeric, so they are summarised as well)\n", | |
"bc_data.describe()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"collapsed": true | |
}, | |
"source": [ | |
"### Compute the mean and median smoothness and compactness for benign and malignant tumors - do they differ? " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# split the rows by encoded diagnosis: code 0 is benign, code 1 is malignant\n", | |
"benign = bc_data.loc[bc_data['diagnosis'].eq(0)]\n", | |
"malignant = bc_data.loc[bc_data['diagnosis'].eq(1)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"mean and median smoothness for benign is 2.0003, 1.851\n", | |
"mean and median smoothness for malignant is 4.3239, 3.6795\n" | |
] | |
} | |
], | |
"source": [ | |
"# smoothness\n", | |
"\n", | |
"median_smooth_benign = statistics.median(benign['smoothness_mean'])\n", | |
"median_smooth_malig = statistics.median(malignant['smoothness_mean'])\n", | |
"\n", | |
"# Round the medians as well as the means: for an even-sized group the median is\n", | |
"# the average of the two middle values and can print with floating-point noise.\n", | |
"print('mean and median smoothness for benign is {}, {}'.format(\n", | |
"    round(benign['smoothness_mean'].mean(), 4), round(median_smooth_benign, 4)))\n", | |
"print('mean and median smoothness for malignant is {}, {}'.format(\n", | |
"    round(malignant['smoothness_mean'].mean(), 4), round(median_smooth_malig, 4)))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"mean and median compactness for benign is 0.0214, 0.0163\n", | |
"mean and median compactness for malignant is 0.0323, 0.0286\n" | |
] | |
} | |
], | |
"source": [ | |
"# compactness\n", | |
"\n", | |
"median_compact_benign = statistics.median(benign['compactness_mean'])\n", | |
"median_compact_malig = statistics.median(malignant['compactness_mean'])\n", | |
"\n", | |
"# Round every printed value to 4 decimals so both lines use the same precision\n", | |
"# and an averaged median cannot print with floating-point noise.\n", | |
"print('mean and median compactness for benign is {}, {}'.format(\n", | |
"    round(benign['compactness_mean'].mean(), 4), round(median_compact_benign, 4)))\n", | |
"print('mean and median compactness for malignant is {}, {}'.format(\n", | |
"    round(malignant['compactness_mean'].mean(), 4), round(median_compact_malig, 4)))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Write a function to generate bootstrap samples of the data." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def get_bootstrap(df, num_samples_to_get, random_state=None):\n", | |
"    \"\"\"Draw a bootstrap sample: rows of `df` picked at random with replacement.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    df : pandas.DataFrame\n", | |
"        Data to resample.\n", | |
"    num_samples_to_get : int\n", | |
"        Number of rows in the returned sample.\n", | |
"    random_state : int, optional\n", | |
"        Seed for a reproducible draw. When None (the default) the global\n", | |
"        NumPy random state is used.\n", | |
"\n", | |
"    Returns\n", | |
"    -------\n", | |
"    pandas.DataFrame\n", | |
"        `num_samples_to_get` rows with a fresh 0..k-1 index and the same\n", | |
"        columns and column dtypes as `df`.\n", | |
"    \"\"\"\n", | |
"    n = len(df)\n", | |
"    rng = np.random if random_state is None else np.random.RandomState(random_state)\n", | |
"    row_positions = rng.randint(n, size=num_samples_to_get)\n", | |
"    # Positional indexing keeps each column's dtype; going through `df.values`\n", | |
"    # would coerce every column (e.g. integer IDs) to one common dtype.\n", | |
"    return df.iloc[row_positions].reset_index(drop=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>ID</th>\n", | |
" <th>diagnosis</th>\n", | |
" <th>radius_mean</th>\n", | |
" <th>radius_sd_error</th>\n", | |
" <th>radius_worst</th>\n", | |
" <th>texture_mean</th>\n", | |
" <th>texture_sd_error</th>\n", | |
" <th>texture_worst</th>\n", | |
" <th>perimeter_mean</th>\n", | |
" <th>perimeter_sd_error</th>\n", | |
" <th>...</th>\n", | |
" <th>concavity_worst</th>\n", | |
" <th>concave_points_mean</th>\n", | |
" <th>concave_points_sd_error</th>\n", | |
" <th>concave_points_worst</th>\n", | |
" <th>symmetry_mean</th>\n", | |
" <th>symmetry_sd_error</th>\n", | |
" <th>symmetry_worst</th>\n", | |
" <th>fractal_dimension_mean</th>\n", | |
" <th>fractal_dimension_sd_error</th>\n", | |
" <th>fractal_dimension_worst</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>868682.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>11.430</td>\n", | |
" <td>15.39</td>\n", | |
" <td>73.06</td>\n", | |
" <td>399.8</td>\n", | |
" <td>0.09639</td>\n", | |
" <td>0.06889</td>\n", | |
" <td>0.03503</td>\n", | |
" <td>0.02875</td>\n", | |
" <td>...</td>\n", | |
" <td>12.320</td>\n", | |
" <td>22.02</td>\n", | |
" <td>79.93</td>\n", | |
" <td>462.0</td>\n", | |
" <td>0.1190</td>\n", | |
" <td>0.16480</td>\n", | |
" <td>0.13990</td>\n", | |
" <td>0.08476</td>\n", | |
" <td>0.2676</td>\n", | |
" <td>0.06765</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>911384.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>14.920</td>\n", | |
" <td>14.93</td>\n", | |
" <td>96.45</td>\n", | |
" <td>686.9</td>\n", | |
" <td>0.08098</td>\n", | |
" <td>0.08549</td>\n", | |
" <td>0.05539</td>\n", | |
" <td>0.03221</td>\n", | |
" <td>...</td>\n", | |
" <td>17.180</td>\n", | |
" <td>18.22</td>\n", | |
" <td>112.00</td>\n", | |
" <td>906.6</td>\n", | |
" <td>0.1065</td>\n", | |
" <td>0.27910</td>\n", | |
" <td>0.31510</td>\n", | |
" <td>0.11470</td>\n", | |
" <td>0.2688</td>\n", | |
" <td>0.08273</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>924342.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>9.333</td>\n", | |
" <td>21.94</td>\n", | |
" <td>59.01</td>\n", | |
" <td>264.0</td>\n", | |
" <td>0.09240</td>\n", | |
" <td>0.05605</td>\n", | |
" <td>0.03996</td>\n", | |
" <td>0.01282</td>\n", | |
" <td>...</td>\n", | |
" <td>9.845</td>\n", | |
" <td>25.05</td>\n", | |
" <td>62.86</td>\n", | |
" <td>295.8</td>\n", | |
" <td>0.1103</td>\n", | |
" <td>0.08298</td>\n", | |
" <td>0.07993</td>\n", | |
" <td>0.02564</td>\n", | |
" <td>0.2435</td>\n", | |
" <td>0.07393</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>915664.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>14.810</td>\n", | |
" <td>14.70</td>\n", | |
" <td>94.66</td>\n", | |
" <td>680.7</td>\n", | |
" <td>0.08472</td>\n", | |
" <td>0.05016</td>\n", | |
" <td>0.03416</td>\n", | |
" <td>0.02541</td>\n", | |
" <td>...</td>\n", | |
" <td>15.610</td>\n", | |
" <td>17.58</td>\n", | |
" <td>101.70</td>\n", | |
" <td>760.2</td>\n", | |
" <td>0.1139</td>\n", | |
" <td>0.10110</td>\n", | |
" <td>0.11010</td>\n", | |
" <td>0.07955</td>\n", | |
" <td>0.2334</td>\n", | |
" <td>0.06142</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>845636.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>16.020</td>\n", | |
" <td>23.24</td>\n", | |
" <td>102.70</td>\n", | |
" <td>797.8</td>\n", | |
" <td>0.08206</td>\n", | |
" <td>0.06669</td>\n", | |
" <td>0.03299</td>\n", | |
" <td>0.03323</td>\n", | |
" <td>...</td>\n", | |
" <td>19.190</td>\n", | |
" <td>33.88</td>\n", | |
" <td>123.80</td>\n", | |
" <td>1150.0</td>\n", | |
" <td>0.1181</td>\n", | |
" <td>0.15510</td>\n", | |
" <td>0.14590</td>\n", | |
" <td>0.09975</td>\n", | |
" <td>0.2948</td>\n", | |
" <td>0.08452</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5 rows × 32 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" ID diagnosis radius_mean radius_sd_error radius_worst \\\n", | |
"0 868682.0 0.0 11.430 15.39 73.06 \n", | |
"1 911384.0 0.0 14.920 14.93 96.45 \n", | |
"2 924342.0 0.0 9.333 21.94 59.01 \n", | |
"3 915664.0 0.0 14.810 14.70 94.66 \n", | |
"4 845636.0 1.0 16.020 23.24 102.70 \n", | |
"\n", | |
" texture_mean texture_sd_error texture_worst perimeter_mean \\\n", | |
"0 399.8 0.09639 0.06889 0.03503 \n", | |
"1 686.9 0.08098 0.08549 0.05539 \n", | |
"2 264.0 0.09240 0.05605 0.03996 \n", | |
"3 680.7 0.08472 0.05016 0.03416 \n", | |
"4 797.8 0.08206 0.06669 0.03299 \n", | |
"\n", | |
" perimeter_sd_error ... concavity_worst \\\n", | |
"0 0.02875 ... 12.320 \n", | |
"1 0.03221 ... 17.180 \n", | |
"2 0.01282 ... 9.845 \n", | |
"3 0.02541 ... 15.610 \n", | |
"4 0.03323 ... 19.190 \n", | |
"\n", | |
" concave_points_mean concave_points_sd_error concave_points_worst \\\n", | |
"0 22.02 79.93 462.0 \n", | |
"1 18.22 112.00 906.6 \n", | |
"2 25.05 62.86 295.8 \n", | |
"3 17.58 101.70 760.2 \n", | |
"4 33.88 123.80 1150.0 \n", | |
"\n", | |
" symmetry_mean symmetry_sd_error symmetry_worst fractal_dimension_mean \\\n", | |
"0 0.1190 0.16480 0.13990 0.08476 \n", | |
"1 0.1065 0.27910 0.31510 0.11470 \n", | |
"2 0.1103 0.08298 0.07993 0.02564 \n", | |
"3 0.1139 0.10110 0.11010 0.07955 \n", | |
"4 0.1181 0.15510 0.14590 0.09975 \n", | |
"\n", | |
" fractal_dimension_sd_error fractal_dimension_worst \n", | |
"0 0.2676 0.06765 \n", | |
"1 0.2688 0.08273 \n", | |
"2 0.2435 0.07393 \n", | |
"3 0.2334 0.06142 \n", | |
"4 0.2948 0.08452 \n", | |
"\n", | |
"[5 rows x 32 columns]" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"get_bootstrap(bc_data, 5)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Identify 2-3 variables that are predictive of a malignant tumor. Display the relationship visually and write 1-2 sentences explaining the relationship." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# split data into X and y (target)\n", | |
"\n", | |
"bc_X = bc_data.iloc[:, 2:]\n", | |
"bc_y = bc_data.iloc[:, 1]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(569, 30)" | |
] | |
}, | |
"execution_count": 19, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"bc_X.shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(569,)" | |
] | |
}, | |
"execution_count": 20, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"bc_y.shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>feature</th>\n", | |
" <th>f-score</th>\n", | |
" <th>p-value</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>fractal_dimension_mean</td>\n", | |
" <td>964.385393</td>\n", | |
" <td>1.969100e-124</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>concave_points_sd_error</td>\n", | |
" <td>897.944219</td>\n", | |
" <td>5.771397e-119</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>perimeter_sd_error</td>\n", | |
" <td>861.676020</td>\n", | |
" <td>7.101150e-116</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>concavity_worst</td>\n", | |
" <td>860.781707</td>\n", | |
" <td>8.482292e-116</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>radius_worst</td>\n", | |
" <td>697.235272</td>\n", | |
" <td>8.436251e-101</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>concave_points_worst</td>\n", | |
" <td>661.600206</td>\n", | |
" <td>2.828848e-97</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>radius_mean</td>\n", | |
" <td>646.981021</td>\n", | |
" <td>8.465941e-96</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>texture_mean</td>\n", | |
" <td>573.060747</td>\n", | |
" <td>4.734564e-88</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>perimeter_mean</td>\n", | |
" <td>533.793126</td>\n", | |
" <td>9.966556e-84</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>symmetry_worst</td>\n", | |
" <td>436.691939</td>\n", | |
" <td>2.464664e-72</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>texture_worst</td>\n", | |
" <td>313.233079</td>\n", | |
" <td>3.938263e-56</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>symmetry_sd_error</td>\n", | |
" <td>304.341063</td>\n", | |
" <td>7.069816e-55</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>area_sd_error</td>\n", | |
" <td>268.840327</td>\n", | |
" <td>9.738949e-50</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>smoothness_mean</td>\n", | |
" <td>253.897392</td>\n", | |
" <td>1.651905e-47</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>smoothness_sd_error</td>\n", | |
" <td>243.651586</td>\n", | |
" <td>5.895521e-46</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>concave_points_mean</td>\n", | |
" <td>149.596905</td>\n", | |
" <td>1.078057e-30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>symmetry_mean</td>\n", | |
" <td>122.472880</td>\n", | |
" <td>6.575144e-26</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>fractal_dimension_sd_error</td>\n", | |
" <td>118.860232</td>\n", | |
" <td>2.951121e-25</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>radius_sd_error</td>\n", | |
" <td>118.096059</td>\n", | |
" <td>4.058636e-25</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>compactness_worst</td>\n", | |
" <td>113.262760</td>\n", | |
" <td>3.072309e-24</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>20</th>\n", | |
" <td>texture_sd_error</td>\n", | |
" <td>83.651123</td>\n", | |
" <td>1.051850e-18</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>21</th>\n", | |
" <td>perimeter_worst</td>\n", | |
" <td>69.527444</td>\n", | |
" <td>5.733384e-16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>22</th>\n", | |
" <td>fractal_dimension_worst</td>\n", | |
" <td>66.443961</td>\n", | |
" <td>2.316432e-15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>23</th>\n", | |
" <td>compactness_mean</td>\n", | |
" <td>53.247339</td>\n", | |
" <td>9.975995e-13</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>24</th>\n", | |
" <td>compactness_sd_error</td>\n", | |
" <td>39.014482</td>\n", | |
" <td>8.260176e-10</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25</th>\n", | |
" <td>concavity_sd_error</td>\n", | |
" <td>3.468275</td>\n", | |
" <td>6.307355e-02</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>26</th>\n", | |
" <td>smoothness_worst</td>\n", | |
" <td>2.557968</td>\n", | |
" <td>1.102966e-01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>27</th>\n", | |
" <td>area_mean</td>\n", | |
" <td>0.093459</td>\n", | |
" <td>7.599368e-01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>28</th>\n", | |
" <td>area_worst</td>\n", | |
" <td>0.039095</td>\n", | |
" <td>8.433320e-01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>29</th>\n", | |
" <td>concavity_mean</td>\n", | |
" <td>0.024117</td>\n", | |
" <td>8.766418e-01</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" feature f-score p-value\n", | |
"0 fractal_dimension_mean 964.385393 1.969100e-124\n", | |
"1 concave_points_sd_error 897.944219 5.771397e-119\n", | |
"2 perimeter_sd_error 861.676020 7.101150e-116\n", | |
"3 concavity_worst 860.781707 8.482292e-116\n", | |
"4 radius_worst 697.235272 8.436251e-101\n", | |
"5 concave_points_worst 661.600206 2.828848e-97\n", | |
"6 radius_mean 646.981021 8.465941e-96\n", | |
"7 texture_mean 573.060747 4.734564e-88\n", | |
"8 perimeter_mean 533.793126 9.966556e-84\n", | |
"9 symmetry_worst 436.691939 2.464664e-72\n", | |
"10 texture_worst 313.233079 3.938263e-56\n", | |
"11 symmetry_sd_error 304.341063 7.069816e-55\n", | |
"12 area_sd_error 268.840327 9.738949e-50\n", | |
"13 smoothness_mean 253.897392 1.651905e-47\n", | |
"14 smoothness_sd_error 243.651586 5.895521e-46\n", | |
"15 concave_points_mean 149.596905 1.078057e-30\n", | |
"16 symmetry_mean 122.472880 6.575144e-26\n", | |
"17 fractal_dimension_sd_error 118.860232 2.951121e-25\n", | |
"18 radius_sd_error 118.096059 4.058636e-25\n", | |
"19 compactness_worst 113.262760 3.072309e-24\n", | |
"20 texture_sd_error 83.651123 1.051850e-18\n", | |
"21 perimeter_worst 69.527444 5.733384e-16\n", | |
"22 fractal_dimension_worst 66.443961 2.316432e-15\n", | |
"23 compactness_mean 53.247339 9.975995e-13\n", | |
"24 compactness_sd_error 39.014482 8.260176e-10\n", | |
"25 concavity_sd_error 3.468275 6.307355e-02\n", | |
"26 smoothness_worst 2.557968 1.102966e-01\n", | |
"27 area_mean 0.093459 7.599368e-01\n", | |
"28 area_worst 0.039095 8.433320e-01\n", | |
"29 concavity_mean 0.024117 8.766418e-01" | |
] | |
}, | |
"execution_count": 21, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Instantiate SelectKBest with scoring funcion and number of columns we want to select\n", | |
"\n", | |
"skb = SelectKBest(score_func=f_classif, k=3)\n", | |
"\n", | |
"# Fit X and y\n", | |
"\n", | |
"skb.fit(bc_X, bc_y)\n", | |
"\n", | |
"# Observe scores in descending order\n", | |
"\n", | |
"feat_list = list(zip(bc_X.columns,skb.scores_, skb.pvalues_))\n", | |
"feat_list.sort(key=lambda x: x[2], reverse=False)\n", | |
"pd.DataFrame(feat_list, columns=['feature','f-score','p-value'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# make a Boolean mask to select only these columns\n", | |
"\n", | |
"mask = skb.get_support()\n", | |
"k_best_features = bc_X.columns[mask]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Index(['perimeter_sd_error', 'concave_points_sd_error',\n", | |
" 'fractal_dimension_mean'],\n", | |
" dtype='object')" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"k_best_features" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<matplotlib.text.Text at 0x11afaa668>" | |
] | |
}, | |
"execution_count": 24, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAY0AAAFcCAYAAAA0xeJbAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl0VFWeB/Dvq/dqSapCEkhkWJIoS9IoO9IomhZBtJVG\nDuS00CPgNjKco5yjHXGQRozsLdjtGHAc6GnFzCgcV2QfIyAKQgMakNGEsIWENUBCUlVJbe/OHyFF\nCrJcQl4W6vv5B169V+/+bl5Vfeu+rRQhhAAREZEEU0sXQEREbQdDg4iIpDE0iIhIGkODiIikMTSI\niEgaQ4OIiKRpLV2ArOLi8mZtLzY2EiUl7mZts6Wxz+EhHPsMtEy/4+OjmrW95sCRRh00TW3pEpod\n+xwewrHPQPj2u6kxNIiISBpDg4iIpDE0iIhIGkODiIikMTSIiEgaQ4OIiKQxNIiISBpDg4iIpLWZ\nK8JvJpV+D/JKDsPlc8FisgAAvLoXdrMdKbE94Al4sK1oB866inHadQZWzYoYazRub58CAYGzrnM4\n5TqLMm85NEVFQlQX9GqfjN5xvWDTrHW24fK5cb7yIjx+DwBcXm8UnD437JodFSfKkXv2KE45z0KH\nDgCI0CKg6wH4hR8BXQcUwKxqCPh1QBHQhQ6zakEnR0fEmKNx+NJRlPucEKj6bS8zzFU1KQqsJjO6\nRHVGR3s8+nTohY0FX6Pw0kl4dS+sqgUWkxUVgQroEPAHfBAC0KFDM2nQFBWqYoI34INfBCCgQ0ft\nvx9mggKTosKimGHRLHD6XNCFDgEdgAJVUWFVLYi1xaAiUAmv3wvNpKGDNRYxthg4fS5cqLgIt78C\ndi0CMdYYBBCA0+eEpmjoGBkPn/DDqlrQztIOMdZo5F7Mh0/3ISGqCx5I/A2sqhU/njuAH84dgE/3\nIyGqM1K73I3ci/n47uQulHudUE0qYq0xMCkKKvyVsGk2dHV0QkJUF3h1H0o9lxBjiUaMLRopsT1g\n06zB7VrquYTzFRfh9rlR5nUi1hqN5NjuIa8BACgqP4VVeZ+h3OuE3WzHHR1+hdM/n0JZhRuR5gj0\njLkNcRFxSIntAQDB14zdbMet7RKQX3IURy4dR0AEYFUtiLN1CKmnrtd19Wv56mUa+764tV0CjpcV\nhtRWc7qhtir9Huw5eRSnzp9vVG10hdJWfrmvuW8jEh8fZUibO0/twa7Te+DTfXD6XHD5qm5rYDfb\n4TBHoqTyEir8lQiIQPCDV4ZJMaGDrT0eTLofAK5pI6DrVR+zQlyzXhNMwZCgxlOgVP2rKDDBhIAI\nBOddz7YEqraJZlKhKArsZjtirdGIj4hDccV5lHhKUeZxwi/8IW2rigmxtlg8mHQ/hnYejNd2/hnn\nKy802JZdi0SkORKAQIRmAwA4fW6UecsBUVW5LqpeH6piQjtrO8Rao3FXp8EY2nkwgNDXdTWzyRyy\nTH1C3xduuHyuqtrMkQAAt68CkeZIOMyRcPpcIdMNtVW9bmHS4fcHrru2G3Ez3kbE0NDYv38/lixZ\ngqysrJDHt2zZgmXLlkHTNKSlpeGxxx5rcF03Q2jsPLUH357cCQBw+lxwel1XLaHAp/uu+wOmmgkm\nRGg2WDULHGZ7sI2A0EM+wKj5KFBuaHtqpqpbX5hNZvh0H8wmDZUBb63bU4ECk2JCtLUdvH4vnP6r\nX18Nt+Ww2AEouOQpuzw6E8F1B5e93IbDHInULkMBIPi6rk1ql6H1fjiHvi/ccHqdwXmBGmEFXPk7\nVHNYHMHgqK2tmuvWNDUYGrK13aibMTQM2z21YsUKfPnll4iIiAh53OfzYeHChfjkk08QERGBP/zh\nDxg+fDji4uKMKqVVqPR7sOv0HgBV39qqRxjVBETIm6ExdOhw+yvg0/2wqVa4fO6Qb4nU/BobGAAu\n74AzAVDg9ldAVVS4/ZVAPevUhY5LlZcQuM6R
Y1VbKpxeN4QQ17xmBEQwOHShw+l1IlKLwM5Tu4Ea\ngVKbXaf3YFDHfrCqlmvmhb4vRHCEASCkDpNiAoSA218BzaQFW3T5XIjUImBSlGvaqrnuxtRGtTPs\nQHhiYiIyMzOvefzIkSNITExEdHQ0LBYLBg0ahD176t+wN4O8ksPBUKj0e3D1AC/QRB/sAgK6CKDc\n56raFVXj2yK1PQGhQ1x+bVR/869ra1bPud7AuNJWALoIBEcx9b1udKGjMuBBudeJcp+zzuUAwKf7\nkHcxv9Z5Ie+LQOj7omb7NV/HosZ7RQiByoCn1rZqrrsxtVHtDBtpPPTQQygqKrrmcafTiaioK0M2\nu90Op7P+Fx1QdVvj5r5LZVMOLU3lgSv1BwAoV307a8rPdeXyG666DWZG26agahsqwvhtqaCqsQba\nEQAURUAogALR4HtTsem1vp9qvi+UgAh5X4R8r1JqTCsIWU5RQtuvbivkPXdZbXXWVRvVrtnPnnI4\nHHC5rgxBXS5XSIjUpSXug9+UxzT0yhr7U3Vc9Y5oYuLyPmjRDB8yZLzqbSgUGL5BhVw7CgAhql5j\nQlGuOVZwzWorTbW+n2q+L6rXV7ONmnUpNf6PkFFIaPvVbYW851D7MY36amsKN2MYNft1Gt27d0dB\nQQFKS0vh9Xqxd+9eDBgwoLnLaHYpsT1gNpkBADbNCuWqkUb1gb4bpVw+3TTKbIeiKFAUU8hBTGpb\nVMUE5fJrw3R5W9a1NavnqI18W6uKCpOiQlXUkPXVxqSYYFOtiLI4EGV21Ltes8mMlPY9a50X8r5Q\nQ98XNduv+TpWarxXFEWBTb1y6mzNtmquuzG1Ue2aLTTWrl2L1atXw2w2Y8aMGXjmmWcwYcIEpKWl\noWPHjs1VRouxaVbc1anqLA2TYgqeSlhNgQKLyXJDH/AmmBCpRcBhiYRm0mA3R0K53B61jBvdntUh\nUX2wN1KzwaTUvSvIpJgQbYuGQ7M3oi3AYYlElNVxzWvm6rOnHBYHTIqCoZ2HYGjnX9e77rs6Da7z\nQHPo+6Lq9OJgm0rV2WBVYVk1HalFhPxF7WZ78CD41W3VXHdjaqPa8TqNOvA6DbpevE6D12lc7Wbc\nPcXQqINRoQEAnoAXeRfz4fS5YVXNABR4Al44zJFIad8TnoAX2wq/vXxF+FlYNStirdHo1eFXEELH\nWXcxTjvP4JK3HJop9Irw6m9NtbXh8rlxoeICKv1eAAI2zYZoazs4fU44zA64FRdyzxzGSecZ6NCr\nvuGaI+EP+BEQAQT0AKAAmmqGHggAEAgIHRbVgk6Of0J7azQOlRxFma88+CFpgRlWs7VqJKVa0NXR\nCR3t8egb1xsbj2ej4FJR8Ipwm2qF218BHXrwivDA5SvCzYoKk0mF1++DX/gbuCK86tupVTHDqllQ\nFrwiXFSNvEwqbCYrYiNiUel3w+P3QVOrrgiPtcXA5XWhuOIi3P6qq6bbW2IRUAIo9zqhmTR0jLwF\nft0Hi2pBtCUKMbZY/HIhDz7dj8SozhiRNAxW1YIfz/2EH87thy9QdUX4b7oOxS8X8/Ft0U6Ue13Q\nTCra22KgXD6lNkKzoWtUl6orwgNelHpKEWONQYy1HVLa94RVtQS3a6mnDBcqLsLld6HMc/mK8PY9\nQl4DAHDKeQYf5X6KMq8TdnMk+sTdjlOVJ1HmdsFujkSPmG6Ii2gf3EVT/ZpxmCNxa3QS8kuO4Ejp\nMQSEXnVFeESHkHrqel1Xv5av51t86GvWAkDAE/AFazl+qSCktprTDbXlCXhxJlCEk+cvNKq2xmJo\ntKCbKTRaK/Y5PIRjn4GW6ffNGBrc2U1ERNIYGkREJI2hQURE0hgaREQkjaFBRETSGBpERCSNoUFE\nRNIYGkREJI2hQURE0hgaREQkjaFBRETSGBpERCSNoUFERNIYGkREJI2hQURE0hgaREQkjaFBRETS\nGBpERCSN
oUFERNIYGkREJI2hQURE0hgaREQkjaFBRETSGBpERCSNoUFERNIYGkREJI2hQURE0hga\nREQkjaFBRETSGBpERCSNoUFERNIYGkREJI2hQURE0hgaREQkjaFBRETSGBpERCSNoUFERNIYGkRE\nJI2hQURE0hgaREQkjaFBRETSGBpERCSNoUFERNIYGkREJI2hQURE0gwLDV3XMXv2bIwfPx6TJk1C\nQUFByPwvv/wSY8eORVpaGj788EOjyiAioiakGbXi7OxseL1erF69Gjk5OVi0aBH+4z/+Izj/jTfe\nwLp16xAZGYlRo0Zh1KhRiI6ONqocIiJqAoaFxr59+5CamgoA6N+/Pw4ePBgyPyUlBeXl5dA0DUII\nKIpiVClERNREDAsNp9MJh8MRnFZVFX6/H5pW1WTPnj2RlpaGiIgIjBw5Eu3atat3fbGxkdA01ahy\naxUfH9Ws7bUG7HN4CMc+A+Hb76ZkWGg4HA64XK7gtK7rwcDIzc3Ftm3b8PXXXyMyMhLTp0/Hxo0b\n8fDDD9e5vpISt1Gl1io+PgrFxeXN2mZLY5/DQzj2GWiZft+MIWXYgfCBAwdi+/btAICcnBwkJycH\n50VFRcFms8FqtUJVVbRv3x5lZWVGlUJERE3EsJHGyJEjsWPHDkyYMAFCCCxYsABr166F2+3G+PHj\nMX78ePzzP/8zzGYzEhMTMXbsWKNKISKiJqIIIURLFyGjJYaV4TaEZ5/DQzj2GeDuqabCi/uIiEga\nQ4OIiKQxNIiISBpDg4iIpDE0iIhIGkODiIikMTSIiEgaQ4OIiKQxNIiISBpDg4iIpDE0iIhIGkOD\niIikMTSIiEgaQ4OIiKQxNIiISBpDg4iIpDE0iIhIGkODiIikMTSIiEgaQ4OIiKQxNIiISBpDg4iI\npDE0iIhIGkODiIikMTSIiEgaQ4OIiKQxNIiISBpDg4iIpDE0iIhIGkODiIikMTSIiEgaQ4OIiKQx\nNIiISBpDg4iIpDE0iIhIGkODiIikMTSIiEgaQ4OIiKQxNIiISBpDg4iIpDE0iIhIGkODiIikMTSI\niEgaQ4OIiKQxNIiISBpDg4iIpDE0iIhImmbUinVdR0ZGBvLy8mCxWDBv3jwkJSUF5x84cACLFi2C\nEALx8fFYvHgxrFarUeUQEVETMGykkZ2dDa/Xi9WrVyM9PR2LFi0KzhNC4NVXX8XChQvx0UcfITU1\nFSdPnjSqFCIiaiKGjTT27duH1NRUAED//v1x8ODB4Lxjx44hJiYG77//PvLz83HfffehW7duRpVC\nRERNxLDQcDqdcDgcwWlVVeH3+6FpGkpKSvDjjz9i9uzZSExMxNSpU9G7d2/cfffdda4vNjYSmqYa\nVW6t4uOjmrW91oB9Dg/h2GcgfPvdlAwLDYfDAZfLFZzWdR2aVtVcTEwMkpKS0L17dwBAamoqDh48\nWG9olJS4jSq1VvHxUSguLm/WNlsa+xwewrHPQMv0+2YMKcOOaQwcOBDbt28HAOTk5CA5OTk4LyEh\nAS6XCwUFBQCAvXv3omfPnkaVQkRETcSwkcbIkSOxY8cOTJgwAUIILFiwAGvXroXb7cb48eMxf/58\npKenQwiBAQMGYNiwYUaVQkRETUQRQoiWLkJGSwwrw20Izz6Hh3DsM8DdU02FF/cREZE0qdA4cOAA\n3nvvPXi9Xjz99NO46667sHnzZqNrIyKiVkYqNObNm4fevXtj8+bNsNls+Pzzz7F8+XKjayMiolZG\nKjR0XcfgwYOxbds2PPjgg+jUqRMCgYDRtRERUSsjFRoRERH4+9//jl27duH+++/HypUrYbfbja6N\niIhaGanQWLJkCdxuNzIzMxEdHY1z587hL3/5i9G1ERFRK1NvaPzf//0fAODEiRMYMmQIAoEA9uzZ\ng2HDhuHEiRPNUiAREbUe9V7ct2rVKsydOxdvv/32NfMURcEHH3xgWGFERN
T61Bsac+fOBQBkZWWF\nPH71zQiJiCg8SB3T2Lp1KxYvXgyXy4WHH34YI0aMwP/8z/8YXRsREbUyUqGxdOlSjBs3Dhs2bEDf\nvn2xZcsWfPrpp0bXRkRErYz0bUS6d++Obdu2Yfjw4bDb7fD5fEbWRURErZBUaMTFxWHu3Ln46aef\nkJqaikWLFqFz585G10ZERK2MVGi8+eab6NOnD/77v/8bkZGRSEhIwJtvvml0bURE1MpI/Z6G3W6H\ny+XCkiVL4Pf7MWTIEERGRhpdGxERtTJSofHGG2+goKAAaWlpEELgs88+Q1FREf70pz8ZXR8REbUi\nUqGxY8cOfPHFFzCZqvZmDRs2DKNHjza0MCIian2kjmkEAgH4/f6QaVVVDSuKiIhaJ6mRxujRozF5\n8mSMGjUKALB+/frg/4mIKHxIhcbUqVPRq1cv7Nq1C0IITJ06FcOGDTO4NCKim9vu3buxfv16OBwO\nvPzyy83S5meffYZOnTrh7rvvbtTzpUIDADp37owRI0ZACAEA2LNnDwYPHtyoRomI6IrmCgwAGDdu\n3A09Xyo0Xn/9dWzduhUJCQnBx3iXWyKi61deXo4XX3wRHo8HUVFR8Hg8eOaZZ/Bf//VfWLFiBXbs\n2IGysjIMHz4czz//PDZu3Ijly5cjNjYWbrcbS5YswdKlS2GxWFBQUIBAIIB3330XPp8P06dPR2Vl\nJTRNw7x582Cz2fDiiy9C13W0a9cOf/3rX7FixQp069YN8fHxePPNN6EoCgYPHoz09HSp+qXPntq0\naRNsNtsN/bGIiMLdZ599hnvvvRdPPvkkPvjgA3zzzTcAEPwJ7ffffx9erxejRo3C1KlTkZmZiY8/\n/hhmsznkrNU77rgDc+bMwauvvopdu3Zhz549GDNmDEaPHo2dO3fizTffxOjRo9GtWze8+uqr+Pbb\nb1FWVhZ8/pYtW/D444/j0UcfxccffwwhBBRFabB+qbOnEhISgruliIio8Y4ePYrbb78dANCvX7/g\n4yaTCRUVFUhPT8f8+fPh8/lQUlKCDh06wG63w2KxBJ8HAMnJyQCAW265BR6PB0ePHsWAAQMAAAMH\nDsTRo0dx3333ISEhAc8++yzWr18PTbsyTpgyZQp++uknTJ48GSdOnICu61L1S400oqOjMWrUKAwY\nMAAWiyX4+MKFC6UaISKiKklJSdi/fz9+/etfB38dFQByc3Px888/491330VhYSE2btyIDh064OLF\ni3C73TCbzfjll1+Cy189Krj11luRk5ODrl27Yt++fUhISMA//vEPdOnSBe+99x7ee+89bNiwIbj8\nunXrMH78ePTo0QNTp07FkSNHgkFUH6nQSE1NRWpqqsyiRERUjwkTJuCPf/wjtm/fjvj4+ODjSUlJ\nKC8vx+9//3vY7XbExcXB7XbjhRdewMSJExEbGwtN00JGCzVNnToVr7zyCj766CMoioL58+fD4XDg\nhRdewEcffQSz2Yz58+fj448/BgDcfvvtmDFjBux2Ozp27Iju3btL1a8Iif1Op06dCn2SosBqtaJ9\n+/ZSjTSF4uLyZmsLAOLjo5q9zZbGPoeHcOwz0DL9jo+PuuF1/O1vf8PTTz8NXdcxduxYfPrppyF7\nfJqb1Ejjueeew6FDh5CSkgIhBPLz8xEfHw9VVTF37txGn+9LRET103Ud48aNg9VqxYQJE1o0MADJ\n0OjYsSPmzp2L3r17AwDy8vKwdOlSzJw5E9OmTcMnn3xiaJFEROFqypQpmDJlSkuXESR19tTJkyeD\ngQEAKSkpOHHiBDp16hQ8TYyIiG5+UiONhIQELFmyBGPGjIGu61i3bh2SkpLw448/Bu98S0RENz+p\nT/w33ngDfr8f6enpmDFjBnRdx4IFC1BYWIjXX3/d6BqJiKiVkBppOBwOzJgxIzgthEBRUREeffRR\nwwojIgp3FR4/9ucXo8zlRTu7Bf16xi
PCKn3LQENItZ6VlYW//vWvqKioCD7WpUsXZGdnG1YYEVE4\n+2p3AbL3nIDXd+W48efbDuOBwYkYOSSpUevUdR0ZGRnIy8uDxWLBvHnzkJR0feuS2j313nvvYc2a\nNXjkkUfw1VdfYf78+SGXvxMRUdP5ancBNuw8FhIYAOD1BbBh5zF8tbugUevNzs6G1+vF6tWrkZ6e\njkWLFl33OqRCo0OHDkhISEBKSgoOHTqEcePG4dixY9fdGBER1a/C40f2nhP1LpO95wQqPf56l6nN\nvn37gnf36N+/Pw4ePHjd65AKjYiICOzatQspKSnYunUriouLQ+6WSERETWN/fvE1I4yreX0B7M8v\nvu51O51OOByO4LSqqiE/5S1DKjRmzZqFLVu2IDU1FaWlpfjtb3+LiRMnXl+1RETUoDKXt0mXq8nh\ncMDlcgWndV2v815WdZFaOjk5GTNnzgQAZGZmXlcDREQkr51d7jYhssvVNHDgQGzduhWPPPIIcnJy\npO5qe7V6Q+Nf//Vf8Z//+Z8YPnx4rT/O8fXXX193g0REVLd+PePx+bbD9e6isphV9OsZX+f8uowc\nORI7duzAhAkTIITAggULrnsd9YbGwIED8cUXX2DatGnXvWIiIrp+EVYNDwxOxIaddZ9s9MDgRNga\ncb2GyWTCnDlzbqS8+kPj+PHjOH78OAoLC1FQUID77rsPJpMJ3333HXr06IGxY8feUONERHSt6usw\nrr5Ow2JWb+g6jaZQb2hU/zLfpEmTsGbNmuDvZ1y6dAnPPfec8dUREYWpkUOSkNq/yzVXhDdmhNGU\npFo/d+4cYmJigtMREREoLr7+072IiEiezaphSO9OLV1GCKnQGDZsGJ566ik8+OCD0HUdmzZtwsMP\nP2x0bURE1MpIhcYrr7yCzZs34x//+AcURcHTTz+NESNGGF0bERG1MtI7xx566CE89NBDRtZCREQ1\nVPoq8dO5PJR7nIiyOtDnlhTYzLYWrcmwIyqyd1N89dVXER0djZdeesmoUoiI2pwtR3di27Gd8AZ8\nwcfW5n6FYbcNxfBuQ29o3fv378eSJUuQlZV13c817Gf3ZO6muGrVKhw6dMioEoiI2qQtR3fifw9/\nExIYAOAN+PC/h7/BlqM7G73uFStWYNasWfB4PI16vmGh0dDdFH/44Qfs378f48ePN6oEIqI2p9JX\niW3H6g+Fbcd2otLfuA/9xMTEG7odlGG7p+q6m6KmaTh37hyWLVuGpUuXYuPGjVLri42NhKapRpVb\nq/j4qGZtrzVgn8NDOPYZaBv9/ulc3jUjjKt5Az4cPJuLO7tc/+8aPfTQQygqKmpsecaFRn13U9y0\naRNKSkowZcoUFBcXo7KyEt26dcO4cePqXF9JiduoUmsVHx+F4uLyZm2zpbHP4SEc+wy0TL8bE1Ll\nHqfUcmUeV8MLGcCw0KjvboqTJ0/G5MmTAQCfffYZjh49Wm9gEBGFiyiro+GFALSz2g2upHaGHdMY\nOXIkLBYLJkyYgIULF+KVV17B2rVrsXr1aqOaJCJq8/rckgKLaq53GYtqRu+Ov2qmikIpQgjRIi1f\np5YYVobbEJ59Dg/h2Geg7eyeAq6cPVWXB3vcd8On3TZWy975ioiIrlEdCFdfp2FRzU1yncaNYGgQ\nEbVCw7sNxdDEQTh4NhdlHhfaWe3o3fFXsGnWFq2LoUFE1ErZNGujTqs1kmEHwomI6ObD0CAiImkM\nDSIiksbQICIiaQwNIiKSxtAgIiJpDA0iIpLG0CAiImkMDSIiksbQICIiaQwNIiKSxtAgIiJpDA0i\nIpLG0CAiImkMDSIiksbQICIiaQwNIiKSxtAgIiJpDA0iIpLG0CAiImkMDSIiksbQICIiaQwNIiKS\nxtAgIiJpDA0iIpLG0CAiImkMDSIiksbQICIiaQwNIiKSxtAgIiJpDA0iIpLG0CAiImkMDSIiksbQ\nIC
IiaQwNIiKSxtAgIiJpDA0iIpLG0CAiImkMDSIiksbQICIiaQwNIiKSxtAgIiJpDA0iIpLG0CAi\nImmaUSvWdR0ZGRnIy8uDxWLBvHnzkJSUFJy/bt06rFy5EqqqIjk5GRkZGTCZmGFERK2ZYZ/S2dnZ\n8Hq9WL16NdLT07Fo0aLgvMrKSrz11lv44IMPsGrVKjidTmzdutWoUoiIqIkYFhr79u1DamoqAKB/\n//44ePBgcJ7FYsGqVasQEREBAPD7/bBarUaVQkRETcSw3VNOpxMOhyM4raoq/H4/NE2DyWRCXFwc\nACArKwtutxv33HNPveuLjY2EpqlGlVur+PioZm2vNWCfw0M49hkI3343JcNCw+FwwOVyBad1XYem\naSHTixcvxrFjx5CZmQlFUepdX0mJ26hSaxUfH4Xi4vJmbbOlsc/hIRz7DLRMv2/GkDJs99TAgQOx\nfft2AEBOTg6Sk5ND5s+ePRsejwfvvPNOcDcVERG1boaNNEaOHIkdO3ZgwoQJEEJgwYIFWLt2Ldxu\nN3r37o1PPvkEd955J5544gkAwOTJkzFy5EijyiEioiZgWGiYTCbMmTMn5LHu3bsH/5+bm2tU00RE\nZBBeGEFERNIYGkREJI2hQURE0hgaREQkjaFBRETSGBpERCSNoUFERNIYGkREJI2hQURE0hgaREQk\njaFBRETSGBpERCSNoUFERNIYGkREJI2hQURE0hgaREQkjaFBRETSGBpERCSNoUFERNIYGkREJI2h\nQURE0hgaREQkjaFBRETSGBpERCSNoUFERNIYGkREJI2hQURE0hgaREQkjaFBRETSGBpERCSNoUFE\nRNIYGkREJI2hQURE0hgaREQkjaFBRETSGBpERCSNoUFERNIYGkREJI2hQURE0hgaREQkjaFBRETS\nGBpERCSNoUFERNIYGkREJI2hQURE0rSWLqC1WfPtEazZURCcbmdX4K4Q8OstWFQDTAqgi9DHIswK\nTKoKBQJevw5dv9IHBYAj0oy4KCvOlrjh8etQTQr+Kc6OGLsV5S4vBABXhQ9ms4rSskooCmCPMCO1\nb2eoqgn5haUod3vh8QcQadWgqSbEOKywWVS4K304dcENi1lFt07tMGxAF+QXlWJP7jn4/DqSOkbh\noSGJsFlU/HK8BOUVPkRFmHFb53bIO1GC/KJLAICeXaPRr0ccbJYrL9NKrz/kOb1ujQ2ZT0TGUoQQ\nouHFWl5xcbnhbTy9aIvhbYQzpcZ/TIoCu01DbDsbAMDp9qHM7YUQAialaklFURDjsOCRu5KQ2q8z\nvt1/Ct9ffd0cAAAM3klEQVT9dBreGglu0Uy4t08npPbr3Kia4uOjmuW11ZqEY5+Blul3fHxUs7bX\nHAz7iqbrOjIyMpCXlweLxYJ58+YhKSkpOH/Lli1YtmwZNE1DWloaHnvsMaNKkcLAMJ5AVXAIAQSE\nQJnbh4AuYLNoKHV6oF8eLgkToJoUCCFQUu7BlzuO41BhKYrOu65Zp9evY8uPJwGg0cFBRPIMO6aR\nnZ0Nr9eL1atXIz09HYsWLQrO8/l8WLhwIf7+978jKysLq1evxvnz540qpUFrvj3SYm2Hm6uHtW6P\nH2VuD/QaA15dF6g5AHZWeLHvUHEwVGrz3U+n4fEGmrpcIrqKYaGxb98+pKamAgD69++PgwcPBucd\nOXIEiYmJiI6OhsViwaBBg7Bnzx6jSmlQzWMY1LyEAAIBcU2a1NxpGtAF/AEdlfWEgtev4+eCiwZV\nSUTVDNs95XQ64XA4gtOqqsLv90PTNDidTkRFXdnXZ7fb4XQ6611fbGwkNE01qlxqQQKoccDjMqXq\nmEbVAlUJoiiAWav7e46iqo3ah3wz7nduSDj2GQjffjclw0LD4XDA5bqyD1rXdWiaVus8l8sVEiK1\nKSlxG1Motbjq4xwhBHD1ORpCAL56TmMTgcB1H+gMx4PC4dhngAfC
m4phu6cGDhyI7du3AwBycnKQ\nnJwcnNe9e3cUFBSgtLQUXq8Xe/fuxYABA4wqpUFj7klqeCEyhKIAqqpcM9JQakyrJgWaaoLNUvdI\n06KZcHtSe4OqJKJqho00Ro4ciR07dmDChAkQQmDBggVYu3Yt3G43xo8fjxkzZuCZZ56BEAJpaWno\n2LGjUaU0aExqdx7XaCYKQg9fRFq1K2dPXR5ZmEzKlV1TABwRFvwqMabWs6eq3dunE6z1hAoRNQ1e\np1EDT7s1Fq/TaB3Csc8Ad081FYbGVTZ8fwyffHMsOB0TZYLTrcPfis/mNJkA/apd/REWBapa9c3b\n5w8gEAi9Iryd3YwO7aw4e9ENj0+Hqir4pzgHYu0WlLm8AABnhQ9Ws4oLZZVQFAWOCDN+068zVJOC\nQ4WlKHf74PH5EWkzV10RbrfAZlXh9vhxstgFm0XFbZ3a4f6BXXGosBR7fjkLX6DqivDfDkmC1azi\n54KLcLp9cESa0b1zNHJPlCC/sBRQgJ5dY9Cve1zICMLjDYQ85/ak9jc0wgjHD9Bw7DPA0GgqDI06\nhOMbi30OD+HYZ4Ch0VR4w0IiIpLG0CAiImkMDSIiksbQICIiaQwNIiKSxtAgIiJpDA0iIpLWZq7T\nICKilseRBhERSWNoEBGRNIYGERFJY2gQEZE0hgYREUljaBARkTTDfrmvtdJ1HRkZGcjLy4PFYsG8\nefOQlHTl5163bNmCZcuWQdM0pKWl4bHHHmvwOW1BY/oNAGPHjoXD4QAAdO3aFQsXLmyR+htDZrtV\nVFTgqaeewvz589G9e/c2v60b02fg5t7O69atw8qVK6GqKpKTk5GRkQEAbXo7tygRZjZv3iz+7d/+\nTQghxI8//iimTp0anOf1esUDDzwgSktLhcfjEePGjRPFxcX1PqetaEy/KysrxZgxY1qq5BvW0HY7\ncOCAGDt2rBg6dKg4fPiw1HNau8b0+WbezhUVFWLEiBHC7XYLIYR48cUXRXZ2dpvfzi0p7HZP7du3\nD6mpqQCA/v374+DBg8F5R44cQWJiIqKjo2GxWDBo0CDs2bOn3ue0FY3pd25uLioqKvD0009j8uTJ\nyMnJaanyG6Wh7eb1erFs2TJ069ZN+jmtXWP6fDNvZ4vFglWrViEiIgIA4Pf7YbVa2/x2bklht3vK\n6XQGh+EAoKoq/H4/NE2D0+lEVNSVX9qy2+1wOp31PqetaEy/bTYbnnnmGfz+97/H8ePH8eyzz2LT\npk1tpt8NbbdBgwZd93Nau8b0+WbeziaTCXFxcQCArKwsuN1u3HPPPdi4cWOb3s4tKez+Qg6HAy6X\nKzit63rwhXL1PJfLhaioqHqf01Y0pt+33XYbkpKSoCgKbrvtNsTExKC4uBidOnVq9vobozHbra1v\n68bUf7NvZ13XsXjxYhw7dgyZmZlVv3ffxrdzSwq73VMDBw7E9u3bAQA5OTlITk4OzuvevTsKCgpQ\nWloKr9eLvXv3YsCAAfU+p61oTL8/+eQTLFq0CABw9uxZOJ1OxMfHt0j9jdGY7dbWt3Vj6r/Zt/Ps\n2bPh8XjwzjvvBHdTtfXt3JLC7oaF1WdaHDp0CEIILFiwAD///DPcbjfGjx8fPItICIG0tDQ8/vjj\ntT6n+qyTtqIx/fZ6vXjllVdw6tQpKIqCl156CQMHDmzprkhrqM/VJk2ahIyMjJCzp9rqtm5Mn2/m\n7dy7d2+kpaXhzjvvhKIoAIDJkydjxIgRbXo7t6SwCw0iImq8sNs9RUREjcfQICIiaQwNIiKSxtAg\nIiJpDA0iIpLG0KBW4aeffsKf/vSnG15PYWEhZs6c2QQVydm9ezcmTZrUbO0RtTReAkmtQp8+fdCn\nT58bXs+pU6dQWFjYBBURUW0YGtRkdu/ejczMTGiahtOnT6Nv376YP38+NmzYgJUrV0LXddxxxx14\n7bXXYLVacdddd+GOO+7A+fPn
8fLLL+Pdd99FVlYWJk2ahF69euH7779HZWUlZs2ahaysLBw+fBhP\nPvkknnzySbhcLsyZMwf5+fkIBAJ49tln8bvf/Q7z5s1DUVERXn/9dbz22mtYvnw5Nm7ciEAggHvv\nvRfTp0/HyZMn8S//8i+IjY2F1WrF+++/X2t/zpw5g5deeglutxsmkwmzZs1C//798d1332HhwoWw\nWq247bbbGvy7nD9/HrNnz8aZM2egKArS09MxdOhQZGZmIicnB6dPn8bjjz+OTZs2ITo6Gvn5+Xjr\nrbdw5swZvPXWW9B1HQkJCZgzZw7i4uIwfPhw9O3bF7/88gs+/PBDdOjQoYm3JFE9WuLWunRz2rVr\nl+jTp484cuSI0HVdTJs2TbzzzjviD3/4g6isrBRCCLFkyRKxbNkyIYQQycnJYteuXcHnTpw4UQgh\nxMSJE8X8+fOFEEJkZmaKBx54QLjdblFUVCTuvPNOIYQQixcvFitXrhRCCFFeXi5GjRolTpw4EbKe\nb775RkybNk34/X4RCATEH//4R/HFF1+IwsJCkZycLAoLC+vtT2ZmplixYkWwvr/97W/C4/GIe+65\nJ3hb8ZkzZwbbq8sLL7wgsrOzhRBCnD17VowYMUKUl5eLt99+O+S5EydOFG+//bYQQojz58+Le++9\nN1jjihUrxLRp04QQQtx///3i008/rbdNIqNwpEFNavDgwcHbbo8ZMwbTpk1DbGxs8EedfD4fbr/9\n9uDy/fr1q3U9v/nNbwAAnTt3Rr9+/RAREYEuXbqgrKwMALBz505UVlbi008/BQC43W7k5+fDbrcH\n1/H999/jwIEDGDduHACgsrISnTt3xqBBg9ChQwd07dq13r7cfffdmDZtGn755Rfcd999mDhxIvLy\n8nDLLbeE/HjRv//7v9e7np07d+Lo0aN4++23AVTdnrt6F1rfvn1Dlq2ePnDgAPr27Ruscfz48Vi+\nfHmDfzciozE0qEmpqhr8vxACgUAADz/8MGbNmgWg6g66gUAguIzNZqt1PWazOfj/2u4+Wn3n0jvu\nuANA1S6g6Oho/PDDD8FlAoEAnnjiCTz11FMAgLKyMqiqipKSkjrbrWnQoEFYv349tm3bhg0bNuDz\nzz9Heno6dF2vtb910XUdK1euRExMDICqmwLGxcUhOzv7mjqqp2u2AVT9Lf1+f3DaarU22C6REXj2\nFDWpffv24ezZs9B1HV988QVmzpyJr776ChcuXIAQAhkZGVi5cuUNt3PXXXfho48+AgCcO3cOjz76\nKE6fPh38XYTqZdasWQOXywW/34/nnnsOmzdvlm7jjTfewJo1azB27FjMnj0bP//8M1JSUnDhwgXk\n5uYCANavXy9V64cffggAOHz4MB599FFUVFTU+5x+/fph//79KCoqAgCsXr0aQ4YMka6dyCgcaVCT\nuuWWW/Dyyy/j7NmzuOeeezBx4kRERkbiiSeegK7r6NWrF6ZMmXLD7Tz//PPIyMjA7373OwQCAUyf\nPh2JiYmIiopCeXk5pk+fjsWLFyM3NxePPfYYAoEAUlNTMXbsWJw8eVKqjUmTJiE9PR2ff/45VFXF\na6+9BrPZjL/85S+YPn06NE0L2dVWl1mzZmH27NkYPXo0gKowqvkDQLWJi4vDnDlz8Pzzz8Pn86Fz\n586YP3++VN1ERuJdbqnJ7N69G0uXLkVWVlZLl0JEBuFIg8La3r17MXfu3FrnLV++HB07dpRaz5//\n/Gfs3Lnzmsd79+7NEQLdVDjSICIiaTwQTkRE0hgaREQkjaFBRETSGBpERCSNoUFERNIYGkREJO3/\nAaCMWnpTxrHnAAAAAElFTkSuQmCC\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x11ae97a58>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"sns.lmplot('perimeter_sd_error', 'diagnosis',\n", | |
" data=bc_data,\n", | |
" fit_reg=False,\n", | |
" hue=\"diagnosis\", \n", | |
" scatter_kws={\"marker\": \"D\",\n", | |
" \"s\": 100})\n", | |
"\n", | |
"plt.xlabel('perimeter_sd_error')\n", | |
"plt.ylabel('diagnosis')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<matplotlib.text.Text at 0x11d6b2a90>" | |
] | |
}, | |
"execution_count": 25, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAY0AAAFcCAYAAAA0xeJbAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xt8E2W+BvBnkknSNukNWjkgbYFCe3CRmyIqlusWFhE4\nwFlbdxFd+Ai4qIcV8QCKi1CgcnE9oruueNkVVikCq4uCFwQEQbCytlChiFxKAYUCvSVpc5v3/BEa\nGmjLCyT09nz5fLTJTN755ZdJnswkM1GEEAJEREQSdPVdABERNR4MDSIiksbQICIiaQwNIiKSxtAg\nIiJpDA0iIpKm1ncBsoqKygMyTnR0GIqL7QEZqzFjH7zYBy/2wSvQfYiNDQ/YWA1Fs9vSUFV9fZfQ\nILAPXuyDF/vgxT5cWbMLDSIiunYMDSIiksbQICIiaQwNIiKSxtAgIiJpDA0iIpLG0CAiImkMDSIi\nktZojghvrirdDhws/hE2lw1GnREA4NScMBvMSI7uiBDVhFJHGTYd34bC8pMw6AzoedOtuKVlMo6V\nFfrdzuayo8RZiihTJFpXtMTP587juzN7UVRxDnpFhVGvotLtQLmzHAoUGHQGKNDBLVzQKTrodHro\noQBQ4HK7UCkcAABVp4dJMUID4BYuqIoKAQGP0KAA0Cl6hBlCoFN0sDqsqNScENCgQAcFClSdHuLC\n/aqiQEELYzRUnQ5lbiuEEDAoKsxGCzyaBya9AZUeB0LUENhdFXC4nXB4Ki/UY4BOp0APPRRFgcPt\ngFt4oNfpEWdugxYhLXDcWgirywZVr4fT6UaFqPAtO8oYiZjQFtBBByje6zShoaSyDGXOMriFBwLC\ne9+gQ7gpHK3N/4GOUe2Rf+4HFFWeg1FnQMuwlkiMbIcyRxlOWn/G2YpzAICbwmJwx3/09D1GP1lP\nY+/Z71HhroBe0SPKFIFQQwhcmgdmfRiMqhGJke3QKboD9p87iH+f2Qenx4FQQyjahcfD5rbB4XZC\nr9MjMbIdusR0Rohq8luH8s4ewOHSYwDgmweAb91q44xBtIjxrTNmgxntIuJwqPgIfig+jGJHKaJN\nkUiKTvSNX+oow9YTO1DqKIPZYEYbcysICL91s7Z1ubZ5rmXeQKl0O5B98ghOnT17w5bZGCmN5Zf7\nAnUakdjY8ICNFWw7T2Vj10/ZcGkuWF122Fw2AIDZEAaLwQyDzgAhgOPWE/BoHt/tBAR00CEqJAKA\nApvLBo+mARDQKTpoQoNHaBBoFA99k6VTvKHpEZ4651OgQK/o6nzMFCjQKTqoOj0ijBEYnDAAd7fp\nhZ2nsvFZwRaUOctQ9VRXFO8bglA1FKEXXhQrPBUod9gQZgiFxWCG1WVDmdMKTWio/hKhV3SIDolC\nuCEcp2w/+dYlTWgAgDA1FC1Do2HQGXBn6164u00vAP7rcpVL56lyNfMGStUyhU6D2+0J2DKb4mlE\nghoaubm5WLJkCVasWOF3/ebNm/Hqq69CVVWMGTMG999//xXHam6hsfNUNraf3AkAsLrssDqtftMt\nRjMcbgdsbu87ZOXCW+JLX1T0ive0CFVP6prmoaahKjj0ig4WowVJUYn4oeTwZetO1Yu8TtEh0hQB\nALC6bMCFlwKDTkWlx+kXZlXrF3Bx/alp3QIuBgcApNx8NwD41uWapNx8t1+4yM4bKNWXqap6X2gE\nYplNMTSCtntq+fLl+Ne//oXQ0FC/610uFxYuXIg1a9YgNDQUDzzwAAYOHIiYmJhgldLoVLod2PVT\nNgBAE8K3hVFducMKt3BfcayqJ74ChWHRxAkIXxhYnVZ8V7QXl74lFLj4Iq8JDeUOKxRF8e2GE0LA\n7q6scXRcsg5pwgNUC5MqdncFIrUIqDo9dp765sJta7frp2zc1qobhBC+9f5K85r0xjrnk1X9uXaj\nltnYBe2D8Pj4eCxbtuyy6w8fPoz4+HhERkbC
aDTitttuQ3Z23Q9ac3Ow+Effpnmlx4GaNga9uyou\nEhf+EYkLWxIuzX3hhd1/WnUe4fGbR1T753e7mpaD2rdayy+80Sl3WVHustY4TxWX5sLB84f81vsr\nzRso9bHMxi5oWxpDhgzBiRMnLrvearUiPPziJpvZbIbVWvdKBXhPWRyoM1A29E1GXbnHd18VjwCU\ny9/NQRFXegNHzZWCi1sYCi5ff6qvN0q19UtRLtsyuVYCGlRVD8XjjZUrPXeVEA0KhNRzXAnRAvYc\nrv5cq1JTDYFcZmN3w789ZbFYYLNd3N1is9n8QqQ2gTrHfWP4TEOrvLhfVYjqrwDVCAVMDaqR8GaF\ngN9/fNP8560KDABC1LCz6doo0MHt9njXX4jLPie4rORKHQRwxfmq5g3Uc7j6cw2o+TON61lmUwya\nG36cRmJiIgoKClBSUgKn04lvv/0WPXr0uNFlNGjJ0R1h0BkAACF6k3ef8yX0is7vCa5c+EekKDro\nFB0MOhU6RX/ZtOr0it5vHqXaP7/b1bQcoNZ1LtxgvvB/C8INljrrNegMSG7RyW+9v9K8gVIfy2zs\nblhorF+/HllZWTAYDJgxYwYmTJiA9PR0jBkzBq1atbpRZTQKIaoJd7b2fltDpygwX3gCVhdusiBM\nDb3s+kvpFb3vWy4Mlqat6ttTCgCL0YIesV1hMZovmcf7VV9c+H+4yeI3j6IoCFNDfPP439I/JHSK\nvob5vN+eUnXede7uNnfg7ja966z7zta9YNIb/db7K80bKPWxzMaOx2k0YDxOo2njcRo8TqMxYmg0\ncA6PEwfPH4LVZb/wbkfA4XHBYghDcotOMOmNKHNa8UXBlzhefsJ7RHirrril5X/iWGmB3+1srgqU\nOEoQZYpC65Yt8PP5Enx3Jhdn7GdhUAww6FVUeipR5iiHDjrvEeGKApdwQ39hl4dOUQChwO1xo0Kr\nhAJAr1MRopigKRrcmhuqToUQAho0COF9oQkzhkKHqiPCHdCgQVd1RLjeACE0OC45IrylqQVUnR6l\nrnJACKg6FRaDGR7hgVFnRKWnEqFqKCrcFah0O1DprgSgeHfL6C7UCx0cbof3Puj0iA+/GTGmFjhq\nPQ6r0w6DqsLpcMJe7YjwFqYotAxtCZ1QqvbBQBMCpRVlKHGVwqO5oV04Ilyv6BBhikCbsFZIjE7E\ngXMHcabiLIw6A2LDYpAY1Q6ljjKcLP/p4hHh5ljc8R+3eY8ILy3Az7YzyC3Kg/3CEeHRIVEIVUPg\n1twI04fCqJqQGJWATtEdvUeEn86FQ3MiTA1F+4h4WF02ODxO6BUdEqPao0tMZ793xg6P03tEeMlR\nAAoSoxLQJeYWAPCtWzfHtEQ0bvKtMxZDGNpFJuBQ8WHvEeGVJYg2RSGpRQd0ibnFt95tLdyOUkc5\nLIYwtLa0hiY0v3WztnW5tnmuZd5AcXic+NlzAifPngvYMhka9ai5hkawsA9e7IMX++AV6D40xdDg\nCQuJiEgaQ4OIiKQxNIiISBpDg4iIpDE0iIhIGkODiIikMTSIiEgaQ4OIiKQxNIiISBpDg4iIpDE0\niIhIGkODiIikMTSIiEgaQ4OIiKQxNIiISBpDg4iIpDE0iIhIGkODiIikMTSIiEgaQ4OIiKQxNIiI\nSBpDg4iIpDE0iIhIGkODiIikMTSIiEgaQ4OIiKQxNIiISBpDg4iIpDE0iIhIGkODiIikMTSIiEga\nQ4OIiKQxNIiISBpDg4iIpDE0iIhIGkODiIikMTSIiEgaQ4OIiKQxNIiISBpDg4iIpDE0iIhIGkOD\niIikMTSIiEgaQ4OIiKQxNIiISFrQQkPTNDz33HNIS0vDgw8+iIKCAr/p//rXvzBq1CiMGTMG7777\nbrDKICKiAFKDNfCmTZvgdDqRlZWFnJwcZGZm4i9/+Ytv+qJFi/DRRx8hLCwMw4YNw7BhwxAZGRms\ncoiIKACC
Fhp79uxBSkoKAKB79+7Iy8vzm56cnIzy8nKoqgohBBRFCVYpREQUIEELDavVCovF4rus\n1+vhdruhqt5FdurUCWPGjEFoaChSU1MRERFR53jR0WFQVX1AaouNDQ/IOI0d++DFPnixD17sQ92C\nFhoWiwU2m813WdM0X2Dk5+dj69at+OKLLxAWFobp06dj48aNGDp0aK3jFRfbA1JXbGw4iorKAzJW\nY8Y+eLEPXuyDV6D70BQDKGgfhPfs2RPbtm0DAOTk5CApKck3LTw8HCEhITCZTNDr9WjRogXKysqC\nVQoREQVI0LY0UlNTsWPHDqSnp0MIgQULFmD9+vWw2+1IS0tDWloafvOb38BgMCA+Ph6jRo0KVilE\nRBQgihBC1HcRMgK1ycjNcC/2wYt98GIfvLh76sp4cB8REUljaBARkTSGBhERSWNoEBGRNIYGERFJ\nY2gQEZE0hgYREUljaBARkTSGBhERSWNoEBGRNIYGERFJY2gQEZE0hgYREUljaBARkTSGBhERSWNo\nEBGRNIYGERFJY2gQEZE0hgYREUljaBARkTSGBhERSWNoEBGRNIYGERFJY2gQEZE0hgYREUljaBAR\nkTSGBhERSWNoEBGRNIYGERFJY2gQEZE0hgYREUljaBARkTSGBhERSWNoEBGRNIYGERFJY2gQEZE0\nhgYREUljaBARkTSGBhERSWNoEBGRNIYGERFJY2gQEZE0hgYREUljaBARkTSGBhERSWNoEBGRNIYG\nERFJU4M1sKZpmDNnDg4ePAij0YiMjAwkJCT4pu/duxeZmZkQQiA2NhaLFy+GyWQKVjlERBQAQdvS\n2LRpE5xOJ7KysjBt2jRkZmb6pgkhMHv2bCxcuBDvvfceUlJScPLkyWCVQkREARK0LY09e/YgJSUF\nANC9e3fk5eX5ph09ehRRUVH429/+hkOHDqFfv37o0KFDsEohIqIACVpoWK1WWCwW32W9Xg+32w1V\nVVFcXIzvvvsOzz33HOLj4zF58mR06dIFd911V63jRUeHQVX1AaktNjY8IOM0duyDF/vgxT54sQ91\nC1poWCwW2Gw232VN06Cq3sVFRUUhISEBiYmJAICUlBTk5eXVGRrFxfaA1BUbG46iovKAjNWYsQ9e\n7IMX++AV6D40xQAK2mcaPXv2xLZt2wAAOTk5SEpK8k2Li4uDzWZDQUEBAODbb79Fp06dglUKEREF\nSNC2NFJTU7Fjxw6kp6dDCIEFCxZg/fr1sNvtSEtLw/z58zFt2jQIIdCjRw/0798/WKUQEVGAKEII\nUd9FyAjUJiM3w73YBy/2wYt98OLuqSvjwX1ERCRNKjT27t2Lt99+G06nE+PHj8edd96JTz/9NNi1\nERFRAyMVGhkZGejSpQs+/fRThISE4J///Cdef/31YNdGREQNjFRoaJqGXr16YevWrRg8eDBat24N\nj8cT7NqIiKiBkQqN0NBQvPXWW9i1axcGDBiAv//97zCbzcGujYiIGhip0FiyZAnsdjuWLVuGyMhI\nnDlzBi+++GKwayMiogamztD4/vvvAQDHjx9H79694fF4kJ2djf79++P48eM3pEAiImo46jy4b9Wq\nVZg3bx5efvnly6YpioJ33nknaIUREVHDU2dozJs3DwCwYsUKv+svPRkhERE1D1KfaWzZsgWLFy+G\nzWbD0KFDMWjQIPzjH/8Idm1ERNTASIXGK6+8gtGjR2PDhg3o2rUrNm/ejLVr1wa7NiIiamCkTyOS\nmJiIrVu3YuDAgTCbzXC5XMGsi4iIGiCp0IiJicG8efOwb98+pKSkIDMzE23atAl2bURE1MBIhcbS\npUtx6623YuXKlQgLC0NcXByWLl0a7NqIiKiBkfo9DbPZDJvNhiVLlsDtdqN3794ICwsLdm1ERNTA\nSIXGokWLUFBQgDFjxkAIgXXr1uHEiRN45plngl0fERE1IFKhsWPHDnzwwQ
fQ6bx7s/r374/hw4cH\ntTAiImp4pD7T8Hg8cLvdfpf1en3QiiIiooZJaktj+PDhGDduHIYNGwYA+Pjjj31/ExFR8yEVGpMn\nT0bnzp2xa9cuCCEwefJk9O/fP8ilERE1bbt378bHH38Mi8WCp59++oYsc926dWjdujXuuuuua7q9\nVGgAQJs2bTBo0CAIIQAA2dnZ6NWr1zUtlIiILrpRgQEAo0ePvq7bS4XG888/jy1btiAuLs53Hc9y\nS0R09crLy/GHP/wBDocD4eHhcDgcmDBhAt58800sX74cO3bsQFlZGQYOHIjHHnsMGzduxOuvv47o\n6GjY7XYsWbIEr7zyCoxGIwoKCuDxePDaa6/B5XJh+vTpqKyshKqqyMjIQEhICP7whz9A0zRERETg\nT3/6E5YvX44OHTogNjYWS5cuhaIo6NWrF6ZNmyZVv/S3pz755BOEhIRcV7OIiJq7devW4Z577sHD\nDz+Md955B19++SUA+H5C+29/+xucTieGDRuGyZMnY9myZXj//fdhMBj8vrX6i1/8AnPnzsXs2bOx\na9cuZGdnY+TIkRg+fDh27tyJpUuXYvjw4ejQoQNmz56N7du3o6yszHf7zZs347e//S1GjBiB999/\nH0IIKIpyxfqlvj0VFxfn2y1FRETX7siRI7jlllsAAN26dfNdr9PpUFFRgWnTpmH+/PlwuVwoLi5G\ny5YtYTabYTQafbcDgKSkJADATTfdBIfDgSNHjqBHjx4AgJ49e+LIkSPo168f4uLi8Mgjj+Djjz+G\nql7cTpg4cSL27duHcePG4fjx49A0Tap+qS2NyMhIDBs2DD169IDRaPRdv3DhQqmFEBGRV0JCAnJz\nc3HHHXf4fh0VAPLz87F//3689tprKCwsxMaNG9GyZUucP38edrsdBoMBBw4c8M1/6VZBu3btkJOT\ng7Zt22LPnj2Ii4vDN998g5tvvhlvv/023n77bWzYsME3/0cffYS0tDR07NgRkydPxuHDh31BVBep\n0EhJSUFKSorMrEREVIf09HQ8+eST2LZtG2JjY33XJyQkoLy8HL/+9a9hNpsRExMDu92OqVOnYuzY\nsYiOjoaqqn5bC9VNnjwZM2fOxHvvvQdFUTB//nxYLBZMnToV7733HgwGA+bPn4/3338fAHDLLbdg\nxowZMJvNaNWqFRITE6XqV4TEfqdTp07530hRYDKZ0KJFC6mFBEJRUXlAxomNDQ/YWI0Z++DFPnix\nD16B7kNsbPh1j/HGG29g/Pjx0DQNo0aNwtq1a/32+NxoUlsaU6ZMwQ8//IDk5GQIIXDo0CHExsZC\nr9dj3rx51/x9XyIiqpumaRg9ejRMJhPS09PrNTAAydBo1aoV5s2bhy5dugAADh48iFdeeQWzZs3C\n448/jjVr1gS1SCKi5mrixImYOHFifZfhI/XtqZMnT/oCAwCSk5Nx/PhxtG7d2vc1MSIiavqktjTi\n4uKwZMkSjBw5Epqm4aOPPkJCQgK+++4735lviYio6ZN6xV+0aBHcbjemTZuGGTNmQNM0LFiwAIWF\nhXj++eeDXSMRETUQUlsaFosFM2bM8F0WQuDEiRMYMWJE0AojImruKhxu5B4qQpnNiQizEd06xSLU\nJH3KwKCQWvqKFSvwpz/9CRUVFb7rbr75ZmzatClohRERNWef7y7ApuzjcLoufm78z60/4pe94pHa\nO+GaxtQ0DXPmzMHBgwdhNBqRkZGBhISrG0tq99Tbb7+NDz/8EPfeey8+//xzzJ8/3+/wdyIiCpzP\ndxdgw86jfoEBAE6XBxt2HsXnuwuuadxNmzbB6XQiKysL06ZNQ2Zm5lWPIRUaLVu2RFxcHJKTk/HD\nDz9g9OjROHr06FUvjIiI6lbhcGNT9vE659mUfRyVDned89Rkz549vrN7dO/eHXl5eVc9hlRohIaG\nYteuXUhOTsaWLVtQVFTkd7ZEIiIKjN
xDRZdtYVzK6fIg91DRVY9ttVphsVh8l/V6vd9PecuQCo1n\nn30WmzdvRkpKCkpKSvCrX/0KY8eOvbpqiYjoispszoDOV53FYoHNZvNd1jSt1nNZ1UZq7qSkJMya\nNQsAsGzZsqtaABERyYswy50mRHa+6nr27IktW7bg3nvvRU5OjtRZbS9VZ2hMmjQJf/3rXzFw4MAa\nf5zjiy++uOoFEhFR7bp1isU/t/5Y5y4qo0GPbp1ia51em9TUVOzYsQPp6ekQQmDBggVXPUadodGz\nZ0988MEHePzxx696YCIiunqhJhW/7BWPDTtr/7LRL3vFI+QajtfQ6XSYO3fu9ZRXd2gcO3YMx44d\nQ2FhIQoKCtCvXz/odDp89dVX6NixI0aNGnVdCyciostVHYdx6XEaRoP+uo7TCIQ6Q6Pql/kefPBB\nfPjhh77fzygtLcWUKVOCXx0RUTOV2jsBKd1vvuyI8GvZwggkqaWfOXMGUVFRvsuhoaEoKrr6r3sR\nEZG8EJOK3l1a13cZfqRCo3///vjd736HwYMHQ9M0fPLJJxg6dGiwayMiogZGKjRmzpyJTz/9FN98\n8w0URcH48eMxaNCgYNdGREQNjPTOsSFDhmDIkCHBrIWIiKqpdFVi35mDKHdYEW6y4NabkhFiCKnX\nmoL2iYrs2RRnz56NyMhIPPXUU8EqhYio0dl8ZCe2Ht0Jp8flu259/ufo3/5uDOxw93WNnZubiyVL\nlmDFihVXfdug/eyezNkUV61ahR9++CFYJRARNUqbj+zEZz9+6RcYAOD0uPDZj19i85Gd1zz28uXL\n8eyzz8LhcFzT7YMWGlc6m+K///1v5ObmIi0tLVglEBE1OpWuSmw9WncobD26E5Xua3vRj4+Pv67T\nQQVt91RtZ1NUVRVnzpzBq6++ildeeQUbN26UGi86Ogyqqg9IbbGx4QEZp7FjH7zYBy/2wau++7Dv\nzMHLtjAu5fS4kHc6H7fffPW/azRkyBCcOHHiWssLXmjUdTbFTz75BMXFxZg4cSKKiopQWVmJDh06\nYPTo0bWOV1xsD0hdsbHhKCoqD8hYjRn74MU+eLEPXoHuw7UEULnDKjVfmcN25ZmCIGihUdfZFMeN\nG4dx48YBANatW4cjR47UGRhERM1FuMly5ZkARJjMQa6kZkH7TCM1NRVGoxHp6elYuHAhZs6cifXr\n1yMrKytYiyQiavRuvSkZRr2hznmMegO6tPrPG1SRP0UIIeplyVcpUJuM3Az3Yh+82Acv9sGrIeye\nAi5+e6o2gzv2u+6v3V6r+j3zFRERXaYqEC49TsOoNwTkOI3rwdAgImqABna4G3fH34a80/koc9gQ\nYTKjS6v/RIhqqte6GBpERA1UiGq6pq/VBlPQPggnIqKmh6FBRETSGBpERCSNoUFERNIYGkREJI2h\nQURE0hgaREQkjaFBRETSGBpERCSNoUFERNIYGkREJI2hQURE0hgaREQkjaFBRETSGBpERCSNoUFE\nRNIYGkREJI2hQURE0hgaREQkjaFBRETSGBpERCSNoUFERNIYGkREJI2hQURE0hgaREQkjaFBRETS\nGBpERCSNoUFERNIYGkREJI2hQURE0hgaREQkjaFBRETSGBpERCSNoUFERNIYGkREJI2hQURE0hga\nREQkjaFBRETSGBpERCSNoUFERNIYGkREJI2hQURE0hgaREQkjaFBRETS1GANrGka5syZg4MHD8Jo\nNCIjIwMJCQm+6R999BH+/ve/Q6/XIykpCXPmzIFOxwwjImrIgvYqvWnTJjidTmRlZWHatGnIzMz0\nTausrMRLL72Ed955B6tWrYLVasWWLVuCVQoREQVI0EJjz549SElJAQB0794deXl5vmlGoxGrVq1C\naGgoAMDtdsNkMgWrFCIiCpCg7Z6yWq2wWCy+y3q9Hm63G6qqQqfTISYmBgCwYsUK2O129OnTp87x\noq
PDoKr6gNQWGxsekHEaO/bBi33wYh+82Ie6BS00LBYLbDab77KmaVBV1e/y4sWLcfToUSxbtgyK\notQ5XnGxPSB1xcaGo6ioPCBjNWbsgxf74MU+eAW6D00xgIK2e6pnz57Ytm0bACAnJwdJSUl+0597\n7jk4HA78+c9/9u2mIiKihi1oWxqpqanYsWMH0tPTIYTAggULsH79etjtdnTp0gVr1qzB7bffjoce\neggAMG7cOKSmpgarHCIiCoCghYZOp8PcuXP9rktMTPT9nZ+fH6xFExFRkPDACCIiksbQICIiaQwN\nIiKSxtAgIiJpDA0iIpLG0CAiImkMDSIiksbQICIiaQwNIiKSxtAgIiJpDA0iIpLG0CAiImkMDSIi\nksbQICIiaQwNIiKSxtAgIiJpDA0iIpLG0CAiImkMDSIiksbQICIiaQwNIiKSxtAgIiJpDA0iIpLG\n0CAiImkMDSIiksbQICIiaQwNIiKSxtAgIiJpDA0iIpLG0CAiImkMDSIiksbQICIiaQwNIiKSxtAg\nIiJpDA0iIpLG0CAiImkMDSIiksbQICIiaQwNIiKSxtAgIiJpDA0iIpLG0CAiImkMDSIiksbQICIi\naQwNIiKSptZ3ATdSpdONXXk/4eTPZQgPNaBzu2gAwIFjxSivcMFk0MFe6ca3B8/g5/N2CA0ICzEg\nwmxAtMWIsgoXioorYKtwQQDQhIDbpUFTAKF5lyEk6lAk57vRFACKAgjhvaBXAFX1vq9wuTUY9DqY\njHooENAE4PYIuN0eCEWBSdUhOT4aMZGhiIkKRYtwE9q3icDRU2Uor3D5+h1i9F/lKp1uX/9rm4eI\nGg5FCNEQX78uU1RUfl233557Cl/t+wkC3hdAAKhwuKEACDGpsNpdKLY6oGmNoh0NmqpXYDLo4dEE\nzCEGWMIMAACjqsM9t7ZGSrc2AC4+Js4Lj0dN8wRbbGz4da9bTQH74BXoPsTGhgdsrIYiaG/pNE3D\nnDlzcPDgQRiNRmRkZCAhIcE3ffPmzXj11VehqirGjBmD+++/P1ilYHvuKWz+7iQAwHDhnbPV7kK5\n3em9rsKFCocbzIvAcHsE3B439DrF12NLmAFOt+Z7HAD4/V2l+jw3KjiISF7QPtPYtGkTnE4nsrKy\nMG3aNGRmZvqmuVwuLFy4EG+99RZWrFiBrKwsnD17Nih1VDrd+GrfT37XaZqAtcIFwLubyM7ACAqP\nJiCEt9fVt+C25Z7Ctr2n6rztV/t+gsPpCXaJRHSVghYae/bsQUpKCgCge/fuyMvL8007fPgw4uPj\nERkZCaPKYKXuAAAOIklEQVTRiNtuuw3Z2dlBqePAsWK/3R+AN0iq9soJIdA4dtA1TkJ4e1xZLQDK\n7E6U21x13s7p1rC/4HywyyOiqxS03VNWqxUWi8V3Wa/Xw+12Q1VVWK1WhIdf3NdnNpthtVrrHC86\nOgyqqr/6Qo6c9+2SqqIoChRF8V5gYgSXUtXvi7sGq3p/6eNy2U31+huyT7gp7ne+FuyDF/tQt6CF\nhsVigc1m813WNA2qqtY4zWaz+YVITYqL7ddWiNvj++Ab8L5QebcuGBY3hLi4NVf1OAghoEDxe1xq\nvKnHE/QPZ/kBsBf74MUPwq8saLunevbsiW3btgEAcnJykJSU5JuWmJiIgoIClJSUwOl04ttvv0WP\nHj2CUkfndtEwXvKONsSo+t7tVr0LpuBQLmxphBgvbiVGhBkRbjbUeTujqsMtCS2CXR4RXaWgbWmk\npqZix44dSE9PhxACCxYswPr162G325GWloYZM2ZgwoQJEEJgzJgxaNWqVVDqCDGquOfW1n7f1NHp\nFFhCDSi3O6EACDOp/PZUEOh13t2AllADdLqLydz3wreiavr2VJV7bm0Nk/EadkcSUVDxOA3wOI1A\n43EajQ/74MXdU1fWbEIDABxOD04WV+Dkz2WwhBl8uz/2F5yH1e6C
yaj3HhGefwY/n7dBCO8R4ZFm\nI6LCjSizu3D2fAXKK52AUKAJAZfb4z06vIkeEW4w6CAE4PZoUPU6hBpUKIoGt6bAowm43G4A3pBI\nTohGy4gQxEaGIjrChMQ2kTh8qhRWu8vX70u3HhxOj6//tc0TTHyx9GIfvBgaV9asQgPgk6MK++DF\nPnixD14MjSvjCQuJiEgaQ4OIiKQxNIiISBpDg4iIpDE0iIhIGkODiIikMTSIiEhaozlOg4iI6h+3\nNIiISBpDg4iIpDE0iIhIGkODiIikMTSIiEgaQ4OIiKQF7Zf7GopRo0bBYrEAANq2bYvJkydjxowZ\nUBQFnTp1wh//+EfodE03O3Nzc7FkyRKsWLECBQUFNd731atXY9WqVVBVFY8++igGDBhQ32UHXPU+\n7N+/H5MmTUK7du0AAA888ADuvffeJt0Hl8uFWbNm4eTJk3A6nXj00UfRsWPHZrc+1NSH1q1bN7v1\n4bqIJqyyslKMHDnS77pJkyaJXbt2CSGEmD17tvjss8/qo7Qb4vXXXxf33Xef+PWvfy2EqPm+nzlz\nRtx3333C4XCIsrIy399NyaV9WL16tXjzzTf95mnqfVizZo3IyMgQQghRXFws+vXr1yzXh5r60BzX\nh+vRdN9iA8jPz0dFRQXGjx+PcePGIScnB99//z3uuOMOAEDfvn2xc+fOeq4yeOLj47Fs2TLf5Zru\n+969e9GjRw8YjUaEh4cjPj4e+fn59VVyUFzah7y8PGzduhW//e1vMWvWLFit1ibfh1/96lf4n//5\nHwCAEAJ6vb5Zrg819aE5rg/Xo0mHRkhICCZMmIA333wTzz//PJ566ikIIaAoCgDAbDajvLzp/lrZ\nkCFDoKoX90DWdN+tVivCwy/+upjZbIbVar3htQbTpX3o2rUrnn76afzjH/9AXFwcXn311SbfB7PZ\nDIvFAqvViieeeAJTp05tlutDTX1ojuvD9WjSodG+fXuMGDECiqKgffv2iIqKwrlz53zTbTYbIiIi\n6rHCG6v6ZzdV991iscBms/ldX/3J0hSlpqaiS5cuvr/379/fLPrw008/Ydy4cRg5ciSGDx/ebNeH\nS/vQXNeHa9WkQ2PNmjXIzMwEAJw+fRpWqxV9+vTB7t27AQDbtm3D7bffXp8l3lC33HLLZfe9a9eu\n2LNnDxwOB8rLy3H48GEkJSXVc6XBNWHCBOzduxcA8PXXX+MXv/hFk+/D2bNnMX78eEyfPh3//d//\nDaB5rg819aE5rg/Xo0mfsNDpdGLmzJk4deoUFEXBU089hejoaMyePRsulwsdOnRARkYG9Hp9fZca\nNCdOnMCTTz6J1atX4+jRozXe99WrVyMrKwtCCEyaNAlDhgyp77IDrnofvv/+e8ybNw8GgwExMTGY\nN28eLBZLk+5DRkYGNm7ciA4dOviue+aZZ5CRkdGs1oea+jB16lQsXry4Wa0P16NJhwYREQVWk949\nRUREgcXQICIiaQwNIiKSxtAgIiJpDA0iIpLG0KBm65FHHsHp06drnV5eXo7f//73QVv+jBkzsG7d\nuqCNTxQMTf4st0S1Wb58eZ3TS0tLeb4hokswNMiPEAJLlizBpk2boNfrkZaWhr59++K5555DSUkJ\nwsLC8Mwzz6Br166YMWMGLBYLvv/+e5w+fRpTpkzBmDFjUFJSgmeeeQZHjhyB0WjEjBkzcNddd2Hl\nypX48MMPUVFRAUVR8NJLL+HYsWNYvXo1/vrXvwIAVq5ciWPHjmHmzJlYtGgRvvnmG3g8HowePRoP\nP/xwrXWfOHECjz76KOLi4lBQUIA2bdpg8eLFiIqKwpYtW/DSSy9B0zTExcVh7ty5iImJwcCBA/HO\nO+/gm2++wfbt21FaWorCwkL06dMHc+bMQUZGBs6cOYMpU6bghRdewJNPPomzZ88CAKZMmYJBgwbV\nWs/69evxxhtvQK/Xo23btli8
eDGMRiMyMzOxdetW3HTTTfB4PL4TBtZm27ZtePnll+F2u9G2bVvM\nmzcP0dHRGDhwILp27YoDBw5g8eLFePrppxEdHQ2TyYS33noLCxYswNdffw1FUTBixAhMnDgRu3fv\nxuLFi6FpGjp16oQXXnjh6lcQovo4tS41XBs2bBDp6enC4XAIq9UqRowYIQYPHiw+/fRTIYQQ3333\nnejfv79wOBzif//3f8WUKVOEpmkiPz9f3HHHHUIIIebMmSMyMzOFEELk5+eL+++/X5SXl4uHHnpI\nVFRUCCGEeOmll8TcuXOF0+kUffr0ESUlJUIIIdLS0kRubq549913xYIFC4QQQjgcDjF27FiRnZ1d\na92FhYUiKSnJd6rvhQsXinnz5omzZ8+Ke+65RxQWFgohhFi+fLl4/PHHhRBCDBgwQBQWFoq1a9eK\nfv36ifLycmG320Xfvn1Ffn6+KCwsFAMGDBBCCLFu3ToxZ84cIYQQP/74o+/+1WbgwIHi7NmzQggh\nXnzxRbF//36xceNGMXbsWOF0OsW5c+dEnz59xNq1a2sd49y5c2LEiBG+3rz33nti1qxZvtqrblt1\n36vu48qVK8Xvf/974Xa7hd1uF2PGjBFbtmwRu3btErfddpsoKyurs3aiunBLg/xkZ2dj6NChMBqN\nMBqNePfddzFgwAAMHjwYANC9e3dERkbiyJEjAIA+ffpAURQkJSWhpKTEN8aSJUsAAMnJycjKygIA\nLF26FB9//DGOHTuG7du3o3PnzjAYDBg8eDA+++wz3H333SgpKUHXrl3xxhtv4MCBA9i1axcAwG63\n4+DBg3WeK6xdu3bo3bs3AOC//uu/8NRTT6FPnz7o2rUr2rZtCwBIS0vD66+/ftlte/To4fuxrri4\nOJSWlsJsNvtNf/HFF3H69Gn0798fU6ZMqbOPAwYMwAMPPIBBgwZhyJAh6Ny5M95//30MHjwYBoMB\nLVq0QN++fescIzc313dyPQDQNA2RkZG+6d26dfP93bJlS9993L17N0aNGgW9Xo/Q0FAMHz4cX3/9\nNQYOHIj27dvzxHt0XRga5Kf6KcQBoLCwEOKSM80IIeDxeAAAJpMJAHyn2K5pjMOHDyMkJAQPPfQQ\nxo4di759+yImJgYHDhwAAIwYMQL/93//h9LSUtx3330AAI/Hg+nTp/vC6vz58wgLC5OuXVz4rQRN\n0y6r3e12X3bbqvtRdV8uvc/t2rXDxo0bsX37dmzZsgVvvfUWNm7c6He/q3v22WeRn5+PL7/8EtOn\nT8djjz0GRVH86rm0T5fyeDzo2bMnXnvtNQCAw+HwO/Nq9ZpDQkJ8f9d0n6ser+rzEV0LfnuK/PTq\n1Quff/45XC4XKioqMHXqVCiKgs8++wwAkJOTg7Nnz6JTp061jnH77bdjw4YNALyB8cgjjyAvLw8J\nCQl4+OGH0a1bN2zbts33Qta9e3ecOXMGH374IUaOHAkAuPPOO7F69Wq4XC7YbDb85je/QW5ubp21\nHz161BdEa9euRd++fdGtWzfk5ubixIkTAICsrCzf1siVqKrqC5iVK1di2bJlGDp0KP74xz/i/Pnz\ntf4Wi9vtxuDBgxEdHY1JkyZh5MiROHDgAO666y588skncDqdKC0txfbt2+tcfrdu3ZCTk4OjR48C\nAP785z9j0aJFV6z7zjvvxAcffACPx4OKigqsX79e+j4TXQm3NMhPamoq8vLyMHr0aGiahnHjxqF3\n796YM2cOli1bBoPBgGXLlsFoNNY6xhNPPIFnn30WI0aMgKqqWLRoETp37oxVq1bh3nvvhdFoRNeu\nXXHo0CHfbYYOHYqvvvoKcXFxAID09HQUFBRg1KhRcLvdGD169BVf+CIjI/Hyyy/j+PHjSE5ORkZG\nBsLCwjB37lw89thjcLlcaNOmDebPny/Vi5YtW6JNmzZ48MEH8Ze//AVPPvkkhg8fDlVV8dhjj9
X6\nWyyqquKJJ57A7373O4SEhCAiIgIvvPACWrVqhX379uG+++5DTEwMEhMT61x+bGwsFixYgKlTp0LT\nNLRq1QqLFy++Yt1paWk4duwYRo4cCZfLhREjRiA1NdV3GnSi68Gz3FKTcOLECYwbNw6bN2+u71KI\nmjRuaVCjcfz4cTz++OM1TsvIyLjB1QAvvPBCjb8x36VLF+mtmcrKSqSlpdU47Yknnqjza71E9YFb\nGkREJI0fhBMRkTSGBhERSWNoEBGRNIYGERFJY2gQEZE0hgYREUn7fxPj8bUUxHz+AAAAAElFTkSu\nQmCC\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x11ae97ef0>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"sns.lmplot('concave_points_sd_error', 'diagnosis',\n", | |
" data=bc_data,\n", | |
" fit_reg=False,\n", | |
" hue=\"diagnosis\", \n", | |
" scatter_kws={\"marker\": \"D\",\n", | |
" \"s\": 100})\n", | |
"\n", | |
"plt.xlabel('concave_points_sd_error')\n", | |
"plt.ylabel('diagnosis')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<matplotlib.text.Text at 0x11d7c0ba8>" | |
] | |
}, | |
"execution_count": 26, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAY0AAAFcCAYAAAA0xeJbAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xt4FfWB//H3nGsuJ4QgkYJACghYFxVQvKD5gSKy6qoP\n0AptBau2Ls9T3bWlrmjVh8p1FbUr6lrdqpXdVR7vC6IooqJQFKkBWSUqlyDiCgIhOeck5zbf3x+B\nI4EQvkCG5ODntReZM7cPw+R8MnNm5jjGGIOIiIgFX2sHEBGR3KHSEBERayoNERGxptIQERFrKg0R\nEbGm0hAREWuB1g5ga9u2Wk+XX1JSwM6dcU/X0ZJyKW8uZQXl9VIuZYUjz1taWtSCadoGHWnsFgj4\nWzvCIcmlvLmUFZTXS7mUFXIv79Gg0hAREWsqDRERsabSEBERayoNERGxptIQERFrKg0REbGm0hAR\nEWsqDRERsZYzd4Tnsvp0gsqdXxBLxSgMFtK35ETyAuFWXT9glWnveUO+ECk3xabarwDoVfxDepf0\n5POd61m3a2P2tX4df0ReIJyd11ebwa3388N23dhY82WjdSYyCRZtWsLGXVVUJ2oI+gIUh9vxg8If\n0D4cYVPtVwScAAXBAlKZJBtrN+Mal455HejboRcd8ztml1tdv4tv4lv5Jr6NjHE5Pr8jrnHZUFNF\nIpOkNL8jJ3XozYaaKnbV1xBPxzEG0m6KVCZFigw+HAI+H4lMGte4gMHBh4uLg0PYF6IgUEA0HSPj\npnF8Dnm+fML+EHWZOnyOj4JgPsYYapK1pDJpfI5DXiCPsC/MruQu0iaDg0OhU0DGlyGeqWu0zSOB\nQroVdeHU0n4k3RRrt1fydWwrGZPBj4+USRNLx3Fw8OGjIJhH2s1gXAMOuBh8OPgcH8aFpElgMPgc\nP+0CEfJDBQR9ATImzQmFXehS1JnPq9dRk6gl7WZIpBPsStaAA4WBAnqX9KTfcT9q9O+65ttPWbdr\nIxk3g+PAt3U7SLlpgr4grslggM6FP6BjfgfW7vicZCZB0BfA+Ay76qMUBvIJ+gKk3DQBX4Dj8jqA\nA37HT/eiEwj6gsTScb6t20Eindg9LkAyk6AmGaUkXEyP4u7Z6aoTu2gfKqZ9XnGT+/K+PwOdCzvx\n169XsCtRQ3G4HUO7nktxuF2L/+wdi5xc+eY+rx8jUlpa5Mk6lm1ZwfKvV5ByU9nXgr4gZ3cexOAu\ngw57ubZ5m1p/XToBGPIDec1m2nveaCpGTaKWjHHxOT78jg/XGFzc7BsUgOM4tAsV0au4B9vqviXl\npggE/FTX1RBP1VEQLCASLABgZ/0uYun47jfnwxPyhfA5Dn7HT12m/oiWJQfm4NAxvwO9inuwbtcG\napINBZMxmRZfz97/Nbv/50Aa9j0/PsfBcRwKg4WUhIsb7cv7/gxsr9tBPF2f3Y8BfI6P/qWnMO7k\nKxst/0jfF47Fx4h4eqSxatUqZs2axZw5cxq9vnjxYh566CECgQCjR4/myiuvPMASctuyLSt496tl\n+72eclPZ14+kOA5n/dFUnGgyCkDGZIgEC5vMtPe80VSMXYna7Buya1zM7sIAcAE/4Hd8GGPYUV/N\njvqPKA4XEQkWUpOIEk3GGpa1e92JdIJY+sifQZR0kzg4zb6xyJEzGLbVbefbuh3ZXxBaujD2rGfv\n/x6Mi8E1afz48ePL7l977/d7/3lPYQDZ/bnhFyCXv21dBbBfcUhjnn2m8dhjj3H77beTSCQavZ5K\npZgxYwaPP/44c+bMYe7cuXz77bdexWg19ekEy79e0ew0y79eQSKTPGrrd40hloplh2Op/X/LX/71\nCnYlarPzusYlmoztN92ewvhu2RmMafhRd42bnS/tprOFsUc0GSWebnxK5kioMI4eg9n979vyhXEk\nXONm94JYKoZrDMu2fMCyLe9np0m76WxhNDUf
QMW2j6nZXTzSNM9Ko3v37syePXu/19etW0f37t0p\nLi4mFApx+umns2JF82+uuahy5xeNTgk1JeWmqNzx+VFbf30mwd5nI40x1Gf2KXU3xdub38vOW59O\n7FcYTb1Jm92vm72mdY1L7e4f4L2lTUZv9Dms+RNGrWPvfW/Pfl2bilKb+q4Aavf55SU77z777Ntf\nvutt2Bzn2empESNGsHnz5v1ej0ajFBV9d56vsLCQaPTgzV5SUuD5Eydb8vyjrzZjldfJcw97vc3N\n19T6nYwBx9knwP5P8kw6dd+9lsH6DcKwe/Hmu+Hs28ve621r7zhybHDI7meOs+c013f7t3EOsOPt\nNR9Awqlv9LN1LH4ucSSO+tVTkUiEWGyvUySxWKMSORCvn8Hf0h+Eu/V+0umDH8Kbet9hrfdgeZta\nvzEO7Hvdg2G/6UIm/7vXXNinZg7I2b28vYf3fKC533pFWprJ/r+GfX33i3v2ZcccYE/eaz6AsMnL\n/mzpg/D9HfX7NHr16kVVVRXV1dUkk0k+/PBDBgwYcLRjeK5vyYkEfcFmpwn6gvTt0PuorT/PH8bZ\n6zcqx3HI8ze+NDHoCzK063nZefMC4ewHn9n5mqiRPQXh7DWtz/FRFCzEt8/RTcDxN7kMyQ1OG/zX\n23vf27NfFwUjFAUj2WmKQoVNz7vPPju0W7m3YXPcUSuNefPmMXfuXILBIJMmTeK6665j7NixjB49\nmk6dOh2tGEdNXiDM2Z2bvzLq7M6DCPtDR239vt2XJO5RGCzYrxDO7jyI4nBRdl6f4yMSKtxvOt8+\nu47P8eM4DW8mPseXnS/gCxDZ54c1EopQEMg/0r9iVtt7Czt2Obsvr/Y5bevLiXyOL7sXFO7+RWVw\nlzMZ3OWs7DQBX4CCvS4z33c+gP6lp9AuFEEOTPdp7Kb7NHSfhhyY7tM4PMfi6SmVxm5elQZAIpOk\ncsfnRFNxIsEC+nbofcRHGIeSt6n1A1aZ9p437A+ScjNsqtkMGHq170Hvkl58vnMd66o3AA692pfR\nr+PJhP2h7LxOnoup9/HD4jI27qpqtM5EJsmbVe+wvqaK6vpdhHxBivPa0aXgB7QLF7GpdjMBJ0Bh\nsICkm2JjzaaGO8LzO9C3pDcd8ztkl1udqOGb+Da+iW8l47p0yu+Ii8v6XVUkMglK8zvyo+P6smHX\nRqoTu4in6sA4JE2CVCadvVM74PeRTKfI7C4gB6ehHB2HsC9MQSCfWCpOyk3h8/nI8+cR9oeJp+P4\nHD+FwXxcY6hN1pLMpPA5PvIDeeT5Q1QnakiZND4cIk4hKV+Guky80dtiJFBI93ZdObW0H6lMkk93\nfMbX0W/ImDQ+/LvvCI/tdUd4ARk3jXFdcBwyexe5CwmTBAyO46M4VERBoICA30/aZOha2IUTirrw\n2c4vGn4pcNPUZ5LsSuxquCM8WLj7jvCTGv27rvn2E9ZVV5ExGRzH4du67aQyacK+IGkyGAMnFHbi\nuIKOfLq9koSbJOQLgB+q4zUUBgsI+oIkM0mC/iAdwx0wTsM9E93bdSPoCxBLxdlet4P6TD3gEHT8\n1LtJahK1lITb06P97jvCU3GqE9W0D7enfbhdk/vyvj8DXSKdWbblfXYlaikOFzG0W3mTRxgqjf2p\nNHbzsjS8kEt5cykrKK+XcikrqDSaogcWioiINZWGiIhYU2mIiIg1lYaIiFhTaYiIiDWVhoiIWFNp\niIiINZWGiIhYU2mIiIg1lYaIiFhTaYiIiDWVhoiIWFNpiIiINZWGiIhYU2mIiIg1lYaIiFhTaYiI\niDWVhoiIWFNpiIiINZWGiIhYU2mIiIg1lYaIiFhTaYiIiDWVhoiIWFNpiIiINZWGiIhYU2mIiIg1\nlYaIiFhTaYiIiDWVhoiIWFNpiIiINZWGiIhYU2mIiIg1lYaIiFhTaYiIiDWVhoiIWFNpiIiINZWG\niIhYU2mI
iIg1lYaIiFhTaYiIiDWVhoiIWFNpiIiINZWGiIhYU2mIiIg1z0rDdV3uvPNOxowZw7hx\n46iqqmo0/n/+538YOXIko0eP5r//+7+9iiEiIi0o4NWCFy1aRDKZZO7cuVRUVDBz5kz+/d//PTv+\n7rvvZv78+RQUFHDppZdy6aWXUlxc7FUcERFpAZ6VxsqVKykvLwegf//+rFmzptH4vn37UltbSyAQ\nwBiD4zheRRERkRbiWWlEo1EikUh22O/3k06nCQQaVtm7d29Gjx5Nfn4+w4cPp127ds0ur6SkgEDA\n71VcAEpLizxdfkvLpby5lBWU10u5lBVyL6/XPCuNSCRCLBbLDruumy2MtWvX8vbbb/Pmm29SUFDA\nzTffzKuvvsrFF198wOXt3Bn3KirQsGNs21br6TpaUi7lzaWsoLxeyqWscOR5j8XC8eyD8IEDB7Jk\nyRIAKioq6NOnT3ZcUVEReXl5hMNh/H4/HTp0oKamxqsoIiLSQjw70hg+fDhLly5l7NixGGOYPn06\n8+bNIx6PM2bMGMaMGcPPfvYzgsEg3bt3Z+TIkV5FERGRFuIYY0xrh7Dh9SHt9+2w+WjKpaygvF7K\npayg01NN0c19IiJiTaUhIiLWVBoiImJNpSEiItZUGiIiYk2lISIi1lQaIiJiTaUhIiLWVBoiImJN\npSEiItZUGiIiYk2lISIi1lQaIiJiTaUhIiLWVBoiImJNpSEiItZUGiIiYk2lISIi1lQaIiJiTaUh\nIiLWVBoiImJNpSEiItZUGiIiYk2lISIi1lQaIiJiTaUhIiLWVBoiImJNpSEiItZUGiIiYk2lISIi\n1lQaIiJiTaUhIiLWVBoiImJNpSEiItZUGiIiYk2lISIi1lQaIiJiTaUhIiLWVBoiImJNpSEiItZU\nGiIiYk2lISIi1lQaIiJiTaUhIiLWVBoiImJNpSEiItZUGiIiYi3g1YJd12Xy5MlUVlYSCoWYOnUq\nZWVl2fGrV69m5syZGGMoLS3lnnvuIRwOexVHRERagGdHGosWLSKZTDJ37lwmTpzIzJkzs+OMMdxx\nxx3MmDGDp59+mvLycr766iuvooiISAvx7Ehj5cqVlJeXA9C/f3/WrFmTHbdhwwbat2/Pk08+yeef\nf86QIUPo2bOnV1FERKSFeFYa0WiUSCSSHfb7/aTTaQKBADt37uSjjz7izjvvpHv37kyYMIF+/fpx\nzjnnHHB5JSUFBAJ+r+ICUFpa5OnyW1ou5c2lrKC8XsqlrJB7eb3mWWlEIhFisVh22HVdAoGG1bVv\n356ysjJ69eoFQHl5OWvWrGm2NHbujHsVFWjYMbZtq/V0HS0pl/LmUlZQXi/lUlY48rzHYuF49pnG\nwIEDWbJkCQAVFRX06dMnO65bt27EYjGqqqoA+PDDD+ndu7dXUUREpIV4dqQxfPhwli5dytixYzHG\nMH36dObNm0c8HmfMmDFMmzaNiRMnYoxhwIABDB061KsoIiLSQhxjjGntEDa8PqT9vh02H025lBWU\n10u5lBV0eqopurlPRESsWZXG6tWreeKJJ0gmk1x77bWcffbZLFy40OtsIiLSxliVxtSpU+nXrx8L\nFy4kLy+PF198kUcffdTrbCIi0sZYlYbrugwaNIi3336biy66iM6dO5PJZLzOJiIibYxVaeTn5/P4\n44+zfPlyzj//fP7yl79QWFjodTYREWljrEpj1qxZxONxZs+eTXFxMVu3buW+++7zOpuIiLQxzZbG\n//7v/wKwadMmzjrrLDKZDCtWrGDo0KFs2rTpqAQUEZG2o9mb+5555hmmTJnCAw88sN84x3F46qmn\nPAsmIiJtT7OlMWXKFADmzJnT6PV9H0YoIiLfD1afabz11lvcc889xGIxLr74YoYNG8Z//dd/eZ1N\nRETaGKvSePDBBxk1ahQLFizg1FNPZfHixTz//PNeZxMRkTbG+jEivXr14u
233+aCCy6gsLCQVCrl\nZS4REWmDrEqjY8eOTJkyhY8//pjy8nJmzpxJly5dvM4mIiJtjFVp3HvvvZxyyin853/+JwUFBXTr\n1o17773X62wiItLGWH2fRmFhIbFYjFmzZpFOpznrrLMoKCjwOpuIiLQxVqVx9913U1VVxejRozHG\n8MILL7B582Z+//vfe51PRETaEKvSWLp0KS+99BI+X8PZrKFDh3LZZZd5GkxERNoeq880MpkM6XS6\n0bDf7/cslIiItE1WRxqXXXYZ48eP59JLLwXglVdeyf5ZRES+P6xKY8KECfzoRz9i+fLlGGOYMGEC\nQ4cO9TiaiMix7f333+eVV14hEonwL//yL0dlnS+88AKdO3fmnHPOOaz5rUoDoEuXLgwbNgxjDAAr\nVqxg0KBBh7VSERH5ztEqDIBRo0Yd0fxWpfGHP/yBt956i27dumVf01NuRUQOXW1tLb/5zW9IJBIU\nFRWRSCS47rrr+POf/8xjjz3G0qVLqamp4YILLuCGG27g1Vdf5dFHH6WkpIR4PM6sWbN48MEHCYVC\nVFVVkclkeOSRR0ilUtx8883U19cTCASYOnUqeXl5/OY3v8F1Xdq1a8f999/PY489Rs+ePSktLeXe\ne+/FcRwGDRrExIkTrfJbXz312muvkZeXd0QbS0Tk++6FF17gvPPO4xe/+AVPPfUU77zzDkD2K7Sf\nfPJJkskkl156KRMmTGD27Nk8++yzBIPBRlet/t3f/R133XUXd9xxB8uXL2fFihVcccUVXHbZZSxb\ntox7772Xyy67jJ49e3LHHXfw7rvvUlNTk51/8eLF/PznP+fyyy/n2WefxRiD4zgHzW919VS3bt2y\np6VEROTwrV+/npNPPhmA0047Lfu6z+ejrq6OiRMnMm3aNFKpFDt37uS4446jsLCQUCiUnQ+gT58+\nABx//PEkEgnWr1/PgAEDABg4cCDr169nyJAhdOvWjV/96le88sorBALfHSdcf/31fPzxx4wfP55N\nmzbhuq5VfqsjjeLiYi699FIGDBhAKBTKvj5jxgyrlYiISIOysjJWrVrFmWeemf12VIC1a9fyySef\n8Mgjj/Dll1/y6quvctxxx7Fjxw7i8TjBYJBPP/00O/2+RwU//OEPqaiooGvXrqxcuZJu3brxwQcf\ncMIJJ/DEE0/wxBNPsGDBguz08+fPZ8yYMZx44olMmDCBdevWZYuoOValUV5eTnl5uc2kIiLSjLFj\nx/Lb3/6WJUuWUFpamn29rKyM2tpafvKTn1BYWEjHjh2Jx+PcdNNNXHXVVZSUlBAIBBodLextwoQJ\n3HrrrTz99NM4jsO0adOIRCLcdNNNPP300wSDQaZNm8azzz4LwMknn8ykSZMoLCykU6dO9OrVyyq/\nYyzOO23ZsqXxTI5DOBymQ4cOVitpCdu21Xq6/NLSIs/X0ZJyKW8uZQXl9VIuZYUjz1taWnTEGf7j\nP/6Da6+9Ftd1GTlyJM8//3yjMz5Hm9WRxq9//Ws+++wz+vbtizGGzz//nNLSUvx+P1OmTDns631F\nRKR5rusyatQowuEwY8eObdXCAMvS6NSpE1OmTKFfv34AVFZW8uCDD3Lbbbdx44038txzz3kaUkTk\n++r666/n+uuvb+0YWVZXT3311VfZwgDo27cvmzZtonPnztnLxERE5NhndaTRrVs3Zs2axRVXXIHr\nusyfP5+ysjI++uij7JNvRUTk2Gf1jn/33XeTTqeZOHEikyZNwnVdpk+fzpdffskf/vAHrzOKiEgb\nYXWkEYlEmDRpUnbYGMPmzZu5/PLLPQsmIvJ9V5dIs+rzbdTEkrQrDHFa71Lyw9aPDPSE1drnzJnD\n/fffT11dXfa1E044gUWLFnkWTETk++yN96tYtGITydR3nxu/+PYXXDioO8PPKjusZbquy+TJk6ms\nrCQUCjF16lTKyg5tWVanp5544glefv
llLrnkEt544w2mTZvW6PZ3ERFpOW+8X8WCZRsaFQZAMpVh\nwbINvPF+1WEtd9GiRSSTSebOncvEiROZOXPmIS/DqjSOO+44unXrRt++ffnss88YNWoUGzZsOOSV\niYhI8+oSaRat2NTsNItWbKI+kW52mqasXLky+3SP/v37s2bNmkNehlVp5Ofns3z5cvr27ctbb73F\ntm3bGj0tUUREWsaqz7ftd4Sxr2Qqw6rPtx3ysqPRKJFIJDvs9/sbfZW3DavSuP3221m8eDHl5eVU\nV1fz93//91x11VWHllZERA6qJpZs0en2FolEiMVi2WHXdQ/4LKsDsZq6T58+3HbbbQDMnj37kFYg\nIiL22hXaPSbEdrq9DRw4kLfeeotLLrmEiooKq6fa7qvZ0vjHf/xH/vSnP3HBBRc0+eUcb7755iGv\nUEREDuy03qW8+PYXzZ6iCgX9nNa79IDjD2T48OEsXbqUsWPHYoxh+vTph7yMZktj4MCBvPTSS9x4\n442HvGARETl0+eEAFw7qzoJlB77Y6MJB3ck7jPs1fD4fd91115HEa740Nm7cyMaNG/nyyy+pqqpi\nyJAh+Hw+3nvvPU488URGjhx5RCsXEZH97bkPY9/7NEJB/xHdp9ESmi2NPd/MN27cOF5++eXs92fs\n2rWLX//6196nExH5nhp+Vhnl/U/Y747wwznCaElWa9+6dSvt27fPDufn57Nt26Ff7iUiIvbywgHO\n6te5tWM0YlUaQ4cO5ZprruGiiy7CdV1ee+01Lr74Yq+ziYhIG2NVGrfeeisLFy7kgw8+wHEcrr32\nWoYNG+Z1NhERaWOsT46NGDGCESNGeJlFRET2Up+q5+OtldQmohSFI5xyfF/ygnmtmsmzT1Rsn6Z4\nxx13UFxczO9+9zuvooiI5JzF65fx9oZlJDOp7Gvz1r7B0B6DuaDn4CNa9qpVq5g1axZz5sw55Hk9\n+9o9m6cpPvPMM3z22WdeRRARyUmL1y/j9S/eaVQYAMlMite/eIfF65cd9rIfe+wxbr/9dhKJxGHN\n71lpHOxpin/7299YtWoVY8aM8SqCiEjOqU/V8/aG5kvh7Q3LqE8f3pt+9+7dj+hxUJ6dnjrQ0xQD\ngQBbt27loYce4sEHH+TVV1+1Wl5JSQGBgN+ruACUlhZ5uvyWlkt5cykrKK+XcikrHP28H2+t3O8I\nY1/JTIo136zljBMO/XuNRowYwebNmw83nnel0dzTFF977TV27tzJ9ddfz7Zt26ivr6dnz56MGjXq\ngMvbuTPuVVSgYcfYtq3W03W0pFzKm0tZQXm9lEtZ4cjzHk7h1CaiVtPVJGIHn8gDnpVGc09THD9+\nPOPHjwfghRdeYP369c0WhojI90VROHLwiYB24UKPkzTNs880hg8fTigUYuzYscyYMYNbb72VefPm\nMXfuXK9WKSKS8045vi8hf7DZaUL+IP06nXSUEjXmGGNMq6z5EHl9SPt9O2w+mnIpKyivl3IpK7TO\n6Sn47uqpA7noxCFHfNnt4WrdJ1+JiMh+9hTCvvdphPzBFrlP40ioNERE2qALeg5mcPfTWfPNWmoS\nMdqFC+nX6STyAuFWzaXSEBFpo/IC4cO6rNZLnn0QLiIixx6VhoiIWFNpiIiINZWGiIhYU2mIiIg1\nlYaIiFhTaYiIiDWVhoiIWFNpiIiINZWGiIhYU2mIiIg1lYaIiFhTaYiIiDWVhoiIWFNpiIiINZWG\niIhYU2mIiIg1lYaIiFhTaYiIiDWVhoiIWFNpiIiINZWGiIhYU2mIiIg1lYaIiFhTaYiIiDWVhoiI\nWFNpiIiINZWGiIhYU2mIiIg1lYaIiFhTaYiIiDWVhoiIWFNpiIiINZWGiIhYU2mIiIg1lYaIiFhT\naYiIiDWVhoiIWFNpiIiINZWGiIhYU2mIiIg1lYaIiFhTaYiIiDWVhoiIWAt4tWDXdZk8eTKVlZWE\nQi
GmTp1KWVlZdvz8+fP5y1/+gt/vp0+fPkyePBmfTx0mItKWefYuvWjRIpLJJHPnzmXixInMnDkz\nO66+vp4//vGPPPXUUzzzzDNEo1Heeustr6KIiEgL8aw0Vq5cSXl5OQD9+/dnzZo12XGhUIhnnnmG\n/Px8ANLpNOFw2KsoIiLSQjw7PRWNRolEItlhv99POp0mEAjg8/no2LEjAHPmzCEej3Puuec2u7yS\nkgICAb9XcQEoLS3ydPktLZfy5lJWUF4v5VJWyL28XvOsNCKRCLFYLDvsui6BQKDR8D333MOGDRuY\nPXs2juM0u7ydO+NeRQUadoxt22o9XUdLyqW8uZQVlNdLuZQVjjzvsVg4np2eGjhwIEuWLAGgoqKC\nPn36NBp/5513kkgkePjhh7OnqUREpG3z7Ehj+PDhLF26lLFjx2KMYfr06cybN494PE6/fv147rnn\nOOOMM7j66qsBGD9+PMOHD/cqjoiItADPSsPn83HXXXc1eq1Xr17ZP69du9arVYuIiEd0Y4SIiFhT\naYiIiDWVhoiIWFNpiIiINZWGiIhYU2mIiIg1lYaIiFhTaYiIiDWVhoiIWFNpiIiINZWGiIhYU2mI\niIg1lYaIiFhTaYiIiDWVhoiIWFNpiIiINZWGiIhYU2mIiIg1lYaIiFhTaYiIiDWVhoiIWFNpiIiI\nNZWGiIhYU2mIiIg1lYaIiFhTaYiIiDWVhoiIWFNpiIiINZWGiIhYU2mIiIg1lYaIiFhTaYiIiDWV\nhoiIWFNpiIiINZWGiIhYU2mIiIg1lYaIiFhTaYiIiDWVhoiIWFNpiIiINZWGiIhYU2mIiIg1lYaI\niFhTaYiIiDWVhoiIWAu0doDW9vK763h5aVV2+Ipzy7iivFd2uDqaYOH7m1j31S52xZJE8oO0j4Tp\n2inClm0xHJ+DcQ1dOhawfkstW7ZHybgGYwzReBqzezkhPwSDAfLz/KTThkQyA47BwcFxDOkMGGNI\nZQwOEAr4CAR8JFIZXLfhtbTbOLsPCAWhPnVk28BxwJjvhgO+/dfV5Hy7/2/vSYM+MD6HdNocYK6G\nedi9Tgcwu1/z+x3aRQJk0pBxDQWhAPnhAEWFQb7ZUUd9Ko1x4fj2+eTnB0imXHwO1CddXNclkcoQ\nyQ8S8DvE6tNE69I4DhQVhOjYPkwiadhRU0dNNInjc+hUkk9BXsNyAgEfpcV5GKAokkfX4/I57cSO\n1CczLHx/E1Xf1BIM+Oh/YkcK8gIkUi5F+UF+9MMS8kKNf4zqk2k+3biT2roURflBenRpx4YtNdnh\npuYRyRWd6kUoAAAQDklEQVSOMebAP91tyLZttS2+zGtnLj7guMcnXcCf53/CB2u3krJ5B5VjRsDv\n4Pf5cBxIpV1cY6DhfwHwOVBSlEekIEgo4OO8UzpTfloXAN5dtYX3Pv6a5O59JhpPEatPUZgXJFIQ\nBNhvniNVWlrkyc+HF3IpKxx53tLSohZM0zZ49uuO67pMnjyZyspKQqEQU6dOpaysLDt+8eLFPPTQ\nQwQCAUaPHs2VV17pVZQmNVcYe8b7fQ4ZNyc6VVpQOmNIZzIHHO8a2FFbD0CkIMjij77Kjtv7z9F4\nitp4EiD730hBkGTazU7XUsUhcrR49pnGokWLSCaTzJ07l4kTJzJz5szsuFQqxYwZM3j88ceZM2cO\nc+fO5dtvv/Uqyn5efned1XQqDDkQY6AmlsDdvY8sWb2FJau27DXeEK1rfN4wWpfKTg/w3sdfN5ym\nFMkhnpXGypUrKS8vB6B///6sWbMmO27dunV0796d4uJiQqEQp59+OitWrPAqyn72/gxD5HBlXEP9\n7jf92liKmvh3JVGfyLDvmV9jvpseIJl2+aRqx9EJK9JCPDs9FY1GiUQi2WG/3086nSYQCBCNRikq\n+u5cX2FhIdFotNnllZQUEAj4vYorclgcB4IBH3VOw4f5wUDD72F1
DjiOc8Dps8N+f4uc986lc+e5\nlBVyL6/XPCuNSCRCLBbLDruuSyAQaHJcLBZrVCJN2bkz7k1QkSNgTMOH5Xs+KM9eNGHY70ij0fR7\nhjOZI/5gOJc+XM6lrKAPwpvi2empgQMHsmTJEgAqKiro06dPdlyvXr2oqqqiurqaZDLJhx9+yIAB\nA7yKsp8rzi07+EQiB+H3OeSFGo5+iwqDtNt9dRRAXti/35GG43w3PTRcRXVyWYejE1akhXh2pDF8\n+HCWLl3K2LFjMcYwffp05s2bRzweZ8yYMUyaNInrrrsOYwyjR4+mU6dOXkXZzxXlvaw+19DVU3Ig\njgPtCsP4fA3F8P9ObbgKas9VUY7jEMkPZq+aAojkB7PTA5x3SmfCIZ1yldyi+zQOQPdpfH/pPg3v\n5FJW0OmppnyvSwNgwV838Nw7G7LDPx7Sg0vO6ZEdroklee39KtZ9tYvqWIqivCDti0J0/0GEzVtj\n+HwOrmvoWlrIF1/VsOXbKBkXwFATT2XvtA4FHIIBP5H8AIm0SzKZwWDwOXvdEe423BEOEA76CPgb\n7gjPuAafA6l9rs70ORAOQl2SI7LvHeFBH6QsetJHw4e/e8cK+cE4Dqlm7gj3AaapO8IDDu0LA6Qy\nkMkYCsJBCsJ+2hWG+L8dceqTaYyBTu0LsneEO0BdKoNxXRIpl0hBkKDPIZpIE42lsneEl5bkk0hm\n2LGrjupYEp/P4QcdCsgPB0imMgQDfkqLwxggUphH144FnNarI4lUhtfer2q4I9zvo3/v0oY7wpMZ\nIgVBTi7rsN/RQiKZ4ZOqHUTjKSIFQXp1KWbdll3Z4abmORK59EacS1lBpdGU731p7PF925mPplzK\nCsrrpVzKCiqNpuiBhSIiYk2lISIi1lQaIiJiTaUhIiLWVBoiImJNpSEiItZUGiIiYi1n7tMQEZHW\npyMNERGxptIQERFrKg0REbGm0hAREWsqDRERsabSEBERa559c19b4boukydPprKyklAoxNSpUykr\n++7rXhcvXsxDDz1EIBBg9OjRXHnllQedp63lBRg5ciSRSASArl27MmPGjDaRF6Curo5rrrmGadOm\n0atXrza9fZvKC62zfQ+Wdf78+fzlL3/B7/fTp08fJk+eDNBmt21TeX0+X5vddxcuXMijjz6K4zhc\ndtllXH311a2677YZ5hi3cOFCc8sttxhjjPnoo4/MhAkTsuOSyaS58MILTXV1tUkkEmbUqFFm27Zt\nzc7TFvPW19ebK6644qhltM1rjDGrV682I0eONIMHDzZffPGF1TxtLW9rbd/mstbV1Zlhw4aZeDxu\njDHmN7/5jVm0aFGb3bYHyttW9910Om2GDx9uampqTDqdNhdddJHZvn17q27ftuKYPz21cuVKysvL\nAejfvz9r1qzJjlu3bh3du3enuLiYUCjE6aefzooVK5qdpy3mXbt2LXV1dVx77bWMHz+eioqKNpEX\nIJlM8tBDD9GzZ0/redpa3tbavs1lDYVCPPPMM+Tn5wOQTqcJh8NtdtseKG9b3Xf9fj8LFiygqKiI\n6upqXNclFAq16vZtK47501PRaDR76AsNO0M6nSYQCBCNRikq+u6btQoLC4lGo83O0xbz5uXlcd11\n1/GTn/yEjRs38qtf/YrXXnut1fMCnH766Yc8T1vL21rbt7msPp+Pjh07AjBnzhzi8Tjnnnsur776\napvctgfK+9lnn7XZfTcQCPD6669z1113MWTIEPLz81t1320rjvm/aSQSIRaLZYdd183+A+87LhaL\nUVRU1Ow8bTFvjx49KCsrw3EcevToQfv27dm2bRudO3du1bwtOU9LOZx1t9b2PVhW13W555572LBh\nA7Nnz8ZxnDa9bZvK29b33YsuuogLL7yQSZMm8dJLL7Xq9m0rjvnTUwMHDmTJkiUAVFRU0KdPn+y4\nXr16UVVVRXV1Nclkkg8//JAB
AwY0O09bzPvcc88xc+ZMAL755hui0SilpaWtnrcl52kph7Pu1tq+\nB8t65513kkgkePjhh7Onfdrytm0qb1vdd6PRKFdddRXJZBKfz0d+fj4+n69Vt29bccw/sHDP1Q6f\nffYZxhimT5/OJ598QjweZ8yYMdmrkYwxjB49mp///OdNzrPnKpq2mDeZTHLrrbeyZcsWHMfhd7/7\nHQMHDmwTefcYN24ckydPbnT1VFvcvk3lba3t21zWfv36MXr0aM444wwcxwFg/PjxDBs2rE1u2wPl\nHTJkSJvdd+fOnctzzz1HIBCgb9++3HHHHTiO02rbt6045ktDRERazjF/ekpERFqOSkNERKypNERE\nxJpKQ0RErKk0RETEmkpDmnTrrbcyYsQI5s+ff9jLmDt37kHnnz17NrNnz7Ze5vvvv8+4ceMA+P3v\nf8/HH3982PkOx5tvvsm//du/HdV1irQl369bGcXaiy++yOrVqwmFQoe9jI8++ogzzzyzBVM1Nm3a\nNM+WfSDDhg1j2LBhR329Im2FSkP2M2HCBIwxDB48mOLiYo4//njC4TAPPvggt912G9988w1bt27l\njDPO4O677wZg1qxZLFq0CL/fz5gxY+jduzeLFy9m+fLllJaW0qlTJ6ZMmUI8HmfHjh1cc801jB8/\n3irPe++9x4wZMwiHw/To0SP7+rhx47jhhhsAeOSRRzDGsGnTJkaMGEFRURGLFi0C4NFHH6Vjx44s\nWbKEBx54gHQ6TdeuXZkyZQolJSVccMEFXH755bz33nvU1dXxr//6r/Tr148nnniCF198EZ/Px6mn\nnspdd93FCy+8wAcffMDMmTOpqKhg2rRpJBIJSkpKuOuuuygrK2PcuHGccsoprFy5kh07dnD77bcz\nZMiQA/79Zs+ezZYtW6isrGT79u3cdNNNLF++nFWrVnHSSSdx//334zgOjz76KK+++iqZTIbzzjuP\nm2++GcdxuP/++/nrX//Krl27KCkpYfbs2ZSWlnLeeecxYsQIVq5cid/v549//CPdunU73N1CpMFR\nfqqu5Ig+ffqYL7/8MvtfY4yZN2+eefjhh40xxiQSCXPhhReajz/+2CxYsMCMHTvWJBIJE41GzeWX\nX262bt1qbrnlFvP8888bY4yZOnWqWbZsmTHGmE2bNpn+/fsbY4x54IEHzAMPPHDAHIlEwpx77rnZ\nx5Tfdttt5qqrrjLGGHPVVVeZ5cuXm+XLl5sBAwaYLVu2mHg8bvr372+efvppY4wxkyZNMk8++aTZ\nvn27ufzyy011dbUxxpinn37a3HbbbcYYY84//3zzxBNPGGOMeeqpp8wNN9xgUqmUOeuss0wymTSZ\nTMbceeed5v/+7//M888/b2655RaTSCTM+eefb1atWmWMMWbBggVm1KhR2VxTp041xhjz5ptvmpEj\nRza7rR944AEzatQok0qlzPvvv29OOukk8/nnn5tUKmWGDx9uPv30U/POO++YG2+80aTTaZPJZMxv\nf/tb89JLL5mNGzeaG264wWQyGWOMMTfffLP585//nP03fOONN4wxxsyYMcPMmDGj2RwiNnSkIc06\n7rjj6Nq1KwD/8A//wOrVq3nyySdZv3491dXVxONxVqxYwcUXX0woFCIUCvHyyy/vt5xJkybx7rvv\n8qc//YnKykri8bjV+isrKzn++OMbfRlSU58p9OnTJ/uQu5KSEs455xwAunTpQk1NDatWreLrr7/O\nHt24rktxcXF2/j2Pu+7duzevv/46gUCAAQMG8OMf/5hhw4bx85//nE6dOmWn37hxI+3atePUU08F\n4OKLL+bOO++ktrZ2v+VVV1cf9O957rnnEggE6NKlC6WlpZx44okAdOrUiV27dvHXv/6V1atXM2rU\nKADq6+vp0qULV1xxBbfccgvPPvssGzZsoKKigu7duzf59/rwww8PmkPkYFQa0qy8vLzsn+fMmc
PC\nhQu58sorGTx4cPb5O/s+5XPz5s106NCh0Ws33XQT7dq14/zzz+eSSy7hlVdesVq/4zi4rpsd9vv9\nTU4XDAYbDe87XSaTYeDAgTzyyCMAJBKJRk8rDYfD2fXt8fDDD1NRUcGSJUv45S9/yaxZs7Lj9s60\nhzGGTCZzwOU1Z+/8TT01NZPJcPXVV3PNNdcAUFNTg9/vZ82aNUycOJFf/OIXjBgxAp/Ph9nryUB7\n5zB6YpC0AF09JdaWLl3KmDFjuPzyy3Ech7Vr1+K6LoMGDeKNN94glUpRV1fHL3/5S7755hv8fn/2\nTXTp0qX80z/9ExdeeCErVqwAyI5rTt++fdm+fTtr164FsC6bfZ122mlUVFSwYcMGoKEQ9nwe05Qd\nO3Zw8cUX06dPH/75n/+Zc889l8rKyuz4nj17Ul1dzerVqwFYsGABXbp0oX379oeV72DOPvtsXn75\nZWKxGOl0ml//+tcsXLiQFStWcOaZZ/LTn/6UE088kaVLl1ptV5HDpSMNsXb11VczefJkHn/8cQoL\nCxkwYACbN2/mJz/5CWvWrGHUqFG4rsv48ePp0aMHgwcP5r777qOoqIgbb7yRn/3sZ7Rr144ePXpw\nwgknsHnz5oOuMxgMct9993HzzTcTCAQ4+eSTDyt7aWkp06dP56abbsJ1XTp16sQ999xzwOk7dOjA\n2LFj+fGPf0x+fj6dO3dm5MiRvP7660DDN9Hdf//9TJkyhbq6OoqLi7n//vsPK5uNCy64gLVr13Ll\nlVeSyWQoLy9n5MiRbN26lRtuuIHLLruMYDBI3759rbaryOHSU25FRMSajjSkTRg3bhw1NTX7vT52\n7Fh++tOftkKilvXkk0/y4osv7vf68ccfz2OPPdYKiUQOj440RETEmj4IFxERayoNERGxptIQERFr\nKg0REbGm0hAREWsqDRERsfb/ARqMUp2kMDE5AAAAAElFTkSuQmCC\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x11d7457f0>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"sns.lmplot('fractal_dimension_mean', 'diagnosis',\n", | |
" data=bc_data,\n", | |
" fit_reg=False,\n", | |
" hue=\"diagnosis\", \n", | |
" scatter_kws={\"marker\": \"D\",\n", | |
" \"s\": 100})\n", | |
"\n", | |
"plt.xlabel('fractal_dimension_mean')\n", | |
"plt.ylabel('diagnosis')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"These plots show, for each of the three most predictive features, the range of values corresponding with 'benign' (0) or 'malignant' (1). (Clearly there is a lot of overlap.) For each feature, as its value gets larger, we are more likely to see a 'malignant' target value. We could also look at a logistic regression to see the likelihood of a tumor being benign or malignant for the range of values for each feature." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Build a model to predict the malignant tumors.\n", | |
" * Use at least two classification techniques; compare and contrast the advantages and disadvantages of each.\n", | |
" * Identify how you would control for overfitting in each classification technique.\n", | |
" * Evaluate the performance of each model.\n", | |
" * In each model, identify the most important predictive variables and explain how you identified them.\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# train test split the features and target data\n", | |
"\n", | |
"X_train, X_test, y_train, y_test = train_test_split(bc_X, bc_y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# We need to standardize the data (to have mean of 0 and std of 1) to allow for equal comparison of variables???\n", | |
"\n", | |
"scaler = StandardScaler()\n", | |
"X_train_sc = scaler.fit(X_train)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Logistic Regression model" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"logreg= LogisticRegression(C=10E2).fit(X_train, y_train)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.98356807511737088" | |
] | |
}, | |
"execution_count": 30, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"logreg.score(X_train, y_train)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.95104895104895104" | |
] | |
}, | |
"execution_count": 31, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"logreg.score(X_test, y_test)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[ -4.89040214e+00, -1.07043962e-01, 1.77156456e-01,\n", | |
" 4.08181526e-02, 4.78844397e+00, -3.71271262e-01,\n", | |
" 3.66629580e+00, 7.73872078e+00, 5.10503836e+00,\n", | |
" -9.55832367e-01, -1.38976254e+00, -2.86400240e+00,\n", | |
" 1.25043506e+00, 8.34873081e-02, 6.83255599e-01,\n", | |
" -6.28819993e+00, -7.65296735e+00, 5.85285124e-01,\n", | |
" -3.41478951e-01, -1.30143805e+00, 1.48126233e+00,\n", | |
" 5.14966863e-01, -1.34803412e-01, 2.12124601e-03,\n", | |
" 9.02020268e+00, -4.52930024e+00, 8.77720048e+00,\n", | |
" 1.53997578e+01, 9.78344021e+00, -1.98603470e+00]])" | |
] | |
}, | |
"execution_count": 32, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"logreg.coef_" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
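"Most important predictive variables from logreg - identified by the largest absolute value of the coefficients. In logistic regression each coefficient is the change in the log-odds of the target for a one-unit increase in that feature. Note that coefficient magnitudes are only directly comparable when the features are on the same scale, and this model was fit on the unscaled `X_train`, so this ranking should be read with caution." | |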
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[('fractal_dimension_mean', 15.399757833548678),\n", | |
" ('fractal_dimension_sd_error', 9.7834402131332698),\n", | |
" ('symmetry_mean', 9.0202026754728433),\n", | |
" ('symmetry_worst', 8.7772004801627173),\n", | |
" ('perimeter_sd_error', 7.7387207755525829),\n", | |
" ('compactness_sd_error', 7.6529673475395175),\n", | |
" ('compactness_mean', 6.2881999286575558),\n", | |
" ('perimeter_worst', 5.1050383566848723),\n", | |
" ('radius_mean', 4.8904021429123103),\n", | |
" ('texture_sd_error', 4.788443970608534),\n", | |
" ('symmetry_sd_error', 4.5293002380203884),\n", | |
" ('perimeter_mean', 3.6662957990505372),\n", | |
" ('area_worst', 2.8640023999202655),\n", | |
" ('fractal_dimension_worst', 1.9860347048400133),\n", | |
" ('concavity_worst', 1.4812623280518653),\n", | |
" ('area_sd_error', 1.3897625413911123),\n", | |
" ('concavity_sd_error', 1.3014380532400722),\n", | |
" ('smoothness_mean', 1.2504350619155262),\n", | |
" ('area_mean', 0.9558323673257948),\n", | |
" ('smoothness_worst', 0.68325559897317001),\n", | |
" ('compactness_worst', 0.58528512359556129),\n", | |
" ('concave_points_mean', 0.51496686329661967),\n", | |
" ('texture_worst', 0.37127126180280556),\n", | |
" ('concavity_mean', 0.34147895085529872),\n", | |
" ('radius_worst', 0.17715645568571733),\n", | |
" ('concave_points_sd_error', 0.13480341154830128),\n", | |
" ('radius_sd_error', 0.10704396201040854),\n", | |
" ('smoothness_sd_error', 0.083487308096107771),\n", | |
" ('texture_mean', 0.040818152621318229),\n", | |
" ('concave_points_worst', 0.0021212460138316895)]" | |
] | |
}, | |
"execution_count": 33, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sorted(list(zip(bc_X.columns, abs(logreg.coef_[0]))), key=lambda x: x[1], reverse=True) " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# make top 5 features to compare below with other model/s\n", | |
"\n", | |
"top_5_feats_lr = sorted(list(zip(bc_X.columns, abs(logreg.coef_[0]))), key=lambda x: x[1], reverse=True)[:5]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Logistic regression is fast to train and predict, scales to large datasets, and works well on sparse data. It is also highly interpretable, but it may not be clear why it sets the coefficients the way it does, especially on a dataset with highly correlated features." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Random Forest model" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"ranfor = RandomForestClassifier(max_depth=10).fit(X_train, y_train)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.99530516431924887" | |
] | |
}, | |
"execution_count": 36, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# train score\n", | |
"ranfor.score(X_train, y_train)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.95104895104895104" | |
] | |
}, | |
"execution_count": 37, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# test score\n", | |
"ranfor.score(X_test, y_test)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Look at feature importances (they sum to 1, and each feature is ranked from 0 if not used at all to 1 if perfectly predicts the target):" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([ 0.00664 , 0.01192315, 0.13710448, 0.00716612, 0. ,\n", | |
" 0.04831478, 0. , 0.18592364, 0. , 0.0050617 ,\n", | |
" 0.05203602, 0.00125606, 0.00694849, 0.01124793, 0.00541972,\n", | |
" 0.00069702, 0.00889316, 0.00377655, 0.00682655, 0.00703202,\n", | |
" 0.07571054, 0.01774287, 0.09602688, 0.1576697 , 0.01514367,\n", | |
" 0.01134483, 0.0540487 , 0.04214065, 0.0222338 , 0.00167097])" | |
] | |
}, | |
"execution_count": 38, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ranfor.feature_importances_" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[('perimeter_sd_error', 0.18592363887837887),\n", | |
" ('concave_points_worst', 0.15766970441192482),\n", | |
" ('radius_worst', 0.13710448289774602),\n", | |
" ('concave_points_sd_error', 0.096026882456910934),\n", | |
" ('concavity_worst', 0.075710540823427569),\n", | |
" ('symmetry_worst', 0.054048696911513136),\n", | |
" ('area_sd_error', 0.052036024912112247),\n", | |
" ('texture_worst', 0.048314784282380209),\n", | |
" ('fractal_dimension_mean', 0.042140651237974232),\n", | |
" ('fractal_dimension_sd_error', 0.02223379669289666),\n", | |
" ('concave_points_mean', 0.017742868252507153),\n", | |
" ('symmetry_mean', 0.015143672818011891),\n", | |
" ('radius_sd_error', 0.011923151224529921),\n", | |
" ('symmetry_sd_error', 0.011344834546666903),\n", | |
" ('smoothness_sd_error', 0.011247927042949152),\n", | |
" ('compactness_sd_error', 0.0088931642312099438),\n", | |
" ('texture_mean', 0.0071661187113607958),\n", | |
" ('concavity_sd_error', 0.0070320153136990112),\n", | |
" ('smoothness_mean', 0.0069484874326051658),\n", | |
" ('concavity_mean', 0.0068265506332328531),\n", | |
" ('radius_mean', 0.0066399987446980561),\n", | |
" ('smoothness_worst', 0.0054197192837217344),\n", | |
" ('area_mean', 0.0050616961119502018),\n", | |
" ('compactness_worst', 0.0037765493654083209),\n", | |
" ('fractal_dimension_worst', 0.0016709683632497841),\n", | |
" ('area_worst', 0.0012560563799112593),\n", | |
" ('compactness_mean', 0.00069701803902319298),\n", | |
" ('texture_sd_error', 0.0),\n", | |
" ('perimeter_mean', 0.0),\n", | |
" ('perimeter_worst', 0.0)]" | |
] | |
}, | |
"execution_count": 39, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sorted(list(zip(bc_X.columns, ranfor.feature_importances_)), key=lambda x: x[1], reverse=True) " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"top_5_feats_rf = sorted(list(zip(bc_X.columns, ranfor.feature_importances_)), key=lambda x: x[1], reverse=True)[:5]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"([('fractal_dimension_mean', 15.399757833548678),\n", | |
" ('fractal_dimension_sd_error', 9.7834402131332698),\n", | |
" ('symmetry_mean', 9.0202026754728433),\n", | |
" ('symmetry_worst', 8.7772004801627173),\n", | |
" ('perimeter_sd_error', 7.7387207755525829)],\n", | |
" [('perimeter_sd_error', 0.18592363887837887),\n", | |
" ('concave_points_worst', 0.15766970441192482),\n", | |
" ('radius_worst', 0.13710448289774602),\n", | |
" ('concave_points_sd_error', 0.096026882456910934),\n", | |
" ('concavity_worst', 0.075710540823427569)])" | |
] | |
}, | |
"execution_count": 41, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"top_5_feats_lr, top_5_feats_rf" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Interesting, the models selected totally different features! Could explore making more trees in the RF model and setting a random state, see if the top features change." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Build a pipeline and compare results with baseline Random Forest" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 43, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"rf_pipe = Pipeline([\n", | |
" ('skb', SelectKBest(score_func=f_classif, k=20)),\n", | |
" ('scaler', StandardScaler()),\n", | |
" ('rf', RandomForestClassifier())\n", | |
"])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 51, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"params = {\n", | |
" 'rf__n_estimators':[10,100],\n", | |
" 'rf__max_depth':[10, 20, 40, 50, None]\n", | |
"}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 52, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"gs_pipe = GridSearchCV(rf_pipe,\n", | |
" param_grid=params,\n", | |
" n_jobs=-1,\n", | |
" cv=ShuffleSplit()\n", | |
" )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 53, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"GridSearchCV(cv=ShuffleSplit(n_splits=10, random_state=None, test_size=0.1, train_size=None),\n", | |
" error_score='raise',\n", | |
" estimator=Pipeline(steps=[('skb', SelectKBest(k=20, score_func=<function f_classif at 0x119e5dbf8>)), ('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('rf', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", | |
" max_depth=None, max_features='auto', max_leaf_nodes=...imators=10, n_jobs=1, oob_score=False, random_state=None,\n", | |
" verbose=0, warm_start=False))]),\n", | |
" fit_params={}, iid=True, n_jobs=-1,\n", | |
" param_grid={'rf__n_estimators': [10, 100], 'rf__max_depth': [10, 20, 40, 50, None]},\n", | |
" pre_dispatch='2*n_jobs', refit=True, return_train_score=True,\n", | |
" scoring=None, verbose=0)" | |
] | |
}, | |
"execution_count": 53, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"gs_pipe.fit(bc_X, bc_y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 54, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.96842105263157896" | |
] | |
}, | |
"execution_count": 54, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"gs_pipe.best_score_" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 55, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'rf__max_depth': None, 'rf__n_estimators': 100}" | |
] | |
}, | |
"execution_count": 55, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"gs_pipe.best_params_" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
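"The pipeline's best cross-validated accuracy (about 0.97) is slightly better than the baseline Random Forest's test accuracy (about 0.95), but not by much. Note that the two numbers are not strictly comparable: the grid search was cross-validated on the full dataset, while the baseline was scored on a single held-out test split." | |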
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Random Forests reduce the overfitting that can occur with individual decision trees by averaging the results of many randomized trees. They are very powerful, don't require scaling of the data, and usually work well without much parameter tuning. But they don't work well on high-dimensional, sparse data, and they require more memory and take longer to train and predict than linear models." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"How to prevent overfitting a model?" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"We can use regularization (L2 Ridge or L1 Lasso) to restrict linear models and prevent overfitting. Controlling the strength of the regularization by increasing alpha (with Ridge and Lasso) or decreasing C (with Logistic Regression) also helps prevent overfitting by further restricting the coefficients. Using Lasso has the additional benefit of performing automatic feature selection by restricting the coefficients of some features to zero, thus revealing the most important features in the dataset and making the model easier to interpret.\n", | |
"\n", | |
"We can also increase the size of the training data (in this case by bootstrapping) to make it harder for the model to overfit.\n", | |
"\n", | |
"For the Random Forest model, we can control for overfitting by limiting the max depth of the trees and setting a smaller max_features." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Discussion:\n", | |
"\n", | |
"These models could be further improved by tuning hyperparameters via grid search, more feature engineering, and combining different models in ensemble learning. After doing this, we could look at other model evaluation metrics such as a confusion matrix and ROC/AUC.\n", | |
"\n", | |
"In summary:\n", | |
"\n", | |
"The logistic regression model works by assigning a weight to each feature in the dataset. This weight explains how important, relatively, the feature is in determining which class (malignant or benign) samples fall into. It determines the probability based on these weights for each sample to be in one class or the other. The model performed very well, with a test accuracy of about 95%, meaning that if we were to feed new samples into the model, we would expect it to correctly predict whether each one was benign or malignant about 95% of the time. The baseline random forest model reached the same test accuracy (about 95%), and the grid-searched random forest pipeline achieved a cross-validated accuracy of about 97%. A random forest works by combining many tree-based models. These models essentially learn a hierarchy of if/else questions that leads to a decision of which class a sample belongs to, resulting in a recursive partitioning of the data. The factors that contributed to identification of samples as benign or malignant were different for each of the models, indicating that further work is necessary to carefully pre-process the data and select important features. " | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Part 2: Feedback" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Student sample 1" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"line 5: specify which module in sklearn to import model from\n", | |
"\n", | |
"line 9: good to use more descriptive names (i.e. 'data_df' instead of 'd')\n", | |
"\n", | |
"before setting up your data for prediction, you need to do some data exploration. check shape of data, any null values, any outliers, data types. are there any pre-processing steps you need to take? should you scale the data before modeling? what are your features? what is the target that you're predicting? are there any visualizations you can do to better understand your data?\n", | |
"\n", | |
"line 13 and 14: name your y or target rather than calling it 'x2'. check to see that your feature and target selection worked the way you expected.\n", | |
"\n", | |
"line 17: use a descriptive name when you instantiate the model, e.g. 'linear_regression' instead of just 'model', since you may add other models later and will want to be able to distinguish between them.\n", | |
"\n", | |
"choice of model: we don't usually want to use a basic linear regression (ordinary least squares); instead, use a model with parameters that control model complexity and help avoid overfitting to the training data, for example ridge regression. \n", | |
"\n", | |
"You imported train_test_split but didn't use it, so this step is unnecessary. Also line 20 is redundant because you already imported cross_val_score\n", | |
"\n", | |
"probably want to use a larger value for cv in the cross-validation, like 5, or set the cv to do a KFold or ShuffleSplit on the data to ensure the data is adequately shuffled.\n", | |
"\n", | |
"Make sure you review and understand the steps in the process of data exploration and cleaning, pre-processing, and especially implementing a model and fitting it to your data. \n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Student sample 2" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"See above comments re. data exploration and pre-processing, naming model\n", | |
"\n", | |
"For model selection, if you were to choose a more complex model you could use GridSearchCV to tune the hyperparameters and do the cross validation. \n", | |
"\n", | |
"For scoring, you can also use R2 or MSE instead of or in addition to MAE. " | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment