Created
January 7, 2022 14:44
-
-
Save alonsosilvaallende/90f09c26b03d4e496f175a5480cb0bfc to your computer and use it in GitHub Desktop.
pharmacoSmoking_RSF_IBS.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "pharmacoSmoking_RSF_IBS.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"authorship_tag": "ABX9TyNwKYo2gOGRDHnUcyUghjed", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/alonsosilvaallende/90f09c26b03d4e496f175a5480cb0bfc/pharmacosmoking_rsf_ibs.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"!pip install -q lifelines" | |
], | |
"metadata": { | |
"id": "mEAsH84H4XxX" | |
}, | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"!pip install -q scikit-survival" | |
], | |
"metadata": { | |
"id": "OlU2I67w28pU" | |
}, | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"id": "I5x52DW02cOg" | |
}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import matplotlib.pyplot as plt\n", | |
"import seaborn as sns" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import statsmodels.api as sm\n", | |
"pharmacoSmoking = sm.datasets.get_rdataset(\"pharmacoSmoking\", \"asaur\")\n", | |
"data = pharmacoSmoking.data\n", | |
"data.head(3)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 198 | |
}, | |
"id": "jJPHKraK2fzC", | |
"outputId": "5c3a5777-905e-4c41-cd07-02dc7c67b4cb" | |
}, | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"/usr/local/lib/python3.7/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n", | |
" import pandas.util.testing as tm\n" | |
] | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"\n", | |
" <div id=\"df-c6911c52-c772-4151-a16a-8df6491b8be3\">\n", | |
" <div class=\"colab-df-container\">\n", | |
" <div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>ttr</th>\n", | |
" <th>relapse</th>\n", | |
" <th>grp</th>\n", | |
" <th>age</th>\n", | |
" <th>gender</th>\n", | |
" <th>race</th>\n", | |
" <th>employment</th>\n", | |
" <th>yearsSmoking</th>\n", | |
" <th>levelSmoking</th>\n", | |
" <th>ageGroup2</th>\n", | |
" <th>ageGroup4</th>\n", | |
" <th>priorAttempts</th>\n", | |
" <th>longestNoSmoke</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>21</td>\n", | |
" <td>182</td>\n", | |
" <td>0</td>\n", | |
" <td>patchOnly</td>\n", | |
" <td>36</td>\n", | |
" <td>Male</td>\n", | |
" <td>white</td>\n", | |
" <td>ft</td>\n", | |
" <td>26</td>\n", | |
" <td>heavy</td>\n", | |
" <td>21-49</td>\n", | |
" <td>35-49</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>113</td>\n", | |
" <td>14</td>\n", | |
" <td>1</td>\n", | |
" <td>patchOnly</td>\n", | |
" <td>41</td>\n", | |
" <td>Male</td>\n", | |
" <td>white</td>\n", | |
" <td>other</td>\n", | |
" <td>27</td>\n", | |
" <td>heavy</td>\n", | |
" <td>21-49</td>\n", | |
" <td>35-49</td>\n", | |
" <td>3</td>\n", | |
" <td>90</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>39</td>\n", | |
" <td>5</td>\n", | |
" <td>1</td>\n", | |
" <td>combination</td>\n", | |
" <td>25</td>\n", | |
" <td>Female</td>\n", | |
" <td>white</td>\n", | |
" <td>other</td>\n", | |
" <td>12</td>\n", | |
" <td>heavy</td>\n", | |
" <td>21-49</td>\n", | |
" <td>21-34</td>\n", | |
" <td>3</td>\n", | |
" <td>21</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>\n", | |
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-c6911c52-c772-4151-a16a-8df6491b8be3')\"\n", | |
" title=\"Convert this dataframe to an interactive table.\"\n", | |
" style=\"display:none;\">\n", | |
" \n", | |
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
" width=\"24px\">\n", | |
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n", | |
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n", | |
" </svg>\n", | |
" </button>\n", | |
" \n", | |
" <style>\n", | |
" .colab-df-container {\n", | |
" display:flex;\n", | |
" flex-wrap:wrap;\n", | |
" gap: 12px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert {\n", | |
" background-color: #E8F0FE;\n", | |
" border: none;\n", | |
" border-radius: 50%;\n", | |
" cursor: pointer;\n", | |
" display: none;\n", | |
" fill: #1967D2;\n", | |
" height: 32px;\n", | |
" padding: 0 0 0 0;\n", | |
" width: 32px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert:hover {\n", | |
" background-color: #E2EBFA;\n", | |
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
" fill: #174EA6;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert {\n", | |
" background-color: #3B4455;\n", | |
" fill: #D2E3FC;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert:hover {\n", | |
" background-color: #434B5C;\n", | |
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
" fill: #FFFFFF;\n", | |
" }\n", | |
" </style>\n", | |
"\n", | |
" <script>\n", | |
" const buttonEl =\n", | |
" document.querySelector('#df-c6911c52-c772-4151-a16a-8df6491b8be3 button.colab-df-convert');\n", | |
" buttonEl.style.display =\n", | |
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
"\n", | |
" async function convertToInteractive(key) {\n", | |
" const element = document.querySelector('#df-c6911c52-c772-4151-a16a-8df6491b8be3');\n", | |
" const dataTable =\n", | |
" await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
" [key], {});\n", | |
" if (!dataTable) return;\n", | |
"\n", | |
" const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
" + ' to learn more about interactive tables.';\n", | |
" element.innerHTML = '';\n", | |
" dataTable['output_type'] = 'display_data';\n", | |
" await google.colab.output.renderOutput(dataTable, element);\n", | |
" const docLink = document.createElement('div');\n", | |
" docLink.innerHTML = docLinkHtml;\n", | |
" element.appendChild(docLink);\n", | |
" }\n", | |
" </script>\n", | |
" </div>\n", | |
" </div>\n", | |
" " | |
], | |
"text/plain": [ | |
" id ttr relapse ... ageGroup4 priorAttempts longestNoSmoke\n", | |
"0 21 182 0 ... 35-49 0 0\n", | |
"1 113 14 1 ... 35-49 3 90\n", | |
"2 39 5 1 ... 21-34 3 21\n", | |
"\n", | |
"[3 rows x 14 columns]" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 4 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"print(f\"sample size: {data.shape[0]}\")\n", | |
"print(f\"% censored: {100*len(data[data['relapse'] == 0])/len(data)}\")" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "OJHyIFyW2lvb", | |
"outputId": "012f445a-93dc-4fd5-929b-caa647c16ba7" | |
}, | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"sample size: 125\n", | |
"% censored: 28.8\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Drop redundant information and ids\n", | |
"data = data.drop(columns=[\"id\",\"ageGroup2\",\"ageGroup4\"])\n", | |
"data.head(3)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 143 | |
}, | |
"id": "eSb77nMD2y6G", | |
"outputId": "676362b7-2c19-4c13-a807-8e37a6eba4fe" | |
}, | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"\n", | |
" <div id=\"df-9fde91c1-aee4-4b18-8dc7-1999895338f4\">\n", | |
" <div class=\"colab-df-container\">\n", | |
" <div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>ttr</th>\n", | |
" <th>relapse</th>\n", | |
" <th>grp</th>\n", | |
" <th>age</th>\n", | |
" <th>gender</th>\n", | |
" <th>race</th>\n", | |
" <th>employment</th>\n", | |
" <th>yearsSmoking</th>\n", | |
" <th>levelSmoking</th>\n", | |
" <th>priorAttempts</th>\n", | |
" <th>longestNoSmoke</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>182</td>\n", | |
" <td>0</td>\n", | |
" <td>patchOnly</td>\n", | |
" <td>36</td>\n", | |
" <td>Male</td>\n", | |
" <td>white</td>\n", | |
" <td>ft</td>\n", | |
" <td>26</td>\n", | |
" <td>heavy</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>14</td>\n", | |
" <td>1</td>\n", | |
" <td>patchOnly</td>\n", | |
" <td>41</td>\n", | |
" <td>Male</td>\n", | |
" <td>white</td>\n", | |
" <td>other</td>\n", | |
" <td>27</td>\n", | |
" <td>heavy</td>\n", | |
" <td>3</td>\n", | |
" <td>90</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>5</td>\n", | |
" <td>1</td>\n", | |
" <td>combination</td>\n", | |
" <td>25</td>\n", | |
" <td>Female</td>\n", | |
" <td>white</td>\n", | |
" <td>other</td>\n", | |
" <td>12</td>\n", | |
" <td>heavy</td>\n", | |
" <td>3</td>\n", | |
" <td>21</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>\n", | |
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-9fde91c1-aee4-4b18-8dc7-1999895338f4')\"\n", | |
" title=\"Convert this dataframe to an interactive table.\"\n", | |
" style=\"display:none;\">\n", | |
" \n", | |
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
" width=\"24px\">\n", | |
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n", | |
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n", | |
" </svg>\n", | |
" </button>\n", | |
" \n", | |
" <style>\n", | |
" .colab-df-container {\n", | |
" display:flex;\n", | |
" flex-wrap:wrap;\n", | |
" gap: 12px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert {\n", | |
" background-color: #E8F0FE;\n", | |
" border: none;\n", | |
" border-radius: 50%;\n", | |
" cursor: pointer;\n", | |
" display: none;\n", | |
" fill: #1967D2;\n", | |
" height: 32px;\n", | |
" padding: 0 0 0 0;\n", | |
" width: 32px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert:hover {\n", | |
" background-color: #E2EBFA;\n", | |
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
" fill: #174EA6;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert {\n", | |
" background-color: #3B4455;\n", | |
" fill: #D2E3FC;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert:hover {\n", | |
" background-color: #434B5C;\n", | |
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
" fill: #FFFFFF;\n", | |
" }\n", | |
" </style>\n", | |
"\n", | |
" <script>\n", | |
" const buttonEl =\n", | |
" document.querySelector('#df-9fde91c1-aee4-4b18-8dc7-1999895338f4 button.colab-df-convert');\n", | |
" buttonEl.style.display =\n", | |
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
"\n", | |
" async function convertToInteractive(key) {\n", | |
" const element = document.querySelector('#df-9fde91c1-aee4-4b18-8dc7-1999895338f4');\n", | |
" const dataTable =\n", | |
" await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
" [key], {});\n", | |
" if (!dataTable) return;\n", | |
"\n", | |
" const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
" + ' to learn more about interactive tables.';\n", | |
" element.innerHTML = '';\n", | |
" dataTable['output_type'] = 'display_data';\n", | |
" await google.colab.output.renderOutput(dataTable, element);\n", | |
" const docLink = document.createElement('div');\n", | |
" docLink.innerHTML = docLinkHtml;\n", | |
" element.appendChild(docLink);\n", | |
" }\n", | |
" </script>\n", | |
" </div>\n", | |
" </div>\n", | |
" " | |
], | |
"text/plain": [ | |
" ttr relapse grp ... levelSmoking priorAttempts longestNoSmoke\n", | |
"0 182 0 patchOnly ... heavy 0 0\n", | |
"1 14 1 patchOnly ... heavy 3 90\n", | |
"2 5 1 combination ... heavy 3 21\n", | |
"\n", | |
"[3 rows x 11 columns]" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 6 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from sksurv.datasets import get_x_y\n", | |
"\n", | |
"X, y = get_x_y(data, attr_labels=[\"relapse\", \"ttr\"], pos_label=True)" | |
], | |
"metadata": { | |
"id": "stIN3Yx022Kw" | |
}, | |
"execution_count": 7, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"for c in X.columns:\n", | |
" if X[c].dtype.kind not in ['i', 'f']:\n", | |
" X[c] = X[c].astype(\"category\")" | |
], | |
"metadata": { | |
"id": "vt5EOWz625xo" | |
}, | |
"execution_count": 8, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from sklearn.model_selection import train_test_split\n", | |
"\n", | |
"X_trn, X_val, y_trn, y_val = train_test_split(X, y, random_state=0)" | |
], | |
"metadata": { | |
"id": "FVTTx2SZ-SCd" | |
}, | |
"execution_count": 9, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Preprocessing\n", | |
"from sksurv.preprocessing import OneHotEncoder\n", | |
"from sklearn.preprocessing import StandardScaler\n", | |
"\n", | |
"enc = OneHotEncoder()\n", | |
"scaler = StandardScaler()\n", | |
"X_trn = enc.fit_transform(X_trn)\n", | |
"X_trn = pd.DataFrame(scaler.fit_transform(X_trn), columns=X_trn.columns)\n", | |
"X_val = enc.transform(X_val)\n", | |
"X_val = pd.DataFrame(scaler.transform(X_val), columns=X_val.columns)" | |
], | |
"metadata": { | |
"id": "u3OUsX2Q-XsB" | |
}, | |
"execution_count": 10, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from sksurv.ensemble import RandomSurvivalForest\n", | |
"from sksurv.metrics import integrated_brier_score\n", | |
"\n", | |
"model = RandomSurvivalForest(random_state=0)\n", | |
"model.fit(X_trn, y_trn)\n", | |
"# IBS\n", | |
"survs_val = model.predict_survival_function(X_val)\n", | |
"times = np.arange(y_val['ttr'].min(), y_trn['ttr'][y_trn['ttr']!=y_trn['ttr'].max()].max())\n", | |
"preds_val = np.asarray([[fn(t) for t in times] for fn in survs_val])\n", | |
"ibs_val = integrated_brier_score(y_trn, y_val, preds_val, times)\n", | |
"ibs_val" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "Aoo-7DDe_IZ5", | |
"outputId": "37e15334-5b29-4835-ad6c-926c4338db77" | |
}, | |
"execution_count": 11, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"/usr/lib/python3.7/importlib/_bootstrap.py:219: RuntimeWarning: sklearn.tree._tree.TreeBuilder size changed, may indicate binary incompatibility. Expected 72 from C header, got 80 from PyObject\n", | |
" return f(*args, **kwds)\n" | |
] | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"0.21784153160409175" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 11 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"" | |
], | |
"metadata": { | |
"id": "Jd1siQON6-8x" | |
}, | |
"execution_count": 11, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment