Skip to content

Instantly share code, notes, and snippets.

@alonsosilvaallende
Created January 7, 2022 14:44
Show Gist options
  • Save alonsosilvaallende/90f09c26b03d4e496f175a5480cb0bfc to your computer and use it in GitHub Desktop.
Save alonsosilvaallende/90f09c26b03d4e496f175a5480cb0bfc to your computer and use it in GitHub Desktop.
pharmacoSmoking_RSF_IBS.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "pharmacoSmoking_RSF_IBS.ipynb",
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyNwKYo2gOGRDHnUcyUghjed",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/alonsosilvaallende/90f09c26b03d4e496f175a5480cb0bfc/pharmacosmoking_rsf_ibs.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"source": [
"# NOTE(review): lifelines is not imported by any cell of this notebook; confirm it is still needed.\n",
"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
"%pip install -q lifelines"
],
"metadata": {
"id": "mEAsH84H4XxX"
},
"execution_count": 1,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
"%pip install -q scikit-survival"
],
"metadata": {
"id": "OlU2I67w28pU"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"id": "I5x52DW02cOg"
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns"
]
},
{
"cell_type": "code",
"source": [
"import statsmodels.api as sm\n",
"\n",
"# Fetch the pharmacoSmoking dataset of the R package `asaur` and keep its DataFrame.\n",
"pharmacoSmoking = sm.datasets.get_rdataset(\n",
"    dataname=\"pharmacoSmoking\",\n",
"    package=\"asaur\",\n",
")\n",
"data = pharmacoSmoking.data\n",
"data.head(3)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 198
},
"id": "jJPHKraK2fzC",
"outputId": "5c3a5777-905e-4c41-cd07-02dc7c67b4cb"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.7/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n",
" import pandas.util.testing as tm\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/html": [
"\n",
" <div id=\"df-c6911c52-c772-4151-a16a-8df6491b8be3\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>ttr</th>\n",
" <th>relapse</th>\n",
" <th>grp</th>\n",
" <th>age</th>\n",
" <th>gender</th>\n",
" <th>race</th>\n",
" <th>employment</th>\n",
" <th>yearsSmoking</th>\n",
" <th>levelSmoking</th>\n",
" <th>ageGroup2</th>\n",
" <th>ageGroup4</th>\n",
" <th>priorAttempts</th>\n",
" <th>longestNoSmoke</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>21</td>\n",
" <td>182</td>\n",
" <td>0</td>\n",
" <td>patchOnly</td>\n",
" <td>36</td>\n",
" <td>Male</td>\n",
" <td>white</td>\n",
" <td>ft</td>\n",
" <td>26</td>\n",
" <td>heavy</td>\n",
" <td>21-49</td>\n",
" <td>35-49</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>113</td>\n",
" <td>14</td>\n",
" <td>1</td>\n",
" <td>patchOnly</td>\n",
" <td>41</td>\n",
" <td>Male</td>\n",
" <td>white</td>\n",
" <td>other</td>\n",
" <td>27</td>\n",
" <td>heavy</td>\n",
" <td>21-49</td>\n",
" <td>35-49</td>\n",
" <td>3</td>\n",
" <td>90</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>39</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>combination</td>\n",
" <td>25</td>\n",
" <td>Female</td>\n",
" <td>white</td>\n",
" <td>other</td>\n",
" <td>12</td>\n",
" <td>heavy</td>\n",
" <td>21-49</td>\n",
" <td>21-34</td>\n",
" <td>3</td>\n",
" <td>21</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-c6911c52-c772-4151-a16a-8df6491b8be3')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-c6911c52-c772-4151-a16a-8df6491b8be3 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-c6911c52-c772-4151-a16a-8df6491b8be3');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
],
"text/plain": [
" id ttr relapse ... ageGroup4 priorAttempts longestNoSmoke\n",
"0 21 182 0 ... 35-49 0 0\n",
"1 113 14 1 ... 35-49 3 90\n",
"2 39 5 1 ... 21-34 3 21\n",
"\n",
"[3 rows x 14 columns]"
]
},
"metadata": {},
"execution_count": 4
}
]
},
{
"cell_type": "code",
"source": [
"# Rows with relapse == 0 are the censored observations.\n",
"print(f\"sample size: {len(data)}\")\n",
"print(f\"% censored: {100*int(data['relapse'].eq(0).sum())/len(data)}\")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "OJHyIFyW2lvb",
"outputId": "012f445a-93dc-4fd5-929b-caa647c16ba7"
},
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"sample size: 125\n",
"% censored: 28.8\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Drop the row identifier and the age-group columns (redundant information; `age` itself is kept).\n",
"# errors=\"ignore\" keeps this cell re-runnable: `data` is rebound here, so a second\n",
"# execution would otherwise raise KeyError because the columns are already gone.\n",
"data = data.drop(columns=[\"id\", \"ageGroup2\", \"ageGroup4\"], errors=\"ignore\")\n",
"data.head(3)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 143
},
"id": "eSb77nMD2y6G",
"outputId": "676362b7-2c19-4c13-a807-8e37a6eba4fe"
},
"execution_count": 6,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"\n",
" <div id=\"df-9fde91c1-aee4-4b18-8dc7-1999895338f4\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ttr</th>\n",
" <th>relapse</th>\n",
" <th>grp</th>\n",
" <th>age</th>\n",
" <th>gender</th>\n",
" <th>race</th>\n",
" <th>employment</th>\n",
" <th>yearsSmoking</th>\n",
" <th>levelSmoking</th>\n",
" <th>priorAttempts</th>\n",
" <th>longestNoSmoke</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>182</td>\n",
" <td>0</td>\n",
" <td>patchOnly</td>\n",
" <td>36</td>\n",
" <td>Male</td>\n",
" <td>white</td>\n",
" <td>ft</td>\n",
" <td>26</td>\n",
" <td>heavy</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>14</td>\n",
" <td>1</td>\n",
" <td>patchOnly</td>\n",
" <td>41</td>\n",
" <td>Male</td>\n",
" <td>white</td>\n",
" <td>other</td>\n",
" <td>27</td>\n",
" <td>heavy</td>\n",
" <td>3</td>\n",
" <td>90</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>combination</td>\n",
" <td>25</td>\n",
" <td>Female</td>\n",
" <td>white</td>\n",
" <td>other</td>\n",
" <td>12</td>\n",
" <td>heavy</td>\n",
" <td>3</td>\n",
" <td>21</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-9fde91c1-aee4-4b18-8dc7-1999895338f4')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-9fde91c1-aee4-4b18-8dc7-1999895338f4 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-9fde91c1-aee4-4b18-8dc7-1999895338f4');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
],
"text/plain": [
" ttr relapse grp ... levelSmoking priorAttempts longestNoSmoke\n",
"0 182 0 patchOnly ... heavy 0 0\n",
"1 14 1 patchOnly ... heavy 3 90\n",
"2 5 1 combination ... heavy 3 21\n",
"\n",
"[3 rows x 11 columns]"
]
},
"metadata": {},
"execution_count": 6
}
]
},
{
"cell_type": "code",
"source": [
"from sksurv.datasets import get_x_y\n",
"\n",
"# Separate the covariates (X) from the survival outcome (y).\n",
"# attr_labels is [event indicator, time]; pos_label is the indicator value that marks an event.\n",
"X, y = get_x_y(\n",
"    data,\n",
"    attr_labels=[\"relapse\", \"ttr\"],\n",
"    pos_label=True,\n",
")"
],
"metadata": {
"id": "stIN3Yx022Kw"
},
"execution_count": 7,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Cast every column whose dtype is neither signed-integer nor float to \"category\".\n",
"non_numeric_cols = [col for col in X.columns if X[col].dtype.kind not in ('i', 'f')]\n",
"X = X.astype({col: \"category\" for col in non_numeric_cols})"
],
"metadata": {
"id": "vt5EOWz625xo"
},
"execution_count": 8,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Hold out 25% of the rows (scikit-learn's default fraction) for validation; fixed seed.\n",
"X_trn, X_val, y_trn, y_val = train_test_split(\n",
"    X,\n",
"    y,\n",
"    test_size=0.25,\n",
"    random_state=0,\n",
")"
],
"metadata": {
"id": "FVTTx2SZ-SCd"
},
"execution_count": 9,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Preprocessing: one-hot encode the categorical columns, then standardize all columns.\n",
"from sksurv.preprocessing import OneHotEncoder\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"enc = OneHotEncoder()\n",
"scaler = StandardScaler()\n",
"\n",
"# Both transformers are fitted on the training split only and reused on validation.\n",
"X_trn_encoded = enc.fit_transform(X_trn)\n",
"X_val_encoded = enc.transform(X_val)\n",
"\n",
"# The scaler returns plain arrays, so rebuild DataFrames with the encoded column names.\n",
"X_trn = pd.DataFrame(scaler.fit_transform(X_trn_encoded), columns=X_trn_encoded.columns)\n",
"X_val = pd.DataFrame(scaler.transform(X_val_encoded), columns=X_val_encoded.columns)"
],
"metadata": {
"id": "u3OUsX2Q-XsB"
},
"execution_count": 10,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from sksurv.ensemble import RandomSurvivalForest\n",
"from sksurv.metrics import integrated_brier_score\n",
"\n",
"model = RandomSurvivalForest(random_state=0)\n",
"model.fit(X_trn, y_trn)\n",
"\n",
"# Integrated Brier score (IBS) on the validation split.\n",
"survs_val = model.predict_survival_function(X_val)\n",
"\n",
"# Evaluation grid with step 1 and an exclusive end point. integrated_brier_score\n",
"# requires every time to lie inside the validation follow-up range [min, max), so the\n",
"# end point is capped by the validation maximum in addition to the largest training\n",
"# time below the training maximum; otherwise an unlucky split raises ValueError.\n",
"t_start = y_val['ttr'].min()\n",
"t_stop = min(\n",
"    y_trn['ttr'][y_trn['ttr'] != y_trn['ttr'].max()].max(),\n",
"    y_val['ttr'].max(),\n",
")\n",
"times = np.arange(t_start, t_stop)\n",
"\n",
"# Each predicted step function is evaluated on the whole grid at once:\n",
"# one row per validation sample, one column per time point.\n",
"preds_val = np.asarray([fn(times) for fn in survs_val])\n",
"ibs_val = integrated_brier_score(y_trn, y_val, preds_val, times)\n",
"ibs_val"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Aoo-7DDe_IZ5",
"outputId": "37e15334-5b29-4835-ad6c-926c4338db77"
},
"execution_count": 11,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/lib/python3.7/importlib/_bootstrap.py:219: RuntimeWarning: sklearn.tree._tree.TreeBuilder size changed, may indicate binary incompatibility. Expected 72 from C header, got 80 from PyObject\n",
" return f(*args, **kwds)\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.21784153160409175"
]
},
"metadata": {},
"execution_count": 11
}
]
},
{
"cell_type": "code",
"source": [
""
],
"metadata": {
"id": "Jd1siQON6-8x"
},
"execution_count": 11,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment