Created
September 21, 2021 09:16
-
-
Save alonsosilvaallende/769531401d52a95e144e6001ff53fede to your computer and use it in GitHub Desktop.
Camila/Simulations/Untitled.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"start_time": "2021-09-21T09:15:24.497240Z", | |
"end_time": "2021-09-21T09:15:26.200848Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "%load_ext autoreload\n%autoreload 2\n%matplotlib inline", | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"start_time": "2021-09-21T09:15:26.204397Z", | |
"end_time": "2021-09-21T09:15:27.325771Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "import sksurv\nsksurv.__version__", | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 2, | |
"data": { | |
"text/plain": "'0.15.0.post0'" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"start_time": "2021-09-21T09:15:27.330251Z", | |
"end_time": "2021-09-21T09:15:28.366410Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "import numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt", | |
"execution_count": 3, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"start_time": "2021-09-21T09:15:28.379443Z", | |
"end_time": "2021-09-21T09:15:28.537290Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "from sksurv.datasets import load_gbsg2\n\nX, y = load_gbsg2()", | |
"execution_count": 4, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"start_time": "2021-09-21T09:15:28.539539Z", | |
"end_time": "2021-09-21T09:15:28.618734Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "scaling_cols = [c for c in X.columns if X[c].dtype.kind in ['i', 'f']]\ncat_cols = [c for c in X.columns if X[c].dtype.kind not in ['i', 'f']]", | |
"execution_count": 5, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"start_time": "2021-09-21T09:15:28.622738Z", | |
"end_time": "2021-09-21T09:15:28.765335Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "from sklearn.compose import ColumnTransformer\nfrom sklearn.preprocessing import OrdinalEncoder\nfrom sklearn.preprocessing import StandardScaler\n\npreprocessor = ColumnTransformer(\n [('cat-preprocessor', OrdinalEncoder(), cat_cols),\n ('standard-scaler', StandardScaler(), scaling_cols)],\n remainder='passthrough', sparse_threshold=0)", | |
"execution_count": 6, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"start_time": "2021-09-21T09:15:28.770697Z", | |
"end_time": "2021-09-21T09:15:28.856886Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "param_grid_cph = {\n 'alpha': (0.01, 0.1, 0.5),\n}\n\nparam_grid_rsf = {\n 'max_features': (\"sqrt\", 0.5, 1),\n 'min_samples_leaf': (1,3,5),\n 'n_estimators': (50, 100, 200),\n 'max_depth': (3,5,7,10),\n}\n\nparam_grid_gbs = {\n 'learning_rate': (0.05, 0.1, 0.15),\n 'max_features': (\"sqrt\", 0.5, 1),\n 'min_samples_leaf': (1,3,5),\n 'n_estimators': (50, 100, 200),\n 'subsample': (0.7,0.9,1),\n 'max_depth': (3,5,7,10),\n}\n\nparam_distributions = [param_grid_cph, param_grid_rsf, param_grid_gbs]", | |
"execution_count": 7, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"start_time": "2021-09-21T09:15:28.875901Z", | |
"end_time": "2021-09-21T09:15:28.985453Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "seeds = np.random.RandomState(0).permutation(1000)[:3]", | |
"execution_count": 8, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"start_time": "2021-09-21T09:15:28.989409Z", | |
"end_time": "2021-09-21T09:15:55.299354Z" | |
}, | |
"trusted": true, | |
"scrolled": true | |
}, | |
"cell_type": "code", | |
"source": "from sklearn.model_selection import train_test_split\nfrom sklearn.model_selection import RandomizedSearchCV\nfrom sklearn.model_selection import KFold\nfrom sksurv.linear_model import CoxPHSurvivalAnalysis\nfrom sksurv.ensemble import RandomSurvivalForest\nfrom sksurv.ensemble import GradientBoostingSurvivalAnalysis\nfrom lifelines.utils import concordance_index\nfrom util import compute_IPEC_scores\n\ndf_ci = pd.DataFrame(columns=['Model','Train/Test','Default/Best','Score'], index=None)\ndf_ipec = pd.DataFrame(columns=['Model','Train/Test','Default/Best','Score'], index=None)\nfor seed in seeds:\n X_trn, X_val, y_trn, y_val = train_test_split(X, y, random_state=seed)\n X_trn = pd.DataFrame(preprocessor.fit_transform(X_trn))\n X_val = pd.DataFrame(preprocessor.transform(X_val))\n for i, model in enumerate([CoxPHSurvivalAnalysis(alpha=0.1), RandomSurvivalForest(random_state=42), GradientBoostingSurvivalAnalysis(random_state=42)]):\n model.fit(X_trn, y_trn)\n ci_model_trn = concordance_index(y_trn['time'], -model.predict(X_trn), y_trn['cens'])\n ci_model_val = concordance_index(y_val['time'], -model.predict(X_val), y_val['cens'])\n df_ci = df_ci.append({'Model':f'{model}','Train/Test':'Train',\n 'Default/Best':'Default','Score':ci_model_trn}, ignore_index=True)\n df_ci = df_ci.append({'Model':f'{model}','Train/Test':'Test',\n 'Default/Best':'Default','Score':ci_model_val}, ignore_index=True)\n\n rs_model = RandomizedSearchCV(model, param_distributions=param_distributions[i], n_jobs=-1, cv=2, n_iter=3)\n rs_model.fit(X_trn, y_trn)\n ci_rs_model_trn = concordance_index(y_trn['time'], -rs_model.predict(X_trn), y_trn['cens'])\n ci_rs_model_val = concordance_index(y_val['time'], -rs_model.predict(X_val), y_val['cens'])\n df_ci = df_ci.append({'Model':f'{model}','Train/Test':'Train',\n 'Default/Best':'Best','Score':ci_rs_model_trn}, ignore_index=True)\n df_ci = df_ci.append({'Model':f'{model}','Train/Test':'Test',\n 'Default/Best':'Best','Score':ci_rs_model_val}, ignore_index=True)\n \n rsf_survfunc_test = model.predict_survival_function(X_val)\n y_trn_ip = np.array([[i,j] for i, j in zip(y_trn['time'], y_trn['cens'])])\n y_test_ip = np.array([[i,j] for i, j in zip(y_val['time'], y_val['cens'])])\n times = np.concatenate((np.array([0]), rsf_survfunc_test[0].x))\n rsf_survfunc_y = \\\n [np.concatenate((np.array([1]), rsf_survfunc_test[i].y)) for i in range(len(rsf_survfunc_test))]\n tau = [times[-1]]\n \n ipec_model_trn = compute_IPEC_scores(y_trn_ip, y_test_ip, times, rsf_survfunc_y, tau)[tau[0]]/tau[0]\n ipec_model_val = compute_IPEC_scores(y_trn_ip, y_test_ip, times, rsf_survfunc_y, tau)[tau[0]]/tau[0]\n df_ipec = df_ipec.append({'Model':f'{model}','Train/Test':'Train',\n 'Default/Best':'Default','Score':ipec_model_trn}, ignore_index=True)\n df_ipec = df_ipec.append({'Model':f'{model}','Train/Test':'Test',\n 'Default/Best':'Default','Score':ipec_model_val}, ignore_index=True)", | |
"execution_count": 9, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"start_time": "2021-09-21T09:15:55.309679Z", | |
"end_time": "2021-09-21T09:15:55.416414Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_ci[df_ci['Train/Test']=='Test'].groupby(['Model', 'Default/Best']).mean()", | |
"execution_count": 10, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 10, | |
"data": { | |
"text/plain": " Score\nModel Default/Best \nCoxPHSurvivalAnalysis(alpha=0.1) Best 0.657254\n Default 0.657220\nGradientBoostingSurvivalAnalysis(random_state=42) Best 0.681569\n Default 0.684195\nRandomSurvivalForest(random_state=42) Best 0.690954\n Default 0.683404", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th></th>\n <th>Score</th>\n </tr>\n <tr>\n <th>Model</th>\n <th>Default/Best</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th rowspan=\"2\" valign=\"top\">CoxPHSurvivalAnalysis(alpha=0.1)</th>\n <th>Best</th>\n <td>0.657254</td>\n </tr>\n <tr>\n <th>Default</th>\n <td>0.657220</td>\n </tr>\n <tr>\n <th rowspan=\"2\" valign=\"top\">GradientBoostingSurvivalAnalysis(random_state=42)</th>\n <th>Best</th>\n <td>0.681569</td>\n </tr>\n <tr>\n <th>Default</th>\n <td>0.684195</td>\n </tr>\n <tr>\n <th rowspan=\"2\" valign=\"top\">RandomSurvivalForest(random_state=42)</th>\n <th>Best</th>\n <td>0.690954</td>\n </tr>\n <tr>\n <th>Default</th>\n <td>0.683404</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"start_time": "2021-09-21T09:15:55.436870Z", | |
"end_time": "2021-09-21T09:15:55.569750Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_ipec[df_ipec['Train/Test']=='Test'].groupby(['Model', 'Default/Best']).mean()", | |
"execution_count": 11, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 11, | |
"data": { | |
"text/plain": " Score\nModel Default/Best \nCoxPHSurvivalAnalysis(alpha=0.1) Default 0.472865\nGradientBoostingSurvivalAnalysis(random_state=42) Default 0.423120\nRandomSurvivalForest(random_state=42) Default 0.253419", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th></th>\n <th>Score</th>\n </tr>\n <tr>\n <th>Model</th>\n <th>Default/Best</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>CoxPHSurvivalAnalysis(alpha=0.1)</th>\n <th>Default</th>\n <td>0.472865</td>\n </tr>\n <tr>\n <th>GradientBoostingSurvivalAnalysis(random_state=42)</th>\n <th>Default</th>\n <td>0.423120</td>\n </tr>\n <tr>\n <th>RandomSurvivalForest(random_state=42)</th>\n <th>Default</th>\n <td>0.253419</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.9.7", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
}, | |
"gist": { | |
"id": "", | |
"data": { | |
"description": "Camila/Simulations/Untitled.ipynb", | |
"public": true | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment