Skip to content

Instantly share code, notes, and snippets.

@alonsosilvaallende
Created September 21, 2021 09:16
Show Gist options
  • Save alonsosilvaallende/769531401d52a95e144e6001ff53fede to your computer and use it in GitHub Desktop.
Save alonsosilvaallende/769531401d52a95e144e6001ff53fede to your computer and use it in GitHub Desktop.
Camila/Simulations/Untitled.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"ExecuteTime": {
"start_time": "2021-09-21T09:15:24.497240Z",
"end_time": "2021-09-21T09:15:26.200848Z"
},
"trusted": true
},
"cell_type": "code",
"source": "%load_ext autoreload\n%autoreload 2\n%matplotlib inline",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2021-09-21T09:15:26.204397Z",
"end_time": "2021-09-21T09:15:27.325771Z"
},
"trusted": true
},
"cell_type": "code",
"source": "import sksurv\nsksurv.__version__",
"execution_count": 2,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 2,
"data": {
"text/plain": "'0.15.0.post0'"
},
"metadata": {}
}
]
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2021-09-21T09:15:27.330251Z",
"end_time": "2021-09-21T09:15:28.366410Z"
},
"trusted": true
},
"cell_type": "code",
"source": "import numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2021-09-21T09:15:28.379443Z",
"end_time": "2021-09-21T09:15:28.537290Z"
},
"trusted": true
},
"cell_type": "code",
"source": "from sksurv.datasets import load_gbsg2\n\nX, y = load_gbsg2()",
"execution_count": 4,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2021-09-21T09:15:28.539539Z",
"end_time": "2021-09-21T09:15:28.618734Z"
},
"trusted": true
},
"cell_type": "code",
"source": "# Columns whose dtype kind is signed integer ('i') or float ('f') are scaled;\n# every remaining column is treated as categorical.\nscaling_cols = [name for name, dtype in X.dtypes.items() if dtype.kind in ('i', 'f')]\ncat_cols = [name for name in X.columns if name not in scaling_cols]",
"execution_count": 5,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2021-09-21T09:15:28.622738Z",
"end_time": "2021-09-21T09:15:28.765335Z"
},
"trusted": true
},
"cell_type": "code",
"source": "from sklearn.compose import ColumnTransformer\nfrom sklearn.preprocessing import OrdinalEncoder, StandardScaler\n\n# Ordinal-encode the categorical columns and standardise the numeric ones.\n# Columns in neither list pass through unchanged; sparse_threshold=0 keeps the output dense.\npreprocessor = ColumnTransformer(\n    transformers=[\n        ('cat-preprocessor', OrdinalEncoder(), cat_cols),\n        ('standard-scaler', StandardScaler(), scaling_cols),\n    ],\n    remainder='passthrough',\n    sparse_threshold=0,\n)",
"execution_count": 6,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2021-09-21T09:15:28.770697Z",
"end_time": "2021-09-21T09:15:28.856886Z"
},
"trusted": true
},
"cell_type": "code",
"source": "# Candidate hyper-parameter values for the randomized search, one grid per model.\nparam_grid_cph = dict(alpha=(0.01, 0.1, 0.5))\n\n# NOTE(review): an integer max_features=1 means a single feature per split in\n# scikit-learn-style estimators (1.0 would mean all features) - confirm the intent.\nparam_grid_rsf = dict(\n    max_features=(\"sqrt\", 0.5, 1),\n    min_samples_leaf=(1, 3, 5),\n    n_estimators=(50, 100, 200),\n    max_depth=(3, 5, 7, 10),\n)\n\nparam_grid_gbs = dict(\n    learning_rate=(0.05, 0.1, 0.15),\n    max_features=(\"sqrt\", 0.5, 1),\n    min_samples_leaf=(1, 3, 5),\n    n_estimators=(50, 100, 200),\n    subsample=(0.7, 0.9, 1),\n    max_depth=(3, 5, 7, 10),\n)\n\n# The benchmark loop looks grids up by model position, so keep this order: CoxPH, RSF, GBS.\nparam_distributions = [param_grid_cph, param_grid_rsf, param_grid_gbs]",
"execution_count": 7,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2021-09-21T09:15:28.875901Z",
"end_time": "2021-09-21T09:15:28.985453Z"
},
"trusted": true
},
"cell_type": "code",
"source": "# First three entries of a fixed permutation of 0..999: distinct, reproducible split seeds.\nseeds = np.random.RandomState(seed=0).permutation(1000)[0:3]",
"execution_count": 8,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2021-09-21T09:15:28.989409Z",
"end_time": "2021-09-21T09:15:55.299354Z"
},
"trusted": true,
"scrolled": true
},
"cell_type": "code",
"source": "from sklearn.model_selection import train_test_split\nfrom sklearn.model_selection import RandomizedSearchCV\nfrom sklearn.model_selection import KFold\nfrom sksurv.linear_model import CoxPHSurvivalAnalysis\nfrom sksurv.ensemble import RandomSurvivalForest\nfrom sksurv.ensemble import GradientBoostingSurvivalAnalysis\nfrom lifelines.utils import concordance_index\nfrom util import compute_IPEC_scores\n\n# Benchmark: for every seed, split and preprocess the data, fit each model with its\n# default settings and with a small randomized search, then record the concordance\n# index (default and best) and the IPEC score (default only) on train and test.\n\nSCORE_COLUMNS = ['Model', 'Train/Test', 'Default/Best', 'Score']\n\n\ndef concordance(estimator, X_part, y_part):\n    \"\"\"Return the concordance index of `estimator` on one data split.\n\n    The predictions are negated before scoring, following lifelines' documented\n    usage for risk-type scores (larger prediction = shorter survival).\n    \"\"\"\n    return concordance_index(y_part['time'], -estimator.predict(X_part), y_part['cens'])\n\n\ndef time_event_array(y_part):\n    \"\"\"Stack the 'time' and 'cens' fields of `y_part` into an (n, 2) array.\"\"\"\n    return np.column_stack((y_part['time'], y_part['cens']))\n\n\ndef scaled_ipec(estimator, X_part, y_part_ip, y_trn_ip):\n    \"\"\"Return the IPEC score of `estimator` on one split, divided by the horizon tau.\n\n    Every predicted survival curve gets the point (t=0, S=1) prepended, and tau is\n    the last time point of the curves.\n    \"\"\"\n    surv_funcs = estimator.predict_survival_function(X_part)\n    times = np.concatenate(([0], surv_funcs[0].x))\n    surv_values = [np.concatenate(([1], fn.y)) for fn in surv_funcs]\n    tau = times[-1]\n    # NOTE(review): the argument order mirrors the original call (training labels,\n    # evaluated labels, times, curves, horizons) - confirm against util.compute_IPEC_scores.\n    return compute_IPEC_scores(y_trn_ip, y_part_ip, times, surv_values, [tau])[tau] / tau\n\n\n# Rows are collected in plain lists and turned into DataFrames once at the end;\n# DataFrame.append is quadratic in a loop and no longer exists in pandas 2.\nci_records = []\nipec_records = []\n\nfor seed in seeds:\n    X_trn, X_val, y_trn, y_val = train_test_split(X, y, random_state=seed)\n    X_trn = pd.DataFrame(preprocessor.fit_transform(X_trn))\n    X_val = pd.DataFrame(preprocessor.transform(X_val))\n    y_trn_ip = time_event_array(y_trn)\n    y_val_ip = time_event_array(y_val)\n\n    models = [\n        CoxPHSurvivalAnalysis(alpha=0.1),\n        RandomSurvivalForest(random_state=42),\n        GradientBoostingSurvivalAnalysis(random_state=42),\n    ]\n    for model, search_space in zip(models, param_distributions):\n        label = f'{model}'\n        model.fit(X_trn, y_trn)\n\n        # random_state makes the sampled candidates reproducible for a given seed.\n        rs_model = RandomizedSearchCV(model, param_distributions=search_space,\n                                      n_jobs=-1, cv=2, n_iter=3, random_state=seed)\n        rs_model.fit(X_trn, y_trn)\n\n        for variant, estimator in (('Default', model), ('Best', rs_model)):\n            for split, X_part, y_part in (('Train', X_trn, y_trn), ('Test', X_val, y_val)):\n                ci_records.append({'Model': label, 'Train/Test': split,\n                                   'Default/Best': variant,\n                                   'Score': concordance(estimator, X_part, y_part)})\n\n        # The train score is computed from the training-set predictions; it used to\n        # be a copy of the test score.\n        for split, X_part, y_part_ip in (('Train', X_trn, y_trn_ip), ('Test', X_val, y_val_ip)):\n            ipec_records.append({'Model': label, 'Train/Test': split,\n                                 'Default/Best': 'Default',\n                                 'Score': scaled_ipec(model, X_part, y_part_ip, y_trn_ip)})\n\ndf_ci = pd.DataFrame(ci_records, columns=SCORE_COLUMNS)\ndf_ipec = pd.DataFrame(ipec_records, columns=SCORE_COLUMNS)",
"execution_count": 9,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2021-09-21T09:15:55.309679Z",
"end_time": "2021-09-21T09:15:55.416414Z"
},
"trusted": true
},
"cell_type": "code",
"source": "# Mean test-set concordance index per model and variant, averaged over the seeds.\n# 'Score' is selected (and cast to float) explicitly: pandas >= 2.0 no longer drops\n# the non-numeric 'Train/Test' column silently when averaging.\n(\n    df_ci.loc[df_ci['Train/Test'] == 'Test']\n    .astype({'Score': float})\n    .groupby(['Model', 'Default/Best'])[['Score']]\n    .mean()\n)",
"execution_count": 10,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 10,
"data": {
"text/plain": " Score\nModel Default/Best \nCoxPHSurvivalAnalysis(alpha=0.1) Best 0.657254\n Default 0.657220\nGradientBoostingSurvivalAnalysis(random_state=42) Best 0.681569\n Default 0.684195\nRandomSurvivalForest(random_state=42) Best 0.690954\n Default 0.683404",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th></th>\n <th>Score</th>\n </tr>\n <tr>\n <th>Model</th>\n <th>Default/Best</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th rowspan=\"2\" valign=\"top\">CoxPHSurvivalAnalysis(alpha=0.1)</th>\n <th>Best</th>\n <td>0.657254</td>\n </tr>\n <tr>\n <th>Default</th>\n <td>0.657220</td>\n </tr>\n <tr>\n <th rowspan=\"2\" valign=\"top\">GradientBoostingSurvivalAnalysis(random_state=42)</th>\n <th>Best</th>\n <td>0.681569</td>\n </tr>\n <tr>\n <th>Default</th>\n <td>0.684195</td>\n </tr>\n <tr>\n <th rowspan=\"2\" valign=\"top\">RandomSurvivalForest(random_state=42)</th>\n <th>Best</th>\n <td>0.690954</td>\n </tr>\n <tr>\n <th>Default</th>\n <td>0.683404</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2021-09-21T09:15:55.436870Z",
"end_time": "2021-09-21T09:15:55.569750Z"
},
"trusted": true
},
"cell_type": "code",
"source": "# Mean test-set IPEC score (already divided by tau) per model, averaged over the seeds.\n# 'Score' is selected (and cast to float) explicitly: pandas >= 2.0 no longer drops\n# the non-numeric 'Train/Test' column silently when averaging.\n(\n    df_ipec.loc[df_ipec['Train/Test'] == 'Test']\n    .astype({'Score': float})\n    .groupby(['Model', 'Default/Best'])[['Score']]\n    .mean()\n)",
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 11,
"data": {
"text/plain": " Score\nModel Default/Best \nCoxPHSurvivalAnalysis(alpha=0.1) Default 0.472865\nGradientBoostingSurvivalAnalysis(random_state=42) Default 0.423120\nRandomSurvivalForest(random_state=42) Default 0.253419",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th></th>\n <th>Score</th>\n </tr>\n <tr>\n <th>Model</th>\n <th>Default/Best</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>CoxPHSurvivalAnalysis(alpha=0.1)</th>\n <th>Default</th>\n <td>0.472865</td>\n </tr>\n <tr>\n <th>GradientBoostingSurvivalAnalysis(random_state=42)</th>\n <th>Default</th>\n <td>0.423120</td>\n </tr>\n <tr>\n <th>RandomSurvivalForest(random_state=42)</th>\n <th>Default</th>\n <td>0.253419</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3 (ipykernel)",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.9.7",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"gist": {
"id": "",
"data": {
"description": "Camila/Simulations/Untitled.ipynb",
"public": true
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment