Created
April 7, 2025 11:47
-
-
Save brusangues/b32ab44aca358f08c590ab02fc02c7fc to your computer and use it in GitHub Desktop.
optuna.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"authorship_tag": "ABX9TyN3JmDkDBX9JVpAlJAS85I2", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/brusangues/b32ab44aca358f08c590ab02fc02c7fc/optuna.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Use the %pip magic (not !pip) so packages are installed into the\n", | |
"# environment of the running kernel rather than whatever pip the shell finds.\n", | |
"%pip install --quiet optuna xgboost lightgbm catboost" | |
], | |
"metadata": { | |
"id": "-8-cRHox3itE" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"id": "cqnXcs-B3aoM" | |
}, | |
"outputs": [], | |
"source": [ | |
"import optuna\n", | |
"\n", | |
"from sklearn.datasets import fetch_california_housing\n", | |
"from sklearn.model_selection import train_test_split\n", | |
"from sklearn.metrics import mean_squared_error\n", | |
"\n", | |
"# Fetch the California housing data and hold out 20% of the rows as a test\n", | |
"# split; random_state is fixed so the split is identical on every run.\n", | |
"data = fetch_california_housing()\n", | |
"X_train, X_test, y_train, y_test = train_test_split(\n", | |
"    data.data,\n", | |
"    data.target,\n", | |
"    test_size=0.2,\n", | |
"    random_state=42,\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import warnings\n", | |
"warnings.filterwarnings(\"ignore\")" | |
], | |
"metadata": { | |
"id": "INXQZR2IDTW0" | |
}, | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from catboost import CatBoostClassifier, CatBoostRegressor\n", | |
"from xgboost import XGBRegressor\n", | |
"from lightgbm import LGBMRegressor\n", | |
"import numpy as np\n", | |
"\n", | |
"def train_and_score(params=None, model_type=\"catboost\", silent=True):\n", | |
"    \"\"\"Fit one boosted-tree regressor and return its mean squared error.\n", | |
"\n", | |
"    The model is fitted on the notebook-level X_train / y_train and scored\n", | |
"    on X_test / y_test.\n", | |
"\n", | |
"    Args:\n", | |
"        params: Keyword arguments forwarded to the model constructor.\n", | |
"            None (the default) means no extra arguments.\n", | |
"        model_type: One of \"catboost\", \"xgboost\" or \"lightgbm\".\n", | |
"        silent: When False, print the MSE for this model.\n", | |
"\n", | |
"    Returns:\n", | |
"        The mean squared error of the predictions on X_test.\n", | |
"\n", | |
"    Raises:\n", | |
"        ValueError: If model_type is not one of the supported names.\n", | |
"    \"\"\"\n", | |
"    # None sentinel instead of a mutable {} default shared between calls.\n", | |
"    params = {} if params is None else params\n", | |
"\n", | |
"    # Each library spells its \"be quiet\" option differently; it is set here,\n", | |
"    # so callers must not pass the same keyword again inside params.\n", | |
"    if model_type == \"catboost\":\n", | |
"        model = CatBoostRegressor(logging_level=\"Silent\", **params)\n", | |
"    elif model_type == \"xgboost\":\n", | |
"        model = XGBRegressor(verbosity=0, **params)\n", | |
"    elif model_type == \"lightgbm\":\n", | |
"        model = LGBMRegressor(verbosity=-1, **params)\n", | |
"    else:\n", | |
"        # Without this branch an unknown name reached model.fit with no\n", | |
"        # model bound and failed with an unrelated-looking error.\n", | |
"        raise ValueError(f\"Unknown model_type: {model_type!r}\")\n", | |
"\n", | |
"    # Train the model\n", | |
"    model.fit(X_train, y_train)\n", | |
"    y_pred = model.predict(X_test)\n", | |
"    mse = mean_squared_error(y_test, y_pred)\n", | |
"    if not silent:\n", | |
"        print(f\"{model_type=} MSE: {mse:0.4f}\")\n", | |
"    return mse" | |
], | |
"metadata": { | |
"id": "7kGJHzQG43Jv" | |
}, | |
"execution_count": 5, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Baseline: score each library with no extra hyperparameters and print its MSE.\n", | |
"train_and_score(params={}, model_type=\"catboost\", silent=False)\n", | |
"train_and_score(params={}, model_type=\"xgboost\", silent=False)\n", | |
"train_and_score(params={}, model_type=\"lightgbm\", silent=False)\n", | |
"print()" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "Xyezj_8g7tsw", | |
"outputId": "3e561b34-d097-45c3-90e5-b70e0f9f9bf9" | |
}, | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"model_type='xgboost' MSE: 0.2226\n", | |
"\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Define the objective function for optimization\n", | |
"def objective(trial):\n", | |
"    \"\"\"Sample CatBoost hyperparameters for one trial and return the MSE.\"\"\"\n", | |
"    params = {\n", | |
"        \"iterations\": trial.suggest_int(\"iterations\", 100, 1000),\n", | |
"        \"learning_rate\": trial.suggest_float(\"learning_rate\", 0.001, 0.1, log=True),\n", | |
"        \"depth\": trial.suggest_int(\"depth\", 4, 10),\n", | |
"        \"l2_leaf_reg\": trial.suggest_int(\"l2_leaf_reg\", 1, 10),\n", | |
"        \"bagging_temperature\": trial.suggest_float(\"bagging_temperature\", 0.01, 10),\n", | |
"    }\n", | |
"    return train_and_score(params, model_type=\"catboost\")\n", | |
"\n", | |
"# Run the optimization. The sampler is seeded so the sequence of suggested\n", | |
"# hyperparameters is the same on every sequential run of this cell.\n", | |
"study = optuna.create_study(\n", | |
"    direction=\"minimize\",  # Minimize MSE\n", | |
"    sampler=optuna.samplers.TPESampler(seed=42),\n", | |
")\n", | |
"study.optimize(objective, n_trials=4)\n", | |
"\n", | |
"# Best parameters found\n", | |
"print(\"Best parameters:\", study.best_params)\n", | |
"print(\"Best MSE:\", study.best_value)" | |
], | |
"metadata": { | |
"id": "r6Vr67Pk3f9V", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "6bd939cc-12e7-4bdb-8729-193479751141" | |
}, | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"[I 2025-04-07 11:45:24,764] A new study created in memory with name: no-name-91b82145-44ee-42ee-b831-fe9e6516dee6\n", | |
"[I 2025-04-07 11:46:02,659] Trial 0 finished with value: 0.5540918108398565 and parameters: {'iterations': 798, 'learning_rate': 0.0012178002761617233, 'depth': 10, 'l2_leaf_reg': 5, 'bagging_temperature': 7.749189591256689}. Best is trial 0 with value: 0.5540918108398565.\n", | |
"[I 2025-04-07 11:46:03,875] Trial 1 finished with value: 0.6886207457721376 and parameters: {'iterations': 178, 'learning_rate': 0.004114184668962924, 'depth': 7, 'l2_leaf_reg': 6, 'bagging_temperature': 9.159671193678008}. Best is trial 0 with value: 0.5540918108398565.\n", | |
"[I 2025-04-07 11:46:05,809] Trial 2 finished with value: 0.25277147540504363 and parameters: {'iterations': 560, 'learning_rate': 0.028343198913913334, 'depth': 5, 'l2_leaf_reg': 2, 'bagging_temperature': 9.511635588003797}. Best is trial 2 with value: 0.25277147540504363.\n", | |
"[I 2025-04-07 11:46:10,182] Trial 3 finished with value: 0.29850473131503585 and parameters: {'iterations': 800, 'learning_rate': 0.009681623543015242, 'depth': 5, 'l2_leaf_reg': 4, 'bagging_temperature': 8.98692440367321}. Best is trial 2 with value: 0.25277147540504363.\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Best parameters: {'iterations': 560, 'learning_rate': 0.028343198913913334, 'depth': 5, 'l2_leaf_reg': 2, 'bagging_temperature': 9.511635588003797}\n", | |
"Best MSE: 0.25277147540504363\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Define the objective function for optimization\n", | |
"def objective(trial):\n", | |
"    \"\"\"Sample LightGBM hyperparameters for one trial and return the MSE.\"\"\"\n", | |
"    params = {\n", | |
"        \"objective\": \"regression\",\n", | |
"        \"metric\": \"rmse\",\n", | |
"        \"n_estimators\": 1000,\n", | |
"        # verbosity is already passed by train_and_score; repeating it here\n", | |
"        # would supply the same keyword twice.\n", | |
"        \"bagging_freq\": 1,\n", | |
"        \"learning_rate\": trial.suggest_float(\"learning_rate\", 1e-3, 0.1, log=True),\n", | |
"        \"num_leaves\": trial.suggest_int(\"num_leaves\", 2, 2**10),\n", | |
"        \"subsample\": trial.suggest_float(\"subsample\", 0.05, 1.0),\n", | |
"        \"colsample_bytree\": trial.suggest_float(\"colsample_bytree\", 0.05, 1.0),\n", | |
"        \"min_data_in_leaf\": trial.suggest_int(\"min_data_in_leaf\", 1, 100),\n", | |
"    }\n", | |
"    return train_and_score(params, model_type=\"lightgbm\")\n", | |
"\n", | |
"# Run the optimization. The sampler is seeded so the sequence of suggested\n", | |
"# hyperparameters is the same on every sequential run of this cell.\n", | |
"study = optuna.create_study(\n", | |
"    direction=\"minimize\",  # Minimize MSE\n", | |
"    sampler=optuna.samplers.TPESampler(seed=42),\n", | |
")\n", | |
"study.optimize(objective, n_trials=4)\n", | |
"\n", | |
"# Best parameters found\n", | |
"print(\"Best parameters:\", study.best_params)\n", | |
"print(\"Best MSE:\", study.best_value)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "yJtzOrdaBEbp", | |
"outputId": "85ea635c-2b7f-4ebd-b351-e17316f78edf" | |
}, | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"[I 2025-04-07 11:46:10,197] A new study created in memory with name: no-name-163656f0-83f5-465b-9f89-0ceeb1c81598\n", | |
"[I 2025-04-07 11:46:16,371] Trial 0 finished with value: 0.21520473131668916 and parameters: {'learning_rate': 0.005809872199196733, 'num_leaves': 187, 'subsample': 0.31258169646380063, 'colsample_bytree': 0.7545050778815918, 'min_data_in_leaf': 38}. Best is trial 0 with value: 0.21520473131668916.\n", | |
"[I 2025-04-07 11:46:23,016] Trial 1 finished with value: 0.22123552055009862 and parameters: {'learning_rate': 0.006566272694699802, 'num_leaves': 620, 'subsample': 0.848252988047558, 'colsample_bytree': 0.3536480344274082, 'min_data_in_leaf': 64}. Best is trial 0 with value: 0.21520473131668916.\n", | |
"[I 2025-04-07 11:46:27,923] Trial 2 finished with value: 0.25586049266061156 and parameters: {'learning_rate': 0.0629060264955824, 'num_leaves': 103, 'subsample': 0.16800546154405946, 'colsample_bytree': 0.3109227822731441, 'min_data_in_leaf': 21}. Best is trial 0 with value: 0.21520473131668916.\n", | |
"[I 2025-04-07 11:46:31,582] Trial 3 finished with value: 0.37083046652455914 and parameters: {'learning_rate': 0.013938237551993922, 'num_leaves': 632, 'subsample': 0.8108757040907426, 'colsample_bytree': 0.0827501462036972, 'min_data_in_leaf': 82}. Best is trial 0 with value: 0.21520473131668916.\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Best parameters: {'learning_rate': 0.005809872199196733, 'num_leaves': 187, 'subsample': 0.31258169646380063, 'colsample_bytree': 0.7545050778815918, 'min_data_in_leaf': 38}\n", | |
"Best MSE: 0.21520473131668916\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Define the objective function for optimization\n", | |
"def objective(trial):\n", | |
"    \"\"\"Sample XGBoost hyperparameters for one trial and return the MSE.\"\"\"\n", | |
"    params = {\n", | |
"        \"objective\": \"reg:squarederror\",\n", | |
"        \"n_estimators\": 1000,\n", | |
"        # verbosity is already passed by train_and_score; repeating it here\n", | |
"        # would supply the same keyword twice.\n", | |
"        \"learning_rate\": trial.suggest_float(\"learning_rate\", 1e-3, 0.1, log=True),\n", | |
"        \"max_depth\": trial.suggest_int(\"max_depth\", 1, 10),\n", | |
"        \"subsample\": trial.suggest_float(\"subsample\", 0.05, 1.0),\n", | |
"        \"colsample_bytree\": trial.suggest_float(\"colsample_bytree\", 0.05, 1.0),\n", | |
"        \"min_child_weight\": trial.suggest_int(\"min_child_weight\", 1, 20),\n", | |
"    }\n", | |
"    return train_and_score(params, model_type=\"xgboost\")\n", | |
"\n", | |
"# Run the optimization. The sampler is seeded so the sequence of suggested\n", | |
"# hyperparameters is the same on every sequential run of this cell.\n", | |
"study = optuna.create_study(\n", | |
"    direction=\"minimize\",  # Minimize MSE\n", | |
"    sampler=optuna.samplers.TPESampler(seed=42),\n", | |
")\n", | |
"study.optimize(objective, n_trials=4)\n", | |
"\n", | |
"# Best parameters found\n", | |
"print(\"Best parameters:\", study.best_params)\n", | |
"print(\"Best MSE:\", study.best_value)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "M9nYBq6ZB2qr", | |
"outputId": "dd72fde8-347f-40eb-df34-2e7906a887f6" | |
}, | |
"execution_count": 10, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"[I 2025-04-07 11:46:31,594] A new study created in memory with name: no-name-12e25abd-e014-4ad6-8abd-acba7ecfdcd6\n", | |
"[I 2025-04-07 11:46:35,109] Trial 0 finished with value: 0.35967840280957875 and parameters: {'learning_rate': 0.003129235758308534, 'max_depth': 5, 'subsample': 0.11550003197083734, 'colsample_bytree': 0.48858752132869404, 'min_child_weight': 12}. Best is trial 0 with value: 0.35967840280957875.\n", | |
"[I 2025-04-07 11:46:36,671] Trial 1 finished with value: 0.3392855072888477 and parameters: {'learning_rate': 0.06184394939067719, 'max_depth': 7, 'subsample': 0.16130146316336486, 'colsample_bytree': 0.0901881930510108, 'min_child_weight': 16}. Best is trial 1 with value: 0.3392855072888477.\n", | |
"[I 2025-04-07 11:46:37,419] Trial 2 finished with value: 0.3942697822023086 and parameters: {'learning_rate': 0.006203384966031322, 'max_depth': 2, 'subsample': 0.9034740920361478, 'colsample_bytree': 0.6744748001374601, 'min_child_weight': 17}. Best is trial 1 with value: 0.3392855072888477.\n", | |
"[I 2025-04-07 11:46:39,352] Trial 3 finished with value: 0.3827892885907202 and parameters: {'learning_rate': 0.011980474404141008, 'max_depth': 7, 'subsample': 0.6354086350309843, 'colsample_bytree': 0.19576173535290747, 'min_child_weight': 2}. Best is trial 1 with value: 0.3392855072888477.\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Best parameters: {'learning_rate': 0.06184394939067719, 'max_depth': 7, 'subsample': 0.16130146316336486, 'colsample_bytree': 0.0901881930510108, 'min_child_weight': 16}\n", | |
"Best MSE: 0.3392855072888477\n" | |
] | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment