Skip to content

Instantly share code, notes, and snippets.

@brusangues
Created April 7, 2025 11:47
Show Gist options
  • Save brusangues/b32ab44aca358f08c590ab02fc02c7fc to your computer and use it in GitHub Desktop.
Save brusangues/b32ab44aca358f08c590ab02fc02c7fc to your computer and use it in GitHub Desktop.
optuna.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyN3JmDkDBX9JVpAlJAS85I2",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/brusangues/b32ab44aca358f08c590ab02fc02c7fc/optuna.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"source": [
"# %pip installs into the environment of the running kernel; !pip uses whichever pip is first on PATH\n",
"%pip install --quiet optuna xgboost lightgbm catboost"
],
"metadata": {
"id": "-8-cRHox3itE"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "cqnXcs-B3aoM"
},
"outputs": [],
"source": [
"import optuna\n",
"\n",
"from sklearn.datasets import fetch_california_housing\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import mean_squared_error\n",
"\n",
"# Fetch the California housing data and hold out 20% of the rows for scoring\n",
"data = fetch_california_housing()\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    data.data,\n",
"    data.target,\n",
"    test_size=0.2,\n",
"    random_state=42,\n",
")"
]
},
{
"cell_type": "code",
"source": [
"import warnings\n",
"# Install a blanket filter that ignores every warning from here on\n",
"warnings.simplefilter(\"ignore\")"
],
"metadata": {
"id": "INXQZR2IDTW0"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from catboost import CatBoostClassifier, CatBoostRegressor\n",
"from xgboost import XGBRegressor\n",
"from lightgbm import LGBMRegressor\n",
"import numpy as np\n",
"\n",
"def train_and_score(params=None, model_type=\"catboost\", silent=True):\n",
"    \"\"\"Fit one gradient-boosting regressor and return its MSE on the hold-out split.\n",
"\n",
"    Reads the notebook-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``.\n",
"\n",
"    Args:\n",
"        params: Keyword arguments forwarded to the model constructor.\n",
"            ``None`` (the default) or an empty dict uses the library defaults.\n",
"        model_type: One of ``\"catboost\"``, ``\"xgboost\"`` or ``\"lightgbm\"``.\n",
"        silent: When False, print the model type together with its MSE.\n",
"\n",
"    Returns:\n",
"        Mean squared error of the predictions made on ``X_test``.\n",
"\n",
"    Raises:\n",
"        ValueError: If ``model_type`` is not one of the supported names.\n",
"    \"\"\"\n",
"    # Pair each estimator class with the keyword that silences its training log.\n",
"    if model_type == \"catboost\":\n",
"        model_cls, quiet_kwargs = CatBoostRegressor, {\"logging_level\": \"Silent\"}\n",
"    elif model_type == \"xgboost\":\n",
"        model_cls, quiet_kwargs = XGBRegressor, {\"verbosity\": 0}\n",
"    elif model_type == \"lightgbm\":\n",
"        model_cls, quiet_kwargs = LGBMRegressor, {\"verbosity\": -1}\n",
"    else:\n",
"        # An unknown name used to fall through and fail later with UnboundLocalError.\n",
"        raise ValueError(\n",
"            f\"Unknown model_type {model_type!r}; \"\n",
"            \"expected 'catboost', 'xgboost' or 'lightgbm'\"\n",
"        )\n",
"\n",
"    # Caller-supplied params override the quiet defaults, so passing the same\n",
"    # keyword in params no longer triggers a duplicate-keyword TypeError.\n",
"    model = model_cls(**{**quiet_kwargs, **(params or {})})\n",
"\n",
"    # Train the model\n",
"    model.fit(X_train, y_train)\n",
"    y_pred = model.predict(X_test)\n",
"    mse = mean_squared_error(y_test, y_pred)\n",
"    if not silent:\n",
"        print(f\"{model_type=} MSE: {mse:0.4f}\")\n",
"    return mse"
],
"metadata": {
"id": "7kGJHzQG43Jv"
},
"execution_count": 5,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Baseline: score each library with its default hyperparameters\n",
"for model_name in (\"catboost\", \"xgboost\", \"lightgbm\"):\n",
"    train_and_score({}, model_name, False)\n",
"print()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Xyezj_8g7tsw",
"outputId": "3e561b34-d097-45c3-90e5-b70e0f9f9bf9"
},
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"model_type='xgboost' MSE: 0.2226\n",
"\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Objective for the CatBoost search\n",
"def objective(trial):\n",
"    \"\"\"Sample one set of CatBoost hyperparameters and return the MSE it achieves.\"\"\"\n",
"    hyperparams = dict(\n",
"        iterations=trial.suggest_int(\"iterations\", 100, 1000),\n",
"        # learning_rate is drawn on a log scale\n",
"        learning_rate=trial.suggest_float(\"learning_rate\", 0.001, 0.1, log=True),\n",
"        depth=trial.suggest_int(\"depth\", 4, 10),\n",
"        l2_leaf_reg=trial.suggest_int(\"l2_leaf_reg\", 1, 10),\n",
"        bagging_temperature=trial.suggest_float(\"bagging_temperature\", 0.01, 10),\n",
"    )\n",
"    return train_and_score(hyperparams)\n",
"\n",
"# Run the optimization; seeding the sampler makes the proposed trials repeatable\n",
"study = optuna.create_study(\n",
"    direction=\"minimize\",  # Minimize MSE\n",
"    sampler=optuna.samplers.TPESampler(seed=42),\n",
")\n",
"study.optimize(objective, n_trials=4)\n",
"\n",
"# Best parameters found\n",
"print(\"Best parameters:\", study.best_params)\n",
"print(\"Best MSE:\", study.best_value)"
],
"metadata": {
"id": "r6Vr67Pk3f9V",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "6bd939cc-12e7-4bdb-8729-193479751141"
},
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"[I 2025-04-07 11:45:24,764] A new study created in memory with name: no-name-91b82145-44ee-42ee-b831-fe9e6516dee6\n",
"[I 2025-04-07 11:46:02,659] Trial 0 finished with value: 0.5540918108398565 and parameters: {'iterations': 798, 'learning_rate': 0.0012178002761617233, 'depth': 10, 'l2_leaf_reg': 5, 'bagging_temperature': 7.749189591256689}. Best is trial 0 with value: 0.5540918108398565.\n",
"[I 2025-04-07 11:46:03,875] Trial 1 finished with value: 0.6886207457721376 and parameters: {'iterations': 178, 'learning_rate': 0.004114184668962924, 'depth': 7, 'l2_leaf_reg': 6, 'bagging_temperature': 9.159671193678008}. Best is trial 0 with value: 0.5540918108398565.\n",
"[I 2025-04-07 11:46:05,809] Trial 2 finished with value: 0.25277147540504363 and parameters: {'iterations': 560, 'learning_rate': 0.028343198913913334, 'depth': 5, 'l2_leaf_reg': 2, 'bagging_temperature': 9.511635588003797}. Best is trial 2 with value: 0.25277147540504363.\n",
"[I 2025-04-07 11:46:10,182] Trial 3 finished with value: 0.29850473131503585 and parameters: {'iterations': 800, 'learning_rate': 0.009681623543015242, 'depth': 5, 'l2_leaf_reg': 4, 'bagging_temperature': 8.98692440367321}. Best is trial 2 with value: 0.25277147540504363.\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Best parameters: {'iterations': 560, 'learning_rate': 0.028343198913913334, 'depth': 5, 'l2_leaf_reg': 2, 'bagging_temperature': 9.511635588003797}\n",
"Best MSE: 0.25277147540504363\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Objective for the LightGBM search\n",
"def objective(trial):\n",
"    \"\"\"Sample one set of LightGBM hyperparameters and return the MSE it achieves.\"\"\"\n",
"    # Settings held constant across trials (verbosity is supplied by train_and_score)\n",
"    fixed = {\n",
"        \"objective\": \"regression\",\n",
"        \"metric\": \"rmse\",\n",
"        \"n_estimators\": 1000,\n",
"        \"bagging_freq\": 1,\n",
"    }\n",
"    # Settings proposed by Optuna; learning_rate is drawn on a log scale\n",
"    sampled = {\n",
"        \"learning_rate\": trial.suggest_float(\"learning_rate\", 1e-3, 0.1, log=True),\n",
"        \"num_leaves\": trial.suggest_int(\"num_leaves\", 2, 2**10),\n",
"        \"subsample\": trial.suggest_float(\"subsample\", 0.05, 1.0),\n",
"        \"colsample_bytree\": trial.suggest_float(\"colsample_bytree\", 0.05, 1.0),\n",
"        \"min_data_in_leaf\": trial.suggest_int(\"min_data_in_leaf\", 1, 100),\n",
"    }\n",
"    return train_and_score({**fixed, **sampled}, model_type=\"lightgbm\")\n",
"\n",
"# Run the optimization; seeding the sampler makes the proposed trials repeatable\n",
"study = optuna.create_study(\n",
"    direction=\"minimize\",  # Minimize MSE\n",
"    sampler=optuna.samplers.TPESampler(seed=42),\n",
")\n",
"study.optimize(objective, n_trials=4)\n",
"\n",
"# Best parameters found\n",
"print(\"Best parameters:\", study.best_params)\n",
"print(\"Best MSE:\", study.best_value)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "yJtzOrdaBEbp",
"outputId": "85ea635c-2b7f-4ebd-b351-e17316f78edf"
},
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"[I 2025-04-07 11:46:10,197] A new study created in memory with name: no-name-163656f0-83f5-465b-9f89-0ceeb1c81598\n",
"[I 2025-04-07 11:46:16,371] Trial 0 finished with value: 0.21520473131668916 and parameters: {'learning_rate': 0.005809872199196733, 'num_leaves': 187, 'subsample': 0.31258169646380063, 'colsample_bytree': 0.7545050778815918, 'min_data_in_leaf': 38}. Best is trial 0 with value: 0.21520473131668916.\n",
"[I 2025-04-07 11:46:23,016] Trial 1 finished with value: 0.22123552055009862 and parameters: {'learning_rate': 0.006566272694699802, 'num_leaves': 620, 'subsample': 0.848252988047558, 'colsample_bytree': 0.3536480344274082, 'min_data_in_leaf': 64}. Best is trial 0 with value: 0.21520473131668916.\n",
"[I 2025-04-07 11:46:27,923] Trial 2 finished with value: 0.25586049266061156 and parameters: {'learning_rate': 0.0629060264955824, 'num_leaves': 103, 'subsample': 0.16800546154405946, 'colsample_bytree': 0.3109227822731441, 'min_data_in_leaf': 21}. Best is trial 0 with value: 0.21520473131668916.\n",
"[I 2025-04-07 11:46:31,582] Trial 3 finished with value: 0.37083046652455914 and parameters: {'learning_rate': 0.013938237551993922, 'num_leaves': 632, 'subsample': 0.8108757040907426, 'colsample_bytree': 0.0827501462036972, 'min_data_in_leaf': 82}. Best is trial 0 with value: 0.21520473131668916.\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Best parameters: {'learning_rate': 0.005809872199196733, 'num_leaves': 187, 'subsample': 0.31258169646380063, 'colsample_bytree': 0.7545050778815918, 'min_data_in_leaf': 38}\n",
"Best MSE: 0.21520473131668916\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"def objective(trial):\n",
"    \"\"\"Sample one set of XGBoost hyperparameters and return the MSE it achieves.\"\"\"\n",
"    # Settings held constant across trials (verbosity is supplied by train_and_score)\n",
"    fixed = {\n",
"        \"objective\": \"reg:squarederror\",\n",
"        \"n_estimators\": 1000,\n",
"    }\n",
"    # Settings proposed by Optuna; learning_rate is drawn on a log scale\n",
"    sampled = {\n",
"        \"learning_rate\": trial.suggest_float(\"learning_rate\", 1e-3, 0.1, log=True),\n",
"        \"max_depth\": trial.suggest_int(\"max_depth\", 1, 10),\n",
"        \"subsample\": trial.suggest_float(\"subsample\", 0.05, 1.0),\n",
"        \"colsample_bytree\": trial.suggest_float(\"colsample_bytree\", 0.05, 1.0),\n",
"        \"min_child_weight\": trial.suggest_int(\"min_child_weight\", 1, 20),\n",
"    }\n",
"    return train_and_score({**fixed, **sampled}, model_type=\"xgboost\")\n",
"\n",
"# Run the optimization; seeding the sampler makes the proposed trials repeatable\n",
"study = optuna.create_study(\n",
"    direction=\"minimize\",  # Minimize MSE\n",
"    sampler=optuna.samplers.TPESampler(seed=42),\n",
")\n",
"study.optimize(objective, n_trials=4)\n",
"\n",
"# Best parameters found\n",
"print(\"Best parameters:\", study.best_params)\n",
"print(\"Best MSE:\", study.best_value)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "M9nYBq6ZB2qr",
"outputId": "dd72fde8-347f-40eb-df34-2e7906a887f6"
},
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"[I 2025-04-07 11:46:31,594] A new study created in memory with name: no-name-12e25abd-e014-4ad6-8abd-acba7ecfdcd6\n",
"[I 2025-04-07 11:46:35,109] Trial 0 finished with value: 0.35967840280957875 and parameters: {'learning_rate': 0.003129235758308534, 'max_depth': 5, 'subsample': 0.11550003197083734, 'colsample_bytree': 0.48858752132869404, 'min_child_weight': 12}. Best is trial 0 with value: 0.35967840280957875.\n",
"[I 2025-04-07 11:46:36,671] Trial 1 finished with value: 0.3392855072888477 and parameters: {'learning_rate': 0.06184394939067719, 'max_depth': 7, 'subsample': 0.16130146316336486, 'colsample_bytree': 0.0901881930510108, 'min_child_weight': 16}. Best is trial 1 with value: 0.3392855072888477.\n",
"[I 2025-04-07 11:46:37,419] Trial 2 finished with value: 0.3942697822023086 and parameters: {'learning_rate': 0.006203384966031322, 'max_depth': 2, 'subsample': 0.9034740920361478, 'colsample_bytree': 0.6744748001374601, 'min_child_weight': 17}. Best is trial 1 with value: 0.3392855072888477.\n",
"[I 2025-04-07 11:46:39,352] Trial 3 finished with value: 0.3827892885907202 and parameters: {'learning_rate': 0.011980474404141008, 'max_depth': 7, 'subsample': 0.6354086350309843, 'colsample_bytree': 0.19576173535290747, 'min_child_weight': 2}. Best is trial 1 with value: 0.3392855072888477.\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Best parameters: {'learning_rate': 0.06184394939067719, 'max_depth': 7, 'subsample': 0.16130146316336486, 'colsample_bytree': 0.0901881930510108, 'min_child_weight': 16}\n",
"Best MSE: 0.3392855072888477\n"
]
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment