brusangues · April 7, 2025 11:47
diff --git a/optuna.ipynb b/optuna.ipynb
 {
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyN3JmDkDBX9JVpAlJAS85I2",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/brusangues/b32ab44aca358f08c590ab02fc02c7fc/optuna.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# %pip (rather than !pip) installs into the environment of the running kernel\n",
        "%pip install --quiet optuna xgboost lightgbm catboost"
      ],
      "metadata": {
        "id": "-8-cRHox3itE"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "id": "cqnXcs-B3aoM"
      },
      "outputs": [],
      "source": [
        "import optuna\n",
        "\n",
        "from sklearn.datasets import fetch_california_housing\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.metrics import mean_squared_error\n",
        "\n",
        "# Fetch the California housing data and hold out 20% of the rows for scoring\n",
        "data = fetch_california_housing()\n",
        "X_train, X_test, y_train, y_test = train_test_split(\n",
        "    data.data,\n",
        "    data.target,\n",
        "    test_size=0.2,\n",
        "    random_state=42,  # fixed seed so the split is identical on every run\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import warnings\n",
        "\n",
        "# Blanket \"ignore\" filter: every warning raised after this cell runs is suppressed\n",
        "warnings.simplefilter(\"ignore\")"
      ],
      "metadata": {
        "id": "INXQZR2IDTW0"
      },
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from catboost import CatBoostClassifier, CatBoostRegressor\n",
        "from xgboost import XGBRegressor\n",
        "from lightgbm import LGBMRegressor\n",
        "import numpy as np\n",
        "\n",
        "def train_and_score(params=None, model_type=\"catboost\", silent=True):\n",
        "    \"\"\"Fit one boosting regressor and return its MSE on the held-out split.\n",
        "\n",
        "    Args:\n",
        "        params: Keyword arguments forwarded to the model constructor;\n",
        "            None (the default) means no extra arguments.\n",
        "        model_type: \"catboost\", \"xgboost\" or \"lightgbm\".\n",
        "        silent: If False, print the model type and the MSE.\n",
        "\n",
        "    Returns:\n",
        "        Mean squared error of the predictions for X_test against y_test.\n",
        "\n",
        "    Raises:\n",
        "        ValueError: If model_type is not one of the three supported names.\n",
        "    \"\"\"\n",
        "    # None instead of a {} default: a mutable default object is shared between calls\n",
        "    if params is None:\n",
        "        params = {}\n",
        "\n",
        "    # Each library gets its own \"be quiet\" argument in addition to the caller's params\n",
        "    if model_type == \"catboost\":\n",
        "        model = CatBoostRegressor(logging_level=\"Silent\", **params)\n",
        "    elif model_type == \"xgboost\":\n",
        "        model = XGBRegressor(verbosity=0, **params)\n",
        "    elif model_type == \"lightgbm\":\n",
        "        model = LGBMRegressor(verbosity=-1, **params)\n",
        "    else:\n",
        "        # Without this branch an unknown name only failed later with UnboundLocalError\n",
        "        raise ValueError(f\"Unknown model_type: {model_type!r}\")\n",
        "\n",
        "    # Train on the training split, then score on X_test / y_test.\n",
        "    # NOTE(review): every caller, including each Optuna trial, is scored on this same split.\n",
        "    model.fit(X_train, y_train)\n",
        "    y_pred = model.predict(X_test)\n",
        "    mse = mean_squared_error(y_test, y_pred)\n",
        "    if not silent:\n",
        "        print(f\"{model_type=} MSE: {mse:0.4f}\")\n",
        "    return mse"
      ],
      "metadata": {
        "id": "7kGJHzQG43Jv"
      },
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Baseline: score each library with no extra constructor arguments\n",
        "train_and_score(params={}, model_type=\"catboost\", silent=False)\n",
        "train_and_score(params={}, model_type=\"xgboost\", silent=False)\n",
        "train_and_score(params={}, model_type=\"lightgbm\", silent=False)\n",
        "print()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Xyezj_8g7tsw",
        "outputId": "3e561b34-d097-45c3-90e5-b70e0f9f9bf9"
      },
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "model_type='xgboost' MSE: 0.2226\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "def objective(trial):\n",
        "    \"\"\"Optuna objective: sample CatBoost hyperparameters and return the resulting MSE.\"\"\"\n",
        "    # Draw one value per hyperparameter; learning_rate is sampled on a log scale\n",
        "    iterations = trial.suggest_int(\"iterations\", 100, 1000)\n",
        "    learning_rate = trial.suggest_float(\"learning_rate\", 0.001, 0.1, log=True)\n",
        "    depth = trial.suggest_int(\"depth\", 4, 10)\n",
        "    l2_leaf_reg = trial.suggest_int(\"l2_leaf_reg\", 1, 10)\n",
        "    bagging_temperature = trial.suggest_float(\"bagging_temperature\", 0.01, 10)\n",
        "\n",
        "    catboost_params = {\n",
        "        \"iterations\": iterations,\n",
        "        \"learning_rate\": learning_rate,\n",
        "        \"depth\": depth,\n",
        "        \"l2_leaf_reg\": l2_leaf_reg,\n",
        "        \"bagging_temperature\": bagging_temperature,\n",
        "    }\n",
        "    # No model_type is passed, so train_and_score uses its \"catboost\" default\n",
        "    return train_and_score(catboost_params)\n",
        "\n",
        "# Run the optimization; seeding the sampler makes the sampled trials repeatable\n",
        "study = optuna.create_study(\n",
        "    direction=\"minimize\",  # minimize MSE\n",
        "    sampler=optuna.samplers.TPESampler(seed=42),\n",
        ")\n",
        "study.optimize(objective, n_trials=4)\n",
        "\n",
        "# Best parameters found\n",
        "print(\"Best parameters:\", study.best_params)\n",
        "print(\"Best MSE:\", study.best_value)"
      ],
      "metadata": {
        "id": "r6Vr67Pk3f9V",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "6bd939cc-12e7-4bdb-8729-193479751141"
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-04-07 11:45:24,764] A new study created in memory with name: no-name-91b82145-44ee-42ee-b831-fe9e6516dee6\n",
            "[I 2025-04-07 11:46:02,659] Trial 0 finished with value: 0.5540918108398565 and parameters: {'iterations': 798, 'learning_rate': 0.0012178002761617233, 'depth': 10, 'l2_leaf_reg': 5, 'bagging_temperature': 7.749189591256689}. Best is trial 0 with value: 0.5540918108398565.\n",
            "[I 2025-04-07 11:46:03,875] Trial 1 finished with value: 0.6886207457721376 and parameters: {'iterations': 178, 'learning_rate': 0.004114184668962924, 'depth': 7, 'l2_leaf_reg': 6, 'bagging_temperature': 9.159671193678008}. Best is trial 0 with value: 0.5540918108398565.\n",
            "[I 2025-04-07 11:46:05,809] Trial 2 finished with value: 0.25277147540504363 and parameters: {'iterations': 560, 'learning_rate': 0.028343198913913334, 'depth': 5, 'l2_leaf_reg': 2, 'bagging_temperature': 9.511635588003797}. Best is trial 2 with value: 0.25277147540504363.\n",
            "[I 2025-04-07 11:46:10,182] Trial 3 finished with value: 0.29850473131503585 and parameters: {'iterations': 800, 'learning_rate': 0.009681623543015242, 'depth': 5, 'l2_leaf_reg': 4, 'bagging_temperature': 8.98692440367321}. Best is trial 2 with value: 0.25277147540504363.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Best parameters: {'iterations': 560, 'learning_rate': 0.028343198913913334, 'depth': 5, 'l2_leaf_reg': 2, 'bagging_temperature': 9.511635588003797}\n",
            "Best MSE: 0.25277147540504363\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "def objective(trial):\n",
        "    \"\"\"Optuna objective: sample LightGBM hyperparameters and return the resulting MSE.\"\"\"\n",
        "    # Settings shared by every trial. \"verbosity\" is intentionally absent:\n",
        "    # train_and_score already passes it, so repeating it would be a duplicate keyword.\n",
        "    fixed_params = {\n",
        "        \"objective\": \"regression\",\n",
        "        \"metric\": \"rmse\",\n",
        "        \"n_estimators\": 1000,\n",
        "        \"bagging_freq\": 1,\n",
        "    }\n",
        "    # Values drawn by Optuna for this trial; learning_rate is sampled on a log scale\n",
        "    sampled_params = {\n",
        "        \"learning_rate\": trial.suggest_float(\"learning_rate\", 1e-3, 0.1, log=True),\n",
        "        \"num_leaves\": trial.suggest_int(\"num_leaves\", 2, 2**10),\n",
        "        \"subsample\": trial.suggest_float(\"subsample\", 0.05, 1.0),\n",
        "        \"colsample_bytree\": trial.suggest_float(\"colsample_bytree\", 0.05, 1.0),\n",
        "        \"min_data_in_leaf\": trial.suggest_int(\"min_data_in_leaf\", 1, 100),\n",
        "    }\n",
        "    lightgbm_params = {**fixed_params, **sampled_params}\n",
        "    return train_and_score(lightgbm_params, model_type=\"lightgbm\")\n",
        "\n",
        "# Run the optimization; seeding the sampler makes the sampled trials repeatable\n",
        "study = optuna.create_study(\n",
        "    direction=\"minimize\",  # minimize MSE\n",
        "    sampler=optuna.samplers.TPESampler(seed=42),\n",
        ")\n",
        "study.optimize(objective, n_trials=4)\n",
        "\n",
        "# Best parameters found\n",
        "print(\"Best parameters:\", study.best_params)\n",
        "print(\"Best MSE:\", study.best_value)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "yJtzOrdaBEbp",
        "outputId": "85ea635c-2b7f-4ebd-b351-e17316f78edf"
      },
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-04-07 11:46:10,197] A new study created in memory with name: no-name-163656f0-83f5-465b-9f89-0ceeb1c81598\n",
            "[I 2025-04-07 11:46:16,371] Trial 0 finished with value: 0.21520473131668916 and parameters: {'learning_rate': 0.005809872199196733, 'num_leaves': 187, 'subsample': 0.31258169646380063, 'colsample_bytree': 0.7545050778815918, 'min_data_in_leaf': 38}. Best is trial 0 with value: 0.21520473131668916.\n",
            "[I 2025-04-07 11:46:23,016] Trial 1 finished with value: 0.22123552055009862 and parameters: {'learning_rate': 0.006566272694699802, 'num_leaves': 620, 'subsample': 0.848252988047558, 'colsample_bytree': 0.3536480344274082, 'min_data_in_leaf': 64}. Best is trial 0 with value: 0.21520473131668916.\n",
            "[I 2025-04-07 11:46:27,923] Trial 2 finished with value: 0.25586049266061156 and parameters: {'learning_rate': 0.0629060264955824, 'num_leaves': 103, 'subsample': 0.16800546154405946, 'colsample_bytree': 0.3109227822731441, 'min_data_in_leaf': 21}. Best is trial 0 with value: 0.21520473131668916.\n",
            "[I 2025-04-07 11:46:31,582] Trial 3 finished with value: 0.37083046652455914 and parameters: {'learning_rate': 0.013938237551993922, 'num_leaves': 632, 'subsample': 0.8108757040907426, 'colsample_bytree': 0.0827501462036972, 'min_data_in_leaf': 82}. Best is trial 0 with value: 0.21520473131668916.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Best parameters: {'learning_rate': 0.005809872199196733, 'num_leaves': 187, 'subsample': 0.31258169646380063, 'colsample_bytree': 0.7545050778815918, 'min_data_in_leaf': 38}\n",
            "Best MSE: 0.21520473131668916\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "def objective(trial):\n",
        "    \"\"\"Optuna objective: sample XGBoost hyperparameters and return the resulting MSE.\"\"\"\n",
        "    # Settings shared by every trial. \"verbosity\" is intentionally absent:\n",
        "    # train_and_score already passes it, so repeating it would be a duplicate keyword.\n",
        "    fixed_params = {\n",
        "        \"objective\": \"reg:squarederror\",\n",
        "        \"n_estimators\": 1000,\n",
        "    }\n",
        "    # Values drawn by Optuna for this trial; learning_rate is sampled on a log scale\n",
        "    sampled_params = {\n",
        "        \"learning_rate\": trial.suggest_float(\"learning_rate\", 1e-3, 0.1, log=True),\n",
        "        \"max_depth\": trial.suggest_int(\"max_depth\", 1, 10),\n",
        "        \"subsample\": trial.suggest_float(\"subsample\", 0.05, 1.0),\n",
        "        \"colsample_bytree\": trial.suggest_float(\"colsample_bytree\", 0.05, 1.0),\n",
        "        \"min_child_weight\": trial.suggest_int(\"min_child_weight\", 1, 20),\n",
        "    }\n",
        "    xgboost_params = {**fixed_params, **sampled_params}\n",
        "    return train_and_score(xgboost_params, model_type=\"xgboost\")\n",
        "\n",
        "# Run the optimization; seeding the sampler makes the sampled trials repeatable\n",
        "study = optuna.create_study(\n",
        "    direction=\"minimize\",  # minimize MSE\n",
        "    sampler=optuna.samplers.TPESampler(seed=42),\n",
        ")\n",
        "study.optimize(objective, n_trials=4)\n",
        "\n",
        "# Best parameters found\n",
        "print(\"Best parameters:\", study.best_params)\n",
        "print(\"Best MSE:\", study.best_value)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "M9nYBq6ZB2qr",
        "outputId": "dd72fde8-347f-40eb-df34-2e7906a887f6"
      },
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-04-07 11:46:31,594] A new study created in memory with name: no-name-12e25abd-e014-4ad6-8abd-acba7ecfdcd6\n",
            "[I 2025-04-07 11:46:35,109] Trial 0 finished with value: 0.35967840280957875 and parameters: {'learning_rate': 0.003129235758308534, 'max_depth': 5, 'subsample': 0.11550003197083734, 'colsample_bytree': 0.48858752132869404, 'min_child_weight': 12}. Best is trial 0 with value: 0.35967840280957875.\n",
            "[I 2025-04-07 11:46:36,671] Trial 1 finished with value: 0.3392855072888477 and parameters: {'learning_rate': 0.06184394939067719, 'max_depth': 7, 'subsample': 0.16130146316336486, 'colsample_bytree': 0.0901881930510108, 'min_child_weight': 16}. Best is trial 1 with value: 0.3392855072888477.\n",
            "[I 2025-04-07 11:46:37,419] Trial 2 finished with value: 0.3942697822023086 and parameters: {'learning_rate': 0.006203384966031322, 'max_depth': 2, 'subsample': 0.9034740920361478, 'colsample_bytree': 0.6744748001374601, 'min_child_weight': 17}. Best is trial 1 with value: 0.3392855072888477.\n",
            "[I 2025-04-07 11:46:39,352] Trial 3 finished with value: 0.3827892885907202 and parameters: {'learning_rate': 0.011980474404141008, 'max_depth': 7, 'subsample': 0.6354086350309843, 'colsample_bytree': 0.19576173535290747, 'min_child_weight': 2}. Best is trial 1 with value: 0.3392855072888477.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Best parameters: {'learning_rate': 0.06184394939067719, 'max_depth': 7, 'subsample': 0.16130146316336486, 'colsample_bytree': 0.0901881930510108, 'min_child_weight': 16}\n",
            "Best MSE: 0.3392855072888477\n"
          ]
        }
      ]
    }
  ]
 }
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"provenance": [],
	"authorship_tag": "ABX9TyN3JmDkDBX9JVpAlJAS85I2",
	"include_colab_link": true
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	},
	"language_info": {
	"name": "python"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/gist/brusangues/b32ab44aca358f08c590ab02fc02c7fc/optuna.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "code",
	"source": [
	"# %pip (rather than !pip) installs into the environment of the running kernel\n",
	"%pip install --quiet optuna xgboost lightgbm catboost"
	],
	"metadata": {
	"id": "-8-cRHox3itE"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {
	"id": "cqnXcs-B3aoM"
	},
	"outputs": [],
	"source": [
	"import optuna\n",
	"\n",
	"from sklearn.datasets import fetch_california_housing\n",
	"from sklearn.model_selection import train_test_split\n",
	"from sklearn.metrics import mean_squared_error\n",
	"\n",
	"# Fetch the California housing data and hold out 20% of the rows for scoring\n",
	"data = fetch_california_housing()\n",
	"X_train, X_test, y_train, y_test = train_test_split(\n",
	"    data.data,\n",
	"    data.target,\n",
	"    test_size=0.2,\n",
	"    random_state=42,  # fixed seed so the split is identical on every run\n",
	")"
	]
	},
	{
	"cell_type": "code",
	"source": [
	"import warnings\n",
	"\n",
	"# Blanket \"ignore\" filter: every warning raised after this cell runs is suppressed\n",
	"warnings.simplefilter(\"ignore\")"
	],
	"metadata": {
	"id": "INXQZR2IDTW0"
	},
	"execution_count": 2,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"from catboost import CatBoostClassifier, CatBoostRegressor\n",
	"from xgboost import XGBRegressor\n",
	"from lightgbm import LGBMRegressor\n",
	"import numpy as np\n",
	"\n",
	"def train_and_score(params=None, model_type=\"catboost\", silent=True):\n",
	"    \"\"\"Fit one boosting regressor and return its MSE on the held-out split.\n",
	"\n",
	"    Args:\n",
	"        params: Keyword arguments forwarded to the model constructor;\n",
	"            None (the default) means no extra arguments.\n",
	"        model_type: \"catboost\", \"xgboost\" or \"lightgbm\".\n",
	"        silent: If False, print the model type and the MSE.\n",
	"\n",
	"    Returns:\n",
	"        Mean squared error of the predictions for X_test against y_test.\n",
	"\n",
	"    Raises:\n",
	"        ValueError: If model_type is not one of the three supported names.\n",
	"    \"\"\"\n",
	"    # None instead of a {} default: a mutable default object is shared between calls\n",
	"    if params is None:\n",
	"        params = {}\n",
	"\n",
	"    # Each library gets its own \"be quiet\" argument in addition to the caller's params\n",
	"    if model_type == \"catboost\":\n",
	"        model = CatBoostRegressor(logging_level=\"Silent\", **params)\n",
	"    elif model_type == \"xgboost\":\n",
	"        model = XGBRegressor(verbosity=0, **params)\n",
	"    elif model_type == \"lightgbm\":\n",
	"        model = LGBMRegressor(verbosity=-1, **params)\n",
	"    else:\n",
	"        # Without this branch an unknown name only failed later with UnboundLocalError\n",
	"        raise ValueError(f\"Unknown model_type: {model_type!r}\")\n",
	"\n",
	"    # Train on the training split, then score on X_test / y_test.\n",
	"    # NOTE(review): every caller, including each Optuna trial, is scored on this same split.\n",
	"    model.fit(X_train, y_train)\n",
	"    y_pred = model.predict(X_test)\n",
	"    mse = mean_squared_error(y_test, y_pred)\n",
	"    if not silent:\n",
	"        print(f\"{model_type=} MSE: {mse:0.4f}\")\n",
	"    return mse"
	],
	"metadata": {
	"id": "7kGJHzQG43Jv"
	},
	"execution_count": 5,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"# Baseline: score each library with no extra constructor arguments\n",
	"train_and_score(params={}, model_type=\"catboost\", silent=False)\n",
	"train_and_score(params={}, model_type=\"xgboost\", silent=False)\n",
	"train_and_score(params={}, model_type=\"lightgbm\", silent=False)\n",
	"print()"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "Xyezj_8g7tsw",
	"outputId": "3e561b34-d097-45c3-90e5-b70e0f9f9bf9"
	},
	"execution_count": 6,
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"text": [
	"model_type='xgboost' MSE: 0.2226\n",
	"\n"
	]
	}
	]
	},
	{
	"cell_type": "code",
	"source": [
	"def objective(trial):\n",
	"    \"\"\"Optuna objective: sample CatBoost hyperparameters and return the resulting MSE.\"\"\"\n",
	"    # Values drawn by Optuna for this trial; learning_rate is sampled on a log scale\n",
	"    params = {\n",
	"        \"iterations\": trial.suggest_int(\"iterations\", 100, 1000),\n",
	"        \"learning_rate\": trial.suggest_float(\"learning_rate\", 0.001, 0.1, log=True),\n",
	"        \"depth\": trial.suggest_int(\"depth\", 4, 10),\n",
	"        \"l2_leaf_reg\": trial.suggest_int(\"l2_leaf_reg\", 1, 10),\n",
	"        \"bagging_temperature\": trial.suggest_float(\"bagging_temperature\", 0.01, 10),\n",
	"    }\n",
	"    # No model_type is passed, so train_and_score uses its \"catboost\" default\n",
	"    return train_and_score(params)\n",
	"\n",
	"# Run the optimization; seeding the sampler makes the sampled trials repeatable\n",
	"study = optuna.create_study(\n",
	"    direction=\"minimize\",  # minimize MSE\n",
	"    sampler=optuna.samplers.TPESampler(seed=42),\n",
	")\n",
	"study.optimize(objective, n_trials=4)\n",
	"\n",
	"# Best parameters found\n",
	"print(\"Best parameters:\", study.best_params)\n",
	"print(\"Best MSE:\", study.best_value)"
	],
	"metadata": {
	"id": "r6Vr67Pk3f9V",
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"outputId": "6bd939cc-12e7-4bdb-8729-193479751141"
	},
	"execution_count": 8,
	"outputs": [
	{
	"output_type": "stream",
	"name": "stderr",
	"text": [
	"[I 2025-04-07 11:45:24,764] A new study created in memory with name: no-name-91b82145-44ee-42ee-b831-fe9e6516dee6\n",
	"[I 2025-04-07 11:46:02,659] Trial 0 finished with value: 0.5540918108398565 and parameters: {'iterations': 798, 'learning_rate': 0.0012178002761617233, 'depth': 10, 'l2_leaf_reg': 5, 'bagging_temperature': 7.749189591256689}. Best is trial 0 with value: 0.5540918108398565.\n",
	"[I 2025-04-07 11:46:03,875] Trial 1 finished with value: 0.6886207457721376 and parameters: {'iterations': 178, 'learning_rate': 0.004114184668962924, 'depth': 7, 'l2_leaf_reg': 6, 'bagging_temperature': 9.159671193678008}. Best is trial 0 with value: 0.5540918108398565.\n",
	"[I 2025-04-07 11:46:05,809] Trial 2 finished with value: 0.25277147540504363 and parameters: {'iterations': 560, 'learning_rate': 0.028343198913913334, 'depth': 5, 'l2_leaf_reg': 2, 'bagging_temperature': 9.511635588003797}. Best is trial 2 with value: 0.25277147540504363.\n",
	"[I 2025-04-07 11:46:10,182] Trial 3 finished with value: 0.29850473131503585 and parameters: {'iterations': 800, 'learning_rate': 0.009681623543015242, 'depth': 5, 'l2_leaf_reg': 4, 'bagging_temperature': 8.98692440367321}. Best is trial 2 with value: 0.25277147540504363.\n"
	]
	},
	{
	"output_type": "stream",
	"name": "stdout",
	"text": [
	"Best parameters: {'iterations': 560, 'learning_rate': 0.028343198913913334, 'depth': 5, 'l2_leaf_reg': 2, 'bagging_temperature': 9.511635588003797}\n",
	"Best MSE: 0.25277147540504363\n"
	]
	}
	]
	},
	{
	"cell_type": "code",
	"source": [
	"def objective(trial):\n",
	"    \"\"\"Optuna objective: sample LightGBM hyperparameters and return the resulting MSE.\"\"\"\n",
	"    params = {\n",
	"        # Settings shared by every trial. \"verbosity\" is intentionally absent:\n",
	"        # train_and_score already passes it, so repeating it would be a duplicate keyword.\n",
	"        \"objective\": \"regression\",\n",
	"        \"metric\": \"rmse\",\n",
	"        \"n_estimators\": 1000,\n",
	"        \"bagging_freq\": 1,\n",
	"        # Values drawn by Optuna for this trial; learning_rate is sampled on a log scale\n",
	"        \"learning_rate\": trial.suggest_float(\"learning_rate\", 1e-3, 0.1, log=True),\n",
	"        \"num_leaves\": trial.suggest_int(\"num_leaves\", 2, 2**10),\n",
	"        \"subsample\": trial.suggest_float(\"subsample\", 0.05, 1.0),\n",
	"        \"colsample_bytree\": trial.suggest_float(\"colsample_bytree\", 0.05, 1.0),\n",
	"        \"min_data_in_leaf\": trial.suggest_int(\"min_data_in_leaf\", 1, 100),\n",
	"    }\n",
	"    return train_and_score(params, model_type=\"lightgbm\")\n",
	"\n",
	"# Run the optimization; seeding the sampler makes the sampled trials repeatable\n",
	"study = optuna.create_study(\n",
	"    direction=\"minimize\",  # minimize MSE\n",
	"    sampler=optuna.samplers.TPESampler(seed=42),\n",
	")\n",
	"study.optimize(objective, n_trials=4)\n",
	"\n",
	"# Best parameters found\n",
	"print(\"Best parameters:\", study.best_params)\n",
	"print(\"Best MSE:\", study.best_value)"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "yJtzOrdaBEbp",
	"outputId": "85ea635c-2b7f-4ebd-b351-e17316f78edf"
	},
	"execution_count": 9,
	"outputs": [
	{
	"output_type": "stream",
	"name": "stderr",
	"text": [
	"[I 2025-04-07 11:46:10,197] A new study created in memory with name: no-name-163656f0-83f5-465b-9f89-0ceeb1c81598\n",
	"[I 2025-04-07 11:46:16,371] Trial 0 finished with value: 0.21520473131668916 and parameters: {'learning_rate': 0.005809872199196733, 'num_leaves': 187, 'subsample': 0.31258169646380063, 'colsample_bytree': 0.7545050778815918, 'min_data_in_leaf': 38}. Best is trial 0 with value: 0.21520473131668916.\n",
	"[I 2025-04-07 11:46:23,016] Trial 1 finished with value: 0.22123552055009862 and parameters: {'learning_rate': 0.006566272694699802, 'num_leaves': 620, 'subsample': 0.848252988047558, 'colsample_bytree': 0.3536480344274082, 'min_data_in_leaf': 64}. Best is trial 0 with value: 0.21520473131668916.\n",
	"[I 2025-04-07 11:46:27,923] Trial 2 finished with value: 0.25586049266061156 and parameters: {'learning_rate': 0.0629060264955824, 'num_leaves': 103, 'subsample': 0.16800546154405946, 'colsample_bytree': 0.3109227822731441, 'min_data_in_leaf': 21}. Best is trial 0 with value: 0.21520473131668916.\n",
	"[I 2025-04-07 11:46:31,582] Trial 3 finished with value: 0.37083046652455914 and parameters: {'learning_rate': 0.013938237551993922, 'num_leaves': 632, 'subsample': 0.8108757040907426, 'colsample_bytree': 0.0827501462036972, 'min_data_in_leaf': 82}. Best is trial 0 with value: 0.21520473131668916.\n"
	]
	},
	{
	"output_type": "stream",
	"name": "stdout",
	"text": [
	"Best parameters: {'learning_rate': 0.005809872199196733, 'num_leaves': 187, 'subsample': 0.31258169646380063, 'colsample_bytree': 0.7545050778815918, 'min_data_in_leaf': 38}\n",
	"Best MSE: 0.21520473131668916\n"
	]
	}
	]
	},
	{
	"cell_type": "code",
	"source": [
	"def objective(trial):\n",
	"    \"\"\"Optuna objective: sample XGBoost hyperparameters and return the resulting MSE.\"\"\"\n",
	"    params = {\n",
	"        # Settings shared by every trial. \"verbosity\" is intentionally absent:\n",
	"        # train_and_score already passes it, so repeating it would be a duplicate keyword.\n",
	"        \"objective\": \"reg:squarederror\",\n",
	"        \"n_estimators\": 1000,\n",
	"        # Values drawn by Optuna for this trial; learning_rate is sampled on a log scale\n",
	"        \"learning_rate\": trial.suggest_float(\"learning_rate\", 1e-3, 0.1, log=True),\n",
	"        \"max_depth\": trial.suggest_int(\"max_depth\", 1, 10),\n",
	"        \"subsample\": trial.suggest_float(\"subsample\", 0.05, 1.0),\n",
	"        \"colsample_bytree\": trial.suggest_float(\"colsample_bytree\", 0.05, 1.0),\n",
	"        \"min_child_weight\": trial.suggest_int(\"min_child_weight\", 1, 20),\n",
	"    }\n",
	"    return train_and_score(params, model_type=\"xgboost\")\n",
	"\n",
	"# Run the optimization; seeding the sampler makes the sampled trials repeatable\n",
	"study = optuna.create_study(\n",
	"    direction=\"minimize\",  # minimize MSE\n",
	"    sampler=optuna.samplers.TPESampler(seed=42),\n",
	")\n",
	"study.optimize(objective, n_trials=4)\n",
	"\n",
	"# Best parameters found\n",
	"print(\"Best parameters:\", study.best_params)\n",
	"print(\"Best MSE:\", study.best_value)"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "M9nYBq6ZB2qr",
	"outputId": "dd72fde8-347f-40eb-df34-2e7906a887f6"
	},
	"execution_count": 10,
	"outputs": [
	{
	"output_type": "stream",
	"name": "stderr",
	"text": [
	"[I 2025-04-07 11:46:31,594] A new study created in memory with name: no-name-12e25abd-e014-4ad6-8abd-acba7ecfdcd6\n",
	"[I 2025-04-07 11:46:35,109] Trial 0 finished with value: 0.35967840280957875 and parameters: {'learning_rate': 0.003129235758308534, 'max_depth': 5, 'subsample': 0.11550003197083734, 'colsample_bytree': 0.48858752132869404, 'min_child_weight': 12}. Best is trial 0 with value: 0.35967840280957875.\n",
	"[I 2025-04-07 11:46:36,671] Trial 1 finished with value: 0.3392855072888477 and parameters: {'learning_rate': 0.06184394939067719, 'max_depth': 7, 'subsample': 0.16130146316336486, 'colsample_bytree': 0.0901881930510108, 'min_child_weight': 16}. Best is trial 1 with value: 0.3392855072888477.\n",
	"[I 2025-04-07 11:46:37,419] Trial 2 finished with value: 0.3942697822023086 and parameters: {'learning_rate': 0.006203384966031322, 'max_depth': 2, 'subsample': 0.9034740920361478, 'colsample_bytree': 0.6744748001374601, 'min_child_weight': 17}. Best is trial 1 with value: 0.3392855072888477.\n",
	"[I 2025-04-07 11:46:39,352] Trial 3 finished with value: 0.3827892885907202 and parameters: {'learning_rate': 0.011980474404141008, 'max_depth': 7, 'subsample': 0.6354086350309843, 'colsample_bytree': 0.19576173535290747, 'min_child_weight': 2}. Best is trial 1 with value: 0.3392855072888477.\n"
	]
	},
	{
	"output_type": "stream",
	"name": "stdout",
	"text": [
	"Best parameters: {'learning_rate': 0.06184394939067719, 'max_depth': 7, 'subsample': 0.16130146316336486, 'colsample_bytree': 0.0901881930510108, 'min_child_weight': 16}\n",
	"Best MSE: 0.3392855072888477\n"
	]
	}
	]
	}
	]
	}