Skip to content

Instantly share code, notes, and snippets.

@mihaidusmanu
Last active July 6, 2022 14:45
Show Gist options
  • Save mihaidusmanu/6fe23145484395eeecd97eb7927c469c to your computer and use it in GitHub Desktop.
Save mihaidusmanu/6fe23145484395eeecd97eb7927c469c to your computer and use it in GitHub Desktop.
LightGBM with Stratified K-Fold CV
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "lightgbm_kfold.ipynb",
"version": "0.3.2",
"provenance": [],
"collapsed_sections": [],
"include_colab_link": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/mihaidusmanu/6fe23145484395eeecd97eb7927c469c/lightgbm_kfold.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"metadata": {
"id": "EEUIx_f2mjMo",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"import lightgbm as lgb\n",
"\n",
"import numpy as np\n",
"\n",
"from sklearn.model_selection import StratifiedKFold"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "agqFR_2DmjM1",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# General parameters\n",
"# Number of stratified cross-validation folds. One model is trained and saved\n",
"# per fold, and the same count is used to load and average the models at\n",
"# prediction time.\n",
"n_folds = 5"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "mBXxgKx3mjNF",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# Training data\n",
"print(\"Loading training data... \")\n",
"\n",
"# The cross-validation cell further down reads two variables that must be\n",
"# defined here:\n",
"#   train_data - feature matrix, sliced there as train_data[row_ids, :]\n",
"#                (presumably a 2-D NumPy array - confirm for your data)\n",
"#   train_ann  - per-sample labels, sliced there as train_ann[row_ids] and\n",
"#                passed to StratifiedKFold.split as the stratification target\n",
"# TODO: read training data"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "YCDavEDOmjNP",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# LightGBM parameters (passed unchanged to lgb.train for every fold)\n",
"params = {\n",
"    # NOTE(review): 'task' looks like a CLI-only option - confirm it is needed here.\n",
"    'task': 'train',\n",
"    'boosting_type': 'gbdt',\n",
"    'objective': 'binary',\n",
"    # A list, not a set: iteration order of a set of strings changes between\n",
"    # interpreter runs, which made the metric order non-reproducible.\n",
"    # 'auc' must stay in this list - the cross-validation cell reads it back.\n",
"    'metric': ['binary_logloss', 'auc'],\n",
"    'num_leaves': 31,\n",
"    'learning_rate': 0.05,\n",
"    # Column subsampling ratio.\n",
"    'feature_fraction': 0.9,\n",
"    # Row subsampling ratio, re-drawn every `bagging_freq` iterations.\n",
"    'bagging_fraction': 0.8,\n",
"    'bagging_freq': 5,\n",
"    'verbose': 1\n",
"}"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "Sts7yHp9mjNV",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# Stratified K-fold cross-validation: train one LightGBM model per fold, save\n",
"# it to disk and record its validation AUC.\n",
"#\n",
"# shuffle=True is required for random_state to take effect; without it recent\n",
"# scikit-learn versions raise a ValueError (older ones ignored the seed).\n",
"skfold = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)\n",
"\n",
"val_scores = []\n",
"for fold_id, (train_ids, valid_ids) in enumerate(skfold.split(train_data, train_ann)):\n",
"    print(\"Fold %d / %d\" % (fold_id + 1, n_folds))\n",
"\n",
"    lgb_train = lgb.Dataset(train_data[train_ids, :], label=train_ann[train_ids])\n",
"    # reference= ties the validation set to the training set's feature binning.\n",
"    lgb_valid = lgb.Dataset(train_data[valid_ids, :], label=train_ann[valid_ids],\n",
"                            reference=lgb_train)\n",
"\n",
"    # Filled by record_evaluation as {'valid': {metric_name: [score per round]}}.\n",
"    res = {}\n",
"\n",
"    # The evals_result= / verbose_eval= keyword arguments were removed from\n",
"    # lgb.train in LightGBM 4.0; these callbacks replace them (LightGBM >= 3.3).\n",
"    gbm = lgb.train(params, lgb_train, num_boost_round=2500,\n",
"                    valid_sets=[lgb_valid], valid_names=['valid'],\n",
"                    callbacks=[lgb.record_evaluation(res),\n",
"                               lgb.log_evaluation(period=100)])\n",
"\n",
"    # One model file per fold; the prediction cell reloads them by the same name.\n",
"    gbm.save_model('lightgbm_model_%d.txt' % fold_id)\n",
"\n",
"    # Fold score = validation AUC after the last boosting round.\n",
"    val_scores.append(res['valid']['auc'][-1])\n",
"\n",
"# '%.4f' (four decimals) instead of '%4f', whose width of 4 had no effect.\n",
"print(\"avg_val_score: %.4f\" % (np.mean(val_scores)))\n"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "v-Z14BhMmjNs",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# Testing\n",
"print(\"Loading test data... \")\n",
"\n",
"# The prediction cell below reads one variable that must be defined here:\n",
"#   test_data - feature matrix; its .shape[0] sizes the prediction vector and\n",
"#               it is passed as-is to Booster.predict\n",
"#               (presumably the same column layout as train_data - confirm)\n",
"# TODO: read test data"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "Ec4WUV7CmjN-",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# Ensemble the per-fold models: reload each saved booster, accumulate its\n",
"# predictions on the test set, then divide by the number of folds.\n",
"pred = np.zeros(test_data.shape[0])\n",
"for fold_id in range(n_folds):\n",
"    model_path = 'lightgbm_model_%d.txt' % fold_id\n",
"    gbm = lgb.Booster(model_file=model_path)\n",
"    pred += gbm.predict(test_data)\n",
"pred = pred / n_folds"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "OyDHoXG5mjOK",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# Write the averaged predictions to a ';'-separated file: a header line, then\n",
"# one 'ID<row index>;<score>' line per test sample.\n",
"# The with-block closes the file even if a write fails part-way through.\n",
"with open('answers.csv', 'w') as out:\n",
"    out.write(\"Id;TARGET\\n\")\n",
"    for row_id, score in enumerate(pred):\n",
"        out.write(\"ID\" + str(row_id) + \";\" + str(score) + \"\\n\")"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "AKyjVyd4mjOQ",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
""
],
"execution_count": 0,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment