Skip to content

Instantly share code, notes, and snippets.

@MarcoGorelli
Last active January 22, 2022 21:50
Show Gist options
  • Save MarcoGorelli/88d358ecc1d694c76e3c6b1775998092 to your computer and use it in GitHub Desktop.
Save MarcoGorelli/88d358ecc1d694c76e3c6b1775998092 to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
from lightgbm import LGBMRegressor, log_evaluation
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold
# Load the scikit-learn breast-cancer dataset and wrap it in pandas objects:
# a feature frame with named columns and a target series.
data = load_breast_cancer()
y = pd.Series(data.target)
X = pd.DataFrame(data.data, columns=data.feature_names)

# Five-fold splitter used for the cross-validation loop below.
folds = KFold(n_splits=5)
def neg_correlation(preds, labels):
    """Custom evaluation metric: the negated Pearson correlation.

    Returns a ``(name, value, is_higher_better)`` triple, where ``value`` is
    minus the correlation coefficient between the two inputs and
    ``is_higher_better`` is ``False`` (lower is better, i.e. a stronger
    positive correlation gives a smaller score).

    NOTE(review): LightGBM's scikit-learn API documents custom metrics as
    being called with ``(y_true, y_pred)``, so the argument names here may be
    swapped relative to what is actually passed. The result is unaffected
    because correlation is symmetric in its arguments -- confirm against the
    LightGBM documentation.
    """
    # Off-diagonal entry of the 2x2 correlation matrix is the coefficient.
    pearson_r = np.corrcoef(preds, labels)[1, 0]
    return ("neg_correlation", -pearson_r, False)
# Cross-validate to pick the number of boosting rounds, then refit on all data.
#
# For every fold, train a 100-tree model and record the custom validation
# metric after each boosting round.
eval_results = []
for train_idx, val_idx in folds.split(X):
    X_train, X_valid = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_valid = y.iloc[train_idx], y.iloc[val_idx]
    model = LGBMRegressor(n_estimators=100)
    # Use the split frames computed above instead of re-slicing X / y.
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_valid, y_valid)],
        eval_metric=neg_correlation,
        # presumably period 0 disables per-iteration logging -- see the
        # lightgbm.log_evaluation documentation.
        callbacks=[log_evaluation(0)],
    )
    # Store the per-round scores as a column vector so that the folds can be
    # stacked side by side afterwards.
    eval_results.append(
        np.asarray(model.evals_result_["valid_0"]["neg_correlation"])[:, np.newaxis]
    )

# One row per boosting round, one column per fold.
cv_results = np.hstack(eval_results)
# Pick the round with the lowest mean score across folds (the metric is
# lower-is-better). Row index i is taken to be the score after i + 1 trees,
# hence the + 1. Cast to a built-in int so the hyperparameter is not a NumPy
# scalar.
best_n_estimators = int(np.argmin(cv_results.mean(axis=1))) + 1

# Final model: refit on the full dataset with the selected number of trees.
model = LGBMRegressor(n_estimators=best_n_estimators)
model.fit(X, y, callbacks=[log_evaluation(0)])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment