Skip to content

Instantly share code, notes, and snippets.

@goraj
Created February 2, 2018 16:14
Show Gist options
  • Save goraj/0945c25a110650d58fb8d546c8c4edc9 to your computer and use it in GitHub Desktop.
Save goraj/0945c25a110650d58fb8d546c8c4edc9 to your computer and use it in GitHub Desktop.
Custom fold indices using lightgbm
import numpy as np
from sklearn.datasets import load_digits
from sklearn.metrics import roc_auc_score
import lightgbm as lgbm
def make_outer_folds(n_samples, k, holdout=10):
    """Build k contiguous (train_indices, test_indices) folds.

    Each fold covers one contiguous chunk of ``n_samples // k`` indices;
    the last ``holdout`` indices of the chunk form the test set and the
    remaining indices form the train set.

    Returns a list of exactly k ``(train_indices, test_indices)`` tuples
    of plain ``list``s (the index format ``lightgbm.cv(folds=...)`` accepts).
    """
    fold_size = n_samples // k
    folds = []
    # Stop at fold_size * k so exactly k folds are produced.  The original
    # used np.arange(0, n_samples - fold_size, fold_size), which silently
    # dropped the final fold (an off-by-one: 4 folds for k = 5).
    for start in range(0, fold_size * k, fold_size):
        split = start + fold_size - holdout
        folds.append((list(range(start, split)),
                      list(range(split, start + fold_size))))
    return folds


if __name__ == '__main__':
    np.random.seed(4242)

    # Binary subset of the digits data: keep only classes 0 and 1.
    d = load_digits()
    xs = d['data']
    ys = d['target']
    indices = np.where((ys == 1) | (ys == 0))
    ys = ys[indices]
    xs = xs[indices]
    dset = lgbm.Dataset(xs, ys)

    params = {
        'boosting_type': 'gbdt',
        'objective': 'binary',
        'learning_rate': 0.05,
        'num_leaves': 35,
        'metric': 'auc',
        'is_unbalance': True,
        'seed': 1024,
        'verbosity': -1,
    }

    niter = 100  # number of boosting rounds
    k = 5
    outer_folds = make_outer_folds(len(xs), k)

    # Manual k-fold CV: train on each fold's train split, score its test split.
    aucv = []
    for outer_trainset, outer_testset in outer_folds:
        trainset = lgbm.Dataset(xs[outer_trainset], ys[outer_trainset])
        model = lgbm.train(params, trainset, num_boost_round=niter)
        ydelta = model.predict(xs[outer_testset])
        aucv.append(roc_auc_score(ys[outer_testset], ydelta))

    # Same folds through lightgbm's built-in cv.  When explicit folds are
    # supplied, nfold/shuffle are ignored by lightgbm; they are kept here
    # for parity with the original call.
    r = lgbm.cv(params, folds=outer_folds, nfold=k, train_set=dset,
                shuffle=False, num_boost_round=niter)
    auc = r['auc-mean'][niter - 1]
    stdev = r['auc-stdv'][niter - 1]
    print('lightgbm cv auc: {:.4f} (+-{:.4f})'.format(auc, stdev))
    print('manual cv auc: {:.4f} (+-{:.4f})'.format(np.mean(aucv), np.std(aucv)))
@goraj
Copy link
Author

goraj commented Feb 2, 2018

lightgbm cv auc: 1.0000 (+-0.0000)
manual cv auc: 0.9792 (+-0.0361)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment