xu-li · February 14, 2023 08:12
diff --git a/lightgbm_with_binary_focal_crossentropy.py b/lightgbm_with_binary_focal_crossentropy.py
 '''
 LightGBM with Binary Focal Crossentropy

 There are a lot of articles explaining how to derive the first- and second-order derivatives of the focal loss by hand.
 They are complex and I don't understand them in full. Here, I am using TensorFlow, which provides both the
 BinaryFocalCrossentropy loss and auto-differentiation. Within a few lines of code, I am able to use
 BinaryFocalCrossentropy as the loss function.

 Focal crossentropy: https://arxiv.org/abs/1708.02002
 '''
 import numpy as np
 from lightgbm import LGBMClassifier
 from sklearn.datasets import make_classification
 from sklearn.metrics import confusion_matrix
 from sklearn.model_selection import train_test_split
 from scipy import special
 import tensorflow as tf

 # Synthetic binary problem: 10000 samples, class weights of 0.9 for the first class,
 # and flip_y=0.4 label noise; 33% of the rows are held out as the test split
 X, y = make_classification(
    n_samples=10000,
    weights=(0.9,),
    random_state=42,
    flip_y=0.4,
 )
 X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
 )


 def print_metrics(clf):
    '''
    Print confusion-matrix counts and precision of `clf` on the test split.

    The raw scores predicted for X_test are passed through the logistic
    sigmoid and binarised at five evenly spaced thresholds from 0.5 to 0.75.
    One line is printed per threshold.
    '''
    raw_scores = clf.predict(X_test, raw_score=True)
    probabilities = special.expit(raw_scores)
    for threshold in np.linspace(0.5, 0.75, 5):
        predicted = np.where(probabilities > threshold, 1, 0)
        tn, fp, fn, tp = confusion_matrix(y_test, predicted).ravel()
        if tp > 0:
            precision = tp / (tp + fp)
        else:
            # Without true positives the precision is reported as 0; this also
            # avoids dividing by zero when there are no positive predictions
            precision = 0
        print(f"tn, fp, fn, tp, precision(>{threshold}): {tn, fp, fn, tp}, {precision:.6f}")


 # Baseline model: LightGBM's built-in 'binary' objective evaluated with binary_logloss
 lgb_params1 = {
    'objective': 'binary',
    'early_stopping_round': 20,
    'verbose': 1,
    'metric': 'binary_logloss',
    'boost_from_average': False,
 }

 clf1 = LGBMClassifier(**lgb_params1)
 clf1.fit(
    X_train,
    y_train,
    eval_set=[(X_train, y_train), (X_test, y_test)],
 )
 print_metrics(clf1)

 # Focal loss object used by the custom objective and the custom eval metric;
 # from_logits=True because both of them pass raw scores to it
 bfce = tf.keras.losses.BinaryFocalCrossentropy(from_logits=True)


 def fobj_focal_crossentropy(y_true, y_pred):
    '''
    Custom LightGBM objective: binary focal crossentropy on raw scores.

    The derivatives come from TensorFlow auto-differentiation of the
    module-level `bfce` loss instead of being derived by hand.

    Parameters:
        y_true: target labels, one per sample.
        y_pred: raw scores (logits) for the same samples.

    Returns:
        (grad, hess): numpy arrays shaped like `y_pred` holding the first and
        second derivative of every sample's loss with respect to its own score.
    '''
    # A watched tensor is enough; no need to allocate a new tf.Variable on every call
    scores = tf.convert_to_tensor(y_pred)
    # The outer tape is used for exactly one gradient call, so it need not be persistent
    with tf.GradientTape() as t2:
        t2.watch(scores)
        with tf.GradientTape() as t1:
            t1.watch(scores)
            loss = bfce(y_true, scores)
            # `bfce` is built with Keras' default reduction, which averages over all
            # samples and would shrink every derivative by 1/n. LightGBM expects the
            # derivatives of each sample's own loss (its hessian-based thresholds
            # depend on the scale), so turn the mean back into a sum.
            loss = loss * tf.cast(tf.size(scores), loss.dtype)
        grad = t1.gradient(loss, scores)

    # grad[i] depends only on scores[i], so differentiating sum(grad) yields the
    # diagonal of the Hessian, i.e. the per-sample second derivatives
    hess = t2.gradient(grad, scores, unconnected_gradients=tf.UnconnectedGradients.ZERO)
    return grad.numpy(), hess.numpy()


 def eval_metric_focal_crossentropy(y_true, y_pred):
    '''
    Custom LightGBM eval metric: binary focal crossentropy computed by `bfce`.

    Parameters:
        y_true: target labels.
        y_pred: scores passed to `bfce`, which treats them as logits.

    Returns:
        (name, value, is_higher_better): the metric name, the loss as a plain
        Python float, and False because a lower loss is better.
    '''
    # bfce returns a TensorFlow scalar; LightGBM documents eval_result as a float
    return 'focal_crossentropy', float(bfce(y_true, y_pred)), False


 # Focal-loss model: the objective is the custom callable, the built-in metric is
 # set to 'None', and the custom eval metric is supplied to fit() instead
 lgb_params2 = {
    'objective': fobj_focal_crossentropy,
    'early_stopping_round': 20,
    'verbose': 1,
    'metric': 'None',
    'boost_from_average': False,
 }

 clf2 = LGBMClassifier(**lgb_params2)
 clf2.fit(
    X_train,
    y_train,
    eval_metric=eval_metric_focal_crossentropy,
    eval_set=[(X_train, y_train), (X_test, y_test)],
 )
 print_metrics(clf2)
	'''
	LightGBM with Binary Focal Crossentropy

	There are a lot of articles explaining how to derive the first- and second-order derivatives of the focal loss by hand.
	They are complex and I don't understand them in full. Here, I am using TensorFlow, which provides both the
	BinaryFocalCrossentropy loss and auto-differentiation. Within a few lines of code, I am able to use
	BinaryFocalCrossentropy as the loss function.

	Focal crossentropy: https://arxiv.org/abs/1708.02002
	'''
	import numpy as np
	from lightgbm import LGBMClassifier
	from sklearn.datasets import make_classification
	from sklearn.metrics import confusion_matrix
	from sklearn.model_selection import train_test_split
	from scipy import special
	import tensorflow as tf

	# Synthetic binary problem: 10000 samples, class weights of 0.9 for the first class,
	# and flip_y=0.4 label noise; 33% of the rows are held out as the test split
	X, y = make_classification(
	    n_samples=10000,
	    weights=(0.9,),
	    random_state=42,
	    flip_y=0.4,
	)
	X_train, X_test, y_train, y_test = train_test_split(
	    X,
	    y,
	    test_size=0.33,
	    random_state=42,
	)


	def print_metrics(clf):
	    '''
	    Print confusion-matrix counts and precision of `clf` on the test split.

	    The raw scores predicted for X_test are passed through the logistic
	    sigmoid and binarised at five evenly spaced thresholds from 0.5 to 0.75.
	    One line is printed per threshold.
	    '''
	    raw_scores = clf.predict(X_test, raw_score=True)
	    probabilities = special.expit(raw_scores)
	    for threshold in np.linspace(0.5, 0.75, 5):
	        predicted = np.where(probabilities > threshold, 1, 0)
	        tn, fp, fn, tp = confusion_matrix(y_test, predicted).ravel()
	        if tp > 0:
	            precision = tp / (tp + fp)
	        else:
	            # Without true positives the precision is reported as 0; this also
	            # avoids dividing by zero when there are no positive predictions
	            precision = 0
	        print(f"tn, fp, fn, tp, precision(>{threshold}): {tn, fp, fn, tp}, {precision:.6f}")


	# Baseline model: LightGBM's built-in 'binary' objective evaluated with binary_logloss
	lgb_params1 = {
	    'objective': 'binary',
	    'early_stopping_round': 20,
	    'verbose': 1,
	    'metric': 'binary_logloss',
	    'boost_from_average': False,
	}

	clf1 = LGBMClassifier(**lgb_params1)
	clf1.fit(
	    X_train,
	    y_train,
	    eval_set=[(X_train, y_train), (X_test, y_test)],
	)
	print_metrics(clf1)

	# Focal loss object used by the custom objective and the custom eval metric;
	# from_logits=True because both of them pass raw scores to it
	bfce = tf.keras.losses.BinaryFocalCrossentropy(from_logits=True)


	def fobj_focal_crossentropy(y_true, y_pred):
	    '''
	    Custom LightGBM objective: binary focal crossentropy on raw scores.

	    The derivatives come from TensorFlow auto-differentiation of the
	    module-level `bfce` loss instead of being derived by hand.

	    Parameters:
	        y_true: target labels, one per sample.
	        y_pred: raw scores (logits) for the same samples.

	    Returns:
	        (grad, hess): numpy arrays shaped like `y_pred` holding the first and
	        second derivative of every sample's loss with respect to its own score.
	    '''
	    # A watched tensor is enough; no need to allocate a new tf.Variable on every call
	    scores = tf.convert_to_tensor(y_pred)
	    # The outer tape is used for exactly one gradient call, so it need not be persistent
	    with tf.GradientTape() as t2:
	        t2.watch(scores)
	        with tf.GradientTape() as t1:
	            t1.watch(scores)
	            loss = bfce(y_true, scores)
	            # `bfce` is built with Keras' default reduction, which averages over all
	            # samples and would shrink every derivative by 1/n. LightGBM expects the
	            # derivatives of each sample's own loss (its hessian-based thresholds
	            # depend on the scale), so turn the mean back into a sum.
	            loss = loss * tf.cast(tf.size(scores), loss.dtype)
	        grad = t1.gradient(loss, scores)

	    # grad[i] depends only on scores[i], so differentiating sum(grad) yields the
	    # diagonal of the Hessian, i.e. the per-sample second derivatives
	    hess = t2.gradient(grad, scores, unconnected_gradients=tf.UnconnectedGradients.ZERO)
	    return grad.numpy(), hess.numpy()


	def eval_metric_focal_crossentropy(y_true, y_pred):
	    '''
	    Custom LightGBM eval metric: binary focal crossentropy computed by `bfce`.

	    Parameters:
	        y_true: target labels.
	        y_pred: scores passed to `bfce`, which treats them as logits.

	    Returns:
	        (name, value, is_higher_better): the metric name, the loss as a plain
	        Python float, and False because a lower loss is better.
	    '''
	    # bfce returns a TensorFlow scalar; LightGBM documents eval_result as a float
	    return 'focal_crossentropy', float(bfce(y_true, y_pred)), False


	# Focal-loss model: the objective is the custom callable, the built-in metric is
	# set to 'None', and the custom eval metric is supplied to fit() instead
	lgb_params2 = {
	    'objective': fobj_focal_crossentropy,
	    'early_stopping_round': 20,
	    'verbose': 1,
	    'metric': 'None',
	    'boost_from_average': False,
	}

	clf2 = LGBMClassifier(**lgb_params2)
	clf2.fit(
	    X_train,
	    y_train,
	    eval_metric=eval_metric_focal_crossentropy,
	    eval_set=[(X_train, y_train), (X_test, y_test)],
	)
	print_metrics(clf2)