Skip to content

Instantly share code, notes, and snippets.

@Kulbear
Created November 8, 2018 18:27
Show Gist options
  • Save Kulbear/f58478d412e3f37b88dcd0521eb3abb8 to your computer and use it in GitHub Desktop.
Save Kulbear/f58478d412e3f37b88dcd0521eb3abb8 to your computer and use it in GitHub Desktop.
multi_weighted_logloss.py
def multi_weighted_logloss(y_ohe, y_p):
"""
@author olivier https://www.kaggle.com/ogrellier
multi logloss for PLAsTiCC challenge
"""
classes = [6, 15, 16, 42, 52, 53, 62, 64, 65, 67, 88, 90, 92, 95]
class_weight = {6: 1, 15: 2, 16: 1, 42: 1, 52: 1, 53: 1, 62: 1, 64: 2, 65: 1, 67: 1, 88: 1, 90: 1, 92: 1, 95: 1}
# Normalize rows and limit y_preds to 1e-15, 1-1e-15
y_p = np.clip(a=y_p, a_min=1e-15, a_max=1-1e-15)
# Transform to log
y_p_log = np.log(y_p)
# Get the log for ones, .values is used to drop the index of DataFrames
# Exclude class 99 for now, since there is no class99 in the training set
# we gave a special process for that class
y_log_ones = np.sum(y_ohe * y_p_log, axis=0)
# Get the number of positives for each class
nb_pos = y_ohe.sum(axis=0).astype(float)
# Weight average and divide by the number of positives
class_arr = np.array([class_weight[k] for k in sorted(class_weight.keys())])
y_w = y_log_ones * class_arr / nb_pos
loss = - np.sum(y_w) / np.sum(class_arr)
return loss
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment