Created
November 8, 2018 18:27
-
-
Save Kulbear/f58478d412e3f37b88dcd0521eb3abb8 to your computer and use it in GitHub Desktop.
multi_weighted_logloss.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def multi_weighted_logloss(y_ohe, y_p):
    """Weighted multi-class log loss for the PLAsTiCC challenge.

    @author olivier https://www.kaggle.com/ogrellier

    Parameters
    ----------
    y_ohe : array-like, shape (n_samples, n_classes)
        One-hot encoded true labels. Columns are assumed to be ordered by
        ascending class id (6, 15, 16, ..., 95) -- the weight vector below
        is built in that sorted order. TODO confirm against the caller.
    y_p : array-like, shape (n_samples, n_classes)
        Predicted class probabilities, same column order as ``y_ohe``.

    Returns
    -------
    float
        Class-weighted average negative log-likelihood. Classes with no
        positive samples contribute zero (instead of producing NaN).
    """
    # Competition metric weights: classes 15 and 64 count double.
    # Class 99 is excluded here since it never appears in the training set;
    # it gets special handling elsewhere.
    class_weight = {6: 1, 15: 2, 16: 1, 42: 1, 52: 1, 53: 1, 62: 1,
                    64: 2, 65: 1, 67: 1, 88: 1, 90: 1, 92: 1, 95: 1}
    # Clip probabilities away from 0 and 1 so the log is always finite.
    # NOTE: this only clips -- rows are NOT re-normalized to sum to 1.
    y_p = np.clip(a=y_p, a_min=1e-15, a_max=1 - 1e-15)
    y_p_log = np.log(y_p)
    # Per-class sum of log-probabilities at the true-class positions only
    # (entries where y_ohe is 0 are zeroed out by the multiplication).
    y_log_ones = np.sum(y_ohe * y_p_log, axis=0)
    # Number of positive samples per class, for the within-class average.
    nb_pos = y_ohe.sum(axis=0).astype(float)
    # Weight vector in ascending-class-id order, matching the column order.
    class_arr = np.array([class_weight[k] for k in sorted(class_weight)])
    # Weighted per-class average log-likelihood. Guard against classes with
    # zero positives: they contribute 0 rather than dividing by zero.
    with np.errstate(divide="ignore", invalid="ignore"):
        y_w = np.where(nb_pos > 0, y_log_ones * class_arr / nb_pos, 0.0)
    return -np.sum(y_w) / np.sum(class_arr)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment