Caffe Python layer implementing a cross-entropy with softmax activation loss for multi-label classification, where labels can be input as real numbers.
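To plug the layer into a network, a model prototxt would declare it as a Python layer. A minimal sketch, assuming the code below is saved as custom_softmax_loss.py on the PYTHONPATH and that the score and label blobs are named "scores" and "labels" (both names are illustrative, not part of the gist):

layer {
  name: "loss"
  type: "Python"
  bottom: "scores"   # inferred class scores (e.g. output of an InnerProduct layer)
  bottom: "labels"   # real-valued multi-label targets
  top: "loss"
  python_param {
    module: "custom_softmax_loss"   # assumed filename of the module below
    layer: "CustomSoftmaxLoss"
  }
  loss_weight: 1
}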
import caffe
import numpy as np


class CustomSoftmaxLoss(caffe.Layer):
    """
    Compute cross-entropy loss with softmax activations for multi-label
    classification, accepting real numbers as labels.
    Inspired by https://arxiv.org/abs/1805.00932
    Raul Gomez Bruballa
    https://gombru.github.io/
    """

    def setup(self, bottom, top):
        # Check input pair: inferred scores and labels
        if len(bottom) != 2:
            raise Exception("Need two inputs to compute the loss (inferred scores and labels).")

    def reshape(self, bottom, top):
        # Check that input dimensions match
        if bottom[0].num != bottom[1].num:
            raise Exception("Inferred scores and labels must have the same dimension.")
        # Gradient buffer has the shape of the score input
        self.diff = np.zeros_like(bottom[0].data, dtype=np.float32)
        # Loss output is a scalar
        top[0].reshape(1)

    def forward(self, bottom, top):
        labels = bottom[1].data
        # Work on a copy so the input blob is not modified in place
        scores = np.array(bottom[0].data, dtype=np.float32)
        # Subtract the per-row maximum to avoid numerical instability
        scores -= np.max(scores, axis=1, keepdims=True)
        # Compute softmax activations
        exp_scores = np.exp(scores)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        logprobs = np.zeros([bottom[0].num, 1])
        # Compute the cross-entropy loss
        for r in range(bottom[0].num):  # For each element in the batch
            # Normalize by the number of positive (non-zero) labels
            scale_factor = 1 / float(np.count_nonzero(labels[r, :]))
            for c in range(len(labels[r, :])):  # For each class
                if labels[r, c] != 0:
                    # Sum the per-class loss, weighted by the (real-valued) label
                    logprobs[r] += -np.log(probs[r, c]) * labels[r, c] * scale_factor
        data_loss = np.sum(logprobs) / bottom[0].num
        # Store the softmax activations for the backward pass
        self.diff[...] = probs
        top[0].data[...] = data_loss

    def backward(self, top, propagate_down, bottom):
        # If the class label is 0, the gradient is simply the softmax probability
        delta = self.diff
        labels = bottom[1].data
        for r in range(bottom[0].num):  # For each element in the batch
            scale_factor = 1 / float(np.count_nonzero(labels[r, :]))
            for c in range(len(labels[r, :])):  # For each class
                if labels[r, c] != 0:
                    # Gradient for classes with positive labels, considering the scale factor
                    delta[r, c] = scale_factor * (delta[r, c] - 1) + (1 - scale_factor) * delta[r, c]
        bottom[0].diff[...] = delta / bottom[0].num
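As a quick sanity check of the forward math, the same loss can be reproduced with plain NumPy outside Caffe. A minimal sketch with made-up scores and real-valued labels (the batch size, class count, and values are illustrative only):

import numpy as np

# Illustrative batch: 2 samples, 3 classes; labels are real-valued and may be soft
scores = np.array([[2.0, 1.0, 0.1],
                   [0.5, 2.5, 0.5]], dtype=np.float32)
labels = np.array([[0.7, 0.3, 0.0],
                   [0.0, 1.0, 0.0]], dtype=np.float32)

# Softmax with the same max-subtraction trick used in the layer
shifted = scores - scores.max(axis=1, keepdims=True)
probs = np.exp(shifted) / np.exp(shifted).sum(axis=1, keepdims=True)

# Cross-entropy, weighted by the real-valued labels and normalized
# by the number of non-zero labels per sample, then averaged over the batch
loss = 0.0
for r in range(scores.shape[0]):
    scale = 1.0 / np.count_nonzero(labels[r])
    for c in range(scores.shape[1]):
        if labels[r, c] != 0:
            loss += -np.log(probs[r, c]) * labels[r, c] * scale
loss /= scores.shape[0]
print("loss:", loss)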