Rocketknight1 · December 31, 2022 18:48
diff --git a/cosine_loss.py b/cosine_loss.py
 def cosine_loss(y_true, y_pred):
    """Return the cosine similarity loss computed across the last dimension.

    The cosine similarity loss between two vectors a and b is defined as
        In Keras: -cos(a, b)
        In PyTorch: 1 - cos(a, b)
    Both definitions give the same gradient; the PyTorch version just avoids
    negative loss values. This function uses the Keras convention.

    cos(a, b) is the dot product of the two vectors divided by the product of
    their magnitudes. Normalizing each vector to unit length first reduces
    this to a plain dot product of the normalized vectors.

    Args:
        y_true: Target values, shape (batch_dimensions, N). Cast to the dtype
            of the normalized predictions before use.
        y_pred: Predicted values, shape (batch_dimensions, N).

    Returns:
        Tensor of shape (batch_dimensions,) holding -cos(y_true, y_pred) for
        each pair of vectors along the last axis.
    """
    # Normalize the predictions first so their (tensor) dtype is available.
    y_pred = tf.linalg.l2_normalize(y_pred, axis=-1)  # Shape is (batch_dimensions, N)

    # Align the targets with the predictions' dtype: integer labels or a
    # different float precision would otherwise make l2_normalize / einsum
    # raise. For matching dtypes this cast is a no-op.
    y_true = tf.cast(y_true, y_pred.dtype)
    y_true = tf.linalg.l2_normalize(y_true, axis=-1)  # Shape is (batch_dimensions, N)

    # Dot product over the last axis; any leading batch dimensions are kept.
    # The result is stored under a name that does not shadow this function.
    loss = -tf.einsum("...i, ...i -> ...", y_true, y_pred)  # Shape is (batch_dimensions,)
    return loss
	def cosine_loss(y_true, y_pred):
		"""Return the cosine similarity loss computed across the last dimension.

		The cosine similarity loss between two vectors a and b is defined as
		    In Keras: -cos(a, b)
		    In PyTorch: 1 - cos(a, b)
		Both definitions give the same gradient; the PyTorch version just avoids
		negative loss values. This function uses the Keras convention.

		cos(a, b) is the dot product of the two vectors divided by the product of
		their magnitudes. Normalizing each vector to unit length first reduces
		this to a plain dot product of the normalized vectors.

		Args:
		    y_true: Target values, shape (batch_dimensions, N). Cast to the dtype
		        of the normalized predictions before use.
		    y_pred: Predicted values, shape (batch_dimensions, N).

		Returns:
		    Tensor of shape (batch_dimensions,) holding -cos(y_true, y_pred) for
		    each pair of vectors along the last axis.
		"""
		# Normalize the predictions first so their (tensor) dtype is available.
		y_pred = tf.linalg.l2_normalize(y_pred, axis=-1)  # Shape is (batch_dimensions, N)

		# Align the targets with the predictions' dtype: integer labels or a
		# different float precision would otherwise make l2_normalize / einsum
		# raise. For matching dtypes this cast is a no-op.
		y_true = tf.cast(y_true, y_pred.dtype)
		y_true = tf.linalg.l2_normalize(y_true, axis=-1)  # Shape is (batch_dimensions, N)

		# Dot product over the last axis; any leading batch dimensions are kept.
		# The result is stored under a name that does not shadow this function.
		loss = -tf.einsum("...i, ...i -> ...", y_true, y_pred)  # Shape is (batch_dimensions,)
		return loss