Rocketknight1 · March 18, 2023 15:09
diff --git a/bias_initialization.py b/bias_initialization.py
 # Compute the log of the empirical label frequencies and copy it into the
 # bias vector of a classification head.
 #
 # Assume labels is a possibly multidimensional array of categories / token indices

 # axis=None makes np.unique flatten multidimensional arrays before counting.
 # NOTE(review): the counts only cover label values that actually occur in
 # `labels`; confirm that their number and order match the bias vector.
 _, label_counts = np.unique(labels, axis=None, return_counts=True)

 # For multi-label classification you should normalize by the number of samples instead
 # np.float64 is used because the np.float alias was removed in NumPy 1.24.
 label_frequencies = label_counts.astype(np.float64) / np.sum(label_counts)

 label_logprobs = np.log(label_frequencies)

 # Now you just need to assign the values in label_logprobs to your bias vector!

 # In TensorFlow, this will look something like:

 model.classifier.bias.assign(label_logprobs)

 # In PyTorch:

 # Convert the NumPy array to a tensor first; copy_ then writes it into the
 # existing parameter in place, casting to the parameter's dtype.
 with torch.no_grad():
     model.classifier.bias.copy_(torch.from_numpy(label_logprobs))

 # The exact name of the weight to assign to will depend on the specific model head you're using
	# Compute the log of the empirical label frequencies and copy it into the
	# bias vector of a classification head.
	#
	# Assume labels is a possibly multidimensional array of categories / token indices

	# axis=None makes np.unique flatten multidimensional arrays before counting.
	# NOTE(review): the counts only cover label values that actually occur in
	# `labels`; confirm that their number and order match the bias vector.
	_, label_counts = np.unique(labels, axis=None, return_counts=True)

	# For multi-label classification you should normalize by the number of samples instead
	# np.float64 is used because the np.float alias was removed in NumPy 1.24.
	label_frequencies = label_counts.astype(np.float64) / np.sum(label_counts)

	label_logprobs = np.log(label_frequencies)

	# Now you just need to assign the values in label_logprobs to your bias vector!

	# In TensorFlow, this will look something like:

	model.classifier.bias.assign(label_logprobs)

	# In PyTorch:

	# Convert the NumPy array to a tensor first; copy_ then writes it into the
	# existing parameter in place, casting to the parameter's dtype.
	# The assignment must be indented so it runs inside the no_grad context.
	with torch.no_grad():
		model.classifier.bias.copy_(torch.from_numpy(label_logprobs))

	# The exact name of the weight to assign to will depend on the specific model head you're using
No results found