This code defines a PyTorch implementation of the Sparsemax activation function. Sparsemax is an alternative to softmax that produces sparse probability distributions: it is the Euclidean projection of the input onto the probability simplex, so some output entries can be exactly zero. The implementation is provided as a PyTorch nn.Module, making it easy to integrate into any architecture.
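For reference, sparsemax is defined in Martins & Astudillo (2016, https://arxiv.org/abs/1602.02068) as

    sparsemax(z) = argmin_{p ∈ Δ} ||p − z||²

where Δ is the probability simplex. It admits the closed form sparsemax(z)_i = max(z_i − τ(z), 0), where τ(z) = (sum of the k(z) largest entries of z − 1) / k(z) and k(z) is the size of the support. The code below computes k(z) and τ(z) in a vectorized way.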
import torch
import torch.nn as nn


class Sparsemax(nn.Module):
    def __init__(self, dim=-1):
        super(Sparsemax, self).__init__()
        self.dim = dim

    def forward(self, x):
        # Move the dimension to apply Sparsemax to the last position
        x = x.transpose(self.dim, -1)

        # Calculate the cumulative sum of the sorted input
        z, _ = torch.sort(x, dim=-1, descending=True)
        cumsums = torch.cumsum(z, dim=-1)

        # Project onto the simplex; see details in https://arxiv.org/pdf/1602.02068.pdf
        K = torch.arange(1, x.shape[-1] + 1, device=x.device)
        K = K.repeat(*x.shape[:-1], 1)
        support = 1 + K * z - cumsums > 0
        k_z = (K * support).max(dim=-1, keepdim=True).values

        # Compute the threshold tau(z) and apply it to the input;
        # (k_z - 1) corrects for the 1-indexing used in the paper
        cumsums_element = torch.gather(cumsums, dim=-1, index=(k_z - 1))
        thresholds = (cumsums_element - 1) / k_z
        output = torch.clamp(x - thresholds, min=0)

        # Transpose the dimensions back
        output = output.transpose(self.dim, -1)
        return output
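A minimal usage sketch (the logits here are arbitrary values, chosen only to produce a partially sparse output; the expected values follow from the threshold formula above):

sparsemax = Sparsemax(dim=-1)
logits = torch.tensor([[1.0, 2.0, 2.5]])
probs = sparsemax(logits)
print(probs)                          # tensor([[0.0000, 0.2500, 0.7500]])
print(probs.sum(dim=-1))              # tensor([1.]); each row sums to 1
print(torch.softmax(logits, dim=-1))  # strictly positive everywhere, for contrast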