@lucidrains
Last active May 31, 2025 16:02
proposed SUGAR with BSiLU
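SUGAR keeps the exact ReLU in the forward pass but, during training, routes gradients in the negative region through a smooth surrogate. Here the surrogate is B-SiLU, eq. (7) of the paper:

    B-SiLU(x) = (x + α) * sigmoid(x) - α / 2

with α = 1.67 as the default; for x > 0 the surrogate is the identity, whose gradient of 1 matches ReLU's.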
# https://arxiv.org/abs/2505.22074

import torch
from torch.nn import Module

class SugarBSiLU(Module):
    # proposed SUGAR with B-SiLU, section 3.1
    # it was their best performing variant

    def __init__(
        self,
        alpha = 1.67
    ):
        super().__init__()
        self.alpha = alpha

    def forward(self, x):
        α = self.alpha

        relu_out = x.relu() # forward output is just a relu

        if not self.training:
            return relu_out

        # eq (7) in paper
        bsilu_out = (x + α) * x.sigmoid() - α / 2

        # use bsilu gradients only in the negative region, identity elsewhere
        bsilu_out = torch.where(x > 0, x, bsilu_out)

        # straight-through during training: value equals relu_out, gradient flows through bsilu_out
        return bsilu_out + (relu_out - bsilu_out).detach()

# quick test

if __name__ == '__main__':
    act = SugarBSiLU()
    act(torch.randn(2))
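A quick way to see the straight-through trick in action (a minimal sketch, assuming the SugarBSiLU class above is in scope): in training mode the output value is still exactly ReLU, but a negative input receives the nonzero B-SiLU gradient; in eval mode the module degenerates to plain ReLU and the same input gets zero gradient.

import torch

act = SugarBSiLU()
x = torch.tensor([-1., 2.], requires_grad = True)

# training mode: forward value matches relu, but the negative input
# picks up the surrogate (b-silu) gradient instead of relu's zero
act.train()
out = act(x)
assert torch.allclose(out, x.relu())

out.sum().backward()
assert x.grad[0] != 0.

# eval mode: plain relu, so the negative input's gradient is zero
x.grad = None
act.eval()
act(x).sum().backward()
assert x.grad[0] == 0.

The .detach() on the residual is what makes this work: the value of (relu_out - bsilu_out) is added without contributing to the gradient, so autograd only differentiates through bsilu_out.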