How to freeze a subset of weights in a PyTorch layer and control whether the optimizer's weight decay touches the frozen entries. Two variants of a masked linear layer follow: the first stores the frozen values in a buffer, so they receive neither gradients nor weight decay; the second keeps them inside the weight parameter and only detaches them in forward, so they receive no gradient from the loss but an optimizer with weight decay still shrinks them.
import torch, torch.nn as nn
import torch.nn.functional as F

class CustomLinearNoWeightDecay(nn.Module):
    """Linear layer where entries with mask == 1 are frozen at their initial
    values. The frozen values live in a buffer, which the optimizer never
    sees, so weight decay cannot touch the effective frozen weights."""
    def __init__(self, mask):
        super().__init__()
        self.register_buffer("mask", mask)
        out_channels, in_channels = mask.shape
        self.weight = nn.Parameter(torch.randn(out_channels, in_channels))
        # snapshot the masked entries into a buffer; buffers receive no
        # gradient and are not returned by parameters()
        fixed_weight = (mask * self.weight).detach()
        self.register_buffer("fixed_weight", fixed_weight)
        self.bias = nn.Parameter(torch.randn(out_channels))

    def forward(self, x):
        # frozen entries come from the buffer, trainable ones from the parameter
        weight = (self.mask * self.fixed_weight) + (1 - self.mask) * self.weight
        out = F.linear(x, weight, self.bias)
        return out

if __name__ == '__main__':
    mask = (torch.rand(3, 4) > 0.5).float()
    print("mask", mask)

    lin = CustomLinearNoWeightDecay(mask)
    for i in range(100):
        inp = torch.randn(10, 4)
        out = lin(inp)
        out.sum().backward()
        # the gradient is zero wherever mask == 1, i.e. at the frozen entries
        print(lin.weight.grad)
        lin.zero_grad(set_to_none=True)  # clear weight and bias grads
        input()  # press Enter to run the next iteration
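To see why this variant is immune to weight decay, here is a minimal sketch (not part of the original gist) that trains the class above with a plain SGD optimizer configured with weight decay and checks that the effective frozen weights never move:

import torch
import torch.optim as optim

# sketch assuming CustomLinearNoWeightDecay from above; the frozen entries
# live in the `fixed_weight` buffer, which the optimizer never sees
mask = (torch.rand(3, 4) > 0.5).float()
lin = CustomLinearNoWeightDecay(mask)
opt = optim.SGD(lin.parameters(), lr=0.1, weight_decay=0.1)

frozen_before = (lin.mask * lin.fixed_weight).clone()
for _ in range(10):
    out = lin(torch.randn(10, 4))
    out.sum().backward()
    opt.step()
    opt.zero_grad()
frozen_after = lin.mask * lin.fixed_weight

print(torch.allclose(frozen_before, frozen_after))  # True: frozen weights untouched

Note that SGD's weight-decay term still shrinks the masked entries of lin.weight itself, but forward() never reads those entries, so the layer's output is unaffected.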
import torch, torch.nn as nn
import torch.nn.functional as F

class CustomLinearWithWeightDecay(nn.Module):
    """Linear layer where entries with mask == 1 receive no gradient from the
    loss, but still live inside the weight parameter, so an optimizer's
    weight decay continues to shrink them."""
    def __init__(self, mask):
        super().__init__()
        self.register_buffer("mask", mask)
        out_channels, in_channels = mask.shape
        self.weight = nn.Parameter(torch.randn(out_channels, in_channels))
        self.bias = nn.Parameter(torch.randn(out_channels))

    def forward(self, x):
        # detach() blocks the gradient through the masked entries, but their
        # values still come from self.weight, which the optimizer (and its
        # weight decay) keeps updating
        weight = (self.mask * self.weight).detach() + (1 - self.mask) * self.weight
        out = F.linear(x, weight, self.bias)
        return out

if __name__ == '__main__':
    mask = (torch.rand(3, 4) > 0.5).float()
    print("mask", mask)

    lin = CustomLinearWithWeightDecay(mask)
    for i in range(100):
        inp = torch.randn(10, 4)
        out = lin(inp)
        out.sum().backward()
        # the gradient is zero wherever mask == 1
        print(lin.weight.grad)
        lin.zero_grad(set_to_none=True)  # clear weight and bias grads
        input()  # press Enter to run the next iteration
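By contrast, in this variant the frozen entries still sit inside self.weight, so an optimizer with weight decay shrinks them even though they get no gradient from the loss. A minimal sketch (same hypothetical SGD setup as above, not part of the original gist):

import torch
import torch.optim as optim

# sketch assuming CustomLinearWithWeightDecay from above; the detached
# entries are still part of the `weight` parameter, so SGD's weight-decay
# term (wd * param) keeps shrinking them step after step
mask = (torch.rand(3, 4) > 0.5).float()
lin = CustomLinearWithWeightDecay(mask)
opt = optim.SGD(lin.parameters(), lr=0.1, weight_decay=0.1)

frozen_before = (lin.mask * lin.weight).detach().clone()
for _ in range(10):
    out = lin(torch.randn(10, 4))
    out.sum().backward()
    opt.step()
    opt.zero_grad()
frozen_after = (lin.mask * lin.weight).detach()

# the masked entries have drifted toward zero under weight decay
print((frozen_after.abs() <= frozen_before.abs()).all())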