-
-
Save MasanoriYamada/d1d8ca884d200e73cca66a4387c7470a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch
def get_batch_jacobian(net, x, to):
    """Compute the per-sample Jacobian of ``net`` at ``x`` with one backward pass.

    Every sample is replicated ``to`` times, the replicated batch is pushed
    through ``net`` once, and an identity matrix is used as the upstream
    gradient so that copy ``k`` of a sample only receives the gradient of
    output element ``k``.  The trick relies on ``net`` processing each row of
    its input batch independently of the other rows.

    Args:
        net: Callable mapping a tensor of shape ``(n, *in_shape)`` to a tensor
            of shape ``(n, *out_shape)``.
        x: Input tensor of shape ``(b, *in_shape)``.
        to: Total number of output elements per sample, i.e. the product of
            ``out_shape`` (e.g. an output of shape ``(b, 1, 4, 4)`` gives
            ``to = 1 * 4 * 4``).

    Returns:
        A tensor of shape ``(b, *out_shape, *in_shape)`` holding
        ``d out[b, o] / d in[b, i]``, detached from the autograd graph.
    """
    batch_size = x.shape[0]
    in_shape = x.shape[1:]

    # Detach first so the replicated tensor is always a leaf, even when the
    # caller's ``x`` already requires grad (a non-leaf would have no gradient
    # to read back).
    x_rep = x.detach().unsqueeze(1)  # b, 1, *in_shape
    x_rep = x_rep.repeat(1, to, *(1,) * len(in_shape))  # b, to, *in_shape
    x_rep.requires_grad_(True)

    y = net(x_rep.reshape(-1, *in_shape))  # b*to, *out_shape
    out_shape = y.shape[1:]
    y = y.reshape(batch_size, to, to)  # b, to (copy index), to (output index)

    # Identity selector: copy k back-propagates only output element k.  Built
    # on y's dtype/device so float64 and GPU models work as well.
    selector = torch.eye(to, dtype=y.dtype, device=y.device)
    selector = selector.reshape(1, to, to).repeat(batch_size, 1, 1)

    # autograd.grad returns the gradient directly instead of accumulating into
    # ``.grad`` fields, so the parameters of ``net`` are left untouched.
    (jacobian,) = torch.autograd.grad(y, x_rep, grad_outputs=selector)
    return jacobian.reshape(batch_size, *out_shape, *in_shape).detach()
class CNNNet(torch.nn.Module):
    """Tiny demo network: one 5x5 convolution followed by a ReLU.

    ``fc1`` is constructed (and therefore appears in the module's parameters)
    but is not applied in ``forward``; the linear head is currently disabled.
    """

    def __init__(self):
        super().__init__()
        self.cnn = torch.nn.Conv2d(1, 3, 5)  # 1 input channel -> 3 channels, 5x5 kernel
        self.fc1 = torch.nn.Linear(3, 4)  # unused by forward()

    def forward(self, x):
        """Return ``relu(conv(x))``, printing the input and output shapes."""
        print('x: {}'.format(x.shape))
        x = torch.nn.functional.relu(self.cnn(x))
        print('co: {}'.format(x.shape))
        # Disabled linear head: flattening x to (batch, -1) and applying
        # relu(fc1(x)) would go here.
        return x
# Demo: Jacobian of the conv net's output w.r.t. a batch of 1x5x5 inputs.
cnet = CNNNet()
batch = 10
x = torch.randn(batch, 1, 5, 5)
y = cnet(x)  # y.shape = 10,3,1,1
# Take the per-sample output size from the forward pass instead of
# hard-coding it (3 * 1 * 1 = 3 here).
ret = get_batch_jacobian(cnet, x, y[0].numel())
print(ret.shape)  # b, *out_shape, *in_shape = 10,3,1,1,1,5,5
I have a question: what would be the consequence of not repeating the input x before the forward pass, and instead repeating the output y after the forward pass? What would the resulting .grad field contain?
I believe that in the comment, y.shape should be b*to, to. Just for anyone else that comes along after me!
Thank you!
Could you show me your complete code for reproducing your error?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Also, for anyone else wondering, the code
`*(1,) * len(x.shape[2:])`
builds a tuple of ones whose length is `len(x.shape[2:])` (one entry per trailing input dimension); the leading star then unpacks that tuple into separate positional arguments of `repeat`.