-
-
Save MasanoriYamada/d1d8ca884d200e73cca66a4387c7470a to your computer and use it in GitHub Desktop.
import torch | |
def get_batch_jacobian(net, x, to):
    """Return the per-sample Jacobian of ``net`` evaluated at ``x``.

    Every sample is replicated ``to`` times along the batch axis so that a
    single backward pass, seeded with an identity matrix, yields one Jacobian
    row per replica: replica ``k`` of sample ``b`` receives the gradient of
    output element ``k`` with respect to the input of sample ``b``.

    Shape legend used in the comments below:
        b  -- batch size
        i  -- per-sample input shape (may have several dims)
        o  -- per-sample output shape (may have several dims)
        to -- total number of output elements per sample (product of ``o``)

    Args:
        net: Callable mapping a tensor of shape ``(n, *i)`` to a tensor of
            shape ``(n, *o)``. NOTE(review): the result is only a true
            per-sample Jacobian if ``net`` processes batch rows independently
            (e.g. no batch-statistics layers in training mode) -- confirm for
            your model.
        x: Input tensor of shape ``(b, *i)``. It is detached internally, so it
            may or may not require grad; its ``.grad`` is never touched.
        to: Total number of output elements per sample. The reshape of the
            network output below raises if this does not match.

    Returns:
        A detached tensor of shape ``(b, *o, *i)``.
    """
    batch = x.shape[0]
    in_shape = x.shape[1:]

    # Detach first: if the caller's ``x`` already requires grad, the replicated
    # tensor would be a non-leaf and no gradient could be read back for it.
    x_rep = x.detach().unsqueeze(1)                       # b, 1, *i
    x_rep = x_rep.repeat(1, to, *(1,) * len(in_shape))    # b, to, *i
    x_flat = x_rep.reshape(batch * to, *in_shape)         # b*to, *i
    x_flat.requires_grad_(True)

    y = net(x_flat)                                       # b*to, *o
    out_shape = y.shape[1:]
    y = y.reshape(batch, to, to)                          # b, to(replica), to(output)

    # Identity seed selects output element k for replica k. It is created with
    # the output's dtype/device so that GPU and non-float32 models work.
    seed = torch.eye(to, dtype=y.dtype, device=y.device)
    seed = seed.reshape(1, to, to).repeat(batch, 1, 1)    # b, to, to

    # torch.autograd.grad returns the input gradient directly instead of
    # accumulating into ``.grad`` fields, so the parameters of ``net`` are not
    # polluted with gradients as a side effect of computing the Jacobian.
    (grad,) = torch.autograd.grad(y, x_flat, grad_outputs=seed)  # b*to, *i

    return grad.reshape(batch, *out_shape, *in_shape).detach()   # b, *o, *i
class CNNNet(torch.nn.Module):
    """Tiny demo network: one 5x5 convolution (1 -> 3 channels) plus ReLU.

    A ``Linear(3, 4)`` head is constructed as well, but ``forward`` does not
    apply it; the fully-connected stage is currently disabled.
    """

    def __init__(self):
        super().__init__()
        self.cnn = torch.nn.Conv2d(1, 3, 5)
        self.fc1 = torch.nn.Linear(3, 4)

    def forward(self, x):
        """Apply conv + ReLU to ``x``, printing the input and output shapes."""
        print(f'x: {x.shape}')
        activated = torch.nn.functional.relu(self.cnn(x))
        print(f'co: {activated.shape}')
        # Disabled fully-connected stage (kept for reference):
        #   flat = activated.reshape(activated.shape[0], -1)
        #   activated = torch.nn.functional.relu(self.fc1(flat))
        #   print('li: {}'.format(activated.shape))
        return activated
# Demo: compute the batched Jacobian of the small conv net on random input.
cnet = CNNNet()
batch = 10
x = torch.randn(batch, 1, 5, 5)
y = cnet(x)  # y.shape = (10, 3, 1, 1)
# Take the per-sample output size from the forward pass instead of
# hard-coding it, so the demo stays correct if the network changes.
ret = get_batch_jacobian(cnet, x, y[0].numel())
print(ret.shape)  # (batch, *output_shape, *input_shape)
I think there's a small typo in a comment. On the line
`y = net(x.reshape(-1, *tmp_shape[2:])) # x.shape = b*to,i y.shape = b,to,to`
I believe that, in the comment, `y.shape` should be `b*to, to`. Just for anyone else that comes along after me!
Also, for anyone else wondering, the code `*(1,) * len(x.shape[2:])`
constructs a tuple of ones whose length is `len(x.shape[2:])` (the number of remaining input dimensions),
and the leading star unpacks this tuple into separate arguments when passing it to the function.
I have a question. What would the consequence be of not repeating the input `x`
before the forward pass, but instead repeating the output `y`
after the forward pass? What would the resulting `.grad`
field contain?
I believe that in the comment, y.shape should be b*to, to. Just for anyone else that comes along after me!
Thank you!
Could you show me your complete code for reproducing your error?
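Get the Jacobian in PyTorch. The base implementation is https://gist.github.com/sbarratt/37356c46ad1350d4c30aefbd488a4faa
To compute the derivative of a vector-valued output with respect to a vector-valued input efficiently, each input sample is replicated along the batch dimension (once per output element), so the whole Jacobian is obtained in a single backward pass.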