@ptrblck · Last active March 28, 2018
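# Three ways to compute the same gradient for a linear model weight w under an
# MSE loss: (1) sum ten small-batch losses and average before one backward pass,
# (2) a single "vanilla" batch of 20 samples, (3) scale each small-batch loss
# by 1/10 and let backward() accumulate gradients across ten passes.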
import torch
import torch.nn as nn
from torch.autograd import Variable  # required in PyTorch < 0.4; Variable was merged into Tensor in 0.4+
# batch_size = 2
x1 = Variable(torch.ones(2, 1))
w1 = Variable(torch.ones(1, 1), requires_grad=True)
y1 = Variable(torch.ones(2, 1) * 2)
criterion = nn.MSELoss()
# accumulate for 10 iterations, summing the losses
loss1 = 0
for i in range(10):
    output1 = torch.matmul(x1, w1)
    loss1 += criterion(output1, y1)
loss1 /= 10  # average the ten summed losses before the single backward pass
loss1.backward()
print(w1.grad)
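# expect -2: the mean of ten batch-of-2 MSE losses equals the single
# batch-of-20 MSE loss, so this matches the full-batch gradient below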
# batch_size = 20, vanilla method
x2 = Variable(torch.ones(20, 1))
w2 = Variable(torch.ones(1, 1), requires_grad=True)
y2 = Variable(torch.ones(20, 1) * 2)
output2 = torch.matmul(x2, w2)
loss2 = criterion(output2, y2)
loss2.backward()
print(w2.grad)
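# expect -2 again: this full-batch gradient is the reference the other
# two methods should reproduce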
# batch_size = 2, scale each loss and accumulate gradients over 10 backward passes
x3 = Variable(torch.ones(2, 1))
w3 = Variable(torch.ones(1, 1), requires_grad=True)
y3 = Variable(torch.ones(2, 1) * 2)
for i in range(10):
    output3 = torch.matmul(x3, w3)
    loss3 = criterion(output3, y3)
    loss3 /= 10        # scale so the ten accumulated gradients sum to the full-batch gradient
    loss3.backward()   # backward() adds to w3.grad on every iteration
print(w3.grad)
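# Sanity check (a sketch, assuming the three snippets above ran in the same
# process): all variants should hold numerically identical gradients.
# Analytically: d/dw mean((w*x - y)**2) = mean(2*(w*x - y)*x) = 2*(1 - 2)*1 = -2
print((w1.grad - w2.grad).abs().max())  # ~0 (exact here)
print((w3.grad - w2.grad).abs().max())  # ~0 up to float32 accumulation error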