Last active
January 23, 2017 19:42
-
-
Save ebetica/f674a0beba32ce718281088c7d39b35b to your computer and use it in GitHub Desktop.
PyTorch reinforce function
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
from torch.autograd import Variable | |
class Policy(nn.Module):
    """Tiny stochastic policy network for a 2-action environment.

    Maps a 4-dimensional observation (batch, 4) to a probability
    distribution over 2 actions and returns a sampled action index.
    """

    def __init__(self):
        super(Policy, self).__init__()
        self.affine1 = nn.Linear(4, 128)   # observation -> hidden
        self.affine2 = nn.Linear(128, 2)   # hidden -> action scores

    def forward(self, x):
        """Sample an action per row of x; returns a (batch, 1) index tensor.

        Fixes vs. original: softmax gets an explicit dim=1 (the implicit
        dim is deprecated and ambiguous for batched input), and multinomial
        gets an explicit num_samples=1 (required by current PyTorch and
        identical to the old default).
        """
        x = F.relu(self.affine1(x))
        probs = F.softmax(self.affine2(x), dim=1)  # per-row action probabilities
        return probs.multinomial(1)  # draw one action index per row
# Demonstration script: sample an action from Policy, apply REINFORCE with a
# reward of 1, then re-sample the same action and reinforce with a reward of
# 2, printing gradients after each backward pass to compare their scale.
# NOTE(review): Variable and Tensor.reinforce() are legacy PyTorch (<0.4)
# APIs that were removed in later releases; this script only runs on the
# old PyTorch versions that expose stochastic Variables.
model = Policy()
input = Variable(torch.randn(1, 4))  # one random 4-feature observation
a = model(input)  # sampled action (legacy stochastic Variable)
action = a.data[0, 0]  # remember which action was drawn
print("Action: ", action)
a.reinforce(1)  # attach a reward of 1 to the sampled action
a.backward()
for param in model.parameters():
    # print the first element of each parameter's gradient as a fingerprint
    print(param.grad.data.storage()[0])
print("The following grads should be scaled by a factor of 2, but are not")
# Re-sample until the same action comes up again, so the two gradient
# printouts correspond to the same action choice and are comparable.
a = model(input)
while a.data[0, 0] != action:
    a = model(input)
print("Action: ", a.data[0, 0])
a.reinforce(2)  # reinforce with twice as much
a.backward()
# Expect grads are scaled by factor of 2
# NOTE(review): param.grad is never zeroed between the two backward() calls,
# so this second printout shows accumulated gradients (first pass + second
# pass), not the second pass alone — presumably part of what the gist is
# illustrating; verify intent.
for param in model.parameters():
    print(param.grad.data.storage()[0])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment