def learn2(optimizee, unroll_train_steps, retain_graph_flag=False, reset_theta=False):
    """
    retain_graph_flag=False: by default, the dynamic graph is released after
    each loss.backward() call.
    reset_theta=False: by default, the parameters are not re-initialized
    randomly before each learning run.
    """
    if reset_theta:
        theta_new = torch.empty(dim)
        torch.nn.init.uniform_(theta_new, a=-1.0, b=1.0)
        theta_init_new = theta_new.clone().detach().requires_grad_(True)
        x = theta_init_new
        print('theta_new: {}'.format(theta_new))
        print('theta_init_new: {}'.format(theta_init_new))
        print('x: {}'.format(x))
    else:
        x = theta_init

    # Accumulate every step's loss here so that the LSTM optimizer can build a
    # single computation graph over the whole unroll.
    global_loss_graph = 0
    state = None
    x.requires_grad = True

    if optimizee.__name__ != 'Adam':
        print('Adam Optimizer Not Passed.')
        losses = []
        for i in range(unroll_train_steps):
            print('\n')
            print('---------------------------------------')
            print('---------------------------------------')
            print('i = {}'.format(i))
            loss = f(x)
            print('1. global_loss_graph: {}'.format(global_loss_graph))
            global_loss_graph = global_loss_graph + loss
            print('2. global_loss_graph: {}'.format(global_loss_graph))
            # loss.backward() computes dloss/dx for every tensor x that has
            # requires_grad=True and accumulates the result into x.grad.
            # In pseudo-code: x.grad += dloss/dx (see the standalone sketch
            # after this function).
            loss.backward(retain_graph=retain_graph_flag)  # default is False; set to True when training the LSTM optimizee
            print('x.grad: {}'.format(x.grad))
            update, state = optimizee(x.grad, state)
            losses.append(loss)
            x = x + update
            # x is no longer a leaf after the update, so ask autograd to keep
            # populating its .grad on the next backward pass.
            x.retain_grad()
            update.retain_grad()
            if state is not None:
                state = (state[0].detach(), state[1].detach())
            print('x after update: {}'.format(x))
        return losses, global_loss_graph
    else:
        print('Adam Optimizer Passed.')
        losses = []
        x.requires_grad = True
        optimizee = torch.optim.Adam([x], lr=0.1)
        for i in range(unroll_train_steps):
            optimizee.zero_grad()
            loss = f(x)
            global_loss_graph = global_loss_graph + loss
            loss.backward(retain_graph=retain_graph_flag)
            optimizee.step()
            # detach_() detaches the tensor in place from the graph that
            # created it, making it a leaf. Views cannot be detached in-place.
            losses.append(loss.detach_())
        return losses, global_loss_graph
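
# Hedged, standalone toy (not part of the training code): it illustrates the
# gradient accumulation that learn2's comments describe. Calling backward()
# twice on losses built from the same leaf adds into .grad rather than
# overwriting it, which is why optimizers clear gradients between steps.
import torch
_x = torch.ones(3, requires_grad=True)
(_x * 2).sum().backward()
print(_x.grad)   # tensor([2., 2., 2.])
(_x * 2).sum().backward()
print(_x.grad)   # tensor([4., 4., 4.]) -- accumulated, not overwritten
_x.grad = None   # roughly what optimizer.zero_grad() does per parameter
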
def global_training(optimizee):
    global_loss_list = []
    adam_global_optimizer = torch.optim.Adam([{'params': optimizee.parameters()},
                                              {'params': linear.parameters()}], lr=0.0001)
    _, global_loss_1 = learn2(LSTM_Optimizee, training_steps, retain_graph_flag=True, reset_theta=True)
    print(global_loss_1)
    for i in range(global_train_steps):
        _, global_loss = learn2(LSTM_Optimizee, training_steps, retain_graph_flag=True, reset_theta=False)
        # zero_grad() clears the old gradients from the last step (otherwise
        # the gradients from all loss.backward() calls would just accumulate).
        adam_global_optimizer.zero_grad()
        # global_loss.backward() walks the whole unrolled graph built inside
        # learn2; its buffers were kept alive by retain_graph_flag=True so this
        # second backward pass is possible (see the toy example after this function).
        global_loss.backward()
        adam_global_optimizer.step()
        global_loss_list.append(global_loss.detach_())
        print('Global_loss: {}'.format(global_loss))
    return global_loss_list
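
# Hedged, standalone toy (not the training loop): why retain_graph matters.
# Backpropagating twice through the same graph raises a RuntimeError unless the
# first backward() call used retain_graph=True, which is exactly the situation
# created by loss.backward() inside learn2 followed by global_loss.backward().
import torch
_a = torch.tensor([1.0, 2.0], requires_grad=True)
_b = (_a ** 2).sum()
_b.backward(retain_graph=True)   # keep the graph's buffers alive
_b.backward()                    # second pass works only because of the line above
print(_a.grad)                   # gradients from both passes accumulate: tensor([4., 8.])
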
def LSTM_Optimizee(gradients, state):
    '''
    The input is the gradient; PyTorch's torch.nn.LSTM expects input of shape
    (seq_len, batch_size, input_dim), here (1, 1, dim).
    '''
    # unsqueeze() inserts a singleton dimension at the given position
    gradients = gradients.unsqueeze(0).unsqueeze(0)
    if state is None:
        state = (torch.zeros(layers, batch_size, hidden_nums),
                 torch.zeros(layers, batch_size, hidden_nums))
    update, state = lstm2(gradients, state)
    update = linear(update)
    # Squeeze the singleton dims away again, e.g. [1, 1, dim] => [dim]
    return update.squeeze().squeeze(), state
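
# Hedged shape sketch for the reshaping above, using throwaway stand-ins rather
# than the real lstm2/linear (which are defined further below): a flat gradient
# of size 10 becomes a (1, 1, 10) sequence for the LSTM and is squeezed back.
import torch
_g = torch.randn(10)                          # flat gradient, shape [10]
_g3 = _g.unsqueeze(0).unsqueeze(0)            # shape [1, 1, 10] = (seq_len, batch, input_dim)
_demo_lstm = torch.nn.LSTM(10, 20, 2)
_demo_linear = torch.nn.Linear(20, 10)
_h0 = (torch.zeros(2, 1, 20), torch.zeros(2, 1, 20))
_out, _h1 = _demo_lstm(_g3, _h0)              # _out has shape [1, 1, 20]
_update = _demo_linear(_out).squeeze()        # back to shape [10]
assert _update.shape == _g.shape
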
import torch
import torch.nn as nn
from torch.autograd import Variable

dim = 10
w = torch.empty(dim)
torch.nn.init.uniform_(w, a=0.5, b=1.5)
theta = torch.empty(dim)
torch.nn.init.uniform_(theta, a=-1.0, b=1.0)  # fixed starting point, mirroring the reset_theta branch
theta_init = theta.clone().detach().requires_grad_(True)

def f(x):  # the function to be optimized
    x = w * (x - 1)
    y = (x + 1) * (x + 0.5) * x * (x - 1)
    return y.sum()
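
# Hedged sanity probe (illustrative only, not part of training): evaluate f at
# the origin and confirm a gradient flows back to the input.
_x_probe = torch.zeros(dim, requires_grad=True)
_y_probe = f(_x_probe)
_y_probe.backward()
print('f(0) = {:.4f}, grad norm = {:.4f}'.format(_y_probe.item(), _x_probe.grad.norm().item()))
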
global_train_steps = 1000
layers = 2
batch_size = 1
training_steps = 15
hidden_nums = 20
input_dim = dim
output_dim = dim

lstm2 = torch.nn.LSTM(input_dim, hidden_nums, layers)
linear = torch.nn.Linear(hidden_nums, output_dim)

global_loss_list = global_training(lstm2)
import numpy as np
import matplotlib.pyplot as plt

Global_T = np.arange(global_train_steps)
p1, = plt.plot(Global_T, [gl.item() for gl in global_loss_list], label='Global_graph_loss')
plt.legend(handles=[p1])
plt.title('Training LSTM optimizee by gradient descent')
plt.show()
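
# Hedged follow-up sketch (purely illustrative): after meta-training, run one
# unroll with the trained LSTM optimizee and one with learn2's built-in Adam
# branch on the same objective f, and plot both loss curves for comparison.
lstm_losses, _ = learn2(LSTM_Optimizee, training_steps, retain_graph_flag=True, reset_theta=True)
adam_losses, _ = learn2(torch.optim.Adam, training_steps, retain_graph_flag=False, reset_theta=True)
plt.figure()
p2, = plt.plot(np.arange(training_steps), [l.item() for l in lstm_losses], label='LSTM optimizee')
p3, = plt.plot(np.arange(training_steps), [l.item() for l in adam_losses], label='Adam (learn2 branch)')
plt.legend(handles=[p2, p3])
plt.title('One unroll: trained LSTM optimizee vs. Adam')
plt.show()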