Predicting a time series with Element-Research Torch RNN
--[[
-- Element-Research Torch RNN Tutorial for recurrent neural nets: let's predict time series with a laptop GPU
-- https://christopher5106.github.io/deep/learning/2016/07/14/element-research-torch-rnn-tutorial.html
--]]

--[[
-- Part 1
--]]
require 'rnn'

-- Construct RNN for h_t = σ(W_hh . h_t-1 + W_xh . X_t)
local r = nn.Recurrent(
  7, -- Hidden state size
  nn.LookupTable(10, 7), -- W_xh . X_t: 7D embedding of a word from a dictionary of 10 words
  nn.Linear(7, 7), -- W_hh . h_t-1
  nn.Sigmoid(), -- Transfer function σ
  5 -- Truncated BPTT limit
)
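-- (nn.Recurrent also accepts an optional sixth merge argument for combining
-- the input and feedback terms; it defaults to nn.CAddTable, which is exactly
-- what the explicit nn.Recurrence construction below reproduces.)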
-- Construct RNN (alternative with the more general nn.Recurrence)
local rm = nn.Sequential()
  :add(nn.ParallelTable()
    :add(nn.LookupTable(10, 7))
    :add(nn.Linear(7, 7)))
  :add(nn.CAddTable())
  :add(nn.Sigmoid())
r = nn.Recurrence(rm, 7, 1) -- Arguments are recurrent module, hidden state size and input dimension
-- Construct output for o_t = W_ho . h_t
local rr = nn.Sequential()
  :add(r)
  :add(nn.Linear(7, 10))
  :add(nn.LogSoftMax()) -- Output log probabilities
-- Wrap non-recurrent modules with nn.Recursor
local rnn = nn.Recursor(rr, 5) -- Truncated BPTT limit
-- Create input and target sequences (targets are the inputs shifted by one step, i.e. next-element prediction)
local inputs = torch.LongTensor({{1}, {2}, {3}, {4}, {5}})
local targets = torch.LongTensor({{2}, {3}, {4}, {5}, {6}})
-- Apply each element of the sequence to the RNN step by step
local outputs, err = {}, 0
local criterion = nn.ClassNLLCriterion()
for step = 1, 5 do
  outputs[step] = rnn:forward(inputs[step])
  err = err + criterion:forward(outputs[step], targets[step])
end
-- Train the RNN with BPTT step by step
local gradOutputs, gradInputs = {}, {}
for step = 5, 1, -1 do
  gradOutputs[step] = criterion:backward(outputs[step], targets[step])
  gradInputs[step] = rnn:backward(inputs[step], gradOutputs[step])
end
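-- (The backward passes must be applied in the reverse order of the forward
-- steps: the Recursor keeps the intermediate state of each forward step and
-- consumes those stored states from the most recent step backwards.)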
-- Update the parameters
rnn:updateParameters(0.1) -- Learning rate
rnn:forget() -- Reset the hidden state after every training or evaluation sequence
rnn:zeroGradParameters() -- Reset the accumulated gradients after every training sequence
-- Alternatively, use nn.Sequencer to process the whole sequence in one call
rnn = nn.Sequencer(rr)
criterion = nn.SequencerCriterion(nn.ClassNLLCriterion())
-- Perform forward and backward passes
outputs = rnn:forward(inputs)
err = criterion:forward(outputs, targets)
gradOutputs = criterion:backward(outputs, targets)
gradInputs = rnn:backward(inputs, gradOutputs)
-- Update the parameters
rnn:updateParameters(0.1)
-- nn.Sequencer calls forget before every forward call, so no manual forget is needed
rnn:zeroGradParameters()
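-- Putting the Sequencer API together: a minimal multi-epoch training loop
-- (a sketch; the epoch count and the reuse of the 0.1 learning rate are
-- illustrative choices, not part of the original tutorial)
for epoch = 1, 100 do
  local outputs = rnn:forward(inputs) -- Sequencer forgets, then runs all 5 steps
  local err = criterion:forward(outputs, targets)
  local gradOutputs = criterion:backward(outputs, targets)
  rnn:backward(inputs, gradOutputs)
  rnn:updateParameters(0.1)
  rnn:zeroGradParameters()
end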
--[[
-- Part 2
--]]
require 'gnuplot'

-- Create cos function to predict
local ii = torch.linspace(0, 200, 2000)
local oo = torch.cos(ii)
gnuplot.plot({'f(x)', ii, oo, '+-'})
-- Use GPU 1
local gpu = 1
require 'cutorch'
require 'cunn'
cutorch.setDevice(gpu)
local sequence = oo:cuda()
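-- (No CUDA GPU? The rest of the script should run on CPU by skipping the
-- cutorch/cunn requires, using local sequence = oo here, and dropping the
-- remaining :cuda()/CudaTensor calls; an untested substitution sketch.)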
-- Set up hyperparameters
local nIters = 2000 -- Number of training iterations
local batchSize = 80
local rho = 10 -- Length of each training subsequence (the BPTT span)
local hiddenSize = 300
local nIndex = 1 -- Dimensionality of each series element (a single cos value)
local lr = 0.0001 -- Learning rate
local nPredict = 200 -- Length of the sequence to predict at test time
-- Set up network
rnn = nn.Sequential()
  :add(nn.Linear(nIndex, hiddenSize))
  :add(nn.FastLSTM(hiddenSize, hiddenSize))
  :add(nn.NormStabilizer()) -- Use the norm stabilisation criterion to regularise the hidden states
  :add(nn.Linear(hiddenSize, nIndex))
  :add(nn.HardTanh()) -- Clamp predictions to [-1, 1], the range of cos
rnn = nn.Sequencer(rnn):cuda()
rnn:training()
print(rnn)
-- Set up criterion
criterion = nn.MSECriterion():cuda()

-- Create random offsets for picking length-rho training subsequences
local offsets = {}
for i = 1, batchSize do
  table.insert(offsets, math.ceil(math.random() * (sequence:size(1) - rho)))
end
offsets = torch.LongTensor(offsets):cuda()
-- Create zeroed gradOutputs (as only the final prediction in each subsequence is trained)
local gradOutputsZeroed = {}
for step = 1, rho do
  gradOutputsZeroed[step] = torch.zeros(batchSize, 1):cuda()
end
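-- (Since only step rho contributes to the loss, dL/do_t is identically zero
-- for t < rho; the zero tensors are allocated once and reused every
-- iteration, with the final slot overwritten by the true gradient below.)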
-- Train
for iteration = 1, nIters do
  -- Create inputs and targets
  local inputs, targets = {}, {}
  for step = 1, rho do
    inputs[step] = sequence:index(1, offsets):view(batchSize, 1) -- Create input sequence using offsets
    offsets:add(1) -- Increment offsets (for t+1 target prediction)
    for j = 1, batchSize do
      -- Wrap offsets around if necessary
      if offsets[j] > sequence:size(1) then
        offsets[j] = 1
      end
    end
    targets[step] = sequence:index(1, offsets):view(batchSize, 1) -- Create target sequence using the incremented offsets
  end
  rnn:zeroGradParameters() -- Zero accumulated gradients
  -- Forward propagate
  local outputs = rnn:forward(inputs)
  local err = criterion:forward(outputs[rho], targets[rho])
  print(string.format("Iteration %d ; MSE err = %f ", iteration, err))
  -- Backward propagate
  local gradOutputs = criterion:backward(outputs[rho], targets[rho])
  gradOutputsZeroed[rho] = gradOutputs
  local gradInputs = rnn:backward(inputs, gradOutputsZeroed)
  -- Update parameters
  rnn:updateParameters(lr)
end
-- Test
rnn:evaluate()

-- Seed the prediction sequence with the first rho ground-truth values
local predict = torch.CudaTensor(nPredict)
for step = 1, rho do
  predict[step] = sequence[step]
end

-- Autoregressive rollout: feed a sliding window of rho values (increasingly
-- the model's own outputs) and append the final prediction each time
local start = {}
local iteration = 0
while rho + iteration < nPredict do
  for step = 1, rho do
    start[step] = predict:index(1, torch.LongTensor({step + iteration})):view(1, 1)
  end
  local output = rnn:forward(start)
  predict[iteration + rho + 1] = (output[rho]:float())[1][1] -- Retrieve the prediction
  iteration = iteration + 1
end

-- Plot predictions
gnuplot.plot({'f(x)', predict:float(), '+-'})
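To run (assuming a Torch7 install with the rnn, cutorch, cunn and gnuplot packages and a CUDA-capable GPU), save the script under any name, e.g. timeseries.lua, and execute it with th timeseries.lua.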