import torch
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers

        self.c1 = nn.Conv1d(input_size, hidden_size, 2)   # kernel size 2 shortens the sequence by 1
        self.p1 = nn.AvgPool1d(2)                          # halves the sequence length
        self.c2 = nn.Conv1d(hidden_size, hidden_size, 1)
        self.p2 = nn.AvgPool1d(2)                          # halves it again
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=0.01)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, inputs, hidden):
        batch_size = inputs.size(1)

        # Turn (seq_len x batch_size x input_size) into (batch_size x input_size x seq_len) for CNN
        inputs = inputs.transpose(0, 1).transpose(1, 2)

        # Run through Conv1d and Pool1d layers
        c = self.c1(inputs)
        p = self.p1(c)
        c = self.c2(p)
        p = self.p2(c)

        # Turn (batch_size x hidden_size x seq_len) back into (seq_len x batch_size x hidden_size) for RNN
        p = p.transpose(1, 2).transpose(0, 1)
        p = F.tanh(p)

        output, hidden = self.gru(p, hidden)
        conv_seq_len = output.size(0)
        output = output.view(conv_seq_len * batch_size, self.hidden_size)  # Treating (conv_seq_len x batch_size) as batch_size for linear layer
        output = F.tanh(self.out(output))
        output = output.view(conv_seq_len, -1, self.output_size)
        return output, hidden

input_size = 20
hidden_size = 50
output_size = 7
batch_size = 5
n_layers = 2
seq_len = 15

rnn = RNN(input_size, hidden_size, output_size, n_layers=n_layers)
inputs = Variable(torch.rand(seq_len, batch_size, input_size))  # seq_len x batch_size x input_size
outputs, hidden = rnn(inputs, None)

print('outputs', outputs.size())  # conv_seq_len x batch_size x output_size
print('hidden', hidden.size())    # n_layers x batch_size x hidden_size
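Not part of the gist itself, but the printed shapes can be worked out from PyTorch's standard Conv1d/AvgPool1d output-length formula (default stride and padding unless noted). A small sketch with an illustrative helper, assuming the same hyperparameters as above:

def conv1d_len(l, kernel, stride=1, padding=0):
    # L_out = floor((L_in + 2*padding - (kernel - 1) - 1) / stride) + 1, with dilation = 1
    return (l + 2 * padding - (kernel - 1) - 1) // stride + 1

l = conv1d_len(15, kernel=2)            # c1: 15 -> 14
l = conv1d_len(l, kernel=2, stride=2)   # p1, AvgPool1d(2): 14 -> 7
l = conv1d_len(l, kernel=1)             # c2: 7 -> 7
l = conv1d_len(l, kernel=2, stride=2)   # p2, AvgPool1d(2): 7 -> 3
print(l)  # conv_seq_len = 3, so outputs is (3 x 5 x 7) and hidden is (2 x 5 x 50)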
@aa1607 I know it's an old question, but I stumbled in here 😄 I think the answer is (memory) contiguity. Compare the loop structure of a dynamic RNN with that of a CNN:

# RNN
for each slice of time
    for each sequence
        multiply and add together features

# CNN
for each sequence
    for each feature
        for each timestep
            multiply and add together features with close timesteps

It's faster!
Awesome explanation.
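To make the contiguity argument above concrete, here is a rough, minimal timing sketch (the sizes and layer settings are arbitrary, not from the gist): the GRU has to step through time sequentially, while Conv1d slides over one contiguous (batch x features x seq_len) buffer.

import time
import torch
from torch import nn

seq_len, batch_size, hidden_size = 200, 32, 256             # arbitrary sizes for illustration
x_rnn = torch.rand(seq_len, batch_size, hidden_size)        # seq_len x batch x features, as the GRU expects
x_cnn = x_rnn.transpose(0, 1).transpose(1, 2).contiguous()  # batch x features x seq_len, as Conv1d expects

gru = nn.GRU(hidden_size, hidden_size)
conv = nn.Conv1d(hidden_size, hidden_size, kernel_size=3, padding=1)

def avg_time(fn, n=20):
    start = time.time()
    for _ in range(n):
        fn()
    return (time.time() - start) / n

with torch.no_grad():
    print('GRU forward   :', avg_time(lambda: gru(x_rnn)))
    print('Conv1d forward:', avg_time(lambda: conv(x_cnn)))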
Hi, this is a great script for showing how to reshape at will, but it highlights another issue I was having that I was wondering if you could answer. If RNNs can be fed either batch-first or sequence-first data, while everything else accepts exclusively batch-first data, why not dispense with sequence-first rather than work it into so many implementations? I just moved from Keras, where batch-first is used for all data, and I can't understand why PyTorch makes such a priority of getting us to reshape back into S-B-F. Also, thank you so much for the seq2seq example; it's incredible.
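As an aside (not from the gist): PyTorch's recurrent layers do accept batch_first=True, so Keras-style batch-first data can be fed directly; sequence-first is only the default layout. A minimal sketch, with arbitrary sizes:

import torch
from torch import nn

gru_seq_first   = nn.GRU(input_size=20, hidden_size=50)                    # expects (seq_len, batch, features)
gru_batch_first = nn.GRU(input_size=20, hidden_size=50, batch_first=True)  # expects (batch, seq_len, features)

x = torch.rand(5, 15, 20)                        # batch x seq_len x features, Keras-style
out_bf, h_bf = gru_batch_first(x)                # out_bf: (5, 15, 50)
out_sf, h_sf = gru_seq_first(x.transpose(0, 1))  # transposed to (15, 5, 20); out_sf: (15, 5, 50)

print(out_bf.size(), out_sf.size(), h_bf.size())  # hidden stays (n_layers, batch, hidden) in both cases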