Thomas Wolf (thomwolf)
thomwolf / pytorch_weight_initialization.py
Created October 3, 2017 11:54
Simple way to reproduce Keras default initialisation in a typical pyTorch NLP model
def init_weights(self):
    """
    Reproduce the Keras default initialization for the Embedding/LSTM weights
    """
    ih = (param.data for name, param in self.named_parameters() if 'weight_ih' in name)
    hh = (param.data for name, param in self.named_parameters() if 'weight_hh' in name)
    b = (param.data for name, param in self.named_parameters() if 'bias' in name)
    # Keras defaults: uniform embeddings, Glorot/Xavier input-to-hidden weights,
    # orthogonal hidden-to-hidden weights, zero biases.
    # (Recent PyTorch uses the in-place variants: uniform_, xavier_uniform_, orthogonal_, constant_.)
    nn.init.uniform(self.embed.weight.data, a=-0.5, b=0.5)
    for t in ih:
        nn.init.xavier_uniform(t)
    for t in hh:
        nn.init.orthogonal(t)
    for t in b:
        nn.init.constant(t, 0)
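
A minimal usage sketch, assuming init_weights is defined at module level as above; the model, its names and sizes are illustrative assumptions, not part of the gist:

import torch.nn as nn

class TinyLSTMModel(nn.Module):
    # Hypothetical model: an Embedding plus an LSTM, so that parameters named
    # 'weight_ih', 'weight_hh' and 'bias' exist for init_weights to find.
    def __init__(self, vocab_size=10000, embed_dim=256, hidden_dim=512):
        super(TinyLSTMModel, self).__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        init_weights(self)  # apply the Keras-style initialization defined above

model = TinyLSTMModel()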
thomwolf / smart_batch_sampler.py
Created October 3, 2017 11:50
A pyTorch BatchSampler that enables large epochs on small datasets and balanced sampling from unbalanced datasets
class DeepMojiBatchSampler(object):
    """A Batch sampler that enables larger epochs on small datasets and
    has upsampling functionality.

    # Arguments:
        y_in: Labels of the dataset.
        batch_size: Batch size.
        epoch_size: Number of samples in an epoch.
        upsample: Whether upsampling should be done. This flag should only be
            set on binary class problems.
        seed: Random number generator seed.
    """
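
The preview cuts off after the docstring. Below is a minimal sketch of how a sampler with this interface could be implemented (the class name and method bodies are assumptions, not the gist's code): sampling indices with replacement lets an "epoch" exceed the dataset size, and per-class sampling balances a skewed binary label distribution.

import numpy as np

class BalancedBatchSampler(object):
    # Hypothetical implementation of the interface documented above.
    def __init__(self, y_in, batch_size, epoch_size, upsample, seed):
        self.n_batches = epoch_size // batch_size
        rng = np.random.RandomState(seed)
        y = np.asarray(y_in)
        if upsample:
            # Binary problems only: draw half of every batch from each class.
            pos, neg = np.where(y == 1)[0], np.where(y == 0)[0]
            half = batch_size // 2
            self.batches = [np.concatenate([rng.choice(pos, half),
                                            rng.choice(neg, batch_size - half)])
                            for _ in range(self.n_batches)]
        else:
            idx = np.arange(len(y))
            self.batches = [rng.choice(idx, batch_size) for _ in range(self.n_batches)]

    def __iter__(self):
        return iter(self.batches)

    def __len__(self):
        return self.n_batches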
thomwolf / simple_pytorch_dataset.py
Created October 3, 2017 11:48
A simple pyTorch Dataset class
class DeepMojiDataset(Dataset):
    """ A simple Dataset class.

    # Arguments:
        X_in: Inputs of the given dataset.
        y_in: Outputs of the given dataset.

    # __getitem__ output:
        (torch.LongTensor, torch.LongTensor)
    """
    def __init__(self, X_in, y_in):
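
The preview stops at __init__. A plausible completion of such a Dataset, plus how it could be combined with a batch sampler through a DataLoader (the class name and method bodies are assumptions for illustration):

import torch
from torch.utils.data import Dataset, DataLoader

class SimpleDataset(Dataset):
    # Hypothetical completion of the interface documented above.
    def __init__(self, X_in, y_in):
        self.X_in = X_in
        self.y_in = y_in

    def __len__(self):
        return len(self.X_in)

    def __getitem__(self, index):
        # Inputs are word indices, targets are class labels, both as LongTensors.
        return torch.LongTensor(self.X_in[index]), torch.LongTensor([self.y_in[index]])

# Possible wiring with the batch sampler from the previous gist:
# loader = DataLoader(SimpleDataset(X, y), batch_sampler=sampler)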
thomwolf / prepare_packed_sequence.py
Created October 3, 2017 10:55
Prepare a pyTorch PackedSequence for a batch of sequences
import torch
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence

# input_seqs is a batch of input sequences as a numpy array of integers (word indices in the vocabulary) padded with zeros
input_seqs = Variable(torch.from_numpy(input_seqs.astype('int64')).long())
# First: order the batch by decreasing sequence length
input_lengths = torch.LongTensor([torch.max(input_seqs[i, :].data.nonzero()) + 1 for i in range(input_seqs.size()[0])])
input_lengths, perm_idx = input_lengths.sort(0, descending=True)
input_seqs = input_seqs[perm_idx][:, :input_lengths.max()]
# Then pack the sequences
packed_input = pack_padded_sequence(input_seqs, input_lengths.cpu().numpy(), batch_first=True)
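
A short follow-up sketch (not part of the gist) showing the PackedSequence being consumed by an LSTM, unpacked, and the batch restored to its original order:

from torch.nn.utils.rnn import pad_packed_sequence

# lstm is assumed to be an nn.LSTM(..., batch_first=True)
packed_output, (h_n, c_n) = lstm(packed_input)
output, output_lengths = pad_packed_sequence(packed_output, batch_first=True)

# perm_idx sorted the batch by decreasing length; invert it to recover the original order
_, unperm_idx = perm_idx.sort(0)
output = output[unperm_idx]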
thomwolf / attention_layer_keras.py
Created October 3, 2017 10:06
A Keras Attention Layer for the DeepMoji model
class AttentionWeightedAverage(Layer):
    """
    Computes a weighted average of the different channels across timesteps.
    Uses 1 parameter per channel to compute the attention value for a single timestep.
    """
    def __init__(self, return_attention=False, **kwargs):
        self.init = initializers.get('uniform')
        self.supports_masking = True
        self.return_attention = return_attention
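
Only __init__ survives in the preview. A self-contained sketch of how such a layer can be written with the Keras backend follows; it is a reconstruction under the docstring's description (one parameter per channel, softmax over timesteps, masking-aware), not the gist's verbatim code.

from keras import backend as K
from keras import initializers
from keras.engine import InputSpec, Layer

class AttentionWeightedAverageSketch(Layer):
    """Reconstructed sketch of the layer described above."""
    def __init__(self, return_attention=False, **kwargs):
        self.init = initializers.get('uniform')
        self.supports_masking = True
        self.return_attention = return_attention
        super(AttentionWeightedAverageSketch, self).__init__(**kwargs)

    def build(self, input_shape):
        self.input_spec = [InputSpec(ndim=3)]
        # One scalar parameter per channel, used to score every timestep.
        self.W = self.add_weight(shape=(input_shape[2], 1),
                                 name='{}_W'.format(self.name),
                                 initializer=self.init)
        super(AttentionWeightedAverageSketch, self).build(input_shape)

    def call(self, x, mask=None):
        logits = K.dot(x, self.W)                               # (batch, time, 1)
        ai = K.exp(logits - K.max(logits, axis=1, keepdims=True))
        if mask is not None:
            # Zero out the padded timesteps before normalizing.
            ai = ai * K.cast(K.expand_dims(mask, -1), K.floatx())
        attention = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
        weighted_average = K.sum(x * attention, axis=1)         # (batch, channels)
        if self.return_attention:
            return [weighted_average, K.squeeze(attention, axis=-1)]
        return weighted_average

    def compute_output_shape(self, input_shape):
        if self.return_attention:
            return [(input_shape[0], input_shape[2]), (input_shape[0], input_shape[1])]
        return (input_shape[0], input_shape[2])

    def compute_mask(self, inputs, mask=None):
        # The time dimension is reduced away, so no mask is propagated downstream.
        return None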
thomwolf / attention_layer_pytorch.py
Last active January 25, 2021 00:51
A pyTorch attention layer for the torchMoji model
class Attention(Module):
    """
    Computes a weighted average of channels across timesteps (1 parameter per channel).
    """
    def __init__(self, attention_size, return_attention=False):
        """ Initialize the attention layer

        # Arguments:
            attention_size: Size of the attention vector.
            return_attention: If true, output will include the weight for each input token
                used for the prediction
        """
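
This pyTorch counterpart is also truncated at the docstring. A compact reconstruction under the same description follows (a sketch with assumed names, not the gist's verbatim code); it scores each timestep with a learned vector, masks padded positions using the sequence lengths, and returns the weighted average.

import torch
from torch import nn

class AttentionSketch(nn.Module):
    # Reconstructed sketch of the interface documented above.
    def __init__(self, attention_size, return_attention=False):
        super(AttentionSketch, self).__init__()
        self.return_attention = return_attention
        # One learnable scalar per channel, used to score each timestep.
        self.attention_vector = nn.Parameter(torch.zeros(attention_size).uniform_(-0.05, 0.05))

    def forward(self, inputs, input_lengths):
        # inputs: (batch, time, attention_size); input_lengths: (batch,)
        logits = inputs.matmul(self.attention_vector)           # (batch, time)
        unnorm_ai = (logits - logits.max()).exp()
        # Mask out padded timesteps before normalizing.
        max_len = inputs.size(1)
        mask = (torch.arange(max_len, device=inputs.device).unsqueeze(0)
                < input_lengths.unsqueeze(1)).float()
        masked = unnorm_ai * mask
        attentions = masked / masked.sum(dim=1, keepdim=True)
        # Weighted average over time.
        representations = (inputs * attentions.unsqueeze(-1)).sum(dim=1)
        return (representations, attentions) if self.return_attention else representations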
thomwolf / Hard_Sigmoid_LSTM.py
Created October 3, 2017 09:54
A pyTorch LSTM Cell with a hard sigmoid recurrent activation
def LSTMCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None):
    """
    A modified LSTM cell with hard sigmoid activation on the input, forget and output gates.
    """
    hx, cx = hidden
    gates = F.linear(input, w_ih, b_ih) + F.linear(hx, w_hh, b_hh)
    ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)
    ingate = hard_sigmoid(ingate)
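
The cell is cut off after the first gate. A hedged completion follows, including a definition of hard_sigmoid (the Keras-style piecewise-linear sigmoid, clip(0.2*x + 0.5, 0, 1)); it is written as a standalone sketch rather than the gist's exact continuation.

import torch
import torch.nn.functional as F

def hard_sigmoid(x):
    # Keras-style hard sigmoid: piecewise-linear approximation of the sigmoid.
    return torch.clamp(0.2 * x + 0.5, min=0.0, max=1.0)

def lstm_cell_sketch(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None):
    # Reconstructed completion of the cell above (sketch, not the gist's verbatim code).
    hx, cx = hidden
    gates = F.linear(input, w_ih, b_ih) + F.linear(hx, w_hh, b_hh)
    ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)
    ingate = hard_sigmoid(ingate)
    forgetgate = hard_sigmoid(forgetgate)
    cellgate = torch.tanh(cellgate)          # candidate cell state keeps the tanh activation
    outgate = hard_sigmoid(outgate)
    cy = (forgetgate * cx) + (ingate * cellgate)
    hy = outgate * torch.tanh(cy)
    return hy, cy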