import torch.nn.functional as F

def LSTMCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None):
    """
    A modified LSTM cell with hard sigmoid activation on the input, forget and output gates.
    """
    hx, cx = hidden
    gates = F.linear(input, w_ih, b_ih) + F.linear(hx, w_hh, b_hh)
    ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)
    ingate = hard_sigmoid(ingate)
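The snippet is truncated here. As a sketch of how it plausibly continues, assuming hard_sigmoid mirrors Keras's piecewise-linear hard sigmoid, clip(0.2*x + 0.5, 0, 1):

import torch

def hard_sigmoid(x):
    # Keras-style hard sigmoid: a cheap piecewise-linear approximation of sigmoid
    return torch.clamp(0.2 * x + 0.5, min=0.0, max=1.0)

# ...continuing inside LSTMCell (sketch of the standard LSTM update):
#     forgetgate = hard_sigmoid(forgetgate)
#     cellgate = torch.tanh(cellgate)
#     outgate = hard_sigmoid(outgate)
#     cy = (forgetgate * cx) + (ingate * cellgate)   # new cell state
#     hy = outgate * torch.tanh(cy)                  # new hidden state
#     return hy, cy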
class Attention(Module):
    """
    Computes a weighted average of channels across timesteps (1 parameter per channel).
    """
    def __init__(self, attention_size, return_attention=False):
        """ Initialize the attention layer
        # Arguments:
            attention_size: Size of the attention vector.
            return_attention: If true, output will include the weight for each input token
                used for the prediction
        """
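The embed stops mid-constructor. A plausible completion, assuming the layer keeps one learnable weight per channel in a single Parameter (imported from torch.nn) of size attention_size:

        super(Attention, self).__init__()
        self.return_attention = return_attention
        self.attention_size = attention_size
        # One learnable weight per channel (assumed layout and init)
        self.attention_vector = Parameter(torch.FloatTensor(attention_size))
        self.attention_vector.data.normal_(std=0.05)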
class AttentionWeightedAverage(Layer):
    """
    Computes a weighted average of the different channels across timesteps.
    Uses 1 parameter per channel to compute the attention value for a single timestep.
    """
    def __init__(self, return_attention=False, **kwargs):
        self.init = initializers.get('uniform')
        self.supports_masking = True
        self.return_attention = return_attention
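The constructor is cut off before it finishes; a Keras custom Layer must chain to the base class, so the missing tail is presumably:

        super(AttentionWeightedAverage, self).__init__(**kwargs)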
# input_seqs is a batch of input sequences as a numpy array of integers (word indices in the vocabulary) padded with zeros
input_seqs = Variable(torch.from_numpy(input_seqs.astype('int64')).long())

# First: order the batch by decreasing sequence length
input_lengths = torch.LongTensor([torch.max(input_seqs[i, :].data.nonzero()) + 1 for i in range(input_seqs.size()[0])])
input_lengths, perm_idx = input_lengths.sort(0, descending=True)
input_seqs = input_seqs[perm_idx][:, :input_lengths.max()]

# Then pack the sequences
packed_input = pack_padded_sequence(input_seqs, input_lengths.cpu().numpy(), batch_first=True)
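After the packed batch has gone through a recurrent layer it has to be unpacked and the original batch order restored. A minimal sketch, where lstm stands in for any nn.LSTM built with batch_first=True:

from torch.nn.utils.rnn import pad_packed_sequence

packed_output, _ = lstm(packed_input)  # lstm is a placeholder recurrent module
output, output_lengths = pad_packed_sequence(packed_output, batch_first=True)

# Undo the initial length-sort to recover the original ordering
_, unperm_idx = perm_idx.sort(0)
output = output[unperm_idx]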
class DeepMojiDataset(Dataset):
    """ A simple Dataset class.
    # Arguments:
        X_in: Inputs of the given dataset.
        y_in: Outputs of the given dataset.
    # __getitem__ output:
        (torch.LongTensor, torch.LongTensor)
    """
    def __init__(self, X_in, y_in):
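The embed ends at the constructor. A Dataset like this needs little beyond storage plus __len__ and __getitem__; a plausible body (not necessarily the exact torchMoji code):

        self.X_in = X_in
        self.y_in = y_in

    def __len__(self):
        return len(self.X_in)

    def __getitem__(self, index):
        # Convert on access so the documented (LongTensor, LongTensor) output holds
        X = torch.LongTensor(self.X_in[index])
        y = torch.LongTensor([self.y_in[index]])
        return X, y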
class DeepMojiBatchSampler(object):
    """A batch sampler that enables larger epochs on small datasets and
    has upsampling functionality.
    # Arguments:
        y_in: Labels of the dataset.
        batch_size: Batch size.
        epoch_size: Number of samples in an epoch.
        upsample: Whether upsampling should be done. This flag should only be
            set on binary class problems.
        seed: Random number generator seed.
    """
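The body is cut off. A minimal sketch of the sampling idea — epoch_size draws with replacement, optionally balancing the two classes — where the class name and all details are my assumptions, not the original torchMoji code:

import numpy as np

class BalancedSampler(object):  # hypothetical stand-in for the truncated class
    def __init__(self, y_in, batch_size, epoch_size, upsample, seed):
        self.batch_size = batch_size
        self.epoch_size = epoch_size
        rng = np.random.RandomState(seed)
        y = np.asarray(y_in)
        if upsample:
            # Binary problems only: draw each class equally often
            pos, neg = np.flatnonzero(y == 1), np.flatnonzero(y == 0)
            half = epoch_size // 2
            idx = np.concatenate([rng.choice(pos, half),
                                  rng.choice(neg, epoch_size - half)])
            rng.shuffle(idx)
        else:
            idx = rng.choice(len(y), epoch_size)
        self.indices = idx

    def __iter__(self):
        for i in range(0, self.epoch_size, self.batch_size):
            yield self.indices[i:i + self.batch_size].tolist()

    def __len__(self):
        return (self.epoch_size + self.batch_size - 1) // self.batch_size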
def init_weights(self):
    """
    Here we reproduce the Keras default initialization to initialize the embedding and LSTM weights.
    """
    ih = (param.data for name, param in self.named_parameters() if 'weight_ih' in name)
    hh = (param.data for name, param in self.named_parameters() if 'weight_hh' in name)
    b = (param.data for name, param in self.named_parameters() if 'bias' in name)
    nn.init.uniform(self.embed.weight.data, a=-0.5, b=0.5)
    for t in ih:
        nn.init.xavier_uniform(t)
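Keras's default LSTM setup also uses orthogonal recurrent kernels and zero biases, so the truncated remainder is presumably along these lines (using the same pre-0.4 init names as the surrounding code):

    for t in hh:
        nn.init.orthogonal(t)   # Keras default: orthogonal recurrent kernels
    for t in b:
        nn.init.constant(t, 0)  # Keras default: zero biases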
# Drawn from https://gist.github.com/rocknrollnerd/c5af642cf217971d93f499e8f70fcb72 (in Theano)
# This is implemented in PyTorch
# Author : Anirudh Vemula

import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
from sklearn.datasets import fetch_mldata
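Only the imports survive here; fetch_mldata suggests the script loads MNIST next, which in that era of scikit-learn would look roughly like:

mnist = fetch_mldata('MNIST original')
X = mnist.data.astype(np.float32) / 255.0   # 70000 x 784 pixel intensities
y = mnist.target.astype(np.int64)           # digit labels 0-9

Note that fetch_mldata was later removed from scikit-learn (deprecated in 0.20, gone in 0.22); fetch_openml('mnist_784') is the modern replacement.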
class Model(nn.Module):
    def __init__(self, vocab_size, embed_dim, H1, H2, H3, pairs_in, single_in, drop=0.5):
        super(Model, self).__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim)  # embedding table: vocab_size x embed_dim
        self.drop = nn.Dropout(drop)
        self.pairs = nn.Sequential(nn.Linear(pairs_in, H1), nn.ReLU(), nn.Dropout(drop),
                                   nn.Linear(H1, H2), nn.ReLU(), nn.Dropout(drop),
                                   nn.Linear(H2, H3), nn.ReLU(), nn.Dropout(drop),
                                   nn.Linear(H3, 1),
                                   nn.Linear(1, 1))
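The trailing nn.Linear(1, 1) simply applies a learned affine rescaling to the scalar score produced by the H3-to-1 layer. For scale, instantiating such a model might look like this (every dimension below is a made-up placeholder):

model = Model(vocab_size=20000, embed_dim=64,
              H1=256, H2=128, H3=64,
              pairs_in=128, single_in=64, drop=0.5)
print(model)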
def get_params(module, memo=None, pointers=None):
    """ Returns an iterator over PyTorch module parameters that allows updating the parameters
    themselves (and not only their data).
    ! Side effect: updates shared parameters to point to the first yielded instance
    (i.e. you can update shared parameters and keep them shared)
    Yields:
        (Module, string, Parameter): Tuple containing the parameter's module, name and pointer
    """
    if memo is None:
        memo = set()
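The generator is truncated after the memo initialization. Given the docstring, the rest plausibly walks module._parameters and recurses into children, re-pointing shared duplicates at the first yielded instance; this is a reconstruction, not the verified original:

        pointers = {}
    for name, p in module._parameters.items():
        if p is None:
            continue
        if p not in memo:
            memo.add(p)
            pointers[p] = (module, name)
            yield module, name, p
        else:
            # Shared parameter seen before: re-point it at the first yielded instance
            first_module, first_name = pointers[p]
            module._parameters[name] = first_module._parameters[first_name]
    for child in module.children():
        for m, n, p in get_params(child, memo, pointers):
            yield m, n, p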