Aleksandra Deis Lexie88rus

🏡

WFH

Data Scientist

Lexie88rus / sample_titles.py

Last active August 31, 2019 00:12

Sample titles from the model

	# Sample title from the trained model
	def sample():
	num_words = 10

	# Initialize input step and hidden state
	input = torch.zeros(1, 1, vocab_size)
	hidden = (torch.zeros(1, 1, n_hidden).to(device), torch.zeros(1, 1, n_hidden).to(device))

	i = 0
	output_word = None

Lexie88rus / training_loop.py

Last active August 31, 2019 00:03

Train the model

	import time
	import numpy as np
	import math
	import matplotlib.pyplot as plt

	# Training-loop settings: total iteration count, plus the printing and
	# plotting intervals (both currently the same value)
	n_iters = 1_100_000
	print_every = plot_every = 1_000

Lexie88rus / train.py

Created August 30, 2019 23:56

Training procedure for LSTM model

	# Define training procedure
	def train(sequence, target, device):
	# Move tensors to device
	hidden = rnn.initHidden(device)
	sequence = sequence.to(device)
	target = target.to(device)

	rnn.zero_grad()

	# Forward step

Lexie88rus / setup.py

Created August 30, 2019 21:12

Setup hyperparameters and training device

	# Optimisation settings: step size and the negative log-likelihood loss
	learning_rate = 0.005
	criterion = nn.NLLLoss()

	# Pick the compute device: first CUDA GPU when one is available, CPU otherwise
	if torch.cuda.is_available():
	    device = torch.device('cuda:0')
	else:
	    device = torch.device("cpu")

Lexie88rus / index_from_tensor.py

Created August 30, 2019 21:05

Function to convert a tensor representing a target word into a tensor holding its index in the vocabulary

	# Define a function to convert tensor into index in vocabulary
	def indexFromTensor(target):
	'''
	Function returns tensor containing target index given tensor representing target word
	'''
	top_n, top_i = target.topk(1)
	target_index = top_i[0].item()

	target_index_tensor = torch.zeros((1), dtype = torch.long)
	target_index_tensor[0] = target_index

Lexie88rus / word_from_output.py

Created August 30, 2019 21:01

Function to convert model output into a word

	# Define a function which converts output into word
	def wordFromOutput(output):
	    '''
	    Return the vocabulary word(s) and the index of the top-scoring output entry.
	    Arguments:
	    - output - tensor of model scores; the single best entry is selected
	      with topk(1)
	    Returns a tuple (words, index):
	    - words - list of every key in the global vocab whose value equals index
	      (empty if no key maps to it)
	    - index - position of the top-scoring entry as a Python number
	    '''
	    # Only the position of the best score is needed, not the score itself
	    _, top_i = output.topk(1)
	    category_i = top_i[0].item()
	    # vocab maps word -> index, so finding the word requires a reverse scan
	    words = [word for (word, index) in vocab.items() if index == category_i]
	    return words, category_i

Lexie88rus / create_model.py

Last active August 30, 2019 20:59

Create and initialize LSTM model with PyTorch

	# import PyTorch
	import torch
	import torch.nn as nn

	# Create LSTM
	class SimpleLSTM(nn.Module):
	'''
	Simple LSTM model to generate kernel titles.
	Arguments:
	- input_size - should be equal to the vocabulary size

Lexie88rus / generate_sequences.py

Created August 30, 2019 20:54

Generation of sequences out of kernel titles

	# Generate sequences out of titles:

	# Define sequence length used when generating sequences out of titles
	# (presumably the number of words per input sequence — TODO confirm
	# against generate_sequences)
	sequence_length = 3

	# Generate sequences
	def generate_sequences(titles):
	sequences = []
	targets = []
	# Loop for all selected titles

Lexie88rus / encode_words.py

Created August 30, 2019 20:49

Encode words as tensors

	import torch

	# Translate word to an index from vocabulary
	def wordToIndex(word):
	    '''
	    Return the index of a word in the global vocab.
	    Arguments:
	    - word - word to look up; any word other than the end_of_sentence
	      marker is first passed through clean_title
	    Raises KeyError if the resulting word is missing from vocab
	    (assuming vocab is a dict — confirm where it is built).
	    '''
	    # The end-of-sentence marker is looked up as-is, without cleaning
	    if word != end_of_sentence:
	        word = clean_title(word)
	    return vocab[word]

	# Translate word to 1-hot tensor
	def wordToTensor(word):

Lexie88rus / create_vocabulary.py

Last active August 30, 2019 16:04

Create a vocabulary for kernel titles

	def create_vocabulary(titles):
	'''
	Function to create a vocabulary out of a list of titles
	'''
	vocab = set()

	for title in titles:
	if (clean_title(title) != ''):
	words = extract_words(title)
	vocab.update(words)