Mateusz Bednarski (mbednarski)
import sys
sys.path.append('src')  # make project modules under src/ importable before local imports

import click

from data.preprocess import read_processed_data
from random_forest import RandomForestModel


@click.command()
def main():
    # hypothetical stub: the original snippet is truncated right after the
    # decorator, so a minimal body is added only to keep it syntactically valid
    pass

import numpy as np
import torch
import torch.nn.functional as F
from torch.autograd import Variable

corpus = [
    'he is a king',
    'she is a queen',
    'he is a man',
    'she is a woman',
    'warsaw is poland capital',
    'berlin is germany capital',
    'paris is france capital',
]
def tokenize_corpus(corpus):
    # split each sentence on whitespace
    tokens = [x.split() for x in corpus]
    return tokens
tokenized_corpus = tokenize_corpus(corpus)

vocabulary = []
for sentence in tokenized_corpus:
    for token in sentence:
        if token not in vocabulary:
            vocabulary.append(token)
word2idx = {w: idx for (idx, w) in enumerate(vocabulary)}
idx2word = {idx: w for (idx, w) in enumerate(vocabulary)}
vocabulary_size = len(vocabulary)
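# Quick sanity check of the mappings (illustration only: the exact indices
# follow first-occurrence order in the corpus above, so 'he' maps to 0):
print(word2idx['he'])   # 0
print(idx2word[0])      # 'he'
print(vocabulary_size)  # 15 distinct tokens in this toy corpus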
window_size = 2
idx_pairs = []
# for each sentence
for sentence in tokenized_corpus:
    indices = [word2idx[word] for word in sentence]
    # for each word, treated as center word
    for center_word_pos in range(len(indices)):
        # for each window position
        for w in range(-window_size, window_size + 1):
            context_word_pos = center_word_pos + w
            # skip positions outside the sentence and the center word itself
            if context_word_pos < 0 or context_word_pos >= len(indices) \
                    or center_word_pos == context_word_pos:
                continue
            idx_pairs.append((indices[center_word_pos], indices[context_word_pos]))

idx_pairs = np.array(idx_pairs)  # each row is a (center, context) index pair
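# Illustration: in the first sentence 'he is a king', the first center word
# 'he' can only look right, producing the pairs (he, is) and (he, a), i.e.
# index pairs (0, 1) and (0, 2) given the mapping built above:
print(idx_pairs[:2])  # [[0 1] [0 2]]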
def get_input_layer(word_idx):
    # one-hot encode a single word index as a float vector
    x = torch.zeros(vocabulary_size).float()
    x[word_idx] = 1.0
    return x
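# Example (again assuming 'he' got index 0): its one-hot vector is all zeros
# except for a single 1.0 at position 0:
print(get_input_layer(word2idx['he']))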
embedding_dims = 5
# W1 projects a one-hot word vector down to its dense embedding;
# W2 projects the embedding back up to vocabulary-sized scores.
W1 = Variable(torch.randn(embedding_dims, vocabulary_size).float(), requires_grad=True)
W2 = Variable(torch.randn(vocabulary_size, embedding_dims).float(), requires_grad=True)

# a single forward pass, using one example word ('he') as the one-hot input
x = get_input_layer(word2idx['he'])
z1 = torch.matmul(W1, x)   # hidden layer: the word's embedding
z2 = torch.matmul(W2, z1)  # output scores over the whole vocabulary
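# To read z2 as a distribution over context words, apply a softmax; the log
# variant below is the form that pairs with the NLL loss in the training loop:
log_probs = F.log_softmax(z2, dim=0)  # log P(context word | center word 'he')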
num_epochs = 100
learning_rate = 0.001

for epo in range(num_epochs):
    loss_val = 0
    for data, target in idx_pairs:
        x = Variable(get_input_layer(data)).float()
        y_true = Variable(torch.from_numpy(np.array([target])).long())

        # forward pass through both layers, then log-softmax over the vocabulary
        log_softmax = F.log_softmax(torch.matmul(W2, torch.matmul(W1, x)), dim=0)
        loss = F.nll_loss(log_softmax.view(1, -1), y_true)
        loss_val += loss.item()

        # backpropagate, apply a manual SGD step, and reset the gradients
        loss.backward()
        W1.data -= learning_rate * W1.grad.data
        W2.data -= learning_rate * W2.grad.data
        W1.grad.data.zero_()
        W2.grad.data.zero_()
    if epo % 10 == 0:
        print(f'Loss at epoch {epo}: {loss_val / len(idx_pairs)}')
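# A minimal sketch for inspecting what was learned (assumption: column i of
# W1 is treated as the embedding of word i), via cosine similarity:
def similarity(word_a, word_b):
    v_a = W1[:, word2idx[word_a]]
    v_b = W1[:, word2idx[word_b]]
    return (torch.dot(v_a, v_b) / (torch.norm(v_a) * torch.norm(v_b))).item()

print(similarity('he', 'she'))      # related words should tend to score higher...
print(similarity('he', 'capital'))  # ...than unrelated ones (not guaranteed on a toy corpus)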