Dimitris Poulopoulos dpoulopoulos

⛺

Work from home

Senior ML Engineer

dpoulopoulos / metaflow_2.py

Created February 22, 2020 17:42

Convenience functions that check whether a number is prime, odd, or even.

	def check_prime(x):
	    """
	    Check whether a number is prime.

	    Args:
	        x: the number to test.

	    Returns:
	        True if ``x`` is greater than 1 and has no divisor other than 1
	        and itself, False otherwise (including for 0, 1 and negatives).
	    """
	    # Only numbers greater than 1 can be prime; answer explicitly with
	    # False instead of falling off the end of the function.
	    if x <= 1:
	        return False
	    # A composite number always has a divisor no larger than its square
	    # root, so trial division can stop there.
	    i = 2
	    while i * i <= x:
	        if x % i == 0:
	            return False
	        i += 1
	    return True

dpoulopoulos / metaflow_3.py

Last active February 22, 2020 18:27

Find prime, odd and even numbers in the dataset.

	import numpy as np

	from metaflow import FlowSpec, step


	class CheckNumbers(FlowSpec):
	@step
	def start(self):
	"""
	Initializes a random dataset.

dpoulopoulos / metaflow_4.py

Last active February 22, 2020 18:26

Nesting branches.

	import numpy as np

	from metaflow import FlowSpec, Parameter, step


	class CheckNumbers(FlowSpec):
	cores = Parameter('cores',
	help="Parallelize the operation in that many CPU cores.",
	default=4)

dpoulopoulos / nmf_1.py

Last active February 26, 2020 09:42

Load the Wikipedia movie plots and English first names datasets.

	import pandas as pd

	# Wikipedia Movie Plots dataset
	df = pd.read_csv('wiki_plots.csv')
	# English first names: the file's first row is treated as a header and
	# replaced, so the single column is labelled 'names'
	names_df = pd.read_csv('first_names.all.txt', header=0, names=['names'])
	# retain only the title and plot columns, then draw a random half of the rows
	df = df[['Title', 'Plot']].sample(frac=.5)

dpoulopoulos / nmf_2.py

Created February 26, 2020 10:06

Create the movie-word matrix.

	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS

	# collect every plot text into a plain Python list
	plots = df['Plot'].tolist()

	# stop words: scikit-learn's built-in English set followed by the first names
	names = names_df['names'].tolist()
	stop_words = [*ENGLISH_STOP_WORDS, *names]

dpoulopoulos / nmf_3.py

Created February 26, 2020 10:11

Non-negative matrix factorization (NMF) of the matrix.

	from sklearn import decomposition

	# NMF model with 18 components and a fixed random seed
	nmf = decomposition.NMF(random_state=1, n_components=18)
	# W is the transformed input returned by the fit; H holds the fitted components
	W = nmf.fit_transform(vectors)
	H = nmf.components_

dpoulopoulos / nmf_4.py

Last active February 26, 2020 16:35

	def get_top_words(topic, k=10):
	    """Return the ``k`` entries of ``vocab`` whose values in ``topic`` are largest, highest first."""
	    # argsort is ascending, so reverse it to rank from largest to smallest
	    ranked = np.argsort(topic)[::-1]
	    return [vocab[idx] for idx in ranked[:k]]

	def get_topics(matrix, k=10):
	    """Return, for each item of ``matrix``, its top ``k`` words joined by single spaces."""
	    return [' '.join(get_top_words(row, k)) for row in matrix]

	# list the 10 highest-weighted words of every topic in H
	get_topics(H, k=10)

dpoulopoulos / incremental_recommender_conf_1.py

Last active March 8, 2020 21:34

Create the preference column.

	# binary preference: 1 when the rating is above 3, otherwise 0
	data_df['preference'] = np.where(data_df['rating'].gt(3), 1, 0)
	# preview the first rows
	data_df.head()

dpoulopoulos / incremental_recommender_conf_2.py

Created February 29, 2020 11:47

Definition of the confidence function for icf.

	def conf_func(x: torch.Tensor, a: float = 1) -> torch.Tensor:
	    """
	    Map ratings to confidence weights.

	    Ratings of 5 and 1 map to ``a``, 4 and 2 to ``a * .5`` and 3 to
	    ``a * .01``; any other value is passed through unchanged.

	    Args:
	        x: tensor of ratings; it is not modified.
	        a: scaling factor applied to every confidence weight.

	    Returns:
	        A new float32 tensor, shaped like ``x``, holding the weights.
	    """
	    # Cast first so fractional weights are not truncated when ``x`` has an
	    # integer dtype, and compare against this untouched copy so a weight
	    # written for one rating is never re-matched as a different rating
	    # (which happened with in-place writes whenever ``a`` was not 1).
	    ratings = x.float()
	    conf = ratings.clone()
	    for rating, weight in ((5., 1.), (4., .5), (3., .01), (2., .5), (1., 1.)):
	        conf[ratings == rating] = a * weight
	    return conf

dpoulopoulos / incremental_recommender_conf_3.py

Last active March 2, 2020 08:16

Model definition.

	# local
	# run on the GPU when CUDA is available, otherwise on the CPU
	device = 'cuda' if torch.cuda.is_available() else 'cpu'
	# network, loss and SGD optimizer
	# NOTE(review): a and b are presumably the bounds handed to `init` — confirm
	net = SimpleCF(n_users, n_movies, factors=FACTORS, binary=True,
	               init=torch.nn.init.uniform_, a=0., b=.1)
	objective = FlatBCELoss()
	optimizer = SGD(net.parameters(), lr=6e-2)

	# bundle everything, together with the confidence function, into the model
	model = Step(net, objective, optimizer, conf_func=conf_func, device=device)