Skip to content

Instantly share code, notes, and snippets.

View dpoulopoulos's full-sized avatar
Work from home

Dimitris Poulopoulos dpoulopoulos

Work from home
  • Mozilla AI
  • Athens, Greece
  • 07:08 (UTC +03:00)
View GitHub Profile
@dpoulopoulos
dpoulopoulos / metaflow_2.py
Created February 22, 2020 17:42
Convenient functions to calculate prime, odd and even numbers.
def check_prime(x):
    """
    Convenient function that checks if a number is prime.

    Args:
        x: The integer to test.

    Returns:
        True when ``x`` is a prime number, False otherwise. Every integer
        below 2 (0, 1 and the negatives) is reported as not prime instead
        of falling through without an explicit result.
    """
    if x < 2:
        return False
    if x < 4:
        # 2 and 3 are prime.
        return True
    if x % 2 == 0:
        return False
    # A composite number always has a divisor no larger than its square
    # root, so odd candidates up to sqrt(x) are enough.
    divisor = 3
    while divisor * divisor <= x:
        if x % divisor == 0:
            return False
        divisor += 2
    return True
@dpoulopoulos
dpoulopoulos / metaflow_3.py
Last active February 22, 2020 18:27
Find prime, odd and even numbers in the dataset.
import numpy as np
from metaflow import FlowSpec, step
class CheckNumbers(FlowSpec):
@step
def start(self):
"""
Initializes a random dataset.
@dpoulopoulos
dpoulopoulos / metaflow_4.py
Last active February 22, 2020 18:26
Nesting branches.
import numpy as np
from metaflow import FlowSpec, Parameter, step
class CheckNumbers(FlowSpec):
cores = Parameter('cores',
help="Parallelize the operation in that many CPU cores.",
default=4)
@dpoulopoulos
dpoulopoulos / nmf_1.py
Last active February 26, 2020 09:42
Load the Wikipedia movie plots and English first names datasets.
import pandas as pd
# Wikipedia Movie Plots Dataset, read into a DataFrame
df = pd.read_csv('wiki_plots.csv')
# English first names as a single 'names' column; header=0 together with
# names= replaces the file's first line with that column name
names_df = pd.read_csv('first_names.all.txt', header=0, names=['names'])
# keep only the title and plot columns, then draw a random 50% of the movies
df = df[['Title', 'Plot']].sample(frac=.5)
@dpoulopoulos
dpoulopoulos / nmf_2.py
Created February 26, 2020 10:06
Create the movie-word matrix.
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
# every movie plot, as a plain Python list
plots = df['Plot'].tolist()
# stop words: scikit-learn's English stop words followed by the first names
names = names_df['names'].tolist()
stop_words = [*ENGLISH_STOP_WORDS, *names]
@dpoulopoulos
dpoulopoulos / nmf_3.py
Created February 26, 2020 10:11
NMF factorization of matrix.
from sklearn import decomposition
# instantiate the NMF decomposition: 18 components, random_state fixed at 1
nmf = decomposition.NMF(n_components=18, random_state=1)
# get the W and H matrices
# W is what fit_transform returns for `vectors`; H is the fitted model's
# components_ attribute, so it must be read after the fit.
# NOTE(review): `vectors` is not defined in this snippet - presumably the
# movie-word count matrix built elsewhere; confirm.
W = nmf.fit_transform(vectors)
H = nmf.components_
def get_top_words(topic, k=10):
    """
    Return the `k` vocabulary entries with the largest values in `topic`.

    The entries are ordered from the highest value to the lowest. Words are
    looked up by index in the module-level `vocab` sequence.
    """
    # argsort is ascending, so flip it to put the largest values first
    ranked = np.argsort(topic)[::-1]
    return [vocab[idx] for idx in ranked[:k]]
def get_topics(matrix, k=10):
    """
    Describe every row of `matrix` by its `k` top words.

    Each row is passed to `get_top_words`; the words it returns are joined
    with single spaces, giving one string per row.
    """
    return [' '.join(get_top_words(row, k)) for row in matrix]
# get the k most important words for each topic
# (k is left at its default of 10; the returned list is not assigned, so it
# is only visible when this runs in an interactive session)
get_topics(H)
@dpoulopoulos
dpoulopoulos / incremental_recommender_conf_1.py
Last active March 8, 2020 21:34
Create the preference column.
# Binarize the ratings: a rating above 3 becomes preference 1, anything else 0.
data_df['preference'] = np.where(data_df['rating'] > 3, 1, 0)
# Preview the first rows; the result is not assigned, so it is only shown
# in an interactive session.
data_df.head()
@dpoulopoulos
dpoulopoulos / incremental_recommender_conf_2.py
Created February 29, 2020 11:47
Definition of the confidence function for icf.
def conf_func(x: torch.Tensor, a: float = 1) -> torch.Tensor:
    """
    Map ratings to confidence weights scaled by `a`.

    Ratings 5 and 1 map to ``a * 1.``, ratings 4 and 2 to ``a * .5`` and
    rating 3 to ``a * .01``. Any other value is passed through unchanged
    (converted to float).

    Args:
        x: Tensor of ratings. It is not modified.
        a: Scaling factor applied to every confidence weight.

    Returns:
        A new float tensor shaped like `x` that holds the weights.
    """
    ratings = x.float()
    # Write into a copy while comparing against the untouched ratings, so a
    # freshly written weight (e.g. a * 1. == 2. when a == 2) can never be
    # mistaken for a rating by one of the later comparisons.
    weights = ratings.clone()
    for rating, factor in ((5., 1.), (4., .5), (3., .01), (2., .5), (1., 1.)):
        weights[ratings == rating] = a * factor
    return weights
# local
# SimpleCF, FlatBCELoss, SGD, Step, n_users, n_movies and FACTORS are not
# defined in this snippet; they come from elsewhere in the project.
# NOTE(review): a=0. and b=.1 are presumably the bounds forwarded to
# torch.nn.init.uniform_ - confirm against SimpleCF.
net = SimpleCF(n_users, n_movies, factors=FACTORS, init=torch.nn.init.uniform_,
a=0., b=.1, binary=True)
objective = FlatBCELoss()
# SGD over all of the network's parameters with a learning rate of 6e-2
optimizer = SGD(net.parameters(), lr=6e-2)
# use the GPU when CUDA is available, otherwise fall back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# bundle network, loss, optimizer and the confidence function into one Step
model = Step(net, objective, optimizer, conf_func=conf_func, device=device)