Created
April 30, 2018 16:15
-
-
Save cemoody/4b73cd1866108455cdac169fb1d6eb4b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from torch_trainer.trainer import Trainer | |
from torch_trainer.callbacks import rms_callback | |
from torch import nn | |
from torch.optim import Adam | |
import torch.nn.functional as F | |
import numpy as np | |
import pandas as pd | |
class MF(nn.Module):
    """Matrix-factorization model with per-user and per-item bias terms.

    The score for a (user, item) pair is::

        user_bias[u] + item_bias[i] + dot(user_embed[u], item_embed[i])

    ``loss`` treats that score as a logit for a binary target.

    Args:
        n_users: Number of rows in the user bias/embedding tables.
        n_items: Number of rows in the item bias/embedding tables.
        k: Width of the latent user and item vectors.
        n_obs: Total number of training observations; ``loss`` uses it to
            scale the regularization term per batch.
        lam: L2-regularization strength.
    """

    def __init__(self, n_users, n_items, k, n_obs, lam):
        super(MF, self).__init__()
        self.n_obs = n_obs
        self.lam = lam
        # Biases are stored as width-1 embeddings so they can be looked up
        # with the same index tensors as the latent vectors.
        self.user_bias = nn.Embedding(n_users, 1)
        self.item_bias = nn.Embedding(n_items, 1)
        self.user_embed = nn.Embedding(n_users, k)
        self.item_embed = nn.Embedding(n_items, k)

    def forward(self, u, i, target):
        """Return the raw (logit) score for each (user, item) pair.

        Args:
            u: Integer index tensor of user ids; assumed 1-D, one per example.
            i: Integer index tensor of item ids, same shape as ``u``.
            target: Not used by this method; kept in the signature for
                callers that pass it.

        Returns:
            Tensor with one score per example.
        """
        # Drop only the trailing width-1 axis: a bare squeeze() would also
        # remove the batch axis of the bias when the batch holds one example.
        bias = self.user_bias(u).squeeze(-1) + self.item_bias(i).squeeze(-1)
        # Row-wise dot product of the user and item latent vectors.
        intx = (self.user_embed(u) * self.item_embed(i)).sum(dim=1)
        return bias + intx

    def loss(self, prediction, u, i, target):
        """Return binary cross-entropy on the logits plus an L2 penalty.

        Args:
            prediction: Logits as returned by ``forward``.
            u: Not used by this method.
            i: Not used by this method.
            target: Binary labels with the same shape as ``prediction``.
                Any numeric dtype is accepted; it is cast to the dtype of
                ``prediction`` before the loss is computed.

        Returns:
            Scalar tensor: the batch cross-entropy plus the regularization
            term divided by the number of batches per pass over the data.
        """
        # Number of batches in one pass over the data, based on this batch's
        # size; the float conversion avoids integer division.
        n_batches = float(self.n_obs) / target.size()[0]
        # binary_cross_entropy_with_logits needs a floating target matching
        # the logits' dtype; labels built from NumPy are typically float64 or
        # integer, which either fails or surfaces as a dtype error in backward.
        target = target.to(prediction.dtype)
        # log-likelihood loss of target given prediction
        llh = F.binary_cross_entropy_with_logits(prediction, target)
        # Explicit L2-regularization over every parameter table
        sq_norm = ((self.user_bias.weight ** 2.0).sum() +
                   (self.item_bias.weight ** 2.0).sum() +
                   (self.user_embed.weight ** 2.0).sum() +
                   (self.item_embed.weight ** 2.0).sum())
        reg = self.lam * sq_norm / n_batches
        return llh + reg
# Where the ratings sample lives, plus the training hyperparameters.
url = ('https://raw.githubusercontent.com/nchah/movielens-recommender/'
       'master/data/ml-ratings-100k-sample.csv')
k = 10        # width of the latent user/item vectors
lam = 1e-6    # L2-regularization strength
n_epochs = 1  # number of calls to Trainer.fit below

# Load the ratings and binarize them: a rating counts as positive when it is
# above the mean rating of the whole file.
df = pd.read_csv(url)
u = df.userId.values
i = df.movieId.values
y = (df.rating.values > df.rating.mean()).astype('float')

# The raw ids index the embedding tables directly, so each table needs
# (largest id + 1) rows.
n_u = int(u.max() + 1)
n_i = int(i.max() + 1)
n_y = len(y)

# Build the model and hand it to the trainer.
model = MF(n_u, n_i, k, n_y, lam)
optim = Adam(model.parameters())
callbacks = dict(rms=rms_callback)
t = Trainer(model, optim, batchsize=128, callbacks=callbacks, seed=42)
for _ in range(n_epochs):
    t.fit(u, i, y)

# Export the learned latent vectors as NumPy arrays under the keys U and I.
U = model.user_embed.weight.data.cpu().numpy()
I = model.item_embed.weight.data.cpu().numpy()
np.savez("vectors", U=U, I=I)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment