This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Required third-party libraries: numpy, pandas, pytorch.
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import random

# Loading the ratings table as a pandas dataframe.
# NOTE(review): assumes 'ratings.csv' is in the working directory — a
# MovieLens-style (user, item, rating) file, per the columns read below.
ratings = pd.read_csv('ratings.csv')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# required libraries - numpy, pandas, pytorch | |
import numpy as np | |
import pandas as pd | |
import torch | |
import torch.nn as nn | |
import torch.optim as optim | |
import random |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Loading the table as a pandas dataframe.
ratings = pd.read_csv('ratings.csv')
# Getting the three column names from the dataframe.
# NOTE(review): the unpack requires the csv to have exactly three columns
# (user, item, rating in that order) — confirm against the data file.
user_col, item_col, rating_col = ratings.columns
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This function returns a python dictionary
# which maps each id to a corresponding index value.
def list_2_dict(id_list: list) -> dict:
    """Map each id in `id_list` to its positional index.

    Parameters
    ----------
    id_list : list
        Ids in the order that defines their index (0-based).

    Returns
    -------
    dict
        {id: index} for every id in `id_list`. Duplicate ids keep the
        index of their last occurrence, as in the original loop.
    """
    # enumerate replaces the manual zip(id_list, range(len(id_list)));
    # id_ avoids shadowing the `id` builtin.
    return {id_: index for index, id_ in enumerate(id_list)}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Splits ratings dataframe to training and validation dataframes.
# valid_pct is the fraction of rows assigned to the validation split.
def get_data(ratings, valid_pct: float = 0.2):
    # Shuffle the row indexes (random.sample with k == population size
    # yields a full random permutation of 0..len(ratings)-1).
    ln = random.sample(range(0, len(ratings)), len(ratings))
    # Split point based on the given validation set percentage.
    part = int(len(ln)*valid_pct)
    # First `part` shuffled indexes -> validation, the rest -> training.
    valid_index = ln[0:part]
    train_index = ln[part:]
    # NOTE(review): the body appears truncated here by the scrape. The
    # caller ("data = get_data(ratings, 0.1)" and the comment "returns a
    # list [train, valid]") implies a missing return of the train/valid
    # splits built from train_index/valid_index — TODO confirm against
    # the original source.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Get a batch -> (user, item and rating arrays) from the dataframe.
def get_batch(ratings, start: int, end: int):
    """Return the (users, items, ratings) value arrays for rows [start, end).

    Relies on the module-level user_col / item_col / rating_col names;
    slicing past the end of the dataframe simply yields shorter arrays.
    """
    columns = (user_col, item_col, rating_col)
    return tuple(ratings[name][start:end].values for name in columns)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Get sorted list of unique user ids.
# sorted() accepts the set directly — the intermediate list() was redundant.
users = sorted(set(ratings[user_col].values))
# Get sorted list of unique item ids.
items = sorted(set(ratings[item_col].values))
# Generate dict of corresponding indexes for the user ids
# (used to map raw ids to embedding rows).
user2idx = list_2_dict(users)
# Generate dict of corresponding indexes for the item ids.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Neural net based on Embedding matrices (dot-product collaborative filtering).
# Model reference -> https://github.com/fastai/fastai/
class EmbeddingModel(nn.Module):
    def __init__(self, n_factors, n_users, n_items, y_range, initialise = 0.01):
        super().__init__()
        # y_range: (min, max) bounds for predictions — presumably applied
        # via a sigmoid in forward(); forward() is not visible in this
        # chunk, so TODO confirm.
        self.y_range = y_range
        # Latent-factor embedding matrices: one n_factors-wide row per
        # user and per item.
        self.u_weight = nn.Embedding(n_users, n_factors)
        self.i_weight = nn.Embedding(n_items, n_factors)
        # Per-user and per-item scalar bias terms, also stored as embeddings.
        self.u_bias = nn.Embedding(n_users, 1)
        self.i_bias = nn.Embedding(n_items, 1)
        # NOTE(review): `initialise` is unused in the visible lines — the
        # original presumably initialises the embedding weights with it;
        # the rest of __init__ (and forward) appears truncated by the scrape.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create a model object.
# y_range has been extended (0-11) beyond the required (1-10) to make the
# target values lie in the linear region of the sigmoid function.
# Robustness fix: the original unconditionally called .cuda(), which raises
# on CPU-only machines; fall back to CPU when CUDA is unavailable.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = EmbeddingModel(10, len(users), len(items), [0, 11], initialise = 0.01).to(device)
# Split the data, returns a list [train, valid] (10% validation).
data = get_data(ratings, 0.1)
# loss = mean((target_rating - predicted_rating)**2)
loss_function = nn.MSELoss()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train the model for `epochs` passes over data[0] in mini-batches of `bs`.
def train(epochs = 10, bs = 64):
    for epoch in range(epochs):
        # Training the model: walk the training split (data[0]) in
        # consecutive [i, i+bs) windows via get_batch.
        i=0
        # Running loss accumulator and batch counter for this epoch —
        # presumably averaged for reporting further down; TODO confirm,
        # the loop body is truncated by the scrape.
        total_loss = 0.0
        ct = 0
        while i < len(data[0]):
            # x1 = user ids, x2 = item ids, y = target ratings.
            x1,x2,y = get_batch(data[0],i,i+bs)
            i+=bs
            # NOTE(review): the remainder of the training step (forward,
            # loss, backward, optimizer step) is not visible in this chunk.
OlderNewer