This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd

# Both CSV files are read with header=None, i.e. their first line is treated
# as data rather than as column labels, so the labels are supplied explicitly
# and shared between the two reads to keep them in sync.
_CSV_COLUMNS = ['Class Index', 'Title', 'Text']

# Load the two CSV files from the current working directory using the
# pure-Python parser engine and UTF-8 decoding.
train_data = pd.read_csv('train.csv', engine='python', encoding='utf-8',
                         header=None, names=_CSV_COLUMNS)
test_data = pd.read_csv('test.csv', engine='python', encoding='utf-8',
                        header=None, names=_CSV_COLUMNS)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_triplets_hard(batch_size, X_usr, X_item, df, return_cache = False): | |
""" | |
Returns the list of three arrays to feed the model. | |
Parameters | |
---------- | |
batch_size : int | |
size of the batch. | |
X_usr : numpy array of shape (n_users, n_user_features) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
print("Starting training process!") | |
print("-------------------------------------") | |
t_start = time.time() | |
for i in range(1, n_iter+1): | |
triplets = get_triplets_hard(batch_size, X_usr, X_item, df_matrix) | |
loss = network_train.train_on_batch(triplets, None) | |
n_iteration += 1 | |
if i % evaluate_every == 0: | |
print("\n ------------- \n") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_posneg(df, anchor): | |
""" | |
Given a user id anchor, it gives back the max number of triplets [anchor, positive, negative] | |
available. | |
Triplets are randomly shuffled to better feed the training network. | |
Parameters | |
---------- | |
df : Pandas DataFrame | |
Dataframe containing ratings, having user id as rows, movie id as columns |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def build_model(n_users, n_items, emb_dim = 30): | |
''' | |
Define the Keras Model for training | |
Parameters | |
---------- | |
n_users : int | |
number of users | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def build_embedding(df, features, emb_dim = 10, name = 'embedding_layer'): | |
''' | |
Define the embedding neural network to encode features in a emb_dim-dimensional vector. | |
Parameters | |
---------- | |
df : pandas DataFrame | |
dataframe containing input metadata | |
features : list of str |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class TripletLossLayer(Layer): | |
""" | |
Layer object to minimise the triplet loss. | |
Here we implement the Bayesian Personal Ranking triplet loss. | |
""" | |
def __init__(self, **kwargs): | |
super(TripletLossLayer, self).__init__(**kwargs) | |
def bpr_triplet_loss(self, inputs): | |
""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reshape the ratings table into a wide matrix: one row per 'UserId', one
# column per 'MovieId', with each cell holding the matching 'Rating'.
# NOTE(review): per the pandas docs, (user, movie) pairs absent from df_rating
# come out as NaN, and duplicate (UserId, MovieId) pairs raise ValueError --
# confirm df_rating is de-duplicated before this point.
df_matrix = df_rating.pivot(index='UserId', columns='MovieId', values='Rating')
NewerOlder