Last active
May 4, 2022 16:26
-
-
Save erap129/5ea94174eaeca99c5c272f06a67569ad to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
def GetTopN(predictions, n=10, minimumRating=4.0):
    """Group predictions by user and keep each user's ``n`` best estimates.

    Args:
        predictions: Iterable of 5-item records, unpacked as
            ``(userID, movieID, actualRating, estimatedRating, extra)``.
        n: Maximum number of entries kept per user.
        minimumRating: Only records whose estimated rating is at least this
            value are considered.

    Returns:
        A ``defaultdict(list)`` mapping each user ID to a list of
        ``(movieID, estimatedRating)`` tuples, ordered by estimated rating
        from highest to lowest and truncated to ``n`` entries.
    """
    per_user = defaultdict(list)

    # Pass 1: collect every candidate that clears the rating threshold.
    for record in predictions:
        user, movie, _actual, estimate, _extra = record
        if estimate >= minimumRating:
            per_user[user].append((movie, estimate))

    # Pass 2: rank each user's candidates and trim to the requested size.
    # Only existing keys are reassigned, so iterating the dict is safe.
    for user in per_user:
        ranked = sorted(per_user[user], key=lambda pair: pair[1], reverse=True)
        per_user[user] = ranked[:n]

    return per_user
def HitRate(topNPredicted, leftOutPredictions):
    """Return the fraction of left-out ratings that appear in the user's top-N.

    Args:
        topNPredicted: Mapping from user ID to a list of
            ``(movieID, predictedRating)`` pairs.
        leftOutPredictions: Iterable of records whose first item is the user
            ID and whose second item is the left-out movie ID.

    Returns:
        ``hits / total`` as a float, where ``total`` is the number of
        left-out records and ``hits`` is how many of them were found in the
        corresponding user's list. Returns ``0.0`` when there are no left-out
        records (instead of raising ``ZeroDivisionError``).
    """
    hits = 0
    total = 0

    # For each left-out rating
    for leftOut in leftOutPredictions:
        userID = leftOut[0]
        leftOutMovieID = leftOut[1]

        # Use .get() rather than indexing: a user without any top-N entry
        # must count as a miss, must not raise KeyError on a plain dict, and
        # must not insert an empty entry into a defaultdict.
        recommended = topNPredicted.get(userID, ())

        # Is it in the predicted top-N for this user?
        if any(leftOutMovieID == movieID for movieID, _ in recommended):
            hits += 1
        total += 1

    # Nothing was left out, so there is no rate to compute.
    if total == 0:
        return 0.0

    # Compute the overall hit rate
    return hits / total
def get_hitrate_results(algo, train_loocv, test_loocv):
    """Fit ``algo`` on the leave-one-out split, print its hit rate, and
    return the anti-testset predictions.

    Args:
        algo: Object exposing ``fit(trainset)`` and ``test(testset)``.
        train_loocv: Training set; must provide ``build_anti_testset()``.
        test_loocv: The left-out ratings passed to ``algo.test``.

    Returns:
        The predictions ``algo.test`` produced for the anti-testset built
        from ``train_loocv``.
    """
    algo.fit(train_loocv)

    # Predictions for the held-out ratings are what the top-N lists are
    # scored against.
    held_out = algo.test(test_loocv)

    # Predictions over the anti-testset are the pool the top-N lists are
    # drawn from.
    anti_testset_predictions = algo.test(train_loocv.build_anti_testset())

    hitrate = HitRate(GetTopN(anti_testset_predictions), held_out)
    print(f'HitRate: {hitrate}')
    return anti_testset_predictions
def get_algo_results(algo, trainset, testset):
    """Fit ``algo`` on ``trainset`` and report its RMSE on ``testset``.

    The RMSE is computed by ``accuracy.rmse`` on the output of
    ``algo.test(testset)``; this function itself returns nothing.
    NOTE(review): ``accuracy`` is not imported in the visible part of the
    file; presumably it is Surprise's ``accuracy`` module, which would print
    the value. Confirm.
    """
    algo.fit(trainset)
    accuracy.rmse(algo.test(testset))
def get_most_similar_movies(movies_df, movie_embeddings, trainset, target_movie_id, top_k=10):
    """Return the ``top_k`` rows of ``movies_df`` most similar to a movie.

    Similarity is the cosine similarity between the target movie's row of
    ``movie_embeddings`` and every row of ``movie_embeddings``.

    Args:
        movies_df: DataFrame indexed by raw movie ID.
        movie_embeddings: 2-D array with one row per inner item ID of
            ``trainset`` (rows are indexed by ``trainset.to_inner_iid``).
        trainset: Object providing ``to_inner_iid`` and ``to_raw_iid``.
        target_movie_id: Raw ID of the movie to compare against.
        top_k: Number of rows to return.

    Returns:
        The ``movies_df`` rows for the most similar movies, most similar
        first. The target movie is compared against itself as well, so it is
        normally among the returned rows.
    """
    inner_movie_id = trainset.to_inner_iid(target_movie_id)

    # Only the target's similarities are needed, so compare that single row
    # (kept 2-D via slicing) against all rows instead of building the full
    # N x N matrix.
    target_row = movie_embeddings[inner_movie_id:inner_movie_id + 1]
    target_sims = cosine_similarity(target_row, movie_embeddings)[0]

    # Rank descending and keep only the ids that will actually be returned,
    # so unrelated items missing from movies_df cannot cause a lookup error.
    best_inner_ids = np.argsort(target_sims)[::-1][:top_k]
    best_raw_ids = [trainset.to_raw_iid(inner_id) for inner_id in best_inner_ids]

    # The trailing iloc keeps the row cap even if movies_df has duplicate
    # index labels.
    most_similar_movies = movies_df.loc[best_raw_ids].iloc[:top_k]
    return most_similar_movies
def filter_predictions_for_user(predictions, user_id, movies_df, top_k=10):
    """Return the movies with the highest estimated rating for one user.

    Args:
        predictions: Iterable of objects with ``uid``, ``iid`` and ``est``
            attributes.
        user_id: Only predictions whose ``uid`` equals this value are used.
        movies_df: DataFrame indexed by the same IDs as ``iid``.
        top_k: Maximum number of predictions to keep.

    Returns:
        A new DataFrame holding the ``movies_df`` rows of the selected
        movies, ordered by estimated rating (highest first), with an added
        ``rating`` column containing that estimate. ``movies_df`` itself is
        not modified.
    """
    user_preds = [pred for pred in predictions if pred.uid == user_id]
    top_preds = sorted(user_preds, key=lambda pred: pred.est, reverse=True)[:top_k]

    movie_ids = [pred.iid for pred in top_preds]
    # Take an explicit copy before adding a column: assigning onto a .loc
    # selection can raise SettingWithCopyWarning.
    relevant_movies = movies_df.loc[movie_ids].copy()
    relevant_movies['rating'] = [pred.est for pred in top_preds]
    return relevant_movies
def get_algorithm_report(algo_class, trainset, testset, train_loocv, test_loocv, movies_df,
                         target_movie_id=1, target_user_id=1, top_k=10,
                         algo_args=None, algo_kwargs=None, calc_most_similar=True):
    """Train an algorithm and print an accuracy / hit-rate / examples report.

    Two separate instances of ``algo_class`` are built: one is fitted on
    ``trainset`` for the RMSE report, the other on ``train_loocv`` for the
    hit rate and the example tables.

    Args:
        algo_class: Callable returning an algorithm instance.
        trainset: Training data for the RMSE evaluation.
        testset: Test data for the RMSE evaluation.
        train_loocv: Leave-one-out training data for the hit-rate evaluation.
        test_loocv: Left-out ratings for the hit-rate evaluation.
        movies_df: DataFrame indexed by movie ID; the code reads its
            ``movie_name`` and ``genre`` columns.
        target_movie_id: Movie whose nearest neighbours are printed.
        target_user_id: User whose top predictions are printed.
        top_k: Number of rows shown in each printed table.
        algo_args: Positional arguments for ``algo_class`` (default: none).
        algo_kwargs: Keyword arguments for ``algo_class`` (default: none).
        calc_most_similar: When true, also print the most-similar-movies
            table; this reads the fitted instance's ``qi`` attribute if
            present, otherwise its ``sim`` attribute.

    Returns:
        None. All results are printed.
    """
    # None sentinels replace the mutable [] / {} defaults, which would be
    # shared between calls.
    algo_args = () if algo_args is None else algo_args
    algo_kwargs = {} if algo_kwargs is None else algo_kwargs

    algo_inst = algo_class(*algo_args, **algo_kwargs)
    get_algo_results(algo_inst, trainset, testset)

    algo_inst_for_hitrate = algo_class(*algo_args, **algo_kwargs)
    all_predictions = get_hitrate_results(algo_inst_for_hitrate, train_loocv, test_loocv)

    if calc_most_similar:
        # Prefer the ``qi`` attribute when the fitted instance has one;
        # otherwise fall back to its ``sim`` attribute.
        if hasattr(algo_inst_for_hitrate, 'qi'):
            sims = algo_inst_for_hitrate.qi
        else:
            sims = algo_inst_for_hitrate.sim
        most_similar_movies = get_most_similar_movies(movies_df, sims, train_loocv, target_movie_id, top_k=top_k)
        print(f'Most similar movies to {movies_df.loc[target_movie_id].movie_name}:')
        print(tabulate(most_similar_movies.head(top_k)[['movie_name', 'genre']], headers='keys'))

    # Forward top_k so a request for more than the helper's default of 10
    # rows is not silently truncated.
    predictions_for_user = filter_predictions_for_user(all_predictions, target_user_id, movies_df, top_k=top_k)
    print(f'Top predictions for user {target_user_id}:')
    print(tabulate(predictions_for_user.head(top_k)[['movie_name', 'genre']], headers='keys'))
class SVDWithTqdm(SVD):
    """``SVD`` variant whose ``test`` shows a tqdm progress bar."""

    def __init__(self, *args, **kwargs):
        # Pure pass-through: all construction arguments go to the base class.
        super().__init__(*args, **kwargs)

    def test(self, testset, verbose=False):
        """Predict every ``(uid, iid, rating)`` triple in ``testset``.

        Each triple is passed to ``self.predict`` along with ``verbose``;
        iteration is wrapped in ``tqdm`` labelled 'making predictions'.

        Returns:
            A list with one ``self.predict`` result per entry of ``testset``,
            in the same order.
        """
        results = []
        progress = tqdm(testset, desc='making predictions')
        for uid, iid, r_ui_trans in progress:
            results.append(self.predict(uid, iid, r_ui_trans, verbose=verbose))
        return results
class KNNBasicWithTqdm(KNNBasic):
    """``KNNBasic`` variant whose ``test`` shows a tqdm progress bar."""

    def __init__(self, *args, **kwargs):
        # Pure pass-through: all construction arguments go to the base class.
        super().__init__(*args, **kwargs)

    def test(self, testset, verbose=False):
        """Predict every ``(uid, iid, rating)`` triple in ``testset``.

        Each triple is passed to ``self.predict`` along with ``verbose``;
        iteration is wrapped in ``tqdm`` labelled 'making predictions'.

        Returns:
            A list with one ``self.predict`` result per entry of ``testset``,
            in the same order.
        """
        results = []
        progress = tqdm(testset, desc='making predictions')
        for uid, iid, r_ui_trans in progress:
            results.append(self.predict(uid, iid, r_ui_trans, verbose=verbose))
        return results
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment