# Skip to content
#
# Instantly share code, notes, and snippets.
#
# @erap129
# Last active May 4, 2022 16:26
# Show Gist options
#   - Save erap129/5ea94174eaeca99c5c272f06a67569ad to your computer and use it in GitHub Desktop.
from collections import defaultdict
def GetTopN(predictions, n=10, minimumRating=4.0):
    """Collect, per user, the movies with the highest estimated ratings.

    Args:
        predictions: Iterable of 5-item records, unpacked as
            ``(userID, movieID, actualRating, estimatedRating, extra)``.
        n: Maximum number of movies kept for each user.
        minimumRating: Only estimates greater than or equal to this value
            are considered.

    Returns:
        A ``defaultdict(list)`` mapping each user ID to at most ``n``
        ``(movieID, estimatedRating)`` tuples ordered from highest to lowest
        estimate. Users without any qualifying estimate get no entry.
    """
    per_user = defaultdict(list)

    qualifying = (
        (user, movie, estimate)
        for user, movie, _actual, estimate, _extra in predictions
        if estimate >= minimumRating
    )
    for user, movie, estimate in qualifying:
        per_user[user].append((movie, estimate))

    # Rebinding existing keys while iterating is safe: the key set is unchanged.
    for user in per_user:
        ranked = sorted(per_user[user], key=lambda pair: pair[1], reverse=True)
        per_user[user] = ranked[:n]

    return per_user
def HitRate(topNPredicted, leftOutPredictions):
    """Return the fraction of left-out ratings that appear in the top-N lists.

    Args:
        topNPredicted: Mapping from user ID to a list of
            ``(movieID, predictedRating)`` tuples.
        leftOutPredictions: Iterable of records whose first two items are the
            user ID and the ID of the movie that was held out for that user.

    Returns:
        ``hits / total`` as a float, where a hit is a left-out movie found in
        the same user's top-N list. Returns ``0.0`` when there are no left-out
        records, instead of raising ``ZeroDivisionError``.
    """
    hits = 0
    total = 0

    for leftOut in leftOutPredictions:
        userID = leftOut[0]
        leftOutMovieID = leftOut[1]

        # .get() keeps users without a top-N list from raising KeyError on a
        # plain dict, and from silently adding empty entries to a defaultdict.
        recommended = topNPredicted.get(userID, ())
        if any(leftOutMovieID == movieID for movieID, _ in recommended):
            hits += 1
        total += 1

    if total == 0:
        return 0.0
    return hits / total
def get_hitrate_results(algo, train_loocv, test_loocv):
    """Fit ``algo`` on a leave-one-out split and print its top-N hit rate.

    Args:
        algo: Object exposing ``fit(trainset)`` and ``test(testset)``.
        train_loocv: Training set; must provide ``build_anti_testset()``.
        test_loocv: The held-out ratings passed to ``algo.test``.

    Returns:
        The predictions ``algo.test`` produced for the anti-testset built
        from ``train_loocv``.
    """
    algo.fit(train_loocv)

    # Predictions for the held-out ratings come first, then for the
    # anti-testset, from which the top-N lists are built.
    held_out = algo.test(test_loocv)
    all_predictions = algo.test(train_loocv.build_anti_testset())

    hitrate = HitRate(GetTopN(all_predictions), held_out)
    print(f'HitRate: {hitrate}')
    return all_predictions
def get_algo_results(algo, trainset, testset):
    """Fit ``algo`` on ``trainset`` and score its predictions on ``testset``.

    Args:
        algo: Object exposing ``fit(trainset)`` and ``test(testset)``.
        trainset: Data passed to ``algo.fit``.
        testset: Data passed to ``algo.test``.

    Returns:
        Whatever ``accuracy.rmse`` returns for the test predictions
        (presumably the RMSE as a float, if ``accuracy`` is Surprise's
        module - confirm against the file's imports). The value used to be
        discarded; returning it lets callers record the score.
    """
    algo.fit(trainset)
    predictions = algo.test(testset)
    return accuracy.rmse(predictions)
def get_most_similar_movies(movies_df, movie_embeddings, trainset, target_movie_id, top_k=10):
    """Return the ``top_k`` rows of ``movies_df`` most similar to one movie.

    Args:
        movies_df: DataFrame indexed by raw movie ID.
        movie_embeddings: 2-D array-like with one row per inner item ID
            (assumed to support row slicing, e.g. a NumPy array).
        trainset: Object providing ``to_inner_iid`` and ``to_raw_iid`` for
            translating between raw and inner item IDs.
        target_movie_id: Raw ID of the movie to compare against.
        top_k: Number of rows to return.

    Returns:
        Up to ``top_k`` rows of ``movies_df``, ordered by decreasing cosine
        similarity to the target movie. The target movie itself is not
        filtered out, so it will normally be among the results.
    """
    inner_movie_id = trainset.to_inner_iid(target_movie_id)

    # Only the target's row of the similarity matrix is needed, so compare
    # that single vector against all rows instead of building the full
    # N x N matrix.
    target_vector = movie_embeddings[inner_movie_id:inner_movie_id + 1]
    target_sims = cosine_similarity(target_vector, movie_embeddings)[0]

    # Slice before translating IDs so only top_k lookups are performed.
    ranked_inner_ids = np.argsort(target_sims)[::-1][:top_k]
    ranked_raw_ids = [trainset.to_raw_iid(inner_id) for inner_id in ranked_inner_ids]

    return movies_df.loc[ranked_raw_ids].iloc[:top_k]
def filter_predictions_for_user(predictions, user_id, movies_df, top_k=10):
    """Return the movies with the highest estimated rating for one user.

    Args:
        predictions: Iterable of objects exposing ``uid``, ``iid`` and ``est``.
        user_id: Only predictions whose ``uid`` equals this value are kept.
        movies_df: DataFrame indexed by the same IDs as ``pred.iid``.
        top_k: Maximum number of rows to return.

    Returns:
        A new DataFrame with the matching rows of ``movies_df`` ordered from
        highest to lowest estimate, plus a ``rating`` column holding the
        estimates. ``movies_df`` itself is not modified.
    """
    user_preds = (pred for pred in predictions if pred.uid == user_id)
    top_preds = sorted(user_preds, key=lambda pred: pred.est, reverse=True)[:top_k]

    movie_ids = [pred.iid for pred in top_preds]
    ratings = [pred.est for pred in top_preds]

    # assign() builds a fresh frame; setting a column on the result of a
    # .loc selection is the chained-assignment pattern pandas warns about.
    return movies_df.loc[movie_ids].assign(rating=ratings)
def get_algorithm_report(algo_class, trainset, testset, train_loocv, test_loocv, movies_df,
                         target_movie_id=1, target_user_id=1, top_k=10,
                         algo_args=None, algo_kwargs=None, calc_most_similar=True):
    """Train an algorithm twice and print an evaluation report.

    One instance is fitted on ``trainset`` and scored on ``testset`` through
    ``get_algo_results``. A second, fresh instance is fitted on the
    leave-one-out split through ``get_hitrate_results``. Its predictions
    drive the "most similar movies" and "top predictions" tables.

    Args:
        algo_class: Callable returning a new algorithm instance.
        trainset: Training data for the accuracy run.
        testset: Test data for the accuracy run.
        train_loocv: Training data for the leave-one-out run.
        test_loocv: Held-out data for the leave-one-out run.
        movies_df: DataFrame indexed by raw movie ID with ``movie_name`` and
            ``genre`` columns.
        target_movie_id: Raw movie ID used for the similarity table.
        target_user_id: User whose top predictions are printed.
        top_k: Number of rows shown in each table.
        algo_args: Positional arguments for ``algo_class`` (default: none).
        algo_kwargs: Keyword arguments for ``algo_class`` (default: none).
        calc_most_similar: When true, also print the similarity table. The
            fitted instance must then expose either ``qi`` or ``sim``.

    Returns:
        None. All results are printed.
    """
    # None sentinels replace the former mutable [] / {} defaults.
    algo_args = () if algo_args is None else algo_args
    algo_kwargs = {} if algo_kwargs is None else algo_kwargs

    get_algo_results(algo_class(*algo_args, **algo_kwargs), trainset, testset)

    # A separate instance is used so the leave-one-out fit starts fresh.
    loocv_algo = algo_class(*algo_args, **algo_kwargs)
    all_predictions = get_hitrate_results(loocv_algo, train_loocv, test_loocv)

    if calc_most_similar:
        # Prefer ``qi`` when the instance has it; otherwise fall back to ``sim``.
        if hasattr(loocv_algo, 'qi'):
            sims = loocv_algo.qi
        else:
            sims = loocv_algo.sim
        most_similar_movies = get_most_similar_movies(
            movies_df, sims, train_loocv, target_movie_id, top_k=top_k)
        print(f'Most similar movies to {movies_df.loc[target_movie_id].movie_name}:')
        print(tabulate(most_similar_movies.head(top_k)[['movie_name', 'genre']], headers='keys'))

    # Forward top_k: the helper's own default of 10 used to cap this table
    # even when a larger top_k was requested.
    predictions_for_user = filter_predictions_for_user(
        all_predictions, target_user_id, movies_df, top_k=top_k)
    print(f'Top predictions for user {target_user_id}:')
    print(tabulate(predictions_for_user.head(top_k)[['movie_name', 'genre']], headers='keys'))
class SVDWithTqdm(SVD):
    """``SVD`` subclass whose ``test`` shows a tqdm progress bar.

    ``SVD`` and ``tqdm`` are not imported in the visible code; they must be
    provided by the surrounding module.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments unchanged to ``SVD.__init__``."""
        super().__init__(*args, **kwargs)

    def test(self, testset, verbose=False):
        """Predict every ``(uid, iid, rating)`` entry of ``testset``.

        Args:
            testset: Iterable of 3-item tuples; each is passed positionally
                to ``self.predict``.
            verbose: Forwarded to ``self.predict``.

        Returns:
            A list with one ``self.predict`` result per entry, in order.
        """
        # NOTE(review): the original comment stated that ratings are
        # translated back to their original scale; presumably that happens
        # inside predict() - confirm.
        progress = tqdm(testset, desc='making predictions')
        results = []
        for user_id, item_id, known_rating in progress:
            results.append(self.predict(user_id, item_id, known_rating, verbose=verbose))
        return results
class KNNBasicWithTqdm(KNNBasic):
    """``KNNBasic`` subclass whose ``test`` shows a tqdm progress bar.

    ``KNNBasic`` and ``tqdm`` are not imported in the visible code; they
    must be provided by the surrounding module.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments unchanged to ``KNNBasic.__init__``."""
        super().__init__(*args, **kwargs)

    def test(self, testset, verbose=False):
        """Predict every ``(uid, iid, rating)`` entry of ``testset``.

        Args:
            testset: Iterable of 3-item tuples; each is passed positionally
                to ``self.predict``.
            verbose: Forwarded to ``self.predict``.

        Returns:
            A list with one ``self.predict`` result per entry, in order.
        """
        # NOTE(review): the original comment stated that ratings are
        # translated back to their original scale; presumably that happens
        # inside predict() - confirm.
        progress = tqdm(testset, desc='making predictions')
        results = []
        for user_id, item_id, known_rating in progress:
            results.append(self.predict(user_id, item_id, known_rating, verbose=verbose))
        return results
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment