Created
June 5, 2019 18:56
-
-
Save deansublett/1fbd89ad4d5f26cd42c6bd36bdcc13d0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.metrics.pairwise import sigmoid_kernel

# Compute the sigmoid kernel between every pair of rows of tfv_matrix.
sig = sigmoid_kernel(tfv_matrix, tfv_matrix)

# Reverse mapping from movie title to its index label in movies_clean.
# Series.drop_duplicates() compares the *values* (the index labels), so it
# never removed repeated titles; filter on the title index instead so each
# title maps to exactly one scalar (the first occurrence wins).
indices = pd.Series(movies_clean.index, index=movies_clean['original_title'])
indices = indices[~indices.index.duplicated(keep='first')]
# Credit to Ibtesam Ahmed for the skeleton code
def give_rec(title, sig=sig, *, top_n=10):
    """Return the titles of the movies most similar to *title*.

    Args:
        title: Value of ``original_title`` to look up in ``indices``.
        sig: Square pairwise-similarity matrix; row ``i`` holds the scores
            of movie ``i`` against every other movie.
        top_n: Number of recommendations to return (default 10).

    Returns:
        The ``original_title`` entries of ``movies_clean`` for the
        ``top_n`` highest-scoring movies, excluding the queried movie.

    Raises:
        KeyError: If *title* is not present in ``indices``.

    NOTE(review): the value stored in ``indices`` is used as a row position
    in ``sig`` and with ``.iloc`` — this assumes ``movies_clean`` has a
    default 0..n-1 index; confirm against the code that builds it.
    """
    # Get the index corresponding to original_title
    idx = indices[title]
    # A title that occurs more than once yields a Series rather than a
    # scalar; fall back to its first occurrence so sig[idx] is a single row.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Get the pairwise similarity scores and sort them, best first
    sig_scores = sorted(
        enumerate(sig[idx]), key=lambda pair: pair[1], reverse=True
    )

    # Drop the queried movie by position instead of assuming it is ranked
    # first (ties could otherwise discard a genuine recommendation).
    movie_indices = [i for i, _ in sig_scores if i != idx][:top_n]

    # Most similar movies
    return movies_clean['original_title'].iloc[movie_indices]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment