Skip to content

Instantly share code, notes, and snippets.

@erap129
Last active May 6, 2022 07:27
Show Gist options
  • Save erap129/9c463bb5f2d92d605fb8cbcf50e865ff to your computer and use it in GitHub Desktop.
Save erap129/9c463bb5f2d92d605fb8cbcf50e865ff to your computer and use it in GitHub Desktop.
# Compute one image embedding per movie poster, or load previously computed
# embeddings from a CSV cache when it exists.
import ast  # local import: the file's top-level import block is not visible from this section

img2vec = Img2Vec(cuda=False)
movie_df_with_plots_posters_filepath = f'{BASE_FOLDER}/MovieLens-1M/movie_df_with_plots_posters.csv'


def _parse_cached_embedding(value):
    """Turn a CSV-serialised embedding (e.g. ``"[0.1, 0.2]"``) back into a list.

    Values that are not strings are returned unchanged.
    """
    return ast.literal_eval(value) if isinstance(value, str) else value


if os.path.exists(movie_df_with_plots_posters_filepath):
    movies_df = pd.read_csv(movie_df_with_plots_posters_filepath)
    # `to_csv` stores each embedding list as its string repr, so the column
    # comes back as text. Parse it back into lists of floats; otherwise the
    # later `np.vstack` would produce an array of strings, not a float matrix.
    movies_df['image_embedding'] = movies_df['image_embedding'].apply(_parse_cached_embedding)
else:
    def get_movie_embedding(url):
        """Download the image at ``url`` and return its embedding as a list of floats.

        If the download fails or the payload is not a readable image, the URL
        is printed and a 512-element zero vector is returned instead.
        """
        try:
            # A timeout keeps a single stalled download from hanging the whole
            # apply; the resulting Timeout is a RequestException (an OSError
            # subclass) and is handled by the fallback below.
            response = requests.get(url, timeout=30)
            img = Image.open(BytesIO(response.content)).convert('RGB')
            embedding = img2vec.get_vec(img, tensor=True)
            return embedding.squeeze().numpy().astype(float).tolist()
        except (UnidentifiedImageError, OSError):
            print(f'failed URL {url}')
            # NOTE(review): 512 presumably matches the vector length of the
            # Img2Vec model configured above -- confirm if the model changes.
            return np.zeros(512).tolist()

    movies_df['image_embedding'] = movies_df['URL'].progress_apply(get_movie_embedding)
    movies_df.to_csv(movie_df_with_plots_posters_filepath)
# Stack the per-row embedding lists of `movies_df` into a single 2-D array.
all_embeddings = np.vstack(movies_df['image_embedding'].values)

# Item matrix and its self cosine-similarity for the `train_loocv` split.
item_matrix_images_trainset_loocv = get_item_matrix_with_inner_ids(
    all_embeddings,
    movies_df,
    train_loocv,
)
cosine_sim_images_trainset_loocv = cosine_similarity(
    item_matrix_images_trainset_loocv,
    item_matrix_images_trainset_loocv,
)

# Same computation for the `trainset` split.
item_matrix_images_trainset = get_item_matrix_with_inner_ids(
    all_embeddings,
    movies_df,
    trainset,
)
cosine_sim_images_trainset = cosine_similarity(
    item_matrix_images_trainset,
    item_matrix_images_trainset,
)

# Run the report for the custom-similarity KNN algorithm, handing each split
# its own precomputed similarity matrix and its own `sim_options` dict.
get_algorithm_report(
    CustomSimKNNAlgorithm,
    trainset,
    testset,
    train_loocv,
    test_loocv,
    movies_df,
    target_movie_id='movie_1',
    target_user_id='user_1',
    top_k=10,
    algo_kwargs_trainset={
        'similarities': cosine_sim_images_trainset,
        'sim_options': {'user_based': False},
    },
    algo_kwargs_trainset_loocv={
        'similarities': cosine_sim_images_trainset_loocv,
        'sim_options': {'user_based': False},
    },
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment