Skip to content

Instantly share code, notes, and snippets.

@MemphisMeng
Created August 12, 2020 19:16
Show Gist options
  • Save MemphisMeng/61d949e397cba1a52eaf0c3b4ba47ed3 to your computer and use it in GitHub Desktop.
Save MemphisMeng/61d949e397cba1a52eaf0c3b4ba47ed3 to your computer and use it in GitHub Desktop.
# movie profile
# Build a movie x genre indicator matrix: one row per movie, one 0/1 column per
# genre, indexed by movieId.
# rename() without inplace returns a new frame, so the column writes below
# never target a slice of `movies`.
movie_profile = movies[['id', 'title', 'genres']].rename(columns={'id': 'movieId'})
# Sorted so the genre column order is the same on every run (set order is not).
unique_genres = sorted({item.strip() for genre_list in all_genres for item in genre_list})
for genre in unique_genres:
    movie_profile[genre] = 0
# Positional lookup for each genre column, computed once outside the loop.
genre_position = {genre: movie_profile.columns.get_loc(genre) for genre in unique_genres}
for i, genre_string in enumerate(movie_profile['genres']):
    # Missing genres can show up as None or NaN; only real strings can be split.
    if not isinstance(genre_string, str):
        continue
    for g in genre_string.split(','):
        # Strip each token so it matches the stripped column names above.
        g = g.strip()
        if not g:
            continue
        # Single-step positional write; chained `frame[col].iloc[i] = 1` may
        # update a temporary copy and leave the frame unchanged.
        # A genre that is not in all_genres still raises KeyError here.
        movie_profile.iat[i, genre_position[g]] = 1
movie_profile = movie_profile.drop(columns=['title', 'genres']).set_index('movieId')
movie_profile.sort_index(axis=0, inplace=True)
# user profile
# Ratings matrix: one row per movieId, one column per userId; cells with no
# rating are NaN.
user_x_movie = pd.pivot_table(ratings, values='rating', index=['movieId'], columns=['userId'])
user_x_movie.sort_index(axis=0, inplace=True)
userIDs = user_x_movie.columns
# Collect one row per user and build the frame once. Growing a DataFrame with
# .loc inside the loop re-allocates it on every iteration and starts from
# object-dtype columns.
profile_rows = []
for _, user_ratings in tqdm(user_x_movie.items(), total=len(userIDs)):
    # Weight every movie's genre indicators by this user's rating (aligned on movieId).
    working_df = movie_profile.mul(user_ratings, axis=0)
    # mean() skips NaN, so unrated movies are ignored. Zeros are kept, so each
    # genre score is averaged over all rated movies, not only over the movies
    # that carry that genre.
    profile_rows.append(working_df.mean(axis=0).to_numpy())
user_profile = pd.DataFrame(profile_rows, index=userIDs, columns=movie_profile.columns)
# TFIDF
# Document frequency: column sums of the indicator matrix, i.e. how many
# movies carry each genre.
df = movie_profile.sum(axis=0)
# Inverse document frequency: natural log of (number of movies / document frequency).
idf = np.log(len(movies) / df)
# Scale each genre column by its IDF weight; the plain array is broadcast
# across the columns by position.
TFIDF = movie_profile * idf.values
# recommendation prediction
# Score every movie for every user: weight the movie's TF-IDF genre vector by
# the user's genre profile and sum across genres.
# Scores are gathered in a dict and turned into a frame once; inserting columns
# one at a time into an empty DataFrame fragments it as the user count grows.
predictions = {}
for i, user_id in enumerate(tqdm(user_x_movie.columns)):
    # user_profile rows are read by position, in the same order as the
    # user_x_movie columns.
    working_df = TFIDF.mul(user_profile.iloc[i], axis=1)
    predictions[user_id] = working_df.sum(axis=1)
# Rows are movies, columns are user ids (in user_x_movie column order).
df_predict = pd.DataFrame(predictions)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment