Skip to content

Instantly share code, notes, and snippets.

# Baseline run: instantiate NormalPredictor and hand it to both evaluation
# helpers, once with the regular train/test split and once with the
# leave-one-out split.
# NOTE(review): get_algo_results and get_hitrate_results are defined outside
# this snippet; what they fit, compute, or print cannot be confirmed from here.
normal_predictor = NormalPredictor()
get_algo_results(normal_predictor, trainset, testset)
get_hitrate_results(normal_predictor, train_loocv, test_loocv)
from collections import defaultdict
# Collect, per user, the (movieID, estimatedRating) pairs whose estimated
# rating is at least `minimumRating`.
# NOTE(review): this copy of the function is cut off (the body of the final
# loop is not visible) and its indentation has been lost. `n` is not used in
# the visible part; presumably it caps the length of each user's list, which
# should be confirmed against the complete source.
def GetTopN(predictions, n=10, minimumRating=4.0):
# Users seen for the first time start with an empty list, so append needs no
# key check.
topN = defaultdict(list)
# Each prediction is unpacked as a 5-tuple; the last field is ignored, and
# actualRating is not used in the visible part.
for userID, movieID, actualRating, estimatedRating, _ in predictions:
if (estimatedRating >= minimumRating):
topN[userID].append((movieID, estimatedRating))
# Second pass over every user's collected pairs; the loop body is missing here.
for userID, ratings in topN.items():
# Wrap the (user_id, movie_id, rating) columns of the ratings frame in a
# Dataset, declaring the rating scale as 1 to 5.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(
    ratings_df[['user_id', 'movie_id', 'rating']],
    reader,
)

# Regular split with test_size=0.25; no random_state is passed here.
trainset, testset = train_test_split(data, test_size=0.25)

# Leave-one-out splitter configured for a single split with a fixed seed;
# only the first (train, test) pair it produces is used.
LOOCV = LeaveOneOut(n_splits=1, random_state=1)
train_loocv, test_loocv = next(iter(LOOCV.split(data)))
# Drop movies whose 'movie_plot' is missing ...
movies_df = movies_df.loc[movies_df['movie_plot'].notna()]
# ... then keep only ratings whose movie_id is still present in the movie index.
ratings_df = ratings_df.loc[ratings_df['movie_id'].isin(movies_df.index)]
We can make this file beautiful and searchable if this error is corrected: Unclosed quoted field in line 2.
,movie_name,movie_plot
0,Toy Story (1995),"A group of living toys, who assume lifelessness around humans, are preparing to move into a new house with their owner Andy Davis, his sister Molly and their single mother. The toys become uneasy when Andy has his birthday party a week early; to calm them, Sheriff Woody, Andy's favorite toy and their leader, sends Sarge and his green army men to spy on the gift opening with a baby monitor. The other toys (which include Mr. Potato Head, Slinky Dog, Rex the tyrannosaur, Hamm the piggy bank, and Bo Peep the porcelain doll) are relieved when Andy receives nothing that could replace them. Andy then receives a last-minute surprise gift – a Buzz Lightyear action figure who believes he is a real space ranger. Buzz impresses the other toys with his various features and becomes Andy's new favorite, making Woody jealous. The day before the move, Andy's family plans for a dinner at Pizza Planet, where Andy is allowed to bring along only one toy. To ensure Andy chooses him and not
def get_wikipedia_page_name(raw_name):
    """Return the first ``wikipedia.search`` result for ``raw_name``.

    Args:
        raw_name: Query passed unchanged to ``wikipedia.search``.

    Returns:
        The first entry of the search result, or ``''`` when the search
        returns nothing.
    """
    matches = wikipedia.search(raw_name)
    return matches[0] if len(matches) != 0 else ''
def get_movie_plot(page_name):
try:
try:
# Read the poster file as a single headerless column named 'URL'.
posters_df = pd.read_csv(
    f'{BASE_FOLDER}/MovieLens-1M/movie_poster.csv',
    names=['URL'],
    header=None,
)
print(posters_df.head(5))

# Inner join on the index: only movies that have a poster row are kept.
movies_df = movies_df.join(posters_df, how='inner')
# Draw 8 random movies and show their posters, labelled with the movie names.
sampled_movie_df = movies_df.sample(n=8)
images = sampled_movie_df['URL'].to_list()
names = sampled_movie_df['movie_name'].to_list()
ipyplot.plot_images(images, names)
# Mean and count of ratings per genre, ordered by ascending mean rating.
rating_by_genre_df = (
    ratings_df.join(movies_df_exploded, on='movie_id')
    .groupby('genre')
    .agg({'rating': ['mean', 'count']})
    .sort_values(('rating', 'mean'))
    .reset_index()
)

# Flatten the two-level column labels by joining the levels with '_'.
# The genre column has an empty second level, so it ends up as 'genre_'
# (strip() only removes whitespace, not the trailing underscore).
rating_by_genre_df.columns = [
    '_'.join(levels).strip() for levels in rating_by_genre_df.columns.values
]

px.bar(rating_by_genre_df, x='genre_', y='rating_mean', height=300)
# Attach ratings (on movie_id) and then user attributes (on user_id) to the
# exploded movie/genre data.
combined_ratings_df = pd.merge(
    pd.merge(movies_df_exploded.rename_axis('movie_id'), ratings_df, on='movie_id'),
    users_df,
    on='user_id',
)

# Mean and count of ratings for every (genre, gender) pair.
combined_ratings_data = (
    combined_ratings_df
    .groupby(['genre', 'gender'])
    .agg({'rating': ['mean', 'count']})
    .reset_index()
)

# Flatten the two-level column labels with a space separator; strip() removes
# the trailing space on the key columns, giving 'genre', 'gender',
# 'rating mean' and 'rating count'.
combined_ratings_data.columns = [
    ' '.join(levels).strip() for levels in combined_ratings_data.columns.values
]

# Turn each gender's counts into a fraction of that gender's total number of
# rows in combined_ratings_df, so the two groups are comparable in the chart.
for gender_code in ('F', 'M'):
    combined_ratings_data.loc[
        combined_ratings_data['gender'] == gender_code, 'rating count'
    ] /= len(combined_ratings_df[combined_ratings_df['gender'] == gender_code])

px.bar(combined_ratings_data, x='genre', y='rating count', color='gender', barmode='group')
user_id movie_id rating time
0 1 1193 5 978300760
1 1 661 3 978302109
2 1 914 3 978301968
3 1 3408 4 978300275
4 1 2355 5 978824291