This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train the estimator that obtained the lowest MAE in the grid search on the
# full training set, then print the recommendations for user 'Esposa'.
algo_svd_mae = gs.best_estimator['mae']
algo_svd_mae.fit(full_trainset)
print(get_rec_movies('Esposa', algo_svd_mae))  # list of movies

# Train the estimator that obtained the lowest RMSE in the grid search on the
# full training set, then print the recommendations for the same user.
algo_svd_rmse = gs.best_estimator['rmse']
algo_svd_rmse.fit(full_trainset)
print(get_rec_movies('Esposa', algo_svd_rmse))  # list of movies
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Collect the grid-search results into a DataFrame.
df_results = pd.DataFrame.from_dict(gs.cv_results)
# Remove the literal 'param_' text from the column names; regex=False makes
# the plain-string match explicit regardless of the pandas version default.
df_results.columns = df_results.columns.str.replace('param_', '', regex=False)

# Plot the heat maps: one row of axes per n_epochs value, three columns.
n_epochs = len(param_grid['n_epochs'])
# squeeze=False keeps `axes` two-dimensional even when the grid holds a single
# n_epochs value, so iterating over it row by row always yields rows of axes.
fig, axes = plt.subplots(
    nrows=n_epochs,
    ncols=3,
    figsize=(22, 6 * n_epochs),
    squeeze=False,
)
for ax_row, n_epoch in zip(axes, param_grid['n_epochs']): | |
for ax, metric in zip(ax_row, ['mae', 'rmse', 'time']): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_rec_movies(user_id:str, algoritmo, n_top: int =10) -> pd.DataFrame: | |
""" | |
Obtém uma lista de recomendação para N filmes para um | |
usuário. | |
Parameters: | |
user_id: o ID do usuário | |
n_top: O número de filmes desejados | |
Returns: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fit a BaselineOnly model on the full training set.
algo_base = BaselineOnly()
algo_base.fit(full_trainset)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Implementation based on
# https://github.com/NicolasHug/Surprise/blob/master/examples/benchmark.py
set_cell_seed(MY_SEED)

# The algorithms to evaluate
classes = (SVD, BaselineOnly)
kf = KFold(random_state=MY_SEED)  # makes sure the folds are the same
table = []
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Define the hyperparameter grid
param_grid = {
    'n_factors': [10, 100, 200, 400],
    'lr_all': [0.002, 0.005, 0.007, 0.009],
    'n_epochs': [20, 100, 200, 400],
}

# Fit the models on the grid with cross-validation
gs = GridSearchCV(SVD, param_grid, measures=['mae', 'rmse'], cv=5, n_jobs=-1)
gs.fit(data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the dataset from the ratings DataFrame, declaring a rating scale
# of 0.5 to 5, and derive a trainset that uses every rating.
reader = Reader(rating_scale=(0.5, 5))
data = Dataset.load_from_df(
    df_ratings[['userId', 'movieId', 'rating']],
    reader,
)
full_trainset = data.build_full_trainset()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def add_rating(userId:str, movie_name:str, rating:float): | |
""" | |
Adiciona a nota de um userId referente a filme. | |
Parameters: | |
userId: Id do usuário. | |
movie_name: Nome do filme. Aceita Expressão Regular | |
rating: Nota de 0.5 a 5 de 0.5 em 0.5. | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
dataset = 'ml-latest-small'

# Load the ratings; IDs are read as strings, ratings as floats.
df_ratings = pd.read_csv(
    f'{dataset}/ratings.csv',
    dtype={'userId': str, 'movieId': str, 'rating': float},
)
# The timestamp column is discarded right after loading.
df_ratings = df_ratings.drop(columns='timestamp')

# Load the movie information, with every column read as a string.
df_movies = pd.read_csv(f'{dataset}/movies.csv', dtype=str)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import datetime | |
import random | |
import pandas as pd | |
import numpy as np | |
from surprise import SVD, BaselineOnly, Reader, Dataset | |
from surprise.model_selection import GridSearchCV, cross_validate, KFold | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
%matplotlib inline |
NewerOlder