Last active
August 15, 2023 09:20
-
-
Save tatocaster/dd9ac131123a742a690d41768ac6bcab to your computer and use it in GitHub Desktop.
Based on Nintendo game data, cluster games by their features and recommend similar games.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from sklearn.cluster import KMeans | |
# Load the video games dataset into a DataFrame.
data = pd.read_json('../data/nintendo-games.json')

# Build the clustering input: every selected column is one-hot encoded, so
# each distinct value of a column becomes its own indicator column.
features = pd.get_dummies(
    data[['meta_score', 'esrb_rating', 'genres']],
    columns=['meta_score', 'esrb_rating', 'genres'],
)

# Group the games into 20 k-means clusters (10 initialisations per fit).
km = KMeans(n_clusters=20, n_init=10)
km.fit(features)

# Store each game's cluster label next to its original columns so later
# lookups can filter on it.
data['cluster'] = km.labels_
def recommend_games(favorite_games, num_recommendations=5, games=None):
    """Recommend games that share a cluster with the given favorite games.

    For every cluster that contains at least one favorite, the remaining
    (non-favorite) games of that cluster are ranked by ``meta_score`` and then
    ``msrp``, both descending, and the top ``num_recommendations`` titles are
    collected.

    Parameters:
        favorite_games (list): Titles of favorite games, matched exactly
            against the ``title`` column.
        num_recommendations (int): Maximum number of titles taken from each
            favorite's cluster. Default is 5.
        games (pandas.DataFrame, optional): Table with ``title``, ``cluster``,
            ``meta_score`` and ``msrp`` columns. Defaults to the module-level
            ``data`` frame.

    Returns:
        set: Titles of the recommended games. Favorites are never included.

    Raises:
        ValueError: If none of the favorite titles are present in the table.
    """
    if games is None:
        games = data

    is_favorite = games['title'].isin(favorite_games)
    favorite_rows = games[is_favorite]
    if favorite_rows.empty:
        raise ValueError("No data found for the favorite games.")

    # Drop *all* favorites before ranking. Excluding only the current favorite
    # lets other favorites from the same cluster occupy top-N slots and then
    # vanish in a final clean-up, returning fewer titles than requested.
    candidates = games[~is_favorite].sort_values(
        ['meta_score', 'msrp'], ascending=False
    )

    recommended_games_set = set()
    # Favorites in the same cluster yield the same ranking, so visiting each
    # cluster once is enough.
    for cluster_id in favorite_rows['cluster'].unique():
        same_cluster = candidates[candidates['cluster'] == cluster_id]
        recommended_games_set.update(
            same_cluster['title'].head(num_recommendations)
        )
    return recommended_games_set
# Example usage: list titles exactly as they appear in the dataset's
# 'title' column.
favorite_games = [
]
# recommend_games raises ValueError when none of the titles match, which an
# empty list always triggers, so the call is skipped until titles are added.
recommended_games = recommend_games(favorite_games) if favorite_games else set()
print(recommended_games)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment