Last active
May 31, 2023 02:41
-
-
Save onelharrison/15b25f0b4ece968be938ddaf8a5a64ad to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from knn_from_scratch import knn, euclidean_distance | |
def recommend_movies(movie_query, k_recommendations):
    """Return the CSV rows of the movies nearest to ``movie_query``.

    Reads ``movies_recommendation_data.csv`` from the current working
    directory, skips its header line, and treats every column from the
    third one onward as a numeric feature.

    Args:
        movie_query: Feature vector of the movie to compare against; it is
            handed to ``knn`` unchanged as the query.
        k_recommendations: Number of neighbours requested from ``knn``.

    Returns:
        A list of raw CSV rows (lists of strings), one per neighbour
        reported by ``knn``, in the order ``knn`` reported them.

    Raises:
        OSError: If the data file cannot be opened.
        ValueError: If a feature column cannot be converted to ``float``.
    """
    import csv  # local import so the block does not depend on file-level edits

    raw_movies_data = []
    # newline='' is what the csv module expects so it can handle line
    # endings (including ones embedded in quoted fields) itself.
    with open('movies_recommendation_data.csv', 'r', newline='') as md:
        reader = csv.reader(md)
        # Discard the header line; the default avoids StopIteration on an
        # empty file.
        next(reader, None)
        for row in reader:
            # Skip blank lines: they would otherwise become rows with an
            # empty feature vector and still be offered to knn.
            if not any(cell.strip() for cell in row):
                continue
            raw_movies_data.append(row)

    # Keep only the feature columns and convert them to numbers, since the
    # CSV reader yields strings. Row order matches raw_movies_data so the
    # indices reported by knn can be used to look up the raw rows.
    movies_recommendation_data = [
        [float(value) for value in row[2:]] for row in raw_movies_data
    ]

    # Only the neighbour list is needed; the second return value is
    # discarded, so choice_fn is a stub that returns None.
    recommendation_indices, _ = knn(
        movies_recommendation_data, movie_query, k=k_recommendations,
        distance_fn=euclidean_distance, choice_fn=lambda x: None
    )

    # Each neighbour entry is unpacked as a pair whose second item is the
    # row index (the first item is presumably the distance - verify
    # against knn_from_scratch).
    return [raw_movies_data[index] for _, index in recommendation_indices]
if __name__ == '__main__':
    # Feature vector describing The Post, used as the query movie.
    the_post = [7.2, 1, 1, 0, 0, 0, 0, 1, 0]

    # Ask for the five nearest movies to the query.
    nearest_rows = recommend_movies(the_post, 5)

    # The title is the second column of each returned row.
    for title in (movie_row[1] for movie_row in nearest_rows):
        print(title)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment