|
"""Script for converting watched movies and Wishlist CSVs from MovieLens to Letterboxd format.""" |
|
|
|
from __future__ import annotations |
|
|
|
import csv |
|
import datetime |
|
import json |
|
from typing import Any |
|
|
|
from config import ( |
|
LOGS_CSV, |
|
RATINGS_CSV, |
|
WISHLIST_CSV, |
|
LETTERBOXD_WATCHED_EXPORT_PATH, |
|
LETTERBOXD_WATCHLIST_EXPORT_PATH, |
|
) |
|
|
|
|
|
def get_movies_to_rate() -> list[dict[str, Any]]:
    """Get the movies rated on MovieLens as rows for a CSV file that can be imported to Letterboxd.

    Rating CSV format:
        0: "movie_id", 1: "imdb_id", 2: "tmdb_id", 3: "rating", 4: "average_rating", 5: "title"

    Logs CSV format:
        0: "datetime", 1: "login_id", 2: "action_type", 3: "log_json"

    If the action_type is "rating" and the action in the log_json is "ADD", then the movie was rated on that
    date, and that date becomes the movie's WatchedDate. A movie with no such log entry keeps an empty
    WatchedDate, so every returned dict has the same keys and can be written with a single CSV header.

    Returns:
        list: One dict per rated movie, each with the following keys:
            Title (str): The title of the movie
            imdbID (str): The IMDb ID of the movie
            tmdbID (str): The TMDb ID of the movie
            Rating (float): The rating of the movie, parsed from the ratings CSV
            WatchedDate (str): The date the movie was rated according to the logs (YYYY-MM-DD); an empty
                string if the logs hold no rating date for the movie; today's date for every movie if
                the logs CSV is not found

    Raises:
        FileNotFoundError: If the ratings CSV is not found
    """
    movies: dict[str, dict[str, Any]] = {}

    with open(RATINGS_CSV, newline="", encoding="utf-8") as f:
        reader = csv.reader(f)
        next(reader)  # skip the header row
        for row in reader:
            movies[row[0]] = {
                "Title": row[5],
                "imdbID": row[1],
                "tmdbID": row[2],
                "Rating": float(row[3]),
                # Always present, so rows never differ in their keys; stays blank unless
                # the logs below supply a rating date for this movie.
                "WatchedDate": "",
            }

    try:
        with open(LOGS_CSV, newline="", encoding="utf-8") as f:
            reader = csv.reader(f)
            next(reader)  # skip the header row
            for row in reader:
                if row[2] != "rating":
                    continue
                log_json = json.loads(row[3])
                if log_json["action"] != "ADD":
                    continue
                # The ratings CSV is keyed by the movie_id column as text, so compare as str.
                movie_id = str(log_json["movieId"])
                if movie_id in movies:
                    rated_at = datetime.datetime.strptime(row[0], "%Y-%m-%d %H:%M:%S.%f")
                    movies[movie_id]["WatchedDate"] = rated_at.strftime("%Y-%m-%d")
    except FileNotFoundError:
        print(
            f"\N{CROSS MARK} WARNING: LOGS_CSV '{LOGS_CSV}' not found, WatchedDate will be set to today for all movies. "
            "Make sure this file exists and is readable if you want to set the WatchedDate to "
            "the date the movie was rated."
        )
        # set WatchedDate to today if logs CSV doesn't exist
        today_ymd = datetime.datetime.now().strftime("%Y-%m-%d")
        for movie in movies.values():
            movie["WatchedDate"] = today_ymd

    return list(movies.values())
|
|
|
|
|
def get_movies_on_wishlist() -> list[dict[str, Any]]:
    """Read the MovieLens wishlist and return its movies as rows for a Letterboxd watchlist CSV.

    Wishlist CSV format:
        0: "movie_id", 1: "imdb_id", 2: "tmdb_id", 3: "average_rating", 4: "title"

    Returns:
        list: One dict per wishlist movie, each with the following keys:
            Title (str): The title of the movie
            imdbID (str): The IMDb ID of the movie
            tmdbID (str): The TMDb ID of the movie

    Raises:
        FileNotFoundError: If the wishlist CSV is not found
    """
    with open(WISHLIST_CSV, newline="", encoding="utf-8") as wishlist_file:
        records = csv.reader(wishlist_file)
        next(records)  # discard the header row
        return [
            {"Title": record[4], "imdbID": record[1], "tmdbID": record[2]}
            for record in records
        ]
|
|
|
|
|
def data_to_csv(filename: str, data: list[dict[str, Any]]) -> None:
    """Write a list of dicts to a CSV file, one row per dict.

    The header is the union of the keys of all rows, in first-seen order, so rows are allowed to have
    differing keys; a key that a row lacks is written as an empty cell. If no row has any key (for
    example when data is empty), the file is created empty, without a header.

    Args:
        filename (str): The name of the file to write to
        data (list): A list of dicts to write to the CSV file
    """
    # dict.fromkeys removes duplicates while keeping first-seen order, so the header is
    # stable and identical to the first row's keys whenever all rows share the same keys.
    fieldnames = list(dict.fromkeys(key for row in data for key in row))
    with open(filename, "w", newline="", encoding="utf-8") as f:
        if not fieldnames:
            # Nothing to write; opening in "w" mode has already left the file empty.
            return
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)
|
|
|
|
|
class ConversionError(Exception):
    """Base class for the errors raised when a list cannot be converted.

    Attributes:
        message (str): The message to display
    """

    def __init__(self, message: str) -> None:
        super().__init__(message)
        # Kept as an attribute as well, so handlers can read it without going through args.
        self.message = message
|
|
|
|
|
class MissingImportFileError(ConversionError):
    """Raised when the config variable that names an input file is not set.

    Attributes:
        list_type (str): The type of list that was not created
        variable_name (str): The name of the config variable that is not set
    """

    def __init__(self, list_type: str, variable_name: str) -> None:
        reason = f"{list_type} not created because {variable_name} is not set in config.py"
        super().__init__(reason)
        self.list_type = list_type
        self.variable_name = variable_name
|
|
|
|
|
class ImportFileNotFoundError(ConversionError):
    """Raised when an input file cannot be read.

    Attributes:
        list_type (str): The type of list that was not created
        filename (str): The name of the file that could not be read
    """

    def __init__(self, list_type: str, filename: str) -> None:
        reason = (
            f"{list_type} not created because '{filename}' does not exist or is not readable. "
            "Make sure this file exists and is readable if you want to export this list."
        )
        super().__init__(reason)
        self.list_type = list_type
        self.filename = filename
|
|
|
|
|
class ExportFileNotFoundError(ConversionError):
    """Exception raised when a file cannot be written to.

    Attributes:
        list_type (str): The type of list that was not created
        filename (str): The name of the file that could not be written
    """

    def __init__(self, list_type: str, filename: str) -> None:
        self.list_type = list_type
        self.filename = filename
        # list_type already names the list (e.g. "Watched list", "Wishlist"), so the message
        # must not append another "list" -- same template as the sibling exceptions.
        super().__init__(
            f"{list_type} not created because '{filename}' is not writable. "
            "Make sure the directory it is in exists and is writable if you want to export this list."
        )
|
|
|
|
|
def export_and_log_watched() -> list[dict[str, Any]]:
    """Convert the MovieLens ratings into the Letterboxd watched-list CSV.

    Reads the rated movies with get_movies_to_rate() and writes them to
    LETTERBOXD_WATCHED_EXPORT_PATH with data_to_csv().

    Returns:
        list: The movie dicts that were written to the export file

    Raises:
        MissingImportFileError: If RATINGS_CSV is not set in config.py
        ImportFileNotFoundError: If the ratings CSV does not exist, or an input CSV cannot be read
        ExportFileNotFoundError: If the export file cannot be written
    """
    if not RATINGS_CSV:
        raise MissingImportFileError("Watched list", "RATINGS_CSV")

    try:
        watched = get_movies_to_rate()
    except OSError as err:
        # OSError also covers PermissionError and similar, so an unreadable file is reported
        # the same way as a missing one instead of escaping as a traceback. err.filename is
        # the path that actually failed, when the error carries one.
        raise ImportFileNotFoundError("Watched list", err.filename or RATINGS_CSV) from err

    try:
        data_to_csv(LETTERBOXD_WATCHED_EXPORT_PATH, watched)
    except OSError as err:
        # Missing directory, no write permission, etc. all mean "not writable".
        raise ExportFileNotFoundError("Watched list", LETTERBOXD_WATCHED_EXPORT_PATH) from err

    return watched
|
|
|
|
|
def export_and_log_wishlist() -> list[dict[str, Any]]:
    """Convert the MovieLens wishlist into the Letterboxd watchlist CSV.

    Reads the wishlist movies with get_movies_on_wishlist() and writes them to
    LETTERBOXD_WATCHLIST_EXPORT_PATH with data_to_csv().

    Returns:
        list: The movie dicts that were written to the export file

    Raises:
        MissingImportFileError: If WISHLIST_CSV is not set in config.py
        ImportFileNotFoundError: If the wishlist CSV does not exist or cannot be read
        ExportFileNotFoundError: If the export file cannot be written
    """
    if not WISHLIST_CSV:
        raise MissingImportFileError("Wishlist", "WISHLIST_CSV")

    try:
        watchlist = get_movies_on_wishlist()
    except OSError as err:
        # OSError also covers PermissionError and similar, so an unreadable file is reported
        # the same way as a missing one instead of escaping as a traceback.
        raise ImportFileNotFoundError("Wishlist", WISHLIST_CSV) from err

    try:
        data_to_csv(LETTERBOXD_WATCHLIST_EXPORT_PATH, watchlist)
    except OSError as err:
        # Missing directory, no write permission, etc. all mean "not writable".
        raise ExportFileNotFoundError("Wishlist", LETTERBOXD_WATCHLIST_EXPORT_PATH) from err

    return watchlist
|
|
|
|
|
if __name__ == "__main__":
    # Each export is attempted independently: a failure of one is reported as a warning
    # and does not prevent the other from running.
    try:
        watched = export_and_log_watched()
    except ConversionError as err:
        print(f"\N{CROSS MARK} WARNING: {err.message}")
    else:
        print(
            f"\N{THUMBS UP SIGN} Created {LETTERBOXD_WATCHED_EXPORT_PATH} with {len(watched)} movies that have "
            "been watched and rated!"
        )

    try:
        watchlist = export_and_log_wishlist()
    except ConversionError as err:
        print(f"\N{CROSS MARK} WARNING: {err.message}")
    else:
        print(
            f"\N{THUMBS UP SIGN} Created {LETTERBOXD_WATCHLIST_EXPORT_PATH} with {len(watchlist)} movies from your MovieLens wishlist!"
        )
Hi! Thanks for this - I needed this solution. Unfortunately, it didn't work for me at first. A friend explained that "it only generates the watched value if it has it in the activity log. So your first movie doesn't appear to have a rating date," which caused the script to generate only a 1 KB file with just a few cells filled in. This is possibly because I've been using MovieLens since it started, and it might not have been logging watch dates back then.
They came up with this solution: replace this part of the code:
with open(RATINGS_CSV, newline="", encoding="utf-8") as f:
reader = csv.reader(f)
next(reader)
for row in reader:
movies[row[0]] = {
"Title": row[5],
"imdbID": row[1],
"tmdbID": row[2],
"Rating": float(row[3])
}
with this:
with open(RATINGS_CSV, newline="", encoding="utf-8") as f:
reader = csv.reader(f)
next(reader)
for row in reader:
movies[row[0]] = {
"Title": row[5],
"imdbID": row[1],
"tmdbID": row[2],
"Rating": float(row[3]),
"WatchedDate" : datetime.datetime.now().strftime("%Y-%m-%d")
}
This means that when an entry doesn't have a watch date, the script falls back to the date on which the script is run. It worked for me - although now it looks like I watched 900 movies in a day!
Anyway, hope this helps you or someone else. Thanks!