Skip to content

Instantly share code, notes, and snippets.

@DenverCoder1
Last active November 5, 2024 16:48
Show Gist options
  • Save DenverCoder1/f218260e3f5cfc6551fab88e7b07d9f0 to your computer and use it in GitHub Desktop.
Save DenverCoder1/f218260e3f5cfc6551fab88e7b07d9f0 to your computer and use it in GitHub Desktop.
Convert MovieLens CSV export to Letterboxd import format

Script for converting watched movies and Wishlist CSVs from MovieLens to Letterboxd format.

Steps:

  1. Download movielens-ratings.csv and movielens-logs.csv from https://movielens.org/profile/settings/import-export by clicking "export ratings" and "export activity logs". For watchlist import, also download movielens-wishlist.csv by clicking "export wishlist".
  2. In config.py, set RATINGS_CSV, LOGS_CSV, and WISHLIST_CSV to the paths of the movielens-ratings.csv, movielens-logs.csv, and movielens-wishlist.csv files, respectively.
  3. In config.py, set LETTERBOXD_WATCHED_EXPORT_PATH and LETTERBOXD_WATCHLIST_EXPORT_PATH to the locations where you want to save the Letterboxd CSV files for importing.
  4. Run the script using Python 3.7+.
  5. Go to https://letterboxd.com/import/ and import the watched movies and ratings file into "Watched" and the wishlist file into "Watchlist".

License: MIT

# Configuration for the conversion script, which reads these names via `from config import ...`.
#
# Input files exported from MovieLens - change these to the paths of your downloaded files.
# RATINGS_CSV is required for the watched-list export.
RATINGS_CSV = "/home/jonah/Downloads/movielens-ratings.csv"
# LOGS_CSV supplies the date each rating was added. If this file is not found, the script
# prints a warning and uses today's date as the WatchedDate of every movie.
LOGS_CSV = "/home/jonah/Downloads/movielens-logs.csv"
# WISHLIST_CSV is required for the watchlist export.
WISHLIST_CSV = "/home/jonah/Downloads/movielens-wishlist.csv"
# Output files to create for importing to Letterboxd - change these to the paths where you
# want the generated files to appear. The containing directory must already exist.
LETTERBOXD_WATCHED_EXPORT_PATH = "/home/jonah/Downloads/letterboxd-watched-export.csv"
LETTERBOXD_WATCHLIST_EXPORT_PATH = "/home/jonah/Downloads/letterboxd-watchlist-export.csv"
"""Script for converting watched movies and Wishlist CSVs from MovieLens to Letterboxd format."""
from __future__ import annotations
import csv
import datetime
import json
from typing import Any
from config import (
LOGS_CSV,
RATINGS_CSV,
WISHLIST_CSV,
LETTERBOXD_WATCHED_EXPORT_PATH,
LETTERBOXD_WATCHLIST_EXPORT_PATH,
)
def get_movies_to_rate() -> list[dict[str, Any]]:
    """Collect the movies rated on MovieLens as rows for a Letterboxd import CSV.

    Ratings CSV format:
        0: "movie_id", 1: "imdb_id", 2: "tmdb_id", 3: "rating", 4: "average_rating", 5: "title"
    Logs CSV format:
        0: "datetime", 1: "login_id", 2: "action_type", 3: "log_json"

    If the action_type is "rating" and the action in the log_json is "ADD", then the movie
    was rated on that date, and that date is used as its WatchedDate. When a movie has
    several such entries, the one appearing last in the logs wins.

    Every returned dict has the same keys in the same order, so the rows can be written
    with a single CSV header. A movie with no matching log entry gets an empty WatchedDate.
    If the logs CSV does not exist at all, a warning is printed and every movie gets
    today's date instead.

    Returns:
        list: One dict per rated movie, with the following keys:
            Title (str): The title of the movie
            imdbID (str): The IMDb ID of the movie
            tmdbID (str): The TMDb ID of the movie
            Rating (float): The rating of the movie, as stored in the ratings CSV
            WatchedDate (str): The date the movie was rated (YYYY-MM-DD), or "" if unknown

    Raises:
        FileNotFoundError: If the ratings CSV is not found
        ValueError: If a rating is not a number or a log timestamp is not in the
            "%Y-%m-%d %H:%M:%S.%f" format
    """
    movies = {}
    with open(RATINGS_CSV, newline="", encoding="utf-8") as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row; tolerate a completely empty file
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing newline)
                continue
            movies[row[0]] = {
                "Title": row[5],
                "imdbID": row[1],
                "tmdbID": row[2],
                "Rating": float(row[3]),
                # Always present so that every row shares the same columns. Left blank
                # until a rating date is found in the logs.
                # NOTE(review): assumes Letterboxd accepts a blank WatchedDate cell -
                # confirm against the Letterboxd import format documentation.
                "WatchedDate": "",
            }
    try:
        with open(LOGS_CSV, newline="", encoding="utf-8") as f:
            reader = csv.reader(f)
            next(reader, None)  # skip the header row
            for row in reader:
                if not row or row[2] != "rating":
                    continue
                log_json = json.loads(row[3])
                if log_json["action"] != "ADD":
                    continue
                # movie ids are the string keys read from the ratings CSV
                movie = movies.get(str(log_json["movieId"]))
                if movie is None:
                    continue
                rated_at = datetime.datetime.strptime(row[0], "%Y-%m-%d %H:%M:%S.%f")
                movie["WatchedDate"] = rated_at.strftime("%Y-%m-%d")
    except FileNotFoundError:
        print(
            f"\N{CROSS MARK} WARNING: LOGS_CSV '{LOGS_CSV}' not found, WatchedDate will be set to today for all movies. "
            "Make sure this file exists and is readable if you want to set the WatchedDate to "
            "the date the movie was rated."
        )
        # set WatchedDate to today if logs CSV doesn't exist
        today_ymd = datetime.datetime.now().strftime("%Y-%m-%d")
        for movie in movies.values():
            movie["WatchedDate"] = today_ymd
    return list(movies.values())
def get_movies_on_wishlist() -> list[dict[str, Any]]:
    """Read the MovieLens wishlist export and shape it for a Letterboxd watchlist import.

    Wishlist CSV format:
        0: "movie_id", 1: "imdb_id", 2: "tmdb_id", 3: "average_rating", 4: "title"

    Returns:
        list: One dict per wishlist movie, in file order, with the following keys:
            Title (str): The title of the movie
            imdbID (str): The IMDb ID of the movie
            tmdbID (str): The TMDb ID of the movie

    Raises:
        FileNotFoundError: If the wishlist CSV is not found
    """
    with open(WISHLIST_CSV, newline="", encoding="utf-8") as wishlist_file:
        rows = csv.reader(wishlist_file)
        next(rows)  # discard the header row
        return [
            {"Title": row[4], "imdbID": row[1], "tmdbID": row[2]}
            for row in rows
        ]
def data_to_csv(filename: str, data: list[dict[str, Any]]) -> None:
    """Write a list of dicts to a CSV file with a header row.

    The columns are the union of the keys of all rows, in first-seen order, so rows
    do not need identical keys: a key missing from a row is written as an empty
    cell. If ``data`` is empty (or no row has any key), the file is created empty.

    Args:
        filename (str): The name of the file to write to
        data (list): A list of dicts to write to the CSV file

    Raises:
        OSError: If the file cannot be opened for writing
    """
    # A dict keeps insertion order, giving an ordered, de-duplicated set of column names.
    columns = {}
    for row in data:
        columns.update(dict.fromkeys(row))
    with open(filename, "w", newline="", encoding="utf-8") as f:
        if not columns:
            # Nothing to write; the file has still been created (and truncated).
            return
        writer = csv.DictWriter(f, fieldnames=list(columns))
        writer.writeheader()
        writer.writerows(data)
class ConversionError(Exception):
    """Base class for every failure reported while converting a list.

    Attributes:
        message (str): Human-readable description of what went wrong
    """

    def __init__(self, message: str) -> None:
        super().__init__(message)
        self.message = message
class MissingImportFileError(ConversionError):
    """Raised when the config variable naming an input file has not been set.

    Attributes:
        list_type (str): The type of list that could not be created
        variable_name (str): The name of the config variable that holds no filename
    """

    def __init__(self, list_type: str, variable_name: str) -> None:
        reason = f"{list_type} not created because {variable_name} is not set in config.py"
        super().__init__(reason)
        self.list_type = list_type
        self.variable_name = variable_name
class ImportFileNotFoundError(ConversionError):
    """Raised when an input file cannot be read.

    Attributes:
        list_type (str): The type of list that could not be created
        filename (str): The name of the input file that could not be read
    """

    def __init__(self, list_type: str, filename: str) -> None:
        reason = (
            f"{list_type} not created because '{filename}' does not exist or is not readable. "
            "Make sure this file exists and is readable if you want to export this list."
        )
        super().__init__(reason)
        self.list_type = list_type
        self.filename = filename
class ExportFileNotFoundError(ConversionError):
    """Exception raised when an output file cannot be written to.

    Attributes:
        list_type (str): The type of list that could not be created
        filename (str): The name of the output file that could not be written
    """

    def __init__(self, list_type: str, filename: str) -> None:
        self.list_type = list_type
        self.filename = filename
        # list_type already names the list ("Watched list", "Wishlist"), so no extra
        # " list" is appended; this matches the wording of ImportFileNotFoundError.
        super().__init__(
            f"{list_type} not created because '{filename}' is not writable. "
            "Make sure the directory it is in exists and is writable if you want to export this list."
        )
def export_and_log_watched() -> list[dict[str, Any]]:
    """Convert the MovieLens ratings export into the Letterboxd watched-list CSV.

    Reads the rated movies via get_movies_to_rate() and writes them to
    LETTERBOXD_WATCHED_EXPORT_PATH.

    Returns:
        list: The movie dicts that were written to the export file

    Raises:
        MissingImportFileError: If RATINGS_CSV is empty or not set
        ImportFileNotFoundError: If an input file does not exist or is not readable
        ExportFileNotFoundError: If the export file cannot be written
    """
    if not RATINGS_CSV:
        raise MissingImportFileError("Watched list", "RATINGS_CSV")
    try:
        watched = get_movies_to_rate()
    except (FileNotFoundError, PermissionError) as err:
        # get_movies_to_rate() opens more than one file; err.filename is the path the
        # failing open() was given, so the message names the file that actually failed.
        raise ImportFileNotFoundError("Watched list", err.filename or RATINGS_CSV) from err
    try:
        data_to_csv(LETTERBOXD_WATCHED_EXPORT_PATH, watched)
    except (FileNotFoundError, PermissionError) as err:
        # FileNotFoundError: the target directory is missing; PermissionError: not writable
        raise ExportFileNotFoundError("Watched list", LETTERBOXD_WATCHED_EXPORT_PATH) from err
    return watched
def export_and_log_wishlist() -> list[dict[str, Any]]:
    """Convert the MovieLens wishlist export into the Letterboxd watchlist CSV.

    Reads the wishlist via get_movies_on_wishlist() and writes it to
    LETTERBOXD_WATCHLIST_EXPORT_PATH.

    Returns:
        list: The movie dicts that were written to the export file

    Raises:
        MissingImportFileError: If WISHLIST_CSV is empty or not set
        ImportFileNotFoundError: If the wishlist CSV does not exist or is not readable
        ExportFileNotFoundError: If the export file cannot be written
    """
    if not WISHLIST_CSV:
        raise MissingImportFileError("Wishlist", "WISHLIST_CSV")
    try:
        watchlist = get_movies_on_wishlist()
    except (FileNotFoundError, PermissionError) as err:
        raise ImportFileNotFoundError("Wishlist", WISHLIST_CSV) from err
    try:
        data_to_csv(LETTERBOXD_WATCHLIST_EXPORT_PATH, watchlist)
    except (FileNotFoundError, PermissionError) as err:
        # FileNotFoundError: the target directory is missing; PermissionError: not writable
        raise ExportFileNotFoundError("Wishlist", LETTERBOXD_WATCHLIST_EXPORT_PATH) from err
    return watchlist
if __name__ == "__main__":
    # The two exports are independent: a failure in one is reported as a warning
    # and does not prevent the other from being attempted.
    try:
        watched = export_and_log_watched()
    except ConversionError as err:
        print(f"\N{CROSS MARK} WARNING: {err.message}")
    else:
        print(
            f"\N{THUMBS UP SIGN} Created {LETTERBOXD_WATCHED_EXPORT_PATH} with {len(watched)} movies that have "
            "been watched and rated!"
        )

    try:
        watchlist = export_and_log_wishlist()
    except ConversionError as err:
        print(f"\N{CROSS MARK} WARNING: {err.message}")
    else:
        print(
            f"\N{THUMBS UP SIGN} Created {LETTERBOXD_WATCHLIST_EXPORT_PATH} with {len(watchlist)} movies from your MovieLens wishlist!"
        )
@Mister-Six
Copy link

Mister-Six commented May 5, 2024

Hi! Thanks for this - I needed this solution. Unfortunately, it didn't initially work for me - a friend said that "it only generates the watched value if it has it in the activity log. So your first movie doesn't appear to have a rating date," which led to the script only generating a 1kb file with only a few cells filled in. Possibly because I've been using Movielens since it started, and it might not have actually been logging watch dates back then.

They came up with this solution - replace this part of the code

with open(RATINGS_CSV, newline="", encoding="utf-8") as f:
reader = csv.reader(f)
next(reader)
for row in reader:
movies[row[0]] = {
"Title": row[5],
"imdbID": row[1],
"tmdbID": row[2],
"Rating": float(row[3])
}

with this

with open(RATINGS_CSV, newline="", encoding="utf-8") as f:
reader = csv.reader(f)
next(reader)
for row in reader:
movies[row[0]] = {
"Title": row[5],
"imdbID": row[1],
"tmdbID": row[2],
"Rating": float(row[3]),
"WatchedDate" : datetime.datetime.now().strftime("%Y-%m-%d")
}

This means that when an entry doesn't have a watch date, the script just replaces it with the date that the script is run. It worked for me - although now it looks like I watched 900 movies in a day!

Anyway, hope this helps you or someone else. Thanks!

@LewkyB
Copy link

LewkyB commented Nov 5, 2024

here is a simpler script that worked fine for me today

import csv
import re

# Input and output file names
input_filename = 'input.csv'  # Replace with your actual input file name
output_filename = 'output.csv'  # The output file compatible with Letterboxd

# Matches a title that ends with a four-digit year in parentheses, e.g. "Toy Story (1995)"
_TITLE_WITH_YEAR = re.compile(r'^(.*)\s\((\d{4})\)$')


def _split_title_and_year(title_field):
    """Split "Some Title (1999)" into ("Some Title", "1999"); the year is '' if absent."""
    match = _TITLE_WITH_YEAR.match(title_field)
    if match:
        return match.group(1), match.group(2)
    # If the year is not in parentheses at the end, keep the title as-is
    return title_field, ''


def convert(input_path, output_path):
    """Convert a ratings CSV with 'title' and 'rating' columns to a Letterboxd CSV.

    The output has the columns Title, Year and Rating. Titles are written unchanged:
    the csv module quotes fields containing quotes or commas itself, so no manual
    escaping is applied (adding backslashes would corrupt the title).

    Args:
        input_path: Path of the CSV to read; it must have 'title' and 'rating' columns
        output_path: Path of the CSV to create

    Returns:
        The number of movie rows written
    """
    written = 0
    with open(input_path, 'r', newline='', encoding='utf-8') as csvfile_in, \
            open(output_path, 'w', newline='', encoding='utf-8') as csvfile_out:
        reader = csv.DictReader(csvfile_in)
        # Define the field names as per Letterboxd's requirements
        fieldnames = ['Title', 'Year', 'Rating']
        writer = csv.DictWriter(csvfile_out, fieldnames=fieldnames, quoting=csv.QUOTE_MINIMAL)
        writer.writeheader()
        for row in reader:
            title, year = _split_title_and_year(row['title'])
            writer.writerow({'Title': title, 'Year': year, 'Rating': row['rating']})
            written += 1
    return written


if __name__ == '__main__':
    convert(input_filename, output_filename)
    print(f"Conversion complete. The output file '{output_filename}' is ready for import into Letterboxd.")

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment