Skip to content

Instantly share code, notes, and snippets.

@nburrus
Created March 21, 2025 09:10
Show Gist options
  • Save nburrus/6c13fc8c40d72733a06cc863ed7a3b5e to your computer and use it in GitHub Desktop.
Save nburrus/6c13fc8c40d72733a06cc863ed7a3b5e to your computer and use it in GitHub Desktop.
#! /usr/bin/env python3
import os
import json
import re
import argparse
import shutil
import logging
import sys
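# Usage:
#   1. Generate a file list from inside the takeout directory:
#        cd "Google Photos"
#        find . -type f > file_list.txt
#   2. Run the script from that same directory, so that the paths stored in
#      file_list.txt can be resolved when files are copied:
#        consolidate_takeout.py . file_list.txt album_name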
def parse_arguments(argv=None):
    """Parse the command-line arguments of the script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default), argparse reads the process command line.

    Returns:
        An ``argparse.Namespace`` with the string attributes
        ``takeout_dir``, ``file_list`` and ``album_name``.
    """
    parser = argparse.ArgumentParser(description='Process a specific album in a takeout directory and a file list.')
    parser.add_argument('takeout_dir', type=str, help='The directory containing the takeout files.')
    parser.add_argument('file_list', type=str, help='The path to the file list (file_list.txt).')
    parser.add_argument('album_name', type=str, help='The name of the album to process.')
    return parser.parse_args(argv)
def consolidate_album(album_dir, file_list):
    """Copy media files that are missing from an album back into it.

    For every ``<name>.supplemental-metadata.json`` file in ``album_dir``
    whose media file ``<name>`` is absent from the album, look for a file
    with the same basename in ``file_list``. If exactly one candidate is
    found it is copied into ``album_dir``; zero or several candidates are
    reported through ``logging.error`` and nothing is copied.

    Args:
        album_dir: Path of the album directory to complete.
        file_list: Path of a text file with one file path per line. The
            paths are passed to ``shutil.copy`` as written, so relative
            paths must be resolvable from the current working directory.

    Returns:
        None. Results are reported through the ``logging`` module.
    """
    metadata_suffix = '.supplemental-metadata.json'

    # Set up logging
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

    # Read the file list once and index it by basename, so each missing
    # picture is a dictionary lookup instead of a scan of the whole list.
    # dict.fromkeys drops duplicate lines while keeping the file order.
    with open(file_list, 'r') as f:
        unique_paths = dict.fromkeys(line.strip() for line in f)
    paths_by_name = {}
    for path in unique_paths:
        if path:
            paths_by_name.setdefault(os.path.basename(path), []).append(path)

    # Check if the folder name starts with 20XX, if so get the year.
    # We'll use it to restrict the search for the same picture name as
    # we often have multiple pictures with the same name in different years.
    # normpath drops a trailing separator, which would otherwise make the
    # basename empty and silently disable the year filter.
    album_name = os.path.basename(os.path.normpath(album_dir))
    year_match = re.match(r'2\d{3}', album_name)
    album_year = year_match.group() if year_match else None
    print(f"Album year: {album_year}")

    # Iterate over the metadata files in the album directory
    for filename in os.listdir(album_dir):
        if not filename.endswith(metadata_suffix):
            continue

        # Strip only the trailing suffix; str.replace would also remove
        # any earlier occurrence inside the name.
        image_file = filename[:-len(metadata_suffix)]
        image_path = os.path.join(album_dir, image_file)
        if os.path.exists(image_path):
            continue

        # Candidates share the basename and, when the album has a year,
        # contain that year somewhere in their path.
        matches = [
            path for path in paths_by_name.get(image_file, ())
            if album_year is None or album_year in path
        ]
        if len(matches) == 1:
            # If there is a single match, it must be the right one, just copy it.
            try:
                shutil.copy(matches[0], album_dir)
            except OSError as err:
                # A stale or unreadable entry must not abort the other files.
                logging.error("[FAILED] could not copy %s to %s: %s", matches[0], album_dir, err)
            else:
                logging.info("[SUCCESS] copied %s to %s", matches[0], album_dir)
        elif matches:
            # Multiple matches, we'll need to manually review which ones
            # correspond to the album.
            logging.error("Multiple matches found for %s in file list: %s", image_file, matches)
        else:
            logging.error("No match found for %s in file list.", image_file)
def main():
    """Run the album consolidation for the command-line arguments.

    Builds the album directory path from the ``takeout_dir`` and
    ``album_name`` arguments and processes it with ``consolidate_album``.

    Returns:
        0 when the album directory exists and was processed, 1 when it
        does not exist (an error is logged in that case).
    """
    args = parse_arguments()

    # Process the specified album directory
    album_dir = os.path.join(args.takeout_dir, args.album_name)
    if not os.path.isdir(album_dir):
        logging.error(f"The specified album directory {album_dir} does not exist.")
        return 1

    consolidate_album(album_dir, args.file_list)
    return 0


if __name__ == "__main__":
    # Propagate the status so a missing album directory is visible to the
    # calling shell instead of exiting with success.
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment