@muety
Created August 1, 2024 20:36
Script to clean up duplicates in Immich based on their file size.
#!/usr/bin/python
# Script to clean up duplicates in Immich based on their file size.
#
# Rationale: I observed that photos to which I had applied Google Photos' "Optimize" filter are imported in both their original and edited versions,
# with the edited version usually having a smaller file size despite the same resolution (lower JPEG quality, maybe?).
# This script iterates over all duplicates detected by Immich itself and, for each duplicate group, removes the smallest asset
# if the file names and creation dates of all assets in the group match.
#
# Prerequisites:
# 1. Run Immich Smart Search
# 2. Run Immich duplicate detection (with threshold of 0.1)
# 3. Create an API key
# 4. Run this script

import argparse
import logging
from typing import Any, Dict, List

import httpx  # pip install httpx

logging.basicConfig(level=logging.INFO)


class DuplicatesCleaner:
    def __init__(self, url: str, api_key: str, keep_smaller: bool = False):
        # keep_smaller=False (the default) keeps the larger file of each duplicate
        # group and deletes the smallest one, as described in the rationale above
        self.keep_smaller: bool = keep_smaller
        # httpx joins relative request paths onto this base URL
        self.client: httpx.Client = httpx.Client(base_url=f'{url}/api', headers={
            'x-api-key': api_key
        })

    def run(self):
        self.batch_delete(self.filter_duplicates(self.fetch_duplicates()))

    def fetch_duplicates(self) -> List[Dict[str, Any]]:
        r = self.client.get('duplicates')
        r.raise_for_status()
        return r.json()
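
    # Note: the shape of the /api/duplicates response sketched below is assumed
    # (inferred from the fields this script accesses), not copied from the API docs:
    # [
    #   {
    #     'duplicateId': '...',
    #     'assets': [
    #       {
    #         'id': '...',
    #         'originalFileName': 'IMG_1234.jpg',
    #         'fileCreatedAt': '2024-08-01T20:36:00.000Z',
    #         'exifInfo': {'fileSizeInByte': 123456},
    #       },
    #       ...
    #     ],
    #   },
    #   ...
    # ]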

    def filter_duplicates(self, duplicates: List[Dict[str, Any]]) -> List[str]:
        # Returns the list of asset ids to delete.
        to_delete: List[str] = []
        for d in duplicates:
            if len(d['assets']) < 2:
                continue
            # Sort so that the asset to delete comes first: the smallest file when
            # keeping the larger one, the largest file when keeping the smaller one.
            assets = sorted(d['assets'], key=lambda p: p['exifInfo']['fileSizeInByte'], reverse=self.keep_smaller)
            names = {p['originalFileName'] for p in assets}
            created_dates = {p['fileCreatedAt'] for p in assets}
            if len(names) > 1 or len(created_dates) > 1:
                logging.info(f'skipping duplicate {d["duplicateId"]} as the file names or dates differ')
                continue
            to_delete.append(assets[0]['id'])
        logging.info(f'got {len(to_delete)} assets to delete')
        return to_delete

    def batch_delete(self, ids: List[str]):
        # force=True deletes the assets permanently instead of moving them to the trash
        r = self.client.request('DELETE', 'assets', json={
            'force': True,
            'ids': ids,
        })
        r.raise_for_status()
        logging.info(f'deleted {len(ids)} duplicates')


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--url', type=str, required=True, help='Your Immich base URL')
    parser.add_argument('--api_key', type=str, required=True, help='Your Immich API key')
    parser.add_argument('--keep_smaller', action='store_true', help='Keep the smaller (by file size) of the duplicate assets instead of the larger one')
    args = parser.parse_args()
    c: DuplicatesCleaner = DuplicatesCleaner(args.url, args.api_key, args.keep_smaller)
    c.run()
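
Example invocation (the hostname and API key are placeholders, and the script can be saved under any file name):

    python3 immich_dedupe.py --url https://immich.example.com --api_key YOUR_API_KEY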