Skip to content

Instantly share code, notes, and snippets.

@RhetTbull
Last active May 21, 2025 15:34
Show Gist options
  • Save RhetTbull/9035ff260d123413012758252a76d82a to your computer and use it in GitHub Desktop.
Save RhetTbull/9035ff260d123413012758252a76d82a to your computer and use it in GitHub Desktop.
Generate video captions with Apple Intelligence
#!/usr/bin/env -S uv run --script
"""Generate captions for videos in Apple Photos using Apple's Media Analysis Service"""
# run with uv: `uv run https://gist.githubusercontent.com/RhetTbull/9035ff260d123413012758252a76d82a/raw/a5be13b0bcb1d5c2224ba1bb0828b652601b2482/video_captions.py`
# The inline script metadata below lets you run the script with uv via `uv run video_captions.py`
# /// script
# dependencies = [
# "pyobjc-core",
# "pyobjc-framework-Photos",
# "photokit",
# ]
# ///
import json
import sys
import threading
from typing import cast
import objc
from Foundation import NSArray, NSDictionary
from Photos import PHAsset
import photokit
# Resolve the Objective-C class by name at runtime through the PyObjC bridge.
PLMediaAnalysisServiceRequestAdapter = objc.lookUpClass(
    "PLMediaAnalysisServiceRequestAdapter"
)

# Tell PyObjC how to bridge the two block arguments of the caption-request
# selector. Argument indices include the two implicit Objective-C arguments,
# so the selector's third and fourth explicit arguments are indices 4 and 5.
objc.registerMetaDataForSelector(
    b"PLMediaAnalysisServiceRequestAdapter",
    b"requestVideoCaptionPreferenceForAssets:withOptions:progressHandler:completionHandler:",
    {
        "arguments": {
            # progressHandler: declared as a block type only, no signature.
            4: {"type": b"@?"},
            # completionHandler: a block returning void that receives
            # (results dictionary, error).
            5: {
                "callable": {
                    "retval": {"type": b"v"},
                    "arguments": {
                        0: {"type": b"^v"},  # the block itself
                        1: {"type": b"@"},  # results dictionary
                        2: {"type": b"@"},  # error
                    },
                },
            },
        },
    },
)
def ns_to_py(obj):
    """Turn Foundation containers into plain Python containers, recursively.

    An NSDictionary becomes a dict whose keys are passed through ``str()``,
    an NSArray becomes a list, and any other value is returned unchanged.
    """
    if isinstance(obj, NSDictionary):
        converted = {}
        for key in obj:
            name = str(key)
            converted[name] = ns_to_py(obj[key])
        return converted
    if isinstance(obj, NSArray):
        return list(map(ns_to_py, obj))
    # Anything else is treated as a scalar and handed back as-is.
    return obj
def generate_caption_for_video_assets(assets) -> dict[str, dict]:
    """Generate AI-powered captions for the given video assets.

    Submits the assets to PLMediaAnalysisServiceRequestAdapter and blocks the
    calling thread until the completion handler has run.

    Args:
        assets: A list of Photos.PHAsset instances for videos to caption.

    Returns:
        A dictionary mapping each video's original filename to the caption
        result returned for it (converted to plain Python containers).
        Results whose assets share an original filename overwrite one another.

    Raises:
        RuntimeError: If the service reports an error; the message is the
            error's localized description.
        Exception: Any exception raised while processing the results inside
            the completion handler is re-raised in the calling thread.
    """
    captions: dict[str, dict] = {}
    completion_error = None
    handler_exception = None
    done = threading.Event()

    def _completion_handler(results, error):
        """Completion handler for video caption generation."""
        nonlocal completion_error, handler_exception
        try:
            if error is not None:
                completion_error = error.localizedDescription()
                return
            # A nil results dictionary is treated as "no captions".
            converted = cast(dict, ns_to_py(results) or {})
            library = photokit.PhotoLibrary()
            for uuid, result in converted.items():
                captions[library.asset(uuid).original_filename] = result
        except Exception as exc:
            # Keep the failure so the waiting caller can re-raise it; an
            # exception escaping here would otherwise leave it blocked.
            handler_exception = exc
        finally:
            # Always release the waiting caller, whatever happened above.
            done.set()

    PLMediaAnalysisServiceRequestAdapter.requestVideoCaptionPreferenceForAssets_withOptions_progressHandler_completionHandler_(
        assets,  # assets array
        None,  # options (nil)
        None,  # progress handler (nil)
        _completion_handler,  # completion handler function
    )
    done.wait()  # wait for completion, completion handler will be called when done

    if handler_exception is not None:
        raise handler_exception
    # Compare against None so an empty error description is still reported.
    if completion_error is not None:
        raise RuntimeError(completion_error)
    return captions
def get_selected_video_assets() -> list[PHAsset]:
    """
    Collect the PHAsset of every video in the current Photos.app selection.

    Returns:
        list: The ``phasset`` of each selected asset whose ``ismovie`` flag is
        set, or an empty list when photokit reports no selection.
    """
    selected = photokit.PhotoLibrary().selection()
    videos = []
    # A falsy selection is treated as "nothing selected".
    for item in selected or ():
        if item.ismovie:
            videos.append(item.phasset)
    return videos
if __name__ == "__main__":
    # Progress messages are flushed immediately so they show up before the
    # potentially slow calls that follow them.
    print("Getting selected video assets...", flush=True)
    assets = get_selected_video_assets()

    # Nothing to caption: report it and exit with a non-zero status.
    if not assets:
        print("No videos selected in Photos.app.")
        raise SystemExit(1)

    print(
        f"Found {len(assets)} selected video{'s' if len(assets) != 1 else ''} in Photos.app",
        flush=True,
    )
    print("Requesting caption (this might take a moment)...", flush=True)

    # Emit the captions as pretty-printed JSON on stdout.
    results = generate_caption_for_video_assets(assets)
    rendered = json.dumps(results, indent=4)
    print(rendered)
@RhetTbull
Copy link
Author

This can be run via uv directly from the gist:
uv run https://gist.githubusercontent.com/RhetTbull/9035ff260d123413012758252a76d82a/raw/a5be13b0bcb1d5c2224ba1bb0828b652601b2482/video_captions.py

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment