Skip to content

Instantly share code, notes, and snippets.

@aarondill
Last active March 11, 2025 09:39
Show Gist options
  • Save aarondill/c31cafa75d766d1ee81747d73edcb8e5 to your computer and use it in GitHub Desktop.
Save aarondill/c31cafa75d766d1ee81747d73edcb8e5 to your computer and use it in GitHub Desktop.
import argparse
import itertools
from pathlib import Path
import subprocess
import json
from argparse import ArgumentParser
from datetime import datetime
def error(text: str):
    """Print *text* to stdout as a red ``Error:`` line.

    The message is wrapped in the ANSI bright-red escape (``\\033[91m``) and
    followed by a reset (``\\033[0m``). Returns whatever ``print`` returns,
    i.e. ``None``, so callers can write ``return error(...)``.
    """
    message = f"\033[91mError: {text}\033[0m"
    return print(message)
def warn(text: str):
    """Print *text* to stdout as a yellow ``Warning:`` line.

    The message is wrapped in the ANSI bright-yellow escape (``\\033[93m``)
    and followed by a reset (``\\033[0m``). Returns whatever ``print``
    returns, i.e. ``None``, so callers can write ``return warn(...)``.
    """
    message = f"\033[93mWarning: {text}\033[0m"
    return print(message)
def _pick_english_track(tracks):
    """Choose which subtitle track should be flagged as the default.

    *tracks* is the list of subtitle track dicts (as found in the
    ``mkvmerge -J`` output), already sorted by track id.

    Selection rules:
      * tracks whose language is ``"eng"`` are preferred;
      * if there are none, tracks whose language is ``"und"`` or missing
        are used instead;
      * with several candidates, one flagged hearing impaired wins; failing
        that, one whose track name contains "sdh" (case-insensitive);
        failing that, the second candidate is used.

    Returns the chosen track dict, or ``None`` when no candidate exists.
    """

    def language_of(track):
        # A track without a language property is treated like "und".
        return track["properties"].get("language", "und")

    candidates = [track for track in tracks if language_of(track) == "eng"]
    if not candidates:
        candidates = [track for track in tracks if language_of(track) == "und"]

    # Check for emptiness *before* indexing, so a file with only foreign
    # language tracks produces a warning in the caller instead of IndexError.
    if not candidates:
        return None
    if len(candidates) == 1:
        return candidates[0]

    hearing_impaired = [
        track
        for track in candidates
        if track["properties"].get("flag_hearing_impaired")
    ] or [
        track
        for track in candidates
        # `or ""` guards against a track_name that is present but null.
        if "sdh" in (track["properties"].get("track_name") or "").casefold()
    ]
    if hearing_impaired:
        chosen = hearing_impaired[0]
        if len(hearing_impaired) > 1:
            warn(
                f"multiple hearing impaired tracks found. Using first track ({chosen['id']})"
            )
        return chosen

    # No explicit marker: fall back to the second candidate, which is
    # usually the hearing impaired track.
    chosen = candidates[1]
    warn(f"multiple English tracks found. Using second track ({chosen['id']})")
    return chosen


def extract_all_subtitles(file: Path):
    """Mark an English subtitle track of *file* as the default track.

    Despite its name, this function does not extract anything. It inspects
    the file with ``mkvmerge -J`` and, if no subtitle track already has the
    default flag, picks one (see ``_pick_english_track``) and sets
    ``flag-default=1`` on it with ``mkvpropedit``, modifying the file in
    place.

    Problems are reported through ``error``/``warn`` and never raised for
    the expected cases (unreadable file, unsupported container, no
    subtitles, no English track). Always returns ``None``.
    """
    probe = subprocess.run(["mkvmerge", "-J", file], capture_output=True, text=True)
    if probe.returncode != 0:  # mkvmerge could not identify the file
        return error(f"cannot process file {file} {probe.stderr}")

    mkv_info = json.loads(probe.stdout)
    if mkv_info.get("container", {}).get("supported") is False:
        return error(f"file {file} is not supported")

    # Sort by track id so the selection below is stable across runs.
    subtitle_tracks = sorted(
        (track for track in mkv_info.get("tracks", []) if track["type"] == "subtitles"),
        key=lambda track: track["id"],
    )
    if not subtitle_tracks:
        return warn(f"no subtitles found in {file}")

    # Leave the file alone if a subtitle track is already the default.
    default = next(
        (
            track
            for track in subtitle_tracks
            if track["properties"].get("default_track")
        ),
        None,
    )
    if default is not None:
        print(f"Default subtitle found: {default['id']}")
        return

    track = _pick_english_track(subtitle_tracks)
    if track is None:
        return warn(f"no English track found in {file}")

    track_codec = track["codec"]
    # The chosen track may have no language property at all.
    track_language = track["properties"].get("language", "und")
    print(
        f"Setting default: track {track['id']} codec: {track_codec} language: {track_language}"
    )
    edit = subprocess.run(
        [
            "mkvpropedit",
            "--edit",
            f"track:={track['properties']['uid']}",  # select the track by its UID
            "--set",
            "flag-default=1",
            file,
        ]
    )
    # mkvpropedit exits with 1 on warnings and 2 on errors; surface both
    # instead of silently ignoring a failed edit.
    if edit.returncode == 1:
        warn(f"mkvpropedit reported warnings for {file}")
    elif edit.returncode != 0:
        error(f"mkvpropedit failed for {file} (exit code {edit.returncode})")
if __name__ == "__main__":
    # Command line: one or more paths, each either a file or a directory.
    cli = ArgumentParser()
    cli.add_argument("filename", nargs="+")
    options = cli.parse_args()

    # Build the full work list up front: a directory expands to every
    # *.mkv (any letter case) found recursively beneath it, in sorted
    # order; any other path is taken as-is.
    targets = []
    for raw in options.filename:
        location = Path(raw)
        if location.is_dir():
            targets.extend(sorted(location.rglob("*.[mM][kK][vV]")))
        else:
            targets.append(location)

    total = len(targets)
    for position, target in enumerate(targets, start=1):
        print(f"\nfile {position}/{total}: {target}")
        extract_all_subtitles(target)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment