Last active
March 11, 2025 09:39
-
-
Save aarondill/c31cafa75d766d1ee81747d73edcb8e5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import itertools | |
from pathlib import Path | |
import subprocess | |
import json | |
from argparse import ArgumentParser | |
from datetime import datetime | |
def error(text: str):
    """Print *text* to stdout as a bright-red ``Error: ...`` line.

    Returns whatever ``print`` returns (``None``), which lets callers write
    ``return error(...)`` to report a problem and bail out in one statement.
    """
    message = f"\033[91mError: {text}\033[0m"
    return print(message)
def warn(text: str):
    """Print *text* to stdout as a bright-yellow ``Warning: ...`` line.

    Returns whatever ``print`` returns (``None``), which lets callers write
    ``return warn(...)`` to report a problem and bail out in one statement.
    """
    message = f"\033[93mWarning: {text}\033[0m"
    return print(message)
def extract_all_subtitles(file: Path):
    """Make sure the Matroska file *file* has a default subtitle track.

    Despite its name, this function extracts nothing. It inspects the file
    with ``mkvmerge -J`` and, when no subtitle track is already flagged as
    default, sets ``flag-default=1`` on the best English candidate using
    ``mkvpropedit``.

    Candidate selection:
      1. Subtitle tracks whose language is ``eng``.
      2. If there are none and the file has exactly one subtitle track, that
         track, unless it declares a language other than ``eng``/``und``.
      3. If there are none and the file has several subtitle tracks, the
         tracks with no language or with language ``und``.

    When several candidates remain, a track flagged as hearing impaired wins,
    then a track whose name contains "sdh" (case-insensitive); failing both,
    the second candidate is used.

    Args:
        file: Path of the file to inspect and possibly modify in place.

    Returns:
        Always ``None``. Problems are reported through ``error``/``warn``.
    """

    def language_of(track) -> str:
        # A track without a language property is treated like "und"
        # (undetermined) so every lookup below is safe from KeyError.
        return track["properties"].get("language", "und")

    result = subprocess.run(["mkvmerge", "-J", file], capture_output=True, text=True)
    if result.returncode != 0:  # Check if the command was successful
        return error(f"cannot process file {file} {result.stderr}")

    # Parse the JSON response
    mkv_info = json.loads(result.stdout)
    if mkv_info.get("container", {}).get("supported") is False:
        return error(f"file {file} is not supported")

    # Keep only subtitle tracks, sorted by track id (to ensure stable order)
    subtitle_tracks = sorted(
        (track for track in mkv_info.get("tracks", []) if track["type"] == "subtitles"),
        key=lambda track: track["id"],
    )
    if not subtitle_tracks:
        return warn(f"no subtitles found in {file}")

    default = next(
        (
            track
            for track in subtitle_tracks
            if track["properties"].get("default_track", False)
        ),
        None,
    )
    if default is not None:
        print(f"Default subtitle found: {default['id']}")
        return

    candidates = [track for track in subtitle_tracks if language_of(track) == "eng"]
    if not candidates:
        if len(subtitle_tracks) == 1:
            only_track = subtitle_tracks[0]
            if language_of(only_track) not in ("eng", "und"):
                # The only track has a language property but it's not
                # English or undefined
                return warn(f"no English track found in {file}")
            # No English track and only one track overall: use that track
            candidates = [only_track]
        else:
            candidates = [
                track for track in subtitle_tracks if language_of(track) == "und"
            ]

    # This check must come before indexing: with several subtitle tracks and
    # none of them English/undetermined, the candidate list is empty.
    if not candidates:
        return warn(f"no English track found in {file}")

    track = candidates[0]
    # prefer flag_hearing_impaired if there's more than one english track
    if len(candidates) > 1:
        hearing_impaired = [
            candidate
            for candidate in candidates
            if candidate["properties"].get("flag_hearing_impaired", False)
        ] or [
            candidate
            for candidate in candidates
            if "sdh" in candidate["properties"].get("track_name", "").casefold()
        ]
        if hearing_impaired:
            track = hearing_impaired[0]
            if len(hearing_impaired) > 1:
                warn(
                    f"multiple hearing impaired tracks found. Using first track ({track['id']})"
                )
        else:
            # If there's multiple English tracks, use the second track.
            # Usually this is the hearing impaired track.
            track = candidates[1]
            warn(f"multiple English tracks found. Using second track ({track['id']})")

    track_codec = track["codec"]
    track_language = language_of(track)
    print(
        f"Setting default: track {track['id']} codec: {track_codec} language: {track_language}"
    )
    # "track:=<uid>" selects the track by its UID rather than by position.
    edit = subprocess.run(
        [
            "mkvpropedit",
            "--edit",
            f"track:={track['properties']['uid']}",
            "--set",
            "flag-default=1",
            file,
        ]
    )
    # mkvpropedit exits with 1 when it only emitted warnings and 2 on errors;
    # surface both instead of silently ignoring the outcome.
    if edit.returncode == 1:
        warn(f"mkvpropedit reported warnings for {file}")
    elif edit.returncode != 0:
        error(f"mkvpropedit failed for {file} (exit code {edit.returncode})")
if __name__ == "__main__":
    # Command line: one or more paths. A directory is searched recursively
    # for *.mkv files (any letter case, sorted); any other path is used as-is.
    cli = ArgumentParser()
    cli.add_argument("filename", nargs="+")
    cli_args = cli.parse_args()

    targets = []
    for given in map(Path, cli_args.filename):
        if given.is_dir():
            targets.extend(sorted(given.rglob("*.[mM][kK][vV]")))
        else:
            targets.append(given)

    total = len(targets)
    for position, file in enumerate(targets, start=1):
        print(f"\nfile {position}/{total}: {file}")
        extract_all_subtitles(file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment