aarondill · March 11, 2025 09:39
diff --git a/subtitle.py b/subtitle.py
 import argparse
 import itertools
 from pathlib import Path
 import subprocess
 import json
 from argparse import ArgumentParser
 from datetime import datetime


 def error(text: str):
    """Print ``text`` on stdout as an ``Error:`` line in ANSI bright red.

    Returns whatever ``print`` returns (``None``), which lets callers write
    ``return error(...)`` to report a problem and bail out in one statement.
    """
    colour_on, colour_off = "\033[91m", "\033[0m"
    return print(f"{colour_on}Error: {text}{colour_off}")


 def warn(text: str):
    """Print ``text`` on stdout as a ``Warning:`` line in ANSI bright yellow.

    Returns whatever ``print`` returns (``None``), which lets callers write
    ``return warn(...)`` to report a problem and bail out in one statement.
    """
    colour_on, colour_off = "\033[93m", "\033[0m"
    return print(f"{colour_on}Warning: {text}{colour_off}")


 def extract_all_subtitles(file: Path):
    """Flag one subtitle track of ``file`` as the default, preferring English SDH.

    Despite the name, nothing is extracted: the file is inspected with
    ``mkvmerge -J`` and, unless a subtitle track is already flagged as
    default, one track is chosen and flagged through ``mkvpropedit``.

    Candidate selection:
      1. every subtitle track whose language is ``eng``;
      2. otherwise, if there is exactly one subtitle track, that track,
         provided its language is missing or ``und``;
      3. otherwise every subtitle track whose language is missing or ``und``.
    With several candidates, a track with ``flag_hearing_impaired`` set wins,
    then one whose track name contains "sdh" (case-insensitive), then the
    second candidate.

    Args:
        file: Path of the media file to inspect and modify.

    Returns:
        None. Progress, warnings and errors are reported through ``print``,
        ``warn`` and ``error``.
    """
    result = subprocess.run(["mkvmerge", "-J", file], capture_output=True, text=True)
    if result.returncode != 0:  # Check if the command was successful
        return error(f"cannot process file {file} {result.stderr}")

    # Parse the JSON response
    mkv_info = json.loads(result.stdout)
    if mkv_info.get("container", {}).get("supported") is False:
        return error(f"file {file} is not supported")

    # Keep only subtitle tracks, sorted by track id (to ensure stable order)
    tracks = sorted(
        (t for t in mkv_info.get("tracks", []) if t["type"] == "subtitles"),
        key=lambda t: t["id"],
    )
    if not tracks:
        return warn(f"no subtitles found in {file}")

    default = next(
        (t for t in tracks if t["properties"].get("default_track")),
        None,
    )
    if default is not None:
        print(f"Default subtitle found: {default['id']}")
        return

    # "language" is optional in the track properties, so never index it directly.
    english_tracks = [t for t in tracks if t["properties"].get("language") == "eng"]
    if not english_tracks:
        if len(tracks) == 1:
            only_props = tracks[0]["properties"]
            if "language" in only_props and only_props["language"] not in ("eng", "und"):
                # The only track has a language property but it's not English or undefined
                return warn(f"no English track found in {file}")
            # If there is no English track and there is only one track, use it
            english_tracks = [tracks[0]]
        else:
            english_tracks = [
                t for t in tracks if t["properties"].get("language", "und") == "und"
            ]

    # Must happen before indexing below: with several tracks that are neither
    # English nor undefined the candidate list is empty.
    if not english_tracks:
        return warn(f"no English track found in {file}")

    track = english_tracks[0]
    # prefer flag_hearing_impaired if there's more than one english track
    if len(english_tracks) > 1:
        hearing_impaired = [
            t for t in english_tracks if t["properties"].get("flag_hearing_impaired")
        ] or [
            t
            for t in english_tracks
            if "sdh" in t["properties"].get("track_name", "").casefold()
        ]
        if hearing_impaired:
            track = hearing_impaired[0]
            if len(hearing_impaired) > 1:
                warn(
                    f"multiple hearing impaired tracks found. Using first track ({track['id']})"
                )
        else:  # If there's multiple English tracks, use the second track. Usually this is the hearing impaired track.
            track = english_tracks[1]
            warn(f"multiple English tracks found. Using second track ({track['id']})")

    props = track["properties"]
    uid = props.get("uid")
    if uid is None:
        # The mkvpropedit selector below addresses the track by uid.
        return error(f"track {track['id']} in {file} has no uid")

    track_codec = track["codec"]
    track_language = props.get("language", "und")
    print(
        f"Setting default: track {track['id']} codec: {track_codec} language: {track_language}"
    )
    edit = subprocess.run(
        [
            "mkvpropedit",
            "--edit",
            f"track:={uid}",
            "--set",
            "flag-default=1",
            file,
        ]
    )
    if edit.returncode != 0:
        warn(f"mkvpropedit exited with code {edit.returncode} for {file}")


 if __name__ == "__main__":
    # Command line: one or more files and/or directories.
    cli = ArgumentParser()
    cli.add_argument("filename", nargs="+")
    options = cli.parse_args()

    # A directory argument expands to every *.mkv beneath it (any letter
    # case, sorted); any other argument is used as given.
    files = []
    for entry in map(Path, options.filename):
        if entry.is_dir():
            files.extend(sorted(entry.rglob("*.[mM][kK][vV]")))
        else:
            files.append(entry)

    total = len(files)
    for position, file in enumerate(files, start=1):
        print(f"\nfile {position}/{total}: {file}")
        extract_all_subtitles(file)
	import argparse
	import itertools
	from pathlib import Path
	import subprocess
	import json
	from argparse import ArgumentParser
	from datetime import datetime


	def error(text: str):
		"""Print ``text`` on stdout as an ``Error:`` line in ANSI bright red.

		Returns whatever ``print`` returns (``None``), which lets callers write
		``return error(...)`` to report a problem and bail out in one statement.
		"""
		return print(f"\033[91mError: {text}\033[0m")


	def warn(text: str):
		"""Print ``text`` on stdout as a ``Warning:`` line in ANSI bright yellow.

		Returns whatever ``print`` returns (``None``), which lets callers write
		``return warn(...)`` to report a problem and bail out in one statement.
		"""
		return print(f"\033[93mWarning: {text}\033[0m")


	def extract_all_subtitles(file: Path):
		"""Flag one subtitle track of ``file`` as the default, preferring English SDH.

		Despite the name, nothing is extracted: the file is inspected with
		``mkvmerge -J`` and, unless a subtitle track is already flagged as
		default, one track is chosen and flagged through ``mkvpropedit``.

		Candidate selection:
		  1. every subtitle track whose language is ``eng``;
		  2. otherwise, if there is exactly one subtitle track, that track,
		     provided its language is missing or ``und``;
		  3. otherwise every subtitle track whose language is missing or ``und``.
		With several candidates, a track with ``flag_hearing_impaired`` set wins,
		then one whose track name contains "sdh" (case-insensitive), then the
		second candidate.

		Args:
			file: Path of the media file to inspect and modify.

		Returns:
			None. Progress, warnings and errors are reported through ``print``,
			``warn`` and ``error``.
		"""
		result = subprocess.run(["mkvmerge", "-J", file], capture_output=True, text=True)
		if result.returncode != 0:  # Check if the command was successful
			return error(f"cannot process file {file} {result.stderr}")

		# Parse the JSON response
		mkv_info = json.loads(result.stdout)
		if mkv_info.get("container", {}).get("supported") is False:
			return error(f"file {file} is not supported")

		# Keep only subtitle tracks, sorted by track id (to ensure stable order)
		tracks = sorted(
			(t for t in mkv_info.get("tracks", []) if t["type"] == "subtitles"),
			key=lambda t: t["id"],
		)
		if not tracks:
			return warn(f"no subtitles found in {file}")

		default = next(
			(t for t in tracks if t["properties"].get("default_track")),
			None,
		)
		if default is not None:
			print(f"Default subtitle found: {default['id']}")
			return

		# "language" is optional in the track properties, so never index it directly.
		english_tracks = [t for t in tracks if t["properties"].get("language") == "eng"]
		if not english_tracks:
			if len(tracks) == 1:
				only_props = tracks[0]["properties"]
				if "language" in only_props and only_props["language"] not in ("eng", "und"):
					# The only track has a language property but it's not English or undefined
					return warn(f"no English track found in {file}")
				# If there is no English track and there is only one track, use it
				english_tracks = [tracks[0]]
			else:
				english_tracks = [
					t for t in tracks if t["properties"].get("language", "und") == "und"
				]

		# Must happen before indexing below: with several tracks that are neither
		# English nor undefined the candidate list is empty.
		if not english_tracks:
			return warn(f"no English track found in {file}")

		track = english_tracks[0]
		# prefer flag_hearing_impaired if there's more than one english track
		if len(english_tracks) > 1:
			hearing_impaired = [
				t for t in english_tracks if t["properties"].get("flag_hearing_impaired")
			] or [
				t
				for t in english_tracks
				if "sdh" in t["properties"].get("track_name", "").casefold()
			]
			if hearing_impaired:
				track = hearing_impaired[0]
				if len(hearing_impaired) > 1:
					warn(
						f"multiple hearing impaired tracks found. Using first track ({track['id']})"
					)
			else:  # If there's multiple English tracks, use the second track. Usually this is the hearing impaired track.
				track = english_tracks[1]
				warn(f"multiple English tracks found. Using second track ({track['id']})")

		props = track["properties"]
		uid = props.get("uid")
		if uid is None:
			# The mkvpropedit selector below addresses the track by uid.
			return error(f"track {track['id']} in {file} has no uid")

		track_codec = track["codec"]
		track_language = props.get("language", "und")
		print(
			f"Setting default: track {track['id']} codec: {track_codec} language: {track_language}"
		)
		edit = subprocess.run(
			[
				"mkvpropedit",
				"--edit",
				f"track:={uid}",
				"--set",
				"flag-default=1",
				file,
			]
		)
		if edit.returncode != 0:
			warn(f"mkvpropedit exited with code {edit.returncode} for {file}")


	if __name__ == "__main__":
		# Command line: one or more files and/or directories.
		parser = ArgumentParser()
		parser.add_argument("filename", nargs="+")
		args = parser.parse_args()

		# A directory argument expands to every *.mkv beneath it (any letter
		# case, sorted); any other argument is used as given.
		files = [
			sorted(arg.rglob("*.[mM][kK][vV]")) if arg.is_dir() else [arg]
			for arg in map(Path, args.filename)
		]
		files = list(itertools.chain.from_iterable(files))

		for i, file in enumerate(files):
			print(f"\nfile {i+1}/{len(files)}: {file}")
			extract_all_subtitles(file)