Last active
November 6, 2024 10:55
-
-
Save FurloSK/7f52303a10ab7478e3cddfe4bcc50881 to your computer and use it in GitHub Desktop.
Extract subtitles from MKV/MP4 videos
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Extract subtitles from each MKV/MP4 file in the given directory
# [updated 2024-01-09 by FurloSK]
# Permanent gist address: https://gist.github.com/FurloSK/7f52303a10ab7478e3cddfe4bcc50881
#
# ===== Usage =====
# extractSubtitles.sh [-i] [<fileOrDirectory>]
#   -i
#     Supplying this option will skip extraction and only print information about subtitles in file
#   <fileOrDirectory>
#     If a directory is given, will process all MKV/MP4 files in this directory (and subdirectories)
#     If a file is given, will process this single file
#     If the parameter is skipped altogether, will process current directory (and subdirectories)
#
# ===== History =====
# Original version by ComputerNerdFromHell (site no longer working):
#   http://www.computernerdfromhell.com/blog/automatically-extract-subtitles-from-mkv
#   Archived here: https://web.archive.org/web/20181119144734/http://www.computernerdfromhell.com/blog/automatically-extract-subtitles-from-mkv/
# Resubmitted by nux:
#   https://askubuntu.com/questions/452268/extract-subtitle-from-mkv-files/452279#452279
# Completely rewritten and tweaked by FurloSK:
#   https://superuser.com/questions/1527829/extracting-subtitles-from-mkv-file/1649627#1649627
#   Permanent gist address: https://gist.github.com/FurloSK/7f52303a10ab7478e3cddfe4bcc50881
#
# =============================================================================
# Config part: this is the only thing you need to tweak
# MKVToolNix path - Leave empty if you have the tools added to $PATH.
# This is needed e.g. on macOS, if you just downloaded MKVToolNix app and dragged it to Applications folder
toolPath='/Applications/+ Moje/MKVToolNix.app/Contents/MacOS/'
# =============================================================================
# Start of script

# parseArguments [-i] [<fileOrDirectory>]
#
# Interprets the command-line arguments and sets two globals:
#   DIR            - file or directory to process (defaults to ".")
#   skipExtraction - "true" when -i was given (only print subtitle tracks),
#                    "false" otherwise
# Returns 0 on success. Returns 2 (after printing the usage line to stderr)
# when more arguments are supplied than the usage allows, so that a path that
# was accidentally split into several words (e.g. an unquoted name containing
# spaces) is reported instead of silently processing the current directory.
# Only POSIX `[` tests are used here, so this part does not require bash.
parseArguments() {
  # by default, process all files in local dir
  DIR="."
  skipExtraction=false

  # first parameter might be -i switch, which will only print subtitle tracks
  # instead of extracting them
  if [ "$#" -gt 0 ] && [ "$1" = "-i" ]; then
    skipExtraction=true
    shift
  fi

  # at most one positional argument (the file or directory) may remain
  if [ "$#" -gt 1 ]; then
    printf 'Usage: %s [-i] [<fileOrDirectory>]\n' "${0##*/}" >&2
    return 2
  fi

  # if a directory or file is given, use it instead of the default local dir
  if [ "$#" -eq 1 ]; then
    DIR="$1"
  fi
  return 0
}

parseArguments "$@" || exit 2
# Get all the MKV/MP4 files in this dir and its subdirs, list the subtitle
# tracks of each one and (unless $skipExtraction is true) extract every track
# into its own file next to the video.
# Reads the globals DIR, skipExtraction and toolPath.

# A literal tab character: the field separator between the track fields
# printed by the Python helper below (POSIX replacement for bash's $'\t').
tab=$(printf '\t')

# resolveTool <name>
# Prints the command used to run the MKVToolNix program <name>:
# "${toolPath}<name>" when that is an executable file, otherwise the bare
# name so that the program is looked up in $PATH.
resolveTool() {
  if [ -f "${toolPath}$1" ] && [ -x "${toolPath}$1" ]; then
    printf '%s\n' "${toolPath}$1"
  else
    printf '%s\n' "$1"
  fi
}

# subtitleExtension <codecID>
# Prints the output file extension for the given subtitle codec ID.
# Unlisted codec IDs fall back to the standard .srt extension
# (S_VOBSUB files will still get their proper extension).
subtitleExtension() {
  case "$1" in
    'S_TEXT/SSA') echo 'ssa' ;;
    'S_TEXT/ASS') echo 'ass' ;;
    'S_TEXT/USF') echo 'usf' ;;
    'S_TEXT/WEBVTT') echo 'vtt' ;;
    'S_DVBSUB') echo 'dvb' ;;
    *) echo 'srt' ;;
  esac
}

mkvmergeBin=$(resolveTool mkvmerge)
mkvextractBin=$(resolveTool mkvextract)

# Prefer python3, fall back to python; if neither is found keep the plain
# name so the shell reports the missing interpreter when it is first used.
pythonBin=$(command -v python3 || command -v python || printf '%s\n' python)

# Python program that reads the JSON printed by `mkvmerge -J` on stdin and
# outputs one line per subtitle track, fields delimited by tabulator:
#   trackID <tab> trackLanguage <tab> trackCodecID <tab> trackCodec
# It prints the single word "unsupported" when the input is not JSON or has
# no track list. Missing values are replaced by placeholders ("und",
# "undefined", "unknown") so that no field is ever empty.
# (An earlier variant fell back to the codec name when codec_id was missing;
# the "undefined" marker is used instead so the loop below can detect it.)
trackLister='
import json, sys
try:
    info = json.load(sys.stdin)
except ValueError:
    info = {}
if not isinstance(info, dict) or "tracks" not in info:
    print("unsupported")
    sys.exit()
for track in info["tracks"]:
    if track.get("type") != "subtitles":
        continue
    props = track.get("properties") or {}
    print("\t".join([
        str(track["id"]),
        props.get("language") or "und",
        props.get("codec_id") or "undefined",
        track.get("codec") or "unknown",
    ]))
'

find "$DIR" -type f \( -iname '*.mkv' -o -iname '*.mp4' -o -iname '*.avi' -o -iname '*.ts' \) |
  while IFS= read -r filename; do
    printf '\nProcessing file %s:\n' "$filename"

    # Get base file name (without extension)
    fileBasename=${filename%.*}

    # Parse info about all subtitles tracks from file.
    # stdin of mkvmerge is detached so it cannot consume the file list that
    # the outer loop is still reading.
    "$mkvmergeBin" -J "$filename" </dev/null |
      "$pythonBin" -c "$trackLister" |
      while IFS="$tab" read -r trackNumber trackLanguage trackCodecID trackCodec; do
        # if JSON tracks extraction failed, continue to next file
        if [ "$trackNumber" = 'unsupported' ]; then
          printf '%s\n' " Unsupported file, skipping..."
          continue
        fi

        printf ' Found subtitle track #%s: %s (%s, %s)\n' \
          "$trackNumber" "$trackLanguage" "$trackCodec" "$trackCodecID"

        # address missing ['properties']['codec_id'] in JSON
        if [ "$trackCodecID" = 'undefined' ]; then
          # fix DVBSUB codec automatically
          if [ "$trackCodec" = 'DVBSUB' ]; then
            trackCodecID='S_DVBSUB'
            printf ' Warning: missing codec_id for %s track => corrected to %s.\n' \
              "$trackCodec" "$trackCodecID"
          else
            printf ' Error: missing codec_id for %s track!\n' "$trackCodec"
          fi
        fi

        # if we are only printing tracks, not extracting them, print track and continue
        if [ "$skipExtraction" = true ]; then
          continue
        fi

        # optional: process only some types of subtitle tracks (according to $trackCodecID)
        # See codec types here (under header Subtitle Codec Mappings):
        # https://datatracker.ietf.org/doc/html/draft-ietf-cellar-codec/#name-subtitle-codec-mappings
        # E.g. to skip DVD subtitles, add S_VOBSUB
        case "$trackCodecID" in
          'unwantedCodecID_#1' | 'unwantedCodecID_#2')
            printf ' Unwanted codec ID %s, skipping track...\n' "$trackCodecID"
            continue
            ;;
        esac

        # determine proper extension
        extension=$(subtitleExtension "$trackCodecID")

        # prepare output filename
        # (adding . [dot] between filename and language, so VLC will properly recognize the language)
        outFilename="${fileBasename} [#${trackNumber}].${trackLanguage}.${extension}"

        # extract track with language and track id
        printf ' Extracting track to file %s\n' "$outFilename"
        printf ' Executing command "%s" tracks "%s" %s:"%s"\n' \
          "$mkvextractBin" "$filename" "$trackNumber" "$outFilename"
        # stdin is detached so mkvextract cannot consume the remaining track lines
        result=$("$mkvextractBin" tracks "$filename" "${trackNumber}:${outFilename}" </dev/null)
        extractStatus=$?
        printf ' > %s\n' "$result"
        if [ "$extractStatus" -ne 0 ]; then
          printf ' Warning: mkvextract exited with status %s for track #%s.\n' \
            "$extractStatus" "$trackNumber" >&2
        fi
        # To run the extraction silently instead, discard its output:
        #"$mkvextractBin" tracks "${filename}" "${trackNumber}:${outFilename}" > /dev/null 2>&1

        #==========================================================================
        # Lines below are from the original source by ComputerNerdFromHell.
        # They are now all obsolete and redundant (kept just for reference)
        # Extract the track to a .tmp file
        #`"${toolPath}mkvextract" tracks "$filename" $trackNumber:"$subtitlename.srt.tmp" > /dev/null 2>&1`
        #`chmod g+rw "$subtitlename.srt.tmp"`
        # # Do a super-primitive language guess: ENGLISH
        # langtest=`egrep -ic ' you | to | the ' "$subtitlename".srt.tmp`
        # trimregex=""
        #
        # # Check if subtitle passes our language filter (10 or more matches)
        # if [ $langtest -ge 10 ]; then
        #   # Regex to remove credits at the end of subtitles (read my reason why!)
        #   `sed 's/\r//g' < "$subtitlename.srt.tmp" \
        #     | sed 's/%/%%/g' \
        #     | awk '{if (a){printf("\t")};printf $0; a=1; } /^$/{print ""; a=0;}' \
        #     | grep -iv "$trimregex" \
        #     | sed 's/\t/\r\n/g' > "$subtitlename.srt"`
        #   `rm "$subtitlename.srt.tmp"`
        #   `chmod g+rw "$subtitlename.srt"`
        # else
        #   # Not our desired language: add a number to the filename and keep anyway, just in case
        #   `mv "$subtitlename.srt.tmp" "$subtitlename.$tracknumber.srt" > /dev/null 2>&1`
        # fi

        echo ""
      done
  done
For use with Ubuntu, change the first line to #!/bin/bash, comment out the toolPath line, and add the -e option to the first echo command.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Nice scripting Thnx!
For macOS users, there are 3 things to check to make this script work for you:
1 - Make sure the path to MKVToolNix is correct. In my case this worked (line 30):
toolPath='/Applications/MKVToolNix-79.0.app/Contents/MacOS/'
2 - Make sure python can be found. In my case I had to add the version number after python (line 62):
"${toolPath}mkvmerge" -J "$filename" | python3 -c
3 - When downloading the script there was (in my case) a line break that produced a Python 'syntax error' (line 62).
Make sure that the line break comes after ' trackCodec; '
Then everything worked just fine!