Skip to content

Instantly share code, notes, and snippets.

@srghma
Forked from bathtime/transmerge.sh
Created February 11, 2024 13:47
Show Gist options
  • Save srghma/9c1ebb299282695382d5d266a081c1c3 to your computer and use it in GitHub Desktop.
Save srghma/9c1ebb299282695382d5d266a081c1c3 to your computer and use it in GitHub Desktop.
A simple Linux shell script for translating an .srt file into another language and merging both languages into an .ass file
#!/usr/bin/env bash
#
# This program takes an .srt file, translates it, and merges both versions into an .ass file with the user's selected
# language at the top of the screen and the other language at the bottom.
#
# Usage: ./transmerge.sh [source language] [target language] [language on top (en|fr|...)] [source .srt] [target .ass (optional)]
#
# ex., $ ./transmerge.sh en fr en movie.srt
#
# New file: movie.ass
#
# Above translates English to merged English+French with English displaying at the top of the screen and French the bottom.
#
# Requires bash (the script relies on [[ ]], (( )) and ${var:offset:length}) plus the file, iconv, perl and wget commands.
#
if [[ $# -lt 4 ]]; then
  printf 'Usage: %s [source language] [target language] [language on top] [source .srt] [target .ass (optional)]\n' "${0##*/}" >&2
  exit 2
fi

source_lang=$1
target_lang=$2
top=$3
source_file=$4
file_saved=$5
sleep_s=12   ## Seconds to wait between two translation requests
lines=0      ## Subtitle entries counted by the formatting pass
count=0      ## Subtitle entries translated so far

if [[ ! -r $source_file ]]; then
  printf 'Error: cannot read source file: %s\n' "$source_file" >&2
  exit 1
fi

## The language that is not shown on top goes to the bottom
if [[ $top == "$source_lang" ]]; then
  bot=$target_lang
else
  bot=$source_lang
fi

## Default output name: the source name with .srt replaced by .ass. When the source does not end in .srt,
## .ass is appended so that the output can never end up with the same name as the source file.
if [[ -z $file_saved ]]; then
  file_saved="${source_file%.srt}.ass"
fi

## Work on timestamped temporary names; the timestamp suffix is stripped again once the run has finished
file_saved="${file_saved}.$(date +"%m%d%H%M%S")"
formatted_file="${file_saved}.1"

## Normalise the input to UTF-8. Input detected as utf-8 is passed through iconv; anything else is read
## as raw bytes by perl and written out as UTF-8 (i.e. it is treated as Latin-1).
filetype=$(file -b --mime-encoding -- "$source_file")
if [[ $filetype == "utf-8" ]]; then
  iconv -f "$filetype" -t UTF-8 -o "$formatted_file" -- "$source_file"
else
  perl -CS -pwe '' -- "$source_file" > "$formatted_file"
fi || {
  printf 'Error: could not convert %s to UTF-8\n' "$source_file" >&2
  exit 1
}

## Strip <i> tags, "- " dialogue markers, asterisks and carriage returns, and squeeze runs of blank
## lines down to a single blank line
perl -i -CS -ane 's/<i>//g; s/<\/i>//g; s/- //g; s/\*//g; s/\r//g; $n=(@F==0) ? $n+1 : 0; print if $n<=1' -- "$formatted_file"
## ASS file header. The value is deliberately one long line containing literal "\n" sequences: they are
## only turned into real newlines when the header is printed (printf %b here, echo -e for the on-screen
## banner printed before each pass).
substart="[Script Info]\n\
ScriptType: v4.00+\n\
Collisions: Normal\n\
PlayDepth: 0\n\
Timer: 100,0000\n\
Video Aspect Ratio: 0\n\
WrapStyle: 0\n\
ScaledBorderAndShadow: no\n\
\n\
[V4+ Styles]\n\
Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,OutlineColour,BackColour,Bold,Italic,Underline,StrikeOut,ScaleX,ScaleY,Spacing,Angle,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,Encoding\n\
Style: ${top},Arial,10,&H00F9FFFF,&H00FFFFFF,&H00000000,&H00000000,-1,0,0,0,100,100,0,0,1,1,0,8,10,10,10,0\n\
Style: ${bot},Arial,18,&H00F9FFF9,&H00FFFFFF,&H00000000,&H00000000,-1,0,0,0,100,100,0,0,1,2,0,2,10,10,10,0\n\
\n\
[Events]\n\
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text"

## write_ass_header FILE
## Create (or truncate) FILE and write the expanded ASS header to it. Both the header and the file name
## are quoted so that the header is never glob-expanded and file names containing spaces work.
write_ass_header() {
  printf '%b\n' "$substart" > "$1"
}

write_ass_header "$file_saved"
## url_encode TEXT
## Print TEXT percent-encoded for use as a URL query value. perl runs without -C here, so the text is
## handled byte by byte and multi-byte UTF-8 characters come out as one %XX per byte.
url_encode() {
  printf '%s' "$1" | perl -pe 's/([^A-Za-z0-9_.~-])/sprintf("%%%02X", ord($1))/ge'
}

## translate_text TEXT
## Print the translation of TEXT from $source_lang to $target_lang as returned by the Google Translate
## web endpoint. The first perl call collects the translated segments from the response and joins them
## with spaces, the second one turns escaped quotes (\") back into plain quotes.
## An alternative backend would be: trans -s "$source_lang" -t "$target_lang" -b "$text"
translate_text() {
  wget -U "Mozilla/5.0" -q -O- "http://translate.googleapis.com/translate_a/single?client=gtx&sl=$source_lang&tl=$target_lang&dt=t&q=$(url_encode "$1")" \
    | perl -lne 'push @a,/(?<!\,\[\[)\[\"(.*?)(?<!\\)\"/g;END{print "@a"}' \
    | perl -CS -pwe 's/\N{U+005C}\N{U+0022}/\N{U+0022}/g;'
}

## fix_punctuation LANG TEXT
## Print TEXT with whitespace normalised (trimmed, runs collapsed to one space) and with the spacing
## around ! ? ; : adjusted for LANG: French gets a space before these marks and inside « » quotes,
## every other language gets the space before them removed.
## “ = U+201C, ” = U+201D, ' = U+0027 , " = U+0022, « = U+00AB, » = U+00BB
fix_punctuation() {
  local lang=$1
  local IFS=$' \t\n'
  local -a words=()
  local joined

  ## Split into words without glob expansion (a lone "?" or "*" must stay literal); read returns
  ## non-zero at end of input even though the array has been filled
  read -r -d '' -a words <<< "$2" || true
  joined="${words[*]}"

  if [[ $lang == "fr" ]]; then
    printf '%s\n' "$joined" | perl -CS -pwe 's/((?!=\w)\s(?=[!|?|;|:]))//g; s/((?<=[\w|\s])[!|?|;|:])/ $&/g; s/\N{U+00AB}(?=\S)/$& /g; s/(?=\S)\N{U+00BB}/ $&/g;'
  else
    printf '%s\n' "$joined" | sed 's/ !/!/g; s/ ?/?/g; s/ :/:/g; s/ ;/\;/g;'
  fi
}

## Two passes over the normalised subtitles: "Formatting" previews the untranslated result on screen
## and counts the entries, "Translating" fetches the translations and appends the merged lines to the
## output file, pausing between requests.
for task in "Formatting" "Translating"; do
  printf '\n\n%s %s.srt ...\n\n\n%b\n' "$task" "${source_file%.*}" "$substart"

  ## Each entry is: index line, time line, one or more text lines, blank line. The extra test keeps a
  ## final one-line entry whose text is not terminated by a newline.
  while read -r lineNum; read -r lineTime; read -r lineText || [[ -n $lineText ]]; do
    ## Join all text lines of the entry into one line, stopping at the first blank line
    text=""
    while [[ $lineText =~ [^[:space:]] ]]; do
      text+="$lineText "
      read -r lineText
    done

    if [[ $task == "Translating" ]]; then
      translation=$(translate_text "$text")
    else
      translation=$text
    fi

    ## Add\Remove a space for question marks, exclamation marks, colons... depending on language
    text=$(fix_punctuation "$source_lang" "$text")
    translation=$(fix_punctuation "$target_lang" "$translation")

    ## Grab and incorporate timeline info: "HH:MM:SS,mmm --> HH:MM:SS,mmm" becomes "H:MM:SS.cc,H:MM:SS.cc".
    ## Substring expansion is faster than awk, sed, and cut
    timeStamp="Dialogue: 0,${lineTime:1:7}.${lineTime:9:2},${lineTime:18:7}.${lineTime:26:2},"
    if [[ $top == "$source_lang" ]]; then
      first="${timeStamp}${top},,0000,0000,0000,,${text}"
      second="${timeStamp}${bot},,0000,0000,0000,,${translation}"
    else
      first="${timeStamp}${top},,0000,0000,0000,,${translation}"
      second="${timeStamp}${bot},,0000,0000,0000,,${text}"
    fi

    if [[ $task == "Translating" ]]; then
      count=$((count + 1))
      printf '%s\n%s\n' "$first" "$second" | tee -a "$file_saved"
      ## Don't sleep after last translation as it's not necessary
      if (( lines != count )); then
        percentage=$(( count * 100 / lines ))
        printf '\e[s' ## Save cursor position
        for (( i = sleep_s; i > 0; i-- )); do
          timeleft=$(date -d "@$(( (lines - count - 1) * sleep_s + i ))" -u +%H:%M:%S)
          printf '\e[7m\e[1mFile: %s Logical line: %s / %s (%s%%) Real line: %s / %s Time left: %s Next translation: %ss... \e[u' \
            "$source_file" "$count" "$lines" "$percentage" "$lineNum" "$realLines" "$timeleft" "$i"
          sleep 1
        done
        ## Print spaces to delete info line then reset cursor to previous position
        printf "%-135s %s" "Translating..." $'\e[u'
      fi
    else
      lines=$((lines + 1))
      printf '%s\n%s\n' "$first" "$second"
      realLines=$lineNum
    fi
  done < "$formatted_file"
done
## absolute_path PATH
## Print PATH unchanged when it is already absolute, otherwise prefixed with the current directory.
absolute_path() {
  case $1 in
    /*) printf '%s\n' "$1" ;;
    *) printf '%s/%s\n' "$PWD" "$1" ;;
  esac
}

## Strip the last dot-suffix from the working name to obtain the final output name
new_file_saved=${file_saved%.*}
mv -- "$file_saved" "$new_file_saved"
mv_status=$?

## The normalised copy of the source is no longer needed
rm -- "$formatted_file"

if (( mv_status == 0 )); then
  printf '\n\nDone!\n\n\n'
  ## Ready-to-copy commands for inspecting the result and the original subtitles
  printf 'cat %s\n' "$(absolute_path "$new_file_saved")"
  printf 'vi %s\n' "$(absolute_path "$source_file")"
else
  printf 'Error: could not rename %s to %s\n' "$file_saved" "$new_file_saved" >&2
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment