-
-
Save srghma/9c1ebb299282695382d5d266a081c1c3 to your computer and use it in GitHub Desktop.
A simple Linux shell script for translating an .srt file into another language and merging both languages into an .ass file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# This program takes an .srt file, translates it, and merges both translations into a .ass file with the user's selected
# language on top of the screen and the other language at the bottom.
#
# Usage: ./transmerge.sh [source language] [target language] [language on top (en|fr|...)] [source .srt] [target .ass (optional)]
#
# ex., $ ./transmerge.sh en fr en movie.srt
#
# New file: movie.ass
#
# Above translates English to merged English+French with English displaying at the top of the screen and French the bottom.
#
# Requires bash (the script uses [[ ]], substring expansion and C-style for loops) plus file, iconv, perl, sed,
# wget and GNU date.
#
if [ "$#" -lt 4 ]; then
  printf 'Usage: %s <source language> <target language> <language on top> <source .srt> [target .ass]\n' "${0##*/}" >&2
  exit 2
fi

source_lang=$1
target_lang=$2
top=$3
source_file=$4
file_saved=${5:-}
sleep_s=12 ## Seconds to wait between two translation requests
lines=0    ## Number of subtitle entries, counted during the "Formatting" pass
count=0    ## Number of entries already handled during the "Translating" pass

if [ ! -r "$source_file" ]; then
  printf '%s: cannot read source file: %s\n' "${0##*/}" "$source_file" >&2
  exit 1
fi

## The language that is not shown on top goes to the bottom
if [ "$top" = "$source_lang" ]; then
  bot=$target_lang
else
  bot=$source_lang
fi

## Default target: source name with .srt swapped for .ass. The .ass suffix is always appended so the
## target can never be the same path as the source, even when the source does not end in .srt.
if [ -z "$file_saved" ]; then
  file_saved="${source_file%.srt}.ass"
fi

## Work on timestamped temporary names; the timestamp suffix is stripped again once everything is done
file_saved="${file_saved}.$(date +"%m%d%H%M%S")"
formatted_file="${file_saved}.1"

## Produce a UTF-8 working copy of the source
filetype=$(file -b --mime-encoding -- "$source_file")
if [ "$filetype" = "utf-8" ]; then
  ## Explicit target charset so the result does not depend on the current locale
  iconv -f UTF-8 -t UTF-8 -- "$source_file" > "$formatted_file" || {
    printf '%s: iconv failed on %s\n' "${0##*/}" "$source_file" >&2
    exit 1
  }
else
  ## perl reads the file as bytes and writes through a UTF-8 STDOUT layer (-CS),
  ## i.e. anything that is not UTF-8 is treated as Latin-1 and re-encoded
  perl -CS -pwe '' -- "$source_file" > "$formatted_file"
fi

## Strip <i> tags, "- " dialogue dashes, asterisks and carriage returns; collapse runs of blank lines into one
perl -i -CS -ane 's/<i>//g; s/<\/i>//g; s/- //g; s/\*//g; s/\r//g; $n=(@F==0) ? $n+1 : 0; print if $n<=1' -- "$formatted_file"
## ASS header template: [Script Info], [V4+ Styles] and the [Events] format line.
## Line breaks are stored as literal "\n" escapes and expanded at print time (printf %b),
## so the variable itself stays a single line.
## Style "$top" is rendered small at the top (Alignment 8), style "$bot" larger at the bottom (Alignment 2).
## NOTE(review): "Timer: 100,0000" uses a comma; ASS files usually carry "100.0000" -- confirm before changing.
substart="[Script Info]\n\
ScriptType: v4.00+\n\
Collisions: Normal\n\
PlayDepth: 0\nTimer: 100,0000\n\
Video Aspect Ratio: 0\n\
WrapStyle: 0\n\
ScaledBorderAndShadow: no\n\
\n\
[V4+ Styles]\n\
Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,OutlineColour,BackColour,Bold,Italic,Underline,StrikeOut,ScaleX,ScaleY,Spacing,Angle,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,Encoding\n\
Style: ${top},Arial,10,&H00F9FFFF,&H00FFFFFF,&H00000000,&H00000000,-1,0,0,0,100,100,0,0,1,1,0,8,10,10,10,0\n\
Style: ${bot},Arial,18,&H00F9FFF9,&H00FFFFFF,&H00000000,&H00000000,-1,0,0,0,100,100,0,0,1,2,0,2,10,10,10,0\n\
\n\
[Events]\n\
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text"

## Start the output file with the header. Both expansions are quoted so a target path containing
## spaces works and the template is never word-split or glob-expanded.
printf '%b\n' "$substart" > "$file_saved"
## Percent-encode $1 for use as a URL query value and print the result.
## Works on raw bytes (via od), so multi-byte UTF-8 characters are encoded correctly in any locale.
## Only the RFC 3986 unreserved characters (A-Z a-z 0-9 - . _ ~) are left as they are.
urlencode() {
  local hex pair char encoded='' i
  hex=$(printf '%s' "$1" | od -An -v -tx1 | tr -d ' \n')
  for ((i = 0; i < ${#hex}; i += 2)); do
    pair=${hex:i:2}
    case $pair in
      3[0-9] | 4[1-9a-f] | 5[0-9a] | 6[1-9a-f] | 7[0-9a] | 2d | 2e | 5f | 7e)
        printf -v char "\\x${pair}"
        encoded+=$char
        ;;
      *)
        encoded+="%${pair^^}"
        ;;
    esac
  done
  printf '%s' "$encoded"
}

## Add\Remove a space for question marks, exclamation marks, colons... depending on language.
##   $1 - language code of the phrase ("fr" gets a space before ! ? ; : and inside guillemets,
##        every other language has the space before ! ? : ; removed)
##   $2 - the phrase
## Prints the normalised phrase. Whitespace runs are squeezed to single spaces first; this is done
## with read -a rather than an unquoted expansion so words such as "?" or "[a]" are never glob-expanded.
## “ = U+201C, ” = U+201D, ' = U+0027 , " = U+0022, « = U+00AB, » = U+00BB
normalize_punctuation() {
  local lang=$1 phrase=$2
  local -a words=()
  read -r -d '' -a words <<< "$phrase" || true ## read hits EOF (status 1) by design
  phrase="${words[*]}"
  if [[ $lang == "fr" ]]; then
    ## NOTE(review): "(?!=\w)" looks like a typo for "(?<=\w)"; kept as is to preserve the output.
    printf '%s\n' "$phrase" | perl -CS -pwe 's/((?!=\w)\s(?=[!|?|;|:]))//g; s/((?<=[\w|\s])[!|?|;|:])/ $&/g; s/\N{U+00AB}(?=\S)/$& /g; s/(?=\S)\N{U+00BB}/ $&/g;'
  else
    printf '%s\n' "$phrase" | sed 's/ !/!/g; s/ ?/?/g; s/ :/:/g; s/ ;/\;/g;'
  fi
}

## Two passes over the formatted subtitles: "Formatting" previews the result and counts the entries,
## "Translating" does the real work and appends to the output file, pausing between requests.
for task in "Formatting" "Translating"; do
  printf '\n\n%s %s.srt ...\n\n\n%b\n' "$task" "${source_file%.*}" "$substart"
  while read -r lineNum; do
    ## Tolerate a stray blank line before an entry so index/time/text stay aligned
    [[ $lineNum =~ [^[:space:]] ]] || continue
    read -r lineTime || break
    ## Keep the final text line even if the file does not end with a newline
    read -r lineText || [[ -n $lineText ]] || break
    ## Join all text lines of this entry with single spaces; stop at the first blank line
    text=""
    while [[ $lineText =~ [^[:space:]] ]]; do
      text=${text:+$text }$lineText
      read -r lineText
    done
    if [[ $task == "Translating" ]]; then
      ## The text is percent-encoded so characters such as & # + % cannot truncate or alter the query
      translation=$(wget -U "Mozilla/5.0" -q -O- "http://translate.googleapis.com/translate_a/single?client=gtx&sl=${source_lang}&tl=${target_lang}&dt=t&q=$(urlencode "$text")" | perl -lne 'push @a,/(?<!\,\[\[)\[\"(.*?)(?<!\\)\"/g;END{print "@a"}' | perl -CS -pwe 's/\N{U+005C}\N{U+0022}/\N{U+0022}/g;')
      #translation=$(trans -s $source_lang -t $target_lang -b "$text")
    else
      translation=$text
    fi
    text=$(normalize_punctuation "$source_lang" "$text")
    translation=$(normalize_punctuation "$target_lang" "$translation")
    ## Grab and incorporate timeline info. This shell command is faster than awk, sed, and cut.
    ## "00:00:01,000 --> 00:00:04,000" becomes "0:00:01.00,0:00:04.00" (ASS uses centiseconds).
    timeStamp="Dialogue: 0,${lineTime:1:7}.${lineTime:9:2},${lineTime:18:7}.${lineTime:26:2},"
    if [[ $top == "$source_lang" ]]; then
      topText=$text
      botText=$translation
    else
      topText=$translation
      botText=$text
    fi
    first="${timeStamp}${top},,0000,0000,0000,,${topText}"
    second="${timeStamp}${bot},,0000,0000,0000,,${botText}"
    if [[ $task == "Translating" ]]; then
      count=$((count + 1))
      printf '%s\n%s\n' "$first" "$second" | tee -a -- "$file_saved"
      ## Don't sleep after last translation as it's not necessary
      if ((lines != count)); then
        percentage=$((count * 100 / lines))
        printf '\e[s' ## Save cursor position
        for ((i = sleep_s; i > 0; i--)); do
          timeleft=$(date -u -d "@$(((lines - count - 1) * sleep_s + i))" +%H:%M:%S)
          printf '\e[7m\e[1mFile: %s Logical line: %s / %s (%s%%) Real line: %s / %s Time left: %s Next translation: %ss... \e[u' \
            "$source_file" "$count" "$lines" "$percentage" "$lineNum" "$realLines" "$timeleft" "$i"
          sleep 1
        done
        ## Print spaces to delete info line then reset cursor to previous position
        printf "%-135s %s" "Translating..." $'\e[u'
      fi
    else
      lines=$((lines + 1))
      printf '%s\n%s\n' "$first" "$second"
      realLines=$lineNum
    fi
  done < "$formatted_file"
done
## Drop the temporary timestamp suffix (everything after the last dot) to get the final output name.
## All paths are quoted so file names containing spaces are moved and removed as single arguments.
new_file_saved=${file_saved%.*}
mv -- "$file_saved" "$new_file_saved"
rm -- "$formatted_file"
printf '\n\nDone!\n\n\n'
## Ready-to-paste commands for inspecting the result and the source
printf 'cat %s/%s\n' "$(pwd)" "$new_file_saved"
printf 'vi %s/%s\n' "$(pwd)" "$source_file"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment