bathtime · December 26, 2023 22:22 · bathtime · May 9, 2021
diff --git a/srtssa.sh b/srtssa.sh
 #!/bin/bash

 ## Print the version banner and the complete option reference to stdout.
 ## Takes no arguments and does not exit; the caller decides what to do next.
 ## The documented defaults below are kept in sync with the default
 ## assignments that follow this function (e.g. wait=10, top_pref1=fr).
 usage() {

   echo "Version 0.15.1, GNU GPLv3

 	*** WARNING: ALPHA VERSION CODE!!! THIS PROGRAM EATS HAMSTERS! ***

 This program takes an .srt, .txt, .pdf, or .epub file, translates it, and merges both translations
 into an .ssa or .txt file in a parallel manner, allowing both subtitles to be viewed at the same time.
 This can be helpful for people learning a new language.

 Usage: ./srtssa.sh [OPTIONS]... [FILE]

   Options:

 	-s, --source	<lang>	language of input file (ex., en, fr, ru...) (default: 'auto')
 	-t, --target	<lang>	language to translate to (default: [-lang1|-lang2])
 				make translation blank: 'blank'
 				just copy the source language: 'copy'
 				to omit altogether, set to 'blank' and add the flag '-delblank'
 	-i, --input	<file>	input file (ex., .srt, .pdf, .txt)
 	-o, --output	<file>	output file (ex., .ssa, .ass, .txt)
 	-sx, --sext	<ext>	source file extension (default: txt)
 	-tx, --text	<ext>	target file extension (default: txt)
 	-a, --above	<lang>	place this language above the other (default: [source])
 	-l1,--lang1	<lang>	1st preferred target language (default: 'en')
 	-l2,--lang2	<lang>	2nd preferred target language (default: 'fr')
 	-t1,--top1	<lang>	1st preferred language to be placed on top (default: 'fr'|source)
 	-t2,--top2	<lang>	2nd preferred language to be placed on top (default: 'en'|target)
 	-m, --mode	<mode>	options: srt, text, epub (default: source file's extension or 'text')

   Interface:

 	-h, --help	this help display
 	-g, --gtk	use graphical interface (requires zenity)
 	-q, --quiet	only display progress, errors, and final result (default)
 	-z, --stealth	prevent all information from being printed to screen (except errors)
 	-v, --verbose	display formatting information
 	-d, --debug	prints 'verbose' + raw text elements for debugging.
 	-x, --xbug	prints as --debug but also pipes source lines through 'less'

   Advanced Options (use at your own risk!):

 	-w,  --wait	<0-60>		seconds to wait between translations (default: '10')
 	--counting	[file|logic]	label line numbers according to the srt file's number or logical count.
 					This is useful when lines are skipped or with misnumbered srt files
 	--alt				use alternate translation engine (translate-shell app required)
 	--dif	   	[0,1-1000]	difference in length between the source and target text (in %)
 					which triggers a re-translation (default: '200'[%], '0'=off)
 	--mincutoff	[0-1000]	the minimum characters allowed else the line is erased [TODO]
 	--noerr				remove error detection
 	--norepair			do not retranslate on error
 	--makesubs			make an ssa file from txt, pdf, epub... (times will all be set to 0)
 	--insert			insert translated text directly below
 	--nosource			exclude source language from output file
 	--justformat			exit after formatting
 	--log				appends errors to a log file: [source].log
 	--keeptmp			don't delete temporary files
 	-ch, --chars	[100-20000]	max characters sent per translation (default: '5000')
 	-e,  --engine	[engine]	allows for the user to add their own translation engine.
 					%s = source lang, %t = target lang, %x = text to be translated
 					ex., ./srtssa.sh -ch 1000 -e 'trans -s %s -t %t -b %x' file.srt

   Text Manipulation:
        --clean				*** EXPERIMENTAL *** break text down into simple sentences
        --initclean			same as '--clean' but cleans before formatting lines
 					not recommended for use with srt files!
 	--cleantrans			same as '--clean' but cleans translated text
 	--clean-newline	[text]		specifies how '--clean' will handle new lines (default '\n\n')
 	--newline	[text]		change the newlines into another character (default ' ')
 	--backnew	[text]		change backslashed newline from one character to another (default '\\n')
 	--transnew	[text]		insert another character for newline text to be translated (default ' ')
 	--onelineeach			insert another line after each newline
 	--linespaced	[0-10]		determines how many concurrent newlines are allowed (--onelineeach forces this to 1)
 	--brackreturn			push bracketed text to a new line
 	--delblank			delete results with no text
 	--deluni			delete any unicode (ex., \u003c)
 	--deltag			delete tags (ex., <i>...</i>)
 	--removeline	[text]		remove any line containing this text
 	--simpleclean			inserts two newlines after each comma, period, or semi-colon
 	--commareturn			inserts two newlines after each comma

   Epub Options:

 	--w3mepub		render epub files with 'w3m' (must be installed)
 	--html2text		render epub files with 'html2text' (must be installed)
 	--removepage		attempt to remove page numbers from text
 	--raw			keep carriage returns and other potentially bad characters
 	--epub-newline1	[text]	determines how lines are joined at '<p' tags (default '\n\n')
 				only works with default engine!
 	--epub-newline2	[text]	determines how lines are joined at '<' tags (default ' ')
 				only works with default engine!

   Pdf Options:

 	--mupdf			render pdf files with 'mutool' ('mupdf-tools' must be installed)
 				to render pdf files by default 'poppler-utils' must be installed

   Misc Options:

 	--calibre		render several types of files with 'ebook-convert' (calibre must be installed)
 	--pandoc		render several types of files with 'pandoc' (must be installed)

   Example Usage:

 	$ ./srtssa.sh -s en -t fr -a en -i movie.srt

   The above example translates English to a merged (English and French) .ssa file with English displaying at the top of the screen and French the bottom."
 }

 ## ---------------------------------------------------------------------
 ## Default settings. Every value here may be overridden by the command
 ## line options parsed further down.
 ## ---------------------------------------------------------------------

 ## Interface
 ## NOTE(review): 5 is above every level the option parser assigns (0-4),
 ## while the help text calls "quiet" the default -- confirm intent.
 interface=5
 gtk=0

 ## Language preferences
 tar_pref1=en
 tar_pref2=fr
 top_pref1=fr
 top_pref2=en

 ## Translation engine and pacing
 ## NOTE(review): alt defaults to 1, so the --alt flag changes nothing.
 alt=1
 wait=10
 charMax=5000
 dif=200
 mincutoff=1
 notrans=0
 noerr=0
 norepair=0

 ## Output shaping
 counting=logic
 nosource=0
 makesubs=0
 justformat=0
 keeptmp=0
 delblank=0
 deluni=0
 deltag=0
 removeline=''

 ## New-line handling
 linespaced=2
 onelineeach=0
 newline=' '
 backnew='\\n'
 transnew=' '
 clean_newline='\n\n'
 epub_newline1='\n\n'
 epub_newline2=''

 ## Text clean-up
 ## NOTE(review): initclean and commareturn default to 1, so their flags
 ## (--initclean, --commareturn) change nothing either.
 clean=0
 initclean=1
 cleantrans=0
 brackreturn=0
 simpleclean=0
 commareturn=1
 removepage=0

 ## Rendering back-ends
 raw=0
 w3mepub=0
 html2text=0
 calibre=0
 mupdf=0
 pandoc=0

 ## Print the given message followed by " Aborting." (backslash escapes are
 ## interpreted) and terminate the script with exit status 3.
 abort() {
   echo -e "$@ Aborting."
   exit 3
 }
 ## Exit handler: says goodbye unless the interface level is below 1,
 ## removes the GUI progress file, and removes the temporary work file
 ## unless keeptmp is "1".
 bye() {
   if [ "$interface" -ge 1 ]; then
     echo -e "\nGoodbye...       "
   fi

   rm -rf "$gui_file"

   [ "$keeptmp" = "1" ] || rm -rf "$tmp_file"
 }


 filterOpts() { echo "$@" | perl -CS -pe 's#^(?=\S)#--#g;'; }

 ## GTK "preset" wizard: when the script is started without arguments and
 ## gtk is 1, ask for the source file, the languages and the various option
 ## groups through zenity dialogs, then install the answers as the
 ## positional parameters so the regular option parser below handles them.
 ## NOTE(review): gtk is tested here before the command line is parsed, so
 ## it still holds the default assigned above; with that default at 0 this
 ## branch looks unreachable -- confirm intent.
 if [ "$1" = "" ] && [ "$gtk" -eq 1 ] ; then

   src_file=$(zenity --text='Please choose a source file to translate' --file-selection --multiple --file-filter=*.*)



   [ -z "$src_file" ] && abort 'No source file selected.'

   ## Language choices are turned straight into "-s <lang>" / "-t <lang>" option strings
   langS=$(zenity --list --editable --title="Choose Source Language" --column="Language" en fr auto)
   [ -z "$langS" ] && langS='-s auto' || langS='-s '$langS
   langT=$(zenity --list --editable --height=240 --title="Choose Target Language" --column="Language" en fr auto copy blank)
   [ -z "$langT" ] && langT='-t auto' || langT='-t '$langT

   ## Checklist results are joined with ' --' by zenity; filterOpts adds the leading '--'
   ## NOTE(review): the checklist offers 'bracketreturn' while the option parser handles '--brackreturn'
   cleanOpts=$(filterOpts $(zenity --list --checklist --editable --height=350 --separator=' --' --title="Choose Cleanup Options" --column="Type" --column="Cleanup:" FALSE initclean FALSE clean FALSE cleantrans FALSE bracketreturn TRUE delblank TRUE deluni FALSE deltag FALSE removepage))

   newlineOpts=$(filterOpts $(zenity --list --checklist --editable --height=750 --separator=' --' --title="Choose New Line Options" --column="Type" --column="Cleanup:" TRUE 'newline " "' FALSE 'newline "\n"' FALSE 'newline "\n\n"' FALSE 'newline "\\n"' FALSE 'newline ""' TRUE 'clean-newline "\n\n"' FALSE 'clean-newline "\n"' FALSE 'clean-newline "\n\n"' FALSE 'clean-newline "\\n"' FALSE 'clean-newline ""' TRUE 'backnew "\\n"' FALSE 'backnew "\n"' FALSE 'backnew " "' FALSE 'backnew ""' TRUE 'transnew " "' FALSE 'transnew "\n"' FALSE 'transnew "\\n"' FALSE 'transnew ""' FALSE 'epub-newline1 "\n"' TRUE 'epub-newline1 "\n\n"' FALSE 'epub-newline1 "\n"' FALSE 'epub-newline1 "\\n"' FALSE 'epub-newline1 ""' FALSE 'epub-newline2 "\n"' FALSE 'epub-newline2 "\n\n"' FALSE 'epub-newline2 "\n"' FALSE 'epub-newline2 "\\n"' TRUE "epub-newline2 ' '" ))

   engineOpts=$(filterOpts $(zenity --list --checklist --height=550 --editable --separator=' --' --title="Choose Engine Options" --column="Type" --column="Cleanup:" FALSE alt FALSE noerr FALSE norepair FALSE insert FALSE nosource FALSE justformat FALSE log FALSE keeptmp FALSE w3mepub FALSE html2text FALSE raw FALSE mupdf FALSE calibre FALSE pandoc))

   options=$langS' '$langT' '$cleanOpts' '$newlineOpts' '$engineOpts

   [ "$interface" -ge 4 ] && echo "Options chosen:"$options

   ## Replace the positional parameters with the chosen options ($options is
   ## split on whitespace on purpose). $1 is empty inside this branch, so the
   ## trailing "${1%.*}" adds one empty argument.
   set -- $options "${1%.*}"
   ## Tells the later positional-argument fallback not to overwrite src_file
   preset=1
 fi

 #export PERL_UNICODE=SDL

 ## Format text for increased readability.
 ## Usage: clean <text>
 ## Runs the text through a series of perl substitutions that insert
 ## $clean_newline at sentence-like boundaries, optionally pushes bracketed
 ## text onto its own line ($brackreturn) and strips leading/trailing digits
 ## ($removepage), then prints the result on stdout.
 ## Reads globals: clean_newline, brackreturn, removepage.
 ## Note: the working variable `text` is not local, so a direct (non-subshell)
 ## call overwrites the caller's `text`.
 clean() {

   ## Code points used below: U+2026 = horizontal ellipsis, U+00AB / U+00BB =
   ## left / right guillemets, U+2014 = em dash
   ## …  = 2026, « = 00AB, # » = 00BB,— = 2014 
   text=$(LC_ALL=C echo "$1" | LC_ALL=C perl -CS -pe 's/ {2,}/ /g; \
              s#(?<=[:|;])\s?\)?\s?(?!\s?\))#$&'"${clean_newline}"'#g; \
              s#\.{3,}#...#g; s#\N{U+2026}\.{1,}#\N{U+2026}#g; \
              s#\.{1,}\N{U+2026}#\N{U+2026}#g; s#\.{3,}#...#g;\
              s#(?<!^^)(?<!\S)(?<!\d)(\-|\N{U+2014})#'"${clean_newline}"'$&#g; \
              s#(?<=\,)\s(?=\N{U+2014}|\"\S)#'"${clean_newline}"'#g; \ ## ,—  ,"
              s#(?<!^^)(?<!ddd)\N{U+00AB}#'"${clean_newline}"'\N{U+00AB}#g; \
              s#(?<=(?:^\.|\N{U+2026}))\s?(?=\-|\N{U+2014})#$1'"${clean_newline}"'#g;\ ## Em dash: .—
              s#(?<!\N{U+2014}\N{U+2026})(?<!\N{U+2014}\s\N{U+2026})(?<=([\N{U+2026}|\.|\?|\!|\"|\N{U+00BB}]))\s(?![\!|\?|\:|\;|\)|\"|\s|\N{U+00BB}|\.])#'"${clean_newline}"'#g; \
              s#(?<=(?:\N{U+00BB}|\?|\!|\.))(?![\d|\N{U+00BB}|\!|\?|\"|\)|\s|\.|\:|\;|\N{U+2026}])#'"${clean_newline}"'#g;') 

   ## Brackets on a new line:
   if [ "$brackreturn" -eq 1 ]; then
      text=$(LC_ALL=C echo "$text" | LC_ALL=C perl -CS -pe 's/ {2,}/ /g; \ 
      s#(?<!^^)(?=\s+\(+\s?)\(+#'"${clean_newline}"'$&#g; \ 
      s#(?<=[^\N{U+2014}]([\N{U+2026}|\.|\?|\!|\"|\)|\N{U+00BB}]))\s(?![\!|\?|\)|\;|\:|\"|\s|\N{U+00BB}|\.])#'"${clean_newline}"'#g; \ 
      s#(?<=(?:\N{U+00BB}|\?|\!|\.|\)))(?![\d|\N{U+00BB}|\!|\?|\"|\)|\s|\;|\:|\.|\N{U+2026}])#'"${clean_newline}"'#g;')
   fi

   ## Page-number removal: drop digits at the very start and very end of each line
   [ "$removepage" -eq 1 ] && text=$(echo "$text" | LC_ALL=C perl -CS -pe 's#^\d+##; s#\d+$##g; ')

   ## Remove excess spaces and blank characters (U+00A0 is the no-break space)
   text=$(LC_ALL=C echo "$text" | LC_ALL=C perl -CS -pe 's#\N{U+00A0}##g; s# +# #g; s#^ ##g; s# $##g;') 

   echo "$text"
 }

 ## Translate the text given as arguments and print the result on stdout.
 ## Reads globals: src_lang, tar_lang, transnew, engine, alt.
 ##
 ## Engine selection, in order:
 ##   tar_lang=copy   print the (lightly sanitised) source text unchanged
 ##   tar_lang=blank  print nothing
 ##   engine set      run the user supplied command template
 ##                   (%s = source lang, %t = target lang, %x = the text)
 ##   alt = 1         run ./trans (translate-shell) from the current directory
 ##   otherwise       query translate.googleapis.com through wget
 ##
 ## The original tested `[ -n "$alt" ]`, which is true for any non-empty value
 ## (alt defaults to 1), so neither a custom --engine nor the wget engine could
 ## ever be reached. An explicit --engine now takes precedence and the
 ## alternate engine is used only when alt is exactly 1.
 translation() {
   local text run

   ## Strip '#', '&' and '*' and replace literal "\n" sequences with $transnew
   text=$(LC_ALL=C echo "$@" | LC_ALL=C sed -r 's#(\#|\&|\*)##g; s#(\\n|\n)#'"${transnew}"'#g;')

   if [ "$tar_lang" = "copy" ]; then
      LC_ALL=C echo "$text"
   elif [ "$tar_lang" = "blank" ]; then
      :
   elif [ -n "$engine" ]; then
      ## Only the first occurrence of each placeholder is substituted. %x
      ## becomes a literal "$@", which eval expands to this function's own
      ## arguments (the unsanitised text).
      ## NOTE(review): the template is eval'ed verbatim; it comes from the
      ## user's own command line, never feed it untrusted input.
      run="$engine"
      run=${run/\%s/$src_lang}
      run=${run/\%t/$tar_lang}
      run=${run/\%x/'"$@"'}
      LC_ALL=C eval "$run" | sed -r 's/[\#|\&|\*]//g; s#u200b# #g;'
   elif [ "$alt" = "1" ]; then
      LC_ALL=C ./trans -s "$src_lang" -t "$tar_lang" -b "$text"
   else
      LC_ALL=C wget -U "Mozilla/5.0" -q -O- "http://translate.googleapis.com/translate_a/single?client=gtx&sl=$src_lang&tl=$tar_lang&dt=t&q=$text" | perl -CS -X -lne 'push @a,/(?<!\,\[\[?)\[\"(.*?)(?<!\\)\"/g;END{print "@a" }' | LC_ALL=C perl -CS -pwe 's/\N{U+005C}\N{U+0022}\s?/\N{U+0022}/g;' | LC_ALL=C sed -r ' s#\\u200b##g;'
   fi
 }

 ## Needed to decode line numbers from Roman numerals when translating Latin.
 ## Usage: roman <decimal number>
 ## Prints the Roman-numeral spelling of a non-negative decimal integer.
 ##
 ## The last three digits use the usual I/V/X, X/L/C and C/D/M triples. A
 ## fourth digit is rendered with M plus ${U}V${R} / ${U}X${R}; U and R are
 ## not set anywhere in the visible part of this script, so this normally
 ## degrades to plain V / X. Numbers with five or more digits emit one "M"
 ## per thousand and then convert only the remaining three digits. The
 ## original kept iterating over the thousands digits after emitting the M's
 ## and so counted them twice (12345 produced 14 M's instead of 12).
 roman() {
   local input=$* output="" len thousands digit

   ## Spell one decimal digit with the given one/five/ten symbols.
   roman_val() {
      local N=$1 one=$2 five=$3 ten=$4 out=""

      case $N in
         [123]) while [[ $N -gt 0 ]]; do out+="$one"; N=$(( N - 1 )); done ;;
         4)     out+="$one$five" ;;
         5)     out+="$five" ;;
         [678]) out+="$five"
                N=$(( N - 5 ))
                while [[ $N -gt 0 ]]; do out+="$one"; N=$(( N - 1 )); done ;;
         9)     out+="$one$ten" ;;
      esac
      echo "$out"
   }

   len=${#input}

   ## Five or more digits: everything above the hundreds is a count of thousands
   if [[ $len -gt 4 ]]; then
      thousands=$(( 10#${input:0:len-3} ))   # 10# keeps leading zeros from meaning octal
      while [[ $thousands -gt 0 ]]; do
         output+="M"
         thousands=$(( thousands - 1 ))
      done
      input=${input:len-3}
      len=3
   fi

   while [[ $len -gt 0 ]]; do
      digit=${input:0:1}
      case $len in
         1) output+="$(roman_val "$digit" I V X)" ;;
         2) output+="$(roman_val "$digit" X L C)" ;;
         3) output+="$(roman_val "$digit" C D M)" ;;
         4) output+="$(roman_val "$digit" M "${U}V${R}" "${U}X${R}")" ;;
      esac
      input=${input:1}
      len=${#input}
   done

   echo "$output"
 }

 ## Run bye() whenever the script exits (normal end, `exit`, or abort) so the
 ## goodbye message is printed and the GUI/temporary files are cleaned up.
 trap 'bye' EXIT

 ## Parse the command line into the global option variables.
 ## Usage: parse_options "$@"
 ## Leaves the non-option arguments in the global array `positional` and
 ## returns 2 when getopt rejects the command line.
 ##
 ## Fixes relative to the previous inline parser:
 ##   * the case statement had no default branch, so any option getopt
 ##     accepted but the case did not handle made the loop spin forever
 ##     (-n, --chunks, --tl1/--tl2, and -l1/-l2/-sx/-tx, which getopt
 ##     reports as --l1/--l2/--sx/--tx)
 ##   * --wait, --top1 and --top2 were declared without an argument while the
 ##     case consumed one; --brackreturn was declared as "brackret"; --debug
 ##     and -t1/-t2 were not declared at all
 ##   * the getopt output was expanded unquoted, which collapsed runs of
 ##     whitespace inside option values (e.g. --newline '  ')
 ##   * an invalid command line is now rejected instead of being half-parsed
 parse_options() {
   local parsed

   parsed=$(getopt -a -n st2ssa \
      -o i:xo:w:a:s:dzghvqt:e:m: \
      --long log,sx:,mode:,tx:,sext:,text:,dif:,noerr,notrans,engine:,alt,ch:,chars:,l1:,l2:,t1:,t2:,gtk,stealth,verbose,debug,lang1:,lang2:,top1:,top2:,help,source:,target:,output:,above:,w3mepub,wait:,raw,delblank,counting:,keeptmp,nosource,transnew:,nl:,newline:,backnew:,insert,xbug,brackreturn,clean,initclean,clean-newline:,cleantrans,justformat,epub-newline1:,epub-newline2:,removepage,html2text,calibre,mupdf,pandoc,norepair,deluni,deltag,quiet,input:,removeline:,makesubs,onelineeach,simpleclean,commareturn,mincutoff:,linespaced: \
      -- "$@") || return 2

   ## getopt shell-quotes its output; quoting the expansion keeps it intact
   eval set -- "$parsed"

   while [ $# -gt 0 ]; do
     case "$1" in
       -a   | --above)		top=$2;			shift 2 ;;
       --l1 | --lang1)		tar_pref1=$2;		shift 2 ;;
       --l2 | --lang2)		tar_pref2=$2;		shift 2 ;;
       --t1 | --top1)		top_pref1=$2;		shift 2 ;;
       --t2 | --top2)		top_pref2=$2;		shift 2 ;;
       --ch | --chars)		charMax=$2;		shift 2 ;;
       --dif)			dif=$2;			shift 2 ;;
       -w   | --wait)		wait=$2;		shift 2 ;;
       -i   | --input)		src_file="$2";		shift 2 ;;
       -o   | --output)		out_file="$2";		shift 2 ;;
       -s   | --source)		src_lang=$2;		shift 2 ;;
       -t   | --target)		tar_lang=$2;		shift 2 ;;
       -e   | --engine)		engine="$2";		shift 2 ;;
       --sx | --sext)		src_ext=$2;		shift 2 ;;
       --tx | --text)		tar_ext=$2;		shift 2 ;;
       -m   | --mode)		mode=$2;		shift 2 ;;
       --counting)		counting="$2";		shift 2 ;;
       --nl | --newline)	newline="$2";		shift 2 ;;
       --backnew)		backnew="$2";		shift 2 ;;
       --transnew)		transnew="$2";		shift 2 ;;
       --linespaced)		linespaced="$2";	shift 2 ;;
       --clean-newline)		clean_newline="$2";	shift 2 ;;
       --epub-newline1)		epub_newline1="$2";	shift 2 ;;
       --epub-newline2)		epub_newline2="$2";	shift 2 ;;
       --removeline)		removeline="$2";	shift 2 ;;
       --mincutoff)		mincutoff="$2";		shift 2 ;;
       -h   | --help)		usage;			exit    ;;
       -g   | --gtk)		gtk=1
 				[ -z "$(command -v zenity)" ] && abort 'zenity not installed.'
 				shift 1 ;;
       --w3mepub)		w3mepub="1"
 				[ -z "$(command -v w3m)" ] && abort 'w3m not installed.'
 				shift 1 ;;
       -z   | --stealth)	interface="0";		shift 1 ;;
       -q   | --quiet)		interface="1";		shift 1 ;;
       -v   | --verbose)	interface="2";		shift 1 ;;
       -d   | --debug)		interface="3";		shift 1 ;;
       -x   | --xbug)		interface="4";		shift 1 ;;
       --insert)		insert="1";		shift 1 ;;
       --makesubs)		makesubs="1";		shift 1 ;;
       --onelineeach)		onelineeach="1";	shift 1 ;;
       --simpleclean)		simpleclean="1";	shift 1 ;;
       --commareturn)		commareturn="1";	shift 1 ;;
       --raw)			raw="1";		shift 1 ;;
       --delblank)		delblank="1";		shift 1 ;;
       --deltag)		deltag="1";		shift 1 ;;
       --deluni)		deluni="1";		shift 1 ;;
       --brackreturn)		brackreturn="1";	shift 1 ;;
       --clean)			clean="1";		shift 1 ;;
       --initclean)		initclean="1";		shift 1 ;;
       --cleantrans)		cleantrans="1";		shift 1 ;;
       --html2text)		html2text="1";		shift 1 ;;
       --calibre)		calibre="1";		shift 1 ;;
       --mupdf)			mupdf="1";		shift 1 ;;
       --pandoc)		pandoc="1";		shift 1 ;;
       --alt)			alt=1;			shift 1 ;;
       --removepage)		removepage=1;		shift 1 ;;
       --log)			log=1;			shift 1 ;;
       --justformat)		justformat=1;		shift 1 ;;
       --keeptmp)		keeptmp=1;		shift 1 ;;
       --noerr)			noerr=1;		shift 1 ;;
       --norepair)		norepair=1;		shift 1 ;;
       --nosource)		nosource=1;		shift 1 ;;
       --notrans)		notrans=1; tar_lang="none"; shift 1 ;;
       --)			shift; break ;;
       ## Safety net: never loop forever on an option without a branch
       *)			abort "Unhandled option '$1'." ;;
     esac
   done

   positional=("$@")
 }

 positional=()
 parse_options "$@" || exit 2
 ## Whatever is left after the options (normally the source file name)
 set -- "${positional[@]}"


 ## No -i/--input given: take what is left of the command line as the source
 ## file name. After the GTK preset "$@" holds only an empty string, hence
 ## the extra check on the preset variable.
 if [ -z "$src_file" ] && [ -z "$preset" ]; then
   src_file="$*"
 fi

 ## Chunk bookkeeping and the running (1-based) output line number
 chunkTot=0
 chunkNum=0
 chunk=0
 line=1

 ## No source file yet: ask for one, through zenity when gtk is 1 and through
 ## a bash `select` menu otherwise. The glob / file filter follows --mode.
 if [ -z "$src_file" ]; then

   case "$mode" in
      srt)  src_ext="srt"  ;;
      epub) src_ext="epub" ;;
      text) src_ext="txt"  ;;
      pdf)  src_ext="pdf"  ;;
      *)    src_ext="*"    ;;
   esac

   if [ "$gtk" -eq 1 ]; then
      ## Gui interface prompt for source file. gui_file carries the progress
      ## percentage for the zenity progress dialog started later on, and
      ## CURRENT_PID lets that dialog kill this script.
      gui_file=$(pwd)"/tmp.$(date +"%m%d%H%M%S").tmp"
      touch "$gui_file"
      CURRENT_PID=$$

      src_file=$(zenity --text='Please choose a source file to translate' \
                        --file-selection --multiple --file-filter=*.$src_ext)
      [ -z "$src_file" ] && abort 'No source file selected.'

      src_lang="$(zenity --text='Please choose source language:' --title='Source language:' \
                         --ok-label='Translate' --list --editable --column="Language" \
                         --height=225 --extra-button='auto' "en" "fr" "ru")"
      [ -z "$src_lang" ] && abort 'No source language chosen.'
   else
      echo -e "Pick an .srt file to translate:\n"
      select fname in *.$src_ext; do
         src_file="$fname"
         break
      done
   fi

   [ -z "$src_file" ] && abort 'No source file selected.'
 fi

 ## Build the default output file name for a source file.
 ## Usage: default_out_file <source path> <target extension>
 ## Prints "<source without its last extension> (new).<target extension>".
 ## Only the extension of the file name itself is touched: a ".ext" that
 ## happens to occur in a directory name is left alone, and a name without
 ## any extension simply gets the suffix appended.
 default_out_file() {
   local src=$1 ext=$2
   local dir="" base=$src

   case "$src" in
      */*) dir=${src%/*}/; base=${src##*/} ;;
   esac
   case "$base" in
      ?*.*) base=${base%.*} ;;   # skip dot files such as ".srt"
   esac

   printf '%s\n' "${dir}${base} (new).${ext}"
 }

 [ ! -f "$src_file" ] && abort "File '$src_file' does not exist."

 src_ext="${src_file##*.}"

 ## Mode defaults to the source extension; anything unknown is plain text
 if [ -z "$mode" ]; then
   case $src_ext in
      srt)  mode="srt"  ;;
      epub) mode="epub" ;;
      pdf)  mode="pdf" ;;
      txt)  mode="text" ;;
      *)    mode="text" ;;
   esac
 fi

 ## Target extension: .ssa for subtitles, .srt when building subtitles from
 ## text (--makesubs), .txt otherwise
 if [ -z "$tar_ext" ]; then
   if [ "$mode" = "srt" ]; then
      tar_ext="ssa";
   elif [ "$makesubs" -eq 1 ]; then
      tar_ext="srt"
   else
      tar_ext="txt"
   fi
 fi

 ## The previous sed-based rename replaced every ".<ext>" occurrence in the
 ## path, used the extension as a regular expression, and produced a name
 ## identical to the source for extension-less files. The renderers and the
 ## output writer would then delete or overwrite the source.
 if [ -z "$out_file" ]; then
    out_file=$(default_out_file "$src_file" "$tar_ext")
    echo "New output file: $out_file"
 fi

 ## Rendering engines: convert the source document to plain text in
 ## $out_file and continue with that file as the new source. The explicit
 ## renderer flags (calibre, mupdf, pandoc) win over the mode-based
 ## pdf/epub handling.
 if [ "$calibre" -eq 1 ]; then

   [ -z "$(command -v ebook-convert)" ] && abort 'calibre not installed.'
   ebook-convert "$src_file" "$out_file"
   src_file="$out_file"

 elif [ "$mupdf" -eq 1 ]; then

   [ -z "$(command -v mutool)" ] && abort 'mutool not installed.'
   mutool convert -o "$out_file" "$src_file"
   src_file="$out_file"

 elif [ "$pandoc" -eq 1 ]; then

   [ -z "$(command -v pandoc)" ] && abort 'pandoc not installed.'
   pandoc -t rst "$src_file" -o "$out_file"
   src_file="$out_file"

 elif [ "$mode" = "pdf" ]; then

   [ -z "$(command -v pdftotext)" ] && abort 'pdftotext not installed.'
   pdftotext -eol unix -nopgbrk "$src_file" "$out_file"
   src_file="$out_file"

 elif [ "$mode" = "epub" ]; then

   ## NOTE(review): the archive is unpacked into ./tmp of the current
   ## directory, and any existing ./tmp is removed first.
   rm -rf tmp "$out_file"
   unzip -d tmp "$src_file" > /dev/null 2>&1

   text=''
   ## The -name tests are grouped so that -type f applies to both of them
   files="$(find tmp/ -type f \( -name "*.xhtml" -o -name "*.html" \) | sort -V)"
   [ "$interface" -ge 3 ] && echo -e "\nEpub [x]html files:\n\n\033[0;32m$files\033[0m"

   ## Read the list line by line so names containing spaces, '+' or ':'
   ## survive untouched (the former word-splitting loop rewrote them). The
   ## list arrives on fd 3 so the renderers keep their normal stdin.
   while IFS= read -r file <&3; do

      [ -n "$file" ] || continue

      [ "$interface" -ge 4 ] && debug=$debug$(cat "$file")

      if [ "$w3mepub" -eq 1 ]; then
         text=$text$(w3m "$file" -dump)
      elif [ "$html2text" -eq 1 ]; then
         text=$text$(html2text --ignore-emphasis "$file")
      else

         ## Decode html to UTF-8 (rewrites the unpacked file in place)
         LC_ALL=C perl -i -MHTML::Entities -0777 -CSDA  -ne  'print decode_entities($_)' "$file"

         tmptxt=$(cat "$file")

         ## Drop inline tags, keep each <p>...</p> followed by
         ## $epub_newline1, then keep only the text between '>' and '<',
         ## each piece followed by $epub_newline2
         text=$text$(echo "$tmptxt" \
           | perl -CS -ple 's#<\N{U+002F}?(a|b|u|i|span)>##g; s#<a.*?>##g;'\
           | perl -CS -0777 -ne 'push @a,print "$&'"${epub_newline1}"'" while /<p(.*?)<\/p>/gs' \
           | perl -CS -0777 -ne 'push @a,print "$1'"${epub_newline2}"'" while />(.*?)</gs' | tr '\0' '\n')

      fi

   done 3<<< "$files"

   [ "$interface" -ge 4 ] && echo "$text" | less

   ## Unless --raw: strip NUL / carriage-return characters and empty out
   ## lines holding a single whitespace character
   [ "$raw" -eq 0 ] && text=$(echo "$text" | perl -CS -ple 's#(\x0|\000}|\r)##g; s#^\s$##g; ')

   echo "$text" > "$out_file"
   src_file="$out_file"
   [ "$keeptmp" -ne 1 ] && rm -rf tmp
   [ "$interface" -ge 4 ] && echo "$debug" | less
   [ "$interface" -ge 4 ] && cat "$out_file" | less
 fi

 ## Working copy of the source text; removed by bye() unless --keeptmp
 tmp_file="$out_file.$(date +"%m%d%H%M%S").tmp"

 ## GTK only: ask for the target language when none (or 'auto') was given,
 ## then start the progress dialog in the background. The dialog is fed the
 ## contents of $gui_file twice a second for as long as that file exists;
 ## cancelling it kills this script ($CURRENT_PID).
 ## NOTE(review): gui_file and CURRENT_PID are only assigned by the GTK
 ## source-file prompt, so both are empty when the file came from the
 ## command line -- confirm that case is handled as intended.
 ## Changes: the condition is grouped with { } instead of a subshell, and
 ## the cancel button label typo ("backround") is corrected.
 if { [ "$tar_lang" = "" ] || [ "$tar_lang" = "auto" ]; } && [ "$notrans" -eq 0 ] && [ "$gtk" -eq 1 ]; then
   tar_lang="$(zenity --text='Please choose a target language:' --title='Target language:' --extra-button='auto' --ok-label='Translate' --list --editable --column="Language" --height=225 "en" "fr" "ru")"
   [ -z "$tar_lang" ] && abort 'No target language selected.'
   (while [ -f "$gui_file" ]; do cat "$gui_file"; sleep .5; done | (zenity --title='Press X to cancel' --text='Translating...' --progress --percentage=0 --auto-kill --time-remaining --auto-close --cancel-label='background' || kill $CURRENT_PID; rm -rf "$gui_file"))&
 fi

 ## Charset of the source as reported by file(1): the text after the '='
 ## in its "type; charset=..." answer
 t_file=$(file -bi "$src_file" | awk -F= '{ print $2 }')

 ## Copy the source into the work file: utf-8 input goes through iconv,
 ## anything else through a perl pass-through with UTF-8 std streams.
 ## (A disabled variant copied "unknown-8bit" files verbatim with cp.)
 case "$t_file" in
   utf-8)
     iconv -f "utf-8" "$src_file" -o "$tmp_file"
     ;;
   *)
     LC_ALL=C perl -CS -pwe '' "$src_file" > "$tmp_file"
     ;;
 esac

 if [ "$interface" -ge 3 ]; then
   echo -e "\nInitial settings:\nSource lang: \033[0;32m$src_lang \033[0mTarget lang: \033[0;35m$tar_lang\033[0m top: \033[0;33m$top\033[0m bottom: \033[0;33m$bot\033[0m  Mode: \033[0;33m$mode\033[0m  File type: \033[0;33m$t_file\033[0m\n"
 fi


 ## Automatically find source language: send a sample taken from the second
 ## half of the file to the translation service and read the detected
 ## language code out of its reply.
 if [ -z "$src_lang" ] || [ "$src_lang" = "auto" ]; then

   ## Grab text from the middle of the file
   middle=$(( $(wc -l < "$src_file") / 2 ))

   if [ "$mode" != "srt" ]; then
      text=$(tail -n "$middle" "$src_file" | head -n 10 | tr '\r\n' ' ')
   else
      ## Subtitles: leave out counter and timing lines
      text=$(tail -n "$middle" "$src_file" | head -n 20 | sed -n -r '/(-->|[0-9])/,${//!p;}' | tr '\r\n' ' ')
   fi

   if [ "$interface" -ge 3 ]; then
      echo -e "Grabbing text from line \033[0;35m$middle\033[0m for language detection:\n\n\033[0;32m\"$text\"\033[0m\n"
   fi

   src_lang=$(wget -U "Mozilla/5.0" -q -O- "http://translate.googleapis.com/translate_a/single?client=gtx&sl=auto&tl=fr&dt=t&q=$text")

   ## Keep what follows the last '["' of the reply, up to the next '"'
   src_lang=${src_lang##*\[\"}
   src_lang=${src_lang%%\"*}
 fi

 ## Target language: first preference, or the second one when the source
 ## already is the first preference
 if [ -z "$tar_lang" ] || [ "$tar_lang" = "auto" ]; then
   tar_lang=$tar_pref1
   [ "$src_lang" = "$tar_pref1" ] && tar_lang=$tar_pref2
 fi

 ## Which language goes on top? An explicit --above wins; otherwise the
 ## first of top_pref1 / top_pref2 that matches the source or the target
 ## language (source is checked first), and the source when nothing matches.
 if [ -n "$top" ]; then
   if [ "$top" = "$src_lang" ]; then
      bot=$tar_lang
   else
      bot=$src_lang
   fi
 else
   top=$src_lang
   bot=$tar_lang
   for pref in "$top_pref1" "$top_pref2"; do
      if [ "$src_lang" = "$pref" ]; then
         break
      elif [ "$tar_lang" = "$pref" ]; then
         top=$tar_lang
         bot=$src_lang
         break
      fi
   done
 fi

 ## Charset of the work file, for the debug summary only
 t_file=$(file -bi "$tmp_file" | awk -F= '{ print $2 }')

 if [ "$interface" -ge 3 ]; then
   echo -e "\nFinal settings:\nSource lang: \033[0;32m$src_lang \033[0mTarget lang: \033[0;35m$tar_lang\033[0m top: \033[0;33m$top\033[0m bottom: \033[0;33m$bot\033[0m  Mode: \033[0;33m$mode\033[0m  File type: \033[0;33m$t_file\033[0m\n"
 fi
 ## Pre-formatting of the work file.
 ## An unconditional `linespaced=1` used to sit here and threw away both the
 ## default and any --linespaced value; it has been removed. The options
 ## below still force single spacing when they are active.
 ## NOTE(review): commareturn defaults to 1, so by default linespaced still
 ## ends up as 1.

 [ "$onelineeach" -eq 1 ] && LC_ALL=C perl -CS -i -pe 's`\n`\n\n`g;' "$tmp_file" && linespaced=1

 [ "$simpleclean" -eq 1 ] && LC_ALL=C perl -CS -i -pe 's#[\,|\.|\;|\:|\?|\!]#$&\n\n#g;' "$tmp_file" && linespaced=1
 [ "$commareturn" -eq 1 ] && LC_ALL=C perl -CS -i -pe 's#\,#$&\n\n#g;' "$tmp_file" && LC_ALL=C perl -CS -i -pe 's#^ ##g;' "$tmp_file" && linespaced=1

 ## Strip carriage returns and backticks, normalise runs of dots to "...",
 ## and stop printing blank lines once their consecutive count exceeds
 ## $linespaced
 LC_ALL=C perl -i -CS -ane 's(\r|`)//g; s/\.{2,}/.../g; $n=(@F==0) ? $n+1 : 0; print if $n<='"${linespaced}"'' "$tmp_file"


 [ "$interface" -ge 1 ] && echo -e -n "\rFormatting..."

 if [ "$initclean" -eq 1 ]; then
   text=$(cat "$tmp_file")
   text=$(clean "$text")
   echo "$text" > "$tmp_file"
 fi

 ## --removeline: the pattern travels through the environment instead of
 ## being spliced into the perl source, so a '/' or a quote in it can no
 ## longer break the one-liner. It is still used as a regular expression.
 [ "$removeline" != "" ] && SRTSSA_REMOVE="$removeline" LC_ALL=C perl -CSDA -ni -e 'print unless /$ENV{SRTSSA_REMOVE}/;' "$tmp_file"

 [ "$interface" -ge 4 ] && cat "$tmp_file" | less
 textlines=$(wc -l < "$tmp_file")
 
 ## SSA/ASS header, one array element per output line. $top is the style
 ## aligned at the top of the screen, $bot the style at the bottom.
 ## The header used to be one backslash-continued string printed with an
 ## unquoted `echo -e $info`. A missing continuation after "PlayDepth" and
 ## any indentation of the continuation lines ended up as stray leading
 ## spaces in the written header lines.
 ssa_header_lines=(
   "[Script Info]"
   "ScriptType: v4.00+"
   "Collisions: Normal"
   "PlayDepth: 0"
   "Timer: 100,0000"
   "Video Aspect Ratio: 0"
   "WrapStyle: 0"
   "ScaledBorderAndShadow: no"
   ""
   "[V4+ Styles]"
   "Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,OutlineColour,BackColour,Bold,Italic,Underline,StrikeOut,ScaleX,ScaleY,Spacing,Angle,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,Encoding"
   "Style: $top,Arial,10,&H00F9FFFF,&H00FFFFFF,&H00000000,&H00000000,-1,0,0,0,100,100,0,0,1,1,0,8,10,10,10,0"
   "Style: $bot,Arial,18,&H00F9FFF9,&H00FFFFFF,&H00000000,&H00000000,-1,0,0,0,100,100,0,0,1,2,0,2,10,10,10,0"
   ""
   "[Events]"
   "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text"
 )
 info=$(printf '%s\n' "${ssa_header_lines[@]}")

 ## Subtitle output starts with the header; every other mode starts empty
 if [ "$mode" = "srt" ]; then
   [ "$interface" -ge 4 ] && sdebug="$info"
   printf '%s\n' "$info" > "$out_file"
 else
   echo -n > "$out_file"
 fi

 ## Main formatting pass over the work file.
 ## Each blank-line separated block becomes one numbered entry: the source
 ## text is written to $out_file next to a "((N))" placeholder that the
 ## translation pass later replaces, and the text is appended to the current
 ## translation chunk (srcClump[]) until the chunk would exceed $charMax.
 ##
 ## Fixes:
 ##   * srt mode with --nosource built its line in an unused variable
 ##     (`final`) and then printed stale final1/final2 values; it now writes
 ##     the single placeholder line
 ##   * the blank-line skipping loop never terminated when it reached the
 ##     end of the file while still skipping
 while IFS='' read -r lineN; do

   lines=$(( lines + 1 ))

   ## Scroll down until text is found; stop cleanly at end of file
   if [ "$lineN" = "" ]; then
      while IFS='' read -r lineN && [ "$lineN" = "" ]; do lines=$(( lines + 1 )); done
      [ "$lineN" = "" ] && break
   fi

   if [ "$mode" = "srt" ]; then
      ## lineN is the counter line, lineT the timing line, lineS the first text line
      IFS='' read -r lineT; IFS='' read -r lineS
      s=$lineS
      lines=$(( lines + 2 ))
   else
      lines=$(( lines + 1 ))
      s=$lineN
   fi

   ## Grab the other lines if more than 1
   while IFS='' read -r lineS; [ -n "$lineS" ]; do
      s=$s$newline$lineS
      lines=$(( lines + 1 ))
   done

   s=$(echo "$s" | sed -r 's#\\n#'"${backnew}"'#g;')
   [ "$deltag" -eq 1 ] && s=$(echo "$s" | LC_ALL=C perl -CA -pe 's#</?\S?>?##g;')
   [ "$clean" -eq 1 ] && s=$(clean "$s")

   if [ "$s" = "" ] && [ "$delblank" -eq 1 ]; then
      [ "$interface" -ge 2 ] && echo "*** Blank line omitted ***"

      ## Keep the numbering aligned with the file when asked to
      if [ "$counting" = "file" ]; then
         line=$(( line + 1 ))
         chunkNum=$(( chunkNum + 1 ))
      fi
   else
      if [ "$mode" = "srt" ]; then

         ## Grab and incorporate timeline info. This shell command is faster than awk, sed, and cut
         timeS="Dialogue: 0,"${lineT:1:7}"."${lineT:9:2}","${lineT:18:7}"."${lineT:26:2}","

         if [ "$nosource" -eq 1 ]; then
            ## Translation only: one event carrying the placeholder
            final1=$timeS$top",,0000,0000,0000,,(($line))"
            final2=""
         elif [ "$top" = "$src_lang" ]; then
            final1=$timeS$top",,0000,0000,0000,,"$s
            final2=$timeS$bot",,0000,0000,0000,,(($line))"
         else
            final1=$timeS$top",,0000,0000,0000,,(($line))"
            final2=$timeS$bot",,0000,0000,0000,,"$s
         fi

      elif [ "$makesubs" -eq 1 ]; then
         final1=$s
      else

         if [ "$nosource" -eq 0 ]; then
            if [ "$top" = "$src_lang" ]; then
               final1=$s
               final2="(($line))"
            else
               final1="(($line))"
               final2=$s
            fi
         else
            final1="(($line))"
            final2=""
         fi
      fi

      if [ "$mode" = "srt" ] && [ "$makesubs" -ne 1 ]; then
         if [ "$nosource" -eq 1 ]; then
            [ "$interface" -ge 4 ] && sdebug="$sdebug$final1\n"
            printf '%s\n' "$final1" >> "$out_file"
         else
            [ "$interface" -ge 4 ] && sdebug="$sdebug$final1\n$final2\n"
            printf '%s\n%s\n' "$final1" "$final2" >> "$out_file"
         fi
      elif [ "$makesubs" -eq 1 ]; then
         printf '%s\n00:00:00,000 --> 00:00:00,000\n%s\n\n' "$line" "$final1" >> "$out_file"
      else
         [ "$interface" -ge 4 ] && sdebug="$sdebug$final1\n\n$final2\n\n"
         printf '%s\n\n%s\n\n\n' "$final1" "$final2" >> "$out_file"
      fi

      [ "$interface" -ge 1 ] && echo -n -e "Completed: \033[0;33m"$(( 100 - ( ($textlines * 1) / ($lines) ) ))"%\033[0m  \r"

      ## Close the current chunk when this entry would push it past charMax.
      ## The closing "((line))" marker lets the extraction regex find the end
      ## of the chunk's last entry.
      if [ $(( ${#srcClump[chunkTot]} + ${#s} )) -gt $charMax ]; then
         srcClump[$chunkTot]="${srcClump[$chunkTot]} (($line)) "
         srcClumpNum[$chunkTot]=$(( $line - 1 ))
         chunkNum=0
         chunkTot=$(( chunkTot + 1 ))
      fi

      srcClump[$chunkTot]="${srcClump[$chunkTot]} (($line)) $s"

      srcLine[$line]="$s"

      line=$(( line + 1 ))
      chunkNum=$(( chunkNum + 1 ))
   fi

 done < "$tmp_file"

 ## Close the last chunk: record its final line number and append the
 ## terminating "((N))" marker
 srcClumpNum[$chunkTot]=$(( $line - 1 ))
 srcClump[$chunkTot]="${srcClump[$chunkTot]} (($line)) "

 if [ "$interface" -ge 4 ]; then
   echo -e "$sdebug" | less
 fi

 ## --justformat stops here, before anything is translated
 [ "$justformat" -eq 1 ] && exit

 ## --makesubs: nothing is translated either; print follow-up commands that
 ## could be run by hand, then leave
 if [ "$makesubs" -eq 1 ]; then
   echo -e "\n\nRun:\n"

   video_file="${src_file/$src_ext/mp4}"
   srt_file="${src_file/$src_ext/ssa}"
   new_video="${video_file/./ (sub).}"

   echo "gnome-subtitles '$out_file' '$video_file'"
   echo "srtssa.sh -d '$out_file'"
   echo "ffmpeg -i '$video_file' -vf ass='$srt_file' '$new_video'"

   exit
 fi

 ## Translation pass.
 ## Every chunk is translated in one request, its "((N))" markers are
 ## repaired, and each entry is cut out, sanity-checked (and re-translated on
 ## its own when it looks wrong), then written over its "((N))" placeholder
 ## in $out_file. Between chunks the script waits $wait seconds.
 ##
 ## Fixes:
 ##   * --cleantrans assigned to `newline` instead of `new_line` (twice),
 ##     which clobbered the --newline setting and never cleaned the text
 ##   * the --delblank removal used perl backticks (command execution) with
 ##     a shell variable that was never expanded; it is now a plain match on
 ##     the placeholder
 ##   * the translated text was pasted into the perl source of the
 ##     substitution, so a backtick, '$', '@' or backslash in a translation
 ##     broke or altered it; the text is now handed over via the environment
 [ "$interface" -ge 1 ] && printf "\rTranslating...%-50s"

 line=1
 transTime=$(( $charMax / 3000 )) # How long does it take to pull the results from Google and Process them?
 for (( chunkNum=0; chunkNum<=$chunkTot; chunkNum++ )); do

   s=$(translation "${srcClump[chunkNum]}")
   #s="${srcClump[chunkNum]}"

   ## Check and fix badly formatted brackets around line numbers
   fixed=$(echo "$s" | perl -CA -pe '\
         s#\(\s?\(\s?\(\s?(?=\d)# ((#g; \  ## (((10)) to ((10))
         s#（# \(#g; s#）#\) #g; \
         s#(\( \(|\(\( |\(\(  |\( \( )(?=\d+)#\(\(#g; s#(?<!\()(\(|\( )(?=\d+)#\(\(#g; \
         s#(?<=\d{1})(\) \)| \)\)|  \)\)| \) \))#\)\)#g; s#(?<=\d{1})((?!\)\)) \)|(?!\)\))\))#\)\)#g; \
         s#(?<=\S)\(\(# ((#g; s#\)\)(?=\S)#)) #g;')

   ## Latin translations will substitute decimal numbers for roman numerals, they need to be translated back
   if [ "$tar_lang" = 'la' ]; then
      for ((num=$line; num<=${srcClumpNum[chunkNum]}+1; num++ )); do
         roman=$(roman "$num")
         fixed=$(echo "$fixed" | sed 's#(('"${roman}"'))#(('"${num}"'))#g')
      done
   fi

   [ "$interface" -ge 3 ] && echo -e "\n\nSource:\n\n\033[0;32m${srcClump[$chunkNum]}\033[0m\n\nTranslation:\n\n\033[0;35m$s\n\n\033[0mFixed Translation:\n\n\033[0;36m$fixed\n\n\033[0m"

   for (( num1=$line; num1<=${srcClumpNum[chunkNum]}; num1++ )); do
      num2=$(( num1 + 1 ))

      ## Text between the "((num1)) " marker and the following " ((num2" marker
      new_line=$(echo "$fixed" | perl -CSDA -sae 'push @a,/(?<='"${num1}"'\)\) )(.*?)(?= \(\('"${num2}"')/; END{print @a }' | sed -e 's/^[[:space:]]*//g')

      [ "$deluni" -eq 1 ] && new_line=$(echo "$new_line" | LC_ALL=C perl -CA -pe 's/\\u\S{4,}//g;')
      echo "$new_line" | LC_ALL=C perl -CA -ne 'push @a,print "\nWARNING: UTF encoding found: $&\n" while /\\u\S{4}.*$/gs;'

      ## Decode escaped angle brackets and tidy the spacing around b/i/u tags
      new_line=$(echo "$new_line" | LC_ALL=C perl -CSDA -pe 's/\\u003c/</g; \
                                                             s/\\u003e/>/g; \
                                                             s/\s?(?<=(b|i|u))>\s?/>/g; \
                                                             s#\s?<\s?(?=/?(b|i|u))#<#g;')

      [ "$cleantrans" -eq 1 ] && new_line=$(clean "$new_line")

      ## Check for errors
      if [ "$noerr" -eq "0" ]; then

         ## Check for empty lines, brackets, or less than 3 characters-signs of a bad translation
         ## (sed's exit status doubles as the error code)
         echo "$new_line" | LC_ALL=C sed -n -r '/^$/{q1}; /^(\.|\?|\,)[^\.{2,}]/{q4}; / P /{q6}; /-->/{q7}; /\\u/{q8};'
         #echo "$new_line" | LC_ALL=C sed -n -r '/^$/{q1}; /^(\.|\?|\,)[^\.{2,}]/{q4}; / P /{q6}; /-->/{q7}; '
         error=$?

         ## Compare number of l&r brackets in both src and tar. If unequal, Google likely made a mistake
         ls=${srcLine[num1]//[^(]}
         rs=${srcLine[num1]//[^)]}
         lt=${new_line//[^(]}
         rt=${new_line//[^)]}
         [ ${#lt} -ne ${#ls} ] || [ ${#rt} -ne ${#rs} ] && error=3

         ## Compare the difference in line lengths (percentage by which the
         ## longer text exceeds the shorter one; 0 when either is empty or
         ## dif is 0)
         a=${#new_line};b=${#srcLine[num1]}
         difference=$((a*b*dif?(a>b?100*a/b:100*b/a)-100:0))

         if [ "$difference" -gt "$dif" ]; then
            c=$(echo "$new_line" | LC_ALL=C perl -CS -ne 'push @a,print 1 while /^\S+$/gs;')
            d=$(echo "${srcLine[num1]}" | LC_ALL=C perl -CS -ne 'push @a,print 1 while /^\S+$/gs;')

            ## It's not an error if there was only one word for both source and target text
            ## One word was simply longer than the other and we can assume this is normal
            if [ "$c" != "1" ] || [ "$d" != "1" ] || [ "$a" -le 2 ]; then error=5; fi
         fi

         if [ "$error" -ne 0 ]; then
            old_line=$new_line

            ## Retry this entry on its own
            [ "$norepair" -eq 0 ] && new_line=$(translation "${srcLine[$num1]}")

            [ "$cleantrans" -eq 1 ] && new_line=$(clean "$new_line")

            if [ -n "$log" ]; then
               log="\n\nFile: $src_file\n\nSource:\n\n${srcClump[$chunkNum]}\n\nTranslation:\n\n$s\n\nFixed Translation:\n\n$fixed\n\n(ERROR: $error) $num1 = \"${srcLine[$num1]}\"\n(ERROR: $error) $num1 ≠ \"$old_line\"\n(ERROR: $error) $num1 ≈ \"$new_line\"\n\n"
               [ ! -f "$log_file" ] && log_file="${out_file/.$tar_ext/.log}" && touch "$log_file"
               echo -e "$log" >> "$log_file"
               echo -e "Err: $error\nFile: '$out_file'\n${srcLine[$num1]}\n$old_line\n$new_line\n\n" >> 'all.log'
            fi

            [ "$interface" -ge 2 ] && echo -e "\n\033[0;31m(ERROR: $error)\033[0m $num1 = \"\033[0;32m${srcLine[$num1]}\033[0m\"\n\033[0;31m(ERROR: $error)\033[0m $num1 ≠ \"\033[0;31m$old_line\033[0m\"\n\033[0;31m(ERROR: $error)\033[0m $num1 ≈ \"\033[0;35m$new_line\033[0m\"\n"
            errors=$(( errors + 1 ))

            ## Pause after the extra request
            if [ "$norepair" -eq 0 ]; then
               for (( i=$wait; i>0; i-- )); do
                  [ "$interface" -ge 2 ] && echo -n -e "Waiting \033[0;33m$i\033[0m seconds... \r"
                  sleep 1
               done
               printf "\r%-50s\r"
            fi
         fi
      fi

      [ "$interface" -ge 2 ] && echo -e "$num1 = \"\033[0;32m${srcLine[$num1]}\033[0m\"\n$num1 ≈ \"\033[0;35m$new_line\033[0m\""

      if [ "$new_line" = "" ] && [ "$delblank" -eq 1 ]; then
         ## Drop the output line that carries this entry's placeholder
         LC_ALL=C perl -CS -ni -e 'print unless /\(\('"${num1}"'\)\)/;' "$out_file"
      else
         ## Put the translation in place of the placeholder; the text is read
         ## from the environment, so perl never parses it as code
         SRTSSA_REPL="$new_line" LC_ALL=C perl -CS -i -pe 's/\(\('"${num1}"'\)\)/$ENV{SRTSSA_REPL}/g;' "$out_file"
      fi

      line=$(( line + 1 ))
   done

   ## Don't make user wait after all lines are translated
   if [ "$chunkNum" -lt "$chunkTot" ]; then
      for (( i=$wait; i>0; i-- )); do

         perc=$(( (($chunkNum)  * 100 + ( 100 - ($i * 100 / $wait))) / ($chunkTot)  ))
         seconds=$(( ($chunkTot - $chunkNum - 1 ) * ($wait + $transTime + ( $chunkTot / 100 )) + $i + $transTime ))
         timeleft=$(date -d@$seconds -u +%H:%M:%S)

         [ "$interface" -ge 1 ] && echo -e -n "\r[ $out_file ]  \033[0;32m$src_lang\033[0m -> \033[0;35m$tar_lang\033[0m  $timeleft (\033[0;33m$perc%\033[0m) " && [ $errors ] && echo -e -n "\033[0;31mErrors: $errors\033[0m"
         ## Feed the zenity progress dialog
         [ "$gtk" -eq 1 ] && echo -e $perc > "$gui_file"
         sleep 1
      done

      [ "$interface" -ge 1 ] && printf "\rTranslating...%-80s"
   fi
 done

 if [ "$interface" -ge 1 ]; then [ "$errors" ] && echo -e "\n$out_file done with ERRORS: $errors\n" || echo -e "\n$src_file done.          \n"; fi
No results found