kelvinauta · July 24, 2025 23:08
diff --git a/translate-youtube-video.sh b/translate-youtube-video.sh
 #!/usr/bin/env bash

 # Strict mode: abort on command failure, on use of an unset variable, and when
 # any stage of a pipeline fails.
 set -o errexit -o nounset -o pipefail

 # Language name interpolated into the translation prompt and the help text.
 OUTPUT_LANG="español"
 # Number of background translation jobs started before waiting for them.
 CHUNK_SIZE=50
 # Chat-completions model that translates each subtitle cue.
 MODEL_TRANSLATE="gpt-4.1-mini"

 # Print the help text to stdout.
 # Globals: OUTPUT_LANG (read), MODEL_TRANSLATE (read)
 # The text is emitted with printf instead of a here-document: a `<<EOF`
 # terminator must start in column 0, so an indented terminator never closes
 # the here-document and the rest of the script is swallowed as help text.
 function usage(){
    printf '%s\n' \
      "Usage: $(basename "$0") <youtube_url> [temp_dir]" \
      "Downloads a YouTube video, extracts its audio, obtains subtitles with OpenAI Whisper, translates them into \"${OUTPUT_LANG}\" using the model \"${MODEL_TRANSLATE}\", and finally muxes the translated subtitles back into the video producing an .mkv file." \
      'Positional arguments:' \
      ' youtube_url   URL of the YouTube video you want to process (mandatory)' \
      ' temp_dir      Optional directory to place intermediate files. When omitted a random directory will be created under /tmp.' \
      'Options:' \
      ' -h, --help    Show this help message and exit.' \
      'Environment:' \
      ' OPENAI_API_KEY  Your OpenAI secret key. It must be exported before running this script.' \
      'Dependencies: yt-dlp, ffmpeg, jq, curl, sed, mktemp, tr'
 }
 if [[ $# -eq 0 ]]; then echo "No arguments provided, use --help"; exit 1; fi
 # Help is handled before the credential check so that -h/--help works even
 # when no API key has been exported yet.
 if [[ "$1" == "-h" || "$1" == "--help" ]]; then usage; exit 0; fi
 # ${VAR:-} keeps nounset from aborting with "unbound variable" before the
 # friendly message below can be printed.
 if [[ -z "${OPENAI_API_KEY:-}" ]]; then echo "Env required: OPENAI_API_KEY. use --help"; exit 1; fi

 yt_url="$1"
 shift

 # Use the caller's directory when one is given. Otherwise let mktemp create
 # the directory itself (-d) instead of only printing an unused name (-u),
 # which another process could claim before it is created.
 dir_tmp="${1:-}"
 if [[ -z "$dir_tmp" ]]; then
    dir_tmp="$(mktemp -d "/tmp/video.XXXXXX")"
 fi

 format="mp4"

 # Create the working directory and derive the video file path from the title.
 # Globals: dir_tmp, yt_url, format (read); title, tmp_video_path (written)
 # Outputs: progress messages on stdout, errors on stderr
 # Returns: non-zero when the title cannot be fetched
 function setup(){
    echo "init setup"
    mkdir -p "$dir_tmp"
    local raw_title
    raw_title="$(yt-dlp --print title "$yt_url")" || {
        echo "setup: unable to read the video title" >&2
        return 1
    }
    # Slug: lower-case, spaces to hyphens, drop everything outside [a-z0-9-].
    # Hyphens are squeezed after the deletion so that "a : b" becomes "a-b"
    # rather than "a--b".
    title="$(printf '%s' "$raw_title" \
      | tr '[:upper:]' '[:lower:]' \
      | tr -d '\n' \
      | tr ' ' '-' \
      | tr -cd 'a-z0-9-' \
      | tr -s '-')"
    title="${title#-}"
    title="${title%-}"
    # A title without ASCII letters or digits would leave an empty file name.
    title="${title:-video}"
    tmp_video_path="${dir_tmp}/${title}.${format}"
    echo "done setup: ${tmp_video_path}"
 }
 # Download the video with yt-dlp into the path chosen by setup.
 # Globals: format, tmp_video_path, yt_url (read)
 # Outputs: progress messages on stdout
 function download(){
    echo "init download"
    # Best video and best audio streams, merged into a "$format" container.
    yt-dlp -f bestvideo+bestaudio --merge-output-format "$format"  -o "$tmp_video_path" "$yt_url"
    # Closing message: the original printed "init" here, unlike every other step.
    echo "done download video"
 }

 # Audio track extracted from the downloaded video, kept in the working directory.
 tmp_audio_path="${dir_tmp}/audio.ogg"
 # Extract a mono Opus audio track from the video with ffmpeg.
 # Globals: tmp_video_path, tmp_audio_path (read)
 function extract_audio(){
    printf '%s\n' "init extract audio"
    local -a audio_args=(
      -loglevel quiet
      -i "$tmp_video_path"
      -vn
      -map_metadata -1
      -ac 1
      -c:a libopus
      -b:a 12k
      -application voip
      "$tmp_audio_path"
    )
    ffmpeg "${audio_args[@]}"
    printf '%s\n' "done extract audio ${tmp_audio_path}"
 }

 # SRT file produced by the transcription step and later rewritten in place.
 tmp_subtitle_path="${dir_tmp}/subtitle.srt"
 # Upload the audio to the OpenAI transcription endpoint and save the SRT reply.
 # Globals: OPENAI_API_KEY, tmp_audio_path, tmp_subtitle_path (read)
 function transcribe(){
    printf '%s\n' "init transcribe"
    local -a request=(
      -sSf
      -o "$tmp_subtitle_path"
      https://api.openai.com/v1/audio/transcriptions
      -H "Authorization: Bearer $OPENAI_API_KEY"
      -H "Content-Type: multipart/form-data"
      -F "file=@${tmp_audio_path}"
      -F "model=whisper-1"
      -F "response_format=srt"
    )
    curl "${request[@]}"
    printf '%s\n' "done transcribe ${tmp_subtitle_path}"
 }



 # Translate one subtitle cue and write the translation into the SRT file.
 # Globals:   OUTPUT_LANG, MODEL_TRANSLATE, OPENAI_API_KEY (read)
 # Arguments: $1 - cue block: index line, timestamp line, then the text lines
 #            $2 - SRT file to edit in place
 #            $3 - number of file lines that precede the cue
 # Outputs:   the source text and its translation (or a failure note) on stdout
 # Note: only the first text line of the cue (line $3 + 3) is rewritten;
 # further text lines of a multi-line cue are left untouched so that the line
 # numbers used by other running jobs stay valid.
 function translate_line(){
    local file="$2"
    local line=$(( $3 + 3 ))
    local text body response output_text escaped lock tries

    text="$(sed '1,2d' <<< "$1" | tr '\n' ' ')"
    # The here-string adds a final newline that tr turned into a blank.
    text="${text% }"
    body=$(jq -n \
      --arg text   "$text" \
      --arg lang   "$OUTPUT_LANG" \
      --arg model   "$MODEL_TRANSLATE" \
      '{
         model: $model,
         messages: [
           { role: "developer",
             content: "Translate the text below into \($lang), your answer must be only and exclusively the translation, respecting the original content in a single line"
           },
           { role: "user", content: $text }
         ]
       }')

    # -r yields the raw string instead of a JSON-quoted one, and `// empty`
    # turns a missing or null answer into no output instead of the word "null".
    response="$(curl -sS https://api.openai.com/v1/chat/completions \
      -H "Content-Type: application/json" \
      -H "Authorization: Bearer $OPENAI_API_KEY" \
      -d "$body" | jq -r '.choices[0].message.content // empty')" || response=""

    echo -e "\n"
    echo "$text"
    output_text="$(printf '%s' "$response" | tr '\n' ' ')"
    if [[ -z ${output_text//[[:space:]]/} ]]; then
        echo "line ${line} fail in translate"
        return 0
    fi
    echo "$output_text"

    # Escape the characters that are special in a sed replacement (\ / &).
    escaped="$(printf '%s' "$output_text" | sed -e 's|[\\/&]|\\&|g')"

    # Jobs run in parallel and `sed -i` rewrites the whole file, so edits are
    # serialized with a lock directory (mkdir either creates it or fails).
    lock="${file}.lock"
    tries=0
    until mkdir "$lock" 2>/dev/null; do
        tries=$(( tries + 1 ))
        if (( tries > 600 )); then
            echo "line ${line} fail in translate"
            return 0
        fi
        sleep 0.05
    done
    sed -i "${line}s/.*/${escaped}/" "$file" || { rmdir "$lock"; return 1; }
    rmdir "$lock"
 }

 # Translate every cue of the SRT file, running translate_line in parallel
 # batches of CHUNK_SIZE background jobs.
 # Globals: CHUNK_SIZE, tmp_subtitle_path (read)
 # Outputs: progress messages on stdout
 function translate_all(){
    echo "init translate velocity chunks ${CHUNK_SIZE}"
    local index_line=0 steps=0 offset block text
    local -a pids=()
    # sed turns each blank-line separator into a NUL so read gets one cue at a
    # time. `|| [[ -n $block ]]` keeps the last cue when the file does not end
    # with a blank line and the final record is therefore not NUL-terminated.
    while IFS= read -r -d '' block || [[ -n $block ]]; do
        # Ignore a whitespace-only remainder at the end of the file.
        [[ -n ${block//[[:space:]]/} ]] || continue
        text=$(sed '${/^\s*$/d;}' <<<"$block")
        # Lines of this cue plus the blank separator line that follows it.
        offset=$(( $(wc -l <<<"$text") + 1 ))
        translate_line "$text" "$tmp_subtitle_path" "$index_line" &
        pids+=("$!")
        steps=$(( steps + 1 ))
        if (( steps >= CHUNK_SIZE )); then
            wait "${pids[@]}"
            pids=()
            # Start counting the next batch; without this reset every later
            # job would be waited for individually.
            steps=0
        fi
        index_line=$(( index_line + offset ))
    done < <(
      sed -z 's/\n[[:space:]]*\n/\n\x00/g' "$tmp_subtitle_path"
    )
    if (( ${#pids[@]} > 0 )); then
        wait "${pids[@]}"
    fi
    echo "done translate"
 }
 # Mux the video and the subtitle file into an .mkv next to the other artifacts.
 # Globals: dir_tmp, title, tmp_video_path, tmp_subtitle_path (read);
 #          tmp_output_path (written)
 function make_mkv(){
    printf '%s\n' "init make mkv"
    tmp_output_path="${dir_tmp}/${title}.mkv"
    local -a mux_args=(
      -loglevel quiet
      -i "$tmp_video_path"
      -sub_charenc UTF-8 -i "$tmp_subtitle_path"
      -c:v copy -c:a copy -c:s srt
      -metadata:s:s:0 language=spa
      "$tmp_output_path"
    )
    ffmpeg "${mux_args[@]}"
    printf '%s\n' "done make mkv, outputfile: ${tmp_output_path}"
 }

 # Run the pipeline stages one after another, in this order.
 for stage in setup download extract_audio transcribe translate_all make_mkv; do
    "$stage"
 done
	#!/usr/bin/env bash

	# Strict mode: abort on command failure, on use of an unset variable, and when
	# any stage of a pipeline fails.
	set -o errexit -o nounset -o pipefail

	# Language name interpolated into the translation prompt and the help text.
	OUTPUT_LANG="español"
	# Number of background translation jobs started before waiting for them.
	CHUNK_SIZE=50
	# Chat-completions model that translates each subtitle cue.
	MODEL_TRANSLATE="gpt-4.1-mini"

	# Print the help text to stdout.
	# Globals: OUTPUT_LANG (read), MODEL_TRANSLATE (read)
	# The text is emitted with printf instead of a here-document: a `<<EOF`
	# terminator must start in column 0, so an indented terminator never closes
	# the here-document and the rest of the script is swallowed as help text.
	function usage(){
	  printf '%s\n' \
	    "Usage: $(basename "$0") <youtube_url> [temp_dir]" \
	    "Downloads a YouTube video, extracts its audio, obtains subtitles with OpenAI Whisper, translates them into \"${OUTPUT_LANG}\" using the model \"${MODEL_TRANSLATE}\", and finally muxes the translated subtitles back into the video producing an .mkv file." \
	    'Positional arguments:' \
	    'youtube_url URL of the YouTube video you want to process (mandatory)' \
	    'temp_dir Optional directory to place intermediate files. When omitted a random directory will be created under /tmp.' \
	    'Options:' \
	    '-h, --help Show this help message and exit.' \
	    'Environment:' \
	    'OPENAI_API_KEY Your OpenAI secret key. It must be exported before running this script.' \
	    'Dependencies: yt-dlp, ffmpeg, jq, curl, sed, mktemp, tr'
	}
	if [[ $# -eq 0 ]]; then echo "No arguments provided, use --help"; exit 1; fi
	# Help is handled before the credential check so that -h/--help works even
	# when no API key has been exported yet. A single [[ ]] with || replaces the
	# escaped `\|\|`, which the shell does not treat as an operator.
	if [[ "$1" == "-h" || "$1" == "--help" ]]; then usage; exit 0; fi
	# ${VAR:-} keeps nounset from aborting with "unbound variable" before the
	# friendly message below can be printed.
	if [[ -z "${OPENAI_API_KEY:-}" ]]; then echo "Env required: OPENAI_API_KEY. use --help"; exit 1; fi

	yt_url="$1"
	shift

	# Use the caller's directory when one is given. Otherwise let mktemp create
	# the directory itself (-d) instead of only printing an unused name (-u),
	# which another process could claim before it is created.
	dir_tmp="${1:-}"
	if [[ -z "$dir_tmp" ]]; then
	  dir_tmp="$(mktemp -d "/tmp/video.XXXXXX")"
	fi

	format="mp4"

	# Create the working directory and derive the video file path from the title.
	# Globals: dir_tmp, yt_url, format (read); title, tmp_video_path (written)
	# Outputs: progress messages on stdout, errors on stderr
	# Returns: non-zero when the title cannot be fetched
	function setup(){
	  echo "init setup"
	  mkdir -p "$dir_tmp"
	  local raw_title
	  raw_title="$(yt-dlp --print title "$yt_url")" || {
	    echo "setup: unable to read the video title" >&2
	    return 1
	  }
	  # Slug: lower-case, spaces to hyphens, drop everything outside [a-z0-9-].
	  # Real pipes are used here; an escaped `\|` is passed to the command as a
	  # literal argument. Hyphens are squeezed after the deletion so that
	  # "a : b" becomes "a-b" rather than "a--b".
	  title="$(printf '%s' "$raw_title" \
	    | tr '[:upper:]' '[:lower:]' \
	    | tr -d '\n' \
	    | tr ' ' '-' \
	    | tr -cd 'a-z0-9-' \
	    | tr -s '-')"
	  title="${title#-}"
	  title="${title%-}"
	  # A title without ASCII letters or digits would leave an empty file name.
	  title="${title:-video}"
	  tmp_video_path="${dir_tmp}/${title}.${format}"
	  echo "done setup: ${tmp_video_path}"
	}
	# Download the video with yt-dlp into the path chosen by setup.
	# Globals: format, tmp_video_path, yt_url (read)
	# Outputs: progress messages on stdout
	function download(){
	  echo "init download"
	  # Best video and best audio streams, merged into a "$format" container.
	  yt-dlp -f bestvideo+bestaudio --merge-output-format "$format" -o "$tmp_video_path" "$yt_url"
	  # Closing message: the original printed "init" here, unlike every other step.
	  echo "done download video"
	}

	# Audio track extracted from the downloaded video, kept in the working directory.
	tmp_audio_path="${dir_tmp}/audio.ogg"
	# Extract a mono Opus audio track from the video with ffmpeg.
	# Globals: tmp_video_path, tmp_audio_path (read)
	function extract_audio(){
	  printf '%s\n' "init extract audio"
	  local -a audio_args=(
	    -loglevel quiet
	    -i "$tmp_video_path"
	    -vn
	    -map_metadata -1
	    -ac 1
	    -c:a libopus
	    -b:a 12k
	    -application voip
	    "$tmp_audio_path"
	  )
	  ffmpeg "${audio_args[@]}"
	  printf '%s\n' "done extract audio ${tmp_audio_path}"
	}

	# SRT file produced by the transcription step and later rewritten in place.
	tmp_subtitle_path="${dir_tmp}/subtitle.srt"
	# Upload the audio to the OpenAI transcription endpoint and save the SRT reply.
	# Globals: OPENAI_API_KEY, tmp_audio_path, tmp_subtitle_path (read)
	function transcribe(){
	  printf '%s\n' "init transcribe"
	  local -a request=(
	    -sSf
	    -o "$tmp_subtitle_path"
	    https://api.openai.com/v1/audio/transcriptions
	    -H "Authorization: Bearer $OPENAI_API_KEY"
	    -H "Content-Type: multipart/form-data"
	    -F "file=@${tmp_audio_path}"
	    -F "model=whisper-1"
	    -F "response_format=srt"
	  )
	  curl "${request[@]}"
	  printf '%s\n' "done transcribe ${tmp_subtitle_path}"
	}



	# Translate one subtitle cue and write the translation into the SRT file.
	# Globals:   OUTPUT_LANG, MODEL_TRANSLATE, OPENAI_API_KEY (read)
	# Arguments: $1 - cue block: index line, timestamp line, then the text lines
	#            $2 - SRT file to edit in place
	#            $3 - number of file lines that precede the cue
	# Outputs:   the source text and its translation (or a failure note) on stdout
	# Note: only the first text line of the cue (line $3 + 3) is rewritten;
	# further text lines of a multi-line cue are left untouched so that the line
	# numbers used by other running jobs stay valid.
	function translate_line(){
	  local file="$2"
	  local line=$(( $3 + 3 ))
	  local text body response output_text escaped lock tries

	  # Real pipes throughout: an escaped `\|` is handed to the command as a
	  # literal argument instead of connecting two commands.
	  text="$(sed '1,2d' <<< "$1" | tr '\n' ' ')"
	  # The here-string adds a final newline that tr turned into a blank.
	  text="${text% }"
	  body=$(jq -n \
	    --arg text "$text" \
	    --arg lang "$OUTPUT_LANG" \
	    --arg model "$MODEL_TRANSLATE" \
	    '{
	       model: $model,
	       messages: [
	         { role: "developer",
	           content: "Translate the text below into \($lang), your answer must be only and exclusively the translation, respecting the original content in a single line"
	         },
	         { role: "user", content: $text }
	       ]
	     }')

	  # -r yields the raw string instead of a JSON-quoted one, and `// empty`
	  # turns a missing or null answer into no output instead of the word "null".
	  response="$(curl -sS https://api.openai.com/v1/chat/completions \
	    -H "Content-Type: application/json" \
	    -H "Authorization: Bearer $OPENAI_API_KEY" \
	    -d "$body" | jq -r '.choices[0].message.content // empty')" || response=""

	  echo -e "\n"
	  echo "$text"
	  output_text="$(printf '%s' "$response" | tr '\n' ' ')"
	  if [[ -z ${output_text//[[:space:]]/} ]]; then
	    echo "line ${line} fail in translate"
	    return 0
	  fi
	  echo "$output_text"

	  # Escape the characters that are special in a sed replacement (\ / &).
	  escaped="$(printf '%s' "$output_text" | sed -e 's|[\\/&]|\\&|g')"

	  # Jobs run in parallel and `sed -i` rewrites the whole file, so edits are
	  # serialized with a lock directory (mkdir either creates it or fails).
	  lock="${file}.lock"
	  tries=0
	  until mkdir "$lock" 2>/dev/null; do
	    tries=$(( tries + 1 ))
	    if (( tries > 600 )); then
	      echo "line ${line} fail in translate"
	      return 0
	    fi
	    sleep 0.05
	  done
	  sed -i "${line}s/.*/${escaped}/" "$file" || { rmdir "$lock"; return 1; }
	  rmdir "$lock"
	}

	# Translate every cue of the SRT file, running translate_line in parallel
	# batches of CHUNK_SIZE background jobs.
	# Globals: CHUNK_SIZE, tmp_subtitle_path (read)
	# Outputs: progress messages on stdout
	function translate_all(){
	  echo "init translate velocity chunks ${CHUNK_SIZE}"
	  local index_line=0 steps=0 offset block text
	  local -a pids=()
	  # sed turns each blank-line separator into a NUL so read gets one cue at a
	  # time. `|| [[ -n $block ]]` keeps the last cue when the file does not end
	  # with a blank line and the final record is therefore not NUL-terminated.
	  while IFS= read -r -d '' block || [[ -n $block ]]; do
	    # Ignore a whitespace-only remainder at the end of the file.
	    [[ -n ${block//[[:space:]]/} ]] || continue
	    text=$(sed '${/^\s*$/d;}' <<<"$block")
	    # Lines of this cue plus the blank separator line that follows it.
	    offset=$(( $(wc -l <<<"$text") + 1 ))
	    translate_line "$text" "$tmp_subtitle_path" "$index_line" &
	    pids+=("$!")
	    steps=$(( steps + 1 ))
	    if (( steps >= CHUNK_SIZE )); then
	      wait "${pids[@]}"
	      pids=()
	      # Start counting the next batch; without this reset every later
	      # job would be waited for individually.
	      steps=0
	    fi
	    index_line=$(( index_line + offset ))
	  done < <(
	    sed -z 's/\n[[:space:]]*\n/\n\x00/g' "$tmp_subtitle_path"
	  )
	  if (( ${#pids[@]} > 0 )); then
	    wait "${pids[@]}"
	  fi
	  echo "done translate"
	}
	# Mux the video and the subtitle file into an .mkv next to the other artifacts.
	# Globals: dir_tmp, title, tmp_video_path, tmp_subtitle_path (read);
	#          tmp_output_path (written)
	function make_mkv(){
	  printf '%s\n' "init make mkv"
	  tmp_output_path="${dir_tmp}/${title}.mkv"
	  local -a mux_args=(
	    -loglevel quiet
	    -i "$tmp_video_path"
	    -sub_charenc UTF-8 -i "$tmp_subtitle_path"
	    -c:v copy -c:a copy -c:s srt
	    -metadata:s:s:0 language=spa
	    "$tmp_output_path"
	  )
	  ffmpeg "${mux_args[@]}"
	  printf '%s\n' "done make mkv, outputfile: ${tmp_output_path}"
	}

	# Run the pipeline stages one after another, in this order.
	for stage in setup download extract_audio transcribe translate_all make_mkv; do
	  "$stage"
	done