akazakou · September 19, 2025 18:16
diff --git a/openai_transcribe.sh b/openai_transcribe.sh
 #######################################
 # Worker for openai_transcribe: extracts the audio track, splits it into
 # chunks and sends every chunk to the OpenAI transcription endpoint.
 # All scratch files are created inside the directory passed as $6; the caller
 # owns that directory and removes it afterwards.
 # Globals:   OPENAI_API_KEY (read)
 # Arguments: $1 - input media file
 #            $2 - output file (truncated, then appended to per chunk)
 #            $3 - model name
 #            $4 - chunk length handed to ffmpeg -segment_time
 #            $5 - response_format sent to the API
 #            $6 - existing scratch directory
 # Outputs:   progress on stdout, diagnostics on stderr
 # Returns:   0 on success, 2 on any failure
 #######################################
 _openai_transcribe_run() {
   local input="$1" out="$2" model="$3" chunk="$4" format="$5" workdir="$6"
   local audio="$workdir/audio.m4a"
   local pattern="$workdir/part_%04d.m4a"
   local body="$workdir/response.body"
   local f resp http_code idx=0

   # 1) Extract audio (try stream copy; if it fails, re-encode to AAC)
   if ! ffmpeg -hide_banner -loglevel error -y -i "$input" -vn -acodec copy "$audio"; then
     echo "Stream copy failed, re-encoding audio..."
     if ! ffmpeg -hide_banner -loglevel error -y -i "$input" -vn -ac 2 -ar 48000 -c:a aac -b:a 192k "$audio"; then
       echo "ERROR: audio extraction failed." >&2
       return 2
     fi
   fi

   # 2) Segment audio into pieces of $chunk seconds.
   # NOTE: -reset_timestamps 1 makes every chunk start at 0, so srt/vtt
   # timestamps in the combined output restart with each chunk.
   if ! ffmpeg -hide_banner -loglevel error -y -i "$audio" \
       -f segment -segment_time "$chunk" -reset_timestamps 1 \
       -map 0:a -c copy "$pattern"; then
     echo "ERROR: segmentation failed." >&2
     return 2
   fi

   # 3) Call the API on each chunk and append to the output.
   # The output is truncated only now so a failed extraction does not wipe
   # an existing transcript.
   : > "$out" || { echo "ERROR: cannot write to $out" >&2; return 2; }

   for f in "$workdir"/part_*.m4a; do
     [[ -e "$f" ]] || continue
     idx=$((idx + 1))
     echo "Transcribing chunk #$idx: ${f##*/}"

     # The body goes to a file and the HTTP status to stdout, so success is
     # judged by the status code instead of by searching the transcript text.
     http_code=$(curl -sS -o "$body" -w '%{http_code}' \
       https://api.openai.com/v1/audio/transcriptions \
       -H "Authorization: Bearer $OPENAI_API_KEY" \
       -H "Content-Type: multipart/form-data" \
       -F "model=$model" \
       -F "response_format=$format" \
       -F "file=@${f}") || {
       echo "ERROR: request failed for chunk #$idx" >&2
       return 2
     }

     resp=$(cat "$body" 2>/dev/null)

     if [[ "$http_code" != 2[0-9][0-9] ]]; then
       echo "ERROR from API on chunk #$idx (HTTP $http_code):" >&2
       printf '%s\n' "$resp" >&2
       return 2
     fi
     if [[ -z "$resp" ]]; then
       echo "ERROR: request failed for chunk #$idx" >&2
       return 2
     fi

     # printf keeps backslashes and leading dashes in the response intact.
     if [[ "$format" == "text" || "$format" == "vtt" || "$format" == "srt" ]]; then
       # Text-like formats get a separator for readability.
       printf '\n----- [chunk %s] -----\n%s\n' "$idx" "$resp" >> "$out"
     else
       # json: append as-is plus newline
       printf '%s\n' "$resp" >> "$out"
     fi
   done

   if [[ "$idx" -eq 0 ]]; then
     echo "ERROR: segmentation produced no chunks." >&2
     return 2
   fi

   echo "Done. Output saved to: $out"
 }

 #######################################
 # Transcribe long videos via the OpenAI Audio API in fixed-length chunks.
 #
 # The audio track of <input_video> is extracted with ffmpeg, split with the
 # segment muxer, and every chunk is POSTed to /v1/audio/transcriptions; the
 # responses are appended to <output_text> in chunk order.
 #
 # Usage:     openai_transcribe <input_video> <output_text>
 #              [--model MODEL] [--chunk SECONDS] [--format text|srt|vtt|json]
 # Globals:   OPENAI_API_KEY (read)
 # Arguments: $1 - input media file, $2 - output file, then optional flags
 # Outputs:   progress/help on stdout, diagnostics on stderr
 # Returns:   0 on success or when the help text is shown, 2 on any error,
 #            130 if interrupted while transcribing
 #######################################
 openai_transcribe() {
   # Single source of truth for the defaults: the help text is built from
   # these so it cannot drift from the values actually used.
   local default_model="gpt-4o-transcribe"
   local default_chunk=1399
   local default_format="text"

   # Highlight headings only when stdout is a terminal.
   local hl="" rs=""
   if [[ -t 1 ]]; then
     hl=$'\033[36m'
     rs=$'\033[0m'
   fi

   # Built with += so that indentation of this block cannot split the
   # assignment into separate words.
   local USAGE=""
   USAGE+="${hl}Usage:${rs} openai_transcribe <input_video> <output_text> [--model MODEL] [--chunk SECONDS] [--format text|srt|vtt|json]"$'\n'
   USAGE+="${hl}Example:${rs} openai_transcribe \"2025-09-19 11-04-11.mkv\" meeting.txt --chunk 1400 --model gpt-4o-transcribe"$'\n'
   USAGE+=$'\nOptions:\n'
   USAGE+="  --model   OpenAI model (default: ${default_model})"$'\n'
   USAGE+="  --chunk   Chunk length in seconds (default: ${default_chunk})"$'\n'
   USAGE+="  --format  response_format (text|srt|vtt|json; default: ${default_format})"$'\n'
   USAGE+=$'\nNotes:\n'
   USAGE+=$'  * Requires: ffmpeg, curl, and $OPENAI_API_KEY set in env.\n'
   USAGE+=$'  * Audio is copied to .m4a and split with FFmpeg segment muxer.\n'
   USAGE+=$'  * Each chunk is sent to the API; outputs are appended in order.\n'

   # Help (also shown when either positional argument is missing)
   if [[ "${1:-}" == "--help" || "${1:-}" == "-h" || -z "${1:-}" || -z "${2:-}" ]]; then
     printf '%s\n' "$USAGE"
     return 0
   fi

   # Args
   local input="$1"
   local out="$2"
   shift 2

   local model="$default_model"
   local chunk="$default_chunk"
   local format="$default_format"

   # Parse flags. Every flag takes a value; check it is present before
   # "shift 2", which otherwise fails without shifting and loops forever.
   while [[ $# -gt 0 ]]; do
     case "$1" in
       --model|--chunk|--format)
         if [[ $# -lt 2 || -z "$2" ]]; then
           echo "ERROR: option $1 requires a value." >&2
           return 2
         fi
         case "$1" in
           --model)  model="$2" ;;
           --chunk)  chunk="$2" ;;
           --format) format="$2" ;;
         esac
         shift 2
         ;;
       *)
         echo "Unknown option: $1" >&2
         return 2
         ;;
     esac
   done

   # Checks
   if [[ -z "${OPENAI_API_KEY:-}" ]]; then
     echo "ERROR: OPENAI_API_KEY is not set." >&2
     return 2
   fi
   if [[ ! -f "$input" ]]; then
     echo "ERROR: input file not found: $input" >&2
     return 2
   fi
   # Ensure tools
   command -v ffmpeg >/dev/null 2>&1 || { echo "ERROR: ffmpeg not found." >&2; return 2; }
   command -v curl   >/dev/null 2>&1 || { echo "ERROR: curl not found." >&2;   return 2; }

   # Prep
   local tmpdir rc=0
   tmpdir="$(mktemp -d -t oa_stt_XXXXXXXX)" || { echo "mktemp failed" >&2; return 2; }

   # Run the work in a subshell so the traps stay scoped to it and never
   # replace traps of the calling shell. The traps cover interruption; the
   # explicit rm below covers every normal return path.
   (
     trap 'rm -rf -- "$tmpdir"' EXIT
     trap 'exit 130' INT TERM
     _openai_transcribe_run "$input" "$out" "$model" "$chunk" "$format" "$tmpdir"
   ) || rc=$?

   rm -rf -- "$tmpdir"
   return "$rc"
 }
	#######################################
	# Worker for openai_transcribe: extracts the audio track, splits it into
	# chunks and sends every chunk to the OpenAI transcription endpoint.
	# All scratch files are created inside the directory passed as $6; the caller
	# owns that directory and removes it afterwards.
	# Globals:   OPENAI_API_KEY (read)
	# Arguments: $1 - input media file
	#            $2 - output file (truncated, then appended to per chunk)
	#            $3 - model name
	#            $4 - chunk length handed to ffmpeg -segment_time
	#            $5 - response_format sent to the API
	#            $6 - existing scratch directory
	# Outputs:   progress on stdout, diagnostics on stderr
	# Returns:   0 on success, 2 on any failure
	#######################################
	_openai_transcribe_run() {
	  local input="$1" out="$2" model="$3" chunk="$4" format="$5" workdir="$6"
	  local audio="$workdir/audio.m4a"
	  local pattern="$workdir/part_%04d.m4a"
	  local body="$workdir/response.body"
	  local f resp http_code idx=0

	  # 1) Extract audio (try stream copy; if it fails, re-encode to AAC)
	  if ! ffmpeg -hide_banner -loglevel error -y -i "$input" -vn -acodec copy "$audio"; then
	    echo "Stream copy failed, re-encoding audio..."
	    if ! ffmpeg -hide_banner -loglevel error -y -i "$input" -vn -ac 2 -ar 48000 -c:a aac -b:a 192k "$audio"; then
	      echo "ERROR: audio extraction failed." >&2
	      return 2
	    fi
	  fi

	  # 2) Segment audio into pieces of $chunk seconds.
	  # NOTE: -reset_timestamps 1 makes every chunk start at 0, so srt/vtt
	  # timestamps in the combined output restart with each chunk.
	  if ! ffmpeg -hide_banner -loglevel error -y -i "$audio" \
	      -f segment -segment_time "$chunk" -reset_timestamps 1 \
	      -map 0:a -c copy "$pattern"; then
	    echo "ERROR: segmentation failed." >&2
	    return 2
	  fi

	  # 3) Call the API on each chunk and append to the output.
	  # The output is truncated only now so a failed extraction does not wipe
	  # an existing transcript.
	  : > "$out" || { echo "ERROR: cannot write to $out" >&2; return 2; }

	  for f in "$workdir"/part_*.m4a; do
	    [[ -e "$f" ]] || continue
	    idx=$((idx + 1))
	    echo "Transcribing chunk #$idx: ${f##*/}"

	    # The body goes to a file and the HTTP status to stdout, so success is
	    # judged by the status code instead of by searching the transcript text.
	    http_code=$(curl -sS -o "$body" -w '%{http_code}' \
	      https://api.openai.com/v1/audio/transcriptions \
	      -H "Authorization: Bearer $OPENAI_API_KEY" \
	      -H "Content-Type: multipart/form-data" \
	      -F "model=$model" \
	      -F "response_format=$format" \
	      -F "file=@${f}") || {
	      echo "ERROR: request failed for chunk #$idx" >&2
	      return 2
	    }

	    resp=$(cat "$body" 2>/dev/null)

	    if [[ "$http_code" != 2[0-9][0-9] ]]; then
	      echo "ERROR from API on chunk #$idx (HTTP $http_code):" >&2
	      printf '%s\n' "$resp" >&2
	      return 2
	    fi
	    if [[ -z "$resp" ]]; then
	      echo "ERROR: request failed for chunk #$idx" >&2
	      return 2
	    fi

	    # printf keeps backslashes and leading dashes in the response intact.
	    if [[ "$format" == "text" || "$format" == "vtt" || "$format" == "srt" ]]; then
	      # Text-like formats get a separator for readability.
	      printf '\n----- [chunk %s] -----\n%s\n' "$idx" "$resp" >> "$out"
	    else
	      # json: append as-is plus newline
	      printf '%s\n' "$resp" >> "$out"
	    fi
	  done

	  if [[ "$idx" -eq 0 ]]; then
	    echo "ERROR: segmentation produced no chunks." >&2
	    return 2
	  fi

	  echo "Done. Output saved to: $out"
	}

	#######################################
	# Transcribe long videos via the OpenAI Audio API in fixed-length chunks.
	#
	# The audio track of <input_video> is extracted with ffmpeg, split with the
	# segment muxer, and every chunk is POSTed to /v1/audio/transcriptions; the
	# responses are appended to <output_text> in chunk order.
	#
	# Usage:     openai_transcribe <input_video> <output_text>
	#              [--model MODEL] [--chunk SECONDS] [--format text|srt|vtt|json]
	# Globals:   OPENAI_API_KEY (read)
	# Arguments: $1 - input media file, $2 - output file, then optional flags
	# Outputs:   progress/help on stdout, diagnostics on stderr
	# Returns:   0 on success or when the help text is shown, 2 on any error,
	#            130 if interrupted while transcribing
	#######################################
	openai_transcribe() {
	  # Single source of truth for the defaults: the help text is built from
	  # these so it cannot drift from the values actually used.
	  local default_model="gpt-4o-transcribe"
	  local default_chunk=1399
	  local default_format="text"

	  # Highlight headings only when stdout is a terminal.
	  local hl="" rs=""
	  if [[ -t 1 ]]; then
	    hl=$'\033[36m'
	    rs=$'\033[0m'
	  fi

	  # Built with += so that indentation of this block cannot split the
	  # assignment into separate words.
	  local USAGE=""
	  USAGE+="${hl}Usage:${rs} openai_transcribe <input_video> <output_text> [--model MODEL] [--chunk SECONDS] [--format text|srt|vtt|json]"$'\n'
	  USAGE+="${hl}Example:${rs} openai_transcribe \"2025-09-19 11-04-11.mkv\" meeting.txt --chunk 1400 --model gpt-4o-transcribe"$'\n'
	  USAGE+=$'\nOptions:\n'
	  USAGE+="  --model   OpenAI model (default: ${default_model})"$'\n'
	  USAGE+="  --chunk   Chunk length in seconds (default: ${default_chunk})"$'\n'
	  USAGE+="  --format  response_format (text|srt|vtt|json; default: ${default_format})"$'\n'
	  USAGE+=$'\nNotes:\n'
	  USAGE+=$'  * Requires: ffmpeg, curl, and $OPENAI_API_KEY set in env.\n'
	  USAGE+=$'  * Audio is copied to .m4a and split with FFmpeg segment muxer.\n'
	  USAGE+=$'  * Each chunk is sent to the API; outputs are appended in order.\n'

	  # Help (also shown when either positional argument is missing)
	  if [[ "${1:-}" == "--help" || "${1:-}" == "-h" || -z "${1:-}" || -z "${2:-}" ]]; then
	    printf '%s\n' "$USAGE"
	    return 0
	  fi

	  # Args
	  local input="$1"
	  local out="$2"
	  shift 2

	  local model="$default_model"
	  local chunk="$default_chunk"
	  local format="$default_format"

	  # Parse flags. Every flag takes a value; check it is present before
	  # "shift 2", which otherwise fails without shifting and loops forever.
	  while [[ $# -gt 0 ]]; do
	    case "$1" in
	      --model|--chunk|--format)
	        if [[ $# -lt 2 || -z "$2" ]]; then
	          echo "ERROR: option $1 requires a value." >&2
	          return 2
	        fi
	        case "$1" in
	          --model)  model="$2" ;;
	          --chunk)  chunk="$2" ;;
	          --format) format="$2" ;;
	        esac
	        shift 2
	        ;;
	      *)
	        echo "Unknown option: $1" >&2
	        return 2
	        ;;
	    esac
	  done

	  # Checks
	  if [[ -z "${OPENAI_API_KEY:-}" ]]; then
	    echo "ERROR: OPENAI_API_KEY is not set." >&2
	    return 2
	  fi
	  if [[ ! -f "$input" ]]; then
	    echo "ERROR: input file not found: $input" >&2
	    return 2
	  fi
	  # Ensure tools
	  command -v ffmpeg >/dev/null 2>&1 || { echo "ERROR: ffmpeg not found." >&2; return 2; }
	  command -v curl   >/dev/null 2>&1 || { echo "ERROR: curl not found." >&2;   return 2; }

	  # Prep
	  local tmpdir rc=0
	  tmpdir="$(mktemp -d -t oa_stt_XXXXXXXX)" || { echo "mktemp failed" >&2; return 2; }

	  # Run the work in a subshell so the traps stay scoped to it and never
	  # replace traps of the calling shell. The traps cover interruption; the
	  # explicit rm below covers every normal return path.
	  (
	    trap 'rm -rf -- "$tmpdir"' EXIT
	    trap 'exit 130' INT TERM
	    _openai_transcribe_run "$input" "$out" "$model" "$chunk" "$format" "$tmpdir"
	  ) || rc=$?

	  rm -rf -- "$tmpdir"
	  return "$rc"
	}