Skip to content

Instantly share code, notes, and snippets.

@akazakou
Created September 19, 2025 18:16
Show Gist options
  • Save akazakou/d74b74dbf50dd3b714ec3835112e8ae0 to your computer and use it in GitHub Desktop.
Save akazakou/d74b74dbf50dd3b714ec3835112e8ae0 to your computer and use it in GitHub Desktop.
A sample shell function that uses the OpenAI API from the console to transcribe speech to text. Requires an OpenAI API key, ffmpeg, and curl.
# Transcribe long videos via the OpenAI Audio API, one audio chunk at a time.
#
# Usage:
#   openai_transcribe <input_video> <output_text> [--model MODEL]
#                     [--chunk SECONDS] [--format text|srt|vtt|json]
#
# Globals:   OPENAI_API_KEY (read)
# Requires:  ffmpeg, curl
# Outputs:   progress on stdout, diagnostics on stderr; the transcript is
#            written to <output_text> (truncated first, chunks appended in order)
# Returns:   0 on success or when help is requested; 2 on usage, setup,
#            ffmpeg or API errors
#
# The code sticks to constructs that behave the same in bash and zsh: no
# `print`, no prompt escapes, no function-level EXIT trap, and no locals named
# `path` or `status` (both are special in zsh).

# Print the usage text on stdout. Headings are coloured only when stdout is a
# terminal, so redirected or captured help stays plain text.
# Arguments: $1 default model, $2 default chunk length, $3 default format
_openai_transcribe_usage() {
  local hl="" rs=""
  if [[ -t 1 ]]; then
    hl=$'\033[36m'
    rs=$'\033[0m'
  fi
  printf '%s\n' \
    "${hl}Usage:${rs} openai_transcribe <input_video> <output_text> [--model MODEL] [--chunk SECONDS] [--format text|srt|vtt|json]" \
    "${hl}Example:${rs} openai_transcribe \"2025-09-19 11-04-11.mkv\" meeting.txt --chunk 1400 --model gpt-4o-transcribe" \
    "" \
    "Options:" \
    "  --model   OpenAI model (default: $1)" \
    "  --chunk   Chunk length in seconds (default: $2)" \
    "  --format  response_format (text|srt|vtt|json; default: $3)" \
    "" \
    "Notes:" \
    '  * Requires: ffmpeg, curl, and $OPENAI_API_KEY set in env.' \
    "  * Audio is copied to .m4a and split with FFmpeg segment muxer." \
    "  * Each chunk is sent to the API; outputs are appended in order."
}

# Extract the audio track, split it into chunks and transcribe each chunk.
# All scratch files live in the caller-owned temp dir; the caller removes it.
# Arguments: $1 input file, $2 output file, $3 model, $4 chunk seconds,
#            $5 response format, $6 existing scratch directory
# Returns:   0 on success, 2 on any ffmpeg, curl or API failure
_openai_transcribe_run() {
  local src="$1" dest="$2" model="$3" chunk="$4" format="$5" tmpdir="$6"
  local audio="$tmpdir/audio.m4a"
  local pattern="$tmpdir/part_%04d.m4a"
  local body="$tmpdir/response.body"

  # 1) Extract audio (try stream copy; if it fails, re-encode to AAC)
  if ! ffmpeg -hide_banner -loglevel error -y -i "$src" -vn -acodec copy "$audio"; then
    echo "Stream copy failed, re-encoding audio..."
    if ! ffmpeg -hide_banner -loglevel error -y -i "$src" -vn -ac 2 -ar 48000 -c:a aac -b:a 192k "$audio"; then
      echo "ERROR: audio extraction failed." >&2
      return 2
    fi
  fi

  # 2) Segment audio into pieces of about $chunk seconds each
  if ! ffmpeg -hide_banner -loglevel error -y -i "$audio" \
    -f segment -segment_time "$chunk" -reset_timestamps 1 \
    -map 0:a -c copy "$pattern"; then
    echo "ERROR: segmentation failed." >&2
    return 2
  fi

  # 3) Call the OpenAI API on each chunk and append to the output file
  : > "$dest" || { echo "ERROR: cannot write to $dest" >&2; return 2; }

  # Declared once, outside the loop: re-running `local name` for a variable
  # that is already set can print its value in zsh.
  local idx=0 rc=0 http_code="" resp="" f
  for f in "$tmpdir"/part_*.m4a; do
    [[ -e "$f" ]] || continue
    idx=$((idx + 1))
    echo "Transcribing chunk #$idx: ${f##*/}"

    # The body goes to a file and the HTTP status to stdout, so success is
    # decided by the status code rather than by searching the transcript
    # for the word "error".
    rm -f -- "$body"
    rc=0
    http_code=$(curl -sS -o "$body" -w '%{http_code}' \
      https://api.openai.com/v1/audio/transcriptions \
      -H "Authorization: Bearer $OPENAI_API_KEY" \
      -H "Content-Type: multipart/form-data" \
      -F "model=$model" \
      -F "response_format=$format" \
      -F "file=@${f}") || rc=$?

    resp=""
    if [[ -f "$body" ]]; then
      resp=$(cat -- "$body")
    fi

    if [[ $rc -ne 0 ]]; then
      echo "ERROR: request failed for chunk #$idx" >&2
      return 2
    fi
    case "$http_code" in
      2??) ;;
      *)
        echo "ERROR from API on chunk #$idx (HTTP $http_code):" >&2
        printf '%s\n' "$resp" >&2
        return 2
        ;;
    esac
    if [[ -z "$resp" ]]; then
      echo "ERROR: request failed for chunk #$idx" >&2
      return 2
    fi

    # printf, not echo: zsh's echo expands backslash escapes, which would
    # rewrite sequences such as \n inside a JSON response.
    if [[ "$format" == "text" || "$format" == "vtt" || "$format" == "srt" ]]; then
      # Text-like formats get a separator per chunk for readability
      printf '\n----- [chunk %s] -----\n%s\n' "$idx" "$resp" >> "$dest"
    else
      # json: append as-is plus newline
      printf '%s\n' "$resp" >> "$dest"
    fi
  done

  if [[ $idx -eq 0 ]]; then
    echo "ERROR: segmentation produced no audio chunks." >&2
    return 2
  fi
  echo "Done. Output saved to: $dest"
}

openai_transcribe() {
  # Defaults (also shown in the help text, so the two cannot drift apart)
  local model="gpt-4o-transcribe"
  local chunk=1399
  local format="text"

  # Explicit help request: usage on stdout, success
  if [[ "${1-}" == "--help" || "${1-}" == "-h" ]]; then
    _openai_transcribe_usage "$model" "$chunk" "$format"
    return 0
  fi
  # Missing positional arguments is a usage error: usage on stderr, status 2
  if [[ -z "${1-}" || -z "${2-}" ]]; then
    _openai_transcribe_usage "$model" "$chunk" "$format" >&2
    return 2
  fi

  local src="$1"
  local dest="$2"
  shift 2

  # Parse flags. Each value-taking flag is checked before `shift 2`: shifting
  # past the end fails without consuming anything, which would loop forever.
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --model | --chunk | --format)
        if [[ $# -lt 2 || -z "$2" ]]; then
          echo "ERROR: option $1 requires a value." >&2
          return 2
        fi
        case "$1" in
          --model) model="$2" ;;
          --chunk) chunk="$2" ;;
          --format) format="$2" ;;
        esac
        shift 2
        ;;
      -h | --help)
        _openai_transcribe_usage "gpt-4o-transcribe" "1399" "text"
        return 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        return 2
        ;;
    esac
  done

  # Checks
  if [[ -z "${OPENAI_API_KEY-}" ]]; then
    echo "ERROR: OPENAI_API_KEY is not set." >&2
    return 2
  fi
  if [[ ! -f "$src" ]]; then
    echo "ERROR: input file not found: $src" >&2
    return 2
  fi

  # Ensure tools
  command -v ffmpeg >/dev/null 2>&1 || { echo "ERROR: ffmpeg not found." >&2; return 2; }
  command -v curl >/dev/null 2>&1 || { echo "ERROR: curl not found." >&2; return 2; }

  # Scratch directory. It is removed explicitly after the worker returns
  # instead of via `trap ... EXIT`: in bash an EXIT trap set in a function is
  # shell-global, so it would replace the caller's trap and run only at shell
  # exit, when the local $tmpdir no longer exists.
  local tmpdir rc=0
  tmpdir="$(mktemp -d -t oa_stt_XXXXXXXX)" || { echo "mktemp failed" >&2; return 2; }

  _openai_transcribe_run "$src" "$dest" "$model" "$chunk" "$format" "$tmpdir" || rc=$?

  rm -rf -- "$tmpdir"
  return "$rc"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment