Created
September 19, 2025 18:16
-
-
Save akazakou/d74b74dbf50dd3b714ec3835112e8ae0 to your computer and use it in GitHub Desktop.
A sample showing how to use the OpenAI API from the console to transcribe speech to text. Requires an OpenAI API key, ffmpeg, and curl.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#######################################
# Transcribe a long audio/video file through the OpenAI Audio API.
#
# The audio track is extracted with ffmpeg, split into chunks of at most
# --chunk seconds, and each chunk is POSTed to /v1/audio/transcriptions.
# Responses are appended to the output file in chunk order.
# Written to work when sourced from either zsh or bash.
#
# Usage:
#   openai_transcribe <input_video> <output_text>
#       [--model MODEL] [--chunk SECONDS] [--format text|srt|vtt|json]
# Globals:
#   OPENAI_API_KEY (read) - sent as the Bearer token.
#   TMPDIR (read)         - parent directory for the scratch dir (default /tmp).
# Outputs:
#   Progress on stdout, diagnostics on stderr. <output_text> is truncated and
#   then filled with one section per chunk.
# Returns:
#   0 on success or when --help/-h is given; 2 on usage errors or when any
#   step (tool lookup, ffmpeg, HTTP request, file write) fails.
#
# NOTE(review): the default chunk is 1399 s, presumably to stay just under a
# 1400 s per-request limit of the default model - confirm against the API.
# NOTE(review): chunks are cut with -reset_timestamps 1, so srt/vtt timestamps
# presumably restart at zero in every chunk - confirm before relying on them.
#######################################
openai_transcribe() {
  local usage
  usage='Usage: openai_transcribe <input_video> <output_text> [--model MODEL] [--chunk SECONDS] [--format text|srt|vtt|json]
Example: openai_transcribe "2025-09-19 11-04-11.mkv" meeting.txt --chunk 1399 --model gpt-4o-transcribe

Options:
  --model   OpenAI model (default: gpt-4o-transcribe)
  --chunk   Chunk length in seconds (default: 1399)
  --format  response_format (text|srt|vtt|json; default: text)

Notes:
  * Requires: ffmpeg, curl, and $OPENAI_API_KEY set in env.
  * Audio is copied to .m4a and split with FFmpeg segment muxer.
  * Each chunk is sent to the API; outputs are appended in order.'

  # Asking for help is a success; missing positionals are a usage error.
  case "${1-}" in
    -h|--help)
      printf '%s\n' "$usage"
      return 0
      ;;
  esac
  if [[ -z "${1-}" || -z "${2-}" ]]; then
    printf '%s\n' "$usage" >&2
    return 2
  fi

  local input="$1"
  local output="$2"
  shift 2

  # Defaults
  local model="gpt-4o-transcribe"
  local chunk=1399
  local format="text"

  # Parse flags. Every flag takes a value; check it exists before `shift 2`,
  # because a failed shift leaves $# unchanged and the loop would never end.
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --model|--chunk|--format)
        if [[ $# -lt 2 || -z "$2" ]]; then
          printf 'ERROR: option %s requires a value.\n' "$1" >&2
          return 2
        fi
        case "$1" in
          --model) model="$2" ;;
          --chunk) chunk="$2" ;;
          --format) format="$2" ;;
        esac
        shift 2
        ;;
      *)
        printf 'Unknown option: %s\n' "$1" >&2
        return 2
        ;;
    esac
  done

  # --chunk must be a positive decimal integer (no sign, no leading zero).
  case "$chunk" in
    ''|0*|*[!0-9]*)
      printf 'ERROR: --chunk must be a positive integer, got: %s\n' "$chunk" >&2
      return 2
      ;;
  esac

  # Checks
  if [[ -z "${OPENAI_API_KEY-}" ]]; then
    echo "ERROR: OPENAI_API_KEY is not set." >&2
    return 2
  fi
  if [[ ! -f "$input" ]]; then
    printf 'ERROR: input file not found: %s\n' "$input" >&2
    return 2
  fi

  # Ensure tools
  local tool
  for tool in ffmpeg curl; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      printf 'ERROR: %s not found.\n' "$tool" >&2
      return 2
    fi
  done

  # Scratch directory. An explicit template path is used because the meaning
  # of `mktemp -t` differs between GNU and BSD implementations.
  local tmp_base="${TMPDIR:-/tmp}"
  local tmpdir
  tmpdir="$(mktemp -d "${tmp_base%/}/oa_stt_XXXXXXXX")" || {
    echo "ERROR: mktemp failed." >&2
    return 2
  }
  if [[ -z "$tmpdir" || ! -d "$tmpdir" ]]; then
    echo "ERROR: mktemp failed." >&2
    return 2
  fi

  # Run the pipeline in a subshell so the EXIT trap is scoped to this call and
  # cannot replace an EXIT trap already installed in the caller's shell. The
  # rm after the subshell is a fallback in case the trap did not run.
  local rc=0
  (
    trap 'rm -rf -- "$tmpdir"' EXIT
    _openai_transcribe_run "$input" "$output" "$model" "$chunk" "$format" "$tmpdir"
  ) || rc=$?
  rm -rf -- "$tmpdir"
  return "$rc"
}

#######################################
# Private helper for openai_transcribe: extract, segment, transcribe, append.
# Arguments:
#   $1 input media file    $2 output file      $3 model
#   $4 chunk seconds       $5 response_format  $6 existing scratch directory
# Returns:
#   0 on success, 2 on any failure. The caller removes the scratch directory.
#######################################
_openai_transcribe_run() {
  local input="$1"
  local output="$2"
  local model="$3"
  local chunk="$4"
  local format="$5"
  local workdir="$6"

  local audio="$workdir/audio.m4a"
  local pattern="$workdir/part_%04d.m4a"
  local resp_file="$workdir/response.body"

  # 1) Extract audio (try stream copy; if it fails, re-encode).
  #    -nostdin keeps ffmpeg from consuming the caller's standard input.
  if ! ffmpeg -nostdin -hide_banner -loglevel error -y -i "$input" \
    -vn -acodec copy "$audio"; then
    echo "Stream copy failed, re-encoding audio..." >&2
    if ! ffmpeg -nostdin -hide_banner -loglevel error -y -i "$input" \
      -vn -ac 2 -ar 48000 -c:a aac -b:a 192k "$audio"; then
      echo "ERROR: audio extraction failed." >&2
      return 2
    fi
  fi

  # 2) Segment audio to <= $chunk seconds
  if ! ffmpeg -nostdin -hide_banner -loglevel error -y -i "$audio" \
    -f segment -segment_time "$chunk" -reset_timestamps 1 \
    -map 0:a -c copy "$pattern"; then
    echo "ERROR: segmentation failed." >&2
    return 2
  fi

  # 3) Call the API on each chunk and append to the output file.
  if ! : > "$output"; then
    printf 'ERROR: cannot write to %s\n' "$output" >&2
    return 2
  fi

  # Declared once, outside the loop: re-running `local name` on an existing
  # variable makes zsh print its current value.
  local part resp http_code
  local idx=0 num=0

  # Walk the segment names by index instead of globbing: ordering stays
  # numeric past 9999 parts, and an unmatched glob (fatal in zsh) is avoided.
  while :; do
    part="$(printf '%s/part_%04d.m4a' "$workdir" "$idx")"
    [[ -f "$part" ]] || break
    num=$((idx + 1))
    echo "Transcribing chunk #$num: ${part##*/}"

    # Body goes to a file and the HTTP status to stdout, so an API error is
    # detected from the status code rather than by searching the transcript
    # for the text "error". curl generates the multipart Content-Type header
    # (with its boundary) itself. --form-string sends the values literally,
    # so a leading '@' or '<' cannot make curl read a local file.
    : > "$resp_file"
    http_code="$(curl -sS -o "$resp_file" -w '%{http_code}' \
      https://api.openai.com/v1/audio/transcriptions \
      -H "Authorization: Bearer $OPENAI_API_KEY" \
      --form-string "model=$model" \
      --form-string "response_format=$format" \
      -F "file=@${part}")" || {
      echo "ERROR: request failed for chunk #$num" >&2
      return 2
    }
    if [[ "$http_code" != 2?? ]]; then
      echo "ERROR from API on chunk #$num (HTTP $http_code):" >&2
      cat -- "$resp_file" >&2
      echo >&2
      return 2
    fi

    # An empty body is allowed here (e.g. a silent chunk in text format).
    resp="$(<"$resp_file")"

    # printf '%s' instead of echo: zsh's echo would interpret backslash
    # sequences inside the transcript or JSON.
    if [[ "$format" == "text" || "$format" == "vtt" || "$format" == "srt" ]]; then
      # Text-like formats get a separator for readability.
      {
        printf '\n'
        printf '%s\n' "----- [chunk $num] -----"
        printf '%s\n' "$resp"
      } >> "$output" || return 2
    else
      # json and any other format: append as-is plus newline
      printf '%s\n' "$resp" >> "$output" || return 2
    fi

    idx=$((idx + 1))
  done

  if [[ "$idx" -eq 0 ]]; then
    echo "ERROR: segmentation produced no audio chunks." >&2
    return 2
  fi

  echo "Done. Output saved to: $output"
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment