Skip to content

Instantly share code, notes, and snippets.

@abdusco
Last active July 31, 2025 11:37
Show Gist options
  • Save abdusco/f79fdd7ddcf8e0d3f623a15fcf9495d3 to your computer and use it in GitHub Desktop.
Save abdusco/f79fdd7ddcf8e0d3f623a15fcf9495d3 to your computer and use it in GitHub Desktop.
Generate SRT subtitles for a video / audio using OpenAI's Whisper model
#!/usr/bin/env -S uv run --script
# /// script
# dependencies = ["httpx", "srt"]
# ///
"""
# Whisper Subtitles
This script generates SRT subtitles for audio or video files using the OpenAI Whisper API.
## Features
- Extracts audio from video files.
- Speeds up the audio before transcription so that less audio is uploaded and billed, reducing cost (transcription still works reliably).
- Stretches subtitle timings back to match the original speed.
- Saves the result as SRT subtitle files.
## Usage
- Make sure ffmpeg and [uv](https://docs.astral.sh/uv/getting-started/installation/) are installed.
- Set your OpenAI API key in the environment variable `OPENAI_API_KEY`.
```bash
chmod +x whisper_subtitles.py
OPENAI_API_KEY=sk-123 ./whisper_subtitles.py input_file.mp4 -o output.srt
```
"""
import argparse
import logging
import os
import subprocess
import tempfile
from pathlib import Path
import httpx
import srt
# Configure the root logger once at import time: timestamped, level-tagged
# records at INFO and above.
logging.basicConfig(
    format="%(asctime)s %(levelname)s: %(message)s",
    level=logging.INFO,
)
# Module-scoped logger shared by every function in this script.
logger = logging.getLogger(__name__)
def check_ffmpeg_installed():
    """Verify that the ``ffmpeg`` executable can be launched.

    Runs ``ffmpeg -version`` with its output discarded.

    Raises:
        RuntimeError: If the executable cannot be found, or if it is found
            but exits with a non-zero status.
    """
    try:
        subprocess.run(
            ["ffmpeg", "-version"],
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except FileNotFoundError as err:
        raise RuntimeError("ffmpeg is not installed. Please install it to use this script.") from err
    except subprocess.CalledProcessError as err:
        # check=True raises on a non-zero exit; report it with the same
        # exception type as the "not installed" case so callers handle one error.
        raise RuntimeError(
            f"ffmpeg was found but `ffmpeg -version` failed with exit code {err.returncode}."
        ) from err
def parse_args() -> argparse.Namespace:
    """Parse and validate the command-line arguments.

    Returns:
        A namespace with ``input`` and ``output`` (``Path``), ``speed``
        (``float``, default 2.5, restricted to 1..10) and the OpenAI API key,
        exposed both as ``openai_api_key`` and as ``api_key``.

    The API key is taken from ``--openai-api-key`` when given, otherwise from
    the ``OPENAI_API_KEY`` environment variable. If neither is set the parser
    exits with a usage error.
    """
    # Local import so this block does not depend on the file-level imports.
    from typing import Callable

    parser = argparse.ArgumentParser(
        description="Generate SRT subtitles using OpenAI Whisper API.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("input", type=Path, help="Input audio or video file")
    parser.add_argument("-o", "--output", type=Path, required=True, help="Output SRT file path")
    parser.add_argument(
        "--openai-api-key",
        type=str,
        # SUPPRESS instead of the env value: ArgumentDefaultsHelpFormatter would
        # otherwise append the secret key to the --help output. The environment
        # fallback is resolved after parsing (below).
        default=argparse.SUPPRESS,
        help="OpenAI API key (default: from OPENAI_API_KEY env var)",
    )

    def float_between(lower: float, upper: float) -> Callable[[str], float]:
        """Build an argparse ``type`` callable accepting floats in [lower, upper]."""

        def bounded_float(value: str) -> float:
            fvalue = float(value)
            if not (lower <= fvalue <= upper):
                raise argparse.ArgumentTypeError(f"Value must be between {lower} and {upper}")
            # Return the converted number, not the raw string, so that
            # downstream arithmetic on the speed factor works.
            return fvalue

        return bounded_float

    parser.add_argument(
        "--speed",
        type=float_between(1, 10),
        default=2.5,
        help="Audio speed-up factor for transcription to reduce token usage at the cost of accuracy",
    )

    args = parser.parse_args()

    # The flag wins over the environment; the key is required only if both are absent.
    api_key = getattr(args, "openai_api_key", None) or os.getenv("OPENAI_API_KEY")
    if not api_key:
        parser.error("an OpenAI API key is required: pass --openai-api-key or set OPENAI_API_KEY")
    args.openai_api_key = api_key
    # Alias under the shorter name as well so either attribute can be read.
    args.api_key = api_key
    return args
def extract_audio(input_path: Path, save_path: Path, speed: float) -> Path:
    """Transcode the audio of *input_path* into *save_path* with ffmpeg.

    The ffmpeg invocation drops video (``-vn``), encodes AAC at 16000 Hz,
    one channel, 32k bitrate, and applies the ``atempo`` filter with *speed*.
    ffmpeg's own output is discarded.

    Returns:
        *save_path*, unchanged.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    source_opts = ["-y", "-i", str(input_path)]
    encode_opts = ["-vn", "-acodec", "aac", "-ar", "16000", "-ac", "1", "-b:a", "32k"]
    tempo_opts = ["-filter:a", f"atempo={speed}"]

    subprocess.run(
        ["ffmpeg", *source_opts, *encode_opts, *tempo_opts, str(save_path)],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=True,
    )
    return save_path
def stretch_srt(srt_text: str, speed: float) -> str:
    """Multiply every subtitle's start and end time in *srt_text* by *speed*.

    The text is parsed with ``srt.parse``, each entry's timestamps are scaled
    in place, and the result is serialized again with ``srt.compose``.
    """
    entries = [*srt.parse(srt_text)]
    for entry in entries:
        entry.start *= speed
        entry.end *= speed
    return srt.compose(entries)
def read_audio_duration(audio_path: Path) -> float:
    """Return the container duration of *audio_path* as reported by ffprobe.

    ffprobe is asked to print only the ``format=duration`` value; its stdout
    is stripped and converted to ``float``.

    Raises:
        subprocess.CalledProcessError: If ffprobe exits with a non-zero status.
        ValueError: If ffprobe's output cannot be converted to a float.
    """
    probe = subprocess.run(
        [
            "ffprobe",
            "-v", "error",
            "-show_entries", "format=duration",
            "-of", "default=noprint_wrappers=1:nokey=1",
            str(audio_path),
        ],
        check=True,
        text=True,
        capture_output=True,
    )
    duration_text = probe.stdout.strip()
    return float(duration_text)
def transcribe_as_srt(audio_path: Path, api_key: str) -> str:
    """Upload *audio_path* to the OpenAI transcription endpoint and return SRT text.

    Args:
        audio_path: Audio file to upload.
        api_key: OpenAI API key, sent as a Bearer token.

    Returns:
        The response body, requested in ``srt`` format from the ``whisper-1`` model.

    Raises:
        httpx.HTTPStatusError: If the API answers with an error status (the
            response body is logged first).
    """
    # Both context managers guarantee cleanup (connection pool and file handle)
    # even when the request raises.
    with httpx.Client(
        base_url="https://api.openai.com/v1/",
        headers={"Authorization": f"Bearer {api_key}"},
        timeout=120,
    ) as client, audio_path.open("rb") as f:
        res = client.post(
            "/audio/transcriptions",
            data={"model": "whisper-1", "response_format": "srt"},
            files={"file": f},
        )

    if res.is_error:
        logger.error(f"Error during transcription: {res.text}")
        res.raise_for_status()

    if ms := res.headers.get("openai-processing-ms"):
        logger.info(f"Transcription done in {ms} ms")

    # Cost reporting is best-effort: a failure here must never discard a
    # transcription that already succeeded, but it should not be invisible either.
    try:
        # NOTE(review): presumably the whisper-1 price in USD per minute of
        # audio — verify against current pricing.
        price_per_min = 0.006
        duration = read_audio_duration(audio_path)
        cost = (duration / 60) * price_per_min
        logger.info(f"Transcription cost: ${cost:.4f} for {duration:.2f} seconds")
    except (OSError, subprocess.SubprocessError, ValueError) as err:
        logger.warning(f"Could not estimate transcription cost: {err}")

    return res.text
def main() -> None:
    """Run the CLI: extract sped-up audio, transcribe it, and write the SRT file.

    Steps:
        1. Parse arguments and make sure ffmpeg is available.
        2. Extract the audio track, sped up by ``--speed``, into a temporary file.
        3. Transcribe it and save the raw (un-stretched) transcript as
           ``<input stem>.txt`` next to the output file.
        4. Scale the subtitle timings back by the same factor and write the
           result to the path given with ``-o``.

    Raises:
        SystemExit: With status 1 if the input file is missing, audio
            extraction fails, or transcription fails.
    """
    args = parse_args()
    check_ffmpeg_installed()

    if not args.input.is_file():
        # Fail early with a clear message; ffmpeg's own stderr is suppressed.
        logger.error(f"Input file not found: {args.input}")
        raise SystemExit(1)

    # Honor the file name the user passed with -o instead of replacing it with
    # the input's stem.
    save_path: Path = args.output.resolve()

    logger.info(f"Extracting audio from {args.input}")
    with tempfile.TemporaryDirectory() as tmpdir:
        audio_path = Path(tmpdir) / "audio.m4a"
        try:
            extract_audio(input_path=args.input, save_path=audio_path, speed=args.speed)
        except subprocess.CalledProcessError as e:
            logger.error(f"Error during audio extraction: {e}")
            raise SystemExit(1)

        try:
            logger.info(f"Transcribing {audio_path}")
            # argparse derives the attribute name from the --openai-api-key flag.
            srt_text = transcribe_as_srt(audio_path=audio_path, api_key=args.openai_api_key)
            # Keep the raw transcript (timings still at the sped-up rate).
            # Explicit UTF-8 so non-ASCII subtitles survive any locale.
            save_path.with_name(f"{args.input.stem}.txt").write_text(srt_text, encoding="utf-8")
        except Exception as e:
            logger.error(f"Error during transcription: {e}")
            raise SystemExit(1)

    logger.debug("Stretching SRT timings to 1x")
    srt_stretched = stretch_srt(srt_text=srt_text, speed=args.speed)
    save_path.write_text(srt_stretched, encoding="utf-8")
    logger.info(f"SRT written to {save_path}")
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment