Skip to content

Instantly share code, notes, and snippets.

@chenhan1218
Created May 26, 2025 14:42
Show Gist options
  • Save chenhan1218/58c3ccae1e06ddb12a909ae9d05e3477 to your computer and use it in GitHub Desktop.
Save chenhan1218/58c3ccae1e06ddb12a909ae9d05e3477 to your computer and use it in GitHub Desktop.
whisper.py
from faster_whisper import WhisperModel
from pathlib import Path
# import opencc
import argparse
# Example invocation:
#   python3 .\whisper.py '.\audio.mp3' --language zh --keywords 這是說正體中文的節目 --model turbo

# Command-line interface: one positional audio path plus optional flags for
# the model name, the spoken language, a prompt string and a timecode offset.
parser = argparse.ArgumentParser(
    description="Transcribe an audio file using Whisper and save as SRT/VTT.",
)
parser.add_argument(
    "filename",
    help="Path to the audio file",
    type=str,
)
parser.add_argument(
    "--model",
    default="large-v3",
    help="Model to use",
    type=str,
)
parser.add_argument(
    "--language",
    default="zh",
    help="Language of the audio file",
    type=str,
)
parser.add_argument(
    "--keywords",
    default="",
    help="Initial prompt for the model",
    type=str,
)
parser.add_argument(
    "--offset",
    default=0,
    help="Offset for the timecode (ms)",
    type=int,
)

# Parse sys.argv and echo the resulting namespace so the run is self-describing.
args = parser.parse_args()
print(args)
# Load the Whisper model selected by --model; it is run on the CPU with the
# "int8" compute type.
model = WhisperModel(args.model, device="cpu", compute_type="int8")
filename = args.filename

# The --keywords text is handed to the model as its initial prompt. When no
# keywords were given (the default is an empty string) pass None instead, so
# the model is not conditioned on an empty prompt.
initial_prompt = args.keywords or None

# Transcribe the audio file with word-level timestamps. Voice-activity
# detection is enabled with a 500 ms minimum silence and a 10 s maximum
# speech duration, as configured below.
segments, info = model.transcribe(
    filename,
    language=args.language,
    initial_prompt=initial_prompt,
    word_timestamps=True,
    vad_filter=True,
    vad_parameters={"min_silence_duration_ms": 500, "max_speech_duration_s": 10},
)
# Function to format time in SRT format with millisecond precision
def format_timestamp(seconds, offset_ms=None):
    """Format a time in seconds as an SRT-style ``HH:MM:SS,mmm`` timestamp.

    Args:
        seconds: Time position in seconds (int or float).
        offset_ms: Offset in milliseconds added to the time. When omitted,
            the ``--offset`` command-line value (``args.offset``) is used.

    Returns:
        The shifted time as a zero-padded ``HH:MM:SS,mmm`` string. A shifted
        time that would fall before zero is clamped to ``00:00:00,000``.
    """
    if offset_ms is None:
        offset_ms = args.offset
    # Work in whole milliseconds. Rounding (rather than truncating) keeps
    # binary float error such as 1.001 * 1000 == 1000.9999999999999 from
    # dropping a millisecond; clamping keeps a negative offset from producing
    # a nonsensical timestamp.
    total_ms = max(0, int(round(seconds * 1000 + offset_ms)))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
# Save VTT
# The cues are written to "<audio filename>.vtt" and echoed to stdout as they
# are produced.
file_path = Path(filename + ".vtt")
with open(file_path, "w", encoding="utf-8") as f:
    # File signature followed by the blank line that must precede the cues.
    print("WEBVTT\n", file=f)
    for segment in segments:
        # format_timestamp() emits the SRT-style "HH:MM:SS,mmm"; WebVTT cue
        # timings require "." before the milliseconds, so convert here.
        start = format_timestamp(segment.start).replace(",", ".")
        end = format_timestamp(segment.end).replace(",", ".")
        # "-->" is the cue timing separator and is not allowed in cue text.
        text = segment.text.strip().replace("-->", "->")
        cue = f"{start} --> {end}\n{text}\n"
        # Flush every cue so partial results are on disk and on screen while
        # the remaining segments are still being processed.
        print(cue, file=f, flush=True)
        print(cue, flush=True)
print(f"VTT file saved to {file_path}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment