Created
May 26, 2025 14:42
-
-
Save chenhan1218/58c3ccae1e06ddb12a909ae9d05e3477 to your computer and use it in GitHub Desktop.
whisper.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from faster_whisper import WhisperModel | |
from pathlib import Path | |
# import opencc | |
import argparse | |
# Example invocation (the --keywords value is a Traditional Chinese prompt):
#   python3 .\whisper.py '.\audio.mp3' --language zh --keywords 這是說正體中文的節目 --model turbo

# Command-line interface: one positional audio path plus optional flags.
parser = argparse.ArgumentParser(
    description="Transcribe an audio file using Whisper and save as SRT/VTT."
)
parser.add_argument("filename", help="Path to the audio file", type=str)
# (flag, value type, default, help text) for every optional argument.
_OPTIONAL_FLAGS = (
    ("--model", str, "large-v3", "Model to use"),
    ("--language", str, "zh", "Language of the audio file"),
    ("--keywords", str, "", "Initial prompt for the model"),
    ("--offset", int, 0, "Offset for the timecode (ms)"),
)
for _flag, _kind, _default, _help in _OPTIONAL_FLAGS:
    parser.add_argument(_flag, type=_kind, default=_default, help=_help)
args = parser.parse_args()
print(args)

filename = args.filename
# The keywords string is handed to the model unchanged as its initial prompt.
initial_prompt = args.keywords

# Load the requested Whisper model on the CPU with int8 computation.
model = WhisperModel(args.model, device="cpu", compute_type="int8")

# Transcribe the audio file with word timestamps and the VAD filter enabled.
_vad_parameters = {"min_silence_duration_ms": 500, "max_speech_duration_s": 10}
segments, info = model.transcribe(
    filename,
    language=args.language,
    initial_prompt=initial_prompt,
    word_timestamps=True,
    vad_filter=True,
    vad_parameters=_vad_parameters,
)
# Format a position in the audio as an SRT-style timestamp (HH:MM:SS,mmm).
def format_timestamp(seconds, offset_ms=None):
    """Return *seconds* as an ``HH:MM:SS,mmm`` timestamp string.

    Args:
        seconds: Position in the audio, in seconds.
        offset_ms: Shift applied to the position, in milliseconds. When
            omitted, the ``--offset`` value from the parsed command line
            (``args.offset``) is used.

    Returns:
        The shifted position formatted as ``HH:MM:SS,mmm``. A position that
        becomes negative after the shift is clamped to ``00:00:00,000``.
    """
    if offset_ms is None:
        offset_ms = args.offset
    # Convert to whole milliseconds once and round instead of truncating:
    # 1.001 * 1000 evaluates to 1000.9999999999999, which truncation would
    # render as ",000". Clamp at zero because a negative offset can push an
    # early position below the start, and the format has no sign.
    total_ms = max(0, int(round(seconds * 1000 + offset_ms)))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
# Save the transcription as a WebVTT file next to the input ("<filename>.vtt").
file_path = Path(filename + ".vtt")
with open(file_path, "w", encoding="utf-8") as f:
    # File header; the embedded newline leaves a blank line before the cues.
    print("WEBVTT\n", file=f)
    for segment in segments:
        # WebVTT cue timings need "." before the milliseconds, whereas
        # format_timestamp produces the SRT-style "," separator.
        start = format_timestamp(segment.start).replace(",", ".")
        end = format_timestamp(segment.end).replace(",", ".")
        # "-->" is the cue-timing separator, so it is replaced in cue text.
        text = segment.text.strip().replace("-->", "->")
        cue = f"{start} --> {end}\n{text}\n"
        # Flush after every cue (presumably so progress is visible while the
        # transcription is still running), and echo the cue to the console.
        print(cue, file=f, flush=True)
        print(cue, flush=True)
print(f"VTT file saved to {file_path}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment