chenhan1218 · May 26, 2025 14:42
diff --git a/whisper.py b/whisper.py
 from faster_whisper import WhisperModel
 from pathlib import Path
 # import opencc
 import argparse

 # python3 .\whisper.py '.\audio.mp3' --language zh --keywords 這是說正體中文的節目 --model turbo

 # Command-line interface: one positional audio path plus optional flags for
 # the model name, spoken language, initial prompt and timecode offset.
 parser = argparse.ArgumentParser(
     description="Transcribe an audio file using Whisper and save as SRT/VTT."
 )
 parser.add_argument("filename", type=str, help="Path to the audio file")
 parser.add_argument("--model", default="large-v3", type=str, help="Model to use")
 parser.add_argument(
     "--language", default="zh", type=str, help="Language of the audio file"
 )
 parser.add_argument(
     "--keywords", default="", type=str, help="Initial prompt for the model"
 )
 parser.add_argument(
     "--offset", default=0, type=int, help="Offset for the timecode (ms)"
 )
 args = parser.parse_args()
 # Echo the parsed options so the effective settings are visible in the log.
 print(args)

 # Input path and model: the model runs on the CPU with int8 compute.
 filename = args.filename
 model = WhisperModel(args.model, device="cpu", compute_type="int8")

 # The --keywords text is handed to the model as its initial prompt.
 initial_prompt = f"{args.keywords}"

 # Transcribe with voice-activity filtering and word-level timestamps.
 # NOTE(review): faster-whisper documents `segments` as a lazy generator, so
 # the actual decoding presumably happens while it is iterated - confirm.
 segments, info = model.transcribe(
     filename,
     language=args.language,
     initial_prompt=initial_prompt,
     word_timestamps=True,
     vad_filter=True,
     vad_parameters=dict(min_silence_duration_ms=500, max_speech_duration_s=10),
 )


 # Format a time in seconds as a WebVTT cue timestamp (HH:MM:SS.mmm)
 def format_timestamp(seconds, offset_ms=None):
     """Return ``seconds`` shifted by an offset, formatted as ``HH:MM:SS.mmm``.

     The result is written into a file that starts with a ``WEBVTT`` header.
     WebVTT separates the fractional part with a full stop; the comma form
     belongs to SRT and is rejected by strict WebVTT parsers.

     Args:
         seconds: Position in the audio, in seconds (int or float).
         offset_ms: Shift to apply, in milliseconds. ``None`` (the default)
             falls back to the ``--offset`` command-line value ``args.offset``.

     Returns:
         The shifted time as a zero-padded ``HH:MM:SS.mmm`` string. A time
         that would become negative after the shift is clamped to zero.
     """
     if offset_ms is None:
         offset_ms = args.offset
     # Work in whole milliseconds and round once: truncating ``seconds * 1000``
     # can land one millisecond short because of float representation error.
     total_ms = max(0, int(round(seconds * 1000 + offset_ms)))
     hours, remainder = divmod(total_ms, 3_600_000)
     minutes, remainder = divmod(remainder, 60_000)
     secs, millis = divmod(remainder, 1000)
     return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"

 # Write the transcript to "<input path>.vtt", one cue per segment
 file_path = Path(filename + ".vtt")
 with file_path.open("w", encoding="utf-8") as vtt_file:
     # Header line; the "\n" plus print's own newline leaves a blank line after it.
     print("WEBVTT\n", file=vtt_file)
     for seg in segments:
         cue_start = format_timestamp(seg.start)
         cue_end = format_timestamp(seg.end)
         # "-->" is the cue timing separator, so it is replaced inside the text.
         cue_text = seg.text.strip().replace("-->", "->")
         cue = f"{cue_start} --> {cue_end}\n{cue_text}\n"
         # Flush after every cue so partial output is on disk while the loop runs,
         # and echo the same cue to stdout as progress.
         print(cue, file=vtt_file, flush=True)
         print(cue, flush=True)

 print(f"VTT file saved to {file_path}")
	from faster_whisper import WhisperModel
	from pathlib import Path
	# import opencc
	import argparse

	# python3 .\whisper.py '.\audio.mp3' --language zh --keywords 這是說正體中文的節目 --model turbo

	# Command-line interface: one positional audio path plus optional flags for
	# the model name, spoken language, initial prompt and timecode offset.
	parser = argparse.ArgumentParser(
	    description="Transcribe an audio file using Whisper and save as SRT/VTT."
	)
	parser.add_argument("filename", type=str, help="Path to the audio file")
	parser.add_argument("--model", default="large-v3", type=str, help="Model to use")
	parser.add_argument(
	    "--language", default="zh", type=str, help="Language of the audio file"
	)
	parser.add_argument(
	    "--keywords", default="", type=str, help="Initial prompt for the model"
	)
	parser.add_argument(
	    "--offset", default=0, type=int, help="Offset for the timecode (ms)"
	)
	args = parser.parse_args()
	# Echo the parsed options so the effective settings are visible in the log.
	print(args)

	# Input path and model: the model runs on the CPU with int8 compute.
	filename = args.filename
	model = WhisperModel(args.model, device="cpu", compute_type="int8")

	# The --keywords text is handed to the model as its initial prompt.
	initial_prompt = f"{args.keywords}"

	# Transcribe with voice-activity filtering and word-level timestamps.
	# NOTE(review): faster-whisper documents `segments` as a lazy generator, so
	# the actual decoding presumably happens while it is iterated - confirm.
	segments, info = model.transcribe(
	    filename,
	    language=args.language,
	    initial_prompt=initial_prompt,
	    word_timestamps=True,
	    vad_filter=True,
	    vad_parameters=dict(min_silence_duration_ms=500, max_speech_duration_s=10),
	)


	# Format a time in seconds as a WebVTT cue timestamp (HH:MM:SS.mmm)
	def format_timestamp(seconds, offset_ms=None):
	    """Return ``seconds`` shifted by an offset, formatted as ``HH:MM:SS.mmm``.

	    The result is written into a file that starts with a ``WEBVTT`` header.
	    WebVTT separates the fractional part with a full stop; the comma form
	    belongs to SRT and is rejected by strict WebVTT parsers.

	    Args:
	        seconds: Position in the audio, in seconds (int or float).
	        offset_ms: Shift to apply, in milliseconds. ``None`` (the default)
	            falls back to the ``--offset`` command-line value ``args.offset``.

	    Returns:
	        The shifted time as a zero-padded ``HH:MM:SS.mmm`` string. A time
	        that would become negative after the shift is clamped to zero.
	    """
	    if offset_ms is None:
	        offset_ms = args.offset
	    # Work in whole milliseconds and round once: truncating ``seconds * 1000``
	    # can land one millisecond short because of float representation error.
	    total_ms = max(0, int(round(seconds * 1000 + offset_ms)))
	    hours, remainder = divmod(total_ms, 3_600_000)
	    minutes, remainder = divmod(remainder, 60_000)
	    secs, millis = divmod(remainder, 1000)
	    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"

	# Write the transcript to "<input path>.vtt", one cue per segment
	file_path = Path(filename + ".vtt")
	with file_path.open("w", encoding="utf-8") as vtt_file:
	    # Header line; the "\n" plus print's own newline leaves a blank line after it.
	    print("WEBVTT\n", file=vtt_file)
	    for seg in segments:
	        cue_start = format_timestamp(seg.start)
	        cue_end = format_timestamp(seg.end)
	        # "-->" is the cue timing separator, so it is replaced inside the text.
	        cue_text = seg.text.strip().replace("-->", "->")
	        cue = f"{cue_start} --> {cue_end}\n{cue_text}\n"
	        # Flush after every cue so partial output is on disk while the loop runs,
	        # and echo the same cue to stdout as progress.
	        print(cue, file=vtt_file, flush=True)
	        print(cue, flush=True)

	print(f"VTT file saved to {file_path}")