mbutler · June 3, 2025 19:28
diff --git a/transcribe-audio-whisper.py b/transcribe-audio-whisper.py
 import os
 import math
 import subprocess
 import openai
 import pysrt
 from datetime import timedelta

 # Take the key from the OPENAI_API_KEY environment variable when it is set, so
 # the real secret does not have to be written into this file; the literal is
 # only a placeholder fallback.
 API_KEY = os.environ.get("OPENAI_API_KEY", "api-key")
 INPUT_DIR = "input_media"  # directory scanned for media files
 CHUNK_DIR = "audio_chunks"  # where the split audio chunks are written
 OUTPUT_DIR = "output"  # where the final .srt files are written
 CHUNK_DURATION_SEC = 300  # length of each audio chunk, in seconds

 client = openai.OpenAI(api_key=API_KEY)
 # Create the working directories at import time; existing ones are left alone.
 os.makedirs(CHUNK_DIR, exist_ok=True)
 os.makedirs(OUTPUT_DIR, exist_ok=True)

 def list_media_files(directory):
    """Return the paths of the .mp4/.mp3 files directly inside *directory*.

    The extension match is case-insensitive. Only regular files are returned
    (a sub-directory that happens to be named ``something.mp3`` is skipped),
    and the result is sorted by file name so repeated runs process the files
    in the same order.

    Raises whatever ``os.listdir`` raises when *directory* cannot be read.
    """
    media_paths = []
    # os.listdir() returns entries in arbitrary order, hence the sort.
    for entry in sorted(os.listdir(directory)):
        if not entry.lower().endswith(('.mp4', '.mp3')):
            continue
        candidate = os.path.join(directory, entry)
        if os.path.isfile(candidate):
            media_paths.append(candidate)
    return media_paths

 def get_duration_seconds(input_path):
    """Ask ffprobe for the duration of *input_path* and return it as a float.

    ffprobe is run with only the ``format=duration`` entry requested and the
    key/wrapper decoration switched off, so its stdout is just the number.

    Raises ``subprocess.CalledProcessError`` when ffprobe exits non-zero and
    ``ValueError`` when its output is not a number.
    """
    probe_args = ["ffprobe", "-v", "error"]
    probe_args += ["-show_entries", "format=duration"]
    probe_args += ["-of", "default=noprint_wrappers=1:nokey=1"]
    probe_args.append(input_path)

    raw_output = subprocess.check_output(probe_args)
    duration_text = raw_output.decode().strip()
    return float(duration_text)

 def split_media(input_path, output_dir, chunk_duration):
    """Split *input_path* into consecutive audio chunks using ffmpeg.

    Chunk ``i`` starts at ``i * chunk_duration`` seconds and is written to
    ``<output_dir>/<base>_chunk_NNN.mp3`` with the video stream dropped
    (``-vn``), resampled to 16 kHz mono at 64k audio bitrate. Existing chunk
    files are overwritten (``-y``).

    Returns the list of chunk paths in chunk order. A path is returned even
    when ffmpeg reported a failure for that chunk, so that the position of a
    path in the list always equals its chunk number; the failure is printed
    as a warning instead of being silently ignored.

    Raises whatever ``get_duration_seconds`` raises for *input_path*.
    """
    total_duration = get_duration_seconds(input_path)
    total_chunks = math.ceil(total_duration / chunk_duration)
    base = os.path.splitext(os.path.basename(input_path))[0]
    chunk_paths = []

    for i in range(total_chunks):
        start_time = i * chunk_duration
        output_file = os.path.join(output_dir, f"{base}_chunk_{i:03}.mp3")
        cmd = [
            "ffmpeg", "-y", "-i", input_path, "-ss", str(start_time),
            "-t", str(chunk_duration),
            "-vn", "-ar", "16000", "-ac", "1", "-b:a", "64k",
            output_file
        ]
        result = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        if result.returncode != 0:
            # Report the failure but keep going: the remaining chunks may
            # still be usable.
            print(f"  Warning: ffmpeg exited with code {result.returncode} "
                  f"for chunk {i + 1}/{total_chunks} ({output_file})")
        chunk_paths.append(output_file)

    return chunk_paths

 def transcribe_chunk(path):
    """Upload the audio file at *path* for transcription and return the result.

    The request uses the ``whisper-1`` model and asks for the ``srt``
    response format; the value returned by the client call is passed back
    unchanged.
    """
    request_options = {"model": "whisper-1", "response_format": "srt"}
    # The file has to stay open until the request has completed.
    with open(path, "rb") as audio_file:
        return client.audio.transcriptions.create(file=audio_file, **request_options)

 def shift_srt(srt_data, shift_seconds):
    """Parse the SRT text *srt_data* and move every cue by *shift_seconds*.

    Returns the parsed pysrt subtitle collection with the shift applied.
    """
    parsed = pysrt.from_string(srt_data)
    # Shifting the collection applies the same offset to each of its items.
    parsed.shift(seconds=shift_seconds)
    return parsed

 def process_file(filepath):
    """Transcribe one media file and save the merged subtitles as an .srt file.

    The file is split into CHUNK_DURATION_SEC-long chunks under CHUNK_DIR,
    every chunk is transcribed, its cue times are shifted by the chunk's
    start offset, and the combined cues are written to
    ``OUTPUT_DIR/<base name>.srt`` (UTF-8).

    Transcription is best effort: a chunk that raises is reported and
    skipped, and the remaining chunks still end up in the output.
    """
    base_name = os.path.splitext(os.path.basename(filepath))[0]
    print(f"Processing: {base_name}")

    chunk_paths = split_media(filepath, CHUNK_DIR, CHUNK_DURATION_SEC)
    all_subs = pysrt.SubRipFile()

    for i, chunk_path in enumerate(chunk_paths):
        chunk_number = i + 1
        print(f"  Transcribing chunk {chunk_number}/{len(chunk_paths)}")
        try:
            srt_text = transcribe_chunk(chunk_path)
            shifted = shift_srt(srt_text, i * CHUNK_DURATION_SEC)
            all_subs.extend(shifted)
        except Exception as e:
            # Use the same 1-based number as the progress line above so the
            # two messages refer to the same chunk.
            print(f"  Error on chunk {chunk_number}: {e}")

    # Each chunk is transcribed on its own, so its cue numbers presumably
    # restart at 1; renumber the merged list so indexes are unique and
    # sequential in the saved file.
    all_subs.clean_indexes()

    final_srt_path = os.path.join(OUTPUT_DIR, base_name + ".srt")
    all_subs.save(final_srt_path, encoding="utf-8")
    print(f"Saved: {final_srt_path}\n")

 def main():
    """Process every media file found in INPUT_DIR, one after another."""
    pending = list_media_files(INPUT_DIR)
    print(f"Found {len(pending)} media files to process\n")
    for media_path in pending:
        process_file(media_path)

 # Run the batch only when executed as a script, not when imported.
 if __name__ == "__main__":
    main()
	import os
	import math
	import subprocess
	import openai
	import pysrt
	from datetime import timedelta

	# Take the key from the OPENAI_API_KEY environment variable when it is set, so
	# the real secret does not have to be written into this file; the literal is
	# only a placeholder fallback.
	API_KEY = os.environ.get("OPENAI_API_KEY", "api-key")
	INPUT_DIR = "input_media"  # directory scanned for media files
	CHUNK_DIR = "audio_chunks"  # where the split audio chunks are written
	OUTPUT_DIR = "output"  # where the final .srt files are written
	CHUNK_DURATION_SEC = 300  # length of each audio chunk, in seconds

	client = openai.OpenAI(api_key=API_KEY)
	# Create the working directories at import time; existing ones are left alone.
	os.makedirs(CHUNK_DIR, exist_ok=True)
	os.makedirs(OUTPUT_DIR, exist_ok=True)

	def list_media_files(directory):
	    """Return the paths of the .mp4/.mp3 files directly inside *directory*.

	    The extension match is case-insensitive. Only regular files are returned
	    (a sub-directory that happens to be named ``something.mp3`` is skipped),
	    and the result is sorted by file name so repeated runs process the files
	    in the same order.

	    Raises whatever ``os.listdir`` raises when *directory* cannot be read.
	    """
	    media_paths = []
	    # os.listdir() returns entries in arbitrary order, hence the sort.
	    for entry in sorted(os.listdir(directory)):
	        if not entry.lower().endswith(('.mp4', '.mp3')):
	            continue
	        candidate = os.path.join(directory, entry)
	        if os.path.isfile(candidate):
	            media_paths.append(candidate)
	    return media_paths

	def get_duration_seconds(input_path):
	    """Ask ffprobe for the duration of *input_path* and return it as a float.

	    ffprobe is run with only the ``format=duration`` entry requested and the
	    key/wrapper decoration switched off, so its stdout is just the number.

	    Raises ``subprocess.CalledProcessError`` when ffprobe exits non-zero and
	    ``ValueError`` when its output is not a number.
	    """
	    probe_args = ["ffprobe", "-v", "error"]
	    probe_args += ["-show_entries", "format=duration"]
	    probe_args += ["-of", "default=noprint_wrappers=1:nokey=1"]
	    probe_args.append(input_path)

	    raw_output = subprocess.check_output(probe_args)
	    duration_text = raw_output.decode().strip()
	    return float(duration_text)

	def split_media(input_path, output_dir, chunk_duration):
	    """Split *input_path* into consecutive audio chunks using ffmpeg.

	    Chunk ``i`` starts at ``i * chunk_duration`` seconds and is written to
	    ``<output_dir>/<base>_chunk_NNN.mp3`` with the video stream dropped
	    (``-vn``), resampled to 16 kHz mono at 64k audio bitrate. Existing chunk
	    files are overwritten (``-y``).

	    Returns the list of chunk paths in chunk order. A path is returned even
	    when ffmpeg reported a failure for that chunk, so that the position of a
	    path in the list always equals its chunk number; the failure is printed
	    as a warning instead of being silently ignored.

	    Raises whatever ``get_duration_seconds`` raises for *input_path*.
	    """
	    total_duration = get_duration_seconds(input_path)
	    total_chunks = math.ceil(total_duration / chunk_duration)
	    base = os.path.splitext(os.path.basename(input_path))[0]
	    chunk_paths = []

	    for i in range(total_chunks):
	        start_time = i * chunk_duration
	        output_file = os.path.join(output_dir, f"{base}_chunk_{i:03}.mp3")
	        cmd = [
	            "ffmpeg", "-y", "-i", input_path, "-ss", str(start_time),
	            "-t", str(chunk_duration),
	            "-vn", "-ar", "16000", "-ac", "1", "-b:a", "64k",
	            output_file
	        ]
	        result = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
	        if result.returncode != 0:
	            # Report the failure but keep going: the remaining chunks may
	            # still be usable.
	            print(f" Warning: ffmpeg exited with code {result.returncode} "
	                  f"for chunk {i + 1}/{total_chunks} ({output_file})")
	        chunk_paths.append(output_file)

	    return chunk_paths

	def transcribe_chunk(path):
	    """Upload the audio file at *path* for transcription and return the result.

	    The request uses the ``whisper-1`` model and asks for the ``srt``
	    response format; the value returned by the client call is passed back
	    unchanged.
	    """
	    request_options = {"model": "whisper-1", "response_format": "srt"}
	    # The file has to stay open until the request has completed.
	    with open(path, "rb") as audio_file:
	        return client.audio.transcriptions.create(file=audio_file, **request_options)

	def shift_srt(srt_data, shift_seconds):
	    """Parse the SRT text *srt_data* and move every cue by *shift_seconds*.

	    Returns the parsed pysrt subtitle collection with the shift applied.
	    """
	    parsed = pysrt.from_string(srt_data)
	    # Shifting the collection applies the same offset to each of its items.
	    parsed.shift(seconds=shift_seconds)
	    return parsed

	def process_file(filepath):
	    """Transcribe one media file and save the merged subtitles as an .srt file.

	    The file is split into CHUNK_DURATION_SEC-long chunks under CHUNK_DIR,
	    every chunk is transcribed, its cue times are shifted by the chunk's
	    start offset, and the combined cues are written to
	    ``OUTPUT_DIR/<base name>.srt`` (UTF-8).

	    Transcription is best effort: a chunk that raises is reported and
	    skipped, and the remaining chunks still end up in the output.
	    """
	    base_name = os.path.splitext(os.path.basename(filepath))[0]
	    print(f"Processing: {base_name}")

	    chunk_paths = split_media(filepath, CHUNK_DIR, CHUNK_DURATION_SEC)
	    all_subs = pysrt.SubRipFile()

	    for i, chunk_path in enumerate(chunk_paths):
	        chunk_number = i + 1
	        print(f" Transcribing chunk {chunk_number}/{len(chunk_paths)}")
	        try:
	            srt_text = transcribe_chunk(chunk_path)
	            shifted = shift_srt(srt_text, i * CHUNK_DURATION_SEC)
	            all_subs.extend(shifted)
	        except Exception as e:
	            # Use the same 1-based number as the progress line above so the
	            # two messages refer to the same chunk.
	            print(f" Error on chunk {chunk_number}: {e}")

	    # Each chunk is transcribed on its own, so its cue numbers presumably
	    # restart at 1; renumber the merged list so indexes are unique and
	    # sequential in the saved file.
	    all_subs.clean_indexes()

	    final_srt_path = os.path.join(OUTPUT_DIR, base_name + ".srt")
	    all_subs.save(final_srt_path, encoding="utf-8")
	    print(f"Saved: {final_srt_path}\n")

	def main():
	    """Process every media file found in INPUT_DIR, one after another."""
	    pending = list_media_files(INPUT_DIR)
	    print(f"Found {len(pending)} media files to process\n")
	    for media_path in pending:
	        process_file(media_path)

	# Run the batch only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()