Skip to content

Instantly share code, notes, and snippets.

@sebington
Last active February 1, 2025 01:36
Show Gist options
  • Save sebington/e9f98e7a7e20478817fa2be08ab4deb4 to your computer and use it in GitHub Desktop.
Save sebington/e9f98e7a7e20478817fa2be08ab4deb4 to your computer and use it in GitHub Desktop.
Transcribe an audio/video file with Groq Whisper
import os
import math
import tkinter as tk
from tkinter import filedialog, messagebox
from typing import Dict, Optional
from groq import Groq
import logging
# Configure the root logger once at import time: INFO-and-above records are
# written both to a log file and to the console, using the same line format.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    handlers=[
        # Relative path, so the file is created in the current working directory.
        logging.FileHandler('transcription.log'),
        # No stream argument: StreamHandler falls back to sys.stderr.
        logging.StreamHandler(),
    ],
)
class TranscriptionError(Exception):
    """Raised when a step of the transcription workflow fails.

    AudioTranscriber raises this for a missing GROQ_API_KEY, a Groq client
    that cannot be created, a file that fails validation, a failed API
    request, and an SRT file that cannot be written. The message is meant to
    be shown to the user as-is.
    """
class AudioTranscriber:
    """Interactive workflow that transcribes a media file into an SRT file.

    The user picks a Whisper model and an audio/video file through tkinter
    dialogs; the file is sent to the Groq transcription endpoint and the
    returned segments are written next to the input file with an ``.srt``
    extension.
    """

    # Available models and their max file sizes in MB
    MODELS = {
        "whisper-large-v3-turbo": {"max_size": 25, "description": "Fast, optimized for speed"},
        "distil-whisper-large-v3-en": {"max_size": 25, "description": "English-optimized, smaller model"},
        "whisper-large-v3": {"max_size": 25, "description": "Most accurate, supports multiple languages"},
    }
    # Model that is pre-selected when the model chooser opens.
    DEFAULT_MODEL = "whisper-large-v3"

    def __init__(self):
        """Read the API key from the environment and build the Groq client.

        Raises:
            TranscriptionError: If the key is missing or the client cannot
                be created.
        """
        self.api_key = self._get_api_key()
        self.client = self._initialize_client()

    def _get_api_key(self) -> str:
        """Return the Groq API key from the GROQ_API_KEY environment variable.

        Raises:
            TranscriptionError: If the variable is unset or empty.
        """
        api_key = os.getenv("GROQ_API_KEY")
        if not api_key:
            raise TranscriptionError("GROQ_API_KEY environment variable not found")
        return api_key

    def _initialize_client(self) -> Groq:
        """Create the Groq client from ``self.api_key``.

        Raises:
            TranscriptionError: If the Groq constructor raises; the original
                exception is kept as ``__cause__``.
        """
        try:
            return Groq(api_key=self.api_key)
        except Exception as e:
            raise TranscriptionError(f"Failed to initialize Groq client: {str(e)}") from e

    def _select_model(self) -> str:
        """Show the model chooser and return the selected model name.

        Returns:
            A key of ``MODELS`` when the user presses "Select", or an empty
            string when the window is closed without confirming.
        """
        root = tk.Tk()
        root.title("Select Transcription Model")
        selected_model = tk.StringVar(master=root, value=self.DEFAULT_MODEL)
        chosen_model = ""

        def on_select() -> None:
            # Read the variable while the Tcl interpreter is still alive.
            nonlocal chosen_model
            chosen_model = selected_model.get()
            root.quit()

        # By default the title-bar close button destroys the window, after
        # which reading the StringVar or calling destroy() raises TclError.
        # Only leave the main loop instead; the empty result means "cancelled".
        root.protocol("WM_DELETE_WINDOW", root.quit)

        tk.Label(root, text="Choose a transcription model:").pack(pady=10)
        for model, info in self.MODELS.items():
            tk.Radiobutton(
                root,
                text=f"{model}\n{info['description']}",
                variable=selected_model,
                value=model,
            ).pack(anchor=tk.W, padx=20)
        tk.Button(root, text="Select", command=on_select).pack(pady=20)

        root.mainloop()
        root.destroy()
        return chosen_model

    def _select_file(self) -> Optional[str]:
        """Show a file-open dialog and return the chosen path.

        Returns:
            The selected path, or ``None`` when the dialog is cancelled.
        """
        # The hidden root is deliberately left alive: it stays tkinter's
        # default root, so the message boxes shown later attach to it instead
        # of spawning a new window.
        root = tk.Tk()
        root.withdraw()
        filename = filedialog.askopenfilename(
            title="Select an audio file (25 MB max)",
            filetypes=[("Supported File Types", "*.mp3 *.mp4 *.mpeg *.mpga *.m4a *.wav *.webm")],
        )
        # A cancelled dialog yields an empty value; normalise it to None so
        # the return annotation is accurate.
        return filename or None

    def _validate_file(self, filename: str, model: str) -> None:
        """Check that ``filename`` is a regular file within the model's size limit.

        Args:
            filename: Path of the file to upload.
            model: Key of ``MODELS`` whose ``max_size`` (in MB) applies.

        Raises:
            TranscriptionError: If the path is not an existing file or the
                file is larger than the limit.
        """
        # isfile (not exists) so that a directory is rejected here rather
        # than failing later when it is opened.
        if not os.path.isfile(filename):
            raise TranscriptionError(f"File not found: {filename}")
        file_size_mb = os.path.getsize(filename) / (1024 * 1024)
        max_size = self.MODELS[model]["max_size"]
        if file_size_mb > max_size:
            raise TranscriptionError(
                f"File size ({file_size_mb:.1f} MB) exceeds the maximum allowed size ({max_size} MB)"
            )

    @staticmethod
    def _convert_seconds_to_hms(seconds: float) -> str:
        """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

        The value is rounded to the nearest millisecond before it is split
        into fields. Flooring the fractional part instead loses a millisecond
        to binary float error (1.001 would become ``,000``) and cannot carry
        into the seconds field. Negative inputs are clamped to zero.
        """
        total_ms = max(0, math.floor(seconds * 1000 + 0.5))
        total_seconds, milliseconds = divmod(total_ms, 1000)
        total_minutes, secs = divmod(total_seconds, 60)
        hours, minutes = divmod(total_minutes, 60)
        return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"

    def _create_srt(self, segments: list, output_file: str) -> None:
        """Write ``segments`` to ``output_file`` in SRT format.

        Args:
            segments: Sequence of mappings with ``start`` and ``end``
                (seconds) and ``text`` entries.
            output_file: Destination path; overwritten if it exists.

        Raises:
            TranscriptionError: If the file cannot be written.
        """
        try:
            with open(output_file, 'w', encoding='utf-8') as f:
                # SRT cue numbers start at 1.
                for i, segment in enumerate(segments, 1):
                    duration = (
                        f"{self._convert_seconds_to_hms(segment['start'])} --> "
                        f"{self._convert_seconds_to_hms(segment['end'])}\n"
                    )
                    text = f"{segment['text'].lstrip()}\n\n"
                    f.write(f"{i}\n{duration}{text}")
        except IOError as e:
            raise TranscriptionError(f"Failed to write SRT file: {str(e)}") from e

    def transcribe(self) -> None:
        """Run the full interactive flow: choose model and file, transcribe, save.

        Errors are logged and shown in a message box; nothing is raised to
        the caller. Cancelling either dialog ends the run quietly.
        """
        try:
            # Select model and file
            model = self._select_model()
            if not model:
                logging.info("No model selected. Exiting...")
                return
            filename = self._select_file()
            if not filename:
                logging.info("No file selected. Exiting...")
                return

            # Validate file
            self._validate_file(filename, model)

            # Process transcription
            logging.info("Starting transcription of %s using %s", filename, model)
            try:
                with open(filename, "rb") as file:
                    transcription = self.client.audio.transcriptions.create(
                        file=(filename, file.read()),
                        model=model,
                        response_format="verbose_json",
                    )
            except Exception as e:
                raise TranscriptionError(f"API request failed: {str(e)}") from e

            # Create SRT file next to the input, same base name
            output_file = os.path.splitext(filename)[0] + ".srt"
            self._create_srt(transcription.segments, output_file)
            logging.info("Successfully created SRT file: %s", output_file)
            messagebox.showinfo("Success", f"SRT file saved successfully: {output_file}")
        except TranscriptionError as e:
            logging.error("Transcription error: %s", e)
            messagebox.showerror("Error", str(e))
        except Exception as e:
            # Unexpected failure: keep the traceback in the log for diagnosis.
            logging.exception("Unexpected error: %s", e)
            messagebox.showerror("Error", f"An unexpected error occurred: {str(e)}")
def main() -> None:
    """Entry point of the script: build the transcriber and run it.

    AudioTranscriber.transcribe() reports its own errors, so the handler
    here mainly covers failures while constructing the transcriber (for
    example a missing GROQ_API_KEY). The error is logged with its traceback
    and shown to the user in a message box.
    """
    try:
        transcriber = AudioTranscriber()
        transcriber.transcribe()
    except Exception as e:
        # Top-level boundary: log with traceback so the failure can be
        # diagnosed from transcription.log, then tell the user.
        logging.exception("Failed to initialize transcriber: %s", e)
        messagebox.showerror("Error", f"Failed to initialize transcriber: {str(e)}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment