Last active
February 1, 2025 01:36
-
-
Save sebington/e9f98e7a7e20478817fa2be08ab4deb4 to your computer and use it in GitHub Desktop.
Transcribe an audio/video file with Groq Whisper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import math | |
import tkinter as tk | |
from tkinter import filedialog, messagebox | |
from typing import Dict, Optional | |
from groq import Groq | |
import logging | |
# Configure logging: every record goes both to ``transcription.log`` and to
# the console. The log file is opened explicitly as UTF-8 because log messages
# include user-selected file paths, which may contain characters that the
# platform's default encoding cannot represent.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('transcription.log', encoding='utf-8'),
        logging.StreamHandler(),
    ],
)
class TranscriptionError(Exception):
    """Raised for any failure that is specific to the transcription workflow."""
class AudioTranscriber:
    """Transcribe an audio/video file with a Groq-hosted Whisper model.

    The workflow is interactive: the user picks a model and a media file in
    Tk dialogs, the file is sent to the Groq transcription endpoint, and the
    returned segments are written as an ``.srt`` subtitle file next to the
    selected media file.
    """

    # Available models and their max file sizes in MB
    MODELS = {
        "whisper-large-v3-turbo": {"max_size": 25, "description": "Fast, optimized for speed"},
        "distil-whisper-large-v3-en": {"max_size": 25, "description": "English-optimized, smaller model"},
        "whisper-large-v3": {"max_size": 25, "description": "Most accurate, supports multiple languages"},
    }

    # Model that is pre-selected in the model dialog.
    DEFAULT_MODEL = "whisper-large-v3"

    def __init__(self):
        """Read the API key from the environment and create the Groq client.

        Raises:
            TranscriptionError: If the key is missing or the client cannot be
                created.
        """
        self.api_key = self._get_api_key()
        self.client = self._initialize_client()

    def _get_api_key(self) -> str:
        """Retrieve and validate the Groq API key.

        Returns:
            The value of the ``GROQ_API_KEY`` environment variable with
            surrounding whitespace removed.

        Raises:
            TranscriptionError: If the variable is unset or blank.
        """
        api_key = os.getenv("GROQ_API_KEY", "").strip()
        if not api_key:
            raise TranscriptionError("GROQ_API_KEY environment variable not found")
        return api_key

    def _initialize_client(self) -> "Groq":
        """Initialize the Groq client.

        Raises:
            TranscriptionError: If constructing the client fails for any
                reason; the original exception is kept as the cause.
        """
        try:
            return Groq(api_key=self.api_key)
        except Exception as e:
            raise TranscriptionError(f"Failed to initialize Groq client: {e}") from e

    def _select_model(self) -> Optional[str]:
        """Display the model selection dialog.

        Returns:
            The chosen model name, or ``None`` if the user closed the window
            without pressing "Select".
        """
        root = tk.Tk()
        chosen_model: Optional[str] = None

        def on_select() -> None:
            # Read the variable while the Tk interpreter is still alive;
            # it cannot be queried once the root has been destroyed.
            nonlocal chosen_model
            chosen_model = selected_model.get()
            root.quit()

        try:
            root.title("Select Transcription Model")
            # Closing the window via the window manager only leaves the main
            # loop, so the dialog is torn down exactly once, in ``finally``.
            root.protocol("WM_DELETE_WINDOW", root.quit)

            selected_model = tk.StringVar(master=root, value=self.DEFAULT_MODEL)

            tk.Label(root, text="Choose a transcription model:").pack(pady=10)
            for model, info in self.MODELS.items():
                tk.Radiobutton(
                    root,
                    text=f"{model}\n{info['description']}",
                    variable=selected_model,
                    value=model,
                ).pack(anchor=tk.W, padx=20)
            tk.Button(root, text="Select", command=on_select).pack(pady=20)

            root.mainloop()
        finally:
            root.destroy()
        return chosen_model

    def _select_file(self) -> Optional[str]:
        """Display the file selection dialog.

        Returns:
            The chosen file path, or ``None`` if the dialog was cancelled.
        """
        # The dialog needs a Tk root; keep it hidden and always destroy it so
        # no invisible window outlives the dialog.
        root = tk.Tk()
        root.withdraw()
        try:
            filename = filedialog.askopenfilename(
                parent=root,
                title="Select an audio file (25 MB max)",
                filetypes=[("Supported File Types", "*.mp3 *.mp4 *.mpeg *.mpga *.m4a *.wav *.webm")],
            )
        finally:
            root.destroy()
        return filename or None

    def _validate_file(self, filename: str, model: str) -> None:
        """Validate the selected file against the chosen model's size limit.

        Args:
            filename: Path of the media file to check.
            model: Key into ``MODELS`` identifying the chosen model.

        Raises:
            TranscriptionError: If the model is unknown, the path is not an
                existing regular file, or the file exceeds the model's
                ``max_size`` (in MB).
        """
        model_info = self.MODELS.get(model)
        if model_info is None:
            raise TranscriptionError(f"Unknown model: {model}")

        # isfile() also rejects directories, which exists() would accept.
        if not os.path.isfile(filename):
            raise TranscriptionError(f"File not found: {filename}")

        file_size_mb = os.path.getsize(filename) / (1024 * 1024)
        max_size = model_info["max_size"]
        if file_size_mb > max_size:
            raise TranscriptionError(
                f"File size ({file_size_mb:.1f} MB) exceeds the maximum allowed size ({max_size} MB)"
            )

    @staticmethod
    def _convert_seconds_to_hms(seconds: float) -> str:
        """Convert seconds to HH:MM:SS,mmm format.

        The value is rounded to whole milliseconds first and then split with
        integer arithmetic, so inputs such as ``1.001`` are not truncated to
        ``,000`` by floating-point error. Negative inputs are clamped to zero.
        """
        total_ms = max(0, round(seconds * 1000))
        total_seconds, milliseconds = divmod(total_ms, 1000)
        total_minutes, secs = divmod(total_seconds, 60)
        hours, minutes = divmod(total_minutes, 60)
        return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"

    def _create_srt(self, segments: list, output_file: str) -> None:
        """Create an SRT file from transcription segments.

        Args:
            segments: Sequence of mappings, each providing ``'start'`` and
                ``'end'`` (seconds) and ``'text'``.
            output_file: Path of the SRT file to write (UTF-8).

        Raises:
            TranscriptionError: If the file cannot be written.
        """
        to_hms = self._convert_seconds_to_hms
        # Format every cue before opening the file so that a malformed
        # segment cannot leave a half-written SRT behind.
        cues = [
            f"{index}\n"
            f"{to_hms(segment['start'])} --> {to_hms(segment['end'])}\n"
            f"{segment['text'].strip()}\n\n"
            for index, segment in enumerate(segments, 1)
        ]
        try:
            with open(output_file, 'w', encoding='utf-8') as f:
                f.writelines(cues)
        except OSError as e:
            raise TranscriptionError(f"Failed to write SRT file: {e}") from e

    @staticmethod
    def _show_message(title: str, message: str, *, error: bool = False) -> None:
        """Show an info (or error) message box owned by a temporary hidden root."""
        root = tk.Tk()
        root.withdraw()
        try:
            show = messagebox.showerror if error else messagebox.showinfo
            show(title, message, parent=root)
        finally:
            root.destroy()

    def transcribe(self) -> None:
        """Run the main transcription process.

        Prompts for a model and a file, validates the file, requests a
        ``verbose_json`` transcription, and writes the segments to an ``.srt``
        file beside the input. Returns quietly if either dialog is cancelled.
        All errors are logged and reported in a message box rather than
        raised.
        """
        try:
            # Select model and file
            model = self._select_model()
            if not model:
                logging.info("No model selected. Exiting...")
                return

            filename = self._select_file()
            if not filename:
                logging.info("No file selected. Exiting...")
                return

            # Validate file
            self._validate_file(filename, model)

            # Process transcription
            logging.info("Starting transcription of %s using %s", filename, model)
            try:
                with open(filename, "rb") as file:
                    transcription = self.client.audio.transcriptions.create(
                        # Send only the base name: the extension is kept, but
                        # the local directory layout is not uploaded.
                        file=(os.path.basename(filename), file.read()),
                        model=model,
                        response_format="verbose_json",
                    )
            except Exception as e:
                raise TranscriptionError(f"API request failed: {e}") from e

            segments = getattr(transcription, "segments", None)
            if segments is None:
                raise TranscriptionError("API response did not contain any transcription segments")

            # Create SRT file
            output_file = os.path.splitext(filename)[0] + ".srt"
            self._create_srt(segments, output_file)

            logging.info("Successfully created SRT file: %s", output_file)
            self._show_message("Success", f"SRT file saved successfully: {output_file}")
        except TranscriptionError as e:
            logging.error("Transcription error: %s", e)
            self._show_message("Error", str(e), error=True)
        except Exception as e:
            # Top-level boundary of the workflow: keep the traceback in the log.
            logging.exception("Unexpected error: %s", e)
            self._show_message("Error", f"An unexpected error occurred: {e}", error=True)
def main():
    """Entry point of the script.

    Builds an ``AudioTranscriber`` and runs it. Any exception that reaches
    this level is logged and shown in an error dialog instead of being raised.
    """
    try:
        transcriber = AudioTranscriber()
        transcriber.transcribe()
    except Exception as e:
        logging.error("Failed to initialize transcriber: %s", e)
        # Give the message box an explicit hidden parent and destroy it
        # afterwards, so no implicit default root window is left behind.
        root = tk.Tk()
        root.withdraw()
        try:
            messagebox.showerror(
                "Error", f"Failed to initialize transcriber: {e}", parent=root
            )
        finally:
            root.destroy()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment