sebington · February 1, 2025 01:36
diff --git a/whisper_groq2srt_claude.py b/whisper_groq2srt_claude.py
 import os
 import math
 import tkinter as tk
 from tkinter import filedialog, messagebox
 from typing import Dict, Optional
 from groq import Groq
 import logging

 # Configure logging: every record is written both to transcription.log
 # (in the current working directory) and to the console.
 logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # Explicit UTF-8: the logged messages contain user-selected file
        # paths, which may not be encodable in the platform default encoding.
        logging.FileHandler('transcription.log', encoding='utf-8'),
        logging.StreamHandler()
    ]
 )

 class TranscriptionError(Exception):
    """Error raised when a step of the transcription workflow fails.

    Used for a missing API key, client start-up failures, rejected input
    files, failed API requests and SRT files that cannot be written.
    """

 class AudioTranscriber:
    """Interactive tool that turns an audio file into an SRT subtitle file.

    ``transcribe`` drives the whole workflow: choose a model in a Tk dialog,
    choose an audio file, check its size against the model limit, request a
    transcription through the Groq client and write the returned segments to
    ``<audio name>.srt`` next to the audio file.
    """

    # Available models and their max file sizes in MB
    MODELS = {
        "whisper-large-v3-turbo": {"max_size": 25, "description": "Fast, optimized for speed"},
        "distil-whisper-large-v3-en": {"max_size": 25, "description": "English-optimized, smaller model"},
        "whisper-large-v3": {"max_size": 25, "description": "Most accurate, supports multiple languages"}
    }

    def __init__(self):
        """Read the API key from the environment and create the Groq client.

        Raises:
            TranscriptionError: If the key is missing or the client cannot
                be created.
        """
        self.api_key = self._get_api_key()
        self.client = self._initialize_client()

    def _get_api_key(self) -> str:
        """Return the value of the ``GROQ_API_KEY`` environment variable.

        Raises:
            TranscriptionError: If the variable is unset or empty.
        """
        api_key = os.getenv("GROQ_API_KEY")
        if not api_key:
            raise TranscriptionError("GROQ_API_KEY environment variable not found")
        return api_key

    def _initialize_client(self) -> "Groq":
        """Create the Groq client from ``self.api_key``.

        Raises:
            TranscriptionError: If the client constructor raises.
        """
        try:
            return Groq(api_key=self.api_key)
        except Exception as e:
            # Chain the cause so the original traceback is not lost.
            raise TranscriptionError(f"Failed to initialize Groq client: {str(e)}") from e

    def _select_model(self) -> str:
        """Show the model selection dialog and return the chosen model.

        Returns:
            The ``MODELS`` key that was selected when "Select" was pressed,
            or an empty string if the window was closed without confirming.
        """
        root = tk.Tk()
        root.title("Select Transcription Model")

        # Bind the variable to this window explicitly instead of relying on
        # tkinter's implicit default root.
        selected_model = tk.StringVar(master=root, value="whisper-large-v3")  # Default selection
        chosen = {"model": ""}

        def on_select():
            # Read the selection while the widgets still exist, then leave
            # the event loop.
            chosen["model"] = selected_model.get()
            root.quit()

        # Closing the window through the window manager would otherwise
        # destroy the root itself, and the explicit destroy() below would
        # then raise TclError. Only leave the event loop instead.
        root.protocol("WM_DELETE_WINDOW", root.quit)

        tk.Label(root, text="Choose a transcription model:").pack(pady=10)

        for model, info in self.MODELS.items():
            tk.Radiobutton(
                root,
                text=f"{model}\n{info['description']}",
                variable=selected_model,
                value=model
            ).pack(anchor=tk.W, padx=20)

        tk.Button(root, text="Select", command=on_select).pack(pady=20)

        root.mainloop()
        root.destroy()

        return chosen["model"]

    def _select_file(self) -> Optional[str]:
        """Show the file selection dialog and return the chosen path.

        Returns:
            The selected path, or ``None`` if the dialog was cancelled.
        """
        # Hidden root so that only the file dialog itself is visible.
        # NOTE(review): the root is not destroyed here; the message boxes
        # shown later by ``transcribe`` presumably use it as their parent —
        # confirm before adding a destroy() call.
        root = tk.Tk()
        root.withdraw()

        filename = filedialog.askopenfilename(
            title="Select an audio file (25 MB max)",
            filetypes=[("Supported File Types", "*.mp3 *.mp4 *.mpeg *.mpga *.m4a *.wav *.webm")]
        )

        # A cancelled dialog yields an empty value; normalise it to None so
        # the result matches the annotated return type.
        return filename or None

    def _validate_file(self, filename: str, model: str) -> None:
        """Check that ``filename`` exists and fits the size limit of ``model``.

        Args:
            filename: Path of the audio file to check.
            model: Key into ``MODELS`` whose ``max_size`` (in MB) applies.

        Raises:
            TranscriptionError: If the path is not an existing file or the
                file is larger than the model's limit.
        """
        # isfile() also rejects directories, which exists() would accept.
        if not os.path.isfile(filename):
            raise TranscriptionError(f"File not found: {filename}")

        file_size_mb = os.path.getsize(filename) / (1024 * 1024)
        max_size = self.MODELS[model]["max_size"]

        if file_size_mb > max_size:
            raise TranscriptionError(
                f"File size ({file_size_mb:.1f} MB) exceeds the maximum allowed size ({max_size} MB)"
            )

    @staticmethod
    def _convert_seconds_to_hms(seconds: float) -> str:
        """Convert an offset in seconds to the SRT ``HH:MM:SS,mmm`` format.

        Args:
            seconds: Offset from the start of the audio. Negative values
                are clamped to zero.

        Returns:
            The offset rounded to the nearest millisecond, zero-padded.
        """
        # Work in whole milliseconds. Flooring ``(seconds % 1) * 1000`` on a
        # float loses a millisecond for values such as 2.3 (stored as
        # 2.2999...), so round once up front and use integer arithmetic.
        total_ms = max(0, round(seconds * 1000))
        total_seconds, milliseconds = divmod(total_ms, 1000)
        hours, remainder = divmod(total_seconds, 3600)
        minutes, secs = divmod(remainder, 60)
        return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"

    def _create_srt(self, segments: list, output_file: str) -> None:
        """Write ``segments`` to ``output_file`` as numbered SRT cues.

        Args:
            segments: Sequence of mappings with ``start``, ``end`` (seconds)
                and ``text`` entries.
            output_file: Path of the SRT file to create or overwrite.

        Raises:
            TranscriptionError: If the file cannot be written.
        """
        try:
            with open(output_file, 'w', encoding='utf-8') as f:
                for i, segment in enumerate(segments, 1):
                    start = self._convert_seconds_to_hms(segment['start'])
                    end = self._convert_seconds_to_hms(segment['end'])
                    text = segment['text'].lstrip()
                    # Cue layout: index, time range, text, blank separator.
                    f.write(f"{i}\n{start} --> {end}\n{text}\n\n")
        except OSError as e:
            raise TranscriptionError(f"Failed to write SRT file: {str(e)}") from e

    def transcribe(self) -> None:
        """Run the interactive workflow from model choice to saved SRT file.

        Every failure is logged and shown in an error dialog; nothing is
        raised to the caller by the handlers below.
        """
        try:
            # Select model and file
            model = self._select_model()
            if not model:
                logging.info("No model selected. Exiting...")
                return

            filename = self._select_file()
            if not filename:
                logging.info("No file selected. Exiting...")
                return

            # Validate file
            self._validate_file(filename, model)

            # Process transcription
            logging.info(f"Starting transcription of {filename} using {model}")

            try:
                with open(filename, "rb") as file:
                    transcription = self.client.audio.transcriptions.create(
                        file=(filename, file.read()),
                        model=model,
                        response_format="verbose_json"
                    )
            except Exception as e:
                raise TranscriptionError(f"API request failed: {str(e)}") from e

            # Create SRT file next to the audio file
            output_file = os.path.splitext(filename)[0] + ".srt"
            self._create_srt(transcription.segments, output_file)

            logging.info(f"Successfully created SRT file: {output_file}")
            messagebox.showinfo("Success", f"SRT file saved successfully: {output_file}")

        except TranscriptionError as e:
            logging.error(f"Transcription error: {str(e)}")
            messagebox.showerror("Error", str(e))
        except Exception as e:
            # exception() logs at ERROR level and records the traceback.
            logging.exception(f"Unexpected error: {str(e)}")
            messagebox.showerror("Error", f"An unexpected error occurred: {str(e)}")

 def main():
    """Create an ``AudioTranscriber`` and run its interactive workflow.

    Any exception that escapes is logged and reported in an error dialog
    instead of being propagated.
    """
    try:
        AudioTranscriber().transcribe()
    except Exception as exc:
        failure = f"Failed to initialize transcriber: {str(exc)}"
        logging.error(failure)
        messagebox.showerror("Error", failure)

 # Run the interactive workflow only when this file is executed as a script,
 # not when it is imported as a module.
 if __name__ == "__main__":
    main()
	import os
	import math
	import tkinter as tk
	from tkinter import filedialog, messagebox
	from typing import Dict, Optional
	from groq import Groq
	import logging

	# Configure logging: every record is written both to transcription.log
	# (in the current working directory) and to the console.
	logging.basicConfig(
	    level=logging.INFO,
	    format='%(asctime)s - %(levelname)s - %(message)s',
	    handlers=[
	        # Explicit UTF-8: the logged messages contain user-selected file
	        # paths, which may not be encodable in the platform default encoding.
	        logging.FileHandler('transcription.log', encoding='utf-8'),
	        logging.StreamHandler()
	    ]
	)

	class TranscriptionError(Exception):
	    """Error raised when a step of the transcription workflow fails.

	    Used for a missing API key, client start-up failures, rejected input
	    files, failed API requests and SRT files that cannot be written.
	    """

	class AudioTranscriber:
	    """Interactive tool that turns an audio file into an SRT subtitle file.

	    ``transcribe`` drives the whole workflow: choose a model in a Tk dialog,
	    choose an audio file, check its size against the model limit, request a
	    transcription through the Groq client and write the returned segments to
	    ``<audio name>.srt`` next to the audio file.
	    """

	    # Available models and their max file sizes in MB
	    MODELS = {
	        "whisper-large-v3-turbo": {"max_size": 25, "description": "Fast, optimized for speed"},
	        "distil-whisper-large-v3-en": {"max_size": 25, "description": "English-optimized, smaller model"},
	        "whisper-large-v3": {"max_size": 25, "description": "Most accurate, supports multiple languages"}
	    }

	    def __init__(self):
	        """Read the API key from the environment and create the Groq client.

	        Raises:
	            TranscriptionError: If the key is missing or the client cannot
	                be created.
	        """
	        self.api_key = self._get_api_key()
	        self.client = self._initialize_client()

	    def _get_api_key(self) -> str:
	        """Return the value of the ``GROQ_API_KEY`` environment variable.

	        Raises:
	            TranscriptionError: If the variable is unset or empty.
	        """
	        api_key = os.getenv("GROQ_API_KEY")
	        if not api_key:
	            raise TranscriptionError("GROQ_API_KEY environment variable not found")
	        return api_key

	    def _initialize_client(self) -> "Groq":
	        """Create the Groq client from ``self.api_key``.

	        Raises:
	            TranscriptionError: If the client constructor raises.
	        """
	        try:
	            return Groq(api_key=self.api_key)
	        except Exception as e:
	            # Chain the cause so the original traceback is not lost.
	            raise TranscriptionError(f"Failed to initialize Groq client: {str(e)}") from e

	    def _select_model(self) -> str:
	        """Show the model selection dialog and return the chosen model.

	        Returns:
	            The ``MODELS`` key that was selected when "Select" was pressed,
	            or an empty string if the window was closed without confirming.
	        """
	        root = tk.Tk()
	        root.title("Select Transcription Model")

	        # Bind the variable to this window explicitly instead of relying on
	        # tkinter's implicit default root.
	        selected_model = tk.StringVar(master=root, value="whisper-large-v3")  # Default selection
	        chosen = {"model": ""}

	        def on_select():
	            # Read the selection while the widgets still exist, then leave
	            # the event loop.
	            chosen["model"] = selected_model.get()
	            root.quit()

	        # Closing the window through the window manager would otherwise
	        # destroy the root itself, and the explicit destroy() below would
	        # then raise TclError. Only leave the event loop instead.
	        root.protocol("WM_DELETE_WINDOW", root.quit)

	        tk.Label(root, text="Choose a transcription model:").pack(pady=10)

	        for model, info in self.MODELS.items():
	            tk.Radiobutton(
	                root,
	                text=f"{model}\n{info['description']}",
	                variable=selected_model,
	                value=model
	            ).pack(anchor=tk.W, padx=20)

	        tk.Button(root, text="Select", command=on_select).pack(pady=20)

	        root.mainloop()
	        root.destroy()

	        return chosen["model"]

	    def _select_file(self) -> Optional[str]:
	        """Show the file selection dialog and return the chosen path.

	        Returns:
	            The selected path, or ``None`` if the dialog was cancelled.
	        """
	        # Hidden root so that only the file dialog itself is visible.
	        # NOTE(review): the root is not destroyed here; the message boxes
	        # shown later by ``transcribe`` presumably use it as their parent —
	        # confirm before adding a destroy() call.
	        root = tk.Tk()
	        root.withdraw()

	        # All patterns use the same "*.ext" form.
	        filename = filedialog.askopenfilename(
	            title="Select an audio file (25 MB max)",
	            filetypes=[("Supported File Types", "*.mp3 *.mp4 *.mpeg *.mpga *.m4a *.wav *.webm")]
	        )

	        # A cancelled dialog yields an empty value; normalise it to None so
	        # the result matches the annotated return type.
	        return filename or None

	    def _validate_file(self, filename: str, model: str) -> None:
	        """Check that ``filename`` exists and fits the size limit of ``model``.

	        Args:
	            filename: Path of the audio file to check.
	            model: Key into ``MODELS`` whose ``max_size`` (in MB) applies.

	        Raises:
	            TranscriptionError: If the path is not an existing file or the
	                file is larger than the model's limit.
	        """
	        # isfile() also rejects directories, which exists() would accept.
	        if not os.path.isfile(filename):
	            raise TranscriptionError(f"File not found: {filename}")

	        file_size_mb = os.path.getsize(filename) / (1024 * 1024)
	        max_size = self.MODELS[model]["max_size"]

	        if file_size_mb > max_size:
	            raise TranscriptionError(
	                f"File size ({file_size_mb:.1f} MB) exceeds the maximum allowed size ({max_size} MB)"
	            )

	    @staticmethod
	    def _convert_seconds_to_hms(seconds: float) -> str:
	        """Convert an offset in seconds to the SRT ``HH:MM:SS,mmm`` format.

	        Args:
	            seconds: Offset from the start of the audio. Negative values
	                are clamped to zero.

	        Returns:
	            The offset rounded to the nearest millisecond, zero-padded.
	        """
	        # Work in whole milliseconds. Flooring ``(seconds % 1) * 1000`` on a
	        # float loses a millisecond for values such as 2.3 (stored as
	        # 2.2999...), so round once up front and use integer arithmetic.
	        total_ms = max(0, round(seconds * 1000))
	        total_seconds, milliseconds = divmod(total_ms, 1000)
	        hours, remainder = divmod(total_seconds, 3600)
	        minutes, secs = divmod(remainder, 60)
	        return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"

	    def _create_srt(self, segments: list, output_file: str) -> None:
	        """Write ``segments`` to ``output_file`` as numbered SRT cues.

	        Args:
	            segments: Sequence of mappings with ``start``, ``end`` (seconds)
	                and ``text`` entries.
	            output_file: Path of the SRT file to create or overwrite.

	        Raises:
	            TranscriptionError: If the file cannot be written.
	        """
	        try:
	            with open(output_file, 'w', encoding='utf-8') as f:
	                for i, segment in enumerate(segments, 1):
	                    start = self._convert_seconds_to_hms(segment['start'])
	                    end = self._convert_seconds_to_hms(segment['end'])
	                    text = segment['text'].lstrip()
	                    # Cue layout: index, time range, text, blank separator.
	                    f.write(f"{i}\n{start} --> {end}\n{text}\n\n")
	        except OSError as e:
	            raise TranscriptionError(f"Failed to write SRT file: {str(e)}") from e

	    def transcribe(self) -> None:
	        """Run the interactive workflow from model choice to saved SRT file.

	        Every failure is logged and shown in an error dialog; nothing is
	        raised to the caller by the handlers below.
	        """
	        try:
	            # Select model and file
	            model = self._select_model()
	            if not model:
	                logging.info("No model selected. Exiting...")
	                return

	            filename = self._select_file()
	            if not filename:
	                logging.info("No file selected. Exiting...")
	                return

	            # Validate file
	            self._validate_file(filename, model)

	            # Process transcription
	            logging.info(f"Starting transcription of {filename} using {model}")

	            try:
	                with open(filename, "rb") as file:
	                    transcription = self.client.audio.transcriptions.create(
	                        file=(filename, file.read()),
	                        model=model,
	                        response_format="verbose_json"
	                    )
	            except Exception as e:
	                raise TranscriptionError(f"API request failed: {str(e)}") from e

	            # Create SRT file next to the audio file
	            output_file = os.path.splitext(filename)[0] + ".srt"
	            self._create_srt(transcription.segments, output_file)

	            logging.info(f"Successfully created SRT file: {output_file}")
	            messagebox.showinfo("Success", f"SRT file saved successfully: {output_file}")

	        except TranscriptionError as e:
	            logging.error(f"Transcription error: {str(e)}")
	            messagebox.showerror("Error", str(e))
	        except Exception as e:
	            # exception() logs at ERROR level and records the traceback.
	            logging.exception(f"Unexpected error: {str(e)}")
	            messagebox.showerror("Error", f"An unexpected error occurred: {str(e)}")

	def main():
	    """Create an ``AudioTranscriber`` and run its interactive workflow.

	    Any exception that escapes is logged and reported in an error dialog
	    instead of being propagated.
	    """
	    try:
	        transcriber = AudioTranscriber()
	        transcriber.transcribe()
	    except Exception as e:
	        logging.error(f"Failed to initialize transcriber: {str(e)}")
	        messagebox.showerror("Error", f"Failed to initialize transcriber: {str(e)}")

	# Run the interactive workflow only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()