0asa · October 31, 2025 14:31
diff --git a/Whisper Menu Bar.md b/Whisper Menu Bar.md
diff --git a/whisper-push-to-talk.py b/whisper-push-to-talk.py
 #!/usr/bin/env python3
 """
 Minimal Speech-to-Text Menu Bar App
 Clean, simple voice transcription using Whisper.
 """
 # /// script
 # dependencies = [
 #   "openai-whisper>=20231117",
 #   "pyobjc-framework-Cocoa>=10.0",
 #   "pyaudio>=0.2.13",
 # ]
 # ///

 import threading
 import tempfile
 import os
 from Foundation import NSObject, NSTimer, NSOperationQueue
 from AppKit import (
    NSApplication, NSStatusBar, NSMenu, NSMenuItem, NSVariableStatusItemLength,
    NSOnState, NSOffState, NSEventModifierFlagCommand, NSEventModifierFlagOption,
    NSApplicationActivationPolicyAccessory, NSPasteboard, NSStringPboardType,
    NSEvent, NSEventMaskFlagsChanged
 )
 from PyObjCTools import AppHelper
 import pyaudio
 import wave
 import whisper


 class WhisperMenuBar(NSObject):
    """Minimal speech-to-text menu bar application"""
    
    def applicationDidFinishLaunching_(self, notification):
        """Prepare state, audio, menu bar and hotkey once the app has launched.

        The Whisper model is loaded on a daemon thread so that launch is not
        held up by it.
        """
        print("Starting Whisper Menu Bar...")

        # Accessory policy: menu bar presence only, no dock icon.
        NSApplication.sharedApplication().setActivationPolicy_(
            NSApplicationActivationPolicyAccessory
        )

        # Recording and hotkey state
        self.recording = False
        self.audio_frames = []
        self.modifier_pressed = False
        self.trigger_modifier = NSEventModifierFlagOption  # Option key

        # Model state
        self.whisper_model_name = "base"
        self.whisper_model = None
        self.model_loaded = False

        # Capture parameters handed to PyAudio when a stream is opened
        self.audio_stream = None
        self.audio_format, self.channels = pyaudio.paInt16, 1
        self.rate, self.chunk = 16000, 1024

        # A failed audio backend is tolerated: the app keeps running and
        # pyaudio_instance stays None.
        try:
            self.pyaudio_instance = pyaudio.PyAudio()
            print("Audio initialized")
        except Exception as e:
            print(f"Error: Audio initialization failed: {e}")
            self.pyaudio_instance = None

        self._setup_menu_bar()
        self._setup_push_to_talk()  # Option key monitoring

        loader = threading.Thread(target=self._load_model, daemon=True)
        loader.start()
    
    def _setup_menu_bar(self):
        """Create the status bar item and attach its menu.

        Menu layout: a disabled status line, a "Model" submenu with one entry
        per model size, and a Quit item (Cmd+Q).

        Sets ``self.statusItem``, ``self.statusMenuItem`` and
        ``self.modelMenuItems`` (model name -> NSMenuItem).
        """
        status_bar = NSStatusBar.systemStatusBar()
        self.statusItem = status_bar.statusItemWithLength_(NSVariableStatusItemLength)
        self.statusItem.setTitle_("🎤")

        menu = NSMenu.alloc().init()

        # Disabled item used purely as a status line.
        self.statusMenuItem = NSMenuItem.alloc().initWithTitle_action_keyEquivalent_(
            "Ready", None, ""
        )
        self.statusMenuItem.setEnabled_(False)
        menu.addItem_(self.statusMenuItem)

        # One separator only: recording is push-to-talk, so there is no
        # start/stop item between the status line and the model submenu.
        menu.addItem_(NSMenuItem.separatorItem())

        model_menu_item = NSMenuItem.alloc().initWithTitle_action_keyEquivalent_(
            "Model", None, ""
        )
        model_menu = NSMenu.alloc().init()

        # Tick the model that is actually configured; fall back to "base" if
        # the attribute has not been set yet.
        selected_model = getattr(self, "whisper_model_name", "base")

        self.modelMenuItems = {}
        for model_name in ("tiny", "base", "small", "medium"):
            item = NSMenuItem.alloc().initWithTitle_action_keyEquivalent_(
                model_name.capitalize(), "changeModel:", ""
            )
            item.setTarget_(self)
            # changeModel_ reads the model name back from the clicked item.
            item.setRepresentedObject_(model_name)
            item.setState_(NSOnState if model_name == selected_model else NSOffState)

            model_menu.addItem_(item)
            self.modelMenuItems[model_name] = item

        model_menu_item.setSubmenu_(model_menu)
        menu.addItem_(model_menu_item)

        menu.addItem_(NSMenuItem.separatorItem())

        quit_item = NSMenuItem.alloc().initWithTitle_action_keyEquivalent_(
            "Quit", "terminate:", "q"
        )
        quit_item.setKeyEquivalentModifierMask_(NSEventModifierFlagCommand)
        menu.addItem_(quit_item)

        self.statusItem.setMenu_(menu)
    
    def _setup_push_to_talk(self):
        """Install the flags-changed monitors that drive push-to-talk.

        Both monitor handles are kept on ``self`` so they can be removed when
        the application terminates.
        """
        mask = NSEventMaskFlagsChanged

        # Global monitor: modifier changes while the app does not have focus.
        self.flagsChangedMonitor = (
            NSEvent.addGlobalMonitorForEventsMatchingMask_handler_(
                mask, self._handle_flags_changed
            )
        )

        # Local monitor: modifier changes while the app has focus.
        self.localFlagsChangedMonitor = (
            NSEvent.addLocalMonitorForEventsMatchingMask_handler_(
                mask, self._handle_local_flags_changed
            )
        )

        print("Push-to-talk enabled: Hold Option key to record")
    
    def _handle_flags_changed(self, event):
        """Handle modifier-key changes reported by the global event monitor.

        Queues a recording start when the trigger modifier goes down and a
        recording stop when it comes back up. Both are queued on the main
        operation queue rather than run inline.

        Args:
            event: the flags-changed NSEvent; only ``modifierFlags()`` is read.
        """
        trigger_pressed = bool(event.modifierFlags() & self.trigger_modifier)
        was_pressed = self.modifier_pressed
        # Every path ends with the tracked state equal to the current one.
        self.modifier_pressed = trigger_pressed

        if trigger_pressed and not was_pressed and not self.recording:
            def start():
                self._start_recording()
            NSOperationQueue.mainQueue().addOperationWithBlock_(start)

        elif was_pressed and not trigger_pressed:
            # Do not require self.recording here: the start above is queued,
            # so a release could be handled before the flag is set, and
            # skipping the stop would leave the recording running. The stop is
            # queued after the start, and _stop_recording is a no-op when
            # nothing is being recorded.
            def stop():
                self._stop_recording()
            NSOperationQueue.mainQueue().addOperationWithBlock_(stop)
    
    def _handle_local_flags_changed(self, event):
        """Handle modifier-key changes reported by the local event monitor.

        Returns:
            None when the event started or stopped a recording (the event is
            consumed), otherwise the event itself so it is passed through.
        """
        held = bool(event.modifierFlags() & self.trigger_modifier)
        previously_held = self.modifier_pressed

        # Trigger went down while idle: begin recording.
        if held and not previously_held and not self.recording:
            self.modifier_pressed = True
            self._start_recording()
            return None

        # Trigger came up during a recording: finish it.
        if previously_held and not held and self.recording:
            self.modifier_pressed = False
            self._stop_recording()
            return None

        # Anything else only refreshes the tracked key state.
        self.modifier_pressed = held
        return event
    
    def _load_model(self):
        """Load the currently selected Whisper model.

        Started on a background thread at launch and on every model change.
        The model name is captured on entry; if the selection has changed by
        the time loading finishes, the result is discarded so that an older,
        slower load cannot replace the model chosen most recently.
        """
        model_name = self.whisper_model_name
        try:
            self._update_status("Loading model...")
            print(f"Loading Whisper model: {model_name}")

            # The model is always loaded on the CPU.
            model = whisper.load_model(model_name, device="cpu")
        except Exception as e:
            print(f"Error: Model loading failed: {e}")
            # Only report the failure if it concerns the current selection.
            if model_name == self.whisper_model_name:
                self._update_status("Model load failed")
                self.model_loaded = False
            return

        if model_name != self.whisper_model_name:
            # Another model was selected meanwhile; its own load will publish.
            print(f"Discarding stale model: {model_name}")
            return

        self.whisper_model = model
        self.model_loaded = True

        self._update_status("Ready")
        print(f"Model loaded: {model_name}")
    
    def _update_status(self, text):
        """Show ``text`` in the status menu item.

        The title change is queued on the main operation queue instead of
        being applied directly by the caller.
        """
        main_queue = NSOperationQueue.mainQueue()

        def apply_title():
            self.statusMenuItem.setTitle_(text)

        main_queue.addOperationWithBlock_(apply_title)
    
    def changeModel_(self, sender):
        """Menu action: switch to the model carried by the clicked item.

        Args:
            sender: the menu item whose represented object is the model name.
        """
        chosen = sender.representedObject()
        if chosen == self.whisper_model_name:
            return  # already the active model

        # Move the checkmark to the chosen entry.
        for name, item in self.modelMenuItems.items():
            if name == chosen:
                item.setState_(NSOnState)
            else:
                item.setState_(NSOffState)

        # Mark the model as unavailable and reload in the background.
        self.whisper_model_name = chosen
        self.model_loaded = False
        reloader = threading.Thread(target=self._load_model, daemon=True)
        reloader.start()
    
    
    def _start_recording(self):
        """Open the input stream and begin collecting audio buffers.

        Logs and returns without recording when audio is unavailable or the
        model has not been loaded. Any failure while opening the stream is
        logged and the recording flag is reset.
        """
        if not self.pyaudio_instance:
            print("Error: Audio not available")
            return
        if not self.model_loaded:
            print("Error: Model not loaded")
            return

        def collect(in_data, frame_count, time_info, status):
            # Stream callback: keep each buffer while recording is active.
            if self.recording:
                self.audio_frames.append(in_data)
            return (in_data, pyaudio.paContinue)

        try:
            # Set the flag and clear the buffer before the stream exists so
            # the callback stores data from its first invocation.
            self.recording = True
            self.audio_frames = []

            stream_options = dict(
                format=self.audio_format,
                channels=self.channels,
                rate=self.rate,
                input=True,
                frames_per_buffer=self.chunk,
                stream_callback=collect,
            )
            self.audio_stream = self.pyaudio_instance.open(**stream_options)
            self.audio_stream.start_stream()

            # Reflect the state in the menu and the status bar icon.
            self._update_status("Recording...")
            self.statusItem.setTitle_("🔴")

            print("Recording started")
        except Exception as e:
            print(f"Error: Recording failed: {e}")
            self.recording = False
    
    def _stop_recording(self):
        """Close the input stream and hand the captured audio to a worker.

        Does nothing when no recording is in progress. Transcription runs on a
        daemon thread with a copy of the captured buffers.
        """
        if not self.recording:
            return

        try:
            self.recording = False

            # Shut the stream down and forget the handle.
            stream = self.audio_stream
            if stream:
                stream.stop_stream()
                stream.close()
                self.audio_stream = None

            # Reflect the state in the menu and the status bar icon.
            self._update_status("Transcribing...")
            self.statusItem.setTitle_("🎤")

            print("Recording stopped")

            # Work on a snapshot so a new recording cannot alter this one.
            captured = list(self.audio_frames)
            worker = threading.Thread(
                target=self._transcribe_audio, args=(captured,), daemon=True
            )
            worker.start()
        except Exception as e:
            print(f"Error: Stop recording failed: {e}")
    
    def _transcribe_audio(self, frames):
        """Transcribe recorded audio and copy the text to the clipboard.

        The buffers are written to a temporary WAV file, which is passed to
        the Whisper model. The temporary file is removed in every case,
        including when writing or transcription fails.

        Args:
            frames: sequence of raw audio byte buffers collected while
                recording; an empty sequence is reported as "no audio".
        """
        temp_path = None
        try:
            if not frames:
                print("Error: No audio data")
                self._update_status("No audio recorded")
                return

            # Reserve a file name; the file is reopened below by the wave module.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
                temp_path = temp_file.name

            # The context manager closes the WAV file even if a write fails.
            with wave.open(temp_path, 'wb') as wf:
                wf.setnchannels(self.channels)
                wf.setsampwidth(self.pyaudio_instance.get_sample_size(self.audio_format))
                wf.setframerate(self.rate)
                wf.writeframes(b''.join(frames))

            print(f"Transcribing audio file: {temp_path}")

            result = self.whisper_model.transcribe(temp_path, fp16=False)
            text = result["text"].strip()

            if text:
                print(f"Transcription: {text}")

                self._copy_to_clipboard(text)
                self._update_status("Done - Copied to clipboard")

                print(f"Result copied to clipboard: {text}")
            else:
                print("No speech detected")
                self._update_status("No speech detected")

        except Exception as e:
            print(f"Error: Transcription failed: {e}")
            self._update_status("Transcription failed")
        finally:
            # Always remove the temp file, otherwise each failed run leaves a
            # recording behind in the temp directory.
            if temp_path is not None:
                try:
                    os.unlink(temp_path)
                except OSError as e:
                    print(f"Warning: Could not remove temp file {temp_path}: {e}")
    
    def _copy_to_clipboard(self, text):
        """Replace the general pasteboard's contents with ``text``."""
        board = NSPasteboard.generalPasteboard()
        # Clear whatever is there before writing the new string.
        board.clearContents()
        board.setString_forType_(text, NSStringPboardType)
    
    def applicationWillTerminate_(self, notification):
        """Release event monitors and audio resources as the app quits."""
        print("Shutting down...")

        # The monitors may never have been installed, hence the default.
        for attr in ('flagsChangedMonitor', 'localFlagsChangedMonitor'):
            monitor = getattr(self, attr, None)
            if monitor:
                NSEvent.removeMonitor_(monitor)

        # Close a stream that is still open from a recording in progress.
        stream = self.audio_stream
        if stream:
            stream.stop_stream()
            stream.close()

        audio = self.pyaudio_instance
        if audio:
            audio.terminate()


 if __name__ == "__main__":
    # Script entry point: install the delegate and run the Cocoa event loop.
    print("Initializing Whisper Menu Bar App...")

    # Keep the delegate in a module-level name for the life of the process.
    delegate = WhisperMenuBar.alloc().init()
    app = NSApplication.sharedApplication()
    app.setDelegate_(delegate)

    print("Starting...")
    AppHelper.runEventLoop()
	#!/usr/bin/env python3
	"""
	Minimal Speech-to-Text Menu Bar App
	Clean, simple voice transcription using Whisper.
	"""
	# /// script
	# dependencies = [
	# "openai-whisper>=20231117",
	# "pyobjc-framework-Cocoa>=10.0",
	# "pyaudio>=0.2.13",
	# ]
	# ///

	import threading
	import tempfile
	import os
	from Foundation import NSObject, NSTimer, NSOperationQueue
	from AppKit import (
	NSApplication, NSStatusBar, NSMenu, NSMenuItem, NSVariableStatusItemLength,
	NSOnState, NSOffState, NSEventModifierFlagCommand, NSEventModifierFlagOption,
	NSApplicationActivationPolicyAccessory, NSPasteboard, NSStringPboardType,
	NSEvent, NSEventMaskFlagsChanged
	)
	from PyObjCTools import AppHelper
	import pyaudio
	import wave
	import whisper


	class WhisperMenuBar(NSObject):
	    """Minimal speech-to-text menu bar application.

	    Holding the Option key records from the microphone; releasing it
	    transcribes the recording with Whisper and copies the text to the
	    clipboard.
	    """

	    def applicationDidFinishLaunching_(self, notification):
	        """Prepare state, audio, menu bar and hotkey once the app has launched."""
	        print("Starting Whisper Menu Bar...")

	        # Set as accessory app (no dock icon)
	        app = NSApplication.sharedApplication()
	        app.setActivationPolicy_(NSApplicationActivationPolicyAccessory)

	        # Initialize state
	        self.recording = False
	        self.audio_frames = []
	        self.whisper_model = None
	        self.model_loaded = False
	        self.whisper_model_name = "base"
	        self.modifier_pressed = False
	        self.trigger_modifier = NSEventModifierFlagOption  # Option key

	        # Audio settings
	        self.audio_format = pyaudio.paInt16
	        self.channels = 1
	        self.rate = 16000
	        self.chunk = 1024
	        self.audio_stream = None

	        # A failed audio backend is tolerated; pyaudio_instance stays None.
	        try:
	            self.pyaudio_instance = pyaudio.PyAudio()
	            print("Audio initialized")
	        except Exception as e:
	            print(f"Error: Audio initialization failed: {e}")
	            self.pyaudio_instance = None

	        self._setup_menu_bar()

	        # Option key monitoring
	        self._setup_push_to_talk()

	        # Load model in background
	        threading.Thread(target=self._load_model, daemon=True).start()

	    def _setup_menu_bar(self):
	        """Create the status bar item and attach its menu.

	        Sets ``self.statusItem``, ``self.statusMenuItem`` and
	        ``self.modelMenuItems`` (model name -> NSMenuItem).
	        """
	        statusBar = NSStatusBar.systemStatusBar()
	        self.statusItem = statusBar.statusItemWithLength_(NSVariableStatusItemLength)
	        self.statusItem.setTitle_("🎤")

	        menu = NSMenu.alloc().init()

	        # Disabled item used purely as a status line.
	        self.statusMenuItem = NSMenuItem.alloc().initWithTitle_action_keyEquivalent_(
	            "Ready", None, ""
	        )
	        self.statusMenuItem.setEnabled_(False)
	        menu.addItem_(self.statusMenuItem)

	        # One separator only: recording is push-to-talk, so there is no
	        # start/stop item between the status line and the model submenu.
	        menu.addItem_(NSMenuItem.separatorItem())

	        # Model selection
	        modelMenuItem = NSMenuItem.alloc().initWithTitle_action_keyEquivalent_(
	            "Model", None, ""
	        )
	        modelMenu = NSMenu.alloc().init()

	        # Tick the model that is actually configured.
	        selected_model = getattr(self, "whisper_model_name", "base")

	        self.modelMenuItems = {}
	        for model_name in ("tiny", "base", "small", "medium"):
	            item = NSMenuItem.alloc().initWithTitle_action_keyEquivalent_(
	                model_name.capitalize(), "changeModel:", ""
	            )
	            item.setTarget_(self)
	            # changeModel_ reads the model name back from the clicked item.
	            item.setRepresentedObject_(model_name)
	            item.setState_(NSOnState if model_name == selected_model else NSOffState)

	            modelMenu.addItem_(item)
	            self.modelMenuItems[model_name] = item

	        modelMenuItem.setSubmenu_(modelMenu)
	        menu.addItem_(modelMenuItem)

	        menu.addItem_(NSMenuItem.separatorItem())

	        # Quit
	        quitItem = NSMenuItem.alloc().initWithTitle_action_keyEquivalent_(
	            "Quit", "terminate:", "q"
	        )
	        quitItem.setKeyEquivalentModifierMask_(NSEventModifierFlagCommand)
	        menu.addItem_(quitItem)

	        self.statusItem.setMenu_(menu)

	    def _setup_push_to_talk(self):
	        """Install the flags-changed monitors that drive push-to-talk."""
	        # Global monitor: modifier changes while the app does not have focus.
	        self.flagsChangedMonitor = NSEvent.addGlobalMonitorForEventsMatchingMask_handler_(
	            NSEventMaskFlagsChanged, self._handle_flags_changed
	        )

	        # Local monitor: modifier changes while the app has focus.
	        self.localFlagsChangedMonitor = NSEvent.addLocalMonitorForEventsMatchingMask_handler_(
	            NSEventMaskFlagsChanged, self._handle_local_flags_changed
	        )

	        print("Push-to-talk enabled: Hold Option key to record")

	    def _handle_flags_changed(self, event):
	        """Handle modifier-key changes reported by the global event monitor.

	        Start and stop are queued on the main operation queue.
	        """
	        trigger_pressed = bool(event.modifierFlags() & self.trigger_modifier)
	        was_pressed = self.modifier_pressed
	        # Every path ends with the tracked state equal to the current one.
	        self.modifier_pressed = trigger_pressed

	        if trigger_pressed and not was_pressed and not self.recording:
	            def start():
	                self._start_recording()
	            NSOperationQueue.mainQueue().addOperationWithBlock_(start)

	        elif was_pressed and not trigger_pressed:
	            # Do not require self.recording here: the start is queued, so a
	            # release could be handled before the flag is set. The stop is a
	            # no-op when nothing is being recorded.
	            def stop():
	                self._stop_recording()
	            NSOperationQueue.mainQueue().addOperationWithBlock_(stop)

	    def _handle_local_flags_changed(self, event):
	        """Handle modifier-key changes reported by the local event monitor.

	        Returns None when the event started or stopped a recording (consumed),
	        otherwise the event itself (passed through).
	        """
	        trigger_pressed = bool(event.modifierFlags() & self.trigger_modifier)

	        # Option key pressed - start recording
	        if trigger_pressed and not self.modifier_pressed and not self.recording:
	            self.modifier_pressed = True
	            self._start_recording()
	            return None  # Consume event

	        # Option key released - stop recording
	        elif not trigger_pressed and self.modifier_pressed and self.recording:
	            self.modifier_pressed = False
	            self._stop_recording()
	            return None  # Consume event
	        else:
	            self.modifier_pressed = trigger_pressed

	        return event  # Pass through

	    def _load_model(self):
	        """Load the currently selected Whisper model.

	        The model name is captured on entry; a result for a model that is no
	        longer selected when loading finishes is discarded.
	        """
	        model_name = self.whisper_model_name
	        try:
	            self._update_status("Loading model...")
	            print(f"Loading Whisper model: {model_name}")

	            # The model is always loaded on the CPU.
	            model = whisper.load_model(model_name, device="cpu")
	        except Exception as e:
	            print(f"Error: Model loading failed: {e}")
	            # Only report the failure if it concerns the current selection.
	            if model_name == self.whisper_model_name:
	                self._update_status("Model load failed")
	                self.model_loaded = False
	            return

	        if model_name != self.whisper_model_name:
	            # Another model was selected meanwhile; its own load will publish.
	            print(f"Discarding stale model: {model_name}")
	            return

	        self.whisper_model = model
	        self.model_loaded = True

	        self._update_status("Ready")
	        print(f"Model loaded: {model_name}")

	    def _update_status(self, text):
	        """Show ``text`` in the status menu item via the main operation queue."""
	        def update():
	            self.statusMenuItem.setTitle_(text)
	        NSOperationQueue.mainQueue().addOperationWithBlock_(update)

	    def changeModel_(self, sender):
	        """Menu action: switch to the model carried by the clicked item."""
	        new_model = sender.representedObject()

	        if new_model == self.whisper_model_name:
	            return

	        # Update checkmarks
	        for name, item in self.modelMenuItems.items():
	            item.setState_(NSOnState if name == new_model else NSOffState)

	        # Reload model in the background
	        self.whisper_model_name = new_model
	        self.model_loaded = False
	        threading.Thread(target=self._load_model, daemon=True).start()

	    def _start_recording(self):
	        """Open the input stream and begin collecting audio buffers.

	        Logs and returns without recording when audio is unavailable or the
	        model has not been loaded.
	        """
	        if not self.pyaudio_instance:
	            print("Error: Audio not available")
	            return

	        if not self.model_loaded:
	            print("Error: Model not loaded")
	            return

	        try:
	            self.recording = True
	            self.audio_frames = []

	            # Stream callback: keep each buffer while recording is active.
	            def audio_callback(in_data, frame_count, time_info, status):
	                if self.recording:
	                    self.audio_frames.append(in_data)
	                return (in_data, pyaudio.paContinue)

	            self.audio_stream = self.pyaudio_instance.open(
	                format=self.audio_format,
	                channels=self.channels,
	                rate=self.rate,
	                input=True,
	                frames_per_buffer=self.chunk,
	                stream_callback=audio_callback
	            )

	            self.audio_stream.start_stream()

	            # Update UI
	            self._update_status("Recording...")
	            self.statusItem.setTitle_("🔴")

	            print("Recording started")

	        except Exception as e:
	            print(f"Error: Recording failed: {e}")
	            self.recording = False

	    def _stop_recording(self):
	        """Close the input stream and transcribe the capture on a worker thread."""
	        if not self.recording:
	            return

	        try:
	            self.recording = False

	            # Stop stream
	            if self.audio_stream:
	                self.audio_stream.stop_stream()
	                self.audio_stream.close()
	                self.audio_stream = None

	            # Update UI
	            self._update_status("Transcribing...")
	            self.statusItem.setTitle_("🎤")

	            print("Recording stopped")

	            # Transcribe a snapshot of the buffers in the background
	            frames = self.audio_frames.copy()
	            threading.Thread(target=lambda: self._transcribe_audio(frames), daemon=True).start()

	        except Exception as e:
	            print(f"Error: Stop recording failed: {e}")

	    def _transcribe_audio(self, frames):
	        """Transcribe recorded audio and copy the text to the clipboard.

	        The buffers are written to a temporary WAV file that is removed in
	        every case, including when writing or transcription fails.

	        Args:
	            frames: sequence of raw audio byte buffers collected while
	                recording; an empty sequence is reported as "no audio".
	        """
	        temp_path = None
	        try:
	            if not frames:
	                print("Error: No audio data")
	                self._update_status("No audio recorded")
	                return

	            # Reserve a file name; the file is reopened below by the wave module.
	            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
	                temp_path = temp_file.name

	            # The context manager closes the WAV file even if a write fails.
	            with wave.open(temp_path, 'wb') as wf:
	                wf.setnchannels(self.channels)
	                wf.setsampwidth(self.pyaudio_instance.get_sample_size(self.audio_format))
	                wf.setframerate(self.rate)
	                wf.writeframes(b''.join(frames))

	            print(f"Transcribing audio file: {temp_path}")

	            result = self.whisper_model.transcribe(temp_path, fp16=False)
	            text = result["text"].strip()

	            if text:
	                print(f"Transcription: {text}")

	                self._copy_to_clipboard(text)
	                self._update_status("Done - Copied to clipboard")

	                print(f"Result copied to clipboard: {text}")
	            else:
	                print("No speech detected")
	                self._update_status("No speech detected")

	        except Exception as e:
	            print(f"Error: Transcription failed: {e}")
	            self._update_status("Transcription failed")
	        finally:
	            # Always remove the temp file so failed runs leave nothing behind.
	            if temp_path is not None:
	                try:
	                    os.unlink(temp_path)
	                except OSError as e:
	                    print(f"Warning: Could not remove temp file {temp_path}: {e}")

	    def _copy_to_clipboard(self, text):
	        """Replace the general pasteboard's contents with ``text``."""
	        pasteboard = NSPasteboard.generalPasteboard()
	        pasteboard.clearContents()
	        pasteboard.setString_forType_(text, NSStringPboardType)

	    def applicationWillTerminate_(self, notification):
	        """Release event monitors and audio resources as the app quits."""
	        print("Shutting down...")

	        # Remove event monitors (they may never have been installed)
	        if hasattr(self, 'flagsChangedMonitor') and self.flagsChangedMonitor:
	            NSEvent.removeMonitor_(self.flagsChangedMonitor)
	        if hasattr(self, 'localFlagsChangedMonitor') and self.localFlagsChangedMonitor:
	            NSEvent.removeMonitor_(self.localFlagsChangedMonitor)

	        if self.audio_stream:
	            self.audio_stream.stop_stream()
	            self.audio_stream.close()

	        if self.pyaudio_instance:
	            self.pyaudio_instance.terminate()


	if __name__ == "__main__":
	    # Script entry point: install the delegate and run the Cocoa event loop.
	    print("Initializing Whisper Menu Bar App...")

	    app = NSApplication.sharedApplication()
	    # Keep the delegate in a module-level name for the life of the process.
	    delegate = WhisperMenuBar.alloc().init()
	    app.setDelegate_(delegate)

	    print("Starting...")
	    AppHelper.runEventLoop()
No results found