|
#!/usr/bin/env python3 |
|
""" |
|
Minimal Speech-to-Text Menu Bar App |
|
Clean, simple voice transcription using Whisper. |
|
""" |
|
# /// script |
|
# dependencies = [ |
|
# "openai-whisper>=20231117", |
|
# "pyobjc-framework-Cocoa>=10.0", |
|
# "pyaudio>=0.2.13", |
|
# ] |
|
# /// |
|
|
|
import threading |
|
import tempfile |
|
import os |
|
from Foundation import NSObject, NSTimer, NSOperationQueue |
|
from AppKit import ( |
|
NSApplication, NSStatusBar, NSMenu, NSMenuItem, NSVariableStatusItemLength, |
|
NSOnState, NSOffState, NSEventModifierFlagCommand, NSEventModifierFlagOption, |
|
NSApplicationActivationPolicyAccessory, NSPasteboard, NSStringPboardType, |
|
NSEvent, NSEventMaskFlagsChanged |
|
) |
|
from PyObjCTools import AppHelper |
|
import pyaudio |
|
import wave |
|
import whisper |
|
|
|
|
|
class WhisperMenuBar(NSObject):
    """Application delegate for a push-to-talk speech-to-text menu bar app.

    While the trigger modifier (Option) is held, microphone audio is captured
    through PyAudio. On release the captured frames are written to a temporary
    WAV file, transcribed with the selected Whisper model on a background
    thread, and the resulting text is placed on the general pasteboard.
    """

    def applicationDidFinishLaunching_(self, notification):
        """Initialize state, audio, the status-bar menu and the key monitors.

        Args:
            notification: Launch notification passed by AppKit (unused).
        """
        print("Starting Whisper Menu Bar...")

        # Set as accessory app (no dock icon)
        app = NSApplication.sharedApplication()
        app.setActivationPolicy_(NSApplicationActivationPolicyAccessory)

        # Recording / model state
        self.recording = False
        self.audio_frames = []
        self.whisper_model = None
        self.model_loaded = False
        self.whisper_model_name = "base"
        self.modifier_pressed = False
        self.trigger_modifier = NSEventModifierFlagOption  # Option key

        # Audio settings: 16-bit mono at 16 kHz, read in 1024-frame buffers
        self.audio_format = pyaudio.paInt16
        self.channels = 1
        self.rate = 16000
        self.chunk = 1024
        self.audio_stream = None

        # Initialize audio; a failure leaves pyaudio_instance as None, which
        # _start_recording reports instead of crashing at launch.
        try:
            self.pyaudio_instance = pyaudio.PyAudio()
            print("Audio initialized")
        except Exception as e:
            print(f"Error: Audio initialization failed: {e}")
            self.pyaudio_instance = None

        # Setup menu bar
        self._setup_menu_bar()

        # Setup push-to-talk (Option key monitoring)
        self._setup_push_to_talk()

        # Load model in background so the UI is usable immediately
        threading.Thread(target=self._load_model, daemon=True).start()

    def _setup_menu_bar(self):
        """Create the status bar icon and its menu (status, model, quit)."""
        status_bar = NSStatusBar.systemStatusBar()
        self.statusItem = status_bar.statusItemWithLength_(NSVariableStatusItemLength)
        self.statusItem.setTitle_("🎤")

        menu = NSMenu.alloc().init()

        # Disabled item that only displays the current state
        self.statusMenuItem = NSMenuItem.alloc().initWithTitle_action_keyEquivalent_(
            "Ready", None, ""
        )
        self.statusMenuItem.setEnabled_(False)
        menu.addItem_(self.statusMenuItem)

        # Push-to-talk only (no explicit start/stop menu item), so a single
        # separator between the status line and the model submenu is enough.
        menu.addItem_(NSMenuItem.separatorItem())

        # Model selection submenu
        model_menu_item = NSMenuItem.alloc().initWithTitle_action_keyEquivalent_(
            "Model", None, ""
        )
        model_menu = NSMenu.alloc().init()

        self.modelMenuItems = {}
        for model_name in ["tiny", "base", "small", "medium"]:
            item = NSMenuItem.alloc().initWithTitle_action_keyEquivalent_(
                model_name.capitalize(), "changeModel:", ""
            )
            item.setTarget_(self)
            item.setRepresentedObject_(model_name)

            # Checkmark follows the configured model rather than a literal
            if model_name == self.whisper_model_name:
                item.setState_(NSOnState)

            model_menu.addItem_(item)
            self.modelMenuItems[model_name] = item

        model_menu_item.setSubmenu_(model_menu)
        menu.addItem_(model_menu_item)

        menu.addItem_(NSMenuItem.separatorItem())

        # Quit (Cmd+Q)
        quit_item = NSMenuItem.alloc().initWithTitle_action_keyEquivalent_(
            "Quit", "terminate:", "q"
        )
        quit_item.setKeyEquivalentModifierMask_(NSEventModifierFlagCommand)
        menu.addItem_(quit_item)

        self.statusItem.setMenu_(menu)

    def _setup_push_to_talk(self):
        """Install global and local monitors for modifier-flag changes."""
        # Global monitoring for modifier flags (works even when app doesn't have focus)
        self.flagsChangedMonitor = NSEvent.addGlobalMonitorForEventsMatchingMask_handler_(
            NSEventMaskFlagsChanged, self._handle_flags_changed
        )

        # Local monitoring for modifier flags (when app has focus)
        self.localFlagsChangedMonitor = NSEvent.addLocalMonitorForEventsMatchingMask_handler_(
            NSEventMaskFlagsChanged, self._handle_local_flags_changed
        )

        print("Push-to-talk enabled: Hold Option key to record")

    def _handle_flags_changed(self, event):
        """Handle modifier changes reported by the global monitor.

        Start and stop are queued on the main operation queue rather than run
        inline.

        Args:
            event: The flags-changed NSEvent.
        """
        trigger_pressed = bool(event.modifierFlags() & self.trigger_modifier)
        was_pressed = self.modifier_pressed
        # Every path ends with the tracked state equal to the observed state.
        self.modifier_pressed = trigger_pressed

        if trigger_pressed and not was_pressed and not self.recording:
            # Option key pressed - start recording
            NSOperationQueue.mainQueue().addOperationWithBlock_(
                lambda: self._start_recording()
            )
        elif was_pressed and not trigger_pressed:
            # Option key released - stop recording. The stop is queued even if
            # self.recording is still False: the queued start may not have run
            # yet after a quick tap, and _stop_recording is a no-op when
            # nothing is being recorded.
            NSOperationQueue.mainQueue().addOperationWithBlock_(
                lambda: self._stop_recording()
            )

    def _handle_local_flags_changed(self, event):
        """Handle modifier changes reported by the local monitor.

        Args:
            event: The flags-changed NSEvent.

        Returns:
            None when the event started or stopped a recording (the event is
            consumed), otherwise the event itself so it is passed through.
        """
        trigger_pressed = bool(event.modifierFlags() & self.trigger_modifier)
        was_pressed = self.modifier_pressed
        self.modifier_pressed = trigger_pressed

        # Option key pressed - start recording
        if trigger_pressed and not was_pressed and not self.recording:
            self._start_recording()
            return None  # Consume event

        # Option key released - stop recording
        if was_pressed and not trigger_pressed and self.recording:
            self._stop_recording()
            return None  # Consume event

        return event  # Pass through

    def _load_model(self):
        """Load the selected Whisper model; intended to run on a worker thread.

        The model name is snapshotted on entry. If the selection changes while
        the load is in progress, the result of this load is discarded so that
        a slower, older load cannot overwrite a newer choice.
        """
        requested = self.whisper_model_name
        try:
            self._update_status("Loading model...")
            print(f"Loading Whisper model: {requested}")

            # The device is pinned to CPU explicitly.
            model = whisper.load_model(requested, device="cpu")
        except Exception as e:
            print(f"Error: Model loading failed: {e}")
            if requested == self.whisper_model_name:
                self._update_status("Model load failed")
                self.model_loaded = False
            return

        if requested != self.whisper_model_name:
            # A different model was selected meanwhile; its own loader thread
            # is responsible for publishing the model and the status.
            return

        self.whisper_model = model
        self.model_loaded = True

        self._update_status("Ready")
        print(f"Model loaded: {requested}")

    def _update_status(self, text):
        """Set the status menu item's title via the main operation queue.

        Args:
            text: Title to display in the status menu item.
        """
        def update():
            self.statusMenuItem.setTitle_(text)

        NSOperationQueue.mainQueue().addOperationWithBlock_(update)

    def changeModel_(self, sender):
        """Menu action: switch to the model stored on the clicked item.

        Args:
            sender: The NSMenuItem whose representedObject is the model name.
        """
        new_model = sender.representedObject()
        if new_model == self.whisper_model_name:
            return

        # Update checkmarks
        for name, item in self.modelMenuItems.items():
            item.setState_(NSOnState if name == new_model else NSOffState)

        # Reload model; recording is refused while model_loaded is False
        self.whisper_model_name = new_model
        self.model_loaded = False
        threading.Thread(target=self._load_model, daemon=True).start()

    def _start_recording(self):
        """Open the input stream and begin collecting audio frames.

        Does nothing if a recording is already in progress, if audio failed to
        initialize, or if no model is loaded yet.
        """
        if self.recording:
            return

        if not self.pyaudio_instance:
            print("Error: Audio not available")
            return

        if not self.model_loaded:
            print("Error: Model not loaded")
            return

        self.audio_frames = []

        def audio_callback(in_data, frame_count, time_info, status):
            # Invoked by PyAudio for each captured buffer
            if self.recording:
                self.audio_frames.append(in_data)
            return (in_data, pyaudio.paContinue)

        try:
            # Flag is raised before the stream opens so the callback keeps
            # the very first buffers.
            self.recording = True
            self.audio_stream = self.pyaudio_instance.open(
                format=self.audio_format,
                channels=self.channels,
                rate=self.rate,
                input=True,
                frames_per_buffer=self.chunk,
                stream_callback=audio_callback,
            )
            self.audio_stream.start_stream()
        except Exception as e:
            print(f"Error: Recording failed: {e}")
            self.recording = False
            # Do not leave a half-opened stream behind.
            stream, self.audio_stream = self.audio_stream, None
            if stream is not None:
                try:
                    stream.close()
                except Exception as close_error:
                    print(f"Error: Closing audio stream failed: {close_error}")
            return

        # Update UI
        self._update_status("Recording...")
        self.statusItem.setTitle_("🔴")

        print("Recording started")

    def _stop_recording(self):
        """Stop the input stream and transcribe the captured audio.

        A failure while stopping or closing the stream is reported, but the
        UI is still reset and the frames captured so far are still handed to
        the transcription thread.
        """
        if not self.recording:
            return

        self.recording = False

        # Detach the stream first so a failing stop/close cannot leave a stale
        # reference on the instance.
        stream, self.audio_stream = self.audio_stream, None
        if stream is not None:
            try:
                try:
                    stream.stop_stream()
                finally:
                    stream.close()
            except Exception as e:
                print(f"Error: Stop recording failed: {e}")

        # Update UI
        self._update_status("Transcribing...")
        self.statusItem.setTitle_("🎤")

        print("Recording stopped")

        # Hand the captured frames to the worker and start from a fresh list
        frames, self.audio_frames = self.audio_frames, []
        threading.Thread(
            target=lambda: self._transcribe_audio(frames), daemon=True
        ).start()

    def _transcribe_audio(self, frames):
        """Write frames to a temporary WAV file, transcribe it, copy the text.

        Intended to run on a worker thread. The temporary file is removed
        whether or not transcription succeeds.

        Args:
            frames: List of raw audio byte buffers captured by the stream.
        """
        temp_path = None
        try:
            if not frames:
                print("Error: No audio data")
                self._update_status("No audio recorded")
                return

            # Use one model reference throughout, even if the selection
            # changes while this transcription is running.
            model = self.whisper_model

            # Reserve a path, then write the WAV data to it
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
                temp_path = temp_file.name

            with wave.open(temp_path, 'wb') as wf:
                wf.setnchannels(self.channels)
                wf.setsampwidth(self.pyaudio_instance.get_sample_size(self.audio_format))
                wf.setframerate(self.rate)
                wf.writeframes(b''.join(frames))

            print(f"Transcribing audio file: {temp_path}")

            result = model.transcribe(temp_path, fp16=False)
            text = result["text"].strip()

            if text:
                print(f"Transcription: {text}")

                # Copy to clipboard
                self._copy_to_clipboard(text)

                self._update_status("Done - Copied to clipboard")

                # Show result in console
                print(f"Result copied to clipboard: {text}")
            else:
                print("No speech detected")
                self._update_status("No speech detected")

        except Exception as e:
            print(f"Error: Transcription failed: {e}")
            self._update_status("Transcription failed")
        finally:
            # Cleanup runs on every path so failed runs do not pile up files
            if temp_path is not None:
                try:
                    os.unlink(temp_path)
                except OSError as e:
                    print(f"Warning: Could not remove temp file {temp_path}: {e}")

    def _copy_to_clipboard(self, text):
        """Replace the general pasteboard contents with the given text.

        Args:
            text: String to place on the pasteboard.
        """
        pasteboard = NSPasteboard.generalPasteboard()
        pasteboard.clearContents()
        pasteboard.setString_forType_(text, NSStringPboardType)

    def applicationWillTerminate_(self, notification):
        """Remove the event monitors and release audio resources on quit.

        Args:
            notification: Termination notification passed by AppKit (unused).
        """
        print("Shutting down...")

        # Remove event monitors (attributes may be missing if launch failed
        # before the monitors were installed)
        for attr in ('flagsChangedMonitor', 'localFlagsChangedMonitor'):
            monitor = getattr(self, attr, None)
            if monitor:
                NSEvent.removeMonitor_(monitor)

        stream = getattr(self, 'audio_stream', None)
        if stream is not None:
            # A stream error must not prevent PyAudio from being terminated.
            try:
                try:
                    stream.stop_stream()
                finally:
                    stream.close()
            except Exception as e:
                print(f"Error: Closing audio stream failed: {e}")
            self.audio_stream = None

        pa = getattr(self, 'pyaudio_instance', None)
        if pa:
            pa.terminate()
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: create the shared application, attach the menu bar
    # delegate and hand control to the PyObjC event loop.
    print("Initializing Whisper Menu Bar App...")

    shared_app = NSApplication.sharedApplication()
    # Keep the delegate in a module-level name for the lifetime of the loop.
    menu_bar_delegate = WhisperMenuBar.alloc().init()
    shared_app.setDelegate_(menu_bar_delegate)

    print("Starting...")
    AppHelper.runEventLoop()