Skip to content

Instantly share code, notes, and snippets.

@0asa
Last active October 31, 2025 14:31
Show Gist options
  • Select an option

  • Save 0asa/3861da03d1f49106e8073416b39a32bd to your computer and use it in GitHub Desktop.

Select an option

Save 0asa/3861da03d1f49106e8073416b39a32bd to your computer and use it in GitHub Desktop.
A minimal, clean speech-to-text menu bar application for macOS using OpenAI's Whisper.

Whisper Menu Bar

A minimal, clean speech-to-text menu bar application for macOS using OpenAI's Whisper.

Features

  • 🎤 Push-to-talk: Hold Option key to record, release to transcribe
  • 📋 Auto-clipboard: Transcribed text automatically copied to clipboard
  • 🔄 Model selection: Switch between tiny, base, small, and medium models
  • 🎯 Clean & minimal: Simple interface, ~300 lines of code

Requirements

  • macOS (tested on macOS 10.15+)
  • Python 3.8 or higher
  • Microphone access permissions

Installation and Usage

  1. Save whisper-push-to-talk.py anywhere on your Mac

  2. Install uv and run:

    uv run whisper-push-to-talk.py

    Note: On macOS, you may need to install PortAudio first for PyAudio:

    brew install portaudio
  3. Grant microphone permissions to Terminal/your Python app when prompted

The app will:

  1. Show a microphone icon (🎤) in your menu bar
  2. Load the Whisper model in the background (first run may take a moment)
  3. Display "Ready" when ready to use

Recording

  • Option key: Hold to record, release to transcribe. The transcribed text is copied to the clipboard, ready to paste anywhere
  • Click the menu bar icon → Model → Select your preferred model:
    • tiny: Fastest, lowest accuracy (~1GB)
    • base: Good balance (default, ~1GB)
    • small: Better accuracy (~2GB)
    • medium: Best accuracy (~5GB)

Quit

Click the menu bar icon → Quit

License

MIT License - Feel free to modify and distribute

Credits

Built with OpenAI's Whisper model for speech recognition.

#!/usr/bin/env python3
"""
Minimal Speech-to-Text Menu Bar App
Clean, simple voice transcription using Whisper.
"""
# /// script
# dependencies = [
# "openai-whisper>=20231117",
# "pyobjc-framework-Cocoa>=10.0",
# "pyaudio>=0.2.13",
# ]
# ///
import threading
import tempfile
import os
from Foundation import NSObject, NSTimer, NSOperationQueue
from AppKit import (
NSApplication, NSStatusBar, NSMenu, NSMenuItem, NSVariableStatusItemLength,
NSOnState, NSOffState, NSEventModifierFlagCommand, NSEventModifierFlagOption,
NSApplicationActivationPolicyAccessory, NSPasteboard, NSStringPboardType,
NSEvent, NSEventMaskFlagsChanged
)
from PyObjCTools import AppHelper
import pyaudio
import wave
import whisper
class WhisperMenuBar(NSObject):
    """Push-to-talk speech-to-text menu bar application.

    The object is installed as the NSApplication delegate. While the trigger
    modifier (the Option key) is held, microphone audio is captured through
    PyAudio; when it is released the captured audio is transcribed with
    Whisper on a background thread and the text is copied to the general
    pasteboard.

    All instance state is created in ``applicationDidFinishLaunching_``,
    not in an initializer.
    """

    def applicationDidFinishLaunching_(self, notification):
        """Initialize state, audio, the menu bar UI and start loading the model.

        Args:
            notification: The launch notification passed by AppKit (unused).
        """
        print("Starting Whisper Menu Bar...")

        # Set as accessory app (no dock icon)
        app = NSApplication.sharedApplication()
        app.setActivationPolicy_(NSApplicationActivationPolicyAccessory)

        # Initialize state
        self.recording = False
        self.audio_frames = []
        self.whisper_model = None
        self.model_loaded = False
        self.whisper_model_name = "base"
        self.modifier_pressed = False
        self.trigger_modifier = NSEventModifierFlagOption  # Option key

        # Audio settings
        self.audio_format = pyaudio.paInt16
        self.channels = 1
        self.rate = 16000
        self.chunk = 1024
        self.audio_stream = None

        # Initialize audio; recording is refused later if this failed.
        try:
            self.pyaudio_instance = pyaudio.PyAudio()
            print("Audio initialized")
        except Exception as e:
            print(f"Error: Audio initialization failed: {e}")
            self.pyaudio_instance = None

        # Setup menu bar
        self._setup_menu_bar()

        # Setup push-to-talk (Option key monitoring)
        self._setup_push_to_talk()

        # Load model in background so the UI stays responsive
        threading.Thread(target=self._load_model, daemon=True).start()

    def _setup_menu_bar(self):
        """Create the status bar icon and its menu (status, model picker, quit)."""
        # Create status bar item
        status_bar = NSStatusBar.systemStatusBar()
        self.statusItem = status_bar.statusItemWithLength_(NSVariableStatusItemLength)
        self.statusItem.setTitle_("🎤")

        # Create menu
        menu = NSMenu.alloc().init()

        # Status item (disabled; only displays the current state)
        self.statusMenuItem = NSMenuItem.alloc().initWithTitle_action_keyEquivalent_(
            "Ready", None, ""
        )
        self.statusMenuItem.setEnabled_(False)
        menu.addItem_(self.statusMenuItem)

        # Push-to-talk only (no explicit start/stop menu item), so a single
        # separator is enough between the status line and the model picker.
        menu.addItem_(NSMenuItem.separatorItem())

        # Model selection submenu
        model_menu_item = NSMenuItem.alloc().initWithTitle_action_keyEquivalent_(
            "Model", None, ""
        )
        model_menu = NSMenu.alloc().init()
        self.modelMenuItems = {}
        for model_name in ["tiny", "base", "small", "medium"]:
            item = NSMenuItem.alloc().initWithTitle_action_keyEquivalent_(
                model_name.capitalize(), "changeModel:", ""
            )
            item.setTarget_(self)
            item.setRepresentedObject_(model_name)
            # Check the model that is actually configured rather than a
            # hard-coded name, so the checkmark cannot drift from the state.
            if model_name == self.whisper_model_name:
                item.setState_(NSOnState)
            model_menu.addItem_(item)
            self.modelMenuItems[model_name] = item
        model_menu_item.setSubmenu_(model_menu)
        menu.addItem_(model_menu_item)
        menu.addItem_(NSMenuItem.separatorItem())

        # Quit (Cmd+Q)
        quit_item = NSMenuItem.alloc().initWithTitle_action_keyEquivalent_(
            "Quit", "terminate:", "q"
        )
        quit_item.setKeyEquivalentModifierMask_(NSEventModifierFlagCommand)
        menu.addItem_(quit_item)

        self.statusItem.setMenu_(menu)

    def _setup_push_to_talk(self):
        """Install the global and local modifier-key monitors for push-to-talk."""
        # Global monitoring for modifier flags (works even when app doesn't have focus)
        self.flagsChangedMonitor = NSEvent.addGlobalMonitorForEventsMatchingMask_handler_(
            NSEventMaskFlagsChanged, self._handle_flags_changed
        )
        # Local monitoring for modifier flags (when app has focus)
        self.localFlagsChangedMonitor = NSEvent.addLocalMonitorForEventsMatchingMask_handler_(
            NSEventMaskFlagsChanged, self._handle_local_flags_changed
        )
        print("Push-to-talk enabled: Hold Option key to record")

    def _handle_trigger_change(self, trigger_pressed):
        """Apply a new trigger-key state and start/stop recording on edges.

        Shared by the global and the local event handlers so both follow the
        same state machine.

        Args:
            trigger_pressed: True if the trigger modifier is currently held.

        Returns:
            True if the change started or stopped a recording attempt,
            False if it was ignored.
        """
        if trigger_pressed == self.modifier_pressed:
            # Some other modifier changed; the trigger state is unchanged.
            return False
        self.modifier_pressed = trigger_pressed
        if trigger_pressed and not self.recording:
            self._start_recording()
            return True
        if not trigger_pressed and self.recording:
            self._stop_recording()
            return True
        return False

    def _handle_flags_changed(self, event):
        """Handle modifier key changes reported by the global monitor.

        The whole state transition is deferred to the main queue. Evaluating
        the state here and only deferring start/stop would let a quick
        release be judged before the queued start has set ``self.recording``,
        leaving the recording running after the key was released.

        Args:
            event: The NSEvent describing the modifier flag change.
        """
        trigger_pressed = bool(event.modifierFlags() & self.trigger_modifier)

        # A named function with no return value, since the block is void.
        def apply():
            self._handle_trigger_change(trigger_pressed)

        NSOperationQueue.mainQueue().addOperationWithBlock_(apply)

    def _handle_local_flags_changed(self, event):
        """Handle modifier key changes reported by the local monitor.

        Args:
            event: The NSEvent describing the modifier flag change.

        Returns:
            None when the event started or stopped a recording attempt
            (consumed), otherwise the event itself so it is passed through.
        """
        trigger_pressed = bool(event.modifierFlags() & self.trigger_modifier)
        if self._handle_trigger_change(trigger_pressed):
            return None  # Consume event
        return event  # Pass through

    def _load_model(self):
        """Load the currently selected Whisper model (runs on a worker thread).

        The model name is captured up front. If the user selects another
        model while this one is still loading, the result is discarded so a
        slow, superseded load can never overwrite the newer selection.
        """
        model_name = self.whisper_model_name
        try:
            self._update_status("Loading model...")
            print(f"Loading Whisper model: {model_name}")
            # The model is always loaded on the CPU.
            model = whisper.load_model(model_name, device="cpu")
        except Exception as e:
            print(f"Error: Model loading failed: {e}")
            if model_name == self.whisper_model_name:
                self._update_status("Model load failed")
                self.model_loaded = False
            return

        if model_name != self.whisper_model_name:
            # A newer selection is being loaded by another thread.
            print(f"Discarding superseded model: {model_name}")
            return

        self.whisper_model = model
        self.model_loaded = True
        self._update_status("Ready")
        print(f"Model loaded: {model_name}")

    def _update_status(self, text):
        """Set the status menu item title via the main operation queue.

        Args:
            text: The title to display in the status menu item.
        """
        def update():
            self.statusMenuItem.setTitle_(text)

        NSOperationQueue.mainQueue().addOperationWithBlock_(update)

    def changeModel_(self, sender):
        """Menu action: switch to the model named by the sender's represented object.

        Args:
            sender: The NSMenuItem that was clicked.
        """
        new_model = sender.representedObject()
        if new_model == self.whisper_model_name:
            return

        # Update checkmarks
        for name, item in self.modelMenuItems.items():
            item.setState_(NSOnState if name == new_model else NSOffState)

        # Reload model in the background; recording is refused until it is ready
        self.whisper_model_name = new_model
        self.model_loaded = False
        threading.Thread(target=self._load_model, daemon=True).start()

    def _close_audio_stream(self):
        """Stop and close the open audio stream, if any, and forget it.

        Errors are reported but not raised, so callers can always continue
        with whatever audio was captured.
        """
        stream = getattr(self, "audio_stream", None)
        self.audio_stream = None
        if not stream:
            return
        try:
            try:
                stream.stop_stream()
            finally:
                # Close even if stopping failed, so the device is released.
                stream.close()
        except Exception as e:
            print(f"Error: Closing audio stream failed: {e}")

    def _start_recording(self):
        """Open a callback-driven input stream and begin collecting audio.

        Does nothing (besides logging) when audio is unavailable or the model
        has not finished loading.
        """
        if not self.pyaudio_instance:
            print("Error: Audio not available")
            return
        if not self.model_loaded:
            print("Error: Model not loaded")
            return

        try:
            self.recording = True
            self.audio_frames = []

            # Audio callback: append every captured buffer while recording is on.
            def audio_callback(in_data, frame_count, time_info, status):
                if self.recording:
                    self.audio_frames.append(in_data)
                # Input-only stream: there is no output data to hand back.
                return (None, pyaudio.paContinue)

            # Open stream
            self.audio_stream = self.pyaudio_instance.open(
                format=self.audio_format,
                channels=self.channels,
                rate=self.rate,
                input=True,
                frames_per_buffer=self.chunk,
                stream_callback=audio_callback,
            )
            self.audio_stream.start_stream()

            # Update UI
            self._update_status("Recording...")
            self.statusItem.setTitle_("🔴")
            print("Recording started")
        except Exception as e:
            print(f"Error: Recording failed: {e}")
            self.recording = False
            # Do not leave a half-opened stream behind.
            self._close_audio_stream()

    def _stop_recording(self):
        """Stop capturing and hand the recorded frames to a transcription thread."""
        if not self.recording:
            return
        try:
            self.recording = False

            # Stop stream; a failure here must not discard the captured audio.
            self._close_audio_stream()

            # Update UI
            self._update_status("Transcribing...")
            self.statusItem.setTitle_("🎤")
            print("Recording stopped")

            # Transcribe a snapshot in the background
            frames = self.audio_frames.copy()
            threading.Thread(
                target=self._transcribe_audio, args=(frames,), daemon=True
            ).start()
        except Exception as e:
            print(f"Error: Stop recording failed: {e}")

    def _transcribe_audio(self, frames):
        """Write the frames to a temporary WAV file, transcribe it and copy the text.

        The temporary file is removed whether or not transcription succeeds.

        Args:
            frames: List of raw audio byte buffers captured by the stream callback.
        """
        if not frames:
            print("Error: No audio data")
            self._update_status("No audio recorded")
            return

        # Snapshot the model so a concurrent model switch cannot swap it mid-run.
        model = self.whisper_model
        temp_path = None
        try:
            try:
                # Save to temp file
                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
                    temp_path = temp_file.name
                with wave.open(temp_path, 'wb') as wf:
                    wf.setnchannels(self.channels)
                    wf.setsampwidth(self.pyaudio_instance.get_sample_size(self.audio_format))
                    wf.setframerate(self.rate)
                    wf.writeframes(b''.join(frames))
                print(f"Transcribing audio file: {temp_path}")

                # Transcribe
                result = model.transcribe(temp_path, fp16=False)
                text = result["text"].strip()
            finally:
                # Cleanup, also on failure, so temp files do not accumulate.
                if temp_path is not None:
                    try:
                        os.unlink(temp_path)
                    except OSError as e:
                        print(f"Error: Could not remove temp file {temp_path}: {e}")

            if text:
                print(f"Transcription: {text}")
                # Copy to clipboard
                self._copy_to_clipboard(text)
                self._update_status("Done - Copied to clipboard")
                # Show result in console
                print(f"Result copied to clipboard: {text}")
            else:
                print("No speech detected")
                self._update_status("No speech detected")
        except Exception as e:
            print(f"Error: Transcription failed: {e}")
            self._update_status("Transcription failed")

    def _copy_to_clipboard(self, text):
        """Replace the general pasteboard contents with ``text``.

        Args:
            text: The string to place on the pasteboard.
        """
        pasteboard = NSPasteboard.generalPasteboard()
        pasteboard.clearContents()
        pasteboard.setString_forType_(text, NSStringPboardType)

    def applicationWillTerminate_(self, notification):
        """Remove the event monitors and release audio resources on quit.

        Args:
            notification: The termination notification passed by AppKit (unused).
        """
        print("Shutting down...")

        # Remove event monitors
        if getattr(self, 'flagsChangedMonitor', None):
            NSEvent.removeMonitor_(self.flagsChangedMonitor)
        if getattr(self, 'localFlagsChangedMonitor', None):
            NSEvent.removeMonitor_(self.localFlagsChangedMonitor)

        self._close_audio_stream()
        if getattr(self, 'pyaudio_instance', None):
            self.pyaudio_instance.terminate()
if __name__ == "__main__":
    # Script entry point: install a WhisperMenuBar instance as the delegate of
    # the shared application, then hand control to the Cocoa event loop.
    print("Initializing Whisper Menu Bar App...")
    shared_app = NSApplication.sharedApplication()
    # Bound to a module-level name so the delegate stays referenced from
    # Python for as long as the event loop runs.
    menu_bar_delegate = WhisperMenuBar.alloc().init()
    shared_app.setDelegate_(menu_bar_delegate)
    print("Starting...")
    AppHelper.runEventLoop()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment