Skip to content

Instantly share code, notes, and snippets.

@senko
Created November 6, 2025 08:20
Show Gist options
  • Select an option

  • Save senko/c25d1106f7cc0dfd3001368e4c1added to your computer and use it in GitHub Desktop.

Select an option

Save senko/c25d1106f7cc0dfd3001368e4c1added to your computer and use it in GitHub Desktop.
Dictat - simple dictation+transcription app for GNOME in Python, using PipeWire and Whisper
#!/usr/bin/env -S uv run -s
"""
Dictat - A simple dictation and transcription app for GNOME
Records audio using PipeWire (pw-record) and transcribes using local Whisper.
"""
# /// script
# requires-python = ">=3.13"
# dependencies = [
# "openai-whisper>=20250625",
# "pygobject==3.50.0",
# ]
# ///
import gi
gi.require_version('Gtk', '3.0')
from gi.repository import Gtk, Gdk, GLib
import subprocess
import signal
import tempfile
import shutil
import os
from pathlib import Path
from enum import Enum
class AppState(Enum):
    """States of the dictation workflow; each one selects a different UI."""

    # Nothing in progress; the "Start Recording" button is shown.
    IDLE = 1
    # pw-record is capturing audio.
    RECORDING = 2
    # whisper is processing the recorded file.
    TRANSCRIBING = 3
    # The transcript is displayed for editing / copying.
    SHOWING_RESULT = 4
class DictatWindow(Gtk.Window):
    """Main application window.

    The window hosts a single vertical box whose contents are rebuilt
    whenever ``self.state`` changes: idle -> recording (``pw-record``) ->
    transcribing (``whisper`` CLI) -> showing the editable transcript.
    """

    # Label of the copy button; restored after the transient "Copied!" feedback.
    COPY_BUTTON_LABEL = "Copy to Clipboard"
    # Upper bound on how much of whisper's stderr is shown in the error dialog.
    MAX_ERROR_CHARS = 2000

    def __init__(self):
        """Create the window, initialise session state and show the idle UI."""
        super().__init__(title="Dictat - Voice Transcription")

        # Window setup
        self.set_position(Gtk.WindowPosition.CENTER)
        self.set_default_size(500, 400)
        self.set_border_width(10)

        # Connect signals
        self.connect("destroy", self.on_destroy)
        self.connect("key-press-event", self.on_key_press)

        # Initialize state
        self.state = AppState.IDLE
        self.recording_process = None      # Popen for pw-record, or None
        self.transcription_process = None  # Popen for whisper, or None
        self.temp_dir = None               # per-session temporary directory
        self.audio_file = None             # Path of the recorded WAV file
        self.transcript_text = ""

        # Create main container
        self.vbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
        self.add(self.vbox)

        # Build initial UI
        self.build_ui()
        self.show_all()

    def on_key_press(self, widget, event):
        """Handle keyboard shortcuts: Esc, Ctrl+Q and Ctrl+W close the window.

        Returns True when the key was handled, False to let GTK propagate it.
        """
        quit_keys = (Gdk.KEY_q, Gdk.KEY_Q, Gdk.KEY_w, Gdk.KEY_W)
        ctrl_held = (event.state & Gdk.ModifierType.CONTROL_MASK) != 0

        if event.keyval == Gdk.KEY_Escape or (ctrl_held and event.keyval in quit_keys):
            # Close the window instead of quitting the main loop directly so
            # that the "destroy" handler runs: it stops child processes,
            # removes the temp directory and then quits the main loop.
            self.close()
            return True

        return False

    def build_ui(self):
        """Rebuild the contents of the main box for the current state."""
        # Clear existing widgets
        for child in self.vbox.get_children():
            self.vbox.remove(child)

        builders = {
            AppState.IDLE: self.build_idle_ui,
            AppState.RECORDING: self.build_recording_ui,
            AppState.TRANSCRIBING: self.build_transcribing_ui,
            AppState.SHOWING_RESULT: self.build_result_ui,
        }
        builder = builders.get(self.state)
        if builder is not None:
            builder()

        self.vbox.show_all()

    def _add_spacer(self):
        """Append an expanding empty box, used to centre content vertically."""
        self.vbox.pack_start(Gtk.Box(), True, True, 0)

    def _add_heading(self, text):
        """Append a big, bold status label showing *text*."""
        label = Gtk.Label(label=text)
        label.set_markup(f"<big><b>{text}</b></big>")
        self.vbox.pack_start(label, False, False, 10)

    def build_idle_ui(self):
        """Build UI for idle state: a single centred "Start Recording" button."""
        self._add_spacer()

        button = Gtk.Button(label="Start Recording")
        button.connect("clicked", self.on_start_recording)
        self.vbox.pack_start(button, False, False, 0)

        self._add_spacer()

    def build_recording_ui(self):
        """Build UI for recording state: a status label and a stop button."""
        self._add_spacer()
        self._add_heading("Recording...")

        button = Gtk.Button(label="Stop Recording")
        button.connect("clicked", self.on_stop_recording)
        self.vbox.pack_start(button, False, False, 0)

        self._add_spacer()

    def build_transcribing_ui(self):
        """Build UI for transcribing state: a spinner and a status label."""
        self._add_spacer()

        spinner = Gtk.Spinner()
        spinner.start()
        self.vbox.pack_start(spinner, False, False, 10)

        self._add_heading("Transcribing...")
        self._add_spacer()

    def build_result_ui(self):
        """Build UI for showing the transcript in an editable text view."""
        # Create scrolled window for text view
        scrolled_window = Gtk.ScrolledWindow()
        scrolled_window.set_hexpand(True)
        scrolled_window.set_vexpand(True)
        scrolled_window.set_policy(Gtk.PolicyType.AUTOMATIC, Gtk.PolicyType.AUTOMATIC)

        # Create text view
        text_view = Gtk.TextView()
        text_view.set_wrap_mode(Gtk.WrapMode.WORD)
        text_view.set_editable(True)
        text_view.set_cursor_visible(True)
        text_view.set_left_margin(10)
        text_view.set_right_margin(10)
        text_view.set_top_margin(10)
        text_view.set_bottom_margin(10)

        # Set the text
        text_view.get_buffer().set_text(self.transcript_text)

        scrolled_window.add(text_view)
        self.vbox.pack_start(scrolled_window, True, True, 0)

        # Button box for actions
        button_box = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=10)
        button_box.set_halign(Gtk.Align.CENTER)

        # Copy to clipboard button; the text view is passed along so the
        # handler copies the (possibly edited) buffer contents.
        copy_button = Gtk.Button(label=self.COPY_BUTTON_LABEL)
        copy_button.connect("clicked", self.on_copy_to_clipboard, text_view)
        button_box.pack_start(copy_button, False, False, 0)

        # New recording button
        new_button = Gtk.Button(label="New Recording")
        new_button.connect("clicked", self.on_new_recording)
        button_box.pack_start(new_button, False, False, 0)

        self.vbox.pack_start(button_box, False, False, 0)

    def on_start_recording(self, button):
        """Start recording audio with pw-record into a fresh temp directory."""
        try:
            # Create temporary directory for this session
            self.temp_dir = tempfile.mkdtemp(prefix="dictat_")
            self.audio_file = Path(self.temp_dir) / "recording.wav"

            # Start pw-record process. Its output is never read, so discard it
            # rather than piping it: an undrained pipe can stall the child.
            self.recording_process = subprocess.Popen(
                ["pw-record", str(self.audio_file)],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )

            # Update state and UI
            self.state = AppState.RECORDING
            self.build_ui()
        except Exception as e:
            self.show_error(f"Failed to start recording: {e}")

    def on_stop_recording(self, button):
        """Stop recording and start transcription."""
        process = self.recording_process
        if process is None:
            return

        try:
            # Send SIGINT to pw-record and give it a moment to exit.
            process.send_signal(signal.SIGINT)
            process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            # Force kill if it doesn't stop gracefully
            self._kill_process(process)
            self.recording_process = None
            self.show_error("Recording process did not stop gracefully")
            return
        except Exception as e:
            # Do not leave a recorder running (and referenced) after a failure.
            self._kill_process(process)
            self.recording_process = None
            self.show_error(f"Failed to stop recording: {e}")
            return

        self.recording_process = None

        # Update state to transcribing
        self.state = AppState.TRANSCRIBING
        self.build_ui()

        # Start transcription from the main loop once the UI has been updated
        GLib.idle_add(self.transcribe_audio)

    def _whisper_log_path(self):
        """Return the path of the file that captures whisper's stderr."""
        return Path(self.temp_dir) / "whisper-stderr.log"

    def _read_whisper_error(self):
        """Return the tail of whisper's captured stderr, or "Unknown error"."""
        try:
            raw = self._whisper_log_path().read_bytes()
        except (OSError, TypeError):
            # Log missing/unreadable, or temp_dir already cleared (None).
            return "Unknown error"
        message = raw.decode("utf-8", errors="replace").strip()
        if not message:
            return "Unknown error"
        return message[-self.MAX_ERROR_CHARS:]

    def transcribe_audio(self):
        """Start transcribing the recorded audio using Whisper (non-blocking).

        Runs as a GLib idle callback; always returns False so it runs once.
        """
        try:
            # stdout is discarded and stderr goes to a file instead of pipes:
            # nothing drains the pipes while we poll, so a chatty whisper run
            # would fill the pipe buffer and block forever.
            with open(self._whisper_log_path(), "wb") as log_file:
                self.transcription_process = subprocess.Popen(
                    [
                        "whisper",
                        "--model", "turbo",
                        "--output_format", "txt",
                        "--task", "transcribe",
                        str(self.audio_file),
                    ],
                    cwd=str(self.temp_dir),
                    stdout=subprocess.DEVNULL,
                    stderr=log_file,
                )

            # Start polling to check when transcription is complete
            # Check every 200ms to keep UI responsive
            GLib.timeout_add(200, self.check_transcription_complete)
        except Exception as e:
            self.show_error(f"Failed to start transcription: {e}")

        return False  # Don't repeat this idle callback

    def check_transcription_complete(self):
        """Poll transcription process and update UI when complete.

        Runs as a GLib timeout callback: returns True to keep polling and
        False once the process has finished (or was cancelled).
        """
        process = self.transcription_process
        if process is None:
            return False  # Stop polling

        returncode = process.poll()
        if returncode is None:
            # Still running, keep polling
            return True

        # Process has finished; it has already been reaped by poll().
        self.transcription_process = None

        try:
            if returncode != 0:
                self.show_error(f"Transcription failed: {self._read_whisper_error()}")
                return False

            # Read the transcript file whisper wrote next to the recording
            transcript_file = Path(self.temp_dir) / "recording.txt"
            try:
                transcript = transcript_file.read_text(encoding="utf-8")
            except FileNotFoundError:
                self.show_error("Transcript file not found")
                return False

            self.transcript_text = transcript.strip()

            # Update state and UI to show result
            self.state = AppState.SHOWING_RESULT
            self.build_ui()
        except Exception as e:
            self.show_error(f"Transcription error: {e}")

        return False  # Stop polling

    def on_copy_to_clipboard(self, button, text_view):
        """Copy the current contents of *text_view* to the clipboard."""
        # Get the text from the text view
        text_buffer = text_view.get_buffer()
        start_iter = text_buffer.get_start_iter()
        end_iter = text_buffer.get_end_iter()
        text = text_buffer.get_text(start_iter, end_iter, True)

        # Copy to clipboard
        clipboard = Gtk.Clipboard.get(Gdk.SELECTION_CLIPBOARD)
        clipboard.set_text(text, -1)

        # Provide feedback. Restore the fixed label rather than whatever the
        # button showed at click time, otherwise a second click within the
        # feedback window would leave it stuck on "Copied!".
        button.set_label("Copied!")

        def restore_label():
            button.set_label(self.COPY_BUTTON_LABEL)
            return False  # one-shot timeout

        GLib.timeout_add(1000, restore_label)

    def on_new_recording(self, button):
        """Discard the previous session and return to the idle state."""
        # Clean up previous session
        self.cleanup_temp_files()

        # Reset state
        self.transcript_text = ""
        self.state = AppState.IDLE
        self.build_ui()

    def show_error(self, message):
        """Show a modal error dialog, then clean up and return to idle state."""
        dialog = Gtk.MessageDialog(
            transient_for=self,
            flags=0,
            message_type=Gtk.MessageType.ERROR,
            buttons=Gtk.ButtonsType.OK,
            text="Error",
        )
        dialog.format_secondary_text(message)
        dialog.run()
        dialog.destroy()

        # Clean up and return to idle
        self.cleanup_temp_files()
        self.state = AppState.IDLE
        self.build_ui()

    def cleanup_temp_files(self):
        """Remove the session's temp directory and forget its paths."""
        temp_dir = self.temp_dir
        # Always reset the references, even if the directory is already gone
        # or removal fails, so a stale path is never reused.
        self.temp_dir = None
        self.audio_file = None

        if not temp_dir:
            return

        try:
            shutil.rmtree(temp_dir)
        except FileNotFoundError:
            pass  # already removed; nothing to do
        except OSError as e:
            print(f"Error cleaning up temp files: {e}")

    @staticmethod
    def _kill_process(process):
        """Forcefully stop *process* (if any) and reap it; best effort."""
        if process is None:
            return
        try:
            process.kill()
            process.wait()
        except OSError as e:
            print(f"Error stopping process: {e}")

    def on_destroy(self, widget):
        """Handle window destruction: stop children, clean up, quit the loop."""
        # Kill any running processes
        self._kill_process(self.recording_process)
        self.recording_process = None
        self._kill_process(self.transcription_process)
        self.transcription_process = None

        # Clean up temp files
        self.cleanup_temp_files()
        Gtk.main_quit()
def main():
    """Main entry point: create the window and run the GTK main loop."""
    # The window shows itself in its constructor; the reference is held only
    # so the object stays alive for the duration of the main loop.
    window = DictatWindow()
    Gtk.main()
# Run the app only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment