Created
November 6, 2025 08:20
-
-
Save senko/c25d1106f7cc0dfd3001368e4c1added to your computer and use it in GitHub Desktop.
Dictat - simple dictation+transcription app for GNOME in Python, using PipeWire and Whisper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env -S uv run -s | |
| """ | |
| Dictat - A simple dictation and transcription app for GNOME | |
| Records audio using PipeWire (pw-record) and transcribes using local Whisper. | |
| """ | |
| # /// script | |
| # requires-python = ">=3.13" | |
| # dependencies = [ | |
| # "openai-whisper>=20250625", | |
| # "pygobject==3.50.0", | |
| # ] | |
| # /// | |
| import gi | |
| gi.require_version('Gtk', '3.0') | |
| from gi.repository import Gtk, Gdk, GLib | |
| import subprocess | |
| import signal | |
| import tempfile | |
| import shutil | |
| import os | |
| from pathlib import Path | |
| from enum import Enum | |
class AppState(Enum):
    """States the dictation window moves through.

    The window rebuilds its widgets whenever the state changes, showing a
    different set of controls for each member below.
    """

    # Nothing in progress; a "Start Recording" button is offered.
    IDLE = 1
    # A recording subprocess is capturing audio.
    RECORDING = 2
    # A transcription subprocess is processing the recorded audio.
    TRANSCRIBING = 3
    # The transcript is displayed for editing and copying.
    SHOWING_RESULT = 4
class DictatWindow(Gtk.Window):
    """Main application window.

    Drives a small state machine (see ``AppState``): idle -> recording ->
    transcribing -> showing result.  Audio is captured by a ``pw-record``
    subprocess into a per-session temporary directory and transcribed by a
    ``whisper`` subprocess that writes ``recording.txt`` next to the audio.
    """

    # Default caption of the copy button; restored after the "Copied!" flash.
    COPY_LABEL = "Copy to Clipboard"
    # File names used inside the per-session temporary directory.
    AUDIO_FILE_NAME = "recording.wav"
    TRANSCRIPT_FILE_NAME = "recording.txt"
    WHISPER_LOG_NAME = "whisper.log"

    def __init__(self):
        """Set up the window, initialise session state and show the idle UI."""
        super().__init__(title="Dictat - Voice Transcription")

        # Window setup
        self.set_position(Gtk.WindowPosition.CENTER)
        self.set_default_size(500, 400)
        self.set_border_width(10)

        # Connect signals
        self.connect("destroy", self.on_destroy)
        self.connect("key-press-event", self.on_key_press)

        # Initialize state
        self.state = AppState.IDLE
        self.recording_process = None
        self.transcription_process = None
        self.temp_dir = None
        self.audio_file = None
        self.transcript_text = ""

        # Create main container
        self.vbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
        self.add(self.vbox)

        # Build initial UI
        self.build_ui()
        self.show_all()

    def on_key_press(self, widget, event):
        """Handle keyboard shortcuts (Escape, Ctrl+Q, Ctrl+W close the app).

        Returns True when the key was handled, False to let GTK continue
        propagating the event.
        """
        # Destroying the window (rather than quitting the main loop directly)
        # runs on_destroy, which kills child processes and removes temp files.
        if event.keyval == Gdk.KEY_Escape:
            self.destroy()
            return True

        if event.state & Gdk.ModifierType.CONTROL_MASK:
            # Lower-case the keyval so the shortcut also works with Caps Lock
            # or Shift held.
            if Gdk.keyval_to_lower(event.keyval) in (Gdk.KEY_q, Gdk.KEY_w):
                self.destroy()
                return True

        return False

    def build_ui(self):
        """Replace the window contents with the widgets for the current state."""
        # Clear existing widgets
        for child in self.vbox.get_children():
            self.vbox.remove(child)

        builders = {
            AppState.IDLE: self.build_idle_ui,
            AppState.RECORDING: self.build_recording_ui,
            AppState.TRANSCRIBING: self.build_transcribing_ui,
            AppState.SHOWING_RESULT: self.build_result_ui,
        }
        builder = builders.get(self.state)
        if builder is not None:
            builder()

        self.vbox.show_all()

    def _add_spacer(self):
        """Pack an empty expanding box, used to centre content vertically."""
        self.vbox.pack_start(Gtk.Box(), True, True, 0)

    def build_idle_ui(self):
        """Build UI for idle state: a single centred start button."""
        self._add_spacer()

        button = Gtk.Button(label="Start Recording")
        button.connect("clicked", self.on_start_recording)
        self.vbox.pack_start(button, False, False, 0)

        self._add_spacer()

    def build_recording_ui(self):
        """Build UI for recording state: a status label and a stop button."""
        self._add_spacer()

        label = Gtk.Label(label="Recording...")
        label.set_markup("<big><b>Recording...</b></big>")
        self.vbox.pack_start(label, False, False, 10)

        button = Gtk.Button(label="Stop Recording")
        button.connect("clicked", self.on_stop_recording)
        self.vbox.pack_start(button, False, False, 0)

        self._add_spacer()

    def build_transcribing_ui(self):
        """Build UI for transcribing state: a spinner and a status label."""
        self._add_spacer()

        spinner = Gtk.Spinner()
        spinner.start()
        self.vbox.pack_start(spinner, False, False, 10)

        label = Gtk.Label(label="Transcribing...")
        label.set_markup("<big><b>Transcribing...</b></big>")
        self.vbox.pack_start(label, False, False, 10)

        self._add_spacer()

    def build_result_ui(self):
        """Build UI for showing the transcript in an editable text view."""
        # Create scrolled window for text view
        scrolled_window = Gtk.ScrolledWindow()
        scrolled_window.set_hexpand(True)
        scrolled_window.set_vexpand(True)
        scrolled_window.set_policy(
            Gtk.PolicyType.AUTOMATIC, Gtk.PolicyType.AUTOMATIC
        )

        # Create text view
        text_view = Gtk.TextView()
        text_view.set_wrap_mode(Gtk.WrapMode.WORD)
        text_view.set_editable(True)
        text_view.set_cursor_visible(True)
        text_view.set_left_margin(10)
        text_view.set_right_margin(10)
        text_view.set_top_margin(10)
        text_view.set_bottom_margin(10)

        # Set the text
        text_view.get_buffer().set_text(self.transcript_text)

        scrolled_window.add(text_view)
        self.vbox.pack_start(scrolled_window, True, True, 0)

        # Button box for actions
        button_box = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=10)
        button_box.set_halign(Gtk.Align.CENTER)

        # The text view is passed along so that user edits are what gets copied.
        copy_button = Gtk.Button(label=self.COPY_LABEL)
        copy_button.connect("clicked", self.on_copy_to_clipboard, text_view)
        button_box.pack_start(copy_button, False, False, 0)

        new_button = Gtk.Button(label="New Recording")
        new_button.connect("clicked", self.on_new_recording)
        button_box.pack_start(new_button, False, False, 0)

        self.vbox.pack_start(button_box, False, False, 0)

    def on_start_recording(self, button):
        """Start recording audio into a fresh temporary directory."""
        try:
            # Create temporary directory for this session
            self.temp_dir = tempfile.mkdtemp(prefix="dictat_")
            self.audio_file = Path(self.temp_dir) / self.AUDIO_FILE_NAME

            # The recorder's output is never read, so discard it instead of
            # piping it: an unread pipe can fill up and block the child.
            self.recording_process = subprocess.Popen(
                ["pw-record", str(self.audio_file)],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )

            # Update state and UI
            self.state = AppState.RECORDING
            self.build_ui()
        except Exception as e:
            self.show_error(f"Failed to start recording: {e}")

    def on_stop_recording(self, button):
        """Stop the recorder and, if it exits cleanly, start transcription."""
        process = self.recording_process
        if process is None:
            return
        # Drop the handle first so no error path leaves a stale reference.
        self.recording_process = None

        try:
            # SIGINT asks pw-record to stop; wait for it to exit.
            process.send_signal(signal.SIGINT)
            process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            # Force kill if it doesn't stop gracefully
            self._kill_process(process)
            self.show_error("Recording process did not stop gracefully")
            return
        except Exception as e:
            self._kill_process(process)
            self.show_error(f"Failed to stop recording: {e}")
            return

        # Update state to transcribing
        self.state = AppState.TRANSCRIBING
        self.build_ui()

        # Start transcription once the main loop is idle, after the redraw.
        GLib.idle_add(self.transcribe_audio)

    def transcribe_audio(self):
        """Launch Whisper on the recorded audio without blocking the UI.

        Returns False so that GLib does not repeat this idle callback.
        """
        try:
            log_path = Path(self.temp_dir) / self.WHISPER_LOG_NAME
            # stdout is discarded and stderr goes to a file: the process is
            # only polled, never read from, so pipes could fill up and stall
            # Whisper indefinitely.
            with open(log_path, "wb") as log_file:
                self.transcription_process = subprocess.Popen(
                    [
                        "whisper",
                        "--model", "turbo",
                        "--output_format", "txt",
                        "--task", "transcribe",
                        str(self.audio_file),
                    ],
                    cwd=str(self.temp_dir),
                    stdout=subprocess.DEVNULL,
                    stderr=log_file,
                )

            # Check every 200ms to keep UI responsive
            GLib.timeout_add(200, self.check_transcription_complete)
        except Exception as e:
            self.show_error(f"Failed to start transcription: {e}")

        return False  # Don't repeat this idle callback

    def _read_whisper_log(self):
        """Return Whisper's captured stderr text, or "Unknown error" if none."""
        try:
            raw = (Path(self.temp_dir) / self.WHISPER_LOG_NAME).read_bytes()
        except (OSError, TypeError):
            # Log missing/unreadable, or temp_dir already cleared (None).
            return "Unknown error"
        text = raw.decode("utf-8", errors="replace").strip()
        return text or "Unknown error"

    def check_transcription_complete(self):
        """Poll the transcription process and show the result when it is done.

        Returns True while Whisper is still running (keep polling) and False
        once it has finished or there is nothing to poll.
        """
        process = self.transcription_process
        if process is None:
            return False  # Stop polling

        returncode = process.poll()
        if returncode is None:
            # Still running, keep polling
            return True

        # Process has finished; forget the handle before any dialog is shown.
        self.transcription_process = None

        try:
            if returncode != 0:
                # The log must be read before show_error removes the temp dir.
                error_msg = self._read_whisper_log()
                self.show_error(f"Transcription failed: {error_msg}")
                return False

            transcript_file = Path(self.temp_dir) / self.TRANSCRIPT_FILE_NAME
            if not transcript_file.exists():
                self.show_error("Transcript file not found")
                return False

            # Explicit encoding: do not depend on the locale's default.
            self.transcript_text = transcript_file.read_text(
                encoding="utf-8"
            ).strip()

            # Update state and UI to show result
            self.state = AppState.SHOWING_RESULT
            self.build_ui()
        except Exception as e:
            self.show_error(f"Transcription error: {e}")

        return False  # Stop polling

    def on_copy_to_clipboard(self, button, text_view):
        """Copy the (possibly edited) transcript to the clipboard."""
        text_buffer = text_view.get_buffer()
        text = text_buffer.get_text(
            text_buffer.get_start_iter(), text_buffer.get_end_iter(), True
        )

        clipboard = Gtk.Clipboard.get(Gdk.SELECTION_CLIPBOARD)
        clipboard.set_text(text, -1)

        # Flash feedback, then restore the fixed default caption.  Restoring
        # from a constant (not the button's current label) keeps repeated
        # clicks from leaving the button stuck on "Copied!".
        button.set_label("Copied!")
        GLib.timeout_add(1000, self._restore_copy_label, button)

    def _restore_copy_label(self, button):
        """Reset the copy button caption; returns False to run only once."""
        button.set_label(self.COPY_LABEL)
        return False

    def on_new_recording(self, button):
        """Discard the previous session and return to the idle state."""
        self.cleanup_temp_files()

        self.transcript_text = ""
        self.state = AppState.IDLE
        self.build_ui()

    def show_error(self, message):
        """Show a modal error dialog, then clean up and return to idle."""
        dialog = Gtk.MessageDialog(
            transient_for=self,
            flags=0,
            message_type=Gtk.MessageType.ERROR,
            buttons=Gtk.ButtonsType.OK,
            text="Error",
        )
        dialog.format_secondary_text(message)
        dialog.run()
        dialog.destroy()

        # Clean up and return to idle
        self.cleanup_temp_files()
        self.state = AppState.IDLE
        self.build_ui()

    def cleanup_temp_files(self):
        """Remove the session's temporary directory (best effort)."""
        temp_dir = self.temp_dir
        # Always forget the paths, even if the directory is already gone.
        self.temp_dir = None
        self.audio_file = None

        if temp_dir and os.path.exists(temp_dir):
            try:
                shutil.rmtree(temp_dir)
            except OSError as e:
                print(f"Error cleaning up temp files: {e}")

    @staticmethod
    def _kill_process(process):
        """Kill and reap *process* if given; OS-level failures are reported."""
        if process is None:
            return
        try:
            process.kill()
            process.wait()
        except OSError as e:
            print(f"Error stopping subprocess: {e}")

    def on_destroy(self, widget):
        """Handle window destruction: stop children, clean up, quit GTK."""
        # Kill any running processes
        self._kill_process(self.recording_process)
        self.recording_process = None
        self._kill_process(self.transcription_process)
        self.transcription_process = None

        # Clean up temp files
        self.cleanup_temp_files()
        Gtk.main_quit()
def main():
    """Create the application window and run the GTK main loop."""
    # The constructor builds and shows the window; Gtk.main() then blocks
    # until Gtk.main_quit() is called.
    app_window = DictatWindow()
    Gtk.main()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment