senko · November 6, 2025 08:20
diff --git a/dictat.py b/dictat.py
 #!/usr/bin/env -S uv run -s
 """
 Dictat - A simple dictation and transcription app for GNOME

 Records audio using PipeWire (pw-record) and transcribes using local Whisper.
 """

 # /// script
 # requires-python = ">=3.13"
 # dependencies = [
 #     "openai-whisper>=20250625",
 #     "pygobject==3.50.0",
 # ]
 # ///

 import gi
 gi.require_version('Gtk', '3.0')
 from gi.repository import Gtk, Gdk, GLib

 import subprocess
 import signal
 import tempfile
 import shutil
 import os
 from pathlib import Path
 from enum import Enum


 class AppState(Enum):
    """Phases of one dictation session, listed in the order they occur."""

    # Waiting for the user to start a recording.
    IDLE = 1
    # Audio is currently being captured.
    RECORDING = 2
    # The captured audio is being turned into text.
    TRANSCRIBING = 3
    # The finished transcript is on screen.
    SHOWING_RESULT = 4


 class DictatWindow(Gtk.Window):
    """Main application window.

    The window is a small state machine keyed on ``self.state`` (an
    ``AppState``).  ``build_ui`` swaps the contents of ``self.vbox`` to match
    the current state.  Audio is captured by a ``pw-record`` child process and
    transcribed by a ``whisper`` child process; both work inside a per-session
    temporary directory that ``cleanup_temp_files`` removes.
    """

    # Seconds to wait for pw-record to exit after SIGINT before killing it.
    RECORD_STOP_TIMEOUT = 5
    # Milliseconds between polls of the running whisper process.
    TRANSCRIBE_POLL_MS = 200
    # Copy button labels, and how long (ms) the confirmation stays visible.
    COPY_LABEL = "Copy to Clipboard"
    COPIED_LABEL = "Copied!"
    COPIED_FEEDBACK_MS = 1000

    def __init__(self):
        """Create the window, initialise session state and show the idle UI."""
        super().__init__(title="Dictat - Voice Transcription")

        # Window setup
        self.set_position(Gtk.WindowPosition.CENTER)
        self.set_default_size(500, 400)
        self.set_border_width(10)

        # Connect signals
        self.connect("destroy", self.on_destroy)
        self.connect("key-press-event", self.on_key_press)

        # Session state
        self.state = AppState.IDLE
        self.recording_process = None      # Popen handle for pw-record
        self.transcription_process = None  # Popen handle for whisper
        self.temp_dir = None               # per-session scratch directory
        self.audio_file = None             # Path of the recorded WAV
        self.transcription_log = None      # Path of whisper's captured stderr
        self.transcript_text = ""

        # Single container whose children are replaced on every state change
        self.vbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
        self.add(self.vbox)

        self.build_ui()
        self.show_all()

    def on_key_press(self, widget, event):
        """Close the window on Esc, Ctrl+Q or Ctrl+W.

        Returns True when the key was handled, False to let GTK continue
        its normal key processing.
        """
        # Fold case so the shortcuts still work with Caps Lock or Shift held.
        keyval = Gdk.keyval_to_lower(event.keyval)
        ctrl_held = bool(event.state & Gdk.ModifierType.CONTROL_MASK)

        if keyval == Gdk.KEY_Escape or (ctrl_held and keyval in (Gdk.KEY_q, Gdk.KEY_w)):
            # Destroy the window instead of calling Gtk.main_quit() directly:
            # the "destroy" handler stops child processes, removes the temp
            # directory and then quits the main loop.
            self.destroy()
            return True

        return False

    def build_ui(self):
        """Replace the contents of ``self.vbox`` with the UI for ``self.state``."""
        for child in self.vbox.get_children():
            self.vbox.remove(child)

        builders = {
            AppState.IDLE: self.build_idle_ui,
            AppState.RECORDING: self.build_recording_ui,
            AppState.TRANSCRIBING: self.build_transcribing_ui,
            AppState.SHOWING_RESULT: self.build_result_ui,
        }
        builder = builders.get(self.state)
        if builder is not None:
            builder()

        self.vbox.show_all()

    def _add_spacer(self):
        """Pack an empty expanding box; a pair of them centres content vertically."""
        self.vbox.pack_start(Gtk.Box(), True, True, 0)

    def _add_heading(self, text):
        """Pack a large bold label showing *text* (inserted into Pango markup)."""
        label = Gtk.Label()
        label.set_markup(f"<big><b>{text}</b></big>")
        self.vbox.pack_start(label, False, False, 10)

    def build_idle_ui(self):
        """Build UI for idle state: a centred "Start Recording" button."""
        self._add_spacer()

        button = Gtk.Button(label="Start Recording")
        button.connect("clicked", self.on_start_recording)
        self.vbox.pack_start(button, False, False, 0)

        self._add_spacer()

    def build_recording_ui(self):
        """Build UI for recording state: a heading and a "Stop Recording" button."""
        self._add_spacer()
        self._add_heading("Recording...")

        button = Gtk.Button(label="Stop Recording")
        button.connect("clicked", self.on_stop_recording)
        self.vbox.pack_start(button, False, False, 0)

        self._add_spacer()

    def build_transcribing_ui(self):
        """Build UI for transcribing state: a running spinner and a heading."""
        self._add_spacer()

        spinner = Gtk.Spinner()
        spinner.start()
        self.vbox.pack_start(spinner, False, False, 10)

        self._add_heading("Transcribing...")
        self._add_spacer()

    def build_result_ui(self):
        """Build UI for the result: an editable transcript plus action buttons."""
        scrolled_window = Gtk.ScrolledWindow()
        scrolled_window.set_hexpand(True)
        scrolled_window.set_vexpand(True)
        scrolled_window.set_policy(Gtk.PolicyType.AUTOMATIC, Gtk.PolicyType.AUTOMATIC)

        # Editable so the user can correct the transcript before copying it
        text_view = Gtk.TextView()
        text_view.set_wrap_mode(Gtk.WrapMode.WORD)
        text_view.set_editable(True)
        text_view.set_cursor_visible(True)
        text_view.set_left_margin(10)
        text_view.set_right_margin(10)
        text_view.set_top_margin(10)
        text_view.set_bottom_margin(10)
        text_view.get_buffer().set_text(self.transcript_text)

        scrolled_window.add(text_view)
        self.vbox.pack_start(scrolled_window, True, True, 0)

        # Button box for actions
        button_box = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=10)
        button_box.set_halign(Gtk.Align.CENTER)

        # The text view is passed along so the handler copies the edited text
        copy_button = Gtk.Button(label=self.COPY_LABEL)
        copy_button.connect("clicked", self.on_copy_to_clipboard, text_view)
        button_box.pack_start(copy_button, False, False, 0)

        new_button = Gtk.Button(label="New Recording")
        new_button.connect("clicked", self.on_new_recording)
        button_box.pack_start(new_button, False, False, 0)

        self.vbox.pack_start(button_box, False, False, 0)

    def on_start_recording(self, button):
        """Create a session temp directory and start ``pw-record`` writing into it."""
        try:
            self.temp_dir = tempfile.mkdtemp(prefix="dictat_")
            self.audio_file = Path(self.temp_dir) / "recording.wav"

            # The recorder's output is never read, so discard it: an unread
            # PIPE would block the child once the pipe buffer fills up.
            self.recording_process = subprocess.Popen(
                ["pw-record", str(self.audio_file)],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )

            self.state = AppState.RECORDING
            self.build_ui()

        except Exception as e:
            self.show_error(f"Failed to start recording: {e}")

    def on_stop_recording(self, button):
        """Stop ``pw-record`` and, if audio was written, start transcription."""
        process = self.recording_process
        if process is None:
            return
        # Drop the handle first so no exit path leaves a stale reference.
        self.recording_process = None

        try:
            # Ask the recorder to stop gracefully, then wait for it to exit.
            process.send_signal(signal.SIGINT)
            process.wait(timeout=self.RECORD_STOP_TIMEOUT)
        except subprocess.TimeoutExpired:
            self._kill_process(process)
            self.show_error("Recording process did not stop gracefully")
            return
        except Exception as e:
            # Make sure the recorder is not left running behind the idle UI.
            self._kill_process(process)
            self.show_error(f"Failed to stop recording: {e}")
            return

        if self.audio_file is None or not self.audio_file.exists():
            self.show_error("Recording failed: no audio file was produced")
            return

        self.state = AppState.TRANSCRIBING
        self.build_ui()

        # Defer the launch so the transcribing UI is displayed first.
        GLib.idle_add(self.transcribe_audio)

    def transcribe_audio(self):
        """Launch ``whisper`` on the recorded audio without blocking the UI.

        Used as a one-shot GLib idle callback, so it always returns False.
        """
        try:
            self.transcription_log = Path(self.temp_dir) / "whisper-stderr.log"

            # Nothing reads whisper's output while it runs.  On unread PIPEs
            # whisper would block once a pipe buffer filled and poll() would
            # never report completion, so discard stdout and keep stderr in
            # a file for error reporting.
            with open(self.transcription_log, "wb") as log:
                self.transcription_process = subprocess.Popen(
                    [
                        "whisper",
                        "--model", "turbo",
                        "--output_format", "txt",
                        "--task", "transcribe",
                        str(self.audio_file),
                    ],
                    cwd=str(self.temp_dir),
                    stdout=subprocess.DEVNULL,
                    stderr=log,
                )

            # Poll periodically instead of waiting, to keep the UI responsive.
            GLib.timeout_add(self.TRANSCRIBE_POLL_MS, self.check_transcription_complete)

        except Exception as e:
            self.show_error(f"Failed to start transcription: {e}")

        return False  # Don't repeat this idle callback

    def _read_transcription_log(self):
        """Return whisper's captured stderr text, or "Unknown error" if there is none."""
        try:
            text = Path(self.transcription_log).read_text(
                encoding="utf-8", errors="replace"
            ).strip()
        except (OSError, TypeError):
            text = ""
        return text or "Unknown error"

    def check_transcription_complete(self):
        """Poll the whisper process; show the transcript or an error when it ends.

        Used as a GLib timeout callback: returns True to keep polling and
        False once polling should stop.
        """
        process = self.transcription_process
        if process is None:
            return False  # Stop polling

        returncode = process.poll()
        if returncode is None:
            return True  # Still running, keep polling

        # Finished: release the handle before any modal dialog is shown.
        self.transcription_process = None

        try:
            if returncode != 0:
                self.show_error(f"Transcription failed: {self._read_transcription_log()}")
                return False

            transcript_file = Path(self.temp_dir) / "recording.txt"
            if not transcript_file.exists():
                self.show_error("Transcript file not found")
                return False

            # Read as UTF-8 explicitly rather than relying on the locale.
            self.transcript_text = transcript_file.read_text(encoding="utf-8").strip()

            self.state = AppState.SHOWING_RESULT
            self.build_ui()

        except Exception as e:
            self.show_error(f"Transcription error: {e}")

        return False  # Stop polling

    def on_copy_to_clipboard(self, button, text_view):
        """Copy the (possibly edited) transcript to the clipboard."""
        text_buffer = text_view.get_buffer()
        text = text_buffer.get_text(
            text_buffer.get_start_iter(), text_buffer.get_end_iter(), True
        )

        clipboard = Gtk.Clipboard.get(Gdk.SELECTION_CLIPBOARD)
        clipboard.set_text(text, -1)
        # Ask for the contents to be kept available after this app exits.
        clipboard.store()

        # Restore a fixed label rather than whatever the button shows now;
        # otherwise a second click within the feedback window would capture
        # "Copied!" as the label to restore and the button would stay stuck.
        button.set_label(self.COPIED_LABEL)
        GLib.timeout_add(self.COPIED_FEEDBACK_MS, self._restore_copy_label, button)

    def _restore_copy_label(self, button):
        """Timeout callback: put the copy button's normal label back (one-shot)."""
        button.set_label(self.COPY_LABEL)
        return False

    def on_new_recording(self, button):
        """Discard the current session and return to the idle state."""
        self.cleanup_temp_files()

        self.transcript_text = ""
        self.state = AppState.IDLE
        self.build_ui()

    def show_error(self, message):
        """Show *message* in a modal error dialog, then return to idle state."""
        dialog = Gtk.MessageDialog(
            transient_for=self,
            flags=0,
            message_type=Gtk.MessageType.ERROR,
            buttons=Gtk.ButtonsType.OK,
            text="Error",
        )
        dialog.format_secondary_text(message)
        dialog.run()
        dialog.destroy()

        # Clean up and return to idle
        self.cleanup_temp_files()
        self.state = AppState.IDLE
        self.build_ui()

    def cleanup_temp_files(self):
        """Remove the session temp directory and forget its paths.

        The paths are always reset, even when the directory is already gone
        or cannot be removed; removal failures are only reported on stdout.
        """
        temp_dir = self.temp_dir
        self.temp_dir = None
        self.audio_file = None
        self.transcription_log = None

        if not temp_dir:
            return

        try:
            shutil.rmtree(temp_dir)
        except FileNotFoundError:
            pass  # Already gone, nothing to clean up
        except OSError as e:
            print(f"Error cleaning up temp files: {e}")

    @staticmethod
    def _kill_process(process):
        """Kill and reap *process* if it is not None; OS errors are reported, not raised."""
        if process is None:
            return
        try:
            process.kill()
            process.wait()
        except OSError as e:
            print(f"Error stopping process: {e}")

    def on_destroy(self, widget):
        """Handle window destruction: stop children, clean up, quit the main loop."""
        self._kill_process(self.recording_process)
        self.recording_process = None

        self._kill_process(self.transcription_process)
        self.transcription_process = None

        self.cleanup_temp_files()

        # Only quit a main loop that is actually running.
        if Gtk.main_level() > 0:
            Gtk.main_quit()


 def main():
    """Create the application window and run the GTK main loop."""
    # The window shows itself in its constructor; the name is only kept so
    # the object stays referenced while the loop runs.
    app_window = DictatWindow()
    Gtk.main()


 if __name__ == "__main__":
    main()
	#!/usr/bin/env -S uv run -s
	"""
	Dictat - A simple dictation and transcription app for GNOME

	Records audio using PipeWire (pw-record) and transcribes using local Whisper.
	"""

	# /// script
	# requires-python = ">=3.13"
	# dependencies = [
	# "openai-whisper>=20250625",
	# "pygobject==3.50.0",
	# ]
	# ///

	import gi
	gi.require_version('Gtk', '3.0')
	from gi.repository import Gtk, Gdk, GLib

	import subprocess
	import signal
	import tempfile
	import shutil
	import os
	from pathlib import Path
	from enum import Enum


	class AppState(Enum):
	    """Phases of one dictation session, listed in the order they occur."""

	    # Waiting for the user to start a recording.
	    IDLE = 1
	    # Audio is currently being captured.
	    RECORDING = 2
	    # The captured audio is being turned into text.
	    TRANSCRIBING = 3
	    # The finished transcript is on screen.
	    SHOWING_RESULT = 4


	class DictatWindow(Gtk.Window):
	    """Main application window.

	    The window is a small state machine keyed on ``self.state`` (an
	    ``AppState``).  ``build_ui`` swaps the contents of ``self.vbox`` to match
	    the current state.  Audio is captured by a ``pw-record`` child process and
	    transcribed by a ``whisper`` child process; both work inside a per-session
	    temporary directory that ``cleanup_temp_files`` removes.
	    """

	    # Seconds to wait for pw-record to exit after SIGINT before killing it.
	    RECORD_STOP_TIMEOUT = 5
	    # Milliseconds between polls of the running whisper process.
	    TRANSCRIBE_POLL_MS = 200
	    # Copy button labels, and how long (ms) the confirmation stays visible.
	    COPY_LABEL = "Copy to Clipboard"
	    COPIED_LABEL = "Copied!"
	    COPIED_FEEDBACK_MS = 1000

	    def __init__(self):
	        """Create the window, initialise session state and show the idle UI."""
	        super().__init__(title="Dictat - Voice Transcription")

	        # Window setup
	        self.set_position(Gtk.WindowPosition.CENTER)
	        self.set_default_size(500, 400)
	        self.set_border_width(10)

	        # Connect signals
	        self.connect("destroy", self.on_destroy)
	        self.connect("key-press-event", self.on_key_press)

	        # Session state
	        self.state = AppState.IDLE
	        self.recording_process = None      # Popen handle for pw-record
	        self.transcription_process = None  # Popen handle for whisper
	        self.temp_dir = None               # per-session scratch directory
	        self.audio_file = None             # Path of the recorded WAV
	        self.transcription_log = None      # Path of whisper's captured stderr
	        self.transcript_text = ""

	        # Single container whose children are replaced on every state change
	        self.vbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
	        self.add(self.vbox)

	        self.build_ui()
	        self.show_all()

	    def on_key_press(self, widget, event):
	        """Close the window on Esc, Ctrl+Q or Ctrl+W.

	        Returns True when the key was handled, False to let GTK continue
	        its normal key processing.
	        """
	        # Fold case so the shortcuts still work with Caps Lock or Shift held.
	        keyval = Gdk.keyval_to_lower(event.keyval)
	        ctrl_held = bool(event.state & Gdk.ModifierType.CONTROL_MASK)

	        if keyval == Gdk.KEY_Escape or (ctrl_held and keyval in (Gdk.KEY_q, Gdk.KEY_w)):
	            # Destroy the window instead of calling Gtk.main_quit() directly:
	            # the "destroy" handler stops child processes, removes the temp
	            # directory and then quits the main loop.
	            self.destroy()
	            return True

	        return False

	    def build_ui(self):
	        """Replace the contents of ``self.vbox`` with the UI for ``self.state``."""
	        for child in self.vbox.get_children():
	            self.vbox.remove(child)

	        builders = {
	            AppState.IDLE: self.build_idle_ui,
	            AppState.RECORDING: self.build_recording_ui,
	            AppState.TRANSCRIBING: self.build_transcribing_ui,
	            AppState.SHOWING_RESULT: self.build_result_ui,
	        }
	        builder = builders.get(self.state)
	        if builder is not None:
	            builder()

	        self.vbox.show_all()

	    def _add_spacer(self):
	        """Pack an empty expanding box; a pair of them centres content vertically."""
	        self.vbox.pack_start(Gtk.Box(), True, True, 0)

	    def _add_heading(self, text):
	        """Pack a large bold label showing *text* (inserted into Pango markup)."""
	        label = Gtk.Label()
	        label.set_markup(f"<big><b>{text}</b></big>")
	        self.vbox.pack_start(label, False, False, 10)

	    def build_idle_ui(self):
	        """Build UI for idle state: a centred "Start Recording" button."""
	        self._add_spacer()

	        button = Gtk.Button(label="Start Recording")
	        button.connect("clicked", self.on_start_recording)
	        self.vbox.pack_start(button, False, False, 0)

	        self._add_spacer()

	    def build_recording_ui(self):
	        """Build UI for recording state: a heading and a "Stop Recording" button."""
	        self._add_spacer()
	        self._add_heading("Recording...")

	        button = Gtk.Button(label="Stop Recording")
	        button.connect("clicked", self.on_stop_recording)
	        self.vbox.pack_start(button, False, False, 0)

	        self._add_spacer()

	    def build_transcribing_ui(self):
	        """Build UI for transcribing state: a running spinner and a heading."""
	        self._add_spacer()

	        spinner = Gtk.Spinner()
	        spinner.start()
	        self.vbox.pack_start(spinner, False, False, 10)

	        self._add_heading("Transcribing...")
	        self._add_spacer()

	    def build_result_ui(self):
	        """Build UI for the result: an editable transcript plus action buttons."""
	        scrolled_window = Gtk.ScrolledWindow()
	        scrolled_window.set_hexpand(True)
	        scrolled_window.set_vexpand(True)
	        scrolled_window.set_policy(Gtk.PolicyType.AUTOMATIC, Gtk.PolicyType.AUTOMATIC)

	        # Editable so the user can correct the transcript before copying it
	        text_view = Gtk.TextView()
	        text_view.set_wrap_mode(Gtk.WrapMode.WORD)
	        text_view.set_editable(True)
	        text_view.set_cursor_visible(True)
	        text_view.set_left_margin(10)
	        text_view.set_right_margin(10)
	        text_view.set_top_margin(10)
	        text_view.set_bottom_margin(10)
	        text_view.get_buffer().set_text(self.transcript_text)

	        scrolled_window.add(text_view)
	        self.vbox.pack_start(scrolled_window, True, True, 0)

	        # Button box for actions
	        button_box = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=10)
	        button_box.set_halign(Gtk.Align.CENTER)

	        # The text view is passed along so the handler copies the edited text
	        copy_button = Gtk.Button(label=self.COPY_LABEL)
	        copy_button.connect("clicked", self.on_copy_to_clipboard, text_view)
	        button_box.pack_start(copy_button, False, False, 0)

	        new_button = Gtk.Button(label="New Recording")
	        new_button.connect("clicked", self.on_new_recording)
	        button_box.pack_start(new_button, False, False, 0)

	        self.vbox.pack_start(button_box, False, False, 0)

	    def on_start_recording(self, button):
	        """Create a session temp directory and start ``pw-record`` writing into it."""
	        try:
	            self.temp_dir = tempfile.mkdtemp(prefix="dictat_")
	            self.audio_file = Path(self.temp_dir) / "recording.wav"

	            # The recorder's output is never read, so discard it: an unread
	            # PIPE would block the child once the pipe buffer fills up.
	            self.recording_process = subprocess.Popen(
	                ["pw-record", str(self.audio_file)],
	                stdout=subprocess.DEVNULL,
	                stderr=subprocess.DEVNULL,
	            )

	            self.state = AppState.RECORDING
	            self.build_ui()

	        except Exception as e:
	            self.show_error(f"Failed to start recording: {e}")

	    def on_stop_recording(self, button):
	        """Stop ``pw-record`` and, if audio was written, start transcription."""
	        process = self.recording_process
	        if process is None:
	            return
	        # Drop the handle first so no exit path leaves a stale reference.
	        self.recording_process = None

	        try:
	            # Ask the recorder to stop gracefully, then wait for it to exit.
	            process.send_signal(signal.SIGINT)
	            process.wait(timeout=self.RECORD_STOP_TIMEOUT)
	        except subprocess.TimeoutExpired:
	            self._kill_process(process)
	            self.show_error("Recording process did not stop gracefully")
	            return
	        except Exception as e:
	            # Make sure the recorder is not left running behind the idle UI.
	            self._kill_process(process)
	            self.show_error(f"Failed to stop recording: {e}")
	            return

	        if self.audio_file is None or not self.audio_file.exists():
	            self.show_error("Recording failed: no audio file was produced")
	            return

	        self.state = AppState.TRANSCRIBING
	        self.build_ui()

	        # Defer the launch so the transcribing UI is displayed first.
	        GLib.idle_add(self.transcribe_audio)

	    def transcribe_audio(self):
	        """Launch ``whisper`` on the recorded audio without blocking the UI.

	        Used as a one-shot GLib idle callback, so it always returns False.
	        """
	        try:
	            self.transcription_log = Path(self.temp_dir) / "whisper-stderr.log"

	            # Nothing reads whisper's output while it runs.  On unread PIPEs
	            # whisper would block once a pipe buffer filled and poll() would
	            # never report completion, so discard stdout and keep stderr in
	            # a file for error reporting.
	            with open(self.transcription_log, "wb") as log:
	                self.transcription_process = subprocess.Popen(
	                    [
	                        "whisper",
	                        "--model", "turbo",
	                        "--output_format", "txt",
	                        "--task", "transcribe",
	                        str(self.audio_file),
	                    ],
	                    cwd=str(self.temp_dir),
	                    stdout=subprocess.DEVNULL,
	                    stderr=log,
	                )

	            # Poll periodically instead of waiting, to keep the UI responsive.
	            GLib.timeout_add(self.TRANSCRIBE_POLL_MS, self.check_transcription_complete)

	        except Exception as e:
	            self.show_error(f"Failed to start transcription: {e}")

	        return False  # Don't repeat this idle callback

	    def _read_transcription_log(self):
	        """Return whisper's captured stderr text, or "Unknown error" if there is none."""
	        try:
	            text = Path(self.transcription_log).read_text(
	                encoding="utf-8", errors="replace"
	            ).strip()
	        except (OSError, TypeError):
	            text = ""
	        return text or "Unknown error"

	    def check_transcription_complete(self):
	        """Poll the whisper process; show the transcript or an error when it ends.

	        Used as a GLib timeout callback: returns True to keep polling and
	        False once polling should stop.
	        """
	        process = self.transcription_process
	        if process is None:
	            return False  # Stop polling

	        returncode = process.poll()
	        if returncode is None:
	            return True  # Still running, keep polling

	        # Finished: release the handle before any modal dialog is shown.
	        self.transcription_process = None

	        try:
	            if returncode != 0:
	                self.show_error(f"Transcription failed: {self._read_transcription_log()}")
	                return False

	            transcript_file = Path(self.temp_dir) / "recording.txt"
	            if not transcript_file.exists():
	                self.show_error("Transcript file not found")
	                return False

	            # Read as UTF-8 explicitly rather than relying on the locale.
	            self.transcript_text = transcript_file.read_text(encoding="utf-8").strip()

	            self.state = AppState.SHOWING_RESULT
	            self.build_ui()

	        except Exception as e:
	            self.show_error(f"Transcription error: {e}")

	        return False  # Stop polling

	    def on_copy_to_clipboard(self, button, text_view):
	        """Copy the (possibly edited) transcript to the clipboard."""
	        text_buffer = text_view.get_buffer()
	        text = text_buffer.get_text(
	            text_buffer.get_start_iter(), text_buffer.get_end_iter(), True
	        )

	        clipboard = Gtk.Clipboard.get(Gdk.SELECTION_CLIPBOARD)
	        clipboard.set_text(text, -1)
	        # Ask for the contents to be kept available after this app exits.
	        clipboard.store()

	        # Restore a fixed label rather than whatever the button shows now;
	        # otherwise a second click within the feedback window would capture
	        # "Copied!" as the label to restore and the button would stay stuck.
	        button.set_label(self.COPIED_LABEL)
	        GLib.timeout_add(self.COPIED_FEEDBACK_MS, self._restore_copy_label, button)

	    def _restore_copy_label(self, button):
	        """Timeout callback: put the copy button's normal label back (one-shot)."""
	        button.set_label(self.COPY_LABEL)
	        return False

	    def on_new_recording(self, button):
	        """Discard the current session and return to the idle state."""
	        self.cleanup_temp_files()

	        self.transcript_text = ""
	        self.state = AppState.IDLE
	        self.build_ui()

	    def show_error(self, message):
	        """Show *message* in a modal error dialog, then return to idle state."""
	        dialog = Gtk.MessageDialog(
	            transient_for=self,
	            flags=0,
	            message_type=Gtk.MessageType.ERROR,
	            buttons=Gtk.ButtonsType.OK,
	            text="Error",
	        )
	        dialog.format_secondary_text(message)
	        dialog.run()
	        dialog.destroy()

	        # Clean up and return to idle
	        self.cleanup_temp_files()
	        self.state = AppState.IDLE
	        self.build_ui()

	    def cleanup_temp_files(self):
	        """Remove the session temp directory and forget its paths.

	        The paths are always reset, even when the directory is already gone
	        or cannot be removed; removal failures are only reported on stdout.
	        """
	        temp_dir = self.temp_dir
	        self.temp_dir = None
	        self.audio_file = None
	        self.transcription_log = None

	        if not temp_dir:
	            return

	        try:
	            shutil.rmtree(temp_dir)
	        except FileNotFoundError:
	            pass  # Already gone, nothing to clean up
	        except OSError as e:
	            print(f"Error cleaning up temp files: {e}")

	    @staticmethod
	    def _kill_process(process):
	        """Kill and reap *process* if it is not None; OS errors are reported, not raised."""
	        if process is None:
	            return
	        try:
	            process.kill()
	            process.wait()
	        except OSError as e:
	            print(f"Error stopping process: {e}")

	    def on_destroy(self, widget):
	        """Handle window destruction: stop children, clean up, quit the main loop."""
	        self._kill_process(self.recording_process)
	        self.recording_process = None

	        self._kill_process(self.transcription_process)
	        self.transcription_process = None

	        self.cleanup_temp_files()

	        # Only quit a main loop that is actually running.
	        if Gtk.main_level() > 0:
	            Gtk.main_quit()


	def main():
	    """Create the application window and run the GTK main loop."""
	    # The name is only kept so the window object stays referenced while
	    # the loop runs.
	    app_window = DictatWindow()
	    Gtk.main()


	if __name__ == "__main__":
	    main()
No results found