ericboehs · May 27, 2026 17:58 · ericboehs · May 27, 2026
diff --git a/kk b/kk
 #!/usr/bin/env bash
 # kk — thin client for the kokorod daemon, with auto-launch.
 #
 # Sends a speak request over /tmp/kokoro.sock. If the daemon isn't
 # running, launches it in the background and waits for the socket to
 # come up before sending.
 #
 # Usage:
 #   kk "Hello, world."
 #   echo "text" | kk
 #   kk -v bm_fable "Good morning."
 #   kk -s 1.2 "Slightly faster."
 #
 # Pause/stop: kokoro-pause / kokoro-stop
 # Stop the daemon: kk --stop-daemon

 set -o errexit -o nounset -o pipefail

 # Daemon endpoint plus the voice/speed used when no flags override them.
 SOCKET="/tmp/kokoro.sock"
 VOICE="af_heart"
 SPEED="1.0"
 # Seconds to wait for a freshly launched daemon (KK_LAUNCH_TIMEOUT overrides).
 LAUNCH_TIMEOUT="${KK_LAUNCH_TIMEOUT:-60}"

 # Directory holding this script. `readlink -f` resolves symlinks; if it
 # fails, the path the script was invoked with is used instead.
 SCRIPT_DIR="$(
    self="$(readlink -f "${BASH_SOURCE[0]}" 2>/dev/null || echo "${BASH_SOURCE[0]}")"
    cd "$(dirname "$self")" && pwd
 )"
 KOKOROD="$SCRIPT_DIR/kokorod"
 LOG_DIR="$HOME/.cache/kokorod"
 LOG_FILE="$LOG_DIR/daemon.log"

 # Print the help text to stdout.
 # The heredoc delimiter is unquoted, so ${LOG_FILE} is expanded when the
 # text is printed.
 # Globals: LOG_FILE (read)
 usage() {
    cat <<EOF
 kk — thin client for the kokorod daemon.

 Usage:
  kk "text"               speak text
  echo "text" | kk        pipe input
  kk -v VOICE "text"      voice override (default: af_heart)
  kk -s SPEED "text"      speed multiplier (default: 1.0)
  kk --stop-daemon        stop the running daemon

 If the daemon isn't running, kk launches it automatically (first call
 takes ~5s for model load + audio device engagement). Subsequent calls
 are near-instant.

 Pause/stop playback: kokoro-pause / kokoro-stop
 Daemon log: ${LOG_FILE}
 EOF
 }

 # Stop the daemon and remove its socket file.
 # Globals: SOCKET (read)
 # Outputs: status message on stderr
 # Returns: 1 if no socket exists; otherwise the status of removing the socket
 stop_daemon() {
    [[ -S "$SOCKET" ]] || {
        echo "kk: daemon is not running" >&2
        return 1
    }

    # The daemon is located by matching its script name on the command line.
    local outcome="kk: stopped daemon"
    pkill -f "kokorod\.py" 2>/dev/null || outcome="kk: no kokorod process found"
    echo "$outcome" >&2

    rm -f "$SOCKET"
 }

 # Make sure a kokorod daemon is listening on $SOCKET, launching it if needed.
 #
 # Globals: SOCKET, KOKOROD, LOG_DIR, LOG_FILE, LAUNCH_TIMEOUT (read);
 #          KK_POLL_INTERVAL (optional, seconds between checks, default 1)
 # Outputs: progress and errors on stderr; daemon output appended to LOG_FILE
 # Returns: 0 once the socket exists. Exits the script with status 1 when the
 #          launcher is missing, the daemon dies while starting, or the
 #          socket does not appear within LAUNCH_TIMEOUT seconds.
 ensure_daemon() {
    if [[ -S "$SOCKET" ]]; then
        # A socket file outlives a daemon that crashed or was killed. When
        # pgrep is available, use the same process match as stop_daemon to
        # tell a live daemon from a leftover socket; without pgrep the
        # socket is trusted.
        if ! command -v pgrep >/dev/null 2>&1 \
            || pgrep -f "kokorod\.py" >/dev/null 2>&1; then
            return 0
        fi
        echo "kk: removing stale socket $SOCKET" >&2
        rm -f "$SOCKET"
    fi

    [[ -x "$KOKOROD" ]] || {
        echo "kk: daemon binary not found at $KOKOROD" >&2
        exit 1
    }

    mkdir -p "$LOG_DIR"
    echo "kk: launching kokorod (log: $LOG_FILE)..." >&2
    HF_HUB_OFFLINE=1 nohup "$KOKOROD" >>"$LOG_FILE" 2>&1 </dev/null &
    local daemon_pid=$!
    disown

    # Wait for socket to appear (daemon creates it as the last startup step).
    local poll_interval="${KK_POLL_INTERVAL:-1}"
    local started=$SECONDS
    while (( SECONDS - started < LAUNCH_TIMEOUT )); do
        if [[ -S "$SOCKET" ]]; then
            echo "kk: daemon ready after $(( SECONDS - started ))s" >&2
            return 0
        fi
        # Fail fast instead of waiting out the timeout for a dead process.
        if ! kill -0 "$daemon_pid" 2>/dev/null; then
            echo "kk: daemon exited during startup — see $LOG_FILE" >&2
            exit 1
        fi
        sleep "$poll_interval"
    done

    echo "kk: daemon did not start within ${LAUNCH_TIMEOUT}s — see $LOG_FILE" >&2
    exit 1
 }

 # Consume leading options; stop at `--` or the first non-option word so the
 # remaining positional parameters are the text to speak.
 while [[ $# -gt 0 ]]; do
    case "$1" in
        -v|--voice|-s|--speed)
            # Without this guard a missing value trips `set -u` on "$2" and
            # the user only sees an "unbound variable" error.
            if [[ $# -lt 2 ]]; then
                echo "kk: option $1 requires a value" >&2
                exit 2
            fi
            case "$1" in
                -v|--voice) VOICE="$2" ;;
                *) SPEED="$2" ;;
            esac
            shift 2
            ;;
        -h|--help) usage; exit 0 ;;
        --stop-daemon) stop_daemon; exit $? ;;
        --) shift; break ;;
        -*) echo "kk: unknown flag $1" >&2; exit 2 ;;
        *) break ;;
    esac
 done

 # Text comes from the remaining arguments (joined with spaces) or, when
 # there are none, from standard input.
 if (( $# == 0 )); then
    TEXT="$(cat)"
 else
    TEXT="$*"
 fi

 if [[ -z "$TEXT" ]]; then
    echo "kk: empty text" >&2
    exit 1
 fi

 ensure_daemon

 # Hand the request to a small Python client. Every value travels as an
 # argv entry, so the quoted heredoc needs no shell interpolation.
 exec /usr/bin/python3 - "$SOCKET" "$VOICE" "$SPEED" "$TEXT" <<'PY'
 import json, socket, sys

 sock_path, voice, speed, text = sys.argv[1:5]

 # Reject a non-numeric speed with a message instead of a traceback.
 try:
    speed_value = float(speed)
 except ValueError:
    print(f"kk: invalid speed {speed!r} (expected a number)", file=sys.stderr)
    sys.exit(2)

 req = json.dumps({"text": text, "voice": voice, "speed": speed_value})

 # One request line out, one reply line back.
 try:
    with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s:
        s.connect(sock_path)
        s.sendall((req + "\n").encode("utf-8"))
        with s.makefile("r", encoding="utf-8") as reader:
            resp = reader.readline().strip()
 except OSError as e:
    print(f"kk: cannot reach daemon at {sock_path}: {e}", file=sys.stderr)
    sys.exit(1)

 if not resp:
    print("kk: daemon closed the connection without replying", file=sys.stderr)
    sys.exit(1)
 try:
    body = json.loads(resp)
 except json.JSONDecodeError:
    print(f"kk: unexpected daemon reply: {resp}", file=sys.stderr)
    sys.exit(1)
 if not isinstance(body, dict):
    print(f"kk: unexpected daemon reply: {resp}", file=sys.stderr)
    sys.exit(1)
 if body.get("ok"):
    sys.exit(0)
 print(f"kk: {body.get('error', 'unknown error')}", file=sys.stderr)
 sys.exit(1)
 PY
diff --git a/kokorod b/kokorod
 #!/usr/bin/env bash
 # kokorod — launcher for the Kokoro TTS daemon.
 #
 # Keeps the model resident in memory and listens on /tmp/kokoro.sock.
 # Run in a long-lived shell / tmux pane. Clients: `kk` (thin socket client).

 set -o errexit -o nounset -o pipefail

 # Directory holding this launcher. `readlink -f` resolves symlinks; if it
 # fails, the path the script was invoked with is used instead.
 SCRIPT_DIR="$(
    self="$(readlink -f "${BASH_SOURCE[0]}" 2>/dev/null || echo "${BASH_SOURCE[0]}")"
    cd "$(dirname "$self")" && pwd
 )"
 VENV_PYTHON="$SCRIPT_DIR/.venv/bin/python3.12"

 if [[ ! -x "$VENV_PYTHON" ]]; then
    echo "kokorod: venv not found at ${VENV_PYTHON}" >&2
    exit 1
 fi

 # Replace this shell with the daemon process, forwarding all arguments.
 exec "$VENV_PYTHON" "$SCRIPT_DIR/kokorod.py" "$@"
diff --git a/kokorod.py b/kokorod.py
 #!/usr/bin/env python3
 """kokorod — long-running daemon that keeps Kokoro-82M resident in memory.

 Two layers of warm-up so each speak request starts immediately:

  1. Model + KokoroPipeline loaded once at startup (eliminates 3-4s cold
     start of the `kokoro` CLI).
  2. A single sd.OutputStream is opened once and reused across every
     speak. macOS Bluetooth audio (AirPods) takes 2-3s to engage on each
     fresh stream open; keeping the stream alive eliminates that too.

 Protocol: newline-terminated JSON over /tmp/kokoro.sock.
  Request:  {"text": "...", "voice": "af_heart", "speed": 1.0}
  Response: {"ok": true, "msg": "..."} or {"ok": false, "error": "..."}

 Pause / resume / stop still work via the existing sentinel files and
 the kokoro-pause / kokoro-stop scripts. Sending a new speak request
 while audio is playing interrupts the previous playback.
 """

 import json
 import os
 import queue
 import socketserver
 import threading
 import time

 os.environ.setdefault("HF_HUB_OFFLINE", "1")

 import contextlib
 import sys

 import numpy as np
 import sounddevice as sd

 import mcp_server
 from mcp_server import (
    DEFAULT_SPEED,
    DEFAULT_VOICE,
    SAMPLE_RATE,
    SENTINEL,
    SHORT_TEXT_PAD,
    SHORT_TEXT_THRESHOLD,
    STOP_SENTINEL,
    _generate_audio,
    _get_model,
    _lang_code,
    _preprocess_for_tts,
 )

 # Unix socket the daemon listens on, and the block size of the output stream.
 SOCKET_PATH = "/tmp/kokoro.sock"
 BLOCK_SIZE = 2048

 # Idle limit in seconds, configured in minutes via KOKOROD_TIMEOUT_MINUTES
 # (default 30). It keeps the ~700 MB resident process from lingering after
 # a single use; `kk` launches a new daemon on the next request.
 IDLE_TIMEOUT_SECONDS = 60 * int(os.environ.get("KOKOROD_TIMEOUT_MINUTES", "30"))

 # Requests flow from the socket handler threads to the main thread.
 _play_queue: "queue.Queue[tuple[str, str, float, float]]" = queue.Queue()

 # Raised by a new request to cut short whatever is currently playing.
 _interrupt = threading.Event()

 # Timestamp of the last received or completed request; read by the idle
 # watcher and guarded by the lock below.
 _last_activity = time.time()
 _last_activity_lock = threading.Lock()


 def _touch_activity():
    """Record the current time as the most recent activity.

    The idle watcher reads this timestamp under the same lock, so the write
    is made while holding ``_last_activity_lock``.
    """
    global _last_activity
    with _last_activity_lock:
        stamp = time.time()
        _last_activity = stamp

 # Persistent output stream, opened by the first _ensure_stream() call
 # (main() triggers that at startup through _prewarm_audio()), so it
 # inherits whatever output device is active at that moment.
 _persistent_stream: "sd.OutputStream | None" = None


 def _ensure_stream() -> "sd.OutputStream":
    """Return the shared output stream, opening and starting it on first use.

    Returns:
        The module-wide ``sd.OutputStream`` (one channel, float32,
        ``SAMPLE_RATE``, ``BLOCK_SIZE`` block size, ``latency="high"``).

    Raises:
        Whatever ``sd.OutputStream(...)`` or ``start()`` raises. Nothing is
        cached in that case, so the next call tries again from scratch.
    """
    global _persistent_stream
    if _persistent_stream is not None:
        return _persistent_stream

    stream = sd.OutputStream(
        samplerate=SAMPLE_RATE,
        channels=1,
        blocksize=BLOCK_SIZE,
        dtype="float32",
        latency="high",
    )
    try:
        stream.start()
    except Exception:
        # Never cache a stream that failed to start: release it and let the
        # caller see the original error.
        with contextlib.suppress(Exception):
            stream.close()
        raise

    _persistent_stream = stream
    print(
        f"kokorod: persistent OutputStream opened "
        f"(sr={SAMPLE_RATE}, blocksize={BLOCK_SIZE}, latency=high)",
        flush=True,
    )
    return stream


 def _play_one(text: str, voice: str, speed: float, t_enqueued: float, t_dequeued: float) -> None:
    """Generate audio for one request and play it through the persistent stream.

    Replaces mcp_server._generate_and_play so the persistent OutputStream can
    be reused. Honors the existing pause and stop sentinel files.

    Args:
        text: Text to speak; run through ``_preprocess_for_tts`` first.
        voice: Voice name passed to ``model.generate``.
        speed: Speed multiplier passed to ``model.generate``.
        t_enqueued: ``time.time()`` taken when the request was queued.
        t_dequeued: ``time.time()`` taken when the main loop picked it up.

    Playback ends early when a newer request is waiting or the stop sentinel
    exists. These conditions are checked between generated chunks only.
    """

    def _should_stop() -> bool:
        # A newer request shows up either as the interrupt flag or as an
        # entry still sitting in the queue. Checking the queue as well
        # covers the case where the clear() below wiped a flag that a newer
        # request had already set.
        return (
            _interrupt.is_set()
            or not _play_queue.empty()
            or os.path.exists(STOP_SENTINEL)
        )

    text = _preprocess_for_tts(text)
    model = _get_model()
    stream = _ensure_stream()

    # Clean up stale sentinels from a previous session.
    for f in (SENTINEL, STOP_SENTINEL):
        with contextlib.suppress(FileNotFoundError):
            os.remove(f)
    _interrupt.clear()
    mcp_server._set_state("playing")

    first_audio = False
    try:
        with contextlib.redirect_stdout(sys.stderr):
            for result in model.generate(
                text=text, voice=voice, speed=speed, lang_code=_lang_code(voice),
            ):
                if _should_stop():
                    break

                # Pause sentinel — block until removed or stopped.
                if os.path.exists(SENTINEL):
                    mcp_server._set_state("paused")
                    while os.path.exists(SENTINEL) and not _should_stop():
                        time.sleep(0.1)
                    if _should_stop():
                        break
                    mcp_server._set_state("playing")

                audio = np.array(result.audio)
                if len(audio) == 0:
                    continue

                if not first_audio:
                    first_audio = True
                    ttfa = time.time() - t_enqueued
                    gen = time.time() - t_dequeued
                    print(
                        f"kokorod: TTFA {ttfa:.2f}s (first-chunk gen {gen:.2f}s)",
                        flush=True,
                    )
                # Write the entire model result in one go. Slicing into
                # BLOCK_SIZE pieces caused audible crackling at chunk
                # boundaries when model.generate yielded slower than the
                # output stream drained.
                stream.write(audio.reshape(-1, 1))
                # Count each played chunk as activity so the idle watcher
                # does not exit the daemon in the middle of a long read.
                _touch_activity()
    finally:
        mcp_server._set_state("idle")
        with contextlib.suppress(FileNotFoundError):
            os.remove(SENTINEL)


 class KokoroHandler(socketserver.StreamRequestHandler):
    """Handle one client connection: read a JSON request line, queue it, reply."""

    def handle(self):
        """Validate the request, queue it for playback and send one reply line.

        Malformed input is answered with ``{"ok": false, "error": ...}``
        instead of raising inside the handler thread, which would leave the
        client without any reply. An empty line gets no reply.
        """
        try:
            line = self.rfile.readline().decode("utf-8").strip()
        except UnicodeDecodeError as e:
            self._respond(False, error=f"request is not valid UTF-8: {e}")
            return
        if not line:
            return
        try:
            req = json.loads(line)
        except json.JSONDecodeError as e:
            self._respond(False, error=f"invalid JSON: {e}")
            return
        if not isinstance(req, dict):
            self._respond(False, error="request must be a JSON object")
            return

        text = req.get("text") or ""
        if not isinstance(text, str):
            self._respond(False, error="text must be a string")
            return
        text = text.strip()
        if not text:
            self._respond(False, error="empty text")
            return

        # A missing or null voice/speed falls back to the defaults.
        voice = req.get("voice") or DEFAULT_VOICE
        if not isinstance(voice, str):
            self._respond(False, error="voice must be a string")
            return

        raw_speed = req.get("speed")
        if raw_speed is None:
            raw_speed = DEFAULT_SPEED
        try:
            speed = float(raw_speed)
        except (TypeError, ValueError):
            self._respond(False, error=f"invalid speed: {raw_speed!r}")
            return
        # The comparison also rejects NaN.
        if not (0 < speed < float("inf")):
            self._respond(False, error=f"speed must be a positive number, got {speed}")
            return

        if len(text) < SHORT_TEXT_THRESHOLD:
            text = text + SHORT_TEXT_PAD

        # Tell whatever is currently playing to bail out so this request
        # can take over. The flag is set before the request is queued: the
        # consumer clears the flag when it starts a request, so setting it
        # after queuing could interrupt this very request.
        _interrupt.set()

        _touch_activity()
        word_count = len(text.split())
        _play_queue.put((text, voice, speed, time.time()))
        print(f"kokorod: queued {word_count} words ({voice} @ {speed}x)", flush=True)
        self._respond(True, msg=f"Queued {word_count} words ({voice} @ {speed}x)")

    def _respond(self, ok, msg=None, error=None):
        """Write one newline-terminated JSON reply.

        Args:
            ok: Value of the ``"ok"`` field.
            msg: Optional text for the ``"msg"`` field; omitted when empty.
            error: Optional text for the ``"error"`` field; omitted when empty.
        """
        body = {"ok": ok}
        if msg:
            body["msg"] = msg
        if error:
            body["error"] = error
        self.wfile.write((json.dumps(body) + "\n").encode("utf-8"))


 class ThreadedUnixServer(socketserver.ThreadingMixIn, socketserver.UnixStreamServer):
    """Unix-socket stream server that serves each connection in its own thread."""

    allow_reuse_address = True
    # Handler threads are daemon threads, so they never keep the process alive.
    daemon_threads = True


 def _prewarm():
    """Run one throwaway synthesis so the first real request skips pipeline init."""
    started = time.time()
    _generate_audio("warm up.", DEFAULT_VOICE, DEFAULT_SPEED)
    elapsed = time.time() - started
    print(f"kokorod: pipeline pre-warmed in {elapsed:.2f}s", flush=True)


 def _idle_watcher():
    """Background loop that exits the process after IDLE_TIMEOUT_SECONDS of inactivity.

    Wakes once a minute, reads the last-activity timestamp under its lock,
    and once the limit is reached removes the socket file and terminates
    the process with ``os._exit(0)``.
    """
    limit_minutes = IDLE_TIMEOUT_SECONDS / 60
    while True:
        time.sleep(60)
        with _last_activity_lock:
            idle = time.time() - _last_activity
        if idle < IDLE_TIMEOUT_SECONDS:
            continue
        print(
            f"kokorod: idle for {idle / 60:.1f} min "
            f"(limit {limit_minutes:.0f}); shutting down",
            flush=True,
        )
        with contextlib.suppress(FileNotFoundError):
            os.unlink(SOCKET_PATH)
        os._exit(0)


 def _prewarm_audio():
    """Open the persistent OutputStream and push a short block of silence
    through it, so AirPods / CoreAudio finish engaging before the first
    real speak."""
    frames = BLOCK_SIZE * 4
    _ensure_stream().write(np.zeros((frames, 1), dtype=np.float32))
    print("kokorod: audio device engaged with silence prebuffer", flush=True)


 def main():
    """Run the daemon: warm up, bind the socket, then play queued requests.

    The model, pipeline and audio device are warmed up before the socket is
    created. Socket handling and the idle watcher run on daemon threads,
    while this (main) thread consumes the play queue. On exit the socket
    file is removed and the persistent stream is stopped and closed.
    """
    boot = time.time()
    print("kokorod: loading Kokoro-82M model...", flush=True)
    _get_model()
    print(f"kokorod: model loaded in {time.time() - boot:.2f}s", flush=True)

    _prewarm()
    _prewarm_audio()

    # Remove a socket file left over from an earlier run before binding.
    with contextlib.suppress(FileNotFoundError):
        os.unlink(SOCKET_PATH)

    server = ThreadedUnixServer(SOCKET_PATH, KokoroHandler)
    os.chmod(SOCKET_PATH, 0o600)
    print(
        f"kokorod: listening on {SOCKET_PATH} "
        f"(ready in {time.time() - boot:.2f}s, "
        f"idle timeout {IDLE_TIMEOUT_SECONDS // 60} min)",
        flush=True,
    )

    workers = (
        (server.serve_forever, "kokorod-socket"),
        (_idle_watcher, "kokorod-idle"),
    )
    for target, name in workers:
        threading.Thread(target=target, daemon=True, name=name).start()

    _touch_activity()

    # Main thread owns audio playback. macOS CoreAudio silently no-ops
    # sd.OutputStream operations started from transient worker threads
    # in a long-running socket-server process — the CLI works because it
    # naturally runs on the main thread.
    try:
        while True:
            text, voice, speed, t_enqueued = _play_queue.get()
            t_dequeued = time.time()
            mcp_server._next_playback_session()
            mcp_server._playback_thread = threading.current_thread()
            try:
                _play_one(text, voice, speed, t_enqueued, t_dequeued)
                print(
                    f"kokorod: done; total {time.time() - t_enqueued:.2f}s",
                    flush=True,
                )
            except Exception as e:
                # One failed request must not take the daemon down.
                print(f"kokorod: playback error: {e}", flush=True)
            finally:
                _touch_activity()
    except KeyboardInterrupt:
        print("\nkokorod: shutting down", flush=True)
    finally:
        with contextlib.suppress(FileNotFoundError):
            os.unlink(SOCKET_PATH)
        if _persistent_stream is not None:
            with contextlib.suppress(Exception):
                _persistent_stream.stop()
                _persistent_stream.close()


 if __name__ == "__main__":
    main()
	#!/usr/bin/env bash
	# kk — thin client for the kokorod daemon, with auto-launch.
	#
	# Sends a speak request over /tmp/kokoro.sock. If the daemon isn't
	# running, launches it in the background and waits for the socket to
	# come up before sending.
	#
	# Usage:
	# kk "Hello, world."
	# echo "text" \| kk
	# kk -v bm_fable "Good morning."
	# kk -s 1.2 "Slightly faster."
	#
	# Pause/stop: kokoro-pause / kokoro-stop
	# Stop the daemon: kk --stop-daemon

	set -o errexit -o nounset -o pipefail

	# Daemon endpoint plus the voice/speed used when no flags override them.
	SOCKET="/tmp/kokoro.sock"
	VOICE="af_heart"
	SPEED="1.0"
	# Seconds to wait for a freshly launched daemon (KK_LAUNCH_TIMEOUT overrides).
	LAUNCH_TIMEOUT="${KK_LAUNCH_TIMEOUT:-60}"

	# Directory holding this script. `readlink -f` resolves symlinks; if it
	# fails, the path the script was invoked with is used instead.
	SCRIPT_DIR="$(
	    self="$(readlink -f "${BASH_SOURCE[0]}" 2>/dev/null || echo "${BASH_SOURCE[0]}")"
	    cd "$(dirname "$self")" && pwd
	)"
	KOKOROD="$SCRIPT_DIR/kokorod"
	LOG_DIR="$HOME/.cache/kokorod"
	LOG_FILE="$LOG_DIR/daemon.log"

	# Print the help text to stdout.
	# `<<-` strips leading tabs, so the tab-prefixed text and terminator are
	# accepted; the delimiter is unquoted, so ${LOG_FILE} is expanded.
	# Globals: LOG_FILE (read)
	usage() {
	    cat <<-EOF
	kk — thin client for the kokorod daemon.

	Usage:
	  kk "text"               speak text
	  echo "text" | kk        pipe input
	  kk -v VOICE "text"      voice override (default: af_heart)
	  kk -s SPEED "text"      speed multiplier (default: 1.0)
	  kk --stop-daemon        stop the running daemon

	If the daemon isn't running, kk launches it automatically (first call
	takes ~5s for model load + audio device engagement). Subsequent calls
	are near-instant.

	Pause/stop playback: kokoro-pause / kokoro-stop
	Daemon log: ${LOG_FILE}
	EOF
	}

	# Stop the daemon and remove its socket file.
	# Globals: SOCKET (read)
	# Outputs: status message on stderr
	# Returns: 1 if no socket exists; otherwise the status of removing the socket
	stop_daemon() {
	    [[ -S "$SOCKET" ]] || {
	        echo "kk: daemon is not running" >&2
	        return 1
	    }

	    # The daemon is located by matching its script name on the command line.
	    local outcome="kk: stopped daemon"
	    pkill -f "kokorod\.py" 2>/dev/null || outcome="kk: no kokorod process found"
	    echo "$outcome" >&2

	    rm -f "$SOCKET"
	}

	# Make sure a kokorod daemon is listening on $SOCKET, launching it if needed.
	#
	# Globals: SOCKET, KOKOROD, LOG_DIR, LOG_FILE, LAUNCH_TIMEOUT (read);
	#          KK_POLL_INTERVAL (optional, seconds between checks, default 1)
	# Outputs: progress and errors on stderr; daemon output appended to LOG_FILE
	# Returns: 0 once the socket exists. Exits the script with status 1 when the
	#          launcher is missing, the daemon dies while starting, or the
	#          socket does not appear within LAUNCH_TIMEOUT seconds.
	ensure_daemon() {
	    if [[ -S "$SOCKET" ]]; then
	        # A socket file outlives a daemon that crashed or was killed. When
	        # pgrep is available, use the same process match as stop_daemon to
	        # tell a live daemon from a leftover socket; without pgrep the
	        # socket is trusted.
	        if ! command -v pgrep >/dev/null 2>&1 \
	            || pgrep -f "kokorod\.py" >/dev/null 2>&1; then
	            return 0
	        fi
	        echo "kk: removing stale socket $SOCKET" >&2
	        rm -f "$SOCKET"
	    fi

	    [[ -x "$KOKOROD" ]] || {
	        echo "kk: daemon binary not found at $KOKOROD" >&2
	        exit 1
	    }

	    mkdir -p "$LOG_DIR"
	    echo "kk: launching kokorod (log: $LOG_FILE)..." >&2
	    HF_HUB_OFFLINE=1 nohup "$KOKOROD" >>"$LOG_FILE" 2>&1 </dev/null &
	    local daemon_pid=$!
	    disown

	    # Wait for socket to appear (daemon creates it as the last startup step).
	    local poll_interval="${KK_POLL_INTERVAL:-1}"
	    local started=$SECONDS
	    while (( SECONDS - started < LAUNCH_TIMEOUT )); do
	        if [[ -S "$SOCKET" ]]; then
	            echo "kk: daemon ready after $(( SECONDS - started ))s" >&2
	            return 0
	        fi
	        # Fail fast instead of waiting out the timeout for a dead process.
	        if ! kill -0 "$daemon_pid" 2>/dev/null; then
	            echo "kk: daemon exited during startup — see $LOG_FILE" >&2
	            exit 1
	        fi
	        sleep "$poll_interval"
	    done

	    echo "kk: daemon did not start within ${LAUNCH_TIMEOUT}s — see $LOG_FILE" >&2
	    exit 1
	}

	# Consume leading options; stop at `--` or the first non-option word so the
	# remaining positional parameters are the text to speak.
	while [[ $# -gt 0 ]]; do
	    case "$1" in
	        -v|--voice|-s|--speed)
	            # Without this guard a missing value trips `set -u` on "$2" and
	            # the user only sees an "unbound variable" error.
	            if [[ $# -lt 2 ]]; then
	                echo "kk: option $1 requires a value" >&2
	                exit 2
	            fi
	            case "$1" in
	                -v|--voice) VOICE="$2" ;;
	                *) SPEED="$2" ;;
	            esac
	            shift 2
	            ;;
	        -h|--help) usage; exit 0 ;;
	        --stop-daemon) stop_daemon; exit $? ;;
	        --) shift; break ;;
	        -*) echo "kk: unknown flag $1" >&2; exit 2 ;;
	        *) break ;;
	    esac
	done

	# Text comes from the remaining arguments (joined with spaces) or, when
	# there are none, from standard input.
	if (( $# == 0 )); then
	    TEXT="$(cat)"
	else
	    TEXT="$*"
	fi

	if [[ -z "$TEXT" ]]; then
	    echo "kk: empty text" >&2
	    exit 1
	fi

	ensure_daemon

	# Hand the request to a small Python client. Every value travels as an
	# argv entry, so the quoted heredoc needs no shell interpolation. `<<-`
	# strips the leading tabs before Python sees the script.
	exec /usr/bin/python3 - "$SOCKET" "$VOICE" "$SPEED" "$TEXT" <<-'PY'
	import json, socket, sys

	sock_path, voice, speed, text = sys.argv[1:5]

	# Reject a non-numeric speed with a message instead of a traceback.
	try:
	    speed_value = float(speed)
	except ValueError:
	    print(f"kk: invalid speed {speed!r} (expected a number)", file=sys.stderr)
	    sys.exit(2)

	req = json.dumps({"text": text, "voice": voice, "speed": speed_value})

	# One request line out, one reply line back.
	try:
	    with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s:
	        s.connect(sock_path)
	        s.sendall((req + "\n").encode("utf-8"))
	        with s.makefile("r", encoding="utf-8") as reader:
	            resp = reader.readline().strip()
	except OSError as e:
	    print(f"kk: cannot reach daemon at {sock_path}: {e}", file=sys.stderr)
	    sys.exit(1)

	if not resp:
	    print("kk: daemon closed the connection without replying", file=sys.stderr)
	    sys.exit(1)
	try:
	    body = json.loads(resp)
	except json.JSONDecodeError:
	    print(f"kk: unexpected daemon reply: {resp}", file=sys.stderr)
	    sys.exit(1)
	if not isinstance(body, dict):
	    print(f"kk: unexpected daemon reply: {resp}", file=sys.stderr)
	    sys.exit(1)
	if body.get("ok"):
	    sys.exit(0)
	print(f"kk: {body.get('error', 'unknown error')}", file=sys.stderr)
	sys.exit(1)
	PY
	#!/usr/bin/env bash
	# kokorod — launcher for the Kokoro TTS daemon.
	#
	# Keeps the model resident in memory and listens on /tmp/kokoro.sock.
	# Run in a long-lived shell / tmux pane. Clients: `kk` (thin socket client).

	set -o errexit -o nounset -o pipefail

	# Directory holding this launcher. `readlink -f` resolves symlinks; if it
	# fails, the path the script was invoked with is used instead.
	SCRIPT_DIR="$(
	    self="$(readlink -f "${BASH_SOURCE[0]}" 2>/dev/null || echo "${BASH_SOURCE[0]}")"
	    cd "$(dirname "$self")" && pwd
	)"
	VENV_PYTHON="$SCRIPT_DIR/.venv/bin/python3.12"

	if [[ ! -x "$VENV_PYTHON" ]]; then
	    echo "kokorod: venv not found at ${VENV_PYTHON}" >&2
	    exit 1
	fi

	# Replace this shell with the daemon process, forwarding all arguments.
	exec "$VENV_PYTHON" "$SCRIPT_DIR/kokorod.py" "$@"
	#!/usr/bin/env python3
	"""kokorod — long-running daemon that keeps Kokoro-82M resident in memory.

	Two layers of warm-up so each speak request starts immediately:

	1. Model + KokoroPipeline loaded once at startup (eliminates 3-4s cold
	start of the `kokoro` CLI).
	2. A single sd.OutputStream is opened once and reused across every
	speak. macOS Bluetooth audio (AirPods) takes 2-3s to engage on each
	fresh stream open; keeping the stream alive eliminates that too.

	Protocol: newline-terminated JSON over /tmp/kokoro.sock.
	Request: {"text": "...", "voice": "af_heart", "speed": 1.0}
	Response: {"ok": true, "msg": "..."} or {"ok": false, "error": "..."}

	Pause / resume / stop still work via the existing sentinel files and
	the kokoro-pause / kokoro-stop scripts. Sending a new speak request
	while audio is playing interrupts the previous playback.
	"""

	import json
	import os
	import queue
	import socketserver
	import threading
	import time

	os.environ.setdefault("HF_HUB_OFFLINE", "1")

	import contextlib
	import sys

	import numpy as np
	import sounddevice as sd

	import mcp_server
	from mcp_server import (
	DEFAULT_SPEED,
	DEFAULT_VOICE,
	SAMPLE_RATE,
	SENTINEL,
	SHORT_TEXT_PAD,
	SHORT_TEXT_THRESHOLD,
	STOP_SENTINEL,
	_generate_audio,
	_get_model,
	_lang_code,
	_preprocess_for_tts,
	)

	# Unix socket the daemon listens on, and the block size of the output stream.
	SOCKET_PATH = "/tmp/kokoro.sock"
	BLOCK_SIZE = 2048

	# Idle limit in seconds, configured in minutes via KOKOROD_TIMEOUT_MINUTES
	# (default 30). It keeps the ~700 MB resident process from lingering after
	# a single use; `kk` launches a new daemon on the next request.
	IDLE_TIMEOUT_SECONDS = 60 * int(os.environ.get("KOKOROD_TIMEOUT_MINUTES", "30"))

	# Requests flow from the socket handler threads to the main thread.
	_play_queue: "queue.Queue[tuple[str, str, float, float]]" = queue.Queue()

	# Raised by a new request to cut short whatever is currently playing.
	_interrupt = threading.Event()

	# Timestamp of the last received or completed request; read by the idle
	# watcher and guarded by the lock below.
	_last_activity = time.time()
	_last_activity_lock = threading.Lock()


	def _touch_activity():
	    """Record the current time as the most recent activity.

	    The idle watcher reads this timestamp under the same lock, so the write
	    is made while holding ``_last_activity_lock``.
	    """
	    global _last_activity
	    with _last_activity_lock:
	        _last_activity = time.time()

	# Persistent output stream, opened by the first _ensure_stream() call
	# (main() triggers that at startup through _prewarm_audio()), so it
	# inherits whatever output device is active at that moment.
	_persistent_stream: "sd.OutputStream | None" = None


	def _ensure_stream() -> "sd.OutputStream":
	    """Return the shared output stream, opening and starting it on first use.

	    Returns:
	        The module-wide ``sd.OutputStream`` (one channel, float32,
	        ``SAMPLE_RATE``, ``BLOCK_SIZE`` block size, ``latency="high"``).

	    Raises:
	        Whatever ``sd.OutputStream(...)`` or ``start()`` raises. Nothing is
	        cached in that case, so the next call tries again from scratch.
	    """
	    global _persistent_stream
	    if _persistent_stream is not None:
	        return _persistent_stream

	    stream = sd.OutputStream(
	        samplerate=SAMPLE_RATE,
	        channels=1,
	        blocksize=BLOCK_SIZE,
	        dtype="float32",
	        latency="high",
	    )
	    try:
	        stream.start()
	    except Exception:
	        # Never cache a stream that failed to start: release it and let the
	        # caller see the original error.
	        with contextlib.suppress(Exception):
	            stream.close()
	        raise

	    _persistent_stream = stream
	    print(
	        f"kokorod: persistent OutputStream opened "
	        f"(sr={SAMPLE_RATE}, blocksize={BLOCK_SIZE}, latency=high)",
	        flush=True,
	    )
	    return stream


	def _play_one(text: str, voice: str, speed: float, t_enqueued: float, t_dequeued: float) -> None:
	    """Generate audio for one request and play it through the persistent stream.

	    Replaces mcp_server._generate_and_play so the persistent OutputStream can
	    be reused. Honors the existing pause and stop sentinel files.

	    Args:
	        text: Text to speak; run through ``_preprocess_for_tts`` first.
	        voice: Voice name passed to ``model.generate``.
	        speed: Speed multiplier passed to ``model.generate``.
	        t_enqueued: ``time.time()`` taken when the request was queued.
	        t_dequeued: ``time.time()`` taken when the main loop picked it up.

	    Playback ends early when a newer request is waiting or the stop sentinel
	    exists. These conditions are checked between generated chunks only.
	    """

	    def _should_stop() -> bool:
	        # A newer request shows up either as the interrupt flag or as an
	        # entry still sitting in the queue. Checking the queue as well
	        # covers the case where the clear() below wiped a flag that a newer
	        # request had already set.
	        return (
	            _interrupt.is_set()
	            or not _play_queue.empty()
	            or os.path.exists(STOP_SENTINEL)
	        )

	    text = _preprocess_for_tts(text)
	    model = _get_model()
	    stream = _ensure_stream()

	    # Clean up stale sentinels from a previous session.
	    for f in (SENTINEL, STOP_SENTINEL):
	        with contextlib.suppress(FileNotFoundError):
	            os.remove(f)
	    _interrupt.clear()
	    mcp_server._set_state("playing")

	    first_audio = False
	    try:
	        with contextlib.redirect_stdout(sys.stderr):
	            for result in model.generate(
	                text=text, voice=voice, speed=speed, lang_code=_lang_code(voice),
	            ):
	                if _should_stop():
	                    break

	                # Pause sentinel — block until removed or stopped.
	                if os.path.exists(SENTINEL):
	                    mcp_server._set_state("paused")
	                    while os.path.exists(SENTINEL) and not _should_stop():
	                        time.sleep(0.1)
	                    if _should_stop():
	                        break
	                    mcp_server._set_state("playing")

	                audio = np.array(result.audio)
	                if len(audio) == 0:
	                    continue

	                if not first_audio:
	                    first_audio = True
	                    ttfa = time.time() - t_enqueued
	                    gen = time.time() - t_dequeued
	                    print(
	                        f"kokorod: TTFA {ttfa:.2f}s (first-chunk gen {gen:.2f}s)",
	                        flush=True,
	                    )
	                # Write the entire model result in one go. Slicing into
	                # BLOCK_SIZE pieces caused audible crackling at chunk
	                # boundaries when model.generate yielded slower than the
	                # output stream drained.
	                stream.write(audio.reshape(-1, 1))
	                # Count each played chunk as activity so the idle watcher
	                # does not exit the daemon in the middle of a long read.
	                _touch_activity()
	    finally:
	        mcp_server._set_state("idle")
	        with contextlib.suppress(FileNotFoundError):
	            os.remove(SENTINEL)


	class KokoroHandler(socketserver.StreamRequestHandler):
	    """Handle one client connection: read a JSON request line, queue it, reply."""

	    def handle(self):
	        """Validate the request, queue it for playback and send one reply line.

	        Malformed input is answered with ``{"ok": false, "error": ...}``
	        instead of raising inside the handler thread, which would leave the
	        client without any reply. An empty line gets no reply.
	        """
	        try:
	            line = self.rfile.readline().decode("utf-8").strip()
	        except UnicodeDecodeError as e:
	            self._respond(False, error=f"request is not valid UTF-8: {e}")
	            return
	        if not line:
	            return
	        try:
	            req = json.loads(line)
	        except json.JSONDecodeError as e:
	            self._respond(False, error=f"invalid JSON: {e}")
	            return
	        if not isinstance(req, dict):
	            self._respond(False, error="request must be a JSON object")
	            return

	        text = req.get("text") or ""
	        if not isinstance(text, str):
	            self._respond(False, error="text must be a string")
	            return
	        text = text.strip()
	        if not text:
	            self._respond(False, error="empty text")
	            return

	        # A missing or null voice/speed falls back to the defaults.
	        voice = req.get("voice") or DEFAULT_VOICE
	        if not isinstance(voice, str):
	            self._respond(False, error="voice must be a string")
	            return

	        raw_speed = req.get("speed")
	        if raw_speed is None:
	            raw_speed = DEFAULT_SPEED
	        try:
	            speed = float(raw_speed)
	        except (TypeError, ValueError):
	            self._respond(False, error=f"invalid speed: {raw_speed!r}")
	            return
	        # The comparison also rejects NaN.
	        if not (0 < speed < float("inf")):
	            self._respond(False, error=f"speed must be a positive number, got {speed}")
	            return

	        if len(text) < SHORT_TEXT_THRESHOLD:
	            text = text + SHORT_TEXT_PAD

	        # Tell whatever is currently playing to bail out so this request
	        # can take over. The flag is set before the request is queued: the
	        # consumer clears the flag when it starts a request, so setting it
	        # after queuing could interrupt this very request.
	        _interrupt.set()

	        _touch_activity()
	        word_count = len(text.split())
	        _play_queue.put((text, voice, speed, time.time()))
	        print(f"kokorod: queued {word_count} words ({voice} @ {speed}x)", flush=True)
	        self._respond(True, msg=f"Queued {word_count} words ({voice} @ {speed}x)")

	    def _respond(self, ok, msg=None, error=None):
	        """Write one newline-terminated JSON reply.

	        Args:
	            ok: Value of the ``"ok"`` field.
	            msg: Optional text for the ``"msg"`` field; omitted when empty.
	            error: Optional text for the ``"error"`` field; omitted when empty.
	        """
	        body = {"ok": ok}
	        if msg:
	            body["msg"] = msg
	        if error:
	            body["error"] = error
	        self.wfile.write((json.dumps(body) + "\n").encode("utf-8"))


	class ThreadedUnixServer(socketserver.ThreadingMixIn, socketserver.UnixStreamServer):
	    """Unix-socket stream server that serves each connection in its own thread."""

	    # Handler threads are daemon threads, so they never keep the process alive.
	    daemon_threads = True
	    allow_reuse_address = True


	def _prewarm():
	    """Force KokoroPipeline init so the first user speak doesn't pay it."""
	    t0 = time.time()
	    # The synthesized audio is discarded; only the warm-up matters.
	    _generate_audio("warm up.", DEFAULT_VOICE, DEFAULT_SPEED)
	    print(f"kokorod: pipeline pre-warmed in {time.time() - t0:.2f}s", flush=True)


	def _idle_watcher():
	    """Background thread: exit the daemon if no requests for IDLE_TIMEOUT_SECONDS.

	    Wakes once a minute, reads the last-activity timestamp under its lock,
	    and once the limit is reached removes the socket file and terminates
	    the process with ``os._exit(0)``.
	    """
	    while True:
	        time.sleep(60)
	        with _last_activity_lock:
	            idle = time.time() - _last_activity
	        if idle >= IDLE_TIMEOUT_SECONDS:
	            print(
	                f"kokorod: idle for {idle / 60:.1f} min "
	                f"(limit {IDLE_TIMEOUT_SECONDS / 60:.0f}); shutting down",
	                flush=True,
	            )
	            try:
	                os.unlink(SOCKET_PATH)
	            except FileNotFoundError:
	                pass
	            os._exit(0)


	def _prewarm_audio():
	    """Open the persistent OutputStream and write a tiny silence buffer
	    so AirPods / CoreAudio finish engaging before the first real speak."""
	    stream = _ensure_stream()
	    # Four blocks of float32 zeros in a single column.
	    silence = np.zeros((BLOCK_SIZE * 4, 1), dtype=np.float32)
	    stream.write(silence)
	    print("kokorod: audio device engaged with silence prebuffer", flush=True)


	def main():
	    """Run the daemon: warm up, bind the socket, then play queued requests.

	    The model, pipeline and audio device are warmed up before the socket is
	    created. Socket handling and the idle watcher run on daemon threads,
	    while this (main) thread consumes the play queue. On exit the socket
	    file is removed and the persistent stream is stopped and closed.
	    """
	    t_start = time.time()
	    print("kokorod: loading Kokoro-82M model...", flush=True)
	    _get_model()
	    print(f"kokorod: model loaded in {time.time() - t_start:.2f}s", flush=True)

	    _prewarm()
	    _prewarm_audio()

	    # Remove a socket file left over from an earlier run before binding.
	    try:
	        os.unlink(SOCKET_PATH)
	    except FileNotFoundError:
	        pass

	    server = ThreadedUnixServer(SOCKET_PATH, KokoroHandler)
	    os.chmod(SOCKET_PATH, 0o600)
	    print(
	        f"kokorod: listening on {SOCKET_PATH} "
	        f"(ready in {time.time() - t_start:.2f}s, "
	        f"idle timeout {IDLE_TIMEOUT_SECONDS // 60} min)",
	        flush=True,
	    )

	    server_thread = threading.Thread(
	        target=server.serve_forever, daemon=True, name="kokorod-socket"
	    )
	    server_thread.start()

	    idle_thread = threading.Thread(
	        target=_idle_watcher, daemon=True, name="kokorod-idle"
	    )
	    idle_thread.start()

	    _touch_activity()

	    # Main thread owns audio playback. macOS CoreAudio silently no-ops
	    # sd.OutputStream operations started from transient worker threads
	    # in a long-running socket-server process — the CLI works because it
	    # naturally runs on the main thread.
	    try:
	        while True:
	            text, voice, speed, t_enqueued = _play_queue.get()
	            t_dequeued = time.time()
	            mcp_server._next_playback_session()
	            mcp_server._playback_thread = threading.current_thread()
	            try:
	                _play_one(text, voice, speed, t_enqueued, t_dequeued)
	                print(
	                    f"kokorod: done; total {time.time() - t_enqueued:.2f}s",
	                    flush=True,
	                )
	            except Exception as e:
	                # One failed request must not take the daemon down.
	                print(f"kokorod: playback error: {e}", flush=True)
	            finally:
	                _touch_activity()
	    except KeyboardInterrupt:
	        print("\nkokorod: shutting down", flush=True)
	    finally:
	        try:
	            os.unlink(SOCKET_PATH)
	        except FileNotFoundError:
	            pass
	        if _persistent_stream is not None:
	            try:
	                _persistent_stream.stop()
	                _persistent_stream.close()
	            except Exception:
	                pass


	if __name__ == "__main__":
	    main()