LeeBergstrand · March 27, 2026 04:49
diff --git a/claude-local.sh b/claude-local.sh
 #!/bin/zsh
 # claude-local: (re)start a local Ollama server with a chosen context length,
 # make sure the requested model is present, then hand off to `ollama launch claude`.

 # Abort on command failure, on use of an unset variable, and on pipeline failure.
 set -euo pipefail

 # User-tunable defaults for the local Claude Code launcher.
 # Model name; CLAUDE_LOCAL_MODEL_KEY takes precedence over CLAUDE_LOCAL_MODEL.
 MODEL_KEY="${CLAUDE_LOCAL_MODEL_KEY:-${CLAUDE_LOCAL_MODEL:-qwen3.5:35b-a3b}}"
 # Context length passed to the server through OLLAMA_CONTEXT_LENGTH.
 CONTEXT_LENGTH="${CLAUDE_LOCAL_CONTEXT_LENGTH:-98304}"
 # Window size used as the denominator when computing the autocompact percentage.
 CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW="${CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW:-200000}"
 # Fraction of CONTEXT_LENGTH at which compaction is meant to happen.
 TARGET_CONTEXT_UTILIZATION="${TARGET_CONTEXT_UTILIZATION:-0.75}"
 # Loopback port on which the Ollama server is probed and started.
 PORT="${CLAUDE_LOCAL_PORT:-11434}"
 # Path to the ollama executable; when empty it is looked up on PATH further down.
 OLLAMA_BIN="${OLLAMA_BIN:-}"
 # State directory (under XDG_STATE_HOME when set) and the server log stored in it.
 STATE_DIR="${XDG_STATE_HOME:-$HOME/.local/state}/claude-local"
 OLLAMA_LOG="${STATE_DIR}/ollama.log"
 # Flipped to 1 right after the server is launched in the background.
 SERVER_STARTED=0

 # Write one diagnostic line to stderr, prefixed with the script name.
 # All arguments are joined into a single message.
 log() {
  local message="$*"
  printf 'claude-local: %s\n' "$message" >&2
 }

 # Report a fatal error through log, then end the script with exit status 1.
 die() {
  local reason="$*"
  log "$reason"
  exit 1
 }

 # Probe the server by requesting /api/tags on the loopback port.
 # Succeeds only when curl completes the request; all curl output is discarded.
 server_ready() {
  local tags_url="http://127.0.0.1:${PORT}/api/tags"
  curl -fsS "$tags_url" &>/dev/null
 }

 # Check whether the requested model is already present before prompting/pulling.
 # Returns 0 when the /api/tags inventory lists MODEL_KEY. A key without a tag
 # also matches "<key>:latest", the name Ollama reports for untagged models.
 # Returns non-zero when the model is absent or the server cannot be queried.
 model_available() {
  local inventory

  # Capture the whole response before matching: piping curl into `grep -q`
  # under `pipefail` can report failure even on a match, because grep exits
  # early and an upstream stage may then be killed by SIGPIPE.
  inventory="$(curl -fsS "http://127.0.0.1:${PORT}/api/tags" 2>/dev/null)" || return 1
  # Drop all whitespace so the match does not depend on JSON formatting.
  inventory="${inventory//[[:space:]]/}"

  if [[ "$inventory" == *"\"name\":\"${MODEL_KEY}\""* ]]; then
    return 0
  fi
  [[ "$MODEL_KEY" != *:* && "$inventory" == *"\"name\":\"${MODEL_KEY}:latest\""* ]]
 }

 # Download MODEL_KEY with `ollama pull`, sending the tool's stdout to stderr.
 # Terminates the script through die when the pull command fails.
 pull_model() {
  log "pulling model '${MODEL_KEY}'"
  if ! "$OLLAMA_BIN" pull "$MODEL_KEY" 1>&2; then
    die "pull failed; check the model name"
  fi
 }

 # Stop the Ollama process(es) listening on PORT.
 # Returns 0 once the termination signal has been sent, 1 when no listener is
 # found or kill fails. Dies if any listener does not look like Ollama.
 stop_server() {
  local -a pids
  local pid
  local cmd

  # Split lsof's PID list on newlines. The unquoted (f) form drops empty
  # elements, so an empty lsof result gives an empty array and the guard
  # below fires, instead of the loop running once with a blank PID.
  pids=(${(f)"$(lsof -nP -tiTCP:${PORT} -sTCP:LISTEN 2>/dev/null || true)"})
  if (( ${#pids[@]} == 0 )); then
    return 1
  fi

  # Vet every listener before signalling any of them.
  for pid in "${pids[@]}"; do
    cmd="$(ps -o comm= -p "$pid" 2>/dev/null || true)"
    if [[ "$cmd" != *ollama* ]]; then
      die "refusing to stop non-Ollama process on port ${PORT}: pid ${pid} (${cmd:-unknown})"
    fi
  done

  log "stopping Ollama on port ${PORT}: ${pids[*]}"
  kill "${pids[@]}" 2>/dev/null || return 1
  return 0
 }

 # Poll server_ready up to 20 times, one second apart, redrawing a progress
 # line on stderr after each failed probe.
 # Returns 0 as soon as a probe succeeds, 1 after the last attempt fails.
 wait_for_server() {
  local attempt

  for (( attempt = 1; attempt <= 20; attempt++ )); do
    if ! server_ready; then
      printf '\rclaude-local: waiting for Ollama... %ds' "$attempt" >&2
      sleep 1
      continue
    fi
    # Finish the progress line before reporting success.
    printf '\n' >&2
    return 0
  done

  printf '\n' >&2
  return 1
 }

 # Keep an explicitly configured OLLAMA_BIN; otherwise look ollama up on PATH.
 [[ -n "$OLLAMA_BIN" ]] || OLLAMA_BIN="$(command -v ollama 2>/dev/null || true)"

 # Give up unless the result is non-empty and passes the executable test.
 [[ -n "${OLLAMA_BIN:-}" && -x "$OLLAMA_BIN" ]] || die "could not find ollama"

 # The state directory holds the server log, so create it before anything starts.
 mkdir -p "$STATE_DIR"
 log "using Ollama binary: ${OLLAMA_BIN}"

 # Warn (without aborting) when the launcher is run directly from the home directory.
 [[ "$PWD" != "$HOME" ]] || log 'warning: launching from $HOME is slow and noisy; use a project directory for testing'

 # A server already answering on PORT is stopped first so the new one can be
 # started with the requested context length.
 if server_ready; then
  log "restarting the running Ollama server to apply context ${CONTEXT_LENGTH}"
  if ! stop_server; then
    die "failed to stop the running Ollama server on port ${PORT}"
  fi
  # Allow up to ten one-second polls for the old server to disappear.
  for _ in {1..10}; do
    server_ready || break
    sleep 1
  done
  if server_ready; then
    die "server on port ${PORT} is still running after restart request"
  fi
 fi

 log "starting Ollama server on port ${PORT} (context ${CONTEXT_LENGTH})"
 # Run the server in the background under nohup; its stdout and stderr go to the log file.
 OLLAMA_CONTEXT_LENGTH="$CONTEXT_LENGTH" \
  OLLAMA_HOST="127.0.0.1:${PORT}" \
  nohup "$OLLAMA_BIN" serve >"$OLLAMA_LOG" 2>&1 &
 SERVER_STARTED=1
 if ! wait_for_server; then
  die "Ollama did not become ready; check ${OLLAMA_LOG}"
 fi
 log "Ollama is ready at http://127.0.0.1:${PORT}"

 # Make sure the model is present: nothing to do when it is listed, pull
 # unattended when stdin is not a terminal, otherwise ask first (default: yes).
 if model_available; then
  log "model '${MODEL_KEY}' is already available locally"
 elif [[ ! -t 0 ]]; then
  pull_model
 else
  printf 'claude-local: model "%s" not found locally. Pull it now? [Y/n] ' "$MODEL_KEY" >&2
  read -r reply
  case "${reply:-}" in
    ''|[Yy]|[Yy][Ee][Ss]) pull_model ;;
    *) die "model not available" ;;
  esac
 fi

 # Export settings for child processes. Values already present in the
 # environment are kept for the first two variables; OLLAMA_HOST is always set.
 : "${CLAUDE_CODE_ATTRIBUTION_HEADER:=0}"
 : "${DISABLE_PROMPT_CACHING:=1}"
 OLLAMA_HOST="127.0.0.1:${PORT}"
 export CLAUDE_CODE_ATTRIBUTION_HEADER DISABLE_PROMPT_CACHING OLLAMA_HOST

 # Express the local Ollama window in terms of the window size Claude Code is
 # assumed to budget against.
 # Token count at which the local context reaches the target utilization.
 TARGET_LOCAL_TOKENS="$(printf '%.0f' "$(( CONTEXT_LENGTH * TARGET_CONTEXT_UTILIZATION ))")"
 # The same token count as a whole-number percentage of the assumed window.
 COMPUTED_AUTOCOMPACT_PCT="$(printf '%.0f' "$(( CONTEXT_LENGTH * TARGET_CONTEXT_UTILIZATION * 100.0 / CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW ))")"
 # A CLAUDE_AUTOCOMPACT_PCT_OVERRIDE supplied by the caller wins over the computed value.
 export CLAUDE_AUTOCOMPACT_PCT_OVERRIDE="${CLAUDE_AUTOCOMPACT_PCT_OVERRIDE:-$COMPUTED_AUTOCOMPACT_PCT}"
 AUTOCOMPACT_PCT="$CLAUDE_AUTOCOMPACT_PCT_OVERRIDE"
 log "Claude autocompact threshold: ${AUTOCOMPACT_PCT}% of Claude Code's assumed ${CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW}-token window; this is intended to compact around ${TARGET_CONTEXT_UTILIZATION} of your local ${CONTEXT_LENGTH}-token Ollama context (~${TARGET_LOCAL_TOKENS} local tokens)"

 # Build the `ollama launch` argument list; any arguments given to this script
 # are forwarded after a `--` separator.
 launch_args=(launch claude --model "$MODEL_KEY")
 (( $# == 0 )) || launch_args+=(-- "$@")
 log "launching Claude Code with model '${MODEL_KEY}'"
 # Replace this shell with the ollama process.
 exec "$OLLAMA_BIN" "${launch_args[@]}"
	#!/bin/zsh
	# claude-local: (re)start a local Ollama server with a chosen context length,
	# make sure the requested model is present, then hand off to `ollama launch claude`.

	# Abort on command failure, on use of an unset variable, and on pipeline failure.
	set -euo pipefail

	# User-tunable defaults for the local Claude Code launcher.
	# Model name; CLAUDE_LOCAL_MODEL_KEY takes precedence over CLAUDE_LOCAL_MODEL.
	MODEL_KEY="${CLAUDE_LOCAL_MODEL_KEY:-${CLAUDE_LOCAL_MODEL:-qwen3.5:35b-a3b}}"
	# Context length passed to the server through OLLAMA_CONTEXT_LENGTH.
	CONTEXT_LENGTH="${CLAUDE_LOCAL_CONTEXT_LENGTH:-98304}"
	# Window size used as the denominator when computing the autocompact percentage.
	CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW="${CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW:-200000}"
	# Fraction of CONTEXT_LENGTH at which compaction is meant to happen.
	TARGET_CONTEXT_UTILIZATION="${TARGET_CONTEXT_UTILIZATION:-0.75}"
	# Loopback port on which the Ollama server is probed and started.
	PORT="${CLAUDE_LOCAL_PORT:-11434}"
	# Path to the ollama executable; when empty it is looked up on PATH further down.
	OLLAMA_BIN="${OLLAMA_BIN:-}"
	# State directory (under XDG_STATE_HOME when set) and the server log stored in it.
	STATE_DIR="${XDG_STATE_HOME:-$HOME/.local/state}/claude-local"
	OLLAMA_LOG="${STATE_DIR}/ollama.log"
	# Flipped to 1 right after the server is launched in the background.
	SERVER_STARTED=0

	# Write one diagnostic line to stderr, prefixed with the script name.
	# All arguments are joined into a single message.
	log() {
	  local message="$*"
	  printf 'claude-local: %s\n' "$message" >&2
	}

	# Report a fatal error through log, then end the script with exit status 1.
	die() {
	  local reason="$*"
	  log "$reason"
	  exit 1
	}

	# Probe the server by requesting /api/tags on the loopback port.
	# Succeeds only when curl completes the request; all curl output is discarded.
	server_ready() {
	  local tags_url="http://127.0.0.1:${PORT}/api/tags"
	  curl -fsS "$tags_url" &>/dev/null
	}

	# Check whether the requested model is already present before prompting/pulling.
	# Returns 0 when the /api/tags inventory lists MODEL_KEY. A key without a tag
	# also matches "<key>:latest", the name Ollama reports for untagged models.
	# Returns non-zero when the model is absent or the server cannot be queried.
	model_available() {
	  local inventory

	  # Capture the whole response before matching: piping curl into `grep -q`
	  # under `pipefail` can report failure even on a match, because grep exits
	  # early and an upstream stage may then be killed by SIGPIPE.
	  inventory="$(curl -fsS "http://127.0.0.1:${PORT}/api/tags" 2>/dev/null)" || return 1
	  # Drop all whitespace so the match does not depend on JSON formatting.
	  inventory="${inventory//[[:space:]]/}"

	  if [[ "$inventory" == *"\"name\":\"${MODEL_KEY}\""* ]]; then
	    return 0
	  fi
	  [[ "$MODEL_KEY" != *:* && "$inventory" == *"\"name\":\"${MODEL_KEY}:latest\""* ]]
	}

	# Download MODEL_KEY with `ollama pull`, sending the tool's stdout to stderr.
	# Terminates the script through die when the pull command fails.
	pull_model() {
	  log "pulling model '${MODEL_KEY}'"
	  # A real `||` is required here; an escaped one is handed to ollama as
	  # literal arguments and the failure branch never runs.
	  "$OLLAMA_BIN" pull "$MODEL_KEY" >&2 || die "pull failed; check the model name"
	}

	# Stop the Ollama process(es) listening on PORT.
	# Returns 0 once the termination signal has been sent, 1 when no listener is
	# found or kill fails. Dies if any listener does not look like Ollama.
	stop_server() {
	  local -a pids
	  local pid
	  local cmd

	  # Split lsof's PID list on newlines. The unquoted (f) form drops empty
	  # elements, so an empty lsof result gives an empty array and the guard
	  # below fires, instead of the loop running once with a blank PID.
	  pids=(${(f)"$(lsof -nP -tiTCP:${PORT} -sTCP:LISTEN 2>/dev/null || true)"})
	  if (( ${#pids[@]} == 0 )); then
	    return 1
	  fi

	  # Vet every listener before signalling any of them. A substring match is
	  # used because `ps -o comm=` may print the full executable path rather
	  # than the bare command name.
	  for pid in "${pids[@]}"; do
	    cmd="$(ps -o comm= -p "$pid" 2>/dev/null || true)"
	    if [[ "$cmd" != *ollama* ]]; then
	      die "refusing to stop non-Ollama process on port ${PORT}: pid ${pid} (${cmd:-unknown})"
	    fi
	  done

	  log "stopping Ollama on port ${PORT}: ${pids[*]}"
	  kill "${pids[@]}" 2>/dev/null || return 1
	  return 0
	}

	# Poll server_ready up to 20 times, one second apart, redrawing a progress
	# line on stderr after each failed probe.
	# Returns 0 as soon as a probe succeeds, 1 after the last attempt fails.
	wait_for_server() {
	  local attempt

	  for (( attempt = 1; attempt <= 20; attempt++ )); do
	    if ! server_ready; then
	      printf '\rclaude-local: waiting for Ollama... %ds' "$attempt" >&2
	      sleep 1
	      continue
	    fi
	    # Finish the progress line before reporting success.
	    printf '\n' >&2
	    return 0
	  done

	  printf '\n' >&2
	  return 1
	}

	# Keep an explicitly configured OLLAMA_BIN; otherwise look ollama up on PATH.
	# `|| true` keeps a failed lookup from aborting the script under `set -e`.
	if [[ -z "$OLLAMA_BIN" ]]; then
	  OLLAMA_BIN="$(command -v ollama 2>/dev/null || true)"
	fi

	# Give up unless the result is non-empty and passes the executable test.
	if [[ -z "${OLLAMA_BIN:-}" || ! -x "$OLLAMA_BIN" ]]; then
	  die "could not find ollama"
	fi

	# The state directory holds the server log, so create it before anything starts.
	mkdir -p "$STATE_DIR"
	log "using Ollama binary: ${OLLAMA_BIN}"

	# Warn (without aborting) when the launcher is run directly from the home directory.
	if [[ "$PWD" == "$HOME" ]]; then
	  log 'warning: launching from $HOME is slow and noisy; use a project directory for testing'
	fi

	# A server already answering on PORT is stopped first so the new one can be
	# started with the requested context length.
	if server_ready; then
	  log "restarting the running Ollama server to apply context ${CONTEXT_LENGTH}"
	  stop_server || die "failed to stop the running Ollama server on port ${PORT}"
	  # Allow up to ten one-second polls for the old server to disappear.
	  for _ in {1..10}; do
	    if ! server_ready; then
	      break
	    fi
	    sleep 1
	  done
	  if server_ready; then
	    die "server on port ${PORT} is still running after restart request"
	  fi
	fi

	log "starting Ollama server on port ${PORT} (context ${CONTEXT_LENGTH})"
	# Run the server in the background under nohup; its stdout and stderr go to the log file.
	OLLAMA_CONTEXT_LENGTH="$CONTEXT_LENGTH" OLLAMA_HOST="127.0.0.1:${PORT}" nohup "$OLLAMA_BIN" serve >"$OLLAMA_LOG" 2>&1 &
	SERVER_STARTED=1
	wait_for_server || die "Ollama did not become ready; check ${OLLAMA_LOG}"
	log "Ollama is ready at http://127.0.0.1:${PORT}"

	# Make sure the model is present: ask first when stdin is a terminal
	# (an empty answer counts as yes), pull unattended otherwise.
	if ! model_available; then
	  if [[ -t 0 ]]; then
	    printf 'claude-local: model "%s" not found locally. Pull it now? [Y/n] ' "$MODEL_KEY" >&2
	    read -r reply
	    if [[ -z "${reply:-}" || "${reply}" == [Yy] || "${reply}" == [Yy][Ee][Ss] ]]; then
	      pull_model
	    else
	      die "model not available"
	    fi
	  else
	    pull_model
	  fi
	else
	  log "model '${MODEL_KEY}' is already available locally"
	fi

	# Export settings for child processes. Values already present in the
	# environment are kept for the first two variables; OLLAMA_HOST is always set.
	: "${CLAUDE_CODE_ATTRIBUTION_HEADER:=0}"
	: "${DISABLE_PROMPT_CACHING:=1}"
	OLLAMA_HOST="127.0.0.1:${PORT}"
	export CLAUDE_CODE_ATTRIBUTION_HEADER DISABLE_PROMPT_CACHING OLLAMA_HOST

	# Express the local Ollama window in terms of the window size Claude Code is
	# assumed to budget against.
	# Token count at which the local context reaches the target utilization.
	TARGET_LOCAL_TOKENS="$(printf '%.0f' "$(( CONTEXT_LENGTH * TARGET_CONTEXT_UTILIZATION ))")"
	# The same token count as a whole-number percentage of the assumed window.
	COMPUTED_AUTOCOMPACT_PCT="$(printf '%.0f' "$(( CONTEXT_LENGTH * TARGET_CONTEXT_UTILIZATION * 100.0 / CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW ))")"
	# A CLAUDE_AUTOCOMPACT_PCT_OVERRIDE supplied by the caller wins over the computed value.
	export CLAUDE_AUTOCOMPACT_PCT_OVERRIDE="${CLAUDE_AUTOCOMPACT_PCT_OVERRIDE:-$COMPUTED_AUTOCOMPACT_PCT}"
	AUTOCOMPACT_PCT="$CLAUDE_AUTOCOMPACT_PCT_OVERRIDE"
	log "Claude autocompact threshold: ${AUTOCOMPACT_PCT}% of Claude Code's assumed ${CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW}-token window; this is intended to compact around ${TARGET_CONTEXT_UTILIZATION} of your local ${CONTEXT_LENGTH}-token Ollama context (~${TARGET_LOCAL_TOKENS} local tokens)"

	# Build the `ollama launch` argument list; any arguments given to this script
	# are forwarded after a `--` separator.
	launch_args=(launch claude --model "$MODEL_KEY")
	(( $# == 0 )) || launch_args+=(-- "$@")
	log "launching Claude Code with model '${MODEL_KEY}'"
	# Replace this shell with the ollama process.
	exec "$OLLAMA_BIN" "${launch_args[@]}"
No results found