Skip to content

Instantly share code, notes, and snippets.

@LeeBergstrand
Created March 27, 2026 04:49
Show Gist options
  • Select an option

  • Save LeeBergstrand/c0bceb42fa8101b38e026efde558f7e3 to your computer and use it in GitHub Desktop.

Select an option

Save LeeBergstrand/c0bceb42fa8101b38e026efde558f7e3 to your computer and use it in GitHub Desktop.
Claude Code Ollama Wrapper
#!/bin/zsh
# Launcher that (re)starts a local Ollama server with a chosen context length
# and then hands control to `ollama launch claude` for the selected model.
# Abort on command failure, on use of unset variables, and on a failure in any
# stage of a pipeline.
set -euo pipefail
# User-tunable defaults for the local Claude Code launcher.
# Model to run: CLAUDE_LOCAL_MODEL_KEY wins, then CLAUDE_LOCAL_MODEL, then the
# built-in default.
MODEL_KEY="${CLAUDE_LOCAL_MODEL_KEY:-${CLAUDE_LOCAL_MODEL:-qwen3.5:35b-a3b}}"
# Context length handed to the Ollama server via OLLAMA_CONTEXT_LENGTH.
CONTEXT_LENGTH="${CLAUDE_LOCAL_CONTEXT_LENGTH:-98304}"
# Window size the autocompact percentage is computed against.
CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW="${CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW:-200000}"
# Fraction of CONTEXT_LENGTH used as the compaction target.
TARGET_CONTEXT_UTILIZATION="${TARGET_CONTEXT_UTILIZATION:-0.75}"
# Port on 127.0.0.1 used both for the server and for every API probe.
PORT="${CLAUDE_LOCAL_PORT:-11434}"
# Optional explicit Ollama binary; when empty it is looked up on PATH later.
OLLAMA_BIN="${OLLAMA_BIN:-}"
# Per-user state directory (honours XDG_STATE_HOME) and the server log in it.
STATE_DIR="${XDG_STATE_HOME:-$HOME/.local/state}/claude-local"
OLLAMA_LOG="${STATE_DIR}/ollama.log"
# Set to 1 after this script launches `ollama serve`; nothing in the visible
# script reads it back.
SERVER_STARTED=0
# Emit one diagnostic line on stderr, prefixed with the launcher name.
# All arguments are joined into a single message.
log() {
  local message="$*"
  printf 'claude-local: %s\n' "$message" 1>&2
}
# Report a fatal condition through log, then terminate the script with
# exit status 1.
die() {
  local reason="$*"
  log "$reason"
  exit 1
}
# Readiness probe: succeeds only when an HTTP request to the model-inventory
# endpoint (/api/tags) on 127.0.0.1:${PORT} succeeds. All curl output is
# discarded; only the exit status matters.
server_ready() {
  local tags_url="http://127.0.0.1:${PORT}/api/tags"
  curl --fail --silent --show-error "$tags_url" &>/dev/null
}
# Check whether the requested model is already present before prompting/pulling.
#
# Globals: PORT (read), MODEL_KEY (read)
# Returns: 0 when the /api/tags inventory lists MODEL_KEY, either verbatim or
#          with the implicit ":latest" tag Ollama assigns to untagged names;
#          non-zero when it does not or the inventory cannot be fetched.
model_available() {
  local inventory
  # Read the whole response before matching. Piping straight into `grep -q`
  # lets grep exit on the first hit, which can kill the upstream stages with
  # SIGPIPE on a large inventory; under `pipefail` that turns a successful
  # match into a failure.
  inventory="$(
    curl -fsS "http://127.0.0.1:${PORT}/api/tags" 2>/dev/null \
      | tr -d '[:space:]'
  )" || return 1
  # Whitespace was stripped above, so the JSON key/value pair is contiguous.
  # The closing quote in the pattern prevents prefix matches on longer names.
  [[ "$inventory" == *"\"name\":\"${MODEL_KEY}\""* \
    || "$inventory" == *"\"name\":\"${MODEL_KEY}:latest\""* ]]
}
# Download MODEL_KEY with `ollama pull`. The pull command's stdout is sent to
# stderr, and a failed pull aborts the script via die.
pull_model() {
  log "pulling model '${MODEL_KEY}'"
  if ! "$OLLAMA_BIN" pull "$MODEL_KEY" 1>&2; then
    die "pull failed; check the model name"
  fi
}
# Terminate the process(es) listening on 127.0.0.1:${PORT}, but only when
# every listener is an Ollama process.
#
# Globals: PORT (read)
# Returns: 0 after the kill request was sent; 1 when lsof reports no listener
#          (for example, it is owned by another user or lsof is unavailable)
#          or when kill fails.
# Exits:   via die when any listener's command name does not contain "ollama".
stop_server() {
  local -a pids
  local pid
  local cmd
  # Split the lsof output on newlines with the expansion left unquoted so that
  # empty output yields an empty array. The quoted "${(@f)...}" form keeps a
  # single empty element in that case, which would defeat the guard below and
  # send an empty pid through the ownership check.
  pids=(${(f)"$(lsof -nP -tiTCP:${PORT} -sTCP:LISTEN 2>/dev/null || true)"})
  if (( ${#pids[@]} == 0 )); then
    return 1
  fi
  # Verify every listener before signalling any of them.
  for pid in "${pids[@]}"; do
    cmd="$(ps -o comm= -p "$pid" 2>/dev/null || true)"
    if [[ "$cmd" != *ollama* ]]; then
      die "refusing to stop non-Ollama process on port ${PORT}: pid ${pid} (${cmd:-unknown})"
    fi
  done
  log "stopping Ollama on port ${PORT}: ${pids[*]}"
  kill "${pids[@]}" 2>/dev/null || return 1
  return 0
}
# Poll server_ready once per second until it succeeds or the timeout elapses,
# showing a single self-overwriting progress line on stderr while waiting.
#
# Arguments: $1 - maximum number of probes/seconds to wait (default: 20)
# Returns:   0 as soon as the server answers, 1 when the timeout is exhausted
wait_for_server() {
  local timeout="${1:-20}"
  local elapsed
  local progress_shown=0
  for (( elapsed = 1; elapsed <= timeout; elapsed++ )); do
    if server_ready; then
      # Terminate the progress line only when one was actually printed, so an
      # immediately-ready server does not produce a stray blank line.
      if (( progress_shown )); then
        printf '\n' >&2
      fi
      return 0
    fi
    printf '\rclaude-local: waiting for Ollama... %ds' "$elapsed" >&2
    progress_shown=1
    sleep 1
  done
  if (( progress_shown )); then
    printf '\n' >&2
  fi
  return 1
}
# Resolve the Ollama binary. An unset/empty OLLAMA_BIN falls back to whatever
# `ollama` is on PATH. A value that is not directly executable (for example a
# bare command name such as "ollama") is also looked up on PATH instead of
# being tested relative to the current directory.
if [[ -z "$OLLAMA_BIN" ]]; then
  OLLAMA_BIN="$(command -v ollama 2>/dev/null || true)"
elif [[ ! -x "$OLLAMA_BIN" ]]; then
  OLLAMA_BIN="$(command -v "$OLLAMA_BIN" 2>/dev/null || true)"
fi
# Directories also carry the executable bit, so reject them explicitly.
if [[ -z "${OLLAMA_BIN:-}" || ! -x "$OLLAMA_BIN" || -d "$OLLAMA_BIN" ]]; then
  die "could not find ollama"
fi
# Every readiness/inventory probe goes through curl; without it the script
# would otherwise fail later with a misleading "did not become ready" timeout.
if ! command -v curl >/dev/null 2>&1; then
  die "could not find curl"
fi
mkdir -p "$STATE_DIR"
log "using Ollama binary: ${OLLAMA_BIN}"
if [[ "$PWD" == "$HOME" ]]; then
  # Single quotes are intentional: the message shows the literal text $HOME.
  log 'warning: launching from $HOME is slow and noisy; use a project directory for testing'
fi
# A server that is already answering is stopped first so that the new one can
# be started with the requested context length.
if server_ready; then
  log "restarting the running Ollama server to apply context ${CONTEXT_LENGTH}"
  if ! stop_server; then
    die "failed to stop the running Ollama server on port ${PORT}"
  fi
  # Probe up to ten times, one second apart, for the old server to go away.
  shutdown_polls=0
  while (( shutdown_polls < 10 )) && server_ready; do
    sleep 1
    shutdown_polls=$(( shutdown_polls + 1 ))
  done
  if server_ready; then
    die "server on port ${PORT} is still running after restart request"
  fi
fi
log "starting Ollama server on port ${PORT} (context ${CONTEXT_LENGTH})"
# Run the server in the background under nohup; its stdout and stderr both go
# to the log file.
OLLAMA_CONTEXT_LENGTH="$CONTEXT_LENGTH" \
  OLLAMA_HOST="127.0.0.1:${PORT}" \
  nohup "$OLLAMA_BIN" serve >"$OLLAMA_LOG" 2>&1 &
SERVER_STARTED=1
if ! wait_for_server; then
  die "Ollama did not become ready; check ${OLLAMA_LOG}"
fi
log "Ollama is ready at http://127.0.0.1:${PORT}"
# Make sure the model exists locally. With a terminal on stdin the user is
# asked first (empty answer, "y" or "yes" in any case means pull); without a
# terminal the model is pulled unconditionally.
if model_available; then
  log "model '${MODEL_KEY}' is already available locally"
elif [[ -t 0 ]]; then
  printf 'claude-local: model "%s" not found locally. Pull it now? [Y/n] ' "$MODEL_KEY" >&2
  # `read` fails on end-of-input (e.g. Ctrl-D). Handle that explicitly so the
  # script stops with a message instead of being killed silently by `set -e`
  # with the cursor still on the prompt line.
  if ! read -r reply; then
    printf '\n' >&2
    die "model not available"
  fi
  case "$reply" in
    '' | [Yy] | [Yy][Ee][Ss])
      pull_model
      ;;
    *)
      die "model not available"
      ;;
  esac
else
  pull_model
fi
# Environment for the launched client; values already set by the caller win
# for the first two, while OLLAMA_HOST always points at the local server.
export CLAUDE_CODE_ATTRIBUTION_HEADER="${CLAUDE_CODE_ATTRIBUTION_HEADER:-0}"
export DISABLE_PROMPT_CACHING="${DISABLE_PROMPT_CACHING:-1}"
export OLLAMA_HOST="127.0.0.1:${PORT}"
# Validate the numeric settings before doing arithmetic on them. Without this
# a typo surfaces as a cryptic shell math error, and a zero window turns the
# floating-point division below into "inf".
if [[ -z "$CONTEXT_LENGTH" || "$CONTEXT_LENGTH" == *[^0-9]* ]] || (( CONTEXT_LENGTH <= 0 )); then
  die "CLAUDE_LOCAL_CONTEXT_LENGTH must be a positive integer (got '${CONTEXT_LENGTH}')"
fi
if [[ -z "$CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW" || "$CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW" == *[^0-9]* ]] \
  || (( CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW <= 0 )); then
  die "CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW must be a positive integer (got '${CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW}')"
fi
# Plain decimal number with an optional exponent, e.g. 0.75, .75, 1 or 7.5e-1.
# The pattern is kept in a variable so the regex is not subject to shell quoting.
utilization_re='^([0-9]+([.][0-9]*)?|[.][0-9]+)([eE][-+]?[0-9]+)?$'
if [[ ! "$TARGET_CONTEXT_UTILIZATION" =~ $utilization_re ]]; then
  die "TARGET_CONTEXT_UTILIZATION must be a non-negative number (got '${TARGET_CONTEXT_UTILIZATION}')"
fi
# Translate the local Ollama window into Claude Code's assumed 200k budgeting.
# Both values rely on zsh floating-point arithmetic and are rounded to whole
# numbers by printf.
TARGET_LOCAL_TOKENS="$(
  printf '%.0f' "$(( CONTEXT_LENGTH * TARGET_CONTEXT_UTILIZATION ))"
)"
COMPUTED_AUTOCOMPACT_PCT="$(
  printf '%.0f' "$(( CONTEXT_LENGTH * TARGET_CONTEXT_UTILIZATION * 100.0 / CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW ))"
)"
# A CLAUDE_AUTOCOMPACT_PCT_OVERRIDE supplied by the caller takes precedence
# over the computed percentage.
AUTOCOMPACT_PCT="${CLAUDE_AUTOCOMPACT_PCT_OVERRIDE:-$COMPUTED_AUTOCOMPACT_PCT}"
export CLAUDE_AUTOCOMPACT_PCT_OVERRIDE="$AUTOCOMPACT_PCT"
log "Claude autocompact threshold: ${AUTOCOMPACT_PCT}% of Claude Code's assumed ${CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW}-token window; this is intended to compact around ${TARGET_CONTEXT_UTILIZATION} of your local ${CONTEXT_LENGTH}-token Ollama context (~${TARGET_LOCAL_TOKENS} local tokens)"
# Assemble the `ollama launch claude` command line. Any arguments given to
# this script are forwarded after a "--" separator.
launch_args=(launch claude --model "$MODEL_KEY")
[[ $# -eq 0 ]] || launch_args+=(-- "$@")
log "launching Claude Code with model '${MODEL_KEY}'"
# Replace this shell with the Ollama process.
exec "$OLLAMA_BIN" "${launch_args[@]}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment