Created
March 27, 2026 04:49
-
-
Save LeeBergstrand/c0bceb42fa8101b38e026efde558f7e3 to your computer and use it in GitHub Desktop.
Claude Code Ollama Wrapper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/zsh
set -e -u -o pipefail

# ---------------------------------------------------------------------------
# Settings for the local Claude Code launcher.
# Each value falls back to the default after `:-` when the corresponding
# environment variable is unset or empty.
# ---------------------------------------------------------------------------

# Model to run. CLAUDE_LOCAL_MODEL_KEY takes precedence over CLAUDE_LOCAL_MODEL.
MODEL_KEY="${CLAUDE_LOCAL_MODEL:-qwen3.5:35b-a3b}"
MODEL_KEY="${CLAUDE_LOCAL_MODEL_KEY:-$MODEL_KEY}"

# Context length handed to `ollama serve` through OLLAMA_CONTEXT_LENGTH.
CONTEXT_LENGTH="${CLAUDE_LOCAL_CONTEXT_LENGTH:-98304}"

# Inputs to the autocompact-threshold calculation performed before launch.
CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW="${CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW:-200000}"
TARGET_CONTEXT_UTILIZATION="${TARGET_CONTEXT_UTILIZATION:-0.75}"

# Port of the Ollama server on 127.0.0.1.
PORT="${CLAUDE_LOCAL_PORT:-11434}"

# Path to the ollama executable; when empty it is looked up on PATH later.
OLLAMA_BIN="${OLLAMA_BIN:-}"

# Directory and file that receive the server's output.
STATE_DIR="${XDG_STATE_HOME:-$HOME/.local/state}/claude-local"
OLLAMA_LOG="$STATE_DIR/ollama.log"

# Set to 1 once this script has launched `ollama serve`.
SERVER_STARTED=0
# Write one diagnostic line to stderr, prefixed with "claude-local: ".
# All arguments are joined into a single message.
log() {
  local message="$*"
  printf 'claude-local: %s\n' "$message" 1>&2
}
# Report a fatal error through log, then terminate the script with status 1.
die() {
  log "$@"
  exit 1
}
# Readiness probe: Ollama serves its model inventory at /api/tags once it is
# up, so a successful GET of that URL on 127.0.0.1:${PORT} means "ready".
# curl's output is discarded; the function's status is curl's status.
server_ready() {
  local tags_url="http://127.0.0.1:${PORT}/api/tags"
  curl -fsS "$tags_url" >/dev/null 2>&1
}
# Check whether the requested model is already present before prompting/pulling.
#
# Fetches the inventory from /api/tags and looks for a "name" entry equal to
# MODEL_KEY. When MODEL_KEY carries no tag, the ":latest" form is accepted as
# well, because Ollama lists untagged models under the implicit "latest" tag.
#
# Globals: PORT, MODEL_KEY (read)
# Returns: 0 when the model is listed; non-zero when it is not listed or the
#          inventory could not be fetched.
model_available() {
  local inventory needle

  # Capture the whole response before matching. Piping curl into `grep -q`
  # under `set -o pipefail` can report failure on a successful match: grep
  # exits at the first hit and the upstream stages may then die of SIGPIPE.
  inventory="$(curl -fsS "http://127.0.0.1:${PORT}/api/tags" 2>/dev/null)" || return 1

  # Remove all whitespace so the match does not depend on JSON formatting.
  inventory="${inventory//[[:space:]]/}"

  needle="\"name\":\"${MODEL_KEY}\""
  if [[ "$inventory" == *"$needle"* ]]; then
    return 0
  fi

  # Only the part after the last "/" is inspected for a tag, so a registry
  # host such as "localhost:5000/model" is not mistaken for a tagged name.
  if [[ "${MODEL_KEY##*/}" != *:* ]]; then
    needle="\"name\":\"${MODEL_KEY}:latest\""
    if [[ "$inventory" == *"$needle"* ]]; then
      return 0
    fi
  fi

  return 1
}
# Download MODEL_KEY with `ollama pull`, routing the command's stdout to
# stderr. Aborts the script through die when the pull fails.
pull_model() {
  log "pulling model '${MODEL_KEY}'"
  if ! "$OLLAMA_BIN" pull "$MODEL_KEY" 1>&2; then
    die "pull failed; check the model name"
  fi
}
# Stop the Ollama server that is listening on PORT.
#
# Every listening PID reported by lsof must belong to a command whose name
# contains "ollama"; otherwise the script aborts through die without sending
# any signal.
#
# Globals: PORT (read)
# Returns: 0 when the signal was sent to all listeners;
#          1 when no listener was found or kill failed.
stop_server() {
  local -a pids
  local pid cmd
  pids=()

  # Read one PID per line and skip blank lines, so that empty lsof output
  # (nothing found, or lsof unavailable) leaves the array empty and takes the
  # "no listener" path below instead of producing a single empty entry.
  while IFS= read -r pid; do
    if [[ -n "$pid" ]]; then
      pids+=("$pid")
    fi
  done < <(lsof -nP "-tiTCP:${PORT}" -sTCP:LISTEN 2>/dev/null || true)

  if (( ${#pids[@]} == 0 )); then
    return 1
  fi

  # Verify every listener before signalling any of them.
  for pid in "${pids[@]}"; do
    cmd="$(ps -o comm= -p "$pid" 2>/dev/null || true)"
    if [[ "$cmd" != *ollama* ]]; then
      die "refusing to stop non-Ollama process on port ${PORT}: pid ${pid} (${cmd:-unknown})"
    fi
  done

  log "stopping Ollama on port ${PORT}: ${pids[*]}"
  kill "${pids[@]}" 2>/dev/null || return 1
  return 0
}
# Poll server_ready once per second until it succeeds or the attempts run out,
# drawing a single self-overwriting progress line on stderr while waiting.
#
# Arguments: $1 - optional maximum number of attempts (default: 20); expected
#                 to be a non-negative integer
# Returns:   0 as soon as the server answers, 1 when every attempt failed.
wait_for_server() {
  local max_attempts="${1:-20}"
  local i
  local progress_shown=0

  for (( i = 1; i <= max_attempts; i++ )); do
    if server_ready; then
      # Finish the \r-rewritten progress line, but only if one was drawn;
      # otherwise an immediately-ready server would emit a stray blank line.
      if (( progress_shown )); then
        printf '\n' >&2
      fi
      return 0
    fi
    printf '\rclaude-local: waiting for Ollama... %ds' "$i" >&2
    progress_shown=1
    sleep 1
  done

  if (( progress_shown )); then
    printf '\n' >&2
  fi
  return 1
}
# --- Preflight ---------------------------------------------------------------

# Abort unless $2 consists solely of digits and is greater than zero.
# $1 is the name of the setting, used only in the error message.
require_positive_int() {
  case "$2" in
    '' | *[!0-9]*) die "$1 must be a positive integer (got '$2')" ;;
    *[1-9]*) ;;
    *) die "$1 must be a positive integer (got '$2')" ;;
  esac
}

# Validate the numeric settings before anything is started. They end up in
# URLs, in the server's environment and in $(( )) arithmetic, where a malformed
# value would otherwise surface later as an obscure failure.
require_positive_int CLAUDE_LOCAL_PORT "$PORT"
require_positive_int CLAUDE_LOCAL_CONTEXT_LENGTH "$CONTEXT_LENGTH"
require_positive_int CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW "$CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW"
case "$TARGET_CONTEXT_UTILIZATION" in
  '' | . | *[!0-9.]* | *.*.*)
    die "TARGET_CONTEXT_UTILIZATION must be a plain decimal number (got '${TARGET_CONTEXT_UTILIZATION}')"
    ;;
esac

# Locate the ollama executable: an explicit OLLAMA_BIN wins, otherwise PATH.
if [[ -z "$OLLAMA_BIN" ]]; then
  OLLAMA_BIN="$(command -v ollama 2>/dev/null || true)"
fi
if [[ -z "${OLLAMA_BIN:-}" || ! -x "$OLLAMA_BIN" ]]; then
  die "could not find ollama"
fi

# The server log lives in STATE_DIR, so the directory must exist.
mkdir -p "$STATE_DIR"
log "using Ollama binary: ${OLLAMA_BIN}"

if [[ "$PWD" == "$HOME" ]]; then
  log 'warning: launching from $HOME is slow and noisy; use a project directory for testing'
fi
# --- Server lifecycle --------------------------------------------------------

# A server that is already answering is restarted, because the context length
# is applied through the environment of the `ollama serve` process.
if server_ready; then
  log "restarting the running Ollama server to apply context ${CONTEXT_LENGTH}"
  stop_server || die "failed to stop the running Ollama server on port ${PORT}"

  # Give the old server up to 10 seconds to stop answering.
  for _ in {1..10}; do
    server_ready || break
    sleep 1
  done

  # Written as an explicit `if` so that the expected "not ready" result never
  # becomes the exit status of the last command in this branch under `set -e`.
  if server_ready; then
    die "server on port ${PORT} is still running after restart request"
  fi
fi

log "starting Ollama server on port ${PORT} (context ${CONTEXT_LENGTH})"
OLLAMA_CONTEXT_LENGTH="$CONTEXT_LENGTH" OLLAMA_HOST="127.0.0.1:${PORT}" \
  nohup "$OLLAMA_BIN" serve >"$OLLAMA_LOG" 2>&1 &
server_pid=$!
SERVER_STARTED=1

if ! wait_for_server; then
  # Do not leave a server that never became ready running in the background;
  # it would still hold the port on the next attempt. Best effort only: the
  # process may already have exited.
  kill "$server_pid" 2>/dev/null || true
  die "Ollama did not become ready; check ${OLLAMA_LOG}"
fi
log "Ollama is ready at http://127.0.0.1:${PORT}"
# --- Model availability ------------------------------------------------------

# Make sure MODEL_KEY is present locally. With a terminal on stdin the user is
# asked first (an empty answer means yes); without one the model is pulled
# unconditionally.
if model_available; then
  log "model '${MODEL_KEY}' is already available locally"
elif [[ -t 0 ]]; then
  printf 'claude-local: model "%s" not found locally. Pull it now? [Y/n] ' "$MODEL_KEY" >&2
  # `read` fails at end-of-input (e.g. Ctrl-D). Without this guard `set -e`
  # would end the script right here without any message; treat it as "no".
  if ! read -r reply; then
    printf '\n' >&2
    die "model not available"
  fi
  case "$reply" in
    '' | [Yy] | [Yy][Ee][Ss]) pull_model ;;
    *) die "model not available" ;;
  esac
else
  pull_model
fi
# --- Environment for Claude Code ---------------------------------------------

# Defaults that apply only when the caller has not set these variables.
export CLAUDE_CODE_ATTRIBUTION_HEADER="${CLAUDE_CODE_ATTRIBUTION_HEADER:-0}"
export DISABLE_PROMPT_CACHING="${DISABLE_PROMPT_CACHING:-1}"
# Point the launched process at the server started by this script.
export OLLAMA_HOST="127.0.0.1:${PORT}"

# Translate the local Ollama window into Claude Code's assumed 200k budgeting.
# The arithmetic is floating point (zsh); printf '%.0f' rounds the result to a
# whole number.
TARGET_LOCAL_TOKENS="$(
  printf '%.0f' "$(( CONTEXT_LENGTH * TARGET_CONTEXT_UTILIZATION ))"
)"
COMPUTED_AUTOCOMPACT_PCT="$(
  printf '%.0f' "$(( CONTEXT_LENGTH * TARGET_CONTEXT_UTILIZATION * 100.0 / CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW ))"
)"

# A threshold supplied by the caller wins over the computed one. It gets its
# own log line, because the sentence about compacting around the target
# utilization describes only the computed value.
if [[ -n "${CLAUDE_AUTOCOMPACT_PCT_OVERRIDE:-}" ]]; then
  AUTOCOMPACT_PCT="$CLAUDE_AUTOCOMPACT_PCT_OVERRIDE"
  log "Claude autocompact threshold: ${AUTOCOMPACT_PCT}% of Claude Code's assumed ${CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW}-token window (taken from CLAUDE_AUTOCOMPACT_PCT_OVERRIDE; the value computed for ${TARGET_CONTEXT_UTILIZATION} of your local ${CONTEXT_LENGTH}-token Ollama context would be ${COMPUTED_AUTOCOMPACT_PCT}%)"
else
  AUTOCOMPACT_PCT="$COMPUTED_AUTOCOMPACT_PCT"
  log "Claude autocompact threshold: ${AUTOCOMPACT_PCT}% of Claude Code's assumed ${CLAUDE_CODE_ASSUMED_CONTEXT_WINDOW}-token window; this is intended to compact around ${TARGET_CONTEXT_UTILIZATION} of your local ${CONTEXT_LENGTH}-token Ollama context (~${TARGET_LOCAL_TOKENS} local tokens)"
fi
export CLAUDE_AUTOCOMPACT_PCT_OVERRIDE="$AUTOCOMPACT_PCT"
# --- Launch ------------------------------------------------------------------

# Assemble the `ollama launch claude` command line. Arguments given to this
# script are forwarded after a `--` separator.
launch_args=(launch claude --model "$MODEL_KEY")
if [[ $# -ne 0 ]]; then
  launch_args+=(-- "$@")
fi

log "launching Claude Code with model '${MODEL_KEY}'"
# exec replaces this shell, so nothing after this line runs.
exec "$OLLAMA_BIN" "${launch_args[@]}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment