cjbarker · May 25, 2026 22:16
diff --git a/gistfile1.txt b/gistfile1.txt
 #!/bin/bash

 # Llama.cpp Process Watchdog Script
 # This script monitors a Llama.cpp server process and checks its health status.
 # If the service fails to respond to API queries, it can optionally restart the process.
 #
 # Usage:
 #   ./watchdog-llama.sh [--verbose] [--loop] [--log-level=LEVEL] [process_name]
 #   process_name: command-line pattern of the process to monitor
 #                 (default: "/home/cj/tmp-llama/llama-server")
 #   --verbose: Enable verbose output to STDOUT in addition to the log file
 #              (also sets the log level to DEBUG)
 #   --loop: Run continuously (default: run once and exit)
 #   --log-level=[DEBUG|INFO|WARN|ERROR]: Set log level (default: WARN)

 # Strict mode: abort on unhandled errors, unset variables and failed pipelines.
 set -euo pipefail

 # Configuration
 # WATCHDOG_NAME only appears in the startup log line written by main().
 WATCHDOG_NAME="llama-watchdog"
 # File that log() appends every emitted message to.
 LOG_FILE="${HOME}/.local/log/llama-watchdog.log"
 # Pause between run_check cycles when --loop is given.
 CHECK_INTERVAL=30  # seconds
 # Attempts per endpoint in test_api_health when the connection itself fails.
 MAX_RETRIES=3
 # When true, run_check calls reload_model after a failed health check.
 RESTART_ENABLED=true
 # restart_llama writes the PID of the server it launches to this file.
 LLAMA_PID_FILE="/tmp/llama.pid"  # Optional: specify where the pid file is stored
 # Base URL and model id used by reload_model; the model id is also sent in the
 # chat-completions probe of test_api_health.
 LLAMA_SWAP_URL="http://localhost:8080"
 LLAMA_SWAP_MODEL_ID="Qwen3.6-35B"

 # Initialize flags
 # These defaults are overwritten by the command-line parser below.
 VERBOSE=false
 LOOP_MODE=false
 LOG_LEVEL="WARN"  # Default log level: WARN (equivalent to level 3)

 # Parse command line arguments
 #
 # Recognised options: --verbose, --loop, --log-level=LEVEL, --log-level LEVEL.
 # The first non-option argument becomes PROCESS_NAME; later ones are ignored.
 # Anything else that starts with "-" is rejected with exit status 2 instead
 # of being silently adopted as the process name (pgrep could not use such a
 # pattern anyway).
 while [[ $# -gt 0 ]]; do
    case "$1" in
        --verbose)
            VERBOSE=true
            LOG_LEVEL="DEBUG"  # When verbose is enabled, set log level to DEBUG
            shift
            ;;
        --loop)
            LOOP_MODE=true
            shift
            ;;
        --log-level=*)
            LOG_LEVEL="${1#*=}"
            shift
            ;;
        --log-level)
            # Guard the look-ahead: without a value, "$2" is unset and
            # `shift 2` cannot succeed.
            if [[ $# -lt 2 ]]; then
                echo "Error: --log-level requires a value (DEBUG|INFO|WARN|ERROR)" >&2
                exit 2
            fi
            LOG_LEVEL="$2"
            shift 2
            ;;
        -*)
            echo "Error: unknown option: $1" >&2
            exit 2
            ;;
        *)
            # First non-flag argument is the process name
            if [ -z "${PROCESS_NAME:-}" ]; then
                PROCESS_NAME="$1"
            fi
            shift
            ;;
    esac
 done

 # Set default process name if not provided
 # PROCESS_NAME is the pattern find_llama_processes hands to `pgrep -f`, and
 # the command restart_llama launches.
 if [ -z "${PROCESS_NAME:-}" ]; then
    PROCESS_NAME="/home/cj/tmp-llama/llama-server"
 fi

 # Log level constants
 # Higher number = more severe. log() emits a message only when its level is
 # greater than or equal to the numeric value of LOG_LEVEL.
 DEBUG_LEVEL=1
 INFO_LEVEL=2
 WARN_LEVEL=3
 ERROR_LEVEL=4

 # Map a log level name to its numeric severity.
 # Arguments: $1 - level name (DEBUG, INFO, WARN or ERROR; case-sensitive)
 # Outputs:   the matching *_LEVEL constant on stdout; any other name is
 #            treated as WARN
 get_log_level_value() {
    local requested="$1"

    if [ "$requested" = "DEBUG" ]; then
        echo "$DEBUG_LEVEL"
    elif [ "$requested" = "INFO" ]; then
        echo "$INFO_LEVEL"
    elif [ "$requested" = "ERROR" ]; then
        echo "$ERROR_LEVEL"
    else
        # Covers "WARN" itself as well as every unrecognised name.
        echo "$WARN_LEVEL"
    fi
 }

 # Logging function with level filtering
 #
 # Arguments:
 #   $1 - level of this message (DEBUG|INFO|WARN|ERROR; anything else counts
 #        as INFO for filtering but is printed as given)
 #   $2 - message text
 # Globals read: LOG_LEVEL, LOG_FILE, VERBOSE, *_LEVEL constants
 # Outputs: appends "[timestamp] [level] message" to LOG_FILE; when VERBOSE is
 #          true the same line is also written to stdout.
 log() {
    local level="$1"
    local message="$2"
    local current_level_value message_level_value formatted_message log_dir

    # Declared separately from the assignment so a failing helper is not
    # hidden behind `local`'s own exit status.
    current_level_value=$(get_log_level_value "$LOG_LEVEL")

    case "$level" in
        "DEBUG") message_level_value=$DEBUG_LEVEL ;;
        "INFO")  message_level_value=$INFO_LEVEL  ;;
        "WARN")  message_level_value=$WARN_LEVEL  ;;
        "ERROR") message_level_value=$ERROR_LEVEL ;;
        *)       message_level_value=$INFO_LEVEL  ;;  # Default to INFO for unknown levels
    esac

    # Drop messages below the configured threshold.
    if [ "$message_level_value" -lt "$current_level_value" ]; then
        return 0
    fi

    # Nothing else creates the log directory; without it the append below
    # fails and, under `set -e`, takes the whole watchdog down.
    log_dir=$(dirname -- "$LOG_FILE")
    if [ ! -d "$log_dir" ]; then
        mkdir -p -- "$log_dir"
    fi

    formatted_message="[$(date '+%Y-%m-%d %H:%M:%S')] [$level] $message"
    if [ "$VERBOSE" = true ]; then
        printf '%s\n' "$formatted_message" | tee -a "$LOG_FILE"
    else
        printf '%s\n' "$formatted_message" >> "$LOG_FILE"
    fi
 }

 # Signal handler for graceful shutdown
 # Writes an INFO log entry and terminates the script with exit status 0.
 cleanup() {
    log "INFO" "Received shutdown signal. Cleaning up..."
    exit 0
 }

 # Set up signal traps
 # Both SIGTERM and SIGINT are routed to cleanup.
 trap cleanup SIGTERM SIGINT

 # Check if curl is available
 # All HTTP probes and llama-swap calls in this script go through curl, so
 # stop here (exit status 1) when it cannot be found on PATH.
 if ! command -v curl &> /dev/null; then
    log "ERROR" "curl is not installed. Please install curl to use this watchdog."
    exit 1
 fi

 # Report whether a PID refers to a process this user can signal.
 # Arguments: $1 - PID to probe (may be empty)
 # Returns:   0 if `kill -0` succeeds for the PID, 1 for an empty PID or when
 #            the probe fails
 is_process_running() {
    local target_pid=$1

    # Signal 0 performs the existence/permission check without delivering
    # anything to the process.
    if [ -n "$target_pid" ] && kill -0 "$target_pid" 2>/dev/null; then
        return 0
    fi
    return 1
 }

 # Function to find Llama.cpp processes
 #
 # Globals read: PROCESS_NAME (pattern matched against full command lines)
 # Outputs: one matching PID per line on stdout; the DEBUG log line is sent to
 #          stderr so callers can capture stdout safely
 # Returns: always 0 -- "no match" is reported as empty output, which keeps
 #          callers running under `set -e` / `pipefail` from aborting
 find_llama_processes() {
    # Look for processes containing the specified process name in their command line
    log "DEBUG" "Finding process running: $PROCESS_NAME" >&2

    # When the pattern was passed on the watchdog's command line, every forked
    # copy of this script matches it too. Remember the shell this function
    # runs in (usually a command-substitution subshell, whose PID is not $$).
    local caller_pid=${BASHPID:-$$}
    local candidate

    pgrep -f -- "$PROCESS_NAME" 2>/dev/null | while IFS= read -r candidate; do
        case "$candidate" in
            # The script itself, the calling subshell, and this pipeline stage.
            "$$" | "$caller_pid" | "${BASHPID:-$$}") continue ;;
        esac
        printf '%s\n' "$candidate"
    done || true

    return 0
 }

 # Function to test API health
 #
 # Probes /health, /v1/models (GET) and /v1/chat/completions (POST) below the
 # given base URL.
 # Arguments: $1 - base URL (default: http://localhost:8080)
 # Globals read: MAX_RETRIES, LLAMA_SWAP_MODEL_ID
 # Returns: 0 when every endpoint passes, 1 otherwise
 #
 # Per-endpoint rules:
 #   - connection failure ("000"): retried up to MAX_RETRIES times with
 #     exponential backoff, then the endpoint counts as unhealthy
 #   - /health: 200 and 503 pass, any other status fails
 #   - /v1/chat/completions: only 200 passes
 #   - any other endpoint: every HTTP response counts as reachable
 test_api_health() {
    local base_url="${1:-http://localhost:8080}"
    local endpoints=("/health" "/v1/models" "/v1/chat/completions")
    local all_healthy=true
    local endpoint full_url response_code retries verdict

    log "INFO" "Testing API health at $base_url"

    for endpoint in "${endpoints[@]}"; do
        full_url="${base_url}${endpoint}"
        log "DEBUG" "Checking endpoint: $full_url"

        response_code=0
        retries=0

        # Try up to MAX_RETRIES times with exponential backoff
        while [ "$retries" -lt "$MAX_RETRIES" ]; do
            # curl already prints "000" through -w when the transfer fails, so
            # an `|| echo "000"` inside the substitution would produce
            # "000000" and defeat every "000" comparison below. Overwrite the
            # variable on failure instead.
            if [ "$endpoint" = "/v1/chat/completions" ]; then
                # Use POST request with JSON body for chat/completions endpoint
                response_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 \
                    -X POST \
                    -H "Content-Type: application/json" \
                    -d "{\"model\": \"${LLAMA_SWAP_MODEL_ID}\", \"messages\": [{\"role\":\"system\", \"content\":\"You are a helpful assistant.\"},{\"role\":\"user\",\"content\":\"hello!\"}]}" \
                    "$full_url" 2>/dev/null) || response_code="000"
            else
                # Using curl with timeout to avoid hanging for other endpoints
                response_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 "$full_url" 2>/dev/null) || response_code="000"
            fi

            # Classify the outcome once; the three endpoints differ only in
            # which status codes they accept.
            if [ "$response_code" = "000" ]; then
                verdict="retry"
            else
                case "$endpoint" in
                    /health)
                        case "$response_code" in
                            200 | 503) verdict="ok" ;;
                            *)         verdict="fail" ;;
                        esac
                        ;;
                    /v1/chat/completions)
                        if [ "$response_code" = "200" ]; then
                            verdict="ok"
                        else
                            verdict="fail"
                        fi
                        ;;
                    *)
                        # Any HTTP response (including 4xx/5xx) shows the
                        # server is answering.
                        verdict="ok"
                        ;;
                esac
            fi

            case "$verdict" in
                ok)
                    log "DEBUG" "Endpoint $endpoint returned HTTP $response_code"
                    break
                    ;;
                retry)
                    # Connection timeout or network error
                    log "WARN" "Connection failed to $endpoint (HTTP $response_code)"
                    retries=$((retries + 1))
                    if [ "$retries" -lt "$MAX_RETRIES" ]; then
                        sleep $((2 ** retries))  # Exponential backoff
                    fi
                    ;;
                fail)
                    if [ "$endpoint" = "/health" ]; then
                        log "WARN" "Endpoint $endpoint returned HTTP $response_code (only 200 and 503 are acceptable for health)"
                    else
                        log "ERROR" "Endpoint $endpoint returned HTTP $response_code (expected 200)"
                    fi
                    all_healthy=false
                    break
                    ;;
            esac
        done

        # If we exhausted retries for this endpoint, mark as unhealthy
        if [ "$retries" -ge "$MAX_RETRIES" ]; then
            log "ERROR" "Failed to reach endpoint $endpoint after $MAX_RETRIES attempts"
            all_healthy=false
        fi
    done

    # Return the overall health status
    if [ "$all_healthy" = true ]; then
        log "INFO" "All health check endpoints passed"
        return 0
    else
        log "WARN" "One or more health check endpoints failed"
        return 1
    fi
 }

 # Function to reload model via llama-swap API (unload then load)
 #
 # Globals read: LLAMA_SWAP_URL, LLAMA_SWAP_MODEL_ID
 # Returns: 0 when the final load request answers with a 2xx/3xx status;
 #          1 when the unload (if attempted) or the load fails
 reload_model() {
    log "INFO" "Attempting to reload model '$LLAMA_SWAP_MODEL_ID' via llama-swap API at $LLAMA_SWAP_URL"

    # Step 1: Check if model is currently loaded
    local check_url="${LLAMA_SWAP_URL}/upstream/${LLAMA_SWAP_MODEL_ID}/v1/models"
    log "INFO" "Checking if model is loaded: GET $check_url"
    local check_response
    # On failure curl has already printed "000" via -w; replace the captured
    # value rather than appending a second "000" to it.
    check_response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 -L \
        "$check_url" 2>/dev/null) || check_response="000"

    if [ "$check_response" -ge 200 ] && [ "$check_response" -lt 400 ]; then
        # Model is loaded, unload it first
        local unload_url="${LLAMA_SWAP_URL}/api/models/unload/${LLAMA_SWAP_MODEL_ID}"
        log "INFO" "Model is loaded, unloading: POST $unload_url"
        local unload_response
        unload_response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 30 \
            -X POST "$unload_url" 2>/dev/null) || unload_response="000"

        if [ "$unload_response" = "200" ]; then
            log "INFO" "Model '$LLAMA_SWAP_MODEL_ID' unloaded successfully (HTTP $unload_response)"
        else
            log "ERROR" "Failed to unload model '$LLAMA_SWAP_MODEL_ID' (HTTP $unload_response)"
            return 1
        fi

        # Brief pause to allow cleanup
        sleep 3
    else
        log "INFO" "Model '$LLAMA_SWAP_MODEL_ID' is not loaded (HTTP $check_response), skipping unload"
    fi

    # Step 2: Load the model by hitting its upstream endpoint
    local upstream_url="${LLAMA_SWAP_URL}/upstream/${LLAMA_SWAP_MODEL_ID}"
    log "INFO" "Loading model: GET $upstream_url"
    local load_response
    load_response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 60 -L \
        "${upstream_url}/v1/models" 2>/dev/null) || load_response="000"

    if [ "$load_response" -ge 200 ] && [ "$load_response" -lt 400 ]; then
        log "INFO" "Model '$LLAMA_SWAP_MODEL_ID' loaded successfully (HTTP $load_response)"
    else
        log "ERROR" "Failed to load model '$LLAMA_SWAP_MODEL_ID' (HTTP $load_response)"
        return 1
    fi

    log "INFO" "Model '$LLAMA_SWAP_MODEL_ID' reloaded successfully"
    return 0
 }

 # Function to restart Llama.cpp (if enabled)
 #
 # Stops every process reported by find_llama_processes (SIGTERM first,
 # SIGKILL only for those still alive after the grace period), then launches
 # PROCESS_NAME in the background and records its PID in LLAMA_PID_FILE.
 # Globals read: PROCESS_NAME, LLAMA_PID_FILE
 # NOTE(review): no other function in this script calls this one; run_check
 # uses reload_model instead.
 restart_llama() {
    log "INFO" "Attempting to restart Llama.cpp service (process: $PROCESS_NAME)"

    # Kill any existing processes
    local pids pid
    pids=$(find_llama_processes) || true
    if [ -n "$pids" ]; then
        log "INFO" "Killing existing Llama.cpp processes: $pids"
        # Word-splitting of $pids is intentional: one PID per line.
        for pid in $pids; do
            kill -TERM "$pid" 2>/dev/null || true
        done
        sleep 5  # Wait for processes to terminate

        # Force kill only what survived SIGTERM, so an already-exited PID is
        # not signalled again.
        for pid in $pids; do
            if kill -0 "$pid" 2>/dev/null; then
                kill -KILL "$pid" 2>/dev/null || true
            fi
        done
    fi

    # Start the Llama.cpp server with the specified command
    # NOTE: the log line below lists only part of the real command line.
    log "INFO" "Starting Llama.cpp with command: $PROCESS_NAME --host 0.0.0.0 --port 8080"
    # shellcheck disable=SC2086 -- PROCESS_NAME is deliberately left unquoted, as before
    $PROCESS_NAME -m ~/.cache/huggingface/hub/models--unsloth--Qwen3-Coder-30B-A3B-Instruct-GGUF/snapshots/b17cb02dd882d5b6ab62fc777ad2995f19668350/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf -c 131072 --host 0.0.0.0 --port 8080  -ngl 999 --threads 16 --jinja > ~/llama-server-output.log 2>&1 &
    printf '%s\n' "$!" > "$LLAMA_PID_FILE"

    log "INFO" "Llama.cpp restarted successfully"
 }

 # Run a single health check cycle
 #
 # For every PID reported by find_llama_processes that is still alive, probe
 # the API on http://localhost:8080 and, when it is unhealthy and
 # RESTART_ENABLED is true, ask llama-swap to reload the model.
 # Returns: 0; a failed reload is logged instead of propagated so that the
 #          watchdog keeps running under `set -e`.
 run_check() {
    local pids pid
    # "No match" must not abort the cycle, whatever status the finder returns.
    pids=$(find_llama_processes) || true

    if [ -z "$pids" ]; then
        log "INFO" "No Llama.cpp processes found running"
        return 0
    fi

    log "INFO" "Found Llama.cpp processes with PIDs: $pids"

    # Word-splitting of $pids is intentional: one PID per line.
    for pid in $pids; do
        if ! is_process_running "$pid"; then
            log "WARN" "Process $pid is not running but still in pgrep results"
            continue
        fi

        log "INFO" "Checking health for process $pid"

        if test_api_health "http://localhost:8080"; then
            log "DEBUG" "Process $pid is healthy"
        else
            log "ERROR" "Process $pid is unhealthy"

            if [ "$RESTART_ENABLED" = true ]; then
                log "INFO" "Restart functionality enabled, attempting model reload via llama-swap"
                # Called as a bare command, a non-zero return here would trip
                # `set -e` and stop the watchdog while the server is down.
                if ! reload_model; then
                    log "ERROR" "Model reload via llama-swap failed"
                fi
            else
                log "INFO" "Restart functionality disabled, just logging failure"
            fi
        fi
    done

    return 0
 }

 # Main entry point
 # Logs a startup line, then runs one check (default) or, when LOOP_MODE is
 # true, keeps running checks with CHECK_INTERVAL seconds between them.
 main() {
    log "INFO" "Starting $WATCHDOG_NAME for Llama.cpp monitoring (watching: $PROCESS_NAME)"

    # One-shot mode: a single cycle, then hand its status back to the caller.
    if [ "$LOOP_MODE" != true ]; then
        run_check
        return
    fi

    log "INFO" "Running in loop mode (interval: ${CHECK_INTERVAL}s)"
    while :; do
        run_check
        sleep $CHECK_INTERVAL
    done
 }

 # If run directly (not sourced), execute main function
 # BASH_SOURCE[0] and $0 are the same path only when this file is executed
 # as a script. The command-line parser near the top has already shifted
 # every argument away, and main does not read its parameters.
 if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
    main "$@"
 fi
	#!/bin/bash

	# Llama.cpp Process Watchdog Script
	# This script monitors a Llama.cpp server process and checks its health status.
	# If the service fails to respond to API queries, it can optionally restart the process.
	#
	# Usage:
	# ./watchdog-llama.sh [process_name]
	# - If process_name is provided, it will monitor that specific process
	# - If not provided, it defaults to "/home/cj/tmp-llama/llama-server"
	# --verbose: Enable verbose output to STDOUT in addition to log file
	# --loop: Run continuously (default: run once and exit)
	# --log-level=[DEBUG|INFO|WARN|ERROR]: Set log level

	# Strict mode: abort on unhandled errors, unset variables and failed pipelines.
	set -euo pipefail

	# Configuration
	# WATCHDOG_NAME only appears in the startup log line written by main().
	WATCHDOG_NAME="llama-watchdog"
	# File that log() appends every emitted message to.
	LOG_FILE="${HOME}/.local/log/llama-watchdog.log"
	# Pause between run_check cycles when --loop is given.
	CHECK_INTERVAL=30 # seconds
	# Attempts per endpoint in test_api_health when the connection itself fails.
	MAX_RETRIES=3
	# When true, run_check calls reload_model after a failed health check.
	RESTART_ENABLED=true
	# restart_llama writes the PID of the server it launches to this file.
	LLAMA_PID_FILE="/tmp/llama.pid" # Optional: specify where the pid file is stored
	# Base URL and model id used by reload_model; the model id is also sent in
	# the chat-completions probe of test_api_health.
	LLAMA_SWAP_URL="http://localhost:8080"
	LLAMA_SWAP_MODEL_ID="Qwen3.6-35B"

	# Initialize flags
	# These defaults are overwritten by the command-line parser below.
	VERBOSE=false
	LOOP_MODE=false
	LOG_LEVEL="WARN" # Default log level: WARN (equivalent to level 3)

	# Parse command line arguments
	#
	# Recognised options: --verbose, --loop, --log-level=LEVEL, --log-level LEVEL.
	# The first non-option argument becomes PROCESS_NAME; later ones are ignored.
	# Anything else that starts with "-" is rejected with exit status 2 instead
	# of being silently adopted as the process name (pgrep could not use such a
	# pattern anyway).
	while [[ $# -gt 0 ]]; do
		case "$1" in
			--verbose)
				VERBOSE=true
				LOG_LEVEL="DEBUG" # When verbose is enabled, set log level to DEBUG
				shift
				;;
			--loop)
				LOOP_MODE=true
				shift
				;;
			--log-level=*)
				LOG_LEVEL="${1#*=}"
				shift
				;;
			--log-level)
				# Guard the look-ahead: without a value, "$2" is unset and
				# `shift 2` cannot succeed.
				if [[ $# -lt 2 ]]; then
					echo "Error: --log-level requires a value (DEBUG|INFO|WARN|ERROR)" >&2
					exit 2
				fi
				LOG_LEVEL="$2"
				shift 2
				;;
			-*)
				echo "Error: unknown option: $1" >&2
				exit 2
				;;
			*)
				# First non-flag argument is the process name
				if [ -z "${PROCESS_NAME:-}" ]; then
					PROCESS_NAME="$1"
				fi
				shift
				;;
		esac
	done

	# Set default process name if not provided
	# PROCESS_NAME is the pattern find_llama_processes hands to `pgrep -f`, and
	# the command restart_llama launches.
	if [ -z "${PROCESS_NAME:-}" ]; then
	PROCESS_NAME="/home/cj/tmp-llama/llama-server"
	fi

	# Log level constants
	# Higher number = more severe. log() emits a message only when its level is
	# greater than or equal to the numeric value of LOG_LEVEL.
	DEBUG_LEVEL=1
	INFO_LEVEL=2
	WARN_LEVEL=3
	ERROR_LEVEL=4

	# Map a log level name to its numeric severity.
	# Arguments: $1 - level name (DEBUG, INFO, WARN or ERROR; case-sensitive)
	# Outputs:   the matching *_LEVEL constant on stdout; any other name is
	#            treated as WARN
	get_log_level_value() {
		local requested="$1"

		if [ "$requested" = "DEBUG" ]; then
			echo "$DEBUG_LEVEL"
		elif [ "$requested" = "INFO" ]; then
			echo "$INFO_LEVEL"
		elif [ "$requested" = "ERROR" ]; then
			echo "$ERROR_LEVEL"
		else
			# Covers "WARN" itself as well as every unrecognised name.
			echo "$WARN_LEVEL"
		fi
	}

	# Logging function with level filtering
	#
	# Arguments:
	#   $1 - level of this message (DEBUG|INFO|WARN|ERROR; anything else counts
	#        as INFO for filtering but is printed as given)
	#   $2 - message text
	# Globals read: LOG_LEVEL, LOG_FILE, VERBOSE, *_LEVEL constants
	# Outputs: appends "[timestamp] [level] message" to LOG_FILE; when VERBOSE is
	#          true the same line is also written to stdout.
	log() {
		local level="$1"
		local message="$2"
		local current_level_value message_level_value formatted_message log_dir

		# Declared separately from the assignment so a failing helper is not
		# hidden behind `local`'s own exit status.
		current_level_value=$(get_log_level_value "$LOG_LEVEL")

		case "$level" in
			"DEBUG") message_level_value=$DEBUG_LEVEL ;;
			"INFO") message_level_value=$INFO_LEVEL ;;
			"WARN") message_level_value=$WARN_LEVEL ;;
			"ERROR") message_level_value=$ERROR_LEVEL ;;
			*) message_level_value=$INFO_LEVEL ;; # Default to INFO for unknown levels
		esac

		# Drop messages below the configured threshold.
		if [ "$message_level_value" -lt "$current_level_value" ]; then
			return 0
		fi

		# Nothing else creates the log directory; without it the append below
		# fails and, under `set -e`, takes the whole watchdog down.
		log_dir=$(dirname -- "$LOG_FILE")
		if [ ! -d "$log_dir" ]; then
			mkdir -p -- "$log_dir"
		fi

		formatted_message="[$(date '+%Y-%m-%d %H:%M:%S')] [$level] $message"
		if [ "$VERBOSE" = true ]; then
			# A real pipe: tee duplicates the line to stdout and the log file.
			printf '%s\n' "$formatted_message" | tee -a "$LOG_FILE"
		else
			printf '%s\n' "$formatted_message" >> "$LOG_FILE"
		fi
	}

	# Signal handler for graceful shutdown
	# Writes an INFO log entry and terminates the script with exit status 0.
	cleanup() {
	log "INFO" "Received shutdown signal. Cleaning up..."
	exit 0
	}

	# Set up signal traps
	# Both SIGTERM and SIGINT are routed to cleanup.
	trap cleanup SIGTERM SIGINT

	# Check if curl is available
	# All HTTP probes and llama-swap calls in this script go through curl, so
	# stop here (exit status 1) when it cannot be found on PATH.
	if ! command -v curl &> /dev/null; then
	log "ERROR" "curl is not installed. Please install curl to use this watchdog."
	exit 1
	fi

	# Function to check if Llama.cpp process is running
	# Arguments: $1 - PID to probe (may be empty)
	# Returns:   0 if `kill -0` succeeds for the PID, 1 for an empty PID or when
	#            the probe fails
	is_process_running() {
		local pid=$1
		# `||` must be the shell operator here; as a backslash-escaped word it
		# becomes an argument to `[`, the test errors out, and every PID is
		# reported as running. Signal 0 only checks existence/permission.
		if [ -z "$pid" ] || ! kill -0 "$pid" 2>/dev/null; then
			return 1
		fi
		return 0
	}

	# Function to find Llama.cpp processes
	#
	# Globals read: PROCESS_NAME (pattern matched against full command lines)
	# Outputs: one matching PID per line on stdout; the DEBUG log line is sent to
	#          stderr so callers can capture stdout safely
	# Returns: always 0 -- "no match" is reported as empty output, which keeps
	#          callers running under `set -e` / `pipefail` from aborting
	find_llama_processes() {
		# Look for processes containing the specified process name in their command line
		log "DEBUG" "Finding process running: $PROCESS_NAME" >&2

		# When the pattern was passed on the watchdog's command line, every
		# forked copy of this script matches it too. Remember the shell this
		# function runs in (usually a command-substitution subshell, whose PID
		# is not $$).
		local caller_pid=${BASHPID:-$$}
		local candidate

		pgrep -f -- "$PROCESS_NAME" 2>/dev/null | while IFS= read -r candidate; do
			case "$candidate" in
				# The script itself, the calling subshell, and this pipeline stage.
				"$$" | "$caller_pid" | "${BASHPID:-$$}") continue ;;
			esac
			printf '%s\n' "$candidate"
		done || true

		return 0
	}

	# Function to test API health
	#
	# Probes /health, /v1/models (GET) and /v1/chat/completions (POST) below the
	# given base URL.
	# Arguments: $1 - base URL (default: http://localhost:8080)
	# Globals read: MAX_RETRIES, LLAMA_SWAP_MODEL_ID
	# Returns: 0 when every endpoint passes, 1 otherwise
	#
	# Per-endpoint rules:
	#   - connection failure ("000"): retried up to MAX_RETRIES times with
	#     exponential backoff, then the endpoint counts as unhealthy
	#   - /health: 200 and 503 pass, any other status fails
	#   - /v1/chat/completions: only 200 passes
	#   - any other endpoint: every HTTP response counts as reachable
	test_api_health() {
		local base_url="${1:-http://localhost:8080}"
		local endpoints=("/health" "/v1/models" "/v1/chat/completions")
		local all_healthy=true
		local endpoint full_url response_code retries verdict

		log "INFO" "Testing API health at $base_url"

		for endpoint in "${endpoints[@]}"; do
			full_url="${base_url}${endpoint}"
			log "DEBUG" "Checking endpoint: $full_url"

			response_code=0
			retries=0

			# Try up to MAX_RETRIES times with exponential backoff
			while [ "$retries" -lt "$MAX_RETRIES" ]; do
				# curl already prints "000" through -w when the transfer fails,
				# so the fallback must replace the captured value; echoing a
				# second "000" inside the substitution would yield "000000".
				if [ "$endpoint" = "/v1/chat/completions" ]; then
					# Use POST request with JSON body for chat/completions endpoint
					response_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 \
						-X POST \
						-H "Content-Type: application/json" \
						-d "{\"model\": \"${LLAMA_SWAP_MODEL_ID}\", \"messages\": [{\"role\":\"system\", \"content\":\"You are a helpful assistant.\"},{\"role\":\"user\",\"content\":\"hello!\"}]}" \
						"$full_url" 2>/dev/null) || response_code="000"
				else
					# Using curl with timeout to avoid hanging for other endpoints
					response_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 "$full_url" 2>/dev/null) || response_code="000"
				fi

				# Classify the outcome once; the three endpoints differ only in
				# which status codes they accept.
				if [ "$response_code" = "000" ]; then
					verdict="retry"
				else
					case "$endpoint" in
						/health)
							case "$response_code" in
								200 | 503) verdict="ok" ;;
								*) verdict="fail" ;;
							esac
							;;
						/v1/chat/completions)
							if [ "$response_code" = "200" ]; then
								verdict="ok"
							else
								verdict="fail"
							fi
							;;
						*)
							# Any HTTP response (including 4xx/5xx) shows the
							# server is answering.
							verdict="ok"
							;;
					esac
				fi

				case "$verdict" in
					ok)
						log "DEBUG" "Endpoint $endpoint returned HTTP $response_code"
						break
						;;
					retry)
						# Connection timeout or network error
						log "WARN" "Connection failed to $endpoint (HTTP $response_code)"
						retries=$((retries + 1))
						if [ "$retries" -lt "$MAX_RETRIES" ]; then
							sleep $((2 ** retries)) # Exponential backoff
						fi
						;;
					fail)
						if [ "$endpoint" = "/health" ]; then
							log "WARN" "Endpoint $endpoint returned HTTP $response_code (only 200 and 503 are acceptable for health)"
						else
							log "ERROR" "Endpoint $endpoint returned HTTP $response_code (expected 200)"
						fi
						all_healthy=false
						break
						;;
				esac
			done

			# If we exhausted retries for this endpoint, mark as unhealthy
			if [ "$retries" -ge "$MAX_RETRIES" ]; then
				log "ERROR" "Failed to reach endpoint $endpoint after $MAX_RETRIES attempts"
				all_healthy=false
			fi
		done

		# Return the overall health status
		if [ "$all_healthy" = true ]; then
			log "INFO" "All health check endpoints passed"
			return 0
		else
			log "WARN" "One or more health check endpoints failed"
			return 1
		fi
	}

	# Function to reload model via llama-swap API (unload then load)
	#
	# Globals read: LLAMA_SWAP_URL, LLAMA_SWAP_MODEL_ID
	# Returns: 0 when the final load request answers with a 2xx/3xx status;
	#          1 when the unload (if attempted) or the load fails
	reload_model() {
		log "INFO" "Attempting to reload model '$LLAMA_SWAP_MODEL_ID' via llama-swap API at $LLAMA_SWAP_URL"

		# Step 1: Check if model is currently loaded
		local check_url="${LLAMA_SWAP_URL}/upstream/${LLAMA_SWAP_MODEL_ID}/v1/models"
		log "INFO" "Checking if model is loaded: GET $check_url"
		local check_response
		# On failure curl has already printed "000" via -w; replace the captured
		# value rather than appending a second "000" to it.
		check_response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 -L \
			"$check_url" 2>/dev/null) || check_response="000"

		if [ "$check_response" -ge 200 ] && [ "$check_response" -lt 400 ]; then
			# Model is loaded, unload it first
			local unload_url="${LLAMA_SWAP_URL}/api/models/unload/${LLAMA_SWAP_MODEL_ID}"
			log "INFO" "Model is loaded, unloading: POST $unload_url"
			local unload_response
			unload_response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 30 \
				-X POST "$unload_url" 2>/dev/null) || unload_response="000"

			if [ "$unload_response" = "200" ]; then
				log "INFO" "Model '$LLAMA_SWAP_MODEL_ID' unloaded successfully (HTTP $unload_response)"
			else
				log "ERROR" "Failed to unload model '$LLAMA_SWAP_MODEL_ID' (HTTP $unload_response)"
				return 1
			fi

			# Brief pause to allow cleanup
			sleep 3
		else
			log "INFO" "Model '$LLAMA_SWAP_MODEL_ID' is not loaded (HTTP $check_response), skipping unload"
		fi

		# Step 2: Load the model by hitting its upstream endpoint
		local upstream_url="${LLAMA_SWAP_URL}/upstream/${LLAMA_SWAP_MODEL_ID}"
		log "INFO" "Loading model: GET $upstream_url"
		local load_response
		load_response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 60 -L \
			"${upstream_url}/v1/models" 2>/dev/null) || load_response="000"

		if [ "$load_response" -ge 200 ] && [ "$load_response" -lt 400 ]; then
			log "INFO" "Model '$LLAMA_SWAP_MODEL_ID' loaded successfully (HTTP $load_response)"
		else
			log "ERROR" "Failed to load model '$LLAMA_SWAP_MODEL_ID' (HTTP $load_response)"
			return 1
		fi

		log "INFO" "Model '$LLAMA_SWAP_MODEL_ID' reloaded successfully"
		return 0
	}

	# Function to restart Llama.cpp (if enabled)
	#
	# Stops every process reported by find_llama_processes (SIGTERM first,
	# SIGKILL only for those still alive after the grace period), then launches
	# PROCESS_NAME in the background and records its PID in LLAMA_PID_FILE.
	# Globals read: PROCESS_NAME, LLAMA_PID_FILE
	# NOTE(review): no other function in this script calls this one; run_check
	# uses reload_model instead.
	restart_llama() {
		log "INFO" "Attempting to restart Llama.cpp service (process: $PROCESS_NAME)"

		# Kill any existing processes
		local pids pid
		pids=$(find_llama_processes) || true
		if [ -n "$pids" ]; then
			log "INFO" "Killing existing Llama.cpp processes: $pids"
			# Word-splitting of $pids is intentional: one PID per line.
			for pid in $pids; do
				kill -TERM "$pid" 2>/dev/null || true
			done
			sleep 5 # Wait for processes to terminate

			# Force kill only what survived SIGTERM, so an already-exited PID
			# is not signalled again.
			for pid in $pids; do
				if kill -0 "$pid" 2>/dev/null; then
					kill -KILL "$pid" 2>/dev/null || true
				fi
			done
		fi

		# Start the Llama.cpp server with the specified command
		# NOTE: the log line below lists only part of the real command line.
		log "INFO" "Starting Llama.cpp with command: $PROCESS_NAME --host 0.0.0.0 --port 8080"
		# shellcheck disable=SC2086 -- PROCESS_NAME is deliberately left unquoted, as before
		$PROCESS_NAME -m ~/.cache/huggingface/hub/models--unsloth--Qwen3-Coder-30B-A3B-Instruct-GGUF/snapshots/b17cb02dd882d5b6ab62fc777ad2995f19668350/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf -c 131072 --host 0.0.0.0 --port 8080 -ngl 999 --threads 16 --jinja > ~/llama-server-output.log 2>&1 &
		printf '%s\n' "$!" > "$LLAMA_PID_FILE"

		log "INFO" "Llama.cpp restarted successfully"
	}

	# Run a single health check cycle
	#
	# For every PID reported by find_llama_processes that is still alive, probe
	# the API on http://localhost:8080 and, when it is unhealthy and
	# RESTART_ENABLED is true, ask llama-swap to reload the model.
	# Returns: 0; a failed reload is logged instead of propagated so that the
	#          watchdog keeps running under `set -e`.
	run_check() {
		local pids pid
		# "No match" must not abort the cycle, whatever status the finder returns.
		pids=$(find_llama_processes) || true

		if [ -z "$pids" ]; then
			log "INFO" "No Llama.cpp processes found running"
			return 0
		fi

		log "INFO" "Found Llama.cpp processes with PIDs: $pids"

		# Word-splitting of $pids is intentional: one PID per line.
		for pid in $pids; do
			if ! is_process_running "$pid"; then
				log "WARN" "Process $pid is not running but still in pgrep results"
				continue
			fi

			log "INFO" "Checking health for process $pid"

			if test_api_health "http://localhost:8080"; then
				log "DEBUG" "Process $pid is healthy"
			else
				log "ERROR" "Process $pid is unhealthy"

				if [ "$RESTART_ENABLED" = true ]; then
					log "INFO" "Restart functionality enabled, attempting model reload via llama-swap"
					# Called as a bare command, a non-zero return here would
					# trip `set -e` and stop the watchdog while the server is
					# down.
					if ! reload_model; then
						log "ERROR" "Model reload via llama-swap failed"
					fi
				else
					log "INFO" "Restart functionality disabled, just logging failure"
				fi
			fi
		done

		return 0
	}

	# Main entry point
	# Logs a startup line, then runs one check (default) or, when LOOP_MODE is
	# true, keeps running checks with CHECK_INTERVAL seconds between them.
	main() {
		log "INFO" "Starting $WATCHDOG_NAME for Llama.cpp monitoring (watching: $PROCESS_NAME)"

		# One-shot mode: a single cycle, then hand its status back to the caller.
		if [ "$LOOP_MODE" != true ]; then
			run_check
			return
		fi

		log "INFO" "Running in loop mode (interval: ${CHECK_INTERVAL}s)"
		while :; do
			run_check
			sleep $CHECK_INTERVAL
		done
	}

	# If run directly (not sourced), execute main function
	# BASH_SOURCE[0] and $0 are the same path only when this file is executed
	# as a script. The command-line parser near the top has already shifted
	# every argument away, and main does not read its parameters.
	if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
	main "$@"
	fi
No results found