@aleenprd
Created July 26, 2025 19:03
Start a Docker server for llama.cpp
#!/bin/bash
# Llama.cpp Docker Server Launcher
#
# This script can be configured using environment variables and/or command-line arguments.
# Command-line arguments take precedence over environment variables.
#
# Environment variables:
# LLAMA_HOST - Server host (default: 0.0.0.0)
# LLAMA_PORT - Server port (default: 8000)
# LLAMA_MODELS_PATH - Path to model files (default: /models)
# LLAMA_CONTEXT_SIZE - Context size (default: 512)
# LLAMA_GPU_LAYERS - GPU layers (default: 99)
# LLAMA_LOG_FILE - Log file name (default: llama-server.log)
# LLAMA_CPP_IMAGE - Docker image (default: ghcr.io/ggml-org/llama.cpp:full)
#
# Command-line arguments (override environment variables):
# --host - Server host
# -p, --port - Server port
# -m, --models-path - Path to model files
# -c, --context-size - Context size
# -g, --gpu-layers - GPU layers
# -l, --log-file - Log file name
# -i, --image - Docker image
# -h, --help - Show this help message
#
# Example usage:
# ./start-docker-server.sh -c 1024 -g 50 -l my-server.log
# LLAMA_PORT=8080 ./start-docker-server.sh --context-size 2048 --port 9000
# Check if .env file exists and ask user if they want to source it
if [ -f ".env" ]; then
    echo "Found .env file in current directory."
    echo "Do you want to source the .env file to load environment variables? (y/n):"
    read -r source_env
    if [[ "$source_env" =~ ^[Yy]$ ]]; then
        source .env
        echo "Environment variables loaded from .env file."
    else
        echo "Skipping .env file."
    fi
    echo ""
fi
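# For reference, a .env file for this script might look like this
# (values are illustrative, not the defaults):
#   LLAMA_PORT=8080
#   LLAMA_MODELS_PATH=/data/models
#   LLAMA_CONTEXT_SIZE=2048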
# Set default values from environment variables with fallbacks
HOST=${LLAMA_HOST:-0.0.0.0}
SERVER_PORT=${LLAMA_PORT:-8000}
MODELS_PATH=${LLAMA_MODELS_PATH:-/models}
CONTEXT_SIZE=${LLAMA_CONTEXT_SIZE:-512}
GPU_LAYERS=${LLAMA_GPU_LAYERS:-99}
LOG_FILE=${LLAMA_LOG_FILE:-llama-server.log}
LLAMA_CPP_IMAGE=${LLAMA_CPP_IMAGE:-ghcr.io/ggml-org/llama.cpp:full}
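# Example: with "export LLAMA_PORT=9000" in the environment, SERVER_PORT
# becomes 9000 here, but a later "--port 8080" argument still overrides it.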
# Parse command-line arguments (these override environment variables)
while [[ $# -gt 0 ]]; do
    case $1 in
        --host)
            HOST="$2"
            shift 2
            ;;
        -p|--port)
            SERVER_PORT="$2"
            shift 2
            ;;
        -m|--models-path)
            MODELS_PATH="$2"
            shift 2
            ;;
        -c|--context-size)
            CONTEXT_SIZE="$2"
            shift 2
            ;;
        -g|--gpu-layers)
            GPU_LAYERS="$2"
            shift 2
            ;;
        -l|--log-file)
            LOG_FILE="$2"
            shift 2
            ;;
        -i|--image)
            LLAMA_CPP_IMAGE="$2"
            shift 2
            ;;
        -h|--help)
            echo "Usage: $0 [OPTIONS]"
            echo ""
            echo "Options:"
            echo "  --host               Server host (default: 0.0.0.0)"
            echo "  -p, --port           Server port (default: 8000)"
            echo "  -m, --models-path    Path to model files (default: /models)"
            echo "  -c, --context-size   Context size (default: 512)"
            echo "  -g, --gpu-layers     GPU layers (default: 99)"
            echo "  -l, --log-file       Log file name (default: llama-server.log)"
            echo "  -i, --image          Docker image (default: ghcr.io/ggml-org/llama.cpp:full)"
            echo "  -h, --help           Show this help message"
            echo ""
            echo "Environment variables (overridden by command-line arguments):"
            echo "  LLAMA_HOST           Server host"
            echo "  LLAMA_PORT           Server port"
            echo "  LLAMA_MODELS_PATH    Path to model files"
            echo "  LLAMA_CONTEXT_SIZE   Context size"
            echo "  LLAMA_GPU_LAYERS     GPU layers"
            echo "  LLAMA_LOG_FILE       Log file name"
            echo "  LLAMA_CPP_IMAGE      Docker image"
            echo ""
            echo "Examples:"
            echo "  $0 -c 1024 -g 50 -l my-server.log"
            echo "  LLAMA_PORT=8080 $0 --context-size 2048 --port 9000"
            exit 0
            ;;
        *)
            echo "Unknown option: $1"
            echo "Use -h or --help for usage information."
            exit 1
            ;;
    esac
done
# Display current configuration
echo "Configuration:"
echo " Host: $HOST"
echo " Port: $SERVER_PORT"
echo " Models Path: $MODELS_PATH"
echo " Context Size: $CONTEXT_SIZE"
echo " GPU Layers: $GPU_LAYERS"
echo " Log File: $LOG_FILE"
echo ""
# List the available model families (subdirectories) in the specified directory
echo "Available model families in $MODELS_PATH:"
echo ""
# Create an array of model family directories
families=()
family_paths=()
while IFS= read -r -d '' dir; do
    if [ -d "$dir" ]; then
        families+=("$(basename "$dir")")
        family_paths+=("$dir")
    fi
done < <(find "$MODELS_PATH" -maxdepth 1 -type d ! -path "$MODELS_PATH" -print0 2>/dev/null)
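# Note: find -print0 paired with read -r -d '' above safely handles directory
# names that contain spaces or newlines.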
# Check if any model families were found
if [ ${#families[@]} -eq 0 ]; then
    echo "No model family directories found in $MODELS_PATH"
    exit 1
fi
# Display numbered list of model families
for i in "${!families[@]}"; do
    # Count models in this family
    model_count=$(find "${family_paths[i]}" -maxdepth 1 -type f -name "*.gguf" 2>/dev/null | wc -l)
    echo "$((i+1)). ${families[i]} ($model_count models)"
done
echo ""
echo "Please select a model family by entering its number (1-${#families[@]}):"
read -r family_selection
# Validate family selection
if ! [[ "$family_selection" =~ ^[0-9]+$ ]] || [ "$family_selection" -lt 1 ] || [ "$family_selection" -gt ${#families[@]} ]; then
echo "Invalid selection. Please run the script again and select a number between 1 and ${#families[@]}."
exit 1
fi
# Store the selected family path
SELECTED_FAMILY_PATH="${family_paths[$((family_selection-1))]}"
SELECTED_FAMILY_NAME="${families[$((family_selection-1))]}"
echo "Selected model family: $SELECTED_FAMILY_NAME"
echo ""
# Now list the available models in the selected family directory
echo "Available models in $SELECTED_FAMILY_NAME:"
echo ""
# Create an array of model files in the selected family
models=()
model_paths=()
while IFS= read -r -d '' file; do
    models+=("$(basename "$file")")
    model_paths+=("$file")
done < <(find "$SELECTED_FAMILY_PATH" -maxdepth 1 -type f -name "*.gguf" -print0 2>/dev/null)
# Check if any models were found in the selected family
if [ ${#models[@]} -eq 0 ]; then
    echo "No .gguf model files found in $SELECTED_FAMILY_PATH"
    exit 1
fi
# Function to format a byte count as a human-readable size (requires bc)
format_size() {
    local size=$1
    if [ "$size" -ge 1073741824 ]; then
        echo "$(echo "scale=1; $size / 1073741824" | bc)GB"
    elif [ "$size" -ge 1048576 ]; then
        echo "$(echo "scale=1; $size / 1048576" | bc)MB"
    elif [ "$size" -ge 1024 ]; then
        echo "$(echo "scale=1; $size / 1024" | bc)KB"
    else
        echo "${size}B"
    fi
}
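# Example: format_size 1610612736 prints "1.5GB"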
# Display numbered list of models with file sizes
for i in "${!models[@]}"; do
    # GNU stat (-c%s) first, then BSD/macOS stat (-f%z) as a fallback
    file_size=$(stat -c%s "${model_paths[i]}" 2>/dev/null || stat -f%z "${model_paths[i]}" 2>/dev/null || echo "0")
    formatted_size=$(format_size "$file_size")
    echo "$((i+1)). ${models[i]} ($formatted_size)"
done
echo ""
echo "Please select a model by entering its number (1-${#models[@]}):"
read -r selection
# Validate selection
if ! [[ "$selection" =~ ^[0-9]+$ ]] || [ "$selection" -lt 1 ] || [ "$selection" -gt ${#models[@]} ]; then
echo "Invalid selection. Please run the script again and select a number between 1 and ${#models[@]}."
exit 1
fi
# Store the selected model name and relative path for Docker
MODEL_NAME="${models[$((selection-1))]}"
MODEL_RELATIVE_PATH="$SELECTED_FAMILY_NAME/$MODEL_NAME"
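# (The host directory $MODELS_PATH is mounted at /models inside the container,
# so the model is referenced by this family-relative path at run time.)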
echo "Selected model: $MODEL_RELATIVE_PATH"
# Ask user for run mode preference
echo ""
echo "How would you like to run the server?"
echo "1. Foreground mode (press Ctrl+C to stop)"
echo "2. Detached mode (runs in background)"
echo ""
echo "Please select run mode (1-2):"
read -r run_mode
# Validate run mode selection
if ! [[ "$run_mode" =~ ^[0-9]+$ ]] || [ "$run_mode" -lt 1 ] || [ "$run_mode" -gt 2 ]; then
echo "Invalid selection. Please run the script again and select 1 or 2."
exit 1
fi
# Start the server container.
echo ""
echo "Starting the server with model: $MODEL_NAME"
echo ""
# Check if Docker daemon is running
if ! docker info >/dev/null 2>&1; then
    echo "Docker daemon is not running."
    echo "Attempting to start Docker daemon..."
    # Try to start Docker using systemctl (most common)
    if command -v systemctl >/dev/null 2>&1; then
        if sudo systemctl start docker; then
            echo "Docker daemon started successfully."
            # Wait a moment for Docker to fully initialize
            sleep 2
        else
            echo "Failed to start Docker daemon using systemctl."
            echo "Please start Docker manually and try again."
            exit 1
        fi
    # Try the service command as a fallback
    elif command -v service >/dev/null 2>&1; then
        if sudo service docker start; then
            echo "Docker daemon started successfully."
            sleep 2
        else
            echo "Failed to start Docker daemon using the service command."
            echo "Please start Docker manually and try again."
            exit 1
        fi
    else
        echo "Cannot automatically start Docker daemon."
        echo "Please start Docker manually using one of these commands:"
        echo "  sudo systemctl start docker"
        echo "  sudo service docker start"
        echo "  sudo dockerd"
        exit 1
    fi
    # Verify Docker is now running
    if ! docker info >/dev/null 2>&1; then
        echo "Docker daemon is still not accessible after start attempt."
        echo "Please check Docker installation and permissions."
        exit 1
    fi
fi
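# Tip: adding your user to the "docker" group (sudo usermod -aG docker "$USER",
# then logging back in) lets this script reach the daemon without sudo.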
# Check if a container with this name already exists
if docker ps -a --format "{{.Names}}" | grep -q "^llama-cpp-server$"; then
    echo "A container named 'llama-cpp-server' already exists."
    # Check if it's running
    if docker ps --format "{{.Names}}" | grep -q "^llama-cpp-server$"; then
        echo "The container is currently running."
        echo "Do you want to stop and remove it to start a new one? (y/n):"
    else
        echo "The container is stopped."
        echo "Do you want to remove it to start a new one? (y/n):"
    fi
    read -r remove_choice
    if [[ "$remove_choice" =~ ^[Yy]$ ]]; then
        echo "Stopping and removing existing container..."
        docker stop llama-cpp-server 2>/dev/null || true
        docker rm llama-cpp-server 2>/dev/null || true
        echo "Existing container removed."
    else
        echo "Cannot start new container. Exiting."
        exit 1
    fi
fi
if [ "$run_mode" -eq 1 ]; then
# Run in foreground mode
echo "Starting server in foreground mode (press Ctrl+C to stop)..."
docker run --name llama-cpp-server --gpus all \
-v $MODELS_PATH:/models -p $SERVER_PORT:$SERVER_PORT $LLAMA_CPP_IMAGE \
-m /models/$MODEL_RELATIVE_PATH --port $SERVER_PORT --host $HOST -n $CONTEXT_SIZE -ngl $GPU_LAYERS
else
# Run in detached mode
echo "Starting server in detached mode..."
CONTAINER_ID=$(docker run -d --gpus all --name llama-cpp-server \
-v $MODELS_PATH:/models -p $SERVER_PORT:$SERVER_PORT $LLAMA_CPP_IMAGE \
-m /models/$MODEL_RELATIVE_PATH --port $SERVER_PORT --host $HOST -n $CONTEXT_SIZE -ngl $GPU_LAYERS)
echo "Server started in background with container ID: $CONTAINER_ID"
echo "Server will be available at http://localhost:$SERVER_PORT"
echo ""
echo "To check server logs: docker logs llama-cpp-server"
echo "To follow logs: docker logs -f llama-cpp-server"
echo "To stop the server: docker stop llama-cpp-server"
echo "To remove the container: docker rm llama-cpp-server"
fi
fi
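
Once the container is up, llama-server speaks an OpenAI-compatible HTTP API. A minimal smoke test against the defaults above (port 8000; the server hosts a single model, so no model field is needed):

curl http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"messages": [{"role": "user", "content": "Say hello in one sentence."}]}'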