@aleenprd
Created July 26, 2025 19:03
Start a Docker server for llama.cpp
#!/bin/bash
# Llama.cpp Docker Server Launcher
#
# This script can be configured using environment variables and/or command-line arguments.
# Command-line arguments take precedence over environment variables.
#
# Environment variables:
# LLAMA_HOST - Server host (default: 0.0.0.0)
# LLAMA_PORT - Server port (default: 8000)
# LLAMA_MODELS_PATH - Path to model files (default: /models)
# LLAMA_CONTEXT_SIZE - Context size (default: 512)
# LLAMA_GPU_LAYERS - GPU layers (default: 99)
# LLAMA_LOG_FILE - Log file name (default: llama-server.log)
# LLAMA_CPP_IMAGE - Docker image (default: ghcr.io/ggml-org/llama.cpp:full)
#
# Command-line arguments (override environment variables):
# --host - Server host
# -p, --port - Server port
# -m, --models-path - Path to model files
# -c, --context-size - Context size
# -g, --gpu-layers - GPU layers
# -l, --log-file - Log file name
# -i, --image - Docker image
# -h, --help - Show this help message
#
# Example usage:
# ./start-docker-server.sh -c 1024 -g 50 -l my-server.log
# LLAMA_PORT=8080 ./start-docker-server.sh --context-size 2048 --port 9000
# Check if .env file exists and ask user if they want to source it
if [ -f ".env" ]; then
    echo "Found .env file in current directory."
    echo "Do you want to source the .env file to load environment variables? (y/n):"
    read -r source_env
    if [[ "$source_env" =~ ^[Yy]$ ]]; then
        source .env
        echo "Environment variables loaded from .env file."
    else
        echo "Skipping .env file."
    fi
    echo ""
fi
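# For reference, a .env file for this script might look like this
# (values are illustrative, not the defaults):
#   LLAMA_PORT=8080
#   LLAMA_MODELS_PATH=/data/models
#   LLAMA_CONTEXT_SIZE=2048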
# Set default values from environment variables with fallbacks
HOST=${LLAMA_HOST:-0.0.0.0}
SERVER_PORT=${LLAMA_PORT:-8000}
MODELS_PATH=${LLAMA_MODELS_PATH:-/models}
CONTEXT_SIZE=${LLAMA_CONTEXT_SIZE:-512}
GPU_LAYERS=${LLAMA_GPU_LAYERS:-99}
LOG_FILE=${LLAMA_LOG_FILE:-llama-server.log}
LLAMA_CPP_IMAGE=${LLAMA_CPP_IMAGE:-ghcr.io/ggml-org/llama.cpp:full}
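# Example: with "export LLAMA_PORT=9000" in the environment, SERVER_PORT
# becomes 9000 here, but a later "--port 8080" argument still overrides it.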
# Parse command-line arguments (these override environment variables)
while [[ $# -gt 0 ]]; do
    case $1 in
        --host)
            HOST="$2"
            shift 2
            ;;
        -p|--port)
            SERVER_PORT="$2"
            shift 2
            ;;
        -m|--models-path)
            MODELS_PATH="$2"
            shift 2
            ;;
        -c|--context-size)
            CONTEXT_SIZE="$2"
            shift 2
            ;;
        -g|--gpu-layers)
            GPU_LAYERS="$2"
            shift 2
            ;;
        -l|--log-file)
            LOG_FILE="$2"
            shift 2
            ;;
        -i|--image)
            LLAMA_CPP_IMAGE="$2"
            shift 2
            ;;
        -h|--help)
            echo "Usage: $0 [OPTIONS]"
            echo ""
            echo "Options:"
            echo "  --host               Server host (default: 0.0.0.0)"
            echo "  -p, --port           Server port (default: 8000)"
            echo "  -m, --models-path    Path to model files (default: /models)"
            echo "  -c, --context-size   Context size (default: 512)"
            echo "  -g, --gpu-layers     GPU layers (default: 99)"
            echo "  -l, --log-file       Log file name (default: llama-server.log)"
            echo "  -i, --image          Docker image (default: ghcr.io/ggml-org/llama.cpp:full)"
            echo "  -h, --help           Show this help message"
            echo ""
            echo "Environment variables (overridden by command-line arguments):"
            echo "  LLAMA_HOST           Server host"
            echo "  LLAMA_PORT           Server port"
            echo "  LLAMA_MODELS_PATH    Path to model files"
            echo "  LLAMA_CONTEXT_SIZE   Context size"
            echo "  LLAMA_GPU_LAYERS     GPU layers"
            echo "  LLAMA_LOG_FILE       Log file name"
            echo "  LLAMA_CPP_IMAGE      Docker image"
            echo ""
            echo "Examples:"
            echo "  $0 -c 1024 -g 50 -l my-server.log"
            echo "  LLAMA_PORT=8080 $0 --context-size 2048 --port 9000"
            exit 0
            ;;
        *)
            echo "Unknown option: $1"
            echo "Use -h or --help for usage information."
            exit 1
            ;;
    esac
done
# Display current configuration
echo "Configuration:"
echo " Host: $HOST"
echo " Port: $SERVER_PORT"
echo " Models Path: $MODELS_PATH"
echo " Context Size: $CONTEXT_SIZE"
echo " GPU Layers: $GPU_LAYERS"
echo " Log File: $LOG_FILE"
echo ""
# List the available model families (subdirectories) in the specified directory
echo "Available model families in $MODELS_PATH:"
echo ""
# Create an array of model family directories
families=()
family_paths=()
while IFS= read -r -d '' dir; do
    if [ -d "$dir" ]; then
        families+=("$(basename "$dir")")
        family_paths+=("$dir")
    fi
done < <(find "$MODELS_PATH" -maxdepth 1 -type d ! -path "$MODELS_PATH" -print0 2>/dev/null)
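# Note: find -print0 paired with read -r -d '' above safely handles directory
# names that contain spaces or newlines.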
# Check if any model families were found
if [ ${#families[@]} -eq 0 ]; then
    echo "No model family directories found in $MODELS_PATH"
    exit 1
fi
# Display numbered list of model families
for i in "${!families[@]}"; do
    # Count models in this family
    model_count=$(find "${family_paths[i]}" -maxdepth 1 -type f -name "*.gguf" 2>/dev/null | wc -l)
    echo "$((i+1)). ${families[i]} ($model_count models)"
done
echo ""
echo "Please select a model family by entering its number (1-${#families[@]}):"
read -r family_selection
# Validate family selection
if ! [[ "$family_selection" =~ ^[0-9]+$ ]] || [ "$family_selection" -lt 1 ] || [ "$family_selection" -gt ${#families[@]} ]; then
echo "Invalid selection. Please run the script again and select a number between 1 and ${#families[@]}."
exit 1
fi
# Store the selected family path
SELECTED_FAMILY_PATH="${family_paths[$((family_selection-1))]}"
SELECTED_FAMILY_NAME="${families[$((family_selection-1))]}"
echo "Selected model family: $SELECTED_FAMILY_NAME"
echo ""
# Now list the available models in the selected family directory
echo "Available models in $SELECTED_FAMILY_NAME:"
echo ""
# Create an array of model files in the selected family
models=()
model_paths=()
while IFS= read -r -d '' file; do
    models+=("$(basename "$file")")
    model_paths+=("$file")
done < <(find "$SELECTED_FAMILY_PATH" -maxdepth 1 -type f -name "*.gguf" -print0 2>/dev/null)
# Check if any models were found in the selected family
if [ ${#models[@]} -eq 0 ]; then
    echo "No .gguf model files found in $SELECTED_FAMILY_PATH"
    exit 1
fi
# Function to format a byte count as a human-readable size (requires bc)
format_size() {
    local size=$1
    if [ "$size" -ge 1073741824 ]; then
        echo "$(echo "scale=1; $size / 1073741824" | bc)GB"
    elif [ "$size" -ge 1048576 ]; then
        echo "$(echo "scale=1; $size / 1048576" | bc)MB"
    elif [ "$size" -ge 1024 ]; then
        echo "$(echo "scale=1; $size / 1024" | bc)KB"
    else
        echo "${size}B"
    fi
}
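# Example: format_size 1610612736 prints "1.5GB"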
# Display numbered list of models with file sizes
for i in "${!models[@]}"; do
    # GNU stat (-c%s) first, then BSD/macOS stat (-f%z) as a fallback
    file_size=$(stat -c%s "${model_paths[i]}" 2>/dev/null || stat -f%z "${model_paths[i]}" 2>/dev/null || echo "0")
    formatted_size=$(format_size "$file_size")
    echo "$((i+1)). ${models[i]} ($formatted_size)"
done
echo ""
echo "Please select a model by entering its number (1-${#models[@]}):"
read -r selection
# Validate selection
if ! [[ "$selection" =~ ^[0-9]+$ ]] || [ "$selection" -lt 1 ] || [ "$selection" -gt ${#models[@]} ]; then
echo "Invalid selection. Please run the script again and select a number between 1 and ${#models[@]}."
exit 1
fi
# Store the selected model name and relative path for Docker
MODEL_NAME="${models[$((selection-1))]}"
MODEL_RELATIVE_PATH="$SELECTED_FAMILY_NAME/$MODEL_NAME"
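# (The host directory $MODELS_PATH is mounted at /models inside the container,
# so the model is referenced by this family-relative path at run time.)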
echo "Selected model: $MODEL_RELATIVE_PATH"
# Ask user for run mode preference
echo ""
echo "How would you like to run the server?"
echo "1. Foreground mode (press Ctrl+C to stop)"
echo "2. Detached mode (runs in background)"
echo ""
echo "Please select run mode (1-2):"
read -r run_mode
# Validate run mode selection
if ! [[ "$run_mode" =~ ^[0-9]+$ ]] || [ "$run_mode" -lt 1 ] || [ "$run_mode" -gt 2 ]; then
echo "Invalid selection. Please run the script again and select 1 or 2."
exit 1
fi
# Start the server container.
echo ""
echo "Starting the server with model: $MODEL_NAME"
echo ""
# Check if Docker daemon is running
if ! docker info >/dev/null 2>&1; then
    echo "Docker daemon is not running."
    echo "Attempting to start Docker daemon..."
    # Try to start Docker using systemctl (most common)
    if command -v systemctl >/dev/null 2>&1; then
        if sudo systemctl start docker; then
            echo "Docker daemon started successfully."
            # Wait a moment for Docker to fully initialize
            sleep 2
        else
            echo "Failed to start Docker daemon using systemctl."
            echo "Please start Docker manually and try again."
            exit 1
        fi
    # Try the service command as a fallback
    elif command -v service >/dev/null 2>&1; then
        if sudo service docker start; then
            echo "Docker daemon started successfully."
            sleep 2
        else
            echo "Failed to start Docker daemon using the service command."
            echo "Please start Docker manually and try again."
            exit 1
        fi
    else
        echo "Cannot automatically start Docker daemon."
        echo "Please start Docker manually using one of these commands:"
        echo "  sudo systemctl start docker"
        echo "  sudo service docker start"
        echo "  sudo dockerd"
        exit 1
    fi
    # Verify Docker is now running
    if ! docker info >/dev/null 2>&1; then
        echo "Docker daemon is still not accessible after start attempt."
        echo "Please check Docker installation and permissions."
        exit 1
    fi
fi
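# Tip: adding your user to the "docker" group (sudo usermod -aG docker "$USER",
# then logging back in) lets this script reach the daemon without sudo.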
# Check if a container with this name already exists
if docker ps -a --format "{{.Names}}" | grep -q "^llama-cpp-server$"; then
    echo "A container named 'llama-cpp-server' already exists."
    # Check if it's running
    if docker ps --format "{{.Names}}" | grep -q "^llama-cpp-server$"; then
        echo "The container is currently running."
        echo "Do you want to stop and remove it to start a new one? (y/n):"
    else
        echo "The container is stopped."
        echo "Do you want to remove it to start a new one? (y/n):"
    fi
    read -r remove_choice
    if [[ "$remove_choice" =~ ^[Yy]$ ]]; then
        echo "Stopping and removing existing container..."
        docker stop llama-cpp-server 2>/dev/null || true
        docker rm llama-cpp-server 2>/dev/null || true
        echo "Existing container removed."
    else
        echo "Cannot start new container. Exiting."
        exit 1
    fi
fi
if [ "$run_mode" -eq 1 ]; then
# Run in foreground mode
echo "Starting server in foreground mode (press Ctrl+C to stop)..."
docker run --name llama-cpp-server --gpus all \
-v $MODELS_PATH:/models -p $SERVER_PORT:$SERVER_PORT $LLAMA_CPP_IMAGE \
-m /models/$MODEL_RELATIVE_PATH --port $SERVER_PORT --host $HOST -n $CONTEXT_SIZE -ngl $GPU_LAYERS
else
# Run in detached mode
echo "Starting server in detached mode..."
CONTAINER_ID=$(docker run -d --gpus all --name llama-cpp-server \
-v $MODELS_PATH:/models -p $SERVER_PORT:$SERVER_PORT $LLAMA_CPP_IMAGE \
-m /models/$MODEL_RELATIVE_PATH --port $SERVER_PORT --host $HOST -n $CONTEXT_SIZE -ngl $GPU_LAYERS)
echo "Server started in background with container ID: $CONTAINER_ID"
echo "Server will be available at http://localhost:$SERVER_PORT"
echo ""
echo "To check server logs: docker logs llama-cpp-server"
echo "To follow logs: docker logs -f llama-cpp-server"
echo "To stop the server: docker stop llama-cpp-server"
echo "To remove the container: docker rm llama-cpp-server"
fi
fi
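
Once the container is up, llama-server speaks an OpenAI-compatible HTTP API. A minimal smoke test against the defaults above (port 8000; the server hosts a single model, so no model field is needed):

curl http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"messages": [{"role": "user", "content": "Say hello in one sentence."}]}'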