Created
July 26, 2025 19:03
-
-
Save aleenprd/3b3905606653b75b43c37542181bc073 to your computer and use it in GitHub Desktop.
Start a docker server for llama.cpp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# Llama.cpp Docker Server Launcher | |
# | |
# This script can be configured using environment variables and/or command-line arguments. | |
# Command-line arguments take precedence over environment variables. | |
# | |
# Environment variables: | |
# LLAMA_HOST - Server host (default: 0.0.0.0) | |
# LLAMA_PORT - Server port (default: 8000) | |
# LLAMA_MODELS_PATH - Path to model files (default: /models) | |
# LLAMA_CONTEXT_SIZE - Context size (default: 512) | |
# LLAMA_GPU_LAYERS - GPU layers (default: 99) | |
# LLAMA_LOG_FILE - Log file name (default: llama-server.log) | |
# LLAMA_CPP_IMAGE - Docker image (default: ghcr.io/ggml-org/llama.cpp:full) | |
# | |
# Command-line arguments (override environment variables): | |
# --host - Server host | |
# -p, --port - Server port | |
# -m, --models-path - Path to model files | |
# -c, --context-size - Context size | |
# -g, --gpu-layers - GPU layers | |
# -l, --log-file - Log file name | |
# -i, --image - Docker image | |
# -h, --help - Show this help message | |
# | |
# Example usage: | |
# ./start-docker-server.sh -c 1024 -g 50 -l my-server.log | |
# LLAMA_PORT=8080 ./start-docker-server.sh --context-size 2048 --port 9000 | |
# Check if .env file exists and ask user if they want to source it | |
if [ -f ".env" ]; then | |
echo "Found .env file in current directory." | |
echo "Do you want to source the .env file to load environment variables? (y/n):" | |
read -r source_env | |
if [[ "$source_env" =~ ^[Yy]$ ]]; then | |
source .env | |
echo "Environment variables loaded from .env file." | |
else | |
echo "Skipping .env file." | |
fi | |
echo "" | |
fi | |
# Resolve the effective configuration: environment variables take priority,
# with these literals as fallbacks. Command-line flags may override later.
HOST="${LLAMA_HOST:-0.0.0.0}"
SERVER_PORT="${LLAMA_PORT:-8000}"
MODELS_PATH="${LLAMA_MODELS_PATH:-/models}"
CONTEXT_SIZE="${LLAMA_CONTEXT_SIZE:-512}"
GPU_LAYERS="${LLAMA_GPU_LAYERS:-99}"
# NOTE(review): LOG_FILE is collected here and via -l but is not referenced by
# the docker run commands later in this script as shown — TODO confirm intent.
LOG_FILE="${LLAMA_LOG_FILE:-llama-server.log}"
LLAMA_CPP_IMAGE="${LLAMA_CPP_IMAGE:-ghcr.io/ggml-org/llama.cpp:full}"
# Parse command-line arguments (these override environment variables).
#
# require_value guards every value-taking option: without it, an option given
# as the last argument (e.g. `./script -p`) would read an empty $2 and then
# `shift 2` would fail — bash refuses to shift past $# — leaving $1 unchanged
# and spinning this while-loop forever.
# Arguments: $1 - the option name (for the error message), $2 - current $#
require_value() {
    if [ "$2" -lt 2 ]; then
        echo "Error: option $1 requires a value." >&2
        echo "Use -h or --help for usage information." >&2
        exit 1
    fi
}
while [[ $# -gt 0 ]]; do
    case $1 in
        --host)
            require_value "$1" $#
            HOST="$2"
            shift 2
            ;;
        -p|--port)
            require_value "$1" $#
            SERVER_PORT="$2"
            shift 2
            ;;
        -m|--models-path)
            require_value "$1" $#
            MODELS_PATH="$2"
            shift 2
            ;;
        -c|--context-size)
            require_value "$1" $#
            CONTEXT_SIZE="$2"
            shift 2
            ;;
        -g|--gpu-layers)
            require_value "$1" $#
            GPU_LAYERS="$2"
            shift 2
            ;;
        -l|--log-file)
            require_value "$1" $#
            LOG_FILE="$2"
            shift 2
            ;;
        -i|--image)
            require_value "$1" $#
            LLAMA_CPP_IMAGE="$2"
            shift 2
            ;;
        -h|--help)
            echo "Usage: $0 [OPTIONS]"
            echo ""
            echo "Options:"
            echo "  --host              Server host (default: 0.0.0.0)"
            echo "  -p, --port          Server port (default: 8000)"
            echo "  -m, --models-path   Path to model files (default: /models)"
            echo "  -c, --context-size  Context size (default: 512)"
            echo "  -g, --gpu-layers    GPU layers (default: 99)"
            echo "  -l, --log-file      Log file name (default: llama-server.log)"
            echo "  -i, --image         Docker image (default: ghcr.io/ggml-org/llama.cpp:full)"
            echo "  -h, --help          Show this help message"
            echo ""
            echo "Environment variables (overridden by command-line arguments):"
            echo "  LLAMA_HOST          Server host"
            echo "  LLAMA_PORT          Server port"
            echo "  LLAMA_MODELS_PATH   Path to model files"
            echo "  LLAMA_CONTEXT_SIZE  Context size"
            echo "  LLAMA_GPU_LAYERS    GPU layers"
            echo "  LLAMA_LOG_FILE      Log file name"
            echo "  LLAMA_CPP_IMAGE     Docker image"
            echo ""
            echo "Examples:"
            echo "  $0 -c 1024 -g 50 -l my-server.log"
            echo "  LLAMA_PORT=8080 $0 --context-size 2048 --port 9000"
            exit 0
            ;;
        *)
            echo "Unknown option: $1"
            echo "Use -h or --help for usage information."
            exit 1
            ;;
    esac
done
# Echo the effective configuration back to the user before scanning models.
printf '%s\n' "Configuration:"
printf '%s\n' "  Host: $HOST"
printf '%s\n' "  Port: $SERVER_PORT"
printf '%s\n' "  Models Path: $MODELS_PATH"
printf '%s\n' "  Context Size: $CONTEXT_SIZE"
printf '%s\n' "  GPU Layers: $GPU_LAYERS"
printf '%s\n' "  Log File: $LOG_FILE"
printf '%s\n' ""
# Header for the model-family listing that follows
printf '%s\n' "Available model families in $MODELS_PATH:"
printf '%s\n' ""
# Collect the immediate subdirectories of MODELS_PATH; each one is treated as
# a "model family". NUL-delimited find output survives arbitrary directory
# names (spaces, newlines).
families=()
family_paths=()
while IFS= read -r -d '' family_dir; do
    [ -d "$family_dir" ] || continue
    families+=("$(basename "$family_dir")")
    family_paths+=("$family_dir")
done < <(find "$MODELS_PATH" -maxdepth 1 -type d ! -path "$MODELS_PATH" -print0 2>/dev/null)
# Bail out when the models directory is missing or has no subdirectories.
if [ "${#families[@]}" -eq 0 ]; then
    echo "No model family directories found in $MODELS_PATH"
    exit 1
fi
# Present a numbered menu of families; each entry shows how many .gguf files
# sit directly inside that family directory.
menu_no=1
for fam_dir in "${family_paths[@]}"; do
    gguf_count=$(find "$fam_dir" -maxdepth 1 -type f -name "*.gguf" 2>/dev/null | wc -l)
    echo "${menu_no}. $(basename "$fam_dir") ($gguf_count models)"
    menu_no=$((menu_no + 1))
done
echo ""
echo "Please select a model family by entering its number (1-${#families[@]}):"
read -r family_selection
# Accept only an integer within the menu range; anything else aborts.
if [[ "$family_selection" =~ ^[0-9]+$ ]] \
    && [ "$family_selection" -ge 1 ] \
    && [ "$family_selection" -le "${#families[@]}" ]; then
    : # valid selection
else
    echo "Invalid selection. Please run the script again and select a number between 1 and ${#families[@]}."
    exit 1
fi
# Remember the chosen family (menu is 1-based, arrays are 0-based).
sel_index=$((family_selection - 1))
SELECTED_FAMILY_PATH="${family_paths[$sel_index]}"
SELECTED_FAMILY_NAME="${families[$sel_index]}"
echo "Selected model family: $SELECTED_FAMILY_NAME"
echo ""
# Enumerate the .gguf files directly inside the chosen family directory.
echo "Available models in $SELECTED_FAMILY_NAME:"
echo ""
models=()
model_paths=()
while IFS= read -r -d '' gguf_file; do
    model_paths+=("$gguf_file")
    models+=("${gguf_file##*/}")   # strip directory prefix (basename)
done < <(find "$SELECTED_FAMILY_PATH" -maxdepth 1 -type f -name "*.gguf" -print0 2>/dev/null)
# A family directory with no .gguf files is a dead end — abort.
if [ "${#models[@]}" -eq 0 ]; then
    echo "No .gguf model files found in $SELECTED_FAMILY_PATH"
    exit 1
fi
# Format a byte count as a human-readable size with one decimal place
# (e.g. 1536 -> "1.5KB"). Uses pure-bash integer arithmetic instead of the
# external `bc`, which may not be installed; division truncates, matching
# bc's `scale=1` behavior. Sizes below 1024 are printed as plain bytes.
# Arguments: $1 - size in bytes (non-negative integer)
# Outputs:   formatted size on stdout
format_size() {
    local size=$1
    local unit divisor
    if [ "$size" -ge 1073741824 ]; then
        unit=GB; divisor=1073741824
    elif [ "$size" -ge 1048576 ]; then
        unit=MB; divisor=1048576
    elif [ "$size" -ge 1024 ]; then
        unit=KB; divisor=1024
    else
        echo "${size}B"
        return 0
    fi
    # Scale by 10 so integer division keeps one decimal digit of precision.
    local tenths=$(( size * 10 / divisor ))
    echo "$(( tenths / 10 )).$(( tenths % 10 ))${unit}"
}
# Print the model menu with human-readable file sizes.
# `stat -c%s` works on GNU coreutils, `stat -f%z` on BSD/macOS; if both fail
# the size falls back to 0 rather than aborting the menu.
for idx in "${!models[@]}"; do
    file_size=$(stat -c%s "${model_paths[idx]}" 2>/dev/null || stat -f%z "${model_paths[idx]}" 2>/dev/null || echo "0")
    formatted_size=$(format_size "$file_size")
    echo "$((idx + 1)). ${models[idx]} ($formatted_size)"
done
echo ""
echo "Please select a model by entering its number (1-${#models[@]}):"
read -r selection
# Accept only an integer within the menu range; anything else aborts.
if [[ "$selection" =~ ^[0-9]+$ ]] \
    && [ "$selection" -ge 1 ] \
    && [ "$selection" -le "${#models[@]}" ]; then
    : # valid selection
else
    echo "Invalid selection. Please run the script again and select a number between 1 and ${#models[@]}."
    exit 1
fi
# The container mounts MODELS_PATH at /models, so the model is addressed by
# its family-relative path inside the container.
MODEL_NAME="${models[$((selection - 1))]}"
MODEL_RELATIVE_PATH="$SELECTED_FAMILY_NAME/$MODEL_NAME"
echo "Selected model: $MODEL_RELATIVE_PATH"
# Ask whether to run the container attached (foreground) or detached.
echo ""
echo "How would you like to run the server?"
echo "1. Foreground mode (press Ctrl+C to stop)"
echo "2. Detached mode (runs in background)"
echo ""
echo "Please select run mode (1-2):"
read -r run_mode
# Accept only an integer in [1, 2]; anything else aborts.
if [[ "$run_mode" =~ ^[0-9]+$ ]] && [ "$run_mode" -ge 1 ] && [ "$run_mode" -le 2 ]; then
    : # valid selection
else
    echo "Invalid selection. Please run the script again and select 1 or 2."
    exit 1
fi
# Launch phase: make sure the Docker daemon is reachable before running the
# container. If it is down, try systemd first, then SysV `service`, and
# otherwise tell the user how to start it manually.
echo ""
echo "Starting the server with model: $MODEL_NAME"
echo ""
if ! docker info >/dev/null 2>&1; then
    echo "Docker daemon is not running."
    echo "Attempting to start Docker daemon..."
    if command -v systemctl >/dev/null 2>&1; then
        # Test the command's exit status directly rather than inspecting $?
        if sudo systemctl start docker; then
            echo "Docker daemon started successfully."
            # Give the daemon a moment to finish bringing up its API socket
            sleep 2
        else
            echo "Failed to start Docker daemon using systemctl."
            echo "Please start Docker manually and try again."
            exit 1
        fi
    # Fall back to the SysV-style service command
    elif command -v service >/dev/null 2>&1; then
        if sudo service docker start; then
            echo "Docker daemon started successfully."
            sleep 2
        else
            echo "Failed to start Docker daemon using service command."
            echo "Please start Docker manually and try again."
            exit 1
        fi
    else
        echo "Cannot automatically start Docker daemon."
        echo "Please start Docker manually using one of these commands:"
        echo "  sudo systemctl start docker"
        echo "  sudo service docker start"
        echo "  sudo dockerd"
        exit 1
    fi
    # Re-check: a successful service start does not guarantee the API is usable
    # (e.g. permission problems on the Docker socket).
    if ! docker info >/dev/null 2>&1; then
        echo "Docker daemon is still not accessible after start attempt."
        echo "Please check Docker installation and permissions."
        exit 1
    fi
fi
# A stale container named 'llama-cpp-server' would make `docker run` fail with
# a name conflict, so detect it and offer to remove it first.
# Note: plain '{{.Names}}' (not 'table {{.Names}}') is used so the pipeline
# does not carry a useless NAMES header row into grep.
if docker ps -a --format '{{.Names}}' | grep -q "^llama-cpp-server$"; then
    echo "A container named 'llama-cpp-server' already exists."
    # Distinguish running vs. stopped only to word the prompt accurately.
    if docker ps --format '{{.Names}}' | grep -q "^llama-cpp-server$"; then
        echo "The container is currently running."
        echo "Do you want to stop and remove it to start a new one? (y/n):"
    else
        echo "The container is stopped."
        echo "Do you want to remove it to start a new one? (y/n):"
    fi
    read -r remove_choice
    if [[ "$remove_choice" =~ ^[Yy]$ ]]; then
        echo "Stopping and removing existing container..."
        # stop errors harmlessly when the container is already stopped; ignore
        docker stop llama-cpp-server 2>/dev/null || true
        docker rm llama-cpp-server 2>/dev/null || true
        echo "Existing container removed."
    else
        echo "Cannot start new container. Exiting."
        exit 1
    fi
fi
if [ "$run_mode" -eq 1 ]; then | |
# Run in foreground mode | |
echo "Starting server in foreground mode (press Ctrl+C to stop)..." | |
docker run --name llama-cpp-server --gpus all \ | |
-v $MODELS_PATH:/models -p $SERVER_PORT:$SERVER_PORT $LLAMA_CPP_IMAGE \ | |
-m /models/$MODEL_RELATIVE_PATH --port $SERVER_PORT --host $HOST -n $CONTEXT_SIZE -ngl $GPU_LAYERS | |
else | |
# Run in detached mode | |
echo "Starting server in detached mode..." | |
CONTAINER_ID=$(docker run -d --gpus all --name llama-cpp-server \ | |
-v $MODELS_PATH:/models -p $SERVER_PORT:$SERVER_PORT $LLAMA_CPP_IMAGE \ | |
-m /models/$MODEL_RELATIVE_PATH --port $SERVER_PORT --host $HOST -n $CONTEXT_SIZE -ngl $GPU_LAYERS) | |
echo "Server started in background with container ID: $CONTAINER_ID" | |
echo "Server will be available at http://localhost:$SERVER_PORT" | |
echo "" | |
echo "To check server logs: docker logs llama-cpp-server" | |
echo "To follow logs: docker logs -f llama-cpp-server" | |
echo "To stop the server: docker stop llama-cpp-server" | |
echo "To remove the container: docker rm llama-cpp-server" | |
fi | |
fi |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment