Skip to content

Instantly share code, notes, and snippets.

@grahama1970
Last active February 5, 2025 21:19
Show Gist options
  • Select an option

  • Save grahama1970/5d5b1f23ca5e47a9e71f7d1503f79e2f to your computer and use it in GitHub Desktop.

Select an option

Save grahama1970/5d5b1f23ca5e47a9e71f7d1503f79e2f to your computer and use it in GitHub Desktop.
Trying to get Qwen2.5-VL-7B to work with CUDA 12.8.
services:
# ---------------------------
# SGLang Service
# Builds the SGLang image from the local Dockerfile and launches the server
# for Qwen/Qwen2.5-VL-7B-Instruct on port 8000, reserving GPU 0.
# ---------------------------
  sglang-service:
    # image: lmsysorg/sglang:latest
    build:
      context: .
      dockerfile: Dockerfile_v2.sglang
    container_name: sglang-service
    volumes:
      # Mount the host Hugging Face cache at the path HF_HOME points to.
      - ~/.cache/huggingface:/root/.cache/huggingface
    restart: always
    environment:
      - HF_TOKEN=${HF_TOKEN}
      - HF_HOME=/root/.cache/huggingface
    # Folded scalar: the lines below are joined with spaces into one bash -c call.
    # NOTE(review): if the image's ENTRYPOINT script does not forward its
    # arguments, this command is never executed - confirm against the image.
    command: >
      bash -c "pip install git+https://github.com/huggingface/transformers.git &&
      python3 -m sglang.launch_server
      --model-path Qwen/Qwen2.5-VL-7B-Instruct
      --host 0.0.0.0
      --port 8000
      --trust-remote-code"
    ulimits:
      memlock: -1
      stack: 67108864
    ipc: host
    healthcheck:
      # Must probe the same port the server is started on (--port 8000 above);
      # probing any other port makes the container permanently unhealthy.
      test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]
    networks:
      - sglang-network
# ----------------------------------------
# FastAPI Proxy Service
# Built from ./fastapi-proxy and published on host port 8000.
# ----------------------------------------
  fastapi-proxy:
    build:
      context: ./fastapi-proxy
      dockerfile: Dockerfile
    depends_on:
      - sglang-service
    networks:
      - sglang-network
    ports:
      - "8000:8000"
    volumes:
      - ~/.cache/huggingface:/root/.cache/huggingface
    environment:
      # Upstream endpoints are addressed by compose service name.
      SGLANG_ENDPOINT: "http://sglang-service:8000"
      # NOTE(review): no "code-exec" service is defined in the visible part of
      # this compose file - confirm where it is provided.
      CODE_EXEC_ENDPOINT: "http://code-exec:8001"
      HF_HOME: "/root/.cache/huggingface"
# --------------------------------
# OpenWebUI - Front-End for Models
# Published on host port 3000 (container port 8080).
# --------------------------------
  openwebui:
    image: ghcr.io/open-webui/open-webui:latest
    container_name: openwebui
    restart: always
    depends_on:
      - sglang-service
    networks:
      - sglang-network
    ports:
      - "3000:8080"
    volumes:
      # Named volume declared under the top-level "volumes" key.
      - webui-data:/app/backend/data
    environment:
      # Point the UI at the /v1 API exposed by the SGLang service.
      OPENAI_API_BASE_URL: "http://sglang-service:8000/v1"
      OPENAI_API_KEY: "EMPTY"
# ----------------------------------------
# ArangoDB - Database
# Published on host port 8529; data is kept in ./arangodb_data on the host.
# ----------------------------------------
  arangodb:
    image: arangodb:latest
    # Always re-pull so the "latest" tag is refreshed on each "up".
    pull_policy: always
    container_name: arangodb
    restart: always
    # Server start-up options, one per list item.
    command:
      - "--server.storage-engine=rocksdb"
      - "--server.export-metrics-api=true"
      - "--experimental-vector-index=true"
      - "--server.descriptors-minimum=1024"
      - "--rocksdb.total-write-buffer-size=1G"
      - "--cache.size=2G"
      - "--server.session-timeout=7200"
    environment:
      # The ":?" form aborts compose with this message when the variable is
      # unset or empty, so the database never starts without a root password.
      ARANGO_ROOT_PASSWORD: "${ARANGO_ROOT_PASSWORD:?ARANGO_ROOT_PASSWORD is required}"
    volumes:
      - ./arangodb_data:/var/lib/arangodb3
    ports:
      - "8529:8529"
    networks:
      - sglang-network
# ----------------------------------------
# Redis - Caching and Message Broker
# Published on host port 6379; data is kept in the redis_data named volume.
# ----------------------------------------
  redis:
    image: redis:alpine
    container_name: redis
    command: redis-server
    restart: always
    networks:
      - sglang-network
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    healthcheck:
      # Exec-form probe: runs "redis-cli ping" directly, without a shell.
      test:
        - CMD
        - redis-cli
        - ping
      interval: 10s
      timeout: 5s
      retries: 5
# Bridge network that every service in this file attaches to.
networks:
  sglang-network:
    driver: bridge

# Named volumes referenced by the openwebui and redis services.
volumes:
  webui-data: {}
  redis_data: {}
# CUDA toolkit version; declared before FROM so it can select the base image tag.
ARG CUDA_VERSION=12.8.0

# Ubuntu 22.04 base (switched from Ubuntu 20.04) because it has Python 3.10.
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04

# An ARG declared before FROM is not visible inside the build stage. Re-declare
# it here (it inherits the default above) so every RUN below sees a real value
# instead of an empty string.
ARG CUDA_VERSION
ARG BUILD_TYPE=all

# Build-time only: stops apt/debconf from prompting without persisting the
# setting into the environment of the final image, so no reset is needed later.
ARG DEBIAN_FRONTEND=noninteractive

# Set timezone and install system dependencies
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt update -y \
    && apt install -y \
        software-properties-common \
        python3.10 python3.10-dev python3.10-distutils \
        curl git sudo libibverbs-dev \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 \
    && curl -fsSL https://bootstrap.pypa.io/get-pip.py | python3 \
    && python3 -m pip install --upgrade pip setuptools wheel \
    && rm -rf /var/lib/apt/lists/* && apt clean

# Install additional dependencies
RUN pip3 install datamodel_code_generator

# Set up workspace
WORKDIR /sgl-workspace

# Clone the sglang repository once; a second clone into the same directory
# would fail because the destination already exists and is not empty.
RUN git clone --depth=1 https://github.com/sgl-project/sglang.git

# Copy install_pytorch.py from the local build context and run it from the
# location it was copied to, installing PyTorch for the selected CUDA version.
COPY install_pytorch.py /sgl-workspace/install_pytorch.py
RUN chmod +x /sgl-workspace/install_pytorch.py \
    && python3 /sgl-workspace/install_pytorch.py "$CUDA_VERSION"

# The importable "sglang" package lives under the repository's python/
# directory, so that directory is put first; the original entry is kept.
ENV PYTHONPATH="/sgl-workspace/sglang/python:/sgl-workspace/sglang:$PYTHONPATH"

# Debugging: print the effective module search path
RUN python3 -c "import sys; print(sys.path)" \
    && python3 -c "import os; print(os.environ.get('PYTHONPATH'))"

# Install sglang dependencies based on BUILD_TYPE and CUDA version
WORKDIR /sgl-workspace/sglang
COPY install_sglang.sh /sgl-workspace/sglang/install_sglang.sh
RUN chmod +x install_sglang.sh && ./install_sglang.sh "$BUILD_TYPE" "$CUDA_VERSION"

# Install sglang itself in editable mode. The Python project is in ./python
# (the same path install_sglang.sh installs from), not in the repository root.
RUN python3 -m pip install -e python

# Verify installation: fail the build here rather than shipping an image whose
# server crashes on start-up.
RUN python3 -c "import sglang" || { echo 'SGLang import failed!' >&2; exit 1; }

# Clean up pip cache
RUN python3 -m pip cache purge

# Copy entrypoint script and set correct permissions
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

# The entrypoint installs run-time dependencies and then launches the server
ENTRYPOINT ["/entrypoint.sh"]
#!/bin/bash
# Container entrypoint: refresh transformers from source, report whether
# sglang is installed, then replace this shell with the SGLang server.
# Arguments passed to this script are not used.
set -o errexit  # stop at the first command that fails

echo "Starting container setup..."

# Install transformers straight from the upstream repository on every start
pip install --no-cache-dir git+https://github.com/huggingface/transformers.git

# Diagnostic only: a missing package is reported but does not abort start-up
if ! pip list | grep sglang; then
    echo "SGlang is missing!"
fi

echo "Launching SGLang Server..."
server_args=(
    --model-path Qwen/Qwen2.5-VL-7B-Instruct
    --host 0.0.0.0
    --port 8000
    --trust-remote-code
)
# exec so the server takes over this process and receives signals directly
exec python3 -m sglang.launch_server "${server_args[@]}"
#!/usr/bin/env python3
import sys
import os
# Supported CUDA toolkit versions mapped to the PyTorch wheel index to install from.
PYTORCH_INSTALLS = {
    "12.8.0": "https://download.pytorch.org/whl/nightly/cu128",
    "12.5.1": "https://download.pytorch.org/whl/cu124",
    "12.4.1": "https://download.pytorch.org/whl/cu124",
    "12.1.1": "https://download.pytorch.org/whl/cu121",
    "11.8.0": "https://download.pytorch.org/whl/cu118",
}

# Index for the sgl-kernel wheel, which is only installed for CUDA 11.8.0.
SGL_KERNEL_CU118_INDEX = "https://docs.sglang.ai/whl/cu118"


def build_pip_commands(cuda_version):
    """Return the pip invocations needed for ``cuda_version``.

    Args:
        cuda_version: CUDA toolkit version string, e.g. ``"12.4.1"``.

    Returns:
        A list of argument lists, in the order they must be executed.

    Raises:
        ValueError: if ``cuda_version`` is not a key of ``PYTORCH_INSTALLS``.
    """
    if cuda_version not in PYTORCH_INSTALLS:
        raise ValueError(f"Unsupported CUDA version: {cuda_version}")

    index_url = PYTORCH_INSTALLS[cuda_version]
    # Use the running interpreter so pip installs into the same environment.
    pip_install = [sys.executable, "-m", "pip", "install"]
    # Nightly indexes host pre-release builds; ask pip for them explicitly.
    pre_flag = ["--pre"] if "/nightly/" in index_url else []

    commands = [pip_install + pre_flag + ["torch", "--index-url", index_url]]
    if cuda_version == "11.8.0":
        commands.append(pip_install + ["sgl-kernel", "-i", SGL_KERNEL_CU118_INDEX])
    return commands


def main(argv):
    """Install PyTorch for the CUDA version given as ``argv[1]``.

    Returns the process exit status: 0 on success, 2 on a usage error, 1 for
    an unsupported CUDA version, or the exit code of the first failing pip
    command (so a failed install is no longer reported as success).
    """
    import subprocess

    if len(argv) != 2:
        print(f"Usage: {argv[0] if argv else 'install_pytorch.py'} <CUDA_VERSION>",
              file=sys.stderr)
        return 2

    try:
        commands = build_pip_commands(argv[1])
    except ValueError as exc:
        print(exc, file=sys.stderr)
        return 1

    for command in commands:
        # Argument lists avoid passing the command line through a shell.
        status = subprocess.run(command).returncode
        if status != 0:
            print(f"Command failed with exit code {status}: {' '.join(command)}",
                  file=sys.stderr)
            return status
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
#!/bin/bash
# Install SGLang and its CUDA-specific dependencies.
#
# Usage: install_sglang.sh <BUILD_TYPE> <CUDA_VERSION>
#   BUILD_TYPE    extras group of the sglang package to install (e.g. "all")
#   CUDA_VERSION  CUDA toolkit version, one of the cases handled below
#
# Must be run from the root of the sglang checkout: the "python[...]"
# requirement below is a path relative to the current directory.

# Abort on the first failing command, unset variable or failed pipeline stage,
# so a failed pip install is not masked by a later successful command.
set -euo pipefail

BUILD_TYPE="${1:?usage: install_sglang.sh <BUILD_TYPE> <CUDA_VERSION>}"
CUDA_VERSION="${2:?usage: install_sglang.sh <BUILD_TYPE> <CUDA_VERSION>}"

echo "Installing SGLang for BUILD_TYPE=${BUILD_TYPE} and CUDA_VERSION=${CUDA_VERSION}..."

case "$CUDA_VERSION" in
    "12.8.0")
        python3 -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128
        # Without this step the branch installs only torch, and the server
        # later fails on missing run-time packages such as uvicorn.
        # NOTE(review): "runtime_common" is used instead of BUILD_TYPE on the
        # assumption that the full extras pin a torch release and would
        # replace the nightly build installed above - confirm against the
        # extras defined in python/pyproject.toml of the checked-out revision.
        python3 -m pip --no-cache-dir install -e "python[runtime_common]"
        ;;
    "12.5.1" | "12.4.1")
        python3 -m pip --no-cache-dir install -e "python[${BUILD_TYPE}]" --find-links https://flashinfer.ai/whl/cu124/torch2.4/flashinfer/
        ;;
    "12.1.1")
        python3 -m pip --no-cache-dir install -e "python[${BUILD_TYPE}]" --find-links https://flashinfer.ai/whl/cu121/torch2.4/flashinfer/
        ;;
    "11.8.0")
        python3 -m pip --no-cache-dir install -e "python[${BUILD_TYPE}]" --find-links https://flashinfer.ai/whl/cu118/torch2.4/flashinfer/
        python3 -m pip install sgl-kernel -i https://docs.sglang.ai/whl/cu118
        ;;
    *)
        # Report on stderr so the message is not lost in captured stdout.
        echo "Unsupported CUDA version: ${CUDA_VERSION}" >&2
        exit 1
        ;;
esac
fastapi
pydantic
huggingface-hub
aiohttp
python-dotenv
loguru
2025-02-04 21:10:36 Traceback (most recent call last):
2025-02-04 21:10:36 File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
2025-02-04 21:10:36 return _run_code(code, main_globals, None,
2025-02-04 21:10:36 File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
2025-02-04 21:10:36 exec(code, run_globals)
2025-02-04 21:10:36 File "/sgl-workspace/sglang/python/sglang/launch_server.py", line 6, in <module>
2025-02-04 21:10:36 from sglang.srt.entrypoints.http_server import launch_server
2025-02-04 21:10:36 File "/sgl-workspace/sglang/python/sglang/srt/entrypoints/http_server.py", line 35, in <module>
2025-02-04 21:10:36 import uvicorn
2025-02-04 21:10:36 ModuleNotFoundError: No module named 'uvicorn'
2025-02-04 21:10:23 Building wheel for transformers (pyproject.toml): finished with status 'done'
2025-02-04 21:10:23 Created wheel for transformers: filename=transformers-4.49.0.dev0-py3-none-any.whl size=10676269 sha256=6512761fc4b2e7df6e7e80e3d2543a93e16bb0ae34468e27a489def764ca895d
2025-02-04 21:10:23 Stored in directory: /tmp/pip-ephem-wheel-cache-2hds0m25/wheels/e7/9c/5b/e1a9c8007c343041e61cc484433d512ea9274272e3fcbe7c16
2025-02-04 21:10:23 Successfully built transformers
2025-02-04 21:10:24 Installing collected packages: safetensors, regex, huggingface-hub, tokenizers, transformers
2025-02-04 21:10:34 Successfully installed huggingface-hub-0.28.1 regex-2024.11.6 safetensors-0.5.2 tokenizers-0.21.0 transformers-4.49.0.dev0
2025-02-04 21:10:35 sglang 0.4.2.post1
2025-02-04 21:10:35 Launching SGLang Server...
2025-02-04 21:10:39 Starting container setup...
2025-02-04 21:10:41 Collecting git+https://github.com/huggingface/transformers.git
2025-02-04 21:10:41 Cloning https://github.com/huggingface/transformers.git to /tmp/pip-req-build-c5l4uaha
2025-02-04 21:10:41 Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers.git /tmp/pip-req-build-c5l4uaha
2025-02-04 21:10:51 Resolved https://github.com/huggingface/transformers.git to commit fa56dcc2ab748a2d98218b4918742e25454ef0d2
2025-02-04 21:10:51 Installing build dependencies: started
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment