Last active
February 5, 2025 21:19
-
-
Save grahama1970/5d5b1f23ca5e47a9e71f7d1503f79e2f to your computer and use it in GitHub Desktop.
Trying to get Qwen2.5-VL-7B to work with CUDA 12.8.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compose stack: SGLang model server, FastAPI proxy, OpenWebUI, ArangoDB, Redis.
# All services share the user-defined bridge network `sglang-network`.
services:
  # ---------------------------
  # SGLang Service
  # ---------------------------
  sglang-service:
    # image: lmsysorg/sglang:latest
    build:
      context: .
      dockerfile: Dockerfile_v2.sglang
    container_name: sglang-service
    volumes:
      # Share the host Hugging Face cache so model weights are not re-downloaded.
      - ~/.cache/huggingface:/root/.cache/huggingface
    restart: always
    environment:
      - HF_TOKEN=${HF_TOKEN}
      - HF_HOME=/root/.cache/huggingface
    # NOTE(review): if the built image defines an ENTRYPOINT that ignores its
    # arguments, this command is passed to it as arguments and has no effect.
    # Confirm against Dockerfile_v2.sglang.
    command: >
      bash -c "pip install git+https://github.com/huggingface/transformers.git &&
      python3 -m sglang.launch_server
      --model-path Qwen/Qwen2.5-VL-7B-Instruct
      --host 0.0.0.0
      --port 8000
      --trust-remote-code"
    ulimits:
      memlock: -1
      stack: 67108864
    ipc: host
    healthcheck:
      # Must probe the port the server is launched on (--port 8000 above);
      # the previous value, 30000, did not match and could never succeed.
      test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]
    networks:
      - sglang-network

  # ----------------------------------------
  # FastAPI Proxy Service
  # ----------------------------------------
  fastapi-proxy:
    build:
      context: ./fastapi-proxy
      dockerfile: Dockerfile
    ports:
      - "8000:8000"
    environment:
      - SGLANG_ENDPOINT=http://sglang-service:8000
      # NOTE(review): no `code-exec` service is defined in this file; confirm
      # it is provided elsewhere.
      - CODE_EXEC_ENDPOINT=http://code-exec:8001
      - HF_HOME=/root/.cache/huggingface
    volumes:
      - ~/.cache/huggingface:/root/.cache/huggingface
    depends_on:
      - sglang-service
    networks:
      - sglang-network

  # --------------------------------
  # OpenWebUI - Front-End for Models
  # --------------------------------
  openwebui:
    container_name: openwebui
    image: ghcr.io/open-webui/open-webui:latest
    depends_on:
      - sglang-service
    ports:
      - "3000:8080"
    volumes:
      - webui-data:/app/backend/data
    environment:
      - OPENAI_API_BASE_URL=http://sglang-service:8000/v1
      - OPENAI_API_KEY=EMPTY
    restart: always
    networks:
      - sglang-network

  # ----------------------------------------
  # ArangoDB - Database
  # ----------------------------------------
  arangodb:
    container_name: arangodb
    image: arangodb:latest
    command: [
      "--server.storage-engine=rocksdb",
      "--server.export-metrics-api=true",
      "--experimental-vector-index=true",
      "--server.descriptors-minimum=1024",
      "--rocksdb.total-write-buffer-size=1G",
      "--cache.size=2G",
      "--server.session-timeout=7200"
    ]
    environment:
      # Compose aborts with this message when the variable is unset or empty.
      - ARANGO_ROOT_PASSWORD=${ARANGO_ROOT_PASSWORD:?ARANGO_ROOT_PASSWORD is required}
    volumes:
      - ./arangodb_data:/var/lib/arangodb3
    ports:
      - "8529:8529"
    restart: always
    pull_policy: always
    networks:
      - sglang-network

  # ----------------------------------------
  # Redis - Caching and Message Broker
  # ----------------------------------------
  redis:
    container_name: redis
    image: redis:alpine
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    command: redis-server
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 5
    restart: always
    networks:
      - sglang-network

networks:
  sglang-network:
    driver: bridge

volumes:
  webui-data: {}
  redis_data: {}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds an SGLang serving image on an NVIDIA CUDA development base.
# Build args:
#   CUDA_VERSION  CUDA toolkit version; selects the base image tag and is
#                 passed to install_pytorch.py and install_sglang.sh.
#   BUILD_TYPE    extras name passed to install_sglang.sh.
ARG CUDA_VERSION=12.8.0

# Switch from Ubuntu 20.04 to Ubuntu 22.04, which has Python 3.10
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04

# An ARG declared before FROM is only visible to the FROM line. Redeclare it
# here, before the first RUN that uses it, so "$CUDA_VERSION" is not empty in
# the build steps below.
ARG CUDA_VERSION
ARG BUILD_TYPE=all

ENV DEBIAN_FRONTEND=noninteractive

# Set timezone and install system dependencies
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt update -y \
    && apt install -y \
        software-properties-common \
        python3.10 python3.10-dev python3.10-distutils \
        curl git sudo libibverbs-dev \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 \
    && curl -fsSL https://bootstrap.pypa.io/get-pip.py | python3 \
    && python3 -m pip install --upgrade pip setuptools wheel \
    && rm -rf /var/lib/apt/lists/* && apt clean

# Install additional dependencies
RUN pip3 install datamodel_code_generator

# Set up workspace
WORKDIR /sgl-workspace

# Clone the sglang repository (once; a second clone into the same non-empty
# directory would fail the build).
RUN git clone --depth=1 https://github.com/sgl-project/sglang.git

# The importable `sglang` package lives in the repository's python/ directory.
ENV PYTHONPATH="/sgl-workspace/sglang/python"

# Debugging: print the interpreter search path and PYTHONPATH
RUN python3 -c "import sys; print(sys.path)" \
    && python3 -c "import os; print(os.environ.get('PYTHONPATH'))"

# Install PyTorch based on CUDA version. The script is copied from the local
# build context, so it is run from where it was copied to.
COPY install_pytorch.py /sgl-workspace/install_pytorch.py
RUN chmod +x /sgl-workspace/install_pytorch.py
RUN python3 /sgl-workspace/install_pytorch.py "$CUDA_VERSION"

# Install sglang dependencies based on BUILD_TYPE and CUDA version
WORKDIR /sgl-workspace/sglang
COPY install_sglang.sh /sgl-workspace/sglang/install_sglang.sh
RUN chmod +x install_sglang.sh && ./install_sglang.sh "$BUILD_TYPE" "$CUDA_VERSION"

# Install sglang itself in editable mode from python/ (the repo root is not an
# installable project), together with the HTTP server dependencies.
# NOTE(review): assumes python/pyproject.toml defines a `runtime_common` extra
# that provides uvicorn/fastapi without pinning torch - confirm against the
# cloned revision.
RUN python3 -m pip install -e "python[runtime_common]"

# Verify installation; a failed import must fail the build rather than be
# hidden behind an echo.
RUN python3 -c "import sglang"

# Clean up pip cache
RUN python3 -m pip cache purge

# Copy entrypoint script and set correct permissions
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

# Set entrypoint to ensure dependencies are installed before launching the server
ENTRYPOINT ["/entrypoint.sh"]

# Set interactive mode
ENV DEBIAN_FRONTEND=interactive
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Container entrypoint: installs the development build of Hugging Face
# transformers, reports whether sglang is installed, then replaces this shell
# with the SGLang server process.
#
# Optional environment overrides (defaults match the previous hard-coded values):
#   SGLANG_MODEL_PATH  model to serve      (default: Qwen/Qwen2.5-VL-7B-Instruct)
#   SGLANG_HOST        bind address        (default: 0.0.0.0)
#   SGLANG_PORT        listening port      (default: 8000)
#
# Arguments passed to this script are not used.

# Exit on any failed command, unset variable, or failed pipeline stage.
set -euo pipefail

MODEL_PATH="${SGLANG_MODEL_PATH:-Qwen/Qwen2.5-VL-7B-Instruct}"
HOST="${SGLANG_HOST:-0.0.0.0}"
PORT="${SGLANG_PORT:-8000}"

echo "Starting container setup..."

# Ensure latest Hugging Face transformers are installed. `python3 -m pip`
# targets the same interpreter that launches the server below.
python3 -m pip install --no-cache-dir git+https://github.com/huggingface/transformers.git

# Debugging: check whether sglang is among the installed packages
python3 -m pip list | grep sglang || echo "SGlang is missing!"

# Launch server; exec so the server receives container signals directly.
echo "Launching SGLang Server..."
exec python3 -m sglang.launch_server \
    --model-path "$MODEL_PATH" \
    --host "$HOST" \
    --port "$PORT" \
    --trust-remote-code
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Install the PyTorch build that matches a given CUDA toolkit version.

Usage:
    install_pytorch.py <CUDA_VERSION>

Exit status:
    0  every pip command succeeded
    1  the CUDA version is not listed in PYTORCH_INSTALLS
    2  no CUDA version argument was given
    otherwise, the exit status of the first pip command that failed
"""
import subprocess
import sys

# CUDA toolkit version -> pip index URL that serves the matching torch wheels.
PYTORCH_INSTALLS = {
    "12.8.0": "https://download.pytorch.org/whl/nightly/cu128",
    "12.5.1": "https://download.pytorch.org/whl/cu124",
    "12.4.1": "https://download.pytorch.org/whl/cu124",
    "12.1.1": "https://download.pytorch.org/whl/cu121",
    "11.8.0": "https://download.pytorch.org/whl/cu118",
}

# Index from which sgl-kernel is additionally installed for CUDA 11.8.0.
SGL_KERNEL_CU118_INDEX = "https://docs.sglang.ai/whl/cu118"


def pip_commands(cuda_version):
    """Return the pip invocations (argument lists) for ``cuda_version``.

    Args:
        cuda_version: CUDA version string, e.g. ``"12.8.0"``.

    Returns:
        A list of argument lists, each suitable for ``subprocess.call``.

    Raises:
        KeyError: if ``cuda_version`` is not a key of ``PYTORCH_INSTALLS``.
    """
    index_url = PYTORCH_INSTALLS[cuda_version]
    # Use the running interpreter so packages land in the same environment.
    pip_install = [sys.executable, "-m", "pip", "install"]

    torch_cmd = list(pip_install)
    if "/nightly/" in index_url:
        # Nightly indexes serve pre-release builds; request them explicitly.
        torch_cmd.append("--pre")
    torch_cmd += ["torch", "--index-url", index_url]

    commands = [torch_cmd]
    if cuda_version == "11.8.0":
        commands.append(pip_install + ["sgl-kernel", "-i", SGL_KERNEL_CU118_INDEX])
    return commands


def main(argv=None):
    """Run the installation and return a process exit status.

    Args:
        argv: full argument vector including the program name; defaults to
            ``sys.argv``.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        print(f"Usage: {argv[0] if argv else 'install_pytorch.py'} <CUDA_VERSION>",
              file=sys.stderr)
        return 2

    cuda_version = argv[1]
    if cuda_version not in PYTORCH_INSTALLS:
        print(f"Unsupported CUDA version: {cuda_version}", file=sys.stderr)
        return 1

    for command in pip_commands(cuda_version):
        status = subprocess.call(command)
        if status != 0:
            # Propagate the failure so the calling build step fails too.
            return status
    return 0


if __name__ == "__main__":
    sys.exit(main())
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Installs SGLang (editable, from the ./python directory of the current working
# directory) with the wheels appropriate for a CUDA toolkit version.
#
# Usage: install_sglang.sh <BUILD_TYPE> <CUDA_VERSION>
#   BUILD_TYPE    name of the extras group to install, e.g. "all"
#   CUDA_VERSION  one of 12.8.0, 12.5.1, 12.4.1, 12.1.1, 11.8.0
#
# Exits non-zero if an argument is missing, the CUDA version is unsupported,
# or any pip command fails.

# Stop at the first failing command so a broken install fails the image build.
set -euo pipefail

BUILD_TYPE="${1:?Usage: install_sglang.sh <BUILD_TYPE> <CUDA_VERSION>}"
CUDA_VERSION="${2:?Usage: install_sglang.sh <BUILD_TYPE> <CUDA_VERSION>}"

echo "Installing SGLang for BUILD_TYPE=${BUILD_TYPE} and CUDA_VERSION=${CUDA_VERSION}..."

case "$CUDA_VERSION" in
    "12.8.0")
        python3 -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128
        # Every other branch installs sglang itself; without this step the
        # server's dependencies (e.g. uvicorn) are never installed.
        # BUILD_TYPE is intentionally not used here.
        # NOTE(review): assumes python/pyproject.toml defines a `runtime_common`
        # extra, and that the "${BUILD_TYPE}" extras may pull a stable torch
        # that replaces the nightly cu128 build - confirm against the cloned
        # sglang revision.
        python3 -m pip --no-cache-dir install -e "python[runtime_common]"
        ;;
    "12.5.1" | "12.4.1")
        python3 -m pip --no-cache-dir install -e "python[${BUILD_TYPE}]" --find-links https://flashinfer.ai/whl/cu124/torch2.4/flashinfer/
        ;;
    "12.1.1")
        python3 -m pip --no-cache-dir install -e "python[${BUILD_TYPE}]" --find-links https://flashinfer.ai/whl/cu121/torch2.4/flashinfer/
        ;;
    "11.8.0")
        python3 -m pip --no-cache-dir install -e "python[${BUILD_TYPE}]" --find-links https://flashinfer.ai/whl/cu118/torch2.4/flashinfer/
        python3 -m pip install sgl-kernel -i https://docs.sglang.ai/whl/cu118
        ;;
    *)
        echo "Unsupported CUDA version: ${CUDA_VERSION}" >&2
        exit 1
        ;;
esac
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| fastapi | |
| pydantic | |
| huggingface-hub | |
| aiohttp | |
| python-dotenv | |
| loguru |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| 2025-02-04 21:10:36 Traceback (most recent call last): | |
| 2025-02-04 21:10:36 File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main | |
| 2025-02-04 21:10:36 return _run_code(code, main_globals, None, | |
| 2025-02-04 21:10:36 File "/usr/lib/python3.10/runpy.py", line 86, in _run_code | |
| 2025-02-04 21:10:36 exec(code, run_globals) | |
| 2025-02-04 21:10:36 File "/sgl-workspace/sglang/python/sglang/launch_server.py", line 6, in <module> | |
| 2025-02-04 21:10:36 from sglang.srt.entrypoints.http_server import launch_server | |
| 2025-02-04 21:10:36 File "/sgl-workspace/sglang/python/sglang/srt/entrypoints/http_server.py", line 35, in <module> | |
| 2025-02-04 21:10:36 import uvicorn | |
| 2025-02-04 21:10:36 ModuleNotFoundError: No module named 'uvicorn' | |
| 2025-02-04 21:10:23 Building wheel for transformers (pyproject.toml): finished with status 'done' | |
| 2025-02-04 21:10:23 Created wheel for transformers: filename=transformers-4.49.0.dev0-py3-none-any.whl size=10676269 sha256=6512761fc4b2e7df6e7e80e3d2543a93e16bb0ae34468e27a489def764ca895d | |
| 2025-02-04 21:10:23 Stored in directory: /tmp/pip-ephem-wheel-cache-2hds0m25/wheels/e7/9c/5b/e1a9c8007c343041e61cc484433d512ea9274272e3fcbe7c16 | |
| 2025-02-04 21:10:23 Successfully built transformers | |
| 2025-02-04 21:10:24 Installing collected packages: safetensors, regex, huggingface-hub, tokenizers, transformers | |
| 2025-02-04 21:10:34 Successfully installed huggingface-hub-0.28.1 regex-2024.11.6 safetensors-0.5.2 tokenizers-0.21.0 transformers-4.49.0.dev0 | |
| 2025-02-04 21:10:35 sglang 0.4.2.post1 | |
| 2025-02-04 21:10:35 Launching SGLang Server... | |
| 2025-02-04 21:10:39 Starting container setup... | |
| 2025-02-04 21:10:41 Collecting git+https://github.com/huggingface/transformers.git | |
| 2025-02-04 21:10:41 Cloning https://github.com/huggingface/transformers.git to /tmp/pip-req-build-c5l4uaha | |
| 2025-02-04 21:10:41 Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers.git /tmp/pip-req-build-c5l4uaha | |
| 2025-02-04 21:10:51 Resolved https://github.com/huggingface/transformers.git to commit fa56dcc2ab748a2d98218b4918742e25454ef0d2 | |
| 2025-02-04 21:10:51 Installing build dependencies: started |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment