@luong-komorebi
Created August 1, 2025 07:33
vLLM example: a working Docker Compose setup with nginx
version: '3.8'
services:
  vllm:
    image: vllm/vllm-openai:latest
    container_name: deepseek-coder-vllm
    restart: unless-stopped
    expose:
      - "8000"
    volumes:
      - /root/.cache/huggingface:/root/.cache/huggingface
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    command: >
      --model deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct
      --port 8000
      --host 0.0.0.0
      --trust-remote-code
      --tensor-parallel-size 1
      --gpu-memory-utilization 0.85
      --max-model-len 16384
      --disable-log-requests
      --enable-auto-tool-choice
      --tool-call-parser hermes
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 300s
  nginx:
    image: nginx:alpine
    container_name: deepseek-coder-nginx
    restart: unless-stopped
    ports:
      - "80:80"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
    depends_on:
      vllm:
        condition: service_healthy
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost/health"]
      interval: 30s
      timeout: 10s
      retries: 3
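
The compose file mounts a `./nginx.conf` that is not included in the gist. A minimal sketch that would satisfy both healthchecks is shown below: it proxies all requests, including `/health`, to the `vllm` service on port 8000. The upstream name `vllm` resolves through Docker's embedded DNS on the compose network; the timeout and buffering settings are illustrative assumptions for long-running streamed completions, not part of the original gist.

    # Sketch of ./nginx.conf (assumed, not from the gist)
    events {}

    http {
        upstream vllm_backend {
            server vllm:8000;  # compose service name, resolved by Docker DNS
        }

        server {
            listen 80;

            location / {
                proxy_pass http://vllm_backend;
                proxy_set_header Host $host;
                proxy_set_header X-Real-IP $remote_addr;
                proxy_read_timeout 300s;  # LLM responses can take a while
                proxy_buffering off;      # pass streamed tokens through immediately
            }
        }
    }

With both containers healthy, `curl http://localhost/v1/models` should list the served model through the proxy, and the nginx healthcheck's `wget` against `http://localhost/health` reaches vLLM's own `/health` endpoint via the same `location /` block.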