@Jeffwan
Created March 3, 2025 05:32
deepseek-671b-multi-host

A RayClusterFleet manifest that serves DeepSeek-R1 671B with vLLM across two 8-GPU nodes (tensor parallelism 16, Ray distributed executor) on Volcengine VKE, with RDMA-backed NCCL communication between the nodes.
```yaml
apiVersion: orchestration.aibrix.ai/v1alpha1
kind: RayClusterFleet
metadata:
  labels:
    app.kubernetes.io/name: aibrix
    model.aibrix.ai/name: deepseek-r1-671b
  name: deepseek-r1-671b
spec:
  replicas: 1
  selector:
    matchLabels:
      model.aibrix.ai/name: deepseek-r1-671b
  strategy:
    rollingUpdate:
      maxSurge: 25%
      maxUnavailable: 25%
    type: RollingUpdate
  template:
    metadata:
      labels:
        model.aibrix.ai/name: deepseek-r1-671b
      annotations:
        # Let the container command/args below replace the default Ray start command.
        ray.io/overwrite-container-cmd: "true"
    spec:
      rayVersion: '2.10.0'
      headGroupSpec:
        rayStartParams:
          dashboard-host: '0.0.0.0'
          block: 'false'
        template:
          metadata:
            labels:
              model.aibrix.ai/name: deepseek-r1-671b
            annotations:
              # One entry per RDMA NIC attached to the pod (8 in total).
              k8s.volcengine.com/pod-networks: |
                [
                  {"cniConf": {"name": "rdma"}},
                  {"cniConf": {"name": "rdma"}},
                  {"cniConf": {"name": "rdma"}},
                  {"cniConf": {"name": "rdma"}},
                  {"cniConf": {"name": "rdma"}},
                  {"cniConf": {"name": "rdma"}},
                  {"cniConf": {"name": "rdma"}},
                  {"cniConf": {"name": "rdma"}}
                ]
          spec:
            containers:
              - name: ray-head
                image: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/vllm-openai:v0.7.3.self.post1
                ports:
                  - containerPort: 6379
                    name: gcs-server
                  - containerPort: 8265
                    name: dashboard
                  - containerPort: 10001
                    name: client
                  - containerPort: 8000
                    name: service
                command: ["/bin/bash", "-lc", "--"]
                # Start the Ray head via the KubeRay-generated command, then launch vLLM
                # with tensor parallelism 16 (2 nodes x 8 GPUs) on the Ray backend.
                args: ["ulimit -n 65536; echo head; $KUBERAY_GEN_RAY_START_CMD; GLOO_SOCKET_IFNAME=eth0 NCCL_SOCKET_IFNAME=eth0 NCCL_IB_DISABLE=0 NCCL_IB_HCA=mlx5_ vllm serve /models/deepseek --trust-remote-code --served-model-name deepseek-r1-671b --tensor-parallel-size 16 --distributed-executor-backend ray --uvicorn-log-level warning"]
                env:
                  - name: GLOO_SOCKET_IFNAME
                    value: eth0
                  - name: NCCL_SOCKET_IFNAME
                    value: eth0
                  - name: NCCL_IB_DISABLE
                    value: "0"
                  - name: NCCL_IB_HCA
                    value: mlx5_1:1,mlx5_2:1,mlx5_3:1,mlx5_4:1,mlx5_5:1,mlx5_6:1,mlx5_7:1,mlx5_8:1
                resources:
                  limits:
                    nvidia.com/gpu: 8
                    vke.volcengine.com/rdma: "8"
                  requests:
                    nvidia.com/gpu: 8
                    vke.volcengine.com/rdma: "8"
                securityContext:
                  capabilities:
                    add:
                      - IPC_LOCK
                startupProbe:
                  httpGet:
                    path: /metrics
                    port: service
                  initialDelaySeconds: 180
                  failureThreshold: 150
                  periodSeconds: 10
                terminationMessagePath: /dev/termination-log
                terminationMessagePolicy: File
                volumeMounts:
                  - mountPath: /models/deepseek
                    name: models
                  - mountPath: /dev/shm
                    name: shared-mem
            volumes:
              - name: models
                persistentVolumeClaim:
                  claimName: deepseekr1
              - name: shared-mem
                emptyDir:
                  medium: Memory
      workerGroupSpecs:
        - replicas: 1
          minReplicas: 1
          maxReplicas: 1
          groupName: worker-group
          rayStartParams: {}
          template:
            metadata:
              labels:
                model.aibrix.ai/name: deepseek-r1-671b
              annotations:
                # One entry per RDMA NIC attached to the pod (8 in total).
                k8s.volcengine.com/pod-networks: |
                  [
                    {"cniConf": {"name": "rdma"}},
                    {"cniConf": {"name": "rdma"}},
                    {"cniConf": {"name": "rdma"}},
                    {"cniConf": {"name": "rdma"}},
                    {"cniConf": {"name": "rdma"}},
                    {"cniConf": {"name": "rdma"}},
                    {"cniConf": {"name": "rdma"}},
                    {"cniConf": {"name": "rdma"}}
                  ]
            spec:
              containers:
                - name: ray-worker
                  image: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/vllm-openai:v0.7.3.self.post1
                  command: ["/bin/bash", "-lc", "--"]
                  # Workers only join the Ray cluster; vLLM itself is launched from the head.
                  args: ["ulimit -n 65536; echo worker; $KUBERAY_GEN_RAY_START_CMD;"]
                  env:
                    - name: GLOO_SOCKET_IFNAME
                      value: eth0
                    - name: NCCL_SOCKET_IFNAME
                      value: eth0
                    - name: NCCL_IB_DISABLE
                      value: "0"
                    - name: NCCL_IB_HCA
                      value: mlx5_1:1,mlx5_2:1,mlx5_3:1,mlx5_4:1,mlx5_5:1,mlx5_6:1,mlx5_7:1,mlx5_8:1
                  lifecycle:
                    preStop:
                      exec:
                        command: ["/bin/sh", "-c", "ray stop"]
                  resources:
                    limits:
                      nvidia.com/gpu: 8
                      vke.volcengine.com/rdma: "8"
                    requests:
                      nvidia.com/gpu: 8
                      vke.volcengine.com/rdma: "8"
                  securityContext:
                    capabilities:
                      add:
                        - IPC_LOCK
                  terminationMessagePath: /dev/termination-log
                  terminationMessagePolicy: File
                  volumeMounts:
                    - mountPath: /models/deepseek
                      name: models
                    - mountPath: /dev/shm
                      name: shared-mem
              volumes:
                - name: models
                  persistentVolumeClaim:
                    claimName: deepseekr1
                - name: shared-mem
                  emptyDir:
                    medium: Memory
```
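
Once the fleet is up, the head pod exposes vLLM's OpenAI-compatible API on port 8000 (the `service` container port). Below is a minimal smoke-test sketch, assuming the endpoint has been made reachable locally, e.g. via `kubectl port-forward` to the head pod; the base URL and timeouts are illustrative assumptions, not part of the manifest.

```python
# Smoke test against the vLLM OpenAI-compatible server started by the ray-head
# container above. Assumption: the endpoint is reachable at localhost:8000,
# e.g. after "kubectl port-forward <head-pod> 8000:8000".
import requests

BASE_URL = "http://localhost:8000"  # assumed local port-forward target

# vLLM's OpenAI-compatible server exposes /v1/models and /v1/chat/completions.
models = requests.get(f"{BASE_URL}/v1/models", timeout=30).json()
print("served models:", [m["id"] for m in models.get("data", [])])

resp = requests.post(
    f"{BASE_URL}/v1/chat/completions",
    json={
        "model": "deepseek-r1-671b",  # matches --served-model-name in the manifest
        "messages": [{"role": "user", "content": "Say hello in one sentence."}],
        "max_tokens": 64,
    },
    timeout=300,  # the first request can be slow while the engine warms up
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```

If AIBrix's gateway sits in front of the cluster, the same request can be sent to it instead; the direct port-forward above is just a simple way to verify that the fleet itself is serving.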