Note: While it definitely works with RayCluster, using RayJob is much easier.
FROM vllm/vllm-openai:v0.5.2

# wget is important for the future healthcheck.
# - apt-get (not apt) is the stable, script-friendly CLI.
# - --no-install-recommends keeps the layer small; ca-certificates is listed
#   explicitly because it is the recommended package wget needs for HTTPS.
# - The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        wget \
    && rm -rf /var/lib/apt/lists/*

# ray[default] is important for the future healthcheck.
# Quoted so the shell cannot glob-expand the brackets; --no-cache-dir keeps the
# pip download cache out of the layer.
RUN pip3 install --no-cache-dir 'ray[default]'
``` | |
# RayClusterFleet manifest for the deepseek-r1-671b model (excerpt: the spec
# continues beyond what is shown here).
apiVersion: orchestration.aibrix.ai/v1alpha1
kind: RayClusterFleet
metadata:
  labels:
    app.kubernetes.io/name: aibrix
    model.aibrix.ai/name: deepseek-r1-671b
  name: deepseek-r1-671b
spec:
  replicas: 1
``` | |
# Start the vLLM OpenAI-compatible API server on all interfaces, port 8000.
python3 -m vllm.entrypoints.openai.api_server --host "0.0.0.0" --port "8000" --model deepseek-ai/deepseek-coder-6.7b-instruct --max-model-len 32768
``` | |
``` | |
git clone https://github.com/vllm-project/vllm.git
``` | |
``` | |
# Run the serving benchmark against the local OpenAI-compatible endpoint
# using the random dataset and a fixed seed.
python3 benchmark_serving.py \
    --backend openai \
    --base-url http://127.0.0.1:8000 \
    --dataset-name=random \
    --model deepseek-ai/deepseek-coder-6.7b-instruct \
    --seed 12345
Note: While it definitely works with RayCluster, using RayJob is much easier.
FROM vllm/vllm-openai:v0.5.2

# wget is important for the future healthcheck.
# - apt-get (not apt) is the stable, script-friendly CLI.
# - --no-install-recommends keeps the layer small; ca-certificates is listed
#   explicitly because it is the recommended package wget needs for HTTPS.
# - The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        wget \
    && rm -rf /var/lib/apt/lists/*

# ray[default] is important for the future healthcheck.
# Quoted so the shell cannot glob-expand the brackets; --no-cache-dir keeps the
# pip download cache out of the layer.
RUN pip3 install --no-cache-dir 'ray[default]'
# Format
# If line starts with a '#' it is considered a comment
# DCGM FIELD, Prometheus metric type, help message
# Clocks
DCGM_FI_DEV_SM_CLOCK, gauge, SM clock frequency (in MHz).
DCGM_FI_DEV_MEM_CLOCK, gauge, Memory clock frequency (in MHz).
# Temperature
DCGM_FI_DEV_MEMORY_TEMP, gauge, Memory temperature (in C).
ray-system
namespace. If it does not exist, create it with `kubectl create ns ray-system`. nodeip:nodeport/kuberay/workspace.
Note: The operator image and the Jupyter image can be used directly; I have uploaded them to my personal Docker Hub. I will try to finish the OSS process soon.
import os

import ray
import raydp

# Name of the environment variable that holds the Ray head service host.
HEAD_SERVICE_IP_ENV = "EXAMPLE_CLUSTER_RAY_HEAD_SERVICE_HOST"

# Plain indexing raises KeyError if the variable is not set.
head_service_ip = os.environ[HEAD_SERVICE_IP_ENV]

# Connect to the Ray head at that host on port 6379.
ray.init(address=f"{head_service_ip}:6379")
import os

import ray
from sklearn.datasets import load_breast_cancer
from xgboost_ray import RayDMatrix, RayParams, train

# Initialise Ray with automatic address resolution.
ray.init(address="auto")

# Load the breast-cancer dataset as (features, labels) and wrap it in a
# RayDMatrix.
train_x, train_y = load_breast_cancer(return_X_y=True)
train_set = RayDMatrix(train_x, train_y)
import os

import ray
import raydp

# Names of the environment variables that hold the Ray head service host and
# its client port.
HEAD_SERVICE_IP_ENV = "EXAMPLE_CLUSTER_RAY_HEAD_SERVICE_HOST"
HEAD_SERVICE_CLIENT_PORT_ENV = "EXAMPLE_CLUSTER_RAY_HEAD_SERVICE_PORT_CLIENT"

# Plain indexing raises KeyError if either variable is not set.
head_service_ip = os.environ[HEAD_SERVICE_IP_ENV]
client_port = os.environ[HEAD_SERVICE_CLIENT_PORT_ENV]
import argparse | |
import tensorflow as tf | |
from tensorflow.keras.models import Sequential | |
from tensorflow.keras.layers import Dense | |
import numpy as np | |
import os | |
import ray | |
from ray import tune | |
from ray.util.sgd.tf.tf_trainer import TFTrainer, TFTrainable |