Last active
August 7, 2025 14:27
-
-
Save bs10081/4ac953264a5ae4ac63f72af299fa98ee to your computer and use it in GitHub Desktop.
一鍵檢查、建構環境與啟動 vLLM(openai/gpt-oss-20b)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# start_vllm_gpt-oss-20b.sh
# One-shot check, environment build and launch for vLLM (openai/gpt-oss-20b).
# Requirements: Ubuntu 24.04, CUDA 12.6, NVIDIA driver, Python 3.10+
# Automatically:
#   - checks/installs missing system dev packages (incl. math.h)
#   - checks/installs Python 3.12 (if absent)
#   - installs uv (if absent, via pip)
#   - creates a virtual environment (if absent, via uv venv)
#   - installs vLLM and its dependencies (pinned version and index-url)
#   - selects GCC/G++ 12 as host compiler for nvcc compatibility
#   - sets CUDA paths (preferring /usr/local/cuda-12.6)
#   - clears the FlashInfer JIT cache (avoids stale build errors)
#   - starts the vLLM OpenAI-style API
set -euo pipefail

# ---------- Default parameters ----------
HOST="0.0.0.0"                   # listen address
PORT="8000"                      # listen port
MODEL="openai/gpt-oss-20b"       # model name or local path
VENV_DEFAULT="$HOME/vllm/.venv"  # default virtualenv location
VENV="${VENV_DEFAULT}"
QUANT="mxfp4"       # alternatives: none / mxfp4 / awq / bitsandbytes (as supported by vLLM)
GPU_ARCH="auto"     # auto -> H100 defaults to 9.0; or set manually, e.g. "9.0"
AUTO_INSTALL="yes"  # yes -> sudo apt install when needed; no -> check only, never install
EXTRA_ARGS=()       # extra arguments forwarded to `vllm serve`
# Print usage/help text to stdout and leave exiting to the caller.
# The heredoc content is user-facing runtime output (intentionally kept in
# the script's original Traditional Chinese); $(basename "$0") expands to
# the invoked script name.
usage() {
  cat <<EOF
用法:$(basename "$0") [選項]
選項:
--host HOST 監聽位址(預設 ${HOST})
--port PORT 監聽連接埠(預設 ${PORT})
--model NAME 模型名稱或路徑(預設 ${MODEL})
--venv PATH Python 虛擬環境路徑(預設 ${VENV_DEFAULT})
--quant MODE 量化模式(mxfp4|none,預設 ${QUANT})
--gpu-arch VAL 指定 TORCH_CUDA_ARCH_LIST,如 9.0;auto 則依 GPU 判斷
--no-sudo 僅檢查,不自動安裝缺失套件
-- 後續所有參數直接傳給 \`vllm serve\`
例:
$(basename "$0") --host 0.0.0.0 --port 8000 --model openai/gpt-oss-20b
$(basename "$0") --quant none -- --max-model-len 131072 --max-num-seqs 8
EOF
}
# ---------- Parse command-line options ----------
# Options consume their value via `shift 2`; everything after a literal
# `--` is collected verbatim for `vllm serve`.
while (( $# > 0 )); do
  case "$1" in
    --host)     HOST="$2";     shift 2 ;;
    --port)     PORT="$2";     shift 2 ;;
    --model)    MODEL="$2";    shift 2 ;;
    --venv)     VENV="$2";     shift 2 ;;
    --quant)    QUANT="$2";    shift 2 ;;
    --gpu-arch) GPU_ARCH="$2"; shift 2 ;;
    --no-sudo)  AUTO_INSTALL="no"; shift ;;
    -h|--help)  usage; exit 0 ;;
    --)         shift; EXTRA_ARGS+=("$@"); break ;;
    *)          echo "未知參數:$1"; usage; exit 1 ;;
  esac
done
# Colored status helpers. log() reports progress on stdout; warn() and
# err() are diagnostics and therefore go to stderr (fix: the originals
# wrote them to stdout, polluting captured or piped output).
log()  { echo -e "\e[1;34m[INFO]\e[0m $*"; }
warn() { echo -e "\e[1;33m[WARN]\e[0m $*" >&2; }
err()  { echo -e "\e[1;31m[ERR ]\e[0m $*" >&2; }
# need_cmd NAME — succeed (status 0) iff NAME resolves to a runnable
# command; non-zero otherwise. Usable directly in conditionals.
need_cmd() {
  command -v "$1" >/dev/null 2>&1
}
# apt_install PKG... — install the given apt packages, honouring the
# global AUTO_INSTALL flag. In --no-sudo mode it only warns and returns
# success; if installation is needed but sudo is missing, it aborts.
apt_install() {
  local packages=("$@")
  if [[ "${AUTO_INSTALL}" != "yes" ]]; then
    warn "缺套件:${packages[*]}(--no-sudo 模式不自動安裝)"
    return 0
  fi
  if ! need_cmd sudo; then
    err "需要安裝 ${packages[*]} 但找不到 sudo。請手動安裝或移除 --no-sudo。"
    exit 1
  fi
  log "安裝套件:${packages[*]}"
  sudo apt-get update -y
  sudo apt-get install -y "${packages[@]}"
}
# ---------- Step 0: prefer the CUDA 12.6 toolchain paths ----------
# Prepend the toolkit's bin (nvcc) and lib64 (runtime libs) so a 12.6
# install wins over anything already on PATH / LD_LIBRARY_PATH.
if [[ -d "/usr/local/cuda-12.6" ]]; then
  export PATH="/usr/local/cuda-12.6/bin:${PATH}"
  export LD_LIBRARY_PATH="/usr/local/cuda-12.6/lib64:${LD_LIBRARY_PATH:-}"
  log "已加入 CUDA 12.6 路徑:/usr/local/cuda-12.6"
elif [[ -d "/usr/local/cuda" ]]; then
  # Fall back to the unversioned dir/symlink; it may point at a non-12.6
  # toolkit, which usually still works but is not the tested setup.
  export PATH="/usr/local/cuda/bin:${PATH}"
  export LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH:-}"
  warn "使用 /usr/local/cuda;如果非 12.6,仍可運作但建議指向 12.6。"
else
  warn "/usr/local/cuda-12.6 與 /usr/local/cuda 不存在,之後編譯可能失敗(找不到 nvcc)。"
fi
# ---------- Step 1: basic tool checks (including Python 3.12) ----------
MISSING=()
for c in bash nvidia-smi; do
  if ! need_cmd "$c"; then MISSING+=("$c"); fi
done
if (( ${#MISSING[@]} )); then
  err "缺少必要指令:${MISSING[*]}"
  exit 1
fi

# Check for Python 3.12; install it (plus venv/pip support) if absent.
if ! need_cmd python3.12; then
  log "未找到 python3.12,正在安裝。"
  apt_install python3.12 python3.12-venv python3-pip
fi

# Make sure a `python3` command exists, pointing at python3.12.
# Fix: the original ran sudo unconditionally here, which broke --no-sudo
# mode and sudo-less hosts; it now honours AUTO_INSTALL like apt_install.
if ! need_cmd python3; then
  if [[ "${AUTO_INSTALL}" == "yes" ]] && need_cmd sudo; then
    sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1
  else
    warn "python3 不存在且無法自動設定(--no-sudo 或缺 sudo),請手動將 python3 指向 python3.12。"
  fi
fi
log "Python 環境已就緒。"

# nvcc check (12.4+ recommended, 12.6 targeted).
if need_cmd nvcc; then
  # nvcc prints "... release 12.6, V12.6.x"; take the text after 'V' on
  # the release line, up to the first comma.
  NVCC_VER="$(nvcc --version | awk -F'V' '/release/{print $2}' | cut -d',' -f1 || true)"
  log "偵測到 nvcc 版本:${NVCC_VER:-unknown}"
else
  warn "找不到 nvcc,嘗試安裝 cuda-toolkit 或修正 PATH。"
fi
# ---------- Step 2: system development packages (incl. math.h) ----------
# Missing /usr/include/math.h means the libc dev headers are absent, which
# makes every CUDA/C++ JIT compile fail.
if [[ -f /usr/include/math.h ]]; then
  log "已找到 /usr/include/math.h"
  # Headers are in place; still make sure the ninja build tool exists.
  if ! need_cmd ninja; then
    apt_install ninja-build
  fi
else
  log "未發現 /usr/include/math.h,安裝 libc6-dev 與相關工具。"
  apt_install build-essential libc6-dev linux-libc-dev libstdc++-12-dev ninja-build python3.12-dev
fi
# ---------- Step 3: compiler version (GCC/G++ 12) ----------
# Pin gcc/g++ 12 as the host compiler so nvcc accepts it.
if ! need_cmd gcc-12 || ! need_cmd g++-12; then
  log "安裝 GCC/G++ 12 以相容 nvcc。"
  apt_install gcc-12 g++-12
fi
export CC=/usr/bin/gcc-12
export CXX=/usr/bin/g++-12
export CUDAHOSTCXX=/usr/bin/g++-12
log "CUDAHOSTCXX=${CUDAHOSTCXX};CC=${CC};CXX=${CXX}"

# ---------- Step 4: remove interfering environment variables ----------
# Stale include-path variables can shadow the freshly installed headers
# during JIT builds.
unset CPATH C_INCLUDE_PATH CPLUS_INCLUDE_PATH INCLUDE
log "已清除可能干擾編譯的 include 環境變數。"
# ---------- Step 5: GPU architecture selection ----------
case "${GPU_ARCH}" in
  auto)
    # Rough heuristic: the first GPU's name containing "H100" implies
    # compute capability 9.0.
    GPU_NAME="$(nvidia-smi --query-gpu=name --format=csv,noheader | head -n1 || true)"
    if echo "${GPU_NAME}" | grep -qi 'H100'; then
      export TORCH_CUDA_ARCH_LIST="9.0"
    else
      # Unknown GPU: leave it unset and let PyTorch decide (slower first run).
      unset TORCH_CUDA_ARCH_LIST || true
    fi
    ;;
  *)
    export TORCH_CUDA_ARCH_LIST="${GPU_ARCH}"
    ;;
esac
if [[ -n "${TORCH_CUDA_ARCH_LIST:-}" ]]; then
  log "TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}"
else
  warn "未設定 TORCH_CUDA_ARCH_LIST(auto 無法判斷),首次 JIT 可能較慢。"
fi
# ---------- Step 6: check and install uv ----------
if ! need_cmd uv; then
  log "未找到 uv,正在安裝 uv。"
  # Fix: Ubuntu 24.04 marks the system Python as externally managed
  # (PEP 668), so a bare `pip install` is refused. Install into the user
  # site instead, falling back to --break-system-packages, and ensure
  # ~/.local/bin (where the uv entry point lands) is on PATH.
  python3 -m pip install --user uv \
    || python3 -m pip install --user --break-system-packages uv
  export PATH="${HOME}/.local/bin:${PATH}"
fi
log "uv 套件管理器已就緒。"

# ---------- Step 7: create and activate the virtual environment ----------
if [[ ! -d "${VENV}" ]]; then
  log "虛擬環境未找到:${VENV},正在使用 uv 創建。"
  # --seed pre-installs pip/setuptools into the new environment.
  uv venv "${VENV}" --python 3.12 --seed
fi
# shellcheck disable=SC1091
source "${VENV}/bin/activate"
log "已啟用虛擬環境:${VENV}"

# ---------- Step 8: install vLLM and its dependencies ----------
# Pinned pre-release build with the gpt-oss wheel index and the PyTorch
# cu128 nightly index; unsafe-best-match lets uv pick across both indexes.
log "安裝 vLLM 特定版本與依賴。"
uv pip install --pre vllm==0.10.1+gptoss \
  --extra-index-url https://wheels.vllm.ai/gpt-oss/ \
  --extra-index-url https://download.pytorch.org/whl/nightly/cu128 \
  --index-strategy unsafe-best-match
# ---------- Step 9: clear the FlashInfer JIT cache ----------
# Stale JIT artifacts from earlier builds can poison new runs.
rm -rf "${HOME}/.cache/flashinfer" || true
log "已清除 ~/.cache/flashinfer"

# ---------- Step 10: launch the vLLM service ----------
# Build the quantization argument list; "none" means no flag at all.
VLLM_QUANT_ARG=()
if [[ "${QUANT}" != "none" ]]; then
  VLLM_QUANT_ARG=(--quantization "${QUANT}")
fi
log "啟動 vLLM:模型=${MODEL};位址=${HOST}:${PORT};量化=${QUANT}"
set -x
# exec replaces this shell with the server process.
exec vllm serve "${MODEL}" \
  --host "${HOST}" \
  --port "${PORT}" \
  "${VLLM_QUANT_ARG[@]}" \
  "${EXTRA_ARGS[@]}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment