@Ithanil
Created August 7, 2025 11:33
VLLM Docker build for GPT-OSS on Ampere
ARG CUDA_VERSION=12.8.1
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 AS vllm_gpt-oss
ARG CUDA_VERSION
ARG PYTHON_VERSION=3.12
### --- Stuff from default Dockerfile --- ###
# The PyPA get-pip.py script is a self-contained script+zip file that provides
# both the installer script and the pip base85-encoded zip archive. This allows
# bootstrapping pip in environments where a distribution package does not exist.
#
# By parameterizing the URL of the get-pip.py installation script, we allow
# third parties to use their own copy of the script stored in a private mirror.
# We set the default value to the PyPA owned get-pip.py script.
#
# Reference: https://pip.pypa.io/en/stable/installation/#get-pip-py
ARG GET_PIP_URL="https://bootstrap.pypa.io/get-pip.py"
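# For example, a build can point at a private copy of the script (hypothetical URL,
# shown only as an illustration):
#   docker build --build-arg GET_PIP_URL=https://mirror.example.internal/get-pip.py ...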
# PIP supports fetching packages from custom indexes, allowing third parties
# to host the packages in private mirrors. The PIP_INDEX_URL and
# PIP_EXTRA_INDEX_URL are standard PIP environment variables to override the
# default indexes. By leaving them empty by default, PIP will use its default
# indexes if the build process doesn't override them.
#
# Uv uses different variables. We set them by default to the same values as
# PIP, but they can be overridden.
ARG PIP_INDEX_URL
ARG PIP_EXTRA_INDEX_URL
ARG UV_INDEX_URL=${PIP_INDEX_URL}
ARG UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
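# For example, a build against a private mirror might pass (hypothetical URLs):
#   docker build --build-arg PIP_INDEX_URL=https://pypi.mirror.internal/simple \
#                --build-arg PIP_EXTRA_INDEX_URL=https://pypi.org/simple ...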
# PyTorch provides its own indexes for standard and nightly builds
ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl
ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly
# PIP supports multiple authentication schemes, including keyring
# By parameterizing the PIP_KEYRING_PROVIDER variable and setting it to
# disabled by default, we allow third parties to use keyring authentication for
# their private Python indexes, while not changing the default behavior which
# is no authentication.
#
# Reference: https://pip.pypa.io/en/stable/topics/authentication/#keyring-support
ARG PIP_KEYRING_PROVIDER=disabled
ARG UV_KEYRING_PROVIDER=${PIP_KEYRING_PROVIDER}
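# To switch to keyring-based authentication at build time, pip's "subprocess"
# provider (which shells out to the keyring CLI) can be selected, e.g.:
#   docker build --build-arg PIP_KEYRING_PROVIDER=subprocess ...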
# This flag enables bundling the built-in KV-connector dependency libraries into the Docker image
ARG INSTALL_KV_CONNECTORS=false
# prepare basic build environment
ARG TARGETPLATFORM
ARG INSTALL_KV_CONNECTORS=false
ENV DEBIAN_FRONTEND=noninteractive
ARG DEADSNAKES_MIRROR_URL
ARG DEADSNAKES_GPGKEY_URL
ARG GET_PIP_URL
# Install Python and other dependencies
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
&& echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
&& apt-get update -y \
&& apt-get install -y ccache software-properties-common git curl sudo \
&& add-apt-repository -y ppa:deadsnakes/ppa \
&& apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \
&& update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
&& update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
&& ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
&& curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION} \
&& python3 --version && python3 -m pip --version
ARG PIP_INDEX_URL UV_INDEX_URL
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
ARG PYTORCH_CUDA_INDEX_BASE_URL
ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL
ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER
# Install uv for faster pip installs
RUN --mount=type=cache,target=/root/.cache/uv \
python3 -m pip install uv
# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
# Reference: https://github.com/astral-sh/uv/pull/1694
ENV UV_HTTP_TIMEOUT=500
ENV UV_INDEX_STRATEGY="unsafe-best-match"
# Use copy mode to avoid hardlink failures with Docker cache mounts
ENV UV_LINK_MODE=copy
# Upgrade to GCC 10 to avoid https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92519
# as it was causing warning spam when compiling the CUTLASS kernels
RUN apt-get install -y gcc-10 g++-10
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 110 --slave /usr/bin/g++ g++ /usr/bin/g++-10
RUN <<EOF
gcc --version
EOF
# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
# max jobs used by Ninja to build extensions
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}
# number of threads used by nvcc
ARG nvcc_threads=8
ENV NVCC_THREADS=$nvcc_threads
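# Both knobs can be raised at build time on a beefier machine, matching the build
# command shown in the comments below, e.g.:
#   docker build --build-arg max_jobs=64 --build-arg nvcc_threads=2 ...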
### ----------------------------------------------------- ###
### --- Build instructions for GPT-OSS on Ampere --- ###
### Translated from https://github.com/vllm-project/vllm/issues/22290#issuecomment-3162301278 ###
ARG CCACHE_NOHASHDIR="true"
COPY . /tmp
WORKDIR /tmp
RUN pip install uv
RUN uv pip install --system --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu128
RUN uv pip install --system "transformers[torch]"
RUN python3 use_existing_torch.py
RUN uv pip install --system -r requirements/build.txt
RUN uv pip install --system --no-build-isolation -e . -v
RUN uv pip uninstall --system triton pytorch-triton
RUN uv pip install --system triton==3.4.0 openai_harmony mcp
RUN git clone https://github.com/openai/triton.git
RUN uv pip install --system -e triton/python/triton_kernels --no-deps
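# Optional sanity check (not part of the original instructions; assumes the kernels
# package is importable as triton_kernels):
# RUN python3 -c "import triton, triton_kernels; print(triton.__version__)"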
# Run
ENV VLLM_ATTENTION_BACKEND=TRITON_ATTN_VLLM_V1
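# With the entrypoint below, arguments passed to `docker run <image> ...` become
# vllm CLI arguments (e.g. `serve /models/gpt-oss-120b ...`), as in the run example
# in the comments below.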
ENTRYPOINT ["vllm"]
@Ithanil
Author

Ithanil commented Aug 7, 2025

VLLM Docker build for GPT-OSS on Ampere

Quickly hacked together from the default vLLM Dockerfile https://github.com/vllm-project/vllm/blob/main/docker/Dockerfile and the specific instructions to compile vLLM for GPT-OSS on Ampere: vllm-project/vllm#22290 (comment)

Note: This is a maximally unoptimized Dockerfile; the resulting image will be >30 GB.
Note: The special tools (Python/Browser) are not included.

How to build

git clone https://github.com/zyongye/vllm.git
cd vllm
git checkout 8260948cdc379d13bf4b80d3172a03d21a983e05
cp ~/Downloads/Dockerfile_gpt-oss ./ # obviously adapt the source path
DOCKER_BUILDKIT=1 docker build . --progress=plain --build-arg RUN_WHEEL_CHECK=false --build-arg max_jobs=64 --build-arg nvcc_threads=2 --target vllm_gpt-oss --tag vllm-0.10.1_gpt-oss -f Dockerfile_gpt-oss
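
Optionally, sanity-check the freshly built image before serving; a minimal check (the --entrypoint override is only for this one-off, and the tag matches the build command above) could be:

docker run --rm --gpus all --entrypoint python3 vllm-0.10.1_gpt-oss -c "import torch, vllm; print(vllm.__version__, torch.version.cuda, torch.cuda.is_available())"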

How to start on 1x A100

docker run -d -e CUDA_VISIBLE_DEVICES=0 --gpus all --shm-size=16gb -p 127.0.0.1:8000:8000 --ipc=host --mount type=bind,src=/YOUR/MODEL/TOPDIR,dst=/models --name vllm_gpt-oss vllm-0.10.1_gpt-oss serve /models/gpt-oss-120b --served-model-name GPT-OSS-120B --download-dir /models --disable-log-requests --port 8000 --tensor-parallel-size 1 --gpu-memory-utilization 0.95 --max-model-len 131072 --enable-prefix-caching --enable-chunked-prefill --async-scheduling --override-generation-config '{"temperature": 1.0, "top_p": 1.0, "top_k": 0}'
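
Once the server reports it is ready, it exposes the usual OpenAI-compatible API on the mapped port; a minimal smoke test (model name taken from --served-model-name above) could be:

curl http://127.0.0.1:8000/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "GPT-OSS-120B", "messages": [{"role": "user", "content": "Say hello"}], "max_tokens": 32}'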

Have fun!

@nhuytan

nhuytan commented Aug 8, 2025

I tried to build the Dockerfile with GitHub Actions, but it wasn't successful.

name: Build and Push vLLM GPT-OSS Docker Image

on:
  push:
    branches: [ main ]

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      # 1️⃣ Checkout your repo so the Dockerfile exists locally
      - name: Checkout this repository
        uses: actions/checkout@v3

      # 2️⃣ Free up space for large CUDA build
      - name: Free up space
        run: |
          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache
          df -h

      # 3️⃣ Clone the vLLM repo
      - name: Clone vLLM repository
        run: |
          git clone https://github.com/zyongye/vllm.git
          cd vllm
          git checkout 8260948cdc379d13bf4b80d3172a03d21a983e05

      # 4️⃣ Copy your custom Dockerfile into vllm folder
      - name: Copy custom Dockerfile
        run: cp .github/workflows/Dockerfile_gpt-oss vllm/

      # 5️⃣ Set up Docker Buildx
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # 6️⃣ Log in to Docker Hub
      - name: Log in to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      # 7️⃣ Build and Push Docker Image
      - name: Build and Push vLLM GPT-OSS Image
        uses: docker/build-push-action@v5
        with:
          context: vllm
          file: vllm/Dockerfile_gpt-oss
          push: true
          build-args: |
            RUN_WHEEL_CHECK=false
            max_jobs=64
            nvcc_threads=2
          target: vllm_gpt-oss
          tags: nhuytan/vllm-gptoss:0.10.2

@Twinshape

The image was built, but when I launch it, I get this error. Can you tell me what the problem is?

(EngineCore_0 pid=34) ERROR 08-08 15:01:03 [core.py:718] EngineCore failed to start.
(EngineCore_0 pid=34) ERROR 08-08 15:01:03 [core.py:718] Traceback (most recent call last):
(EngineCore_0 pid=34) ERROR 08-08 15:01:03 [core.py:718] File "/tmp/vllm/v1/engine/core.py", line 709, in run_engine_core
(EngineCore_0 pid=34) ERROR 08-08 15:01:03 [core.py:718] engine_core = EngineCoreProc(*args, **kwargs)
(EngineCore_0 pid=34) ERROR 08-08 15:01:03 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=34) ERROR 08-08 15:01:03 [core.py:718] File "/tmp/vllm/v1/engine/core.py", line 510, in init
(EngineCore_0 pid=34) ERROR 08-08 15:01:03 [core.py:718] super().init(vllm_config, executor_class, log_stats,
(EngineCore_0 pid=34) ERROR 08-08 15:01:03 [core.py:718] File "/tmp/vllm/v1/engine/core.py", line 91, in init
(EngineCore_0 pid=34) ERROR 08-08 15:01:03 [core.py:718] self._initialize_kv_caches(vllm_config)
(EngineCore_0 pid=34) ERROR 08-08 15:01:03 [core.py:718] File "/tmp/vllm/v1/engine/core.py", line 181, in _initialize_kv_caches
(EngineCore_0 pid=34) ERROR 08-08 15:01:03 [core.py:718] self.model_executor.determine_available_memory())
(EngineCore_0 pid=34) ERROR 08-08 15:01:03 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=34) ERROR 08-08 15:01:03 [core.py:718] File "/tmp/vllm/v1/executor/abstract.py", line 76, in determine_available_memory
(EngineCore_0 pid=34) ERROR 08-08 15:01:03 [core.py:718] output = self.collective_rpc("determine_available_memory")
(EngineCore_0 pid=34) ERROR 08-08 15:01:03 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=34) ERROR 08-08 15:01:03 [core.py:718] File "/tmp/vllm/v1/executor/multiproc_executor.py", line 243, in collective_rpc
(EngineCore_0 pid=34) ERROR 08-08 15:01:03 [core.py:718] result = get_response(w, dequeue_timeout)
(EngineCore_0 pid=34) ERROR 08-08 15:01:03 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=34) ERROR 08-08 15:01:03 [core.py:718] File "/tmp/vllm/v1/executor/multiproc_executor.py", line 230, in get_response
(EngineCore_0 pid=34) ERROR 08-08 15:01:03 [core.py:718] raise RuntimeError(
(EngineCore_0 pid=34) ERROR 08-08 15:01:03 [core.py:718] RuntimeError: Worker failed with error ''Mxfp4MoEMethod' object has no attribute 'fused_experts'', please check the stack trace above for the root cause
(EngineCore_0 pid=34) ERROR 08-08 15:01:06 [multiproc_executor.py:146] Worker proc VllmWorker-1 died unexpectedly, shutting down executor.
(EngineCore_0 pid=34) Process EngineCore_0:
(EngineCore_0 pid=34) Traceback (most recent call last):
(EngineCore_0 pid=34) File "/usr/lib/python3.12/multiprocessing/process.py", line 314, in _bootstrap
(EngineCore_0 pid=34) self.run()
(EngineCore_0 pid=34) File "/usr/lib/python3.12/multiprocessing/process.py", line 108, in run
(EngineCore_0 pid=34) self._target(*self._args, **self._kwargs)
(EngineCore_0 pid=34) File "/tmp/vllm/v1/engine/core.py", line 722, in run_engine_core
(EngineCore_0 pid=34) raise e
(EngineCore_0 pid=34) File "/tmp/vllm/v1/engine/core.py", line 709, in run_engine_core
(EngineCore_0 pid=34) engine_core = EngineCoreProc(*args, **kwargs)
(EngineCore_0 pid=34) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=34) File "/tmp/vllm/v1/engine/core.py", line 510, in init
(EngineCore_0 pid=34) super().init(vllm_config, executor_class, log_stats,
(EngineCore_0 pid=34) File "/tmp/vllm/v1/engine/core.py", line 91, in init
(EngineCore_0 pid=34) self._initialize_kv_caches(vllm_config)
(EngineCore_0 pid=34) File "/tmp/vllm/v1/engine/core.py", line 181, in _initialize_kv_caches
(EngineCore_0 pid=34) self.model_executor.determine_available_memory())
(EngineCore_0 pid=34) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=34) File "/tmp/vllm/v1/executor/abstract.py", line 76, in determine_available_memory
(EngineCore_0 pid=34) output = self.collective_rpc("determine_available_memory")
(EngineCore_0 pid=34) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=34) File "/tmp/vllm/v1/executor/multiproc_executor.py", line 243, in collective_rpc
(EngineCore_0 pid=34) result = get_response(w, dequeue_timeout)
(EngineCore_0 pid=34) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=34) File "/tmp/vllm/v1/executor/multiproc_executor.py", line 230, in get_response
(EngineCore_0 pid=34) raise RuntimeError(
(EngineCore_0 pid=34) RuntimeError: Worker failed with error ''Mxfp4MoEMethod' object has no attribute 'fused_experts'', please check the stack trace above for the root cause
(APIServer pid=1) Traceback (most recent call last):
(APIServer pid=1) File "", line 198, in _run_module_as_main
(APIServer pid=1) File "", line 88, in _run_code
(APIServer pid=1) File "/tmp/vllm/entrypoints/openai/api_server.py", line 1895, in
(APIServer pid=1) uvloop.run(run_server(args))
(APIServer pid=1) File "/usr/local/lib/python3.12/dist-packages/uvloop/init.py", line 109, in run
(APIServer pid=1) return __asyncio.run(
(APIServer pid=1) ^^^^^^^^^^^^^^
(APIServer pid=1) File "/usr/lib/python3.12/asyncio/runners.py", line 195, in run
(APIServer pid=1) return runner.run(main)
(APIServer pid=1) ^^^^^^^^^^^^^^^^
(APIServer pid=1) File "/usr/lib/python3.12/asyncio/runners.py", line 118, in run
(APIServer pid=1) return self._loop.run_until_complete(task)
(APIServer pid=1) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1) File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
(APIServer pid=1) File "/usr/local/lib/python3.12/dist-packages/uvloop/init.py", line 61, in wrapper
(APIServer pid=1) return await main
(APIServer pid=1) ^^^^^^^^^^
(APIServer pid=1) File "/tmp/vllm/entrypoints/openai/api_server.py", line 1827, in run_server
(APIServer pid=1) await run_server_worker(listen_address, sock, args, **uvicorn_kwargs)
(APIServer pid=1) File "/tmp/vllm/entrypoints/openai/api_server.py", line 1847, in run_server_worker
(APIServer pid=1) async with build_async_engine_client(
(APIServer pid=1) ^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1) File "/usr/lib/python3.12/contextlib.py", line 210, in aenter
(APIServer pid=1) return await anext(self.gen)
(APIServer pid=1) ^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1) File "/tmp/vllm/entrypoints/openai/api_server.py", line 167, in build_async_engine_client
(APIServer pid=1) async with build_async_engine_client_from_engine_args(
(APIServer pid=1) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1) File "/usr/lib/python3.12/contextlib.py", line 210, in aenter
(APIServer pid=1) return await anext(self.gen)
(APIServer pid=1) ^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1) File "/tmp/vllm/entrypoints/openai/api_server.py", line 209, in build_async_engine_client_from_engine_args
(APIServer pid=1) async_llm = AsyncLLM.from_vllm_config(
(APIServer pid=1) ^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1) File "/tmp/vllm/utils/init.py", line 1520, in inner
(APIServer pid=1) return fn(*args, **kwargs)
(APIServer pid=1) ^^^^^^^^^^^^^^^^^^^
(APIServer pid=1) File "/tmp/vllm/v1/engine/async_llm.py", line 173, in from_vllm_config
(APIServer pid=1) return cls(
(APIServer pid=1) ^^^^
(APIServer pid=1) File "/tmp/vllm/v1/engine/async_llm.py", line 119, in init
(APIServer pid=1) self.engine_core = EngineCoreClient.make_async_mp_client(
(APIServer pid=1) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1) File "/tmp/vllm/v1/engine/core_client.py", line 101, in make_async_mp_client
(APIServer pid=1) return AsyncMPClient(*client_args)
(APIServer pid=1) ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1) File "/tmp/vllm/v1/engine/core_client.py", line 733, in init
(APIServer pid=1) super().init(
(APIServer pid=1) File "/tmp/vllm/v1/engine/core_client.py", line 421, in init
(APIServer pid=1) with launch_core_engines(vllm_config, executor_class,
(APIServer pid=1) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1) File "/usr/lib/python3.12/contextlib.py", line 144, in exit
(APIServer pid=1) next(self.gen)
(APIServer pid=1) File "/tmp/vllm/v1/engine/utils.py", line 697, in launch_core_engines
(APIServer pid=1) wait_for_engine_startup(
(APIServer pid=1) File "/tmp/vllm/v1/engine/utils.py", line 750, in wait_for_engine_startup
(APIServer pid=1) raise RuntimeError("Engine core initialization failed. "
(APIServer pid=1) RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {}

@Ithanil
Author

Ithanil commented Aug 8, 2025

The image was built, but when I launch it, I get this error. Can you tell me what the problem is?

...

I think I got something like this when I tried to use a parallelism mode other than tensor parallelism; I believe only TP is supported right now.
Please check the command line I posted above and see what you are doing differently.
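
For reference, a TP-only multi-GPU launch would just be the run command above with the GPU list and TP size scaled up, e.g. -e CUDA_VISIBLE_DEVICES=0,1,2,3 and --tensor-parallel-size 4, and without --enable-expert-parallel or any pipeline-parallel flags.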

@Twinshape

Twinshape commented Aug 10, 2025

Yes, the problem was solved after removing the "--enable-expert-parallel" parameter.

But I have a new problem; maybe you know how to solve this one?

(VllmWorker TP0 pid=44) INFO 08-10 09:49:24 [multiproc_executor.py:520] Parent process exited, terminating worker
(VllmWorker TP1 pid=45) INFO 08-10 09:49:24 [multiproc_executor.py:520] Parent process exited, terminating worker
(VllmWorker TP2 pid=46) INFO 08-10 09:49:24 [multiproc_executor.py:520] Parent process exited, terminating worker
(VllmWorker TP3 pid=47) INFO 08-10 09:49:24 [multiproc_executor.py:520] Parent process exited, terminating worker
(APIServer pid=1) Traceback (most recent call last):
(APIServer pid=1) File "", line 198, in _run_module_as_main
(APIServer pid=1) File "", line 88, in _run_code
(APIServer pid=1) File "/tmp/vllm/entrypoints/openai/api_server.py", line 1895, in
(APIServer pid=1) uvloop.run(run_server(args))
(APIServer pid=1) File "/usr/local/lib/python3.12/dist-packages/uvloop/init.py", line 109, in run
(APIServer pid=1) return __asyncio.run(
(APIServer pid=1) ^^^^^^^^^^^^^^
(APIServer pid=1) File "/usr/lib/python3.12/asyncio/runners.py", line 195, in run
(APIServer pid=1) return runner.run(main)
(APIServer pid=1) ^^^^^^^^^^^^^^^^
(APIServer pid=1) File "/usr/lib/python3.12/asyncio/runners.py", line 118, in run
(APIServer pid=1) return self._loop.run_until_complete(task)
(APIServer pid=1) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1) File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
(APIServer pid=1) File "/usr/local/lib/python3.12/dist-packages/uvloop/init.py", line 61, in wrapper
(APIServer pid=1) return await main
(APIServer pid=1) ^^^^^^^^^^
(APIServer pid=1) File "/tmp/vllm/entrypoints/openai/api_server.py", line 1827, in run_server
(APIServer pid=1) await run_server_worker(listen_address, sock, args, **uvicorn_kwargs)
(APIServer pid=1) File "/tmp/vllm/entrypoints/openai/api_server.py", line 1855, in run_server_worker
(APIServer pid=1) await init_app_state(engine_client, vllm_config, app.state, args)
(APIServer pid=1) File "/tmp/vllm/entrypoints/openai/api_server.py", line 1657, in init_app_state
(APIServer pid=1) state.openai_serving_responses = OpenAIServingResponses(
(APIServer pid=1) ^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1) File "/tmp/vllm/entrypoints/openai/serving_responses.py", line 130, in init
(APIServer pid=1) get_stop_tokens_for_assistant_actions())
(APIServer pid=1) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1) File "/tmp/vllm/entrypoints/harmony_utils.py", line 187, in get_stop_tokens_for_assistant_actions
(APIServer pid=1) return get_encoding().stop_tokens_for_assistant_actions()
(APIServer pid=1) ^^^^^^^^^^^^^^
(APIServer pid=1) File "/tmp/vllm/entrypoints/harmony_utils.py", line 37, in get_encoding
(APIServer pid=1) _harmony_encoding = load_harmony_encoding(
(APIServer pid=1) ^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1) File "/usr/local/lib/python3.12/dist-packages/openai_harmony/init.py", line 689, in load_harmony_encoding
(APIServer pid=1) inner: _PyHarmonyEncoding = _load_harmony_encoding(name)
(APIServer pid=1) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1) openai_harmony.HarmonyError: error downloading or loading vocab file: failed to download or load vocab file

Solved:
vllm-project/vllm#22525 (comment)
