Skip to content

Instantly share code, notes, and snippets.

@chiragjn
Created February 20, 2024 10:21
Show Gist options
  • Save chiragjn/22e6a3ffe1b7f4aeaaefbc25af8e9461 to your computer and use it in GitHub Desktop.
Save chiragjn/22e6a3ffe1b7f4aeaaefbc25af8e9461 to your computer and use it in GitHub Desktop.
llama.cpp python cuda Dockerfile example
# Tag of the nvidia/cuda base image; override with --build-arg CUDA_IMAGE=<tag>.
ARG CUDA_IMAGE="12.1.1-devel-ubuntu20.04"
# The platform is pinned to linux/amd64: the pre-built llama-cpp-python wheel
# installed later in this file is an x86_64 build.
FROM --platform=linux/amd64 nvidia/cuda:${CUDA_IMAGE}
# Build-time only: keeps apt/debconf from prompting during RUN steps.
# Declared as ARG (not ENV) so it is visible to every RUN in this stage but is
# not persisted into the environment of containers started from the image.
ARG DEBIAN_FRONTEND=noninteractive
# Create the unprivileged account (UID 1000, with a home directory) that the
# later USER instructions switch to.
RUN useradd --create-home --uid 1000 user
# The package-installation steps that follow need root.
USER root
# Install OS-level build tooling, Python, and the OpenCL / CLBlast / OpenBLAS
# development packages.
# software-properties-common is installed first because it provides
# add-apt-repository, which registers the CLBlast PPA; the package index is
# then refreshed so libclblast-dev can be resolved from that PPA.
# The apt lists are removed in the same layer so they do not bloat the image.
# Finally, an ICD file naming libnvidia-opencl.so.1 is written under
# /etc/OpenCL/vendors (presumably so the OpenCL loader can locate the NVIDIA
# implementation at runtime).
RUN apt-get update \
    && apt-get install -y software-properties-common \
    && add-apt-repository -y ppa:cnugteren/clblast \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        apt-transport-https \
        build-essential \
        clinfo \
        curl \
        gcc \
        git \
        libclblast-dev \
        libopenblas-dev \
        ocl-icd-opencl-dev \
        opencl-headers \
        python3 \
        python3-pip \
        wget \
    && rm -rf /var/lib/apt/lists/* \
    && mkdir -p /etc/OpenCL/vendors \
    && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd
# Build-related environment variables.
# NOTE(review): presumably only consulted when llama-cpp-python is compiled
# from source (the commented-out alternative below) — confirm before removing.
ENV CUDA_DOCKER_ARCH=all \
    LLAMA_CUBLAS=1
# Upgrade the Python packaging toolchain before installing any wheels.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel
# Install llama-cpp-python (pre-built wheel).
# The wheel filename is tagged cu121 / cp38 / x86_64, so it must stay in sync
# with the CUDA_IMAGE tag, the python3 version shipped by the base image, and
# the linux/amd64 platform selected in FROM.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/wheels/llama_cpp_python-0.2.23+cu121-cp38-cp38-manylinux_2_31_x86_64.whl
# Or, install llama-cpp-python (build from source with cuda)
# RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install --verbose llama-cpp-python==0.2.44
# Set working directory
WORKDIR /app
# WORKDIR creates /app while the build user is still root, so the directory is
# root-owned. Hand it to the unprivileged user before switching; otherwise the
# model download below cannot write into it.
RUN chown user /app
USER user
# Download the model into /app. The URL is pinned to a specific repository
# revision rather than a moving branch.
RUN wget https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/731a9fc8f06f5f5e2db8a0cf9d256197eb6e05d1/mistral-7b-instruct-v0.1.Q4_K_S.gguf
# Install requirements.
# Only requirements.txt is copied at this point, ahead of the rest of the
# build context, so this layer does not depend on the other source files.
COPY --chown=user requirements.txt .
# Installing into the system site-packages needs root.
USER root
# --no-cache-dir on both commands keeps pip's download cache out of the layer.
RUN pip install --no-cache-dir -U pip setuptools wheel \
    && pip install --no-cache-dir -r requirements.txt
# Switch back to non-root user for best practices
USER user
# Copy the build context into the current working directory, owned by the
# non-root user. Done last so the dependency layers above are not tied to
# source changes.
COPY --chown=user . .
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment