# Non-root variant of the vLLM OpenAI-compatible server image.
FROM vllm/vllm-openai:v0.9.1

# PYTHONUNBUFFERED: do not buffer Python stdout/stderr, so log lines appear
# immediately in the container output.
# HF_HOME / HF_HUB_CACHE: keep Hugging Face state and downloaded models under
# /api/models instead of the default location in the user's home directory.
ENV PYTHONUNBUFFERED=1 \
    HF_HOME=/api/models \
    HF_HUB_CACHE=/api/models

# Identity of the unprivileged runtime account; override with --build-arg.
ARG user=appuser
ARG group=appuser
ARG uid=1000
ARG gid=1000

# Create the account and the model cache directory in a single layer and hand
# the whole /api tree to that account. Ownership is set recursively so that
# /api/models itself is writable by the runtime user; this replaces the former
# world-writable `chmod -R 777` (and a `umask 000` that had no effect, because
# a umask set inside one RUN does not carry over to later layers or to the
# running container).
RUN groupadd --gid "${gid}" "${group}" \
    && useradd --uid "${uid}" --gid "${group}" --shell /bin/sh --create-home "${user}" \
    && mkdir -p /api/models \
    && chown -R "${user}:${group}" /api

# Port the API server is published on (documentation only; publish with -p).
EXPOSE 8000

# Drop privileges. Numeric IDs are used so runtimes that verify "non-root"
# can do so without resolving a user name.
USER ${uid}:${gid}
# Build the image from the directory that contains the Dockerfile.
docker build -t vllm-non_root:v0.9.1-20250619 .
# Start the server. The image sets HF_HOME/HF_HUB_CACHE to /api/models and runs
# as a non-root user, so the host Hugging Face hub cache is mounted there (a
# mount under /root would never be used). The host directory must be writable
# by the container user (uid 1000 unless overridden at build time).
docker run --gpus all -v ~/.cache/huggingface/hub:/api/models --env "HUGGING_FACE_HUB_TOKEN=hf_*******" -p 8000:8000 --ipc=host vllm-non_root:v0.9.1-20250619 --model mistralai/Mistral-7B-v0.1
# Smoke test: request a short completion from the OpenAI-compatible endpoint.
curl http://localhost:8000/v1/completions -H "Content-Type: application/json" -d '{
"model": "mistralai/Mistral-7B-v0.1",
"prompt": "San Francisco is a",
"max_tokens": 7,
"temperature": 0
}'
# Response
{"id":"cmpl-c979406a843e4e19b2b1b065bf08c6c2","object":"text_completion","created":1750337935,"model":"mistralai/Mistral-7B-v0.1","choices":[{"index":0,"text":" city that is known for its beautiful","logprobs":null,"finish_reason":"length","stop_reason":null,"prompt_logprobs":null}],"usage":{"prompt_tokens":5,"total_tokens":12,"completion_tokens":7,"prompt_tokens_details":null},"kv_transfer_params":null}