fpaupier · February 17, 2025 13:07
diff --git a/minimal-vllm.sh b/minimal-vllm.sh
 # Start the vLLM OpenAI-compatible server.
 #
 # Settings are read from the environment; when a variable is unset or empty,
 # the value that used to be hard-coded here is used instead:
 #   HUGGING_FACE_HUB_TOKEN  Hugging Face token (default: the "<secret>" placeholder)
 #   VLLM_API_KEY            key clients must present (default: your-secret-key)
 #   VLLM_MODEL              model to serve
 #                           (default: mistralai/Mistral-Small-24B-Instruct-2501)
 #   VLLM_PORT               host port published for the API (default: 8000)
 #
 # NOTE: there is no -d/--detach, so docker stays in the foreground until the
 # container exits; send the test query from a second shell.
 #
 # The token is placed only in docker's own environment and forwarded by name
 # (--env VAR with no value), so it is not spelled out on the command line.
 HUGGING_FACE_HUB_TOKEN="${HUGGING_FACE_HUB_TOKEN:-<secret>}" \
 docker run --runtime nvidia --gpus all \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env HUGGING_FACE_HUB_TOKEN \
    -p "${VLLM_PORT:-8000}:8000" \
    --ipc=host \
    vllm/vllm-openai:latest \
    --api-key "${VLLM_API_KEY:-your-secret-key}" \
    --tokenizer-mode "mistral" \
    --model "${VLLM_MODEL:-mistralai/Mistral-Small-24B-Instruct-2501}"
    
 # Test query: request a completion from the server on localhost.
 #
 # VLLM_PORT, VLLM_API_KEY and VLLM_MODEL override the target port, the Bearer
 # key and the model name. Unset or empty, they fall back to the values that
 # were previously hard-coded (8000, your-secret-key, the Mistral Small model),
 # in which case the request is identical to the original one.
 #
 # The body is rendered with printf so the model name can be substituted while
 # the JSON stays inside single quotes; the model value is inserted verbatim,
 # so it must not contain `"` or `\`.
 request_body=$(printf '{
    "model": "%s",
    "prompt": "Raconte moi une histoire sur l exploration spatiale",
    "max_tokens": 128,
    "temperature": 0.7
 }' "${VLLM_MODEL:-mistralai/Mistral-Small-24B-Instruct-2501}")
 curl "http://localhost:${VLLM_PORT:-8000}/v1/completions" \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer ${VLLM_API_KEY:-your-secret-key}" \
    -d "${request_body}"
	# Start the vLLM OpenAI-compatible server.
	#
	# Settings are read from the environment; when a variable is unset or empty,
	# the value that used to be hard-coded here is used instead:
	#   HUGGING_FACE_HUB_TOKEN  Hugging Face token (default: the "<secret>" placeholder)
	#   VLLM_API_KEY            key clients must present (default: your-secret-key)
	#   VLLM_MODEL              model to serve
	#                           (default: mistralai/Mistral-Small-24B-Instruct-2501)
	#   VLLM_PORT               host port published for the API (default: 8000)
	#
	# NOTE: there is no -d/--detach, so docker stays in the foreground until the
	# container exits; send the test query from a second shell.
	#
	# The token is placed only in docker's own environment and forwarded by name
	# (--env VAR with no value), so it is not spelled out on the command line.
	HUGGING_FACE_HUB_TOKEN="${HUGGING_FACE_HUB_TOKEN:-<secret>}" \
	docker run --runtime nvidia --gpus all \
		-v ~/.cache/huggingface:/root/.cache/huggingface \
		--env HUGGING_FACE_HUB_TOKEN \
		-p "${VLLM_PORT:-8000}:8000" \
		--ipc=host \
		vllm/vllm-openai:latest \
		--api-key "${VLLM_API_KEY:-your-secret-key}" \
		--tokenizer-mode "mistral" \
		--model "${VLLM_MODEL:-mistralai/Mistral-Small-24B-Instruct-2501}"

	# Test query: request a completion from the server on localhost.
	#
	# VLLM_PORT, VLLM_API_KEY and VLLM_MODEL override the target port, the Bearer
	# key and the model name. Unset or empty, they fall back to the values that
	# were previously hard-coded (8000, your-secret-key, the Mistral Small model),
	# in which case the request is identical to the original one.
	#
	# The body is rendered with printf so the model name can be substituted while
	# the JSON stays inside single quotes; the model value is inserted verbatim,
	# so it must not contain `"` or `\`.
	request_body=$(printf '{
	"model": "%s",
	"prompt": "Raconte moi une histoire sur l exploration spatiale",
	"max_tokens": 128,
	"temperature": 0.7
	}' "${VLLM_MODEL:-mistralai/Mistral-Small-24B-Instruct-2501}")
	curl "http://localhost:${VLLM_PORT:-8000}/v1/completions" \
		-H "Content-Type: application/json" \
		-H "Authorization: Bearer ${VLLM_API_KEY:-your-secret-key}" \
		-d "${request_body}"