{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "vllm",
            "type": "debugpy",
            "request": "launch",
            "program": "/mnt/disks/persist/tpu_commons/examples/offline_inference.py",
            "console": "integratedTerminal",
            "env": {
                // VLLM_USE_V1 enables vLLM's V1 engine; the other two variables are
                // tpu_commons selectors for the model implementation and the JAX TPU backend.
                "VLLM_USE_V1": "1",
                "MODEL_IMPL_TYPE": "vllm",
                "TPU_BACKEND_TYPE": "jax"
            },
"args": [
"--model",
"meta-llama/Llama-3.1-8B",
"--tensor_parallel_size",
"4",
"--task",
"generate",
"--max_model_len",
"1024",
// "/mnt/disks/persist/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py",
// "-k",
// "test_structured_output_auto_mode",
// "--port",
// "8003",
// "--gpu-memory-utilization",
// "0.98",
// "--max-num-batched-tokens",
// "8192",
// "--num-scheduler-steps",
// "8",
// "--tensor-parallel-size",
// "1",
// "--max-model-len",
// "2048"
]
},
]
}
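
For reference, below is a minimal sketch of what an offline-inference debug target like the one above typically does, using the same model, tensor-parallel size, and max model length as the arguments in this configuration. It assumes the standard vllm.LLM API; the actual offline_inference.py in tpu_commons may parse these flags differently, and the prompts and sampling parameters here are illustrative only.

# offline_inference_sketch.py -- hypothetical stand-in for the gist's debug target.
# The launch configuration above sets VLLM_USE_V1, MODEL_IMPL_TYPE, and
# TPU_BACKEND_TYPE in the process environment before the script starts.
from vllm import LLM, SamplingParams

prompts = ["Hello, my name is", "The capital of France is"]
sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=64)

# Mirrors --model, --tensor_parallel_size, and --max_model_len from the launch args.
llm = LLM(
    model="meta-llama/Llama-3.1-8B",
    tensor_parallel_size=4,
    max_model_len=1024,
)

outputs = llm.generate(prompts, sampling_params)
for output in outputs:
    print(f"Prompt: {output.prompt!r}")
    print(f"Generated: {output.outputs[0].text!r}")

Selecting the "vllm" configuration in VS Code's Run and Debug panel launches the script under debugpy in the integrated terminal with the arguments and environment variables listed above.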