{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "vllm",
            "type": "debugpy",
            "request": "launch",
            "program": "/mnt/disks/persist/tpu_commons/examples/offline_inference.py",
            "console": "integratedTerminal",
            "env": {
                "VLLM_USE_V1": "1",
                "MODEL_IMPL_TYPE": "vllm",
                "TPU_BACKEND_TYPE": "jax",
            },
            "args": [
                "--model",
                "meta-llama/Llama-3.1-8B",
                "--tensor_parallel_size",
                "4",
                "--task",
                "generate",
                "--max_model_len",
                "1024",
                // "/mnt/disks/persist/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py",
                // "-k",
                // "test_structured_output_auto_mode",
                // "--port",
                // "8003",
                // "--gpu-memory-utilization",
                // "0.98",
                // "--max-num-batched-tokens",
                // "8192",
                // "--num-scheduler-steps",
                // "8",
                // "--tensor-parallel-size",
                // "1",
                // "--max-model-len",
                // "2048"
            ]
        },
    ]
}
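
For reference, this launch configuration is roughly equivalent to running the example script directly with the same environment variables and flags. Below is a minimal, hypothetical Python sketch of such an offline inference run using the public vllm LLM API; the environment variables and engine arguments are copied from the configuration above (MODEL_IMPL_TYPE and TPU_BACKEND_TYPE appear to be tpu_commons-specific), while the prompt and sampling settings are illustrative assumptions, not the actual contents of offline_inference.py.

import os

# Environment variables mirrored from the launch configuration above.
# MODEL_IMPL_TYPE and TPU_BACKEND_TYPE look tpu_commons-specific (assumption).
os.environ["VLLM_USE_V1"] = "1"
os.environ["MODEL_IMPL_TYPE"] = "vllm"
os.environ["TPU_BACKEND_TYPE"] = "jax"

from vllm import LLM, SamplingParams

# Same model and engine arguments as the "args" list above.
llm = LLM(
    model="meta-llama/Llama-3.1-8B",
    tensor_parallel_size=4,
    task="generate",
    max_model_len=1024,
)

# Illustrative prompt and sampling parameters (not taken from the config).
prompts = ["The capital of France is"]
outputs = llm.generate(prompts, SamplingParams(temperature=0.0, max_tokens=64))
for output in outputs:
    print(output.outputs[0].text)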