{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "vllm",
            "type": "debugpy",
            "request": "launch",
            "program": "/mnt/disks/persist/tpu_commons/examples/offline_inference.py",
            "console": "integratedTerminal",
            "env": {
                "VLLM_USE_V1": "1",
                "MODEL_IMPL_TYPE": "vllm",
                "TPU_BACKEND_TYPE": "jax",
            },
            "args": [
                "--model",
                "meta-llama/Llama-3.1-8B",
                "--tensor_parallel_size",
                "4",
                "--task",
                "generate",
                "--max_model_len",
                "1024",
                // "/mnt/disks/persist/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py",
                // "-k",
                // "test_structured_output_auto_mode",
                // "--port",
                // "8003",
                // "--gpu-memory-utilization",
                // "0.98",
                // "--max-num-batched-tokens",
                // "8192",
                // "--num-scheduler-steps",
                // "8",
                // "--tensor-parallel-size",
                // "1",
                // "--max-model-len",
                // "2048"
            ]
        },
    ]
}
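
For reference, this launch configuration is roughly equivalent to running the example script directly with the same environment variables and flags. Below is a minimal, hypothetical Python sketch of such an offline inference run using the public vllm LLM API; the environment variables and engine arguments are copied from the configuration above (MODEL_IMPL_TYPE and TPU_BACKEND_TYPE appear to be tpu_commons-specific), while the prompt and sampling settings are illustrative assumptions, not the actual contents of offline_inference.py.

import os

# Environment variables mirrored from the launch configuration above.
# MODEL_IMPL_TYPE and TPU_BACKEND_TYPE look tpu_commons-specific (assumption).
os.environ["VLLM_USE_V1"] = "1"
os.environ["MODEL_IMPL_TYPE"] = "vllm"
os.environ["TPU_BACKEND_TYPE"] = "jax"

from vllm import LLM, SamplingParams

# Same model and engine arguments as the "args" list above.
llm = LLM(
    model="meta-llama/Llama-3.1-8B",
    tensor_parallel_size=4,
    task="generate",
    max_model_len=1024,
)

# Illustrative prompt and sampling parameters (not taken from the config).
prompts = ["The capital of France is"]
outputs = llm.generate(prompts, SamplingParams(temperature=0.0, max_tokens=64))
for output in outputs:
    print(output.outputs[0].text)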