Skip to content

Instantly share code, notes, and snippets.

WARNING:root:libtpu.so and TPU device found. Setting PJRT_DEVICE=TPU.
INFO 07-08 18:16:39 [__init__.py:253] Automatically detected platform tpu.
INFO 07-08 18:16:39 [tpu.py:187] tpu_commons not found, using vLLM's TpuPlatform
============================= test session starts ==============================
platform linux -- Python 3.10.18, pytest-8.4.1, pluggy-1.6.0 -- /home/xiowei/miniconda3/envs/vllm/bin/python3.10
cachedir: .pytest_cache
rootdir: /home/xiowei/vllm
configfile: pyproject.toml
plugins: anyio-4.9.0
collecting ... collected 1 item
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "vllm",
"type": "debugpy",
"request": "launch",
#!/bin/bash
# Usage:
# Run the file under the parent directory of the vllm directory as
# bash run_tpu_benchmark.sh --model <model_name> --tp 1
# bash run_tpu_benchmark.sh --model <model_name> --tp 1 --profile
# bash run_tpu_benchmark.sh --model <model_name> --tp 4
#
# Commonly used models:
# meta-llama/Meta-Llama-3.1-8B-Instruct
ERROR 06-06 04:39:02 [core.py:515] EngineCore failed to start.
ERROR 06-06 04:39:02 [core.py:515] Traceback (most recent call last):
ERROR 06-06 04:39:02 [core.py:515] File "/home/xiowei/vllm/vllm/v1/engine/core.py", line 506, in run_engine_core
ERROR 06-06 04:39:02 [core.py:515] engine_core = EngineCoreProc(*args, **kwargs)
ERROR 06-06 04:39:02 [core.py:515] File "/home/xiowei/vllm/vllm/v1/engine/core.py", line 390, in __init__
ERROR 06-06 04:39:02 [core.py:515] super().__init__(vllm_config, executor_class, log_stats,
ERROR 06-06 04:39:02 [core.py:515] File "/home/xiowei/vllm/vllm/v1/engine/core.py", line 76, in __init__
ERROR 06-06 04:39:02 [core.py:515] self.model_executor = executor_class(vllm_config)
ERROR 06-06 04:39:02 [core.py:515] File "/home/xiowei/vllm/vllm/executor/executor_base.py", line 53, in __init__
ERROR 06-06 04:39:02 [core.py:515] self._init_executor()
# python pytorch/xla/test/quantized_ops/test_quantized_matmul.py -k test_blockwise_matmul_op
# python pytorch/xla/test/quantized_ops/test_quantized_matmul.py -k test_asymmetric_per_channel
import torch
import torch.nn.functional as F
import torch_xla
from torch.library import impl
from torch_xla.core.xla_model import XLA_LIB
XLA_LIB.define(
# Per-channel quant zero point
# Sanity check: contracting x over its channel axis with einsum and
# broadcasting against a per-output-channel zero point must agree with
# the explicit sum-then-scale reference formulation.
x = torch.randn(3, 6)
zero_point = torch.randn(8)
# Reference path: reduce the channel dim first, then scale by each zero point.
zp_out_ref = x.sum(dim=-1, keepdim=True) * zero_point
# einsum path: "...c,z->...z" sums over c and broadcasts over z -> shape (3, 8).
zp_out = torch.einsum("...c,z->...z", x, zero_point)
assert torch.allclose(zp_out, zp_out_ref)
# block-wise case
# w: [in_channel / block_size, block_size, out_channel]
xw32 printing named modules
LlamaForCausalLM(
(model): LlamaModel(
(embed_tokens): VocabParallelEmbedding(num_embeddings=128256, embedding_dim=4096, org_vocab_size=128256, num_embeddings_padded=128256, tp_size=1)
(layers): ModuleList(
(0-31): 32 x LlamaDecoderLayer(
(self_attn): LlamaAttention(
(qkv_proj): QKVParallelLinear(in_features=4096, output_features=6144, bias=False, tp_size=1, gather_output=False)
(o_proj): RowParallelLinear(input_features=4096, output_features=4096, bias=False, tp_size=1, reduce_results=True)
(rotary_emb): Llama3RotaryEmbedding(head_size=128, rotary_dim=128, max_position_embeddings=131072, base=500000.0, is_neox_style=True)
#!/bin/bash
# Usage:
# Run the file under the parent directory of the vllm directory as
# bash run_tpu_benchmark.sh --model <model_name> --tp 1
# bash run_tpu_benchmark.sh --model <model_name> --tp 1 --profile
# bash run_tpu_benchmark.sh --model <model_name> --tp 4
#
# Commonly used models:
# meta-llama/Meta-Llama-3.1-8B-Instruct
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "vllm",
"type": "debugpy",
"request": "launch",
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "vllm",
"type": "debugpy",
"request": "launch",