Skip to content

Instantly share code, notes, and snippets.

WARNING:root:libtpu.so and TPU device found. Setting PJRT_DEVICE=TPU.
INFO 07-08 18:16:39 [__init__.py:253] Automatically detected platform tpu.
INFO 07-08 18:16:39 [tpu.py:187] tpu_commons not found, using vLLM's TpuPlatform
============================= test session starts ==============================
platform linux -- Python 3.10.18, pytest-8.4.1, pluggy-1.6.0 -- /home/xiowei/miniconda3/envs/vllm/bin/python3.10
cachedir: .pytest_cache
rootdir: /home/xiowei/vllm
configfile: pyproject.toml
plugins: anyio-4.9.0
collecting ... collected 1 item
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "vllm",
"type": "debugpy",
"request": "launch",
#!/bin/bash
# Usage:
# Run the file under the parent directory of the vllm directory as
# bash run_tpu_benchmark.sh --model <model_name> --tp 1
# bash run_tpu_benchmark.sh --model <model_name> --tp 1 --profile
# bash run_tpu_benchmark.sh --model <model_name> --tp 4
#
# Commonly used models:
# meta-llama/Meta-Llama-3.1-8B-Instruct
ERROR 06-06 04:39:02 [core.py:515] EngineCore failed to start.
ERROR 06-06 04:39:02 [core.py:515] Traceback (most recent call last):
ERROR 06-06 04:39:02 [core.py:515] File "/home/xiowei/vllm/vllm/v1/engine/core.py", line 506, in run_engine_core
ERROR 06-06 04:39:02 [core.py:515] engine_core = EngineCoreProc(*args, **kwargs)
ERROR 06-06 04:39:02 [core.py:515] File "/home/xiowei/vllm/vllm/v1/engine/core.py", line 390, in __init__
ERROR 06-06 04:39:02 [core.py:515] super().__init__(vllm_config, executor_class, log_stats,
ERROR 06-06 04:39:02 [core.py:515] File "/home/xiowei/vllm/vllm/v1/engine/core.py", line 76, in __init__
ERROR 06-06 04:39:02 [core.py:515] self.model_executor = executor_class(vllm_config)
ERROR 06-06 04:39:02 [core.py:515] File "/home/xiowei/vllm/vllm/executor/executor_base.py", line 53, in __init__
ERROR 06-06 04:39:02 [core.py:515] self._init_executor()
# python pytorch/xla/test/quantized_ops/test_quantized_matmul.py -k test_blockwise_matmul_op
# python pytorch/xla/test/quantized_ops/test_quantized_matmul.py -k test_asymmetric_per_channel
import torch
import torch.nn.functional as F
import torch_xla
from torch.library import impl
from torch_xla.core.xla_model import XLA_LIB
XLA_LIB.define(
# Per-channel quant zero point
# Sanity check: contracting x over its channel axis with einsum and
# broadcasting against a per-output-channel zero point must agree with
# the explicit sum-then-scale reference formulation.
x = torch.randn(3, 6)
zero_point = torch.randn(8)
# Reference path: reduce the channel dim first, then scale by each zero point.
zp_out_ref = x.sum(dim=-1, keepdim=True) * zero_point
# einsum path: "...c,z->...z" sums over c and broadcasts over z -> shape (3, 8).
zp_out = torch.einsum("...c,z->...z", x, zero_point)
assert torch.allclose(zp_out, zp_out_ref)
# block-wise case
# w: [in_channel / block_size, block_size, out_channel]
xw32 printing named modules
LlamaForCausalLM(
(model): LlamaModel(
(embed_tokens): VocabParallelEmbedding(num_embeddings=128256, embedding_dim=4096, org_vocab_size=128256, num_embeddings_padded=128256, tp_size=1)
(layers): ModuleList(
(0-31): 32 x LlamaDecoderLayer(
(self_attn): LlamaAttention(
(qkv_proj): QKVParallelLinear(in_features=4096, output_features=6144, bias=False, tp_size=1, gather_output=False)
(o_proj): RowParallelLinear(input_features=4096, output_features=4096, bias=False, tp_size=1, reduce_results=True)
(rotary_emb): Llama3RotaryEmbedding(head_size=128, rotary_dim=128, max_position_embeddings=131072, base=500000.0, is_neox_style=True)
#!/bin/bash
# Usage:
# Run the file under the parent directory of the vllm directory as
# bash run_tpu_benchmark.sh --model <model_name> --tp 1
# bash run_tpu_benchmark.sh --model <model_name> --tp 1 --profile
# bash run_tpu_benchmark.sh --model <model_name> --tp 4
#
# Commonly used models:
# meta-llama/Meta-Llama-3.1-8B-Instruct
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "vllm",
"type": "debugpy",
"request": "launch",
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "vllm",
"type": "debugpy",
"request": "launch",