vanbasten23’s gists

vanbasten23 / gist:33841e7fc6de64e7022d59e5924179c1

Created May 14, 2025 04:24

	{
	// Use IntelliSense to learn about possible attributes.
	// Hover to view descriptions of existing attributes.
	// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
	"version": "0.2.0",
	"configurations": [
	{
	"name": "vllm",
	"type": "debugpy",
	"request": "launch",

vanbasten23 / gist:d63b16eb9e20bb10069d8a23ae39676e

Created May 14, 2025 00:43

	# SPDX-License-Identifier: Apache-2.0

	# Adapted from
	# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
	# Copyright 2023 The vLLM team.
	# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
	#
	# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
	# and OPT implementations in this library. It has been modified from its
	# original forms to accommodate minor architectural differences compared

vanbasten23 / gist:79f692484a0c33cac8b33cdbd2b2eb64

Created May 13, 2025 03:04

	# SPDX-License-Identifier: Apache-2.0

	# Adapted from
	# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
	# Copyright 2023 The vLLM team.
	# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
	#
	# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
	# and OPT implementations in this library. It has been modified from its
	# original forms to accommodate minor architectural differences compared

vanbasten23 / gist:c3fd5e3a21fd19431bd6fb86d7989e5a

Created May 12, 2025 23:26

	{
	// Use IntelliSense to learn about possible attributes.
	// Hover to view descriptions of existing attributes.
	// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
	"version": "0.2.0",
	"configurations": [
	{
	"name": "vllm",
	"type": "debugpy",
	"request": "launch",

vanbasten23 / gist:4d12db7cf4f823509ea96a2abfe83cf4

Created April 30, 2025 02:47

	{
	// Use IntelliSense to learn about possible attributes.
	// Hover to view descriptions of existing attributes.
	// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
	"version": "0.2.0",
	"configurations": [
	{
	"name": "vllm",
	"type": "debugpy",
	"request": "launch",

vanbasten23 / gist:5e343c4b65c365ecec347abd35720800

Created April 28, 2025 18:32

	Traceback (most recent call last):
	File "/home/xiowei/vllm/vllm/v1/executor/multiproc_executor.py", line 465, in worker_busy_loop
	output = func(*args, **kwargs)
	File "/home/xiowei/vllm/vllm/v1/worker/tpu_worker.py", line 160, in determine_available_memory
	self.model_runner.profile_run(self.model_runner.max_num_tokens)
	File "/home/xiowei/vllm/vllm/v1/worker/tpu_model_runner.py", line 1166, in profile_run
	dummy_encoder_outputs = self.model.get_multimodal_embeddings(
	File "/home/xiowei/vllm/vllm/model_executor/models/gemma3_mm.py", line 588, in get_multimodal_embeddings
	return self._process_image_input(image_input)
	File "/home/xiowei/vllm/vllm/model_executor/models/gemma3_mm.py", line 569, in _process_image_input

vanbasten23 / gist:174b81cc1d29a6829997d4eacc9b8760

Created April 24, 2025 03:24

	{
	// Use IntelliSense to learn about possible attributes.
	// Hover to view descriptions of existing attributes.
	// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
	"version": "0.2.0",
	"configurations": [
	{
	"name": "vllm",
	"type": "debugpy",
	"request": "launch",

vanbasten23 / gist:dad1febba89027747cb740b1e042e14a

Created April 22, 2025 02:19

	WARNING:root:libtpu.so and TPU device found. Setting PJRT_DEVICE=TPU.
	INFO 04-21 23:15:22 [__init__.py:239] Automatically detected platform tpu.
	xw32 line246 engine_args=EngineArgs(model='google/gemma-3-4b-it', served_model_name=None, tokenizer='google/gemma-3-4b-it', hf_config_path=None, task='auto', skip_tokenizer_init=False, tokenizer_mode='auto', trust_remote_code=False, allowed_local_media_path='', download_dir=None, load_format='auto', config_format=<ConfigFormat.AUTO: 'auto'>, dtype='auto', kv_cache_dtype='auto', seed=None, max_model_len=None, distributed_executor_backend=None, pipeline_parallel_size=1, tensor_parallel_size=4, data_parallel_size=1, enable_expert_parallel=False, max_parallel_loading_workers=None, block_size=None, enable_prefix_caching=None, prefix_caching_hash_algo='builtin', disable_sliding_window=False, disable_cascade_attn=False, use_v2_block_manager=True, swap_space=4, cpu_offload_gb=0, gpu_memory_utilization=0.9, max_num_batched_tokens=256, max_num_partial_prefills=1, max_long_part

vanbasten23 / gist:e453bbb23c59ea1f868879412d86d481

Created April 21, 2025 17:22

	def selective_scan_ref(
	u, # B L D
	delta, # B L D
	A, # D N
	B, # B L N
	C, # B L N
	D, # D
	h_init: jax.Array | None = None,
	) -> jax.Array:
	"""Reference function equivalent to pallas_selective_scan."""

vanbasten23 / gist:8a3f7e98f70378a09eca68f773a4f3fc

Created April 10, 2025 20:20

	bs=4
	seq_len=4096
	d_inner=1536
	d_state=16

	delta = random.uniform(random.key(0), (bs, seq_len, d_inner), dtype=jnp.float32)
	A = random.uniform(random.key(0), (d_inner, d_state), dtype=jnp.float32)
	deltaA_1 = jnp.exp(jnp.einsum('b l d, d n -> b l d n', delta, A))
	deltaA_2 = jnp.exp(delta[..., None]*A)
	assert jnp.allclose(deltaA_1, deltaA_2)

XiongfeiWei vanbasten23