Liangfu Chen liangfu

liangfu / dot_emacs.el

Created June 5, 2025 00:48

Configure Emacs to use gptel with Claude 3.7 Sonnet on Amazon Bedrock

	;; Configure emacs to use gptel with Sonnet-3.7 on Bedrock
	(add-to-list 'load-path "/home/ubuntu/workspace/gptel")
	(setq gptel-use-curl "/home/ubuntu/miniconda3/envs/py310/bin/curl")
	(setq
	gptel-model 'claude-3-7-sonnet-20250219
	gptel-backend (gptel-make-bedrock "AWS"
	:region "us-west-2"
	;; subset of gptel--bedrock-models
	:models '(claude-3-7-sonnet-20250219)
	;; optional for provisioned access

liangfu / Makefile

Created March 18, 2025 18:44

Rasterization (a.k.a. rendering triangles into a framebuffer) in C

liangfu / test_mixed_eager_aot.py

Created March 13, 2025 16:34

Evaluate consistency when mixing eager execution with torch.compile()

	import torch
	import os
	import torch_xla.core.xla_model as xm

	def write_to_kv_cache(
	key: torch.Tensor,
	value: torch.Tensor,
	key_cache: torch.Tensor,
	value_cache: torch.Tensor,
	slot_mapping: torch.Tensor,

liangfu / test_split_neuronx.py

Created March 11, 2025 21:26

Evaluate torch.split and slice operator support on the openxla backend (with torch_neuronx)

	import os
	import pytest
	import torch
	import torch_neuronx
	import torch_xla.core.xla_model as xm

	@pytest.mark.parametrize("batch_size,seq_len,q_size,kv_size,use_torch_compile,disable_functionalization", [
	(2, 128, 32, 32, False, True),
	(2, 128, 32, 32, True, True),
	(2, 128, 32, 32, False, False),

liangfu / test_split.py

Last active March 10, 2025 23:45

Evaluate torch.split and slice operator support on the openxla backend

	import pytest
	import torch
	import torch_xla.core.xla_model as xm

	@pytest.mark.parametrize("batch_size,seq_len,q_size,kv_size", [
	(2, 128, 32, 32),
	(4, 256, 64, 64),
	])
	def test_split_consistency(batch_size, seq_len, q_size, kv_size):
	# Get XLA device

liangfu / test_sr.py

Created January 17, 2025 23:52

Evaluate stochastic rounding

	import os
	import time
	import torch
	import torch_xla.core.xla_model as xm

	N = 16

	def main():
	# os.environ["XLA_USE_BF16"] = "1"
	os.environ["NEURON_RT_STOCHASTIC_ROUNDING_EN"] = "1"

liangfu / test_index_copy.py

Created January 14, 2025 07:19

	import torch
	import os
	import depyf
	import torch_xla.core.xla_model as xm

	# Set NEURON_CC_FLAGS in the process environment to request the
	# "transformer" model type at optimization level -O1
	# (presumably read by the Neuron compiler — confirm).
	os.environ["NEURON_CC_FLAGS"]= " --model-type=transformer -O1 "
	# Point NEURON_COMPILE_CACHE_URL at a "_compile_cache" directory under the
	# current working directory.
	os.environ["NEURON_COMPILE_CACHE_URL"] = os.path.join(os.getcwd(), "_compile_cache")

	@torch.compiler.allow_in_graph
	def write_to_kv_cache(

liangfu / benchmark_xla_scatter.py

Created January 9, 2025 04:34

Benchmark XLA scatter with torch-xla

	import time
	import torch
	import torch_xla.core.xla_model as xm

	N = 128
	n_iters = 100

	def main():
	device = xm.xla_device()
	src = torch.arange(1, 2*N+1).reshape((2, N)).to(device=device)

liangfu / depyf_openxla.py

Created December 2, 2024 23:12

Demonstrate the feasibility of combining depyf with the openxla backend

	import torch
	import torch_xla.core.xla_model as xm

	# Toy function compiled through torch.compile with the "openxla" backend.
	# Returns a / (|a| + 1) multiplied element-wise by b.
	# NOTE(review): indentation is flattened in this excerpt; `b = b * -1` is
	# presumably the body of the `if`, so b is negated only when its sum is
	# negative — confirm against the original gist.
	@torch.compile(backend="openxla")
	def toy_example(a, b):
	# Divide a by (|a| + 1).
	x = a / (torch.abs(a) + 1)
	# Branch on a value computed from the tensor b (the sign of its sum).
	if b.sum() < 0:
	b = b * -1
	return x * b

liangfu / test_copy_blocks.py

Created November 26, 2024 19:51

	from typing import Any, Dict, List, Optional, Tuple, Type

	import torch
	import torch_xla.core.xla_model as xm
	import torch_xla.experimental.custom_kernel # Required to register custom ops.

	class PallasAttentionBackend:
	@torch.compile(backend="openxla")
	@staticmethod
	def copy_blocks(