Sofian Mejjoute (Ryu1845)
@Chillee
Chillee / softmax_quack.py
Created July 10, 2025 21:07
Random Kernel Microbenchmarks
import argparse
import time
from typing import Type
import torch
import torch.nn.functional as F
import torch._inductor.config
torch._inductor.config.triton.multi_kernel = True
from torch import nn
from torch.distributed.tensor import DTensor, Replicate, Shard
from torch.testing._internal.distributed.fake_pg import FakeStore
import torch.distributed as dist
from torch.distributed.device_mesh import init_device_mesh
world_size = 4
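
The preview cuts off before any timing code. As orientation only (a sketch, not the gist's code), a kernel microbenchmark of this kind typically times an eager op against its torch.compile version with CUDA events:

# Sketch only: time eager vs. torch.compile softmax with CUDA events.
import torch
import torch.nn.functional as F

def bench(fn, x, iters=100, warmup=10):
    for _ in range(warmup):
        fn(x)
    torch.cuda.synchronize()
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()
    for _ in range(iters):
        fn(x)
    end.record()
    torch.cuda.synchronize()
    return start.elapsed_time(end) / iters  # average milliseconds per call

x = torch.randn(8192, 8192, device="cuda", dtype=torch.bfloat16)
print("eager   :", bench(lambda t: F.softmax(t, dim=-1), x), "ms")
print("compiled:", bench(torch.compile(lambda t: F.softmax(t, dim=-1)), x), "ms")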
@Birch-san
Birch-san / _06_fused_attention_blockptr_jvp.py
Last active September 2, 2025 02:13
Triton fused attention tutorial, updated with JVP support (JVP accuracy only to atol=1e-3).
from __future__ import annotations
"""
Fused Attention
===============
This is a Triton implementation of the Flash Attention v2 algorithm from Tri Dao (https://tridao.me/publications/flash2/flash2.pdf)
Credits: OpenAI kernel team
Extra Credits:
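
For context on the JVP claim: a fused kernel's forward-mode derivative is typically validated against torch.func.jvp on a reference attention, roughly as below (a sketch with illustrative names, not the gist's Triton code).

# Sketch: check an attention JVP against torch.func.jvp on a reference implementation.
import math
import torch
from torch.func import jvp

def ref_attn(q, k, v):
    scores = (q @ k.transpose(-2, -1)) / math.sqrt(q.shape[-1])
    return scores.softmax(dim=-1) @ v

q, k, v = (torch.randn(1, 4, 128, 64) for _ in range(3))
tq, tk, tv = (torch.randn_like(t) for t in (q, k, v))

# Primal output and its directional derivative along the tangents (tq, tk, tv).
out_ref, tangent_ref = jvp(ref_attn, (q, k, v), (tq, tk, tv))

# A fused kernel returning (out_fused, tangent_fused) would then be checked with e.g.:
# torch.testing.assert_close(tangent_fused, tangent_ref, atol=1e-3, rtol=0)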
@aksh-at
aksh-at / modal_quic_hole_punch.py
Last active May 4, 2025 12:51
Modal QUIC NAT hole-punching
"""
Proof-of-concept for NAT traversal and low-latency communication over QUIC
between two Modal containers.
In theory this could be used to establish a low-latency p2p connection between a
service running outside Modal and a Modal GPU container, e.g. for real-time
inference on a video stream. Please let us know if you try it!
Usage:
> modal run modal_quic_hole_punch.py
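
The step being proven out is ordinary UDP hole punching, with QUIC layered on top. A stripped-down sketch of that step (plain sockets, no QUIC or Modal; peer_addr is assumed to be exchanged via some rendezvous service):

# Simplified hole-punch sketch: both peers send UDP datagrams to each other's public
# address so their NATs open mappings; the real gist then runs QUIC over the opened path.
import socket

def punch(local_port: int, peer_addr: tuple[str, int]) -> socket.socket | None:
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    sock.bind(("0.0.0.0", local_port))
    sock.settimeout(1.0)
    for _ in range(10):
        sock.sendto(b"punch", peer_addr)
        try:
            data, addr = sock.recvfrom(2048)
            print("received", data, "from", addr)  # mapping is open on both sides
            return sock
        except socket.timeout:
            continue
    return None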
@tysam-code
tysam-code / diloco_nesterov_.7lr_.0_to_.9_momentum_1000_momentum_warmup_1-momentum_dampening_dampening_initial_step_bugfix_25_steps_all_run3.log
Created April 30, 2025 00:56
import os
import sys
with open(sys.argv[0]) as f:
code = f.read() # read the code of this file ASAP, for logging
import uuid
import time
import glob
import subprocess
import contextlib
from dataclasses import dataclass
"""DiM (Diffusion Mixer)."""
import math
import typing
import einops
import torch
class DiMConfig(typing.NamedTuple):
@joey00072
joey00072 / mla.py
Created December 28, 2024 16:25
multi head latent attention (MLA)
# https://x.com/shxf0072/status/1873038335427658011
import torch
import torch.nn as nn
import torch.nn.functional as F
from dataclasses import dataclass
from collections import OrderedDict
from ohara.modules.norm import RMSNorm
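
For readers unfamiliar with MLA: the idea is to cache one low-rank latent per token and up-project keys and values from it, instead of caching full per-head K/V. A simplified sketch that omits normalization and RoPE handling (illustrative only, not the gist's code):

# Simplified MLA sketch: cache a small latent c_kv per token, up-project K/V from it.
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleMLA(nn.Module):
    def __init__(self, d_model=512, n_heads=8, d_latent=64):
        super().__init__()
        self.n_heads, self.d_head = n_heads, d_model // n_heads
        self.w_q = nn.Linear(d_model, d_model, bias=False)
        self.w_dkv = nn.Linear(d_model, d_latent, bias=False)  # down-projection; this is what gets cached
        self.w_uk = nn.Linear(d_latent, d_model, bias=False)   # up-project latent -> keys
        self.w_uv = nn.Linear(d_latent, d_model, bias=False)   # up-project latent -> values
        self.w_o = nn.Linear(d_model, d_model, bias=False)

    def forward(self, x):
        b, t, _ = x.shape
        c_kv = self.w_dkv(x)  # (b, t, d_latent) latent KV cache
        q = self.w_q(x).view(b, t, self.n_heads, self.d_head).transpose(1, 2)
        k = self.w_uk(c_kv).view(b, t, self.n_heads, self.d_head).transpose(1, 2)
        v = self.w_uv(c_kv).view(b, t, self.n_heads, self.d_head).transpose(1, 2)
        out = F.scaled_dot_product_attention(q, k, v, is_causal=True)
        return self.w_o(out.transpose(1, 2).reshape(b, t, -1))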
@zjlww
zjlww / model.py
Created December 7, 2024 01:39
Stripped AudioCodecModel from NeMo @ bde672e
from typing import Tuple
import torch
from torch import nn, Tensor
import torch.nn.functional as F
from einops import rearrange
from .modules import HiFiGANEncoder, HiFiGANDecoder, GroupFiniteScalarQuantizer
class AudioCodecModel(nn.Module):
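
As orientation, a neural audio codec of this shape composes three pieces: an encoder that downsamples the waveform to latents, a quantizer that discretizes them into codes, and a decoder that reconstructs audio. A shape-only sketch with hypothetical interfaces (not NeMo's actual API):

# Shape-only sketch of the typical codec composition; interfaces here are hypothetical.
import torch
from torch import nn, Tensor

class TinyCodec(nn.Module):
    def __init__(self, encoder: nn.Module, quantizer: nn.Module, decoder: nn.Module):
        super().__init__()
        self.encoder, self.quantizer, self.decoder = encoder, quantizer, decoder

    def forward(self, audio: Tensor) -> tuple[Tensor, Tensor]:
        z = self.encoder(audio)         # waveform -> downsampled continuous latents
        z_q, codes = self.quantizer(z)  # latents -> quantized latents + integer codes
        recon = self.decoder(z_q)       # quantized latents -> reconstructed waveform
        return recon, codes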
@charlesfrye
charlesfrye / wrapper.py
Last active July 28, 2025 03:54
Train GPT-2 in five minutes -- for free!
# Train GPT-2 in five minutes -- for free
#
# ```bash
# pip install modal
# modal setup
# modal run wrapper.py
# ```
#
# Note that the end-to-end latency the first time is more like 25 minutes:
# - five minutes to install Torch (rip)
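
The general wrapper pattern, hedged and simplified (illustrative names, not the gist's actual code): build a Modal image with the training dependencies, run the training function on a GPU worker, and kick it off from a local entrypoint.

# Sketch of a Modal training wrapper; `train` and the package list are illustrative.
import modal

image = modal.Image.debian_slim().pip_install("torch", "numpy")
app = modal.App("gpt2-speedrun", image=image)

@app.function(gpu="H100", timeout=60 * 30)
def train():
    import torch  # imported inside the container, where the image's torch lives
    print("CUDA available:", torch.cuda.is_available())
    # ... launch the actual GPT-2 training run here ...

@app.local_entrypoint()
def main():
    train.remote()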
@crowsonkb
crowsonkb / ring_attn.py
Created October 10, 2024 16:19
Ring attention for PyTorch.
"""Ring attention for PyTorch.
See https://github.com/nshepperd/flash_attn_jax/blob/main/src/flash_attn_jax/ring_attention.py.
"""
import flash_attn.flash_attn_interface as fai
import torch
import torch.distributed as dist
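
The gist builds on flash_attn's kernels; the communication pattern itself can be sketched without them. Each rank keeps its query shard fixed, the K/V shards rotate around the ring, and partial results are merged with an online-softmax running state. A simplified, non-causal sketch (not the gist's code):

# Ring-attention sketch: rotate K/V shards around the ranks, merge blockwise results
# with a running (max, sum, accumulator) online-softmax state.
import math
import torch
import torch.distributed as dist

def ring_attention(q, k, v):
    rank, world = dist.get_rank(), dist.get_world_size()
    scale = 1.0 / math.sqrt(q.shape[-1])
    m = torch.full(q.shape[:-1] + (1,), float("-inf"), device=q.device, dtype=q.dtype)
    l = torch.zeros_like(m)
    acc = torch.zeros_like(q)
    for step in range(world):
        s = (q @ k.transpose(-2, -1)) * scale            # scores against the current K shard
        m_new = torch.maximum(m, s.amax(dim=-1, keepdim=True))
        p = torch.exp(s - m_new)
        corr = torch.exp(m - m_new)                      # rescale previous partial results
        l = l * corr + p.sum(dim=-1, keepdim=True)
        acc = acc * corr + p @ v
        m = m_new
        if step < world - 1:                             # rotate K/V shards to the next rank
            k, v = _ring_shift(k), _ring_shift(v)
    return acc / l

def _ring_shift(t):
    rank, world = dist.get_rank(), dist.get_world_size()
    recv = torch.empty_like(t)
    ops = [dist.P2POp(dist.isend, t.contiguous(), (rank + 1) % world),
           dist.P2POp(dist.irecv, recv, (rank - 1) % world)]
    for req in dist.batch_isend_irecv(ops):
        req.wait()
    return recv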