Motivated by https://x.com/gabrielpeyre/status/1837156819799577034
#!/usr/bin/env python
import torch
import time
import random
import numpy as np
import multiprocessing
from multiprocessing import Process, Manager, Event
import plotly.express as px
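The preview stops at the imports, so everything below is a guess at the script's shape, not the gist's code: a minimal skeleton that fits these imports, with worker processes timing matmuls, a Manager list collecting results, an Event signalling shutdown, and a plotly scatter at the end.

def worker(size, results, stop):
    # Time square matmuls until the main process signals shutdown.
    while not stop.is_set():
        a = torch.randn(size, size)
        t0 = time.time()
        a @ a
        results.append((size, time.time() - t0))

if __name__ == "__main__":
    with Manager() as manager:
        results = manager.list()
        stop = Event()
        procs = [Process(target=worker, args=(s, results, stop)) for s in (256, 512, 1024)]
        for p in procs:
            p.start()
        time.sleep(2.0)
        stop.set()
        for p in procs:
            p.join()
        sizes, secs = zip(*list(results))
        px.scatter(x=sizes, y=secs, labels={"x": "matrix size", "y": "seconds"}).show()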
import torch

class TwoDimRotary(torch.nn.Module):
    def __init__(self, dim, base=100, h=128, w=128):
        super().__init__()
        self.inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim))
        self.h = h
        self.w = w
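        # The gist preview cuts off above; everything below is an assumed
        # completion following one common 2D-RoPE convention (cache per-axis
        # angle tables; rotate half the channel pairs by row index and half
        # by column index). Names and details are mine, not the original's.
        t_h = torch.arange(h).float()
        t_w = torch.arange(w).float()
        self.register_buffer("freqs_h", torch.outer(t_h, self.inv_freq), persistent=False)
        self.register_buffer("freqs_w", torch.outer(t_w, self.inv_freq), persistent=False)

    def rotate_half(self, x):
        x1, x2 = x.chunk(2, dim=-1)
        return torch.cat((-x2, x1), dim=-1)

    def forward(self, x):
        # x: (batch, h*w, dim). Channel pair (i, i + dim/2) rotates by angle
        # theta_i; the first dim/4 pairs use the row angle, the rest the column angle.
        d = x.shape[-1]
        rows = torch.arange(self.h, device=x.device).repeat_interleave(self.w)
        cols = torch.arange(self.w, device=x.device).repeat(self.h)
        ang_h = self.freqs_h[rows][:, : d // 4]
        ang_w = self.freqs_w[cols][:, : d // 4]
        theta = torch.cat([ang_h, ang_w], dim=-1)   # (h*w, d/2)
        ang = torch.cat([theta, theta], dim=-1)     # (h*w, d)
        return x * ang.cos() + self.rotate_half(x) * ang.sin()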
https://x.com/jxbz/status/1857145985480438073

import torch

def polar_factor_newton_schulz(M, max_iter=50):
    # Scale so every singular value is at most 1 (required for convergence).
    M_t = M / M.norm(p='fro')
    for _ in range(max_iter):
        # Odd cubic in M_t: each singular value s maps to 1.5*s - 0.5*s**3,
        # driving all singular values toward 1 while leaving U and V fixed.
        M_t = 1.5 * M_t - 0.5 * M_t @ M_t.T @ M_t
    return M_t
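A quick sanity check (my addition, not part of the gist): the fixed point of the iteration should match the orthogonal polar factor U @ Vh obtained from an SVD.

M = torch.randn(64, 64, dtype=torch.float64)
U, S, Vh = torch.linalg.svd(M)
Q = polar_factor_newton_schulz(M)
print((Q - U @ Vh).norm())                                   # should be small
print((Q.T @ Q - torch.eye(64, dtype=torch.float64)).norm()) # near-orthogonal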
# batch_eigendecomp.py
import torch
from torch.utils.cpp_extension import load_inline
import argparse
import os
import shutil

def clear_cuda_cache():
    # Wipe torch's compiled-extension cache so load_inline rebuilds from source.
    cache_path = os.path.expanduser('~/.cache/torch_extensions')
    if os.path.exists(cache_path):
        shutil.rmtree(cache_path)
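For context, a minimal load_inline sketch: a toy C++ function stands in for the gist's actual kernel, which the preview does not show. load_inline prepends torch/extension.h to cpp_sources and generates Python bindings for the names in functions.

cpp_source = """
torch::Tensor symmetrize(torch::Tensor A) {
    // Make each matrix in the batch symmetric so eigh applies.
    return 0.5 * (A + A.transpose(-1, -2));
}
"""

ext = load_inline(
    name="symmetrize_ext",
    cpp_sources=cpp_source,
    functions=["symmetrize"],
)

A = torch.randn(8, 4, 4)
evals, evecs = torch.linalg.eigh(ext.symmetrize(A))  # batched eigendecomposition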
#!/bin/bash
# Install required packages if not present
check_and_install_dependencies() {
    local packages=("inotify-tools" "texlive" "texlive-latex-extra" "biber")
    echo "Checking and installing dependencies..."
    for package in "${packages[@]}"; do
        # dpkg -s is a more reliable installed-check than grepping dpkg -l,
        # whose column spacing varies.
        if ! dpkg -s "$package" &> /dev/null; then
            sudo apt-get install -y "$package"
        fi
    done
}
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
import numpy as np
import math

def compute_activation_std(model, dataset, device='cpu', batch_size=32, num_workers=0, layer_names=None):
    activations = {}
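    # The preview cuts off above; the rest is an assumed completion using
    # forward hooks (names and mechanics are mine, not the gist's).
    def make_hook(name):
        def hook(module, inputs, output):
            # Assumes each hooked layer returns a single tensor.
            activations.setdefault(name, []).append(output.detach().float().std().item())
        return hook

    handles = []
    for name, module in model.named_modules():
        is_leaf = len(list(module.children())) == 0
        if is_leaf and (layer_names is None or name in layer_names):
            handles.append(module.register_forward_hook(make_hook(name)))

    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                         num_workers=num_workers)
    model.to(device).eval()
    with torch.no_grad():
        for x, _ in loader:
            model(x.to(device))

    for h in handles:
        h.remove()
    # Mean per-batch standard deviation for each hooked layer.
    return {name: sum(v) / len(v) for name, v in activations.items()}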
import torch
import time

# TF32 on for matmul and cuDNN; bf16 reduced-precision reductions off globally,
# then toggled per call so the two accumulation modes can be compared.
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False

@torch.no_grad()
def benchmark_gemm(m, k, n, dtype=torch.bfloat16, allow_bf16_reduce=True):
    torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = allow_bf16_reduce
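    # The preview cuts off above; the timing loop below is an assumed
    # completion of the benchmark, not the gist's code.
    a = torch.randn(m, k, device='cuda', dtype=dtype)
    b = torch.randn(k, n, device='cuda', dtype=dtype)
    for _ in range(10):  # warmup: triggers cuBLAS heuristics / kernel selection
        a @ b
    torch.cuda.synchronize()
    iters = 100
    t0 = time.perf_counter()
    for _ in range(iters):
        a @ b
    torch.cuda.synchronize()
    dt = (time.perf_counter() - t0) / iters
    return 2 * m * k * n / dt / 1e12  # TFLOP/s (2*m*k*n flops per GEMM)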
import torch
import triton
import triton.language as tl
from triton.language.extra import libdevice

@triton.jit
def fractal_kernel(
    zr_ptr, zi_ptr, cr_ptr, ci_ptr, output_ptr,
    alpha_ptr, beta_ptr, poly0_ptr, poly1_ptr, poly2_ptr, poly3_ptr, p_ptr, R, max_iter,
    H, W,
    BLOCK_SIZE: tl.constexpr,  # assumed tail of the signature, which the preview truncates
):
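    # The gist preview ends at the signature; the body below is an assumed
    # completion, not the original: an escape-time iteration for the recurrence
    # z <- alpha * z^p + beta * (poly3*c^3 + poly2*c^2 + poly1*c + poly0),
    # guessed from the parameter names.
    offs = tl.program_id(0) * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    mask = offs < H * W
    zr = tl.load(zr_ptr + offs, mask=mask, other=0.0)
    zi = tl.load(zi_ptr + offs, mask=mask, other=0.0)
    cr = tl.load(cr_ptr + offs, mask=mask, other=0.0)
    ci = tl.load(ci_ptr + offs, mask=mask, other=0.0)
    alpha = tl.load(alpha_ptr)
    beta = tl.load(beta_ptr)
    c0 = tl.load(poly0_ptr)
    c1 = tl.load(poly1_ptr)
    c2 = tl.load(poly2_ptr)
    c3 = tl.load(poly3_ptr)
    p = tl.load(p_ptr)
    # poly(c) via Horner's scheme with complex c (constant per pixel, so
    # evaluated once before the loop).
    ar = c3 * cr + c2
    ai = c3 * ci
    tmp = ar * cr - ai * ci + c1
    ai = ar * ci + ai * cr
    ar = tmp
    tmp = ar * cr - ai * ci + c0
    ai = ar * ci + ai * cr
    ar = tmp
    count = tl.zeros([BLOCK_SIZE], dtype=tl.float32)
    for _ in range(max_iter):
        # z^p in polar form: |z|^p * (cos(p*theta) + i*sin(p*theta)).
        r = tl.sqrt(zr * zr + zi * zi)
        theta = libdevice.atan2(zi, zr)
        rp = libdevice.pow(r, p)
        zr_new = alpha * rp * tl.cos(p * theta) + beta * ar
        zi_new = alpha * rp * tl.sin(p * theta) + beta * ai
        zr = zr_new
        zi = zi_new
        # Count iterations before escape past radius R.
        count += tl.where(zr * zr + zi * zi < R * R, 1.0, 0.0)
    tl.store(output_ptr + offs, count, mask=mask)

# Host-side launch sketch (also assumed): alpha=beta=1, poly(c)=c, p=2
# reduces the recurrence to the classic z <- z^2 + c.
def render(H=512, W=512):
    ys = torch.linspace(-1.5, 1.5, H, device='cuda')
    xs = torch.linspace(-2.0, 1.0, W, device='cuda')
    cr = xs.repeat(H)
    ci = ys.repeat_interleave(W)
    zr = torch.zeros_like(cr)
    zi = torch.zeros_like(ci)
    out = torch.empty_like(cr)
    s = {k: torch.tensor(v, device='cuda') for k, v in
         dict(alpha=1.0, beta=1.0, c0=0.0, c1=1.0, c2=0.0, c3=0.0, p=2.0).items()}
    grid = (triton.cdiv(H * W, 1024),)
    fractal_kernel[grid](zr, zi, cr, ci, out,
                         s['alpha'], s['beta'], s['c0'], s['c1'], s['c2'], s['c3'], s['p'],
                         4.0, 200, H, W, BLOCK_SIZE=1024)
    return out.view(H, W)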