cloneofsimo / test.py
Created December 29, 2024 15:44
Is MFU correlated with watt usage in practice?
#!/usr/bin/env python
import torch
import time
import random
import numpy as np
import multiprocessing
from multiprocessing import Process, Manager, Event
import plotly.express as px
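The preview above stops at the imports. A minimal sketch of the experiment the title suggests, assuming a peak-FLOPs figure (989e12, H100 BF16 dense) and power sampling via nvidia-smi; these choices are illustrative, not from the gist body:

import subprocess
import time
import torch

def measure_mfu_and_watts(n=8192, iters=50, peak_flops=989e12):
    # Time a repeated large GEMM and convert achieved FLOP/s to MFU.
    a = torch.randn(n, n, device='cuda', dtype=torch.bfloat16)
    b = torch.randn(n, n, device='cuda', dtype=torch.bfloat16)
    torch.cuda.synchronize()
    t0 = time.time()
    for _ in range(iters):
        a @ b
    torch.cuda.synchronize()
    mfu = (2 * n**3 * iters / (time.time() - t0)) / peak_flops
    # Sample instantaneous board power (watts) from nvidia-smi.
    watts = float(subprocess.check_output(
        ['nvidia-smi', '--query-gpu=power.draw',
         '--format=csv,noheader,nounits']).decode().split()[0])
    return mfu, watts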
cloneofsimo / vis.py
Created December 5, 2024 00:57
twodimropevis
import torch

class TwoDimRotary(torch.nn.Module):
    def __init__(self, dim, base=100, h=128, w=128):
        super().__init__()
        # Standard RoPE inverse-frequency schedule over half the channel dim.
        self.inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim))
        self.h = h
        self.w = w
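The preview cuts off after __init__. A hedged sketch of how 2D rotary tables might be built from that schedule, applying the 1D frequencies to row and column indices separately; the function name and the concatenation layout are illustrative, not the gist's:

import torch

def two_dim_rope_angles(dim, h, w, base=100):
    inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim))
    ys = torch.arange(h).float()[:, None] * inv_freq[None, :]  # (h, dim/2)
    xs = torch.arange(w).float()[:, None] * inv_freq[None, :]  # (w, dim/2)
    # (h, w, dim) grid: first half of channels rotates with y, second with x.
    ang = torch.cat([ys[:, None, :].expand(h, w, -1),
                     xs[None, :, :].expand(h, w, -1)], dim=-1)
    return ang.cos(), ang.sin()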
cloneofsimo / runner.py
Last active November 14, 2024 20:47
Orthogonal weight update
https://x.com/jxbz/status/1857145985480438073
import torch

def polar_factor_newton_schulz(M, max_iter=50):
    # Newton-Schulz iteration for the orthogonal polar factor of M.
    # Frobenius normalization keeps singular values in (0, 1], inside the
    # iteration's basin of convergence (spectral norm < sqrt(3)).
    M_t = M / M.norm(p='fro')
    for _ in range(max_iter):
        M_t = 1.5 * M_t - 0.5 * M_t @ M_t.T @ M_t
    return M_t
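A hedged usage sketch in the spirit of the linked post: replace the raw gradient with its polar factor so each step moves along the nearest orthogonal matrix. The learning rate and in-place update are illustrative, not from the gist:

def orthogonal_update_(weight, grad, lr=1e-2, max_iter=50):
    # Step along the orthogonal polar factor of the gradient.
    with torch.no_grad():
        weight -= lr * polar_factor_newton_schulz(grad, max_iter=max_iter)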
cloneofsimo / syevjBatched.py
Created October 31, 2024 09:43
extended_syevjBatched torch
# batch_eigendecomp.py
import torch
from torch.utils.cpp_extension import load_inline
import argparse
import os
import shutil

def clear_cuda_cache():
    # Remove stale torch_extensions builds so load_inline recompiles cleanly.
    cache_path = os.path.expanduser('~/.cache/torch_extensions')
    if os.path.exists(cache_path):
        shutil.rmtree(cache_path)
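For reference (not part of the gist), PyTorch already ships a batched Hermitian eigensolver; a hand-rolled syevjBatched binding is mainly a route to cuSOLVER's Jacobi solver and its tolerance/sweep knobs:

import torch

A = torch.randn(64, 32, 32, device='cuda')
A = A + A.transpose(-1, -2)              # symmetrize each matrix in the batch
eigvals, eigvecs = torch.linalg.eigh(A)  # batched eigendecomposition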
cloneofsimo / watch_setup.sh
Created October 25, 2024 22:18
latex-watcher
#!/bin/bash
# Install required packages if not present
check_and_install_dependencies() {
    local packages=("inotify-tools" "texlive" "texlive-latex-extra" "biber")
    echo "Checking and installing dependencies..."
    for package in "${packages[@]}"; do
        # dpkg -s exits nonzero when the package is not installed; this is
        # more robust than grepping dpkg -l's column-aligned output.
        if ! dpkg -s "$package" >/dev/null 2>&1; then
            sudo apt-get install -y "$package"
        fi
    done
}
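The watch loop itself is not shown in the preview. A minimal sketch of what the setup presumably drives, using inotifywait from the inotify-tools dependency above; the main.tex target and the build commands are assumptions:

watch_and_build() {
    # Rebuild whenever anything in the source tree changes.
    while inotifywait -e modify,create,move -r .; do
        pdflatex -interaction=nonstopmode main.tex && biber main
    done
}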
cloneofsimo / unit_activation_reinitializer.py
Created October 15, 2024 10:41
Unit-Scale Activation Initialization by Brute Force search
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
import numpy as np
import math
def compute_activation_std(model, dataset, device='cpu', batch_size=32, num_workers=0, layer_names=None):
activations = {}
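The preview truncates just after activations = {}. A hedged completion sketch, assuming forward hooks over a single batch and per-layer output standard deviation; the name and details below are illustrative, not the gist's body:

def compute_activation_std_sketch(model, dataset, device='cpu', batch_size=32,
                                  num_workers=0, layer_names=None):
    activations = {}
    def hook(name):
        def fn(module, inputs, output):
            # Record the std of this layer's output activations.
            activations[name] = output.detach().float().std().item()
        return fn
    handles = [m.register_forward_hook(hook(n))
               for n, m in model.named_modules()
               if layer_names is None or n in layer_names]
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                         num_workers=num_workers)
    x, _ = next(iter(loader))
    model.to(device)(x.to(device))
    for h in handles:
        h.remove()
    return activations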

import torch
import time

# Enable TF32 matmuls; BF16 reductions default to full precision here and are
# toggled per call inside the benchmark.
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False

@torch.no_grad()
def benchmark_gemm(m, k, n, dtype=torch.bfloat16, allow_bf16_reduce=True):
    torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = allow_bf16_reduce
cloneofsimo / infinite_parameterized_fractal.py
Created October 3, 2024 18:03
Parameterized Fractal Triton
import torch
import triton
import triton.language as tl
from triton.language.extra import libdevice
@triton.jit
def fractal_kernel(
zr_ptr, zi_ptr, cr_ptr, ci_ptr, output_ptr,
alpha_ptr, beta_ptr, poly0_ptr, poly1_ptr, poly2_ptr, poly3_ptr, p_ptr, R, max_iter,
H, W,
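The signature is cut off mid-list and the body is not shown. A hedged, self-contained sketch of an escape-time inner loop in Triton; the z <- z^2 + c update stands in for the gist's parameterized polynomial:

import triton
import triton.language as tl

@triton.jit
def escape_time_kernel(zr_ptr, zi_ptr, cr_ptr, ci_ptr, out_ptr,
                       R, max_iter, N, BLOCK: tl.constexpr):
    offs = tl.program_id(0) * BLOCK + tl.arange(0, BLOCK)
    mask = offs < N
    zr = tl.load(zr_ptr + offs, mask=mask)
    zi = tl.load(zi_ptr + offs, mask=mask)
    cr = tl.load(cr_ptr + offs, mask=mask)
    ci = tl.load(ci_ptr + offs, mask=mask)
    count = tl.zeros((BLOCK,), dtype=tl.float32)
    for _ in range(max_iter):
        new_zr = zr * zr - zi * zi + cr   # z <- z^2 + c (real part)
        zi = 2 * zr * zi + ci             # imaginary part (uses old zr)
        zr = new_zr
        # Count iterations while |z| stays below the escape radius R.
        count += (zr * zr + zi * zi < R * R).to(tl.float32)
    tl.store(out_ptr + offs, count, mask=mask)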
cloneofsimo / polynomial-sphere-map.md
Last active September 28, 2024 09:35
Does there exist a polynomial map of degree m sending S^n to itself?

Variant of AM-GM for Minimization

When dealing with functions of the form $f(x) = x^a + \frac{1}{x^b}$, a variant of the AM-GM inequality can be used to find the minimum. Specifically, if you have:

$$ f(x) = c_1 \cdot x^a + c_2 \cdot \frac{1}{x^b} $$

The minimum occurs at:

$$ x^{*} = \left( \frac{b \, c_2}{a \, c_1} \right)^{\frac{1}{a+b}} $$
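A quick calculus check agrees with the AM-GM argument: setting

$$ f'(x) = a \, c_1 \, x^{a-1} - \frac{b \, c_2}{x^{b+1}} = 0 $$

gives $x^{a+b} = \frac{b \, c_2}{a \, c_1}$, i.e. the minimizer above (for $a, b, c_1, c_2 > 0$).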