Skip to content

Instantly share code, notes, and snippets.

View zoecarver's full-sized avatar

Zoe Carver zoecarver

View GitHub Profile
# SPDX-FileCopyrightText: (c) 2025 Tenstorrent AI ULC
#
# SPDX-License-Identifier: Apache-2.0
"""
Sweep benchmark: mcast matmul, balanced matmul, balanced matmul+relu vs ttnn.
mcast: A+B mcast both on dm_read, grid="auto" (works at any size)
balanced: A on dm_read, B on dm_write (two-NoC), 1 block/core
relu: balanced matmul with fused relu on last K iteration
tt-lang compute (M=8, N=8, K=8, 21ms)
// Phase 1: Zero-fill 64 acc tiles (8x8 output block)
cb_reserve_back(cb2, 64);
for (m = 0; m < 8; m++) // M=8
for (n = 0; n < 8; n += 4) // N=8, subblock of 4
acquire; fill x4; commit; wait; pack x4; release;
@zoecarver
zoecarver / file.py
Last active April 7, 2026 01:54
rms norm + attn v2
# ===== RMSNorm per-row kernels =====
@ttl.compute()
def rmsnorm_compute_row(x_dfb, sc_dfb, ms_dfb, sq_dfb, red_dfb, acc_dfb,
bcast_dfb, rsq_dfb, out_dfb):
sc = sc_dfb.pop_back_val()
ms = ms_dfb.pop_back_val()
# Pass 1: sum of squares
x0 = x_dfb.pop_back_val()
@zoecarver
zoecarver / gist:559691c360554f38caa2db0d8827b22e
Created April 7, 2026 01:46
rms norm + attention after.py
@ttl.compute()
def compute(x_dfb, sc_dfb, ms_dfb, sq_dfb, red_dfb, acc_dfb, bcast_dfb, rsq_dfb, out_dfb):
core_x, _ = ttl.node(dims=2)
sc = sc_dfb.pop_back_val()
ms = ms_dfb.pop_back_val()
for local_t in range(tiles_per_core):
tile_idx = core_x * tiles_per_core + local_t
if tile_idx < seq_tiles:
# Pass 1: sum of squares across dim tiles
@zoecarver
zoecarver / gist:e6b9b7a002f3f9cece2a80c5e5557b8e
Last active April 7, 2026 01:46
rmsnorm + attention before
@ttl.operation(grid="auto")
def rmsnorm_kernel(x, scaler, mean_scale, out):
grid_cols, _ = ttl.grid_size(dims=2)
seq_tiles = x.shape[0] // TILE
tiles_per_core = -(-seq_tiles // grid_cols)
x_dfb = ttl.make_dataflow_buffer_like(x, shape=(1, 1), buffer_factor=2)
sc_dfb = ttl.make_dataflow_buffer_like(scaler, shape=(1, 1), buffer_factor=1)
ms_dfb = ttl.make_dataflow_buffer_like(mean_scale, shape=(1, 1), buffer_factor=1)
sq_dfb = ttl.make_dataflow_buffer_like(x, shape=(1, 1), buffer_factor=2)
Input:
# =============================================================================
# Tensor accessor page print
# =============================================================================
@ttl.kernel(grid=(1, 1))
def tensor_accessor_print(inp, out):
inp_dfb = ttl.make_dataflow_buffer_like(inp, shape=(1, 1), buffer_factor=2)
"""
Cell-list molecular dynamics on Tenstorrent hardware using TT-Lang.
Full Ewald electrostatics: erfc-damped real-space (cell-list, TT kernel) +
u-series reciprocal-space (separable Gaussian convolution, TT kernel + host).
LJ short-range forces. Periodic boundary conditions.
On-device Verlet integration (f32) with bf16 force kernels.
Validated: 10K atoms, 10K steps, 1.1ms/step (non-rebuild), 12 min total.
"""
# Saturn Privacy Policy
**Last Updated:** January 2025
## Overview
Saturn is a voice assistant app that connects you to Google's Gemini AI and the internet while prioritizing your privacy. This policy explains how we handle your data.
**Note:** Saturn connects to the internet and processes content through third-party AI services, which may not be suitable for children.
*** with-deleted.txt 2021-04-22 11:20:57.000000000 -0700
--- without-deleted.txt 2021-04-22 11:21:38.000000000 -0700
***************
*** 1,42 ****
! /Users/zoecarver/Developer/llvm-source/llvm-project/libcxx/test/std/ranges/range.access/range.prim/size.compile.pass.cpp:199:3: error: call to deleted function call operator in type 'const std::ranges::__size::__fn'
std::ranges::size(d);
^~~~~~~~~~~~~~~~~
- /Users/zoecarver/Developer/llvm-source/build/include/c++/v1/__ranges/size.h:105:10: note: candidate function [with auto:1 = InvalidMinusBeginEnd &] has been explicitly deleted
- size_t operator()(auto &&) const = delete;
- ^
@zoecarver
zoecarver / The SILOptimizer as Swift sees it.swift
Last active December 24, 2020 20:03
A dump of the entire SILOptimizer and all its dependencies.
This file has been truncated, but you can view the full file.
import std_config
var SWIFT_LLVM_ODR_SAFE: Int32 { get }
extension swift {
typealias SmallBitVector = llvm.SmallBitVector
typealias StringLiteral = llvm.StringLiteral
typealias StringRef = llvm.StringRef
typealias Twine = llvm.Twine
typealias APFloat = llvm.APFloat
typealias APInt = llvm.APInt