dtype | SOTA | 2.2.2+eager | 2.3.0+eager | 2.3.0+compile | trunk + compile |
---|---|---|---|---|---|
bfloat16 (M1) | 111 tokens/sec | 110 tokens/sec | 80 tokens/sec | ||
float32 (M1) | 687 tokens/sec | 165 tokens/sec | 176 tokens/sec | ||
float16 (M1) | 1106 tokens/sec | 50 tokens/sec | 187 tokens/sec | ||
float16 (LinX86) | 40 tokens/sec | 43 tokens/sec | 173 tokens/sec | ||
float32 (LinX86) | 38 tokens/sec | 40 tokens/sec | 179 tokens/sec | ||
bfloat16 (LinX86) | 73 tokens/sec | 78 tokens/sec | 180 tokens/sec |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import MetalPerformanceShadersGraph | |
let graph = MPSGraph() | |
let x = graph.constant(1, shape: [32, 4096, 40], dataType: .float32) | |
let y = graph.constant(1, shape: [32, 40, 4096], dataType: .float32) | |
let z = graph.matrixMultiplication(primary: x, secondary: y, name: nil) | |
let device = MTLCreateSystemDefaultDevice()! | |
let buf = device.makeBuffer(length: 16384)! | |
let td = MPSGraphTensorData(buf, shape: [64, 64], dataType: .int32) | |
let cmdBuf = MPSCommandBuffer(from: device.makeCommandQueue()!) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Benchmark relative performance of torch.mm and torch.bmm with single batch | |
import torch | |
import time | |
def benchmark_fn(fn, args, warmup=5, cycles=300, use_kineto=False) -> float: | |
if use_kineto: | |
with torch.profiler.profile(activities=[torch.profiler.ProfilerActivity.CUDA]) as p: | |
fn(*args) | |
return sum([e.cuda_time for e in p.key_averages()]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import triton | |
import triton.language as tl | |
@triton.jit | |
def kernel(in_ptr0, out_ptr0, xnumel, XBLOCK: tl.constexpr): | |
xnumel = 10 | |
xoffset = tl.program_id(0) * XBLOCK | |
xindex = xoffset + tl.arange(0, XBLOCK)[:] | |
xmask = xindex < xnumel | |
x0 = xindex |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn.functional as F | |
def to_float8(x, dtype=torch.float8_e4m3fn): | |
finfo = torch.finfo(dtype) | |
# Calculate the scale as dtype max divided by absmax | |
scale = finfo.max / x.abs().max().clamp(min=1e-12) | |
# scale and clamp the tensor to bring it to | |
# the representable range of float8 data type | |
# (as default cast is unsaturated) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// My attempt at FP8 matmul implementation | |
#include <iostream> | |
#include <vector> | |
#include <numeric> | |
#include <cublasLt.h> | |
#include <cuda_fp8.h> | |
#include <stdio.h> | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# For some reason this does not work when copied and pasted from anything other than the raw file, but otherwise it should hang | |
import re | |
pat=re.compile('\\.\\. (code-block|math)::.*$\\n*(?P<S2VCUH>(?P<first>(^(?P<indent>[ ]+).*$\\n))(?P<other>(^([ \\t]+.*|[ \\t]*)$\\n)*))(?:(^(?![ \\t]+.*$))|\\Z)', re.MULTILINE) | |
text="""##################################################################### | |
We get the following performance profiling table for the eager-mode model (omitting some columns): | |
.. code-block:: shell | |
------------------------- ------------ ------------ ------------ ------------ | |
Name CPU total % CPU total CPU time avg # of Calls |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Adapted from https://rosettacode.org/wiki/Square_root_by_hand | |
def next_digit(val, k):
    """Return one less than the first d in 1..10 for which val < d * (k + d).

    The candidates d = 1, 2, ..., 10 are tried in increasing order and the
    search stops at the first one whose product d * (k + d) exceeds ``val``;
    the value returned is therefore always in the range 0..9.

    NOTE(review): presumably this picks the next decimal digit in the
    "square root by hand" algorithm referenced in the file header, with
    ``val`` the current remainder and ``k`` derived from the partial root --
    the caller is truncated in this view, so confirm against it.

    Raises:
        RuntimeError: if ``val >= d * (k + d)`` for every d in 1..10, i.e.
            no candidate digit satisfies the inequality.
    """
    for d in range(1, 11):
        # First d that overshoots val means d - 1 was the last one that fit.
        if val < d * (k + d):
            return d - 1
    raise RuntimeError("Impossible")
def compute_sqrt(val=2, num_char=500): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import boto3 | |
import pandas as pd | |
from datetime import datetime, timedelta | |
from typing import Optional | |
cloudwatch = boto3.client("cloudwatch") | |
ec2 = boto3.resource("ec2") | |
def ec2_get_instances(filter_name, filter_value): | |
return ec2.instances.filter(Filters=[{'Name': filter_name, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Python 3.10.8 (main, Nov 24 2022, 08:08:27) [Clang 14.0.6 ] on darwin | |
Type "help", "copyright", "credits" or "license" for more information. | |
>>> import torch | |
>>> import whisper | |
>>> torch.__version__ | |
'2.0.0a0+git01de5dd' | |
>>> model = whisper.load_model("base") | |
>>> audio = whisper.load_audio("c1.mp3") # downloaded from https://www.mobydickbigread.com/chapter-1-loomings/ | |
>>> audio = whisper.pad_or_trim(audio) | |
>>> model.transcribe(audio)["text"] |