Created
November 5, 2022 01:02
-
-
Save Birch-san/cba16789ec27bb20996a4b4831b13ce0 to your computer and use it in GitHub Desktop.
benchmark: batched matmul
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time
from typing import Callable

import torch
from torch import einsum, matmul, bmm
# Number of timed iterations each benchmark runs by default.
repeats = 10


def benchmark(
    label: str,
    op: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],
    *,
    iterations: int = repeats,
    device: str = "mps",
    batch: int = 16,
    tokens: int = 4096,
    dim: int = 40,
) -> float:
    """Time ``op(attn, v)`` over several iterations and print the results.

    Each iteration builds fresh random inputs ``attn`` of shape
    ``(batch, tokens, tokens)`` and ``v`` of shape ``(batch, tokens, dim)``
    on ``device``; only the call to ``op`` plus the ``.max().item()``
    reduction is inside the timed region.

    Args:
        label: Name used in the printed report lines.
        op: Batched matrix-multiply callable taking ``(attn, v)``.
        iterations: How many timed runs to perform (must be >= 1).
        device: Torch device string the tensors are allocated on.
        batch: Leading (batch) dimension of both inputs.
        tokens: Size of both trailing dimensions of ``attn`` and of the
            middle dimension of ``v``.
        dim: Trailing dimension of ``v``.

    Returns:
        Total wall-clock seconds spent across all timed iterations.

    Raises:
        ValueError: If ``iterations`` is less than 1 (the average would be
            undefined).
    """
    if iterations < 1:
        raise ValueError(f"iterations must be >= 1, got {iterations}")

    # NOTE(review): no warm-up pass is run, so the first sample may include
    # one-time setup cost -- compare the per-iteration lines, not only the avg.
    total = 0.0
    for ix in range(iterations):
        # Inputs are created before the clock starts so allocation and RNG
        # are not attributed to the op under test.
        attn = torch.rand(batch, tokens, tokens, dtype=torch.float, device=device)
        v = torch.rand(batch, tokens, dim, dtype=torch.float, device=device)

        start = time.perf_counter()
        # .item() brings a single scalar back to Python, so the timer stops
        # only after the result has actually been computed.
        op(attn, v).max().item()
        duration = time.perf_counter() - start

        print(f'{label} iteration {ix} took {duration:.4f} seconds')
        total += duration

    print(
        f'{iterations} iterations of {label} took {total:.4f} seconds; '
        f'avg {total / iterations:.4f} secs'
    )
    return total


def main() -> None:
    """Run the einsum, matmul and bmm benchmarks back to back."""
    benchmark('einsum 1', lambda attn, v: einsum('b i j, b j d -> b i d', attn, v))
    benchmark('matmul', matmul)
    benchmark('bmm', bmm)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
8 Heun steps, building on top of the baddbmm optimization from:
https://gist.github.com/Birch-san/8f3eb99deffdc3541595e46a01605dea