This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Original kernel is from https://github.com/triton-lang/triton/issues/4906. | |
This kernel is modified to use dot_scaled and fp4. It _should_ be faster than int4 because it skips the int->float conversion, but it's not. | |
""" | |
# pip install -U --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/ triton-nightly; | |
# OMP_NUM_THREADS=16 CUDA_VISIBLE_DEVICES=0 ipython3 A100_vs_4090_test.py | |
########################################################################## | |
import torch |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Results: | |
# | |
# Vertical indices ms: 2.8862898349761963 | |
# Horizontal indices ms: 0.3734990060329437 | |
import torch | |
import triton | |
import triton.language as tl | |
BLOCK_SIZE = 64 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- mobicham.py 2024-11-25 14:02:15.355460967 -0800 | |
+++ mobicham_fp4.py 2024-11-25 14:44:09.015276420 -0800 | |
@@ -42,6 +42,7 @@ | |
a_ptr, b_ptr, c_ptr, | |
M, N, K, | |
elements_per_sample: tl.constexpr, | |
+ b_type: tl.constexpr, | |
stride_am, stride_ak, | |
stride_bk, stride_bn, | |
stride_cm, stride_cn, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip install -U --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/ triton-nightly; | |
# OMP_NUM_THREADS=16 CUDA_VISIBLE_DEVICES=0 ipython3 A100_vs_4090_test.py | |
########################################################################## | |
import torch | |
import triton | |
import triton.language as tl | |
from triton.testing import do_bench | |
import itertools |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# AOT ID: ['0_inference'] | |
from ctypes import c_void_p, c_long, c_int | |
import torch | |
import math | |
import random | |
import os | |
import tempfile | |
from math import inf, nan | |
from torch._inductor.hooks import run_intermediate_hooks | |
from torch._inductor.utils import maybe_profile |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# AOT ID: ['0_inference'] | |
from ctypes import c_void_p, c_long, c_int | |
import torch | |
import math | |
import random | |
import os | |
import tempfile | |
from math import inf, nan | |
from torch._inductor.hooks import run_intermediate_hooks | |
from torch._inductor.utils import maybe_profile |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# AOT ID: ['0_inference'] | |
from ctypes import c_void_p, c_long, c_int | |
import torch | |
import math | |
import random | |
import os | |
import tempfile | |
from math import inf, nan | |
from torch._inductor.hooks import run_intermediate_hooks | |
from torch._inductor.utils import maybe_profile |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch._functorch.config | |
def fn(values, offsets, w): | |
for _ in range(10): | |
nt = torch.nested.nested_tensor_from_jagged(values, offsets, min_seqlen=1, max_seqlen=4).view(-1, -1, 4, 16).transpose(1, 2) | |
nt = torch.nn.functional.scaled_dot_product_attention(nt, nt, nt) | |
values = nt.transpose(1, 2).view(-1, -1, 64).values().cos() | |
values = values @ w | |
return values |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/home/dberard/local/pytorch/torch/backends/cudnn/__init__.py:106: UserWarning: PyTorch was compiled without cuDNN/MIOpen support. To use cuDNN/MIOpen, rebuild PyTorch making sure the library is visible to the build system. | |
warnings.warn( | |
/home/dberard/local/pytorch/torch/backends/cudnn/__init__.py:106: UserWarning: PyTorch was compiled without cuDNN/MIOpen support. To use cuDNN/MIOpen, rebuild PyTorch making sure the library is visible to the build system. | |
warnings.warn( | |
/home/dberard/local/miniconda3/envs/pytorch/lib/python3.10/site-packages/z3/z3core.py:5: DeprecationWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html | |
import pkg_resources | |
/home/dberard/local/miniconda3/envs/pytorch/lib/python3.10/site-packages/pkg_resources/__init__.py:2871: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('ruamel')`. | |
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See ht |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
E | |
====================================================================== | |
ERROR: test_torch_function_call_to_size_within_aot_autograd_graph (__main__.TestNestedTensor.test_torch_function_call_to_size_within_aot_autograd_graph) | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/data/users/dberard/pytorch/torch/testing/_internal/common_utils.py", line 2739, in wrapper | |
method(*args, **kwargs) | |
File "/data/users/dberard/pytorch/test/dynamo/test_subclasses.py", line 1403, in test_torch_function_call_to_size_within_aot_autograd_graph | |
compiled_fn(x, y) | |
File "/data/users/dberard/pytorch/torch/_dynamo/eval_frame.py", line 451, in _fn |