_________________ TestAutoQuant.test_autoquant_compile_12_cuda _________________
a = (<test_integration.TestAutoQuant testMethod=test_autoquant_compile_12_cuda>,)
kw = {}
    @wraps(func)
    def standalone_func(*a, **kw):
>       return func(*(a + p.args), **p.kwargs, **kw)
/opt/conda/envs/venv/lib/python3.9/site-packages/parameterized/parameterized.py:620:
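The frames above come from the parameterized package rather than the test itself: @parameterized.expand generates one test method per parameter set (hence the _12_cuda suffix) and dispatches through its standalone_func wrapper, so failures surface in parameterized.py before reaching the test body. A minimal sketch of that mechanism, with illustrative parameters that are not taken from the actual suite:

import unittest
from parameterized import parameterized

class TestAutoQuantSketch(unittest.TestCase):
    # Each tuple becomes its own method, e.g. test_autoquant_compile_0_cpu,
    # test_autoquant_compile_1_cuda, ... (index plus a name derived from args).
    @parameterized.expand([("cpu",), ("cuda",)])
    def test_autoquant_compile(self, device):
        self.assertIn(device, ("cpu", "cuda"))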
/home/cdhernandez/.conda/envs/pytorch-3.12/lib/python3.12/contextlib.py:105: FutureWarning: `torch.backends.cuda.sdp_kernel()` is deprecated. In the future, this context manager will be removed. Please see `torch.nn.attention.sdpa_kernel()` for the new context manager, with updated signature.
  self.gen = func(*args, **kwds)
V0401 02:34:28.775000 3240940 site-packages/torch/_inductor/codecache.py:1091] [2/0_1] [__output_code] Output code:
V0401 02:34:28.775000 3240940 site-packages/torch/_inductor/codecache.py:1091] [2/0_1] [__output_code] # AOT ID: ['0_inference']
V0401 02:34:28.775000 3240940 site-packages/torch/_inductor/codecache.py:1091] [2/0_1] [__output_code] from ctypes import c_void_p, c_long, c_int
V0401 02:34:28.775000 3240940 site-packages/torch/_inductor/codecache.py:1091] [2/0_1] [__output_code] import torch
V0401 02:34:28.775000 3240940 site-packages/torch/_inductor/codecache.py:1091] [2/0_1] [__output_code] import math
V0401 02:34:28.775000 3240940 site-packages/torch/_inductor/codecache.py:1
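The FutureWarning at the top of this log is actionable: torch.backends.cuda.sdp_kernel() is deprecated in favor of torch.nn.attention.sdpa_kernel(), which takes the allowed backends explicitly instead of enable_* flags. A minimal sketch of the migration (shapes and dtype are illustrative):

import torch
from torch.nn.attention import SDPBackend, sdpa_kernel

q = k = v = torch.randn(1, 8, 128, 64, device="cuda", dtype=torch.float16)

# Deprecated: with torch.backends.cuda.sdp_kernel(enable_flash=True, ...): ...
# Replacement: name the allowed backend(s) directly.
with sdpa_kernel(SDPBackend.FLASH_ATTENTION):
    out = torch.nn.functional.scaled_dot_product_attention(q, k, v)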
V0320 11:46:30.704000 18434 site-packages/torch/_dynamo/utils.py:1782] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "6dda4945313dbc76cddf217f3df965aa"}
{
    "name": "dynamo",
    "ts": 1742496390704270.5,
    "args": {
        "compile_id": "0/0"
    },
    "ph": "B",
    "cat": "dynamo_timed",
    "tid": 0,
import torch
import torch.nn as nn
from torch import Tensor
from torch.nn import functional as F
from dataclasses import dataclass

torch.manual_seed(0)

# T tokens
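This gist is cut off right after the seed; the imports (dataclass, nn, F) and the "# T tokens" comment suggest it goes on to define a small transformer. Purely as an illustration of the shape such a continuation usually takes (every field below is a guess, not recovered text):

# Hypothetical continuation -- the original gist is truncated above.
@dataclass
class ModelArgs:
    block_size: int = 2048  # maximum sequence length (T tokens per forward)
    n_head: int = 8
    dim: int = 512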
# BSR benchmarks
export CHECKPOINT_PATH=../../../checkpoints  # path to checkpoints folder
export MODEL_REPO=meta-llama/Meta-Llama-3.1-8B
python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --write_result bsr_bench_results.txt
python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization sparse-marlin --sparsity semi-structured --precision float16 --write_result bsr_bench_results.txt
python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --sparsity semi-structured --precision float16 --write_result bsr_bench_results.txt
python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --compile_prefill --write_result bsr_bench_results.txt --sparsity bsr-0.8-32
python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --compile_prefill --write_result bsr_bench_results.txt --sparsity bsr-0.8-64
python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/mod
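The --sparsity bsr-0.8-32 and bsr-0.8-64 flags appear to encode a block-sparse-row (BSR) layout with 80% of blocks zeroed and a block size of 32 or 64. A standalone illustration of that layout using stock PyTorch (the 0.8/32 values mirror the flags above; whether the final matmul dispatches to a fast kernel depends on your build):

import torch

blocksize = 32
w = torch.randn(4096, 4096, device="cuda", dtype=torch.bfloat16)

# Zero out ~80% of the (32, 32) blocks to mimic 0.8 block sparsity.
blocks = w.view(4096 // blocksize, blocksize, 4096 // blocksize, blocksize)
keep = torch.rand(4096 // blocksize, 1, 4096 // blocksize, 1, device="cuda") > 0.8
w = (blocks * keep).view(4096, 4096)

w_bsr = w.to_sparse_bsr(blocksize)  # compressed block-sparse weight
x = torch.randn(4096, 16, device="cuda", dtype=torch.bfloat16)
y = w_bsr @ x                       # sparse @ dense matmul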
W1008 09:22:11.858000 1289935 torch/_higher_order_ops/triton_kernel_wrap.py:503] [0/0] ValueError: Incorrect number of arguments passed to kernel
W1008 09:22:11.870000 1289935 torch/_higher_order_ops/triton_kernel_wrap.py:503] [0/0] Encountered an exception in identify_mutated_tensors, assuming every input is mutated
W1008 09:22:11.870000 1289935 torch/_higher_order_ops/triton_kernel_wrap.py:503] [0/0] Traceback (most recent call last):
W1008 09:22:11.870000 1289935 torch/_higher_order_ops/triton_kernel_wrap.py:503] [0/0]   File "/home/cdhernandez/local/pytorch/torch/_higher_order_ops/triton_kernel_wrap.py", line 482, in identify_mutated_tensors
W1008 09:22:11.870000 1289935 torch/_higher_order_ops/triton_kernel_wrap.py:503] [0/0]     ttir_module, ordered_tensor_names = generate_ttir(kernel, kwargs)
W1008 09:22:11.870000 1289935 torch/_higher_order_ops/triton_kernel_wrap.py:503] [0/0]   File "/home/cdhernandez/local/pytorch/torch/_higher_order_ops/triton_kernel_wrap.py", line 139, in generate_ttir
W1008 09:22
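This warning fires when Dynamo cannot lower a user-defined Triton kernel to TTIR for mutation analysis (here because the launch passed the wrong number of arguments); it then conservatively assumes every input is mutated, which can block downstream optimizations. The fix is to make the launch match the kernel signature exactly. A minimal sketch of a correctly invoked user-defined kernel under torch.compile (names and sizes are illustrative):

import torch
import triton
import triton.language as tl

@triton.jit
def add_kernel(x_ptr, y_ptr, out_ptr, n_elements, BLOCK_SIZE: tl.constexpr):
    pid = tl.program_id(axis=0)
    offsets = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    mask = offsets < n_elements
    x = tl.load(x_ptr + offsets, mask=mask)
    y = tl.load(y_ptr + offsets, mask=mask)
    tl.store(out_ptr + offsets, x + y, mask=mask)

@torch.compile
def add(x, y):
    out = torch.empty_like(x)
    n = x.numel()
    # The argument list must match the kernel signature exactly; otherwise
    # generate_ttir raises and mutation analysis falls back as in the log above.
    add_kernel[(triton.cdiv(n, 1024),)](x, y, out, n, BLOCK_SIZE=1024)
    return out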
# OMP_NUM_THREADS=16 CUDA_VISIBLE_DEVICES=0 ipython3 benchmark_triton.py  # select the right number of threads for your machine
# You can change the matmul_dtype: GEMM, GEMV or AUTO
# Note: bfloat16 is only supported in GEMM mode with float32 accumulation
#################################################################################################################################
import torch
import numpy as np

device = 'cuda:0'
compute_dtype = torch.float16
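The rest of the file (truncated here) benchmarks Triton matmuls in those modes. A generic CUDA-event timing helper of the sort such scripts rely on, reusing device and compute_dtype from above (the helper itself is illustrative, not from the gist):

def benchmark_ms(fn, iters=100, warmup=10):
    # Warm up so compilation/caching does not pollute the measurement.
    for _ in range(warmup):
        fn()
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    torch.cuda.synchronize()
    start.record()
    for _ in range(iters):
        fn()
    end.record()
    torch.cuda.synchronize()
    return start.elapsed_time(end) / iters  # milliseconds per call

a = torch.randn(4096, 4096, device=device, dtype=compute_dtype)
b = torch.randn(4096, 4096, device=device, dtype=compute_dtype)
print(benchmark_ms(lambda: a @ b))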
export CHECKPOINT_PATH=../../../checkpoints  # path to checkpoints folder

# README EVALUATIONS
export MODEL_REPO=meta-llama/Llama-2-7b-chat-hf
python eval.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth  # 12.212
python eval.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int8dq --compile  # 12.262
python eval.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int8wo  # 12.204
python eval.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization fp6 --compile --precision float16  # 12.369
python eval.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int4wo-64-hqq  # 12.825717540084083
python eval.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int4wo-64  # 12.87233037343588
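For reference, the quantization flags map onto torchao's one-line quantize_ API: int8wo is int8 weight-only, int8dq is int8 dynamic-activation/int8-weight, int4wo-64 is int4 weight-only with group size 64, and the -hqq suffix switches the int4 algorithm to HQQ. A sketch of the underlying calls, assuming a recent torchao (the exact flag parsing lives in eval.py):

import torch
from torchao.quantization import quantize_, int8_weight_only, int4_weight_only

model = torch.nn.Sequential(torch.nn.Linear(4096, 4096)).cuda().to(torch.bfloat16)

quantize_(model, int8_weight_only())                  # --quantization int8wo
# quantize_(model, int4_weight_only(group_size=64))   # --quantization int4wo-64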