This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
W1008 09:22:11.858000 1289935 torch/_higher_order_ops/triton_kernel_wrap.py:503] [0/0] ValueError: Incorrect number of arguments passed to kernel | |
W1008 09:22:11.870000 1289935 torch/_higher_order_ops/triton_kernel_wrap.py:503] [0/0] Encountered an exception in identify_mutated_tensors, assuming every input is mutated | |
W1008 09:22:11.870000 1289935 torch/_higher_order_ops/triton_kernel_wrap.py:503] [0/0] Traceback (most recent call last): | |
W1008 09:22:11.870000 1289935 torch/_higher_order_ops/triton_kernel_wrap.py:503] [0/0] File "/home/cdhernandez/local/pytorch/torch/_higher_order_ops/triton_kernel_wrap.py", line 482, in identify_mutated_tensors | |
W1008 09:22:11.870000 1289935 torch/_higher_order_ops/triton_kernel_wrap.py:503] [0/0] ttir_module, ordered_tensor_names = generate_ttir(kernel, kwargs) | |
W1008 09:22:11.870000 1289935 torch/_higher_order_ops/triton_kernel_wrap.py:503] [0/0] File "/home/cdhernandez/local/pytorch/torch/_higher_order_ops/triton_kernel_wrap.py", line 139, in generate_ttir | |
W1008 09:22 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#OMP_NUM_THREADS=16 CUDA_VISIBLE_DEVICES=0 ipython3 benchmark_triton.py #select the right number of threads based on your machine | |
#You can change the matmul_dtype: GEMM, GEMV or AUTO | |
#Note: bfloat16 only supported in GEMM mode with float32 accumulation | |
################################################################################################################################# | |
import torch | |
import numpy as np | |
device = 'cuda:0' | |
compute_dtype = torch.float16 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
export CHECKPOINT_PATH=../../../checkpoints # path to checkpoints folder | |
# README EVALUATIONS | |
export MODEL_REPO=meta-llama/Llama-2-7b-chat-hf | |
python eval.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth #12.212 | |
python eval.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int8dq --compile #12.262 | |
python eval.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int8wo #12.204 | |
python eval.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization fp6 --compile --precision float16 #12.369 | |
python eval.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int4wo-64-hqq #12.825717540084083 | |
python eval.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int4wo-64 #12.87233037343588 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
from torch.utils._pytree import tree_flatten, tree_unflatten | |
import gc | |
class MultiTensor(torch.Tensor): | |
@staticmethod | |
def __new__(cls, input, **kwargs): | |
if isinstance(input, (list, tuple)): | |
input = input[0] | |
kwargs["dtype"]=kwargs.get("dtype", input.dtype) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
from lm_eval.models.huggingface import HFLM | |
from lm_eval.evaluator import evaluate | |
from lm_eval.tasks import get_task_dict | |
path_to_hf_checkpoint = "/home/cdhernandez/local/gpt-fast/checkpoints/meta-llama/Meta-Llama-3-8B" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
from torch.utils._pytree import tree_flatten, tree_unflatten | |
class MultiTensor(torch.Tensor): | |
@staticmethod | |
def __new__(cls, input, **kwargs): | |
if isinstance(input, (list, tuple)): | |
input = input[0] | |
kwargs["dtype"]=kwargs.get("dtype", input.dtype) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn.functional as F | |
import triton | |
import triton.language as tl | |
from triton import Config | |
from torch._inductor import config | |
from torch import _dynamo | |
aten = torch.ops.aten | |
def get_configs_io_bound(): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn.functional as F | |
import triton | |
import triton.language as tl | |
from triton.ops.matmul import matmul as triton_matmul | |
from triton.ops.matmul import _kernel | |
from triton import Config | |
from torch._inductor import config | |
from torch import _dynamo | |
torch._inductor.config.coordinate_descent_tuning = True |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
###################################################################### | |
# Comparing Torchao # | |
# and BitsandBytes # | |
###################################################################### | |
# Set up Your Environment | |
# -------------------------------- | |
# | |
# First, let's configure your environment. This guide requires you to use CUDA 12.1. | |
# We have run this tutorial on an A100-PG509-200 power limited to 330.00 W. If you | |
# are using a different hardware, you might see different performance numbers. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/home/cdhernandez/.conda/envs/pytorch-3.10/lib/python3.10/site-packages/transformers/utils/generic.py:441: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. | |
_torch_pytree._register_pytree_node( | |
/home/cdhernandez/.conda/envs/pytorch-3.10/lib/python3.10/site-packages/transformers/utils/generic.py:309: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. | |
_torch_pytree._register_pytree_node( | |
/home/cdhernandez/local/diffusers/src/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. | |
torch.utils._pytree._register_pytree_node( | |
Namespace(no_bf16=False, no_sdpa=False, batch_size=1, num_inference_steps=30, enable_fused_projections=True, upcast_vae=False, compile_unet=True, compile_vae=True, compile_mode='max-autotune', change_comp_config=True, do_quan |
NewerOlder