GitHub Gists: Prashant Kumar (pashu123)
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
from torch.fx.experimental.proxy_tensor import make_fx
from torch._decomp import get_decompositions
import tempfile
import torch_mlir


def prepare_sentence_tokens(hf_model: str, sentence: str):
    # Tokenize one sentence into a [1, seq_len] tensor of input ids.
    # (Body restored from the truncated preview; the sentence argument
    # is the natural missing piece given the function's name.)
    tokenizer = AutoTokenizer.from_pretrained(hf_model)
    return torch.tensor([tokenizer.encode(sentence)])
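Taken together, these imports are the standard ingredients of a make_fx -> torch-mlir lowering flow. A minimal sketch of how they typically combine, assuming a T5-style model; the model name, the lambda wrapper, and the decomposition choice are illustrative, not taken from this gist:

# Hedged sketch: trace to a functional FX graph, then lower to linalg.
model = AutoModelForSeq2SeqLM.from_pretrained("t5-small", return_dict=False)
tokens = prepare_sentence_tokens("t5-small", "translate English to German: hello")

fx_g = make_fx(
    lambda x: model(input_ids=x, decoder_input_ids=x)[0],
    # Decompose ops the backend may not handle natively (illustrative pick).
    decomposition_table=get_decompositions([torch.ops.aten.native_layer_norm]),
)(tokens)

ts_g = torch.jit.script(fx_g)  # an FX GraphModule is an nn.Module, so it scripts
module = torch_mlir.compile(ts_g, [tokens], output_type="linalg-on-tensors")
print(module)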
Package                      Version                  Editable project location
---------------------------- ------------------------ ------------------------------
absl-py                      1.2.0
aiofiles                     22.1.0
aiohttp                      3.8.3
aiosignal                    1.2.0
albumentations               0.4.3
altair                       4.2.0
antlr4-python3-runtime       4.8
anyio                        3.6.2
(file truncated)
#map = affine_map<(d0) -> (0)>
#map1 = affine_map<(d0) -> (d0)>
#map2 = affine_map<(d0) -> ()>
#map3 = affine_map<(d0, d1) -> (d0, 0)>
#map4 = affine_map<(d0, d1) -> (0, d1)>
#map5 = affine_map<(d0, d1) -> (d0, d1)>
#map6 = affine_map<(d0, d1) -> (d1, d0)>
#map7 = affine_map<(d0, d1) -> (d1)>
#map8 = affine_map<(d0, d1, d2, d3) -> (d1)>
#map9 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
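Maps like #map3 ((d0, d1) -> (d0, 0)) and #map4 ((d0, d1) -> (0, d1)) pin one index to zero; this is how unit-dimension broadcasts show up in the indexing_maps of linalg.generic after lowering. A self-contained sketch of a broadcast add written with that pattern, compilable with IREE's Python bindings (shapes and names are illustrative; tensor.empty needs a reasonably recent MLIR):

from iree.compiler import compile_str

BROADCAST_IR = """
#map_bcast = affine_map<(d0, d1) -> (0, d1)>
#map_id = affine_map<(d0, d1) -> (d0, d1)>
module {
  func.func @forward(%lhs: tensor<1x4xf32>, %rhs: tensor<3x4xf32>) -> tensor<3x4xf32> {
    %init = tensor.empty() : tensor<3x4xf32>
    // %lhs is indexed via #map_bcast: its single row is reused for every d0.
    %out = linalg.generic
        {indexing_maps = [#map_bcast, #map_id, #map_id],
         iterator_types = ["parallel", "parallel"]}
        ins(%lhs, %rhs : tensor<1x4xf32>, tensor<3x4xf32>)
        outs(%init : tensor<3x4xf32>) {
      ^bb0(%a: f32, %b: f32, %acc: f32):
        %sum = arith.addf %a, %b : f32
        linalg.yield %sum : f32
    } -> tensor<3x4xf32>
    return %out : tensor<3x4xf32>
  }
}
"""

# "llvm-cpu" is the current CPU backend name; older releases used "dylib-llvm-aot".
vmfb = compile_str(BROADCAST_IR, target_backends=["llvm-cpu"])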
(file truncated)
module attributes {torch.debug_module_name = "_lambda"} {
  func.func @forward(%arg0: !torch.vtensor<[2,4,96,96],f16>, %arg1: !torch.vtensor<[1],f32>, %arg2: !torch.vtensor<[4,77,1024],f16>, %arg3: !torch.vtensor<[],f32>) -> !torch.vtensor<[2,96,96],f16> {
    %int1 = torch.constant.int 1
    %int0 = torch.constant.int 0
    %false = torch.constant.bool false
    %int4 = torch.constant.int 4
    %float0.000000e00 = torch.constant.float 0.000000e+00
    %int5 = torch.constant.int 5
    %none = torch.constant.none
    %int320 = torch.constant.int 320
Args: {'output_path': '.', 'quant_device': 'cuda', 'batch_size': 2, 'bit_width': 8, 'conv_group_size': 16, 'linear_group_size': 16, 'layer_type': 'conv', 'weight_quant': 'group_scale_param', 'input_quant': 'group_dynamic_sym', 'float16': True, 'inline_ts_graph': False, 'onnx_export': False, 'make_fx': True, 'quantize': True}
Moving model to cuda...
Run model quantization...
Quantize conv2d: conv
Tracing to FX functional representation...
FX graph model...
graph():
    %arg0_1 : [#users=2] = placeholder[target=arg0_1]
    %_tensor_constant0 : [#users=1] = get_attr[target=_tensor_constant0]
    %lift_fresh_copy : [#users=0] = call_function[target=torch.ops.aten.lift_fresh_copy](args = (%_tensor_constant0,), kwargs = {})
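The get_attr plus aten.lift_fresh_copy pair at the top of the graph is how make_fx lifts inline tensor constants out of the traced function. A tiny reproduction of that node pattern (function and shapes are made up for illustration):

import torch
from torch.fx.experimental.proxy_tensor import make_fx

def scale(x):
    # The inline constant becomes a get_attr node (_tensor_constant0)
    # followed by aten.lift_fresh_copy in the traced graph.
    return x * torch.tensor(2.0)

fx_g = make_fx(scale)(torch.randn(4))
print(fx_g.graph)  # graph(): placeholder / get_attr / call_function nodes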
/home/prashant/llama/llama/model.py:228: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
start_pos = int(start_pos_tensor)
/home/prashant/llama/llama/model.py:232: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
if seqlen > 1:
/home/prashant/llama/llama/model.py:58: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert freqs_cis.shape == (x.shape[1], x.shape[-1])
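These warnings matter: int() and bool() on tensors freeze the runtime value into the trace as a constant, so the traced graph silently stops generalizing to other inputs. A self-contained demonstration (names are illustrative):

import torch

def lookup(start_pos_tensor):
    start_pos = int(start_pos_tensor)  # triggers the TracerWarning above
    return torch.arange(start_pos, start_pos + 4)

traced = torch.jit.trace(lookup, (torch.tensor(2),))
# The 2 was baked in as a constant: passing 10 still starts at 2.
print(traced(torch.tensor(10)))  # tensor([2, 3, 4, 5])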
(file truncated)
#loc = loc(unknown)
module attributes {torch.debug_module_name = "Transformer"} {
  func.func private @__torch__.torch.nn.modules.sparse.Embedding.forward(%arg0: !torch.nn.Module<"__torch__.torch.nn.modules.sparse.Embedding"> loc(unknown), %arg1: !torch.tensor loc(unknown)) -> !torch.tensor {
    %185 = torch.tensor_static_info_cast %arg1 : !torch.tensor to !torch.tensor<[1,1],si64> loc(#loc)
    %186 = torch.prim.GetAttr %arg0["weight"] : !torch.nn.Module<"__torch__.torch.nn.modules.sparse.Embedding"> -> !torch.tensor loc(#loc)
    %int-1 = torch.constant.int -1 loc(#loc1)
    %false = torch.constant.bool false loc(#loc1)
    %false_0 = torch.constant.bool false loc(#loc1)
    %187 = torch.aten.embedding %186, %185, %int-1, %false, %false_0 : !torch.tensor, !torch.tensor<[1,1],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.tensor<[1,1,512],f32> loc(#loc1)
    %188 = torch.tensor_static_info_cast %187 : !torch.tensor<[1,1,512],f32> to !torch.tensor loc(#loc1)
func.func @forward(%arg0: tensor<512xf32>, %arg1: tensor<512xf32>, %arg2: tensor<512xf32>, %arg3: tensor<512xf32>, %arg4: tensor<512xf32>, %arg5: tensor<512xf32>, %arg6: tensor<512xf32>, %arg7: tensor<512xf32>, %arg8: tensor<512xf32>, %arg9: tensor<512xf32>, %arg10: tensor<512xf32>, %arg11: tensor<512xf32>, %arg12: tensor<512xf32>, %arg13: tensor<512xf32>, %arg14: tensor<512xf32>, %arg15: tensor<512xf32>, %arg16: tensor<512xf32>, %arg17: tensor<1x512xf32>, %arg18: tensor<512x512xf32>, %arg19: tensor<512x512xf32>, %arg20: tensor<512x512xf32>, %arg21: tensor<512x512xf32>, %arg22: tensor<1536x512xf32>, %arg23: tensor<1536x512xf32>, %arg24: tensor<512x1536xf32>, %arg25: tensor<512x512xf32>, %arg26: tensor<512x512xf32>, %arg27: tensor<512x512xf32>, %arg28: tensor<512x512xf32>, %arg29: tensor<1536x512xf32>, %arg30: tensor<1536x512xf32>, %arg31: tensor<512x1536xf32>, %arg32: tensor<512x512xf32>, %arg33: tensor<512x512xf32>, %arg34: tensor<512x512xf32>, %arg35: tensor<512x512xf32>, %arg36: tensor<1536x512xf32>, %arg3
from iree import runtime as ireert
from iree.compiler import compile_str
import numpy as np

LINALG_IR = '''
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
module {
  func.func @forward(%arg0: tensor<1x6x32x64x2xf32>) -> tensor<1x6x32x64xcomplex<f32>> {
    %c1 = arith.constant 1 : index
(file truncated)
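The snippet above is cut off inside the LINALG_IR string, but the usual continuation compiles that string and invokes @forward through the IREE runtime. A hedged sketch, assuming the full IR string and a CPU target; driver names and the VmModule signature vary across IREE releases:

# Compile the (complete) IR and run @forward on CPU.
# "llvm-cpu" / "local-task" are current names; older releases used
# "dylib-llvm-aot" / "dylib".
vmfb = compile_str(LINALG_IR, target_backends=["llvm-cpu"])

config = ireert.Config("local-task")
ctx = ireert.SystemContext(config=config)
vm_module = ireert.VmModule.from_flatbuffer(ctx.instance, vmfb)
ctx.add_vm_module(vm_module)

x = np.random.randn(1, 6, 32, 64, 2).astype(np.float32)
result = ctx.modules.module["forward"](x)
print(result.to_host())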
#map = affine_map<(d0, d1, d2, d3) -> (0, d1, d2, d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
#map2 = affine_map<(d0) -> (0)>
#map3 = affine_map<(d0) -> (d0)>
#map4 = affine_map<(d0) -> ()>
#map5 = affine_map<(d0, d1) -> (d0, 0)>
#map6 = affine_map<(d0, d1) -> (0, d1)>
#map7 = affine_map<(d0, d1) -> (d0, d1)>
#map8 = affine_map<(d0, d1) -> (d1, d0)>
#map9 = affine_map<(d0, d1) -> (d1)>