Skip to content

Instantly share code, notes, and snippets.

View pashu123's full-sized avatar
😇
Working from home

Prashant Kumar pashu123

😇
Working from home
View GitHub Profile
This file has been truncated, but you can view the full file.
#loc = loc(unknown)
module attributes {torch.debug_module_name = "Transformer"} {
func.func private @__torch__.torch.nn.modules.sparse.Embedding.forward(%arg0: !torch.nn.Module<"__torch__.torch.nn.modules.sparse.Embedding"> loc(unknown), %arg1: !torch.tensor loc(unknown)) -> !torch.tensor {
%185 = torch.tensor_static_info_cast %arg1 : !torch.tensor to !torch.tensor<[1,1],si64> loc(#loc)
%186 = torch.prim.GetAttr %arg0["weight"] : !torch.nn.Module<"__torch__.torch.nn.modules.sparse.Embedding"> -> !torch.tensor loc(#loc)
%int-1 = torch.constant.int -1 loc(#loc1)
%false = torch.constant.bool false loc(#loc1)
%false_0 = torch.constant.bool false loc(#loc1)
%187 = torch.aten.embedding %186, %185, %int-1, %false, %false_0 : !torch.tensor, !torch.tensor<[1,1],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.tensor<[1,1,512],f32> loc(#loc1)
%188 = torch.tensor_static_info_cast %187 : !torch.tensor<[1,1,512],f32> to !torch.tensor loc(#loc1)
/home/prashant/llama/llama/model.py:228: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
start_pos = int(start_pos_tensor)
/home/prashant/llama/llama/model.py:232: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
if seqlen > 1:
/home/prashant/llama/llama/model.py:58: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert freqs_cis.shape == (x.shape[1], x.shape[-1])
Args: {'output_path': '.', 'quant_device': 'cuda', 'batch_size': 2, 'bit_width': 8, 'conv_group_size': 16, 'linear_group_size': 16, 'layer_type': 'conv', 'weight_quant': 'group_scale_param', 'input_quant': 'group_dynamic_sym', 'float16': True, 'inline_ts_graph': False, 'onnx_export': False, 'make_fx': True, 'quantize': True}
Moving model to cuda...
Run model quantization...
Quantize conv2d: conv
Tracing to FX functional representation...
FX graph model...
graph():
%arg0_1 : [#users=2] = placeholder[target=arg0_1]
%_tensor_constant0 : [#users=1] = get_attr[target=_tensor_constant0]
%lift_fresh_copy : [#users=0] = call_function[target=torch.ops.aten.lift_fresh_copy](args = (%_tensor_constant0,), kwargs = {})
This file has been truncated, but you can view the full file.
module attributes {torch.debug_module_name = "_lambda"} {
func.func @forward(%arg0: !torch.vtensor<[2,4,96,96],f16>, %arg1: !torch.vtensor<[1],f32>, %arg2: !torch.vtensor<[4,77,1024],f16>, %arg3: !torch.vtensor<[],f32>) -> !torch.vtensor<[2,96,96],f16> {
%int1 = torch.constant.int 1
%int0 = torch.constant.int 0
%false = torch.constant.bool false
%int4 = torch.constant.int 4
%float0.000000e00 = torch.constant.float 0.000000e+00
%int5 = torch.constant.int 5
%none = torch.constant.none
%int320 = torch.constant.int 320
This file has been truncated, but you can view the full file.
#map = affine_map<(d0) -> (0)>
#map1 = affine_map<(d0) -> (d0)>
#map2 = affine_map<(d0) -> ()>
#map3 = affine_map<(d0, d1) -> (d0, 0)>
#map4 = affine_map<(d0, d1) -> (0, d1)>
#map5 = affine_map<(d0, d1) -> (d0, d1)>
#map6 = affine_map<(d0, d1) -> (d1, d0)>
#map7 = affine_map<(d0, d1) -> (d1)>
#map8 = affine_map<(d0, d1, d2, d3) -> (d1)>
#map9 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
Package Version Editable project location
---------------------------- ------------------------ ------------------------------
absl-py 1.2.0
aiofiles 22.1.0
aiohttp 3.8.3
aiosignal 1.2.0
albumentations 0.4.3
altair 4.2.0
antlr4-python3-runtime 4.8
anyio 3.6.2
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
from torch.fx.experimental.proxy_tensor import make_fx
from torch._decomp import get_decompositions
import tempfile
import torch_mlir
def prepare_sentence_tokens(hf_model: str):
tokenizer = AutoTokenizer.from_pretrained(hf_model)
; SPIR-V
; Version: 1.3
; Generator: Khronos; 22
; Bound: 356
; Schema: 0
OpCapability Int64
OpCapability Shader
OpCapability GroupNonUniformShuffle
OpExtension "SPV_KHR_storage_buffer_storage_class"
%105 = OpExtInstImport "GLSL.std.450"
module attributes {hal.device.targets = [#hal.device.target<"vulkan", {executable_targets = [#hal.executable.target<"vulkan", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size
; SPIR-V
; Version: 1.3
; Generator: Khronos; 22
; Bound: 138
; Schema: 0
OpCapability Shader
OpCapability Int64
OpCapability GroupNonUniformShuffle
OpExtension "SPV_KHR_storage_buffer_storage_class"
%104 = OpExtInstImport "GLSL.std.450"