This file has been truncated, but you can view the full file.
// -----// IR Dump Before AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
module {
func.func @faulty(%arg0: !torch.vtensor<[32,4096],bf16>, %arg1: !torch.vtensor<[4096,4096],bf16>) -> !torch.vtensor<[32,4096],bf16> {
%int26 = torch.constant.int 26
%int15 = torch.constant.int 15
%0 = torch.prims.convert_element_type %arg0, %int26 : !torch.vtensor<[32,4096],bf16>, !torch.int -> !torch.vtensor<[32,4096],f8E4M3FNUZ>
%1 = torch.prims.convert_element_type %arg1, %int26 : !torch.vtensor<[4096,4096],bf16>, !torch.int -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%2 = torch.aten.mm %0, %1 : !torch.vtensor<[32,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,4096],f8E4M3FNUZ> -> !torch.vtensor<[32,4096],f8E4M3FNUZ>
%3 = torch.prims.convert_element_type %2, %int15 : !torch.vtensor<[32,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[32,4096],bf16>
return %3 : !torch.vtensor<[32,4096],bf16>
module @module {
util.global private @__auto.token_embd.weight = #stream.parameter.named<"model"::"token_embd.weight"> : tensor<128256x4096xbf16>
util.global private @__auto.blk.0.attn_norm.weight = #stream.parameter.named<"model"::"blk.0.attn_norm.weight"> : tensor<4096xbf16>
util.global private @"__auto.blk.0.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.0.attn_q.q_input:rscale"> : tensor<f32>
util.global private @"__auto.blk.0.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.0.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>
util.global private @"__auto.blk.0.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.0.attn_k.q_input:rscale"> : tensor<f32>
util.global private @"__auto.blk.0.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.0.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>
util.global private @"__auto.blk.0.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.0.attn_v.q_input:rscale"> : tensor<f32>
util.global private @"__au
import struct

def check_bf16_nan(filename):
    """
    Checks if the given BF16 binary file contains any NaN values.
    Assumes the file is in big-endian byte order.
    Returns True if any NaN is found, False otherwise.
    """
    has_nan = False
    with open(filename, 'rb') as f:
        data = f.read()
    # Each bf16 value occupies 2 bytes; decode as big-endian uint16.
    for (bits,) in struct.iter_unpack('>H', data):
        # bf16 NaN: exponent bits (0x7F80) all set and mantissa bits (0x007F) nonzero.
        if (bits & 0x7F80) == 0x7F80 and (bits & 0x007F) != 0:
            has_nan = True
            break
    return has_nan
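A small driver for the check above (the command-line argument is just the path of whatever raw bf16 dump you want to inspect; this driver is an addition, not part of the original snippet):

import sys

if __name__ == "__main__":
    # Usage: python check_bf16_nan.py <path-to-raw-bf16-dump>
    print("NaN found:", check_bf16_nan(sys.argv[1]))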
iree-compile faulty.mlir \
  --iree-hip-target=gfx942 \
  --iree-hal-target-device=hip \
  --iree-dispatch-creation-enable-aggressive-fusion=true \
  --iree-global-opt-propagate-transposes=true \
  --iree-opt-aggressively-propagate-transposes=true \
  --iree-opt-data-tiling=false \
  --iree-preprocessing-pass-pipeline='builtin.module(util.func(iree-preprocessing-generalize-linalg-matmul-experimental))' \
  --iree-hal-indirect-command-buffers=true \
  --iree-stream-resource-memory-model=discrete \
  --iree-hal-memoization=true \
  --iree-opt-strip-assertions \
  -o=init.vmfb
iree-run-module --hip_use_streams=true --device=hip://0 --function=faulty --module=init.vmfb [email protected] [email protected]
module {
func.func @faulty(%arg0: !torch.vtensor<[32,4096],bf16>, %arg1: !torch.vtensor<[4096,4096],bf16>) -> !torch.vtensor<[32,4096],bf16> {
%int26 = torch.constant.int 26
%int15 = torch.constant.int 15
%0 = torch.prims.convert_element_type %arg0, %int26 : !torch.vtensor<[32,4096],bf16>, !torch.int -> !torch.vtensor<[32,4096],f8E4M3FNUZ>
%1 = torch.prims.convert_element_type %arg1, %int26 : !torch.vtensor<[4096,4096],bf16>, !torch.int -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%2 = torch.aten.mm %0, %1 : !torch.vtensor<[32,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,4096],f8E4M3FNUZ> -> !torch.vtensor<[32,4096],f8E4M3FNUZ>
%3 = torch.prims.convert_element_type %2, %int15 : !torch.vtensor<[32,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[32,4096],bf16>
return %3 : !torch.vtensor<[32,4096],bf16>
  }
}
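Reading the IR: dtype code 26 maps to f8E4M3FNUZ and 15 to bf16 (as the convert_element_type results show), so @faulty quantizes both operands to f8E4M3FNUZ, multiplies them with torch.aten.mm, and casts the product back to bf16. A rough PyTorch reference of that computation is sketched below; it is an emulation only (eager float8 matmul support is limited, so the multiply upcasts to f32), not the exact lowering IREE performs, and torch.float8_e4m3fnuz availability depends on the PyTorch build.

import torch

def faulty_reference(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    # convert_element_type %arg, 26 : quantize both operands to float8_e4m3fnuz
    a_f8 = a.to(torch.float8_e4m3fnuz)
    b_f8 = b.to(torch.float8_e4m3fnuz)
    # torch.aten.mm on the f8 operands, emulated here by upcasting to f32
    acc = a_f8.to(torch.float32) @ b_f8.to(torch.float32)
    # convert_element_type %mm, 15 : cast the result back to bfloat16
    return acc.to(torch.bfloat16)

a = torch.randn(32, 4096, dtype=torch.bfloat16)
b = torch.randn(4096, 4096, dtype=torch.bfloat16)
out = faulty_reference(a, b)
print(torch.isnan(out).any())  # compare against the NaNs seen from the compiled module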
This file has been truncated, but you can view the full file.
module @module {
util.global private @__auto.token_embd.weight = #stream.parameter.named<"model"::"token_embd.weight"> : tensor<128256x4096xbf16>
util.global private @__auto.blk.0.attn_norm.weight = #stream.parameter.named<"model"::"blk.0.attn_norm.weight"> : tensor<4096xbf16>
util.global private @"__auto.blk.0.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.0.attn_q.q_input:rscale"> : tensor<f32>
util.global private @"__auto.blk.0.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.0.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>
util.global private @"__auto.blk.0.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.0.attn_k.q_input:rscale"> : tensor<f32>
util.global private @"__auto.blk.0.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.0.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>
util.global private @"__auto.blk.0.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.0.attn_v.q_input:rscale"> : tensor<f32>
util.global private @"__au
This file has been truncated, but you can view the full file.
// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
module {
func.func @matmul(%arg0: tensor<1024x1024xbf16>, %arg1: tensor<1024x1024xbf16>) -> tensor<1024x1024xf32> {
%0 = tensor.empty() : tensor<1024x1024xf32>
%c0_i32 = arith.constant 0 : i32
%1 = linalg.fill ins(%c0_i32 : i32) outs(%0 : tensor<1024x1024xf32>) -> tensor<1024x1024xf32>
%2 = linalg.matmul ins(%arg0, %arg1 : tensor<1024x1024xbf16>, tensor<1024x1024xbf16>) outs(%1 : tensor<1024x1024xf32>) -> tensor<1024x1024xf32>
return %2 : tensor<1024x1024xf32>
}
}
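For comparison, the dispatch above is an unquantized bf16 x bf16 matmul that accumulates into a zero-filled f32 tensor. A minimal PyTorch sanity-check sketch of the same computation (emulating the f32 accumulator by upcasting before the multiply, not the IREE codegen path):

import torch

a = torch.randn(1024, 1024, dtype=torch.bfloat16)
b = torch.randn(1024, 1024, dtype=torch.bfloat16)
# Emulate the linalg.matmul f32 accumulator by upcasting the bf16 inputs.
ref = a.to(torch.float32) @ b.to(torch.float32)
print(ref.dtype, ref.shape)  # torch.float32 torch.Size([1024, 1024])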
// -----// IR Dump After TileAndDistributeToWorkgroupsUsingForallOpPass (iree-codegen-tile-and-distribute-to-workgroups-using-forall-op) //----- //
func.func @matvec_fp16() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {}>} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>]>) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x4096xf16>>
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>]>) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>>
%2 = hal.interface.binding.subspan layout(<bindings =
// -----// IR Dump After TileAndDistributeToWorkgroupsUsingForallOpPass (iree-codegen-tile-and-distribute-to-workgroups-using-forall-op) //----- //
func.func @matmul_256x256x256_f16_f32() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [256, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = true, no_reduce_shared_memory_bank_conflicts = false>}>} {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>]>) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>>
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>]>) binding(1) alignment