๐
This file has been truncated, but you can view the full file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -----// IR Dump Before AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- // | |
module { | |
func.func @faulty(%arg0: !torch.vtensor<[32,4096],bf16>, %arg1: !torch.vtensor<[4096,4096],bf16>) -> !torch.vtensor<[32,4096],bf16> { | |
%int26 = torch.constant.int 26 | |
%int15 = torch.constant.int 15 | |
%0 = torch.prims.convert_element_type %arg0, %int26 : !torch.vtensor<[32,4096],bf16>, !torch.int -> !torch.vtensor<[32,4096],f8E4M3FNUZ> | |
%1 = torch.prims.convert_element_type %arg1, %int26 : !torch.vtensor<[4096,4096],bf16>, !torch.int -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%2 = torch.aten.mm %0, %1 : !torch.vtensor<[32,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,4096],f8E4M3FNUZ> -> !torch.vtensor<[32,4096],f8E4M3FNUZ> | |
%3 = torch.prims.convert_element_type %2, %int15 : !torch.vtensor<[32,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[32,4096],bf16> | |
return %3 : !torch.vtensor<[32,4096],bf16> |
This file has been truncated, but you can view the full file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -----// IR Dump Before AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- // | |
module { | |
func.func @faulty(%arg0: !torch.vtensor<[32,4096],bf16>, %arg1: !torch.vtensor<[4096,4096],bf16>) -> !torch.vtensor<[32,4096],bf16> { | |
%int26 = torch.constant.int 26 | |
%int15 = torch.constant.int 15 | |
%0 = torch.prims.convert_element_type %arg0, %int26 : !torch.vtensor<[32,4096],bf16>, !torch.int -> !torch.vtensor<[32,4096],f8E4M3FNUZ> | |
%1 = torch.prims.convert_element_type %arg1, %int26 : !torch.vtensor<[4096,4096],bf16>, !torch.int -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%2 = torch.aten.mm %0, %1 : !torch.vtensor<[32,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,4096],f8E4M3FNUZ> -> !torch.vtensor<[32,4096],f8E4M3FNUZ> | |
%3 = torch.prims.convert_element_type %2, %int15 : !torch.vtensor<[32,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[32,4096],bf16> | |
return %3 : !torch.vtensor<[32,4096],bf16> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module @module { | |
util.global private @__auto.token_embd.weight = #stream.parameter.named<"model"::"token_embd.weight"> : tensor<128256x4096xbf16> | |
util.global private @__auto.blk.0.attn_norm.weight = #stream.parameter.named<"model"::"blk.0.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.0.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.0.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.0.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.0.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.0.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.0.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.0.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.0.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.0.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.0.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__au |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import struct | |
def check_bf16_nan(filename): | |
""" | |
Checks if the given BF16 binary file contains any NaN values. | |
Assumes the file is in big-endian byte order. | |
Returns True if any NaN is found, False otherwise. | |
""" | |
has_nan = False | |
with open(filename, 'rb') as f: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compile faulty.mlir for the AMD gfx942 (MI300-class) HIP target.
# The flag set matches the repro configuration: aggressive dispatch fusion,
# transpose propagation, data tiling disabled, matmuls generalized via the
# experimental preprocessing pass, and assertions stripped.
# NOTE(review): the original paste had trailing "| |" scrape artifacts after
# each command, which are shell syntax errors (empty pipe targets) — removed.
iree-compile faulty.mlir \
  --iree-hip-target=gfx942 \
  --iree-hal-target-device=hip \
  --iree-dispatch-creation-enable-aggressive-fusion=true \
  --iree-global-opt-propagate-transposes=true \
  --iree-opt-aggressively-propagate-transposes=true \
  --iree-opt-data-tiling=false \
  --iree-preprocessing-pass-pipeline='builtin.module(util.func(iree-preprocessing-generalize-linalg-matmul-experimental))' \
  --iree-hal-indirect-command-buffers=true \
  --iree-stream-resource-memory-model=discrete \
  --iree-hal-memoization=true \
  --iree-opt-strip-assertions \
  -o=init.vmfb

# Run the compiled @faulty entry point on the first HIP device.
# NOTE(review): the two input arguments were mangled by the page scrape to
# "[email protected]" — they were presumably "--input=@<file>.bin" arguments
# supplying the two bf16 tensors; restore the real filenames before running.
iree-run-module \
  --hip_use_streams=true \
  --device=hip://0 \
  --function=faulty \
  --module=init.vmfb \
  --input=@arg0.bin \
  --input=@arg1.bin
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module { | |
// Repro kernel (named "faulty" — presumably a numerics repro; the matmul is
// computed entirely in f8E4M3FNUZ, including its result, before widening).
// The integer dtype codes are torch ScalarType values: 26 selects
// f8E4M3FNUZ and 15 selects bf16, as shown by the convert ops below.
// NOTE(review): trailing "| |" page-scrape artifacts removed from each line.
func.func @faulty(%arg0: !torch.vtensor<[32,4096],bf16>, %arg1: !torch.vtensor<[4096,4096],bf16>) -> !torch.vtensor<[32,4096],bf16> {
  %int26 = torch.constant.int 26
  %int15 = torch.constant.int 15
  // Quantize both bf16 operands down to f8E4M3FNUZ.
  %0 = torch.prims.convert_element_type %arg0, %int26 : !torch.vtensor<[32,4096],bf16>, !torch.int -> !torch.vtensor<[32,4096],f8E4M3FNUZ>
  %1 = torch.prims.convert_element_type %arg1, %int26 : !torch.vtensor<[4096,4096],bf16>, !torch.int -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
  // f8 x f8 matmul whose result type is also f8E4M3FNUZ — the narrow
  // result/accumulator type is the likely source of the bad numerics.
  %2 = torch.aten.mm %0, %1 : !torch.vtensor<[32,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,4096],f8E4M3FNUZ> -> !torch.vtensor<[32,4096],f8E4M3FNUZ>
  // Widen the result back to bf16 for the caller.
  %3 = torch.prims.convert_element_type %2, %int15 : !torch.vtensor<[32,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[32,4096],bf16>
  return %3 : !torch.vtensor<[32,4096],bf16>
}
This file has been truncated, but you can view the full file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module @module { | |
util.global private @__auto.token_embd.weight = #stream.parameter.named<"model"::"token_embd.weight"> : tensor<128256x4096xbf16> | |
util.global private @__auto.blk.0.attn_norm.weight = #stream.parameter.named<"model"::"blk.0.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.0.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.0.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.0.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.0.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.0.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.0.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.0.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.0.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.0.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.0.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__au |
This file has been truncated, but you can view the full file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
module {
  // 1024x1024 bf16 x bf16 matmul accumulated into f32.
  func.func @matmul(%arg0: tensor<1024x1024xbf16>, %arg1: tensor<1024x1024xbf16>) -> tensor<1024x1024xf32> {
    %0 = tensor.empty() : tensor<1024x1024xf32>
    // Zero-initialize the f32 accumulator.
    // NOTE(review): the pasted dump filled the f32 tensor with an i32 zero
    // (%c0_i32), which fails the linalg.fill verifier — the fill value's type
    // must match the output element type. Use an f32 zero instead.
    %cst = arith.constant 0.000000e+00 : f32
    %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<1024x1024xf32>) -> tensor<1024x1024xf32>
    %2 = linalg.matmul ins(%arg0, %arg1 : tensor<1024x1024xbf16>, tensor<1024x1024xbf16>) outs(%1 : tensor<1024x1024xf32>) -> tensor<1024x1024xf32>
    return %2 : tensor<1024x1024xf32>
  }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -----// IR Dump After TileAndDistributeToWorkgroupsUsingForallOpPass (iree-codegen-tile-and-distribute-to-workgroups-using-forall-op) //----- // | |
func.func @matvec_fp16() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {}>} { | |
%c0 = arith.constant 0 : index | |
%cst = arith.constant 0.000000e+00 : f16 | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>]>) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x4096xf16>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>]>) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>> | |
%2 = hal.interface.binding.subspan layout(<bindings = |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -----// IR Dump After TileAndDistributeToWorkgroupsUsingForallOpPass (iree-codegen-tile-and-distribute-to-workgroups-using-forall-op) //----- // | |
func.func @matmul_256x256x256_f16_f32() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [256, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = true, no_reduce_shared_memory_bank_conflicts = false>}>} { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>]>) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>]>) binding(1) alignment |