Created
February 10, 2023 15:33
-
-
Save pashu123/2d3236c4ec681e5e56a393e62636a34c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/home/prashant/stable.mlir:793:11: error: failed to materialize conversion for result #0 of operation 'hal.interface.constant.load' that remained live after conversion
  %10 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel"]} ins(%9 : tensor<2x160xf32>) outs(%8 : tensor<2x160xf32>) {
          ^
/home/prashant/stable.mlir:24:3: note: called from
  func.func @forward(%arg0: tensor<1x4x96x96xf32>, %arg1: tensor<1xf32>, %arg2: tensor<2x64x1024xf32>, %arg3: tensor<f32>) -> tensor<1x4x96x96xf32> {
  ^
/home/prashant/stable.mlir:793:11: note: see current operation: %20 = "hal.interface.constant.load"() {index = 0 : index} : () -> i32
  %10 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel"]} ins(%9 : tensor<2x160xf32>) outs(%8 : tensor<2x160xf32>) {
          ^
/home/prashant/stable.mlir:793:11: note: see existing live user here: %28 = "spirv.UConvert"(%20) : (i32) -> i64
/home/prashant/stable.mlir:804:27: error: failed to run translation of source executable to target executable for backend #hal.executable.target<"vulkan", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>}>
  %inserted_slice_724 = tensor.insert_slice %10 into %12[0, 0] [2, 160] [1, 1] : tensor<2x160xf32> into tensor<2x320xf32>
                          ^
/home/prashant/stable.mlir:24:3: note: called from
  func.func @forward(%arg0: tensor<1x4x96x96xf32>, %arg1: tensor<1xf32>, %arg2: tensor<2x64x1024xf32>, %arg3: tensor<f32>) -> tensor<1x4x96x96xf32> {
  ^
/home/prashant/stable.mlir:804:27: note: see current operation: | |
"hal.executable.variant"() ({ | |
"hal.executable.export"() ({ | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index): | |
%0 = "arith.constant"() {value = 5 : index} : () -> index | |
%1 = "arith.constant"() {value = 2 : index} : () -> index | |
%2 = "arith.constant"() {value = 1 : index} : () -> index | |
"hal.return"(%0, %1, %2) : (index, index, index) -> () | |
}) {layout = #hal.pipeline.layout<push_constants = 2, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>, ordinal = 0 : index, sym_name = "forward_dispatch_3", translation_info = #iree_codegen.translation_info<SPIRVBaseDistribute>, workgroup_size = [32 : index, 1 : index, 1 : index]} : () -> () | |
"builtin.module"() ({ | |
"spirv.GlobalVariable"() {binding = 0 : i32, descriptor_set = 0 : i32, sym_name = "__resource_var_0_0_", type = !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>} : () -> () | |
"spirv.GlobalVariable"() {binding = 1 : i32, descriptor_set = 0 : i32, sym_name = "__resource_var_0_1_", type = !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>} : () -> () | |
"func.func"() ({ | |
%0 = "arith.constant"() {value = -1 : index} : () -> index | |
%1 = "arith.constant"() {value = 4 : index} : () -> index | |
%2 = "arith.constant"() {value = 32 : index} : () -> index | |
%3 = "arith.constant"() {value = 160 : index} : () -> index | |
%4 = "arith.constant"() {value = 0 : index} : () -> index | |
%5 = "arith.constant"() {value = 640 : index} : () -> index | |
%6 = "arith.constant"() {value = 320 : index} : () -> index | |
%7 = "hal.interface.constant.load"() {index = 0 : index} : () -> i32 | |
%8 = "hal.interface.constant.load"() {index = 1 : index} : () -> i32 | |
%9 = "arith.index_castui"(%7) : (i32) -> index | |
%10 = "arith.index_castui"(%8) : (i32) -> index | |
%11 = "hal.interface.binding.subspan"(%9, %6) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operand_segment_sizes = array<i32: 1, 1>, set = 0 : index} : (index, index) -> memref<?xf32, #spirv.storage_class<StorageBuffer>> | |
%12 = "hal.interface.binding.subspan"(%4, %6) {alignment = 64 : index, binding = 0 : index, descriptor_type = #hal.descriptor_type<storage_buffer>, operand_segment_sizes = array<i32: 1, 1>, set = 0 : index} : (index, index) -> memref<?xf32, #spirv.storage_class<StorageBuffer>> | |
%13 = "hal.interface.binding.subspan"(%10, %5) {alignment = 64 : index, binding = 1 : index, descriptor_type = #hal.descriptor_type<storage_buffer>, operand_segment_sizes = array<i32: 1, 1>, set = 0 : index} : (index, index) -> memref<?xf32, #spirv.storage_class<StorageBuffer>> | |
%14 = "hal.interface.binding.subspan"(%4, %5) {alignment = 64 : index, binding = 1 : index, descriptor_type = #hal.descriptor_type<storage_buffer>, operand_segment_sizes = array<i32: 1, 1>, set = 0 : index} : (index, index) -> memref<?xf32, #spirv.storage_class<StorageBuffer>> | |
%15 = "hal.interface.workgroup.id"() {dimension = 0 : index} : () -> index | |
%16 = "hal.interface.workgroup.id"() {dimension = 1 : index} : () -> index | |
%17 = "gpu.thread_id"() {dimension = #gpu<dim x>} : () -> index | |
%18 = "gpu.thread_id"() {dimension = #gpu<dim y>} : () -> index | |
%19 = "arith.muli"(%16, %3) : (index, index) -> index | |
%20 = "arith.muli"(%18, %3) : (index, index) -> index | |
%21 = "arith.addi"(%19, %20) : (index, index) -> index | |
%22 = "arith.muli"(%15, %2) : (index, index) -> index | |
%23 = "arith.addi"(%21, %22) : (index, index) -> index | |
%24 = "arith.addi"(%23, %17) : (index, index) -> index | |
%25 = "arith.cmpi"(%9, %4) {predicate = 2 : i64} : (index, index) -> i1 | |
%26 = "arith.subi"(%0, %9) : (index, index) -> index | |
%27 = "arith.select"(%25, %26, %9) : (i1, index, index) -> index | |
%28 = "arith.divsi"(%27, %1) : (index, index) -> index | |
%29 = "arith.subi"(%0, %28) : (index, index) -> index | |
%30 = "arith.select"(%25, %29, %28) : (i1, index, index) -> index | |
%31 = "arith.addi"(%24, %30) : (index, index) -> index | |
%32 = "memref.load"(%12, %31) : (memref<?xf32, #spirv.storage_class<StorageBuffer>>, index) -> f32 | |
%33 = "arith.muli"(%16, %6) : (index, index) -> index | |
%34 = "arith.muli"(%18, %6) : (index, index) -> index | |
%35 = "arith.addi"(%33, %34) : (index, index) -> index | |
%36 = "arith.addi"(%35, %22) : (index, index) -> index | |
%37 = "arith.addi"(%36, %17) : (index, index) -> index | |
%38 = "arith.cmpi"(%10, %4) {predicate = 2 : i64} : (index, index) -> i1 | |
%39 = "arith.subi"(%0, %10) : (index, index) -> index | |
%40 = "arith.select"(%38, %39, %10) : (i1, index, index) -> index | |
%41 = "arith.divsi"(%40, %1) : (index, index) -> index | |
%42 = "arith.subi"(%0, %41) : (index, index) -> index | |
%43 = "arith.select"(%38, %42, %41) : (i1, index, index) -> index | |
%44 = "arith.addi"(%37, %43) : (index, index) -> index | |
"memref.store"(%32, %14, %44) : (f32, memref<?xf32, #spirv.storage_class<StorageBuffer>>, index) -> () | |
"func.return"() : () -> () | |
}) {function_type = () -> (), spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [32, 1, 1]>, sym_name = "forward_dispatch_3"} : () -> () | |
}) {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} : () -> () | |
"hal.executable.variant_end"() : () -> () | |
}) {sym_name = "vulkan_spirv_fb", target = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>}>} : () -> () | |
%inserted_slice_724 = tensor.insert_slice %10 into %12[0, 0] [2, 160] [1, 1] : tensor<2x160xf32> into tensor<2x320xf32> | |
^ | |
/home/prashant/stable.mlir:804:27: error: failed to serialize executables
  %inserted_slice_724 = tensor.insert_slice %10 into %12[0, 0] [2, 160] [1, 1] : tensor<2x160xf32> into tensor<2x320xf32>
                          ^
/home/prashant/stable.mlir:24:3: note: called from
  func.func @forward(%arg0: tensor<1x4x96x96xf32>, %arg1: tensor<1xf32>, %arg2: tensor<2x64x1024xf32>, %arg3: tensor<f32>) -> tensor<1x4x96x96xf32> {
  ^
/home/prashant/stable.mlir:804:27: note: see current operation: | |
"hal.executable"() ({ | |
"hal.executable.variant"() ({ | |
"hal.executable.export"() ({ | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index): | |
%0 = "arith.constant"() {value = 5 : index} : () -> index | |
%1 = "arith.constant"() {value = 2 : index} : () -> index | |
%2 = "arith.constant"() {value = 1 : index} : () -> index | |
"hal.return"(%0, %1, %2) : (index, index, index) -> () | |
}) {layout = #hal.pipeline.layout<push_constants = 2, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>, ordinal = 0 : index, sym_name = "forward_dispatch_3", translation_info = #iree_codegen.translation_info<SPIRVBaseDistribute>, workgroup_size = [32 : index, 1 : index, 1 : index]} : () -> () | |
"builtin.module"() ({ | |
"spirv.GlobalVariable"() {binding = 0 : i32, descriptor_set = 0 : i32, sym_name = "__resource_var_0_0_", type = !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>} : () -> () | |
"spirv.GlobalVariable"() {binding = 1 : i32, descriptor_set = 0 : i32, sym_name = "__resource_var_0_1_", type = !spirv.ptr<!spirv.struct<(!spirv.rtarray<f32, stride=4> [0])>, StorageBuffer>} : () -> () | |
"func.func"() ({ | |
%0 = "arith.constant"() {value = -1 : index} : () -> index | |
%1 = "arith.constant"() {value = 4 : index} : () -> index | |
%2 = "arith.constant"() {value = 32 : index} : () -> index | |
%3 = "arith.constant"() {value = 160 : index} : () -> index | |
%4 = "arith.constant"() {value = 0 : index} : () -> index | |
%5 = "arith.constant"() {value = 640 : index} : () -> index | |
%6 = "arith.constant"() {value = 320 : index} : () -> index | |
%7 = "hal.interface.constant.load"() {index = 0 : index} : () -> i32 | |
%8 = "hal.interface.constant.load"() {index = 1 : index} : () -> i32 | |
%9 = "arith.index_castui"(%7) : (i32) -> index | |
%10 = "arith.index_castui"(%8) : (i32) -> index | |
%11 = "hal.interface.binding.subspan"(%9, %6) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operand_segment_sizes = array<i32: 1, 1>, set = 0 : index} : (index, index) -> memref<?xf32, #spirv.storage_class<StorageBuffer>> | |
%12 = "hal.interface.binding.subspan"(%4, %6) {alignment = 64 : index, binding = 0 : index, descriptor_type = #hal.descriptor_type<storage_buffer>, operand_segment_sizes = array<i32: 1, 1>, set = 0 : index} : (index, index) -> memref<?xf32, #spirv.storage_class<StorageBuffer>> | |
%13 = "hal.interface.binding.subspan"(%10, %5) {alignment = 64 : index, binding = 1 : index, descriptor_type = #hal.descriptor_type<storage_buffer>, operand_segment_sizes = array<i32: 1, 1>, set = 0 : index} : (index, index) -> memref<?xf32, #spirv.storage_class<StorageBuffer>> | |
%14 = "hal.interface.binding.subspan"(%4, %5) {alignment = 64 : index, binding = 1 : index, descriptor_type = #hal.descriptor_type<storage_buffer>, operand_segment_sizes = array<i32: 1, 1>, set = 0 : index} : (index, index) -> memref<?xf32, #spirv.storage_class<StorageBuffer>> | |
%15 = "hal.interface.workgroup.id"() {dimension = 0 : index} : () -> index | |
%16 = "hal.interface.workgroup.id"() {dimension = 1 : index} : () -> index | |
%17 = "gpu.thread_id"() {dimension = #gpu<dim x>} : () -> index | |
%18 = "gpu.thread_id"() {dimension = #gpu<dim y>} : () -> index | |
%19 = "arith.muli"(%16, %3) : (index, index) -> index | |
%20 = "arith.muli"(%18, %3) : (index, index) -> index | |
%21 = "arith.addi"(%19, %20) : (index, index) -> index | |
%22 = "arith.muli"(%15, %2) : (index, index) -> index | |
%23 = "arith.addi"(%21, %22) : (index, index) -> index | |
%24 = "arith.addi"(%23, %17) : (index, index) -> index | |
%25 = "arith.cmpi"(%9, %4) {predicate = 2 : i64} : (index, index) -> i1 | |
%26 = "arith.subi"(%0, %9) : (index, index) -> index | |
%27 = "arith.select"(%25, %26, %9) : (i1, index, index) -> index | |
%28 = "arith.divsi"(%27, %1) : (index, index) -> index | |
%29 = "arith.subi"(%0, %28) : (index, index) -> index | |
%30 = "arith.select"(%25, %29, %28) : (i1, index, index) -> index | |
%31 = "arith.addi"(%24, %30) : (index, index) -> index | |
%32 = "memref.load"(%12, %31) : (memref<?xf32, #spirv.storage_class<StorageBuffer>>, index) -> f32 | |
%33 = "arith.muli"(%16, %6) : (index, index) -> index | |
%34 = "arith.muli"(%18, %6) : (index, index) -> index | |
%35 = "arith.addi"(%33, %34) : (index, index) -> index | |
%36 = "arith.addi"(%35, %22) : (index, index) -> index | |
%37 = "arith.addi"(%36, %17) : (index, index) -> index | |
%38 = "arith.cmpi"(%10, %4) {predicate = 2 : i64} : (index, index) -> i1 | |
%39 = "arith.subi"(%0, %10) : (index, index) -> index | |
%40 = "arith.select"(%38, %39, %10) : (i1, index, index) -> index | |
%41 = "arith.divsi"(%40, %1) : (index, index) -> index | |
%42 = "arith.subi"(%0, %41) : (index, index) -> index | |
%43 = "arith.select"(%38, %42, %41) : (i1, index, index) -> index | |
%44 = "arith.addi"(%37, %43) : (index, index) -> index | |
"memref.store"(%32, %14, %44) : (f32, memref<?xf32, #spirv.storage_class<StorageBuffer>>, index) -> () | |
"func.return"() : () -> () | |
}) {function_type = () -> (), spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [32, 1, 1]>, sym_name = "forward_dispatch_3"} : () -> () | |
}) {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>} : () -> () | |
"hal.executable.variant_end"() : () -> () | |
}) {sym_name = "vulkan_spirv_fb", target = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>}>} : () -> () | |
"hal.executable_end"() : () -> () | |
}) {sym_name = "forward_dispatch_3", sym_visibility = "private"} : () -> () | |
  %inserted_slice_724 = tensor.insert_slice %10 into %12[0, 0] [2, 160] [1, 1] : tensor<2x160xf32> into tensor<2x320xf32>
                          ^
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment