Skip to content

Instantly share code, notes, and snippets.

@pashu123
Created February 21, 2025 17:45
Show Gist options
  • Save pashu123/dd6648263b59f7270fdc7bb7c10384ee to your computer and use it in GitHub Desktop.
Save pashu123/dd6648263b59f7270fdc7bb7c10384ee to your computer and use it in GitHub Desktop.
This file has been truncated, but you can view the full file.
// -----// IR Dump After TileAndDistributeToWorkgroupsUsingForallOpPass (iree-codegen-tile-and-distribute-to-workgroups-using-forall-op) //----- //
// Dispatch function @encode (no arguments, no results).
// Read end to end, the body loads a 64x1280 f16 tensor, widens it to f32,
// sums along the 1280-wide dimension and divides each sum by 1280.0, i.e. it
// writes a per-row mean as tensor<64xf32>.
// translation_info selects the LLVMGPUVectorDistribute pipeline with
// workgroup_size = [64, 1, 1] and subgroup_size = 64.
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// 1280.0: divisor of the final arith.divf; equals the extent of the reduced dimension.
%cst = arith.constant 1.280000e+03 : f32
// 0.0: value written by linalg.fill into the reduction accumulator.
%cst_0 = arith.constant 0.000000e+00 : f32
// Two i32 interface constants (the layout declares "constants = 2"), read from ordinals 0 and 1.
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
// Unsigned i32 -> index casts. %2 and %3 are the operands of the util.assume.int
// that follows; its results %4#0 / %4#1 are used as the offset(...) of the
// binding(0) / binding(1) subspans.
// NOTE(review): offsets are presumably in bytes - confirm against the HAL dialect docs.
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// Binding 0: read-only view typed as tensor<64x1280xf16>, alignment(64), starting at offset %4#0.
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
// Binding 1: write-only view typed as tensor<64xf32>, alignment(64), starting at offset %4#1.
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
// Load the whole input: offsets [0, 0], sizes [64, 1280], unit strides.
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
// Destination tensors with no defined contents.
// %8 (64 x f32) is the outs operand of both the linalg.fill and the final divide.
%8 = tensor.empty() : tensor<64xf32>
// %9 (64x1280 x f32) is the outs operand of the f16 -> f32 widening op.
%9 = tensor.empty() : tensor<64x1280xf32>
// Element-wise widening of the loaded input from f16 to f32.
// Both indexing maps are the identity and both iterators are parallel.
// lowering_config carries workgroup = [64, 0], thread = [0, 8] and the
// subgroup/thread basis attributes.
// NOTE(review): the first list in thread_basis is [64, 32] while the function's
// workgroup_size is [64, 1, 1] - confirm that this is the intended distribution.
%10 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%7 : tensor<64x1280xf16>) outs(%9 : tensor<64x1280xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
// %out (the current element of %9) is not read by the body.
^bb0(%in: f16, %out: f32):
%14 = arith.extf %in : f16 to f32
linalg.yield %14 : f32
} -> tensor<64x1280xf32>
// Initialise the 64-element accumulator to %cst_0 (0.0).
%11 = linalg.fill ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) -> tensor<64xf32>
// Row sum: d0 is parallel, d1 is a reduction, and the output map (d0, d1) -> (d0)
// collapses the 1280-wide dimension into the accumulator %11.
// Compared with the element-wise op feeding it, lowering_config additionally
// carries partial_reduction = [0, 256].
// NOTE(review): presumably d1 is reduced in chunks of 256 - confirm against the
// IREE GPU lowering_config documentation.
%12 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%10 : tensor<64x1280xf32>) outs(%11 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
// %in is the current input element, %out the running sum for the row.
^bb0(%in: f32, %out: f32):
%14 = arith.addf %in, %out : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
// Turn each of the 64 row sums into a mean by dividing by %cst (1280.0).
// One-dimensional, parallel, identity maps. The outs operand is the
// tensor.empty value %8, and %out is not read by the body.
%13 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%12 : tensor<64xf32>) outs(%8 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.divf %in, %cst : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
// Write all 64 results (offset 0, size 64, stride 1) to the write-only output binding %6.
flow.dispatch.tensor.store %13, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After ConfigTrackingCanonicalizerPass (iree-codegen-config-tracking-canonicalize) //----- //
// @encode as printed after the canonicalizer pass named in the marker line above.
// The function takes no arguments and returns nothing. Its body computes, for a
// 64x1280 f16 input, the f32 mean of every row: widen to f32, sum over the
// 1280-wide dimension, divide by 1280.0, and store tensor<64xf32>.
// Codegen settings on the function: pipeline LLVMGPUVectorDistribute,
// workgroup_size = [64, 1, 1], subgroup_size = 64.
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// Divisor used by the last linalg.generic (1280.0, same as the reduced extent).
%cst = arith.constant 1.280000e+03 : f32
// Zero used to seed the sum accumulator through linalg.fill.
%cst_0 = arith.constant 0.000000e+00 : f32
// Interface constants at ordinals 0 and 1, both loaded as i32.
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
// Cast both to index (unsigned). They pass through util.assume.int next, and
// its two results become the offsets of the input and output binding subspans.
// NOTE(review): offset units are presumably bytes - confirm.
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// Input: binding(0), read-only, viewed as tensor<64x1280xf16> at offset %4#0 (alignment 64).
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
// Output: binding(1), write-only, viewed as tensor<64xf32> at offset %4#1 (alignment 64).
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
// Materialise the full 64x1280 input as a tensor value (zero offsets, unit strides).
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
// Placeholder destinations whose contents are not defined:
// %8 backs the fill/accumulator and is reused as outs of the final divide.
%8 = tensor.empty() : tensor<64xf32>
// %9 backs the result of the f16 -> f32 conversion.
%9 = tensor.empty() : tensor<64x1280xf32>
// Convert every input element from f16 to f32 (identity maps, two parallel dims).
// Attached lowering_config: workgroup = [64, 0], thread = [0, 8], plus
// subgroup_basis / thread_basis.
// NOTE(review): thread_basis lists [64, 32] as its first entry although the
// workgroup has 64 threads - verify this is expected.
%10 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%7 : tensor<64x1280xf16>) outs(%9 : tensor<64x1280xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
// Only %in is used; %out is ignored.
^bb0(%in: f16, %out: f32):
%14 = arith.extf %in : f16 to f32
linalg.yield %14 : f32
} -> tensor<64x1280xf32>
// Zero the 64-element accumulator.
%11 = linalg.fill ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) -> tensor<64xf32>
// Sum each row of the widened 64x1280 tensor into the accumulator:
// iterator_types = ["parallel", "reduction"], output indexed by d0 only.
// lowering_config here also sets partial_reduction = [0, 256].
// NOTE(review): likely the chunk size used along the reduction dim - confirm.
%12 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%10 : tensor<64x1280xf32>) outs(%11 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
// Combiner: new partial sum = input element + previous partial sum.
^bb0(%in: f32, %out: f32):
%14 = arith.addf %in, %out : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
// Scale the row sums by 1/1280: each element of %12 is divided by %cst.
// Single parallel dimension; outs is the tensor.empty %8 and its element (%out) is unused.
%13 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%12 : tensor<64xf32>) outs(%8 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.divf %in, %cst : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
// Store the 64 means into the output binding, covering its full extent.
flow.dispatch.tensor.store %13, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%cst = arith.constant 1.280000e+03 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
%8 = tensor.empty() : tensor<64xf32>
%9 = tensor.empty() : tensor<64x1280xf32>
%10 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%7 : tensor<64x1280xf16>) outs(%9 : tensor<64x1280xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%14 = arith.extf %in : f16 to f32
linalg.yield %14 : f32
} -> tensor<64x1280xf32>
%11 = linalg.fill ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) -> tensor<64xf32>
%12 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%10 : tensor<64x1280xf32>) outs(%11 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.addf %in, %out : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
%13 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%12 : tensor<64xf32>) outs(%8 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.divf %in, %cst : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
flow.dispatch.tensor.store %13, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After ConvertAttentionToOnlineAttentionPass (iree-linalg-ext-convert-attention-to-online-attention) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%cst = arith.constant 1.280000e+03 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
%8 = tensor.empty() : tensor<64xf32>
%9 = tensor.empty() : tensor<64x1280xf32>
%10 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%7 : tensor<64x1280xf16>) outs(%9 : tensor<64x1280xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%14 = arith.extf %in : f16 to f32
linalg.yield %14 : f32
} -> tensor<64x1280xf32>
%11 = linalg.fill ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) -> tensor<64xf32>
%12 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%10 : tensor<64x1280xf32>) outs(%11 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.addf %in, %out : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
%13 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%12 : tensor<64xf32>) outs(%8 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.divf %in, %cst : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
flow.dispatch.tensor.store %13, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After ConfigTrackingCanonicalizerPass (iree-codegen-config-tracking-canonicalize) //----- //
// @encode: reads a 64x1280 f16 tensor from binding(0), widens it to f32, sums
// it along d1 into a 64-element f32 tensor, divides every sum by 1280.0 and
// writes the 64xf32 result to binding(1). Because the divisor equals the
// extent of the reduced dimension, the result is the mean over d1.
// translation_info selects the LLVMGPUVectorDistribute pipeline with
// workgroup_size = [64, 1, 1] and subgroup_size = 64.
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// %cst (1280.0) is the divisor used by the final linalg.generic.
%cst = arith.constant 1.280000e+03 : f32
// %cst_0 (0.0) is the initial value of the reduction accumulator.
%cst_0 = arith.constant 0.000000e+00 : f32
// Two i32 constants (ordinals 0 and 1) are loaded and cast, as unsigned, to
// index; after util.assume.int they become the offset operands of the two
// binding subspans below.
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
// util.assume.int attaches a list of <umin, umax, udiv> assumptions to %2 and
// to %3 and yields them as %4#0 and %4#1. Every entry has umin == umax == udiv.
// NOTE(review): presumably each entry corresponds to one dispatch site of this
// executable -- confirm against the util dialect documentation.
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// %5: read-only view of binding(0) at offset %4#0, typed as tensor<64x1280xf16>.
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
// %6: write-only view of binding(1) at offset %4#1, typed as tensor<64xf32>.
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
// Load the whole 64x1280 input (zero offsets, unit strides).
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
// %8 is the destination of both the fill (%11) and the final divide (%13);
// %9 is the destination of the f16 -> f32 extension (%10).
%8 = tensor.empty() : tensor<64xf32>
%9 = tensor.empty() : tensor<64x1280xf32>
// Step 1: elementwise extension f16 -> f32 (both iterators are parallel).
// NOTE(review): the thread_basis counts [64, 32] multiply to 2048, while
// translation_info declares workgroup_size = [64, 1, 1] -- confirm this
// lowering_config is what the configuration logic intended.
%10 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%7 : tensor<64x1280xf16>) outs(%9 : tensor<64x1280xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%14 = arith.extf %in : f16 to f32
linalg.yield %14 : f32
} -> tensor<64x1280xf32>
// Step 2: zero-initialise the 64-element accumulator.
%11 = linalg.fill ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) -> tensor<64xf32>
// Step 3: sum over d1 (the "reduction" iterator); d0 stays parallel. This op
// additionally carries partial_reduction = [0, 256] in its lowering_config.
%12 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%10 : tensor<64x1280xf32>) outs(%11 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.addf %in, %out : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
// Step 4: divide each of the 64 sums by %cst (1280.0). The body never reads
// %out, so %8 only supplies the result shape here.
%13 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%12 : tensor<64xf32>) outs(%8 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.divf %in, %cst : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
// Write the 64 results to the output binding.
flow.dispatch.tensor.store %13, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After CSE (cse) //----- //
// @encode: reads a 64x1280 f16 tensor from binding(0), widens it to f32, sums
// it along d1 into a 64-element f32 tensor, divides every sum by 1280.0 and
// writes the 64xf32 result to binding(1). Because the divisor equals the
// extent of the reduced dimension, the result is the mean over d1.
// translation_info selects the LLVMGPUVectorDistribute pipeline with
// workgroup_size = [64, 1, 1] and subgroup_size = 64.
// NOTE(review): this snapshot appears to contain the same ops as the dump
// printed immediately before it -- confirm with a textual diff if it matters.
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// %cst (1280.0) is the divisor used by the final linalg.generic.
%cst = arith.constant 1.280000e+03 : f32
// %cst_0 (0.0) is the initial value of the reduction accumulator.
%cst_0 = arith.constant 0.000000e+00 : f32
// Two i32 constants (ordinals 0 and 1) are loaded and cast, as unsigned, to
// index; after util.assume.int they become the offset operands of the two
// binding subspans below.
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
// util.assume.int attaches a list of <umin, umax, udiv> assumptions to %2 and
// to %3 and yields them as %4#0 and %4#1. Every entry has umin == umax == udiv.
// NOTE(review): presumably each entry corresponds to one dispatch site of this
// executable -- confirm against the util dialect documentation.
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// %5: read-only view of binding(0) at offset %4#0, typed as tensor<64x1280xf16>.
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
// %6: write-only view of binding(1) at offset %4#1, typed as tensor<64xf32>.
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
// Load the whole 64x1280 input (zero offsets, unit strides).
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
// %8 is the destination of both the fill (%11) and the final divide (%13);
// %9 is the destination of the f16 -> f32 extension (%10).
%8 = tensor.empty() : tensor<64xf32>
%9 = tensor.empty() : tensor<64x1280xf32>
// Step 1: elementwise extension f16 -> f32 (both iterators are parallel).
// NOTE(review): the thread_basis counts [64, 32] multiply to 2048, while
// translation_info declares workgroup_size = [64, 1, 1] -- confirm this
// lowering_config is what the configuration logic intended.
%10 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%7 : tensor<64x1280xf16>) outs(%9 : tensor<64x1280xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%14 = arith.extf %in : f16 to f32
linalg.yield %14 : f32
} -> tensor<64x1280xf32>
// Step 2: zero-initialise the 64-element accumulator.
%11 = linalg.fill ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) -> tensor<64xf32>
// Step 3: sum over d1 (the "reduction" iterator); d0 stays parallel. This op
// additionally carries partial_reduction = [0, 256] in its lowering_config.
%12 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%10 : tensor<64x1280xf32>) outs(%11 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.addf %in, %out : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
// Step 4: divide each of the 64 sums by %cst (1280.0). The body never reads
// %out, so %8 only supplies the result shape here.
%13 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%12 : tensor<64xf32>) outs(%8 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.divf %in, %cst : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
// Write the 64 results to the output binding.
flow.dispatch.tensor.store %13, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After GPUPromoteMatmulOperandsPass (iree-codegen-gpu-promote-matmul-operands) //----- //
// Dispatch function @encode: takes no arguments and returns nothing; data is
// reached through HAL bindings 0 (read-only input) and 1 (write-only output).
// Visible computation: load tensor<64x1280xf16>, extend to f32, sum along d1
// into tensor<64xf32>, divide every sum by 1280.0, and store the result.
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// Divisor used by the final elementwise op; equals the extent of the reduced dimension (1280).
%cst = arith.constant 1.280000e+03 : f32
// Zero used to initialise the reduction accumulator.
%cst_0 = arith.constant 0.000000e+00 : f32
// Two i32 values loaded from constant ordinals 0 and 1 of the pipeline layout.
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
// Unsigned casts to index; the results feed util.assume.int below.
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
// Attaches a list of <umin, umax, udiv> entries to each of %2 and %3 and yields
// them as %4#0 and %4#1. In the entries shown, umin, umax and udiv hold the same value.
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// Binding 0: read-only 64x1280 f16 tensor, addressed at offset %4#0.
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
// Binding 1: write-only 64-element f32 tensor, addressed at offset %4#1.
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
// Load the whole 64x1280 input (zero offsets, unit strides).
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
// Destination tensors: %8 for the 64-element results, %9 for the f32 copy of the input.
%8 = tensor.empty() : tensor<64xf32>
%9 = tensor.empty() : tensor<64x1280xf32>
// Elementwise (parallel x parallel) extension of the input from f16 to f32.
%10 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%7 : tensor<64x1280xf16>) outs(%9 : tensor<64x1280xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%14 = arith.extf %in : f16 to f32
linalg.yield %14 : f32
} -> tensor<64x1280xf32>
// Accumulator for the reduction: %8 filled with 0.0.
%11 = linalg.fill ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) -> tensor<64xf32>
// Sum over d1 (the "reduction" iterator) for every d0; the output map keeps d0 only.
// NOTE(review): thread_basis lists [64, 32] while workgroup_size is [64, 1, 1] - confirm how these are meant to relate.
%12 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%10 : tensor<64x1280xf32>) outs(%11 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %out: f32):
// Running sum: add the current element to the accumulator value.
%14 = arith.addf %in, %out : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
// Elementwise division of every sum by %cst (1280.0); %8 is reused as the destination operand.
%13 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%12 : tensor<64xf32>) outs(%8 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.divf %in, %cst : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
// Write the 64 results to binding 1.
flow.dispatch.tensor.store %13, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After GPUApplyTilingLevelPass (iree-codegen-gpu-apply-tiling-level) //----- //
// Dispatch function @encode: takes no arguments and returns nothing; data is
// reached through HAL bindings 0 (read-only input) and 1 (write-only output).
// Visible computation: load tensor<64x1280xf16>, extend to f32, sum along d1
// into tensor<64xf32>, divide every sum by 1280.0, and store the result.
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// Divisor used by the final elementwise op; equals the extent of the reduced dimension (1280).
%cst = arith.constant 1.280000e+03 : f32
// Zero used to initialise the reduction accumulator.
%cst_0 = arith.constant 0.000000e+00 : f32
// Two i32 values loaded from constant ordinals 0 and 1 of the pipeline layout.
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
// Unsigned casts to index; the results feed util.assume.int below.
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
// Attaches a list of <umin, umax, udiv> entries to each of %2 and %3 and yields
// them as %4#0 and %4#1. In the entries shown, umin, umax and udiv hold the same value.
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// Binding 0: read-only 64x1280 f16 tensor, addressed at offset %4#0.
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
// Binding 1: write-only 64-element f32 tensor, addressed at offset %4#1.
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
// Load the whole 64x1280 input (zero offsets, unit strides).
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
// Destination tensors: %8 for the 64-element results, %9 for the f32 copy of the input.
%8 = tensor.empty() : tensor<64xf32>
%9 = tensor.empty() : tensor<64x1280xf32>
// Elementwise (parallel x parallel) extension of the input from f16 to f32.
%10 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%7 : tensor<64x1280xf16>) outs(%9 : tensor<64x1280xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%14 = arith.extf %in : f16 to f32
linalg.yield %14 : f32
} -> tensor<64x1280xf32>
// Accumulator for the reduction: %8 filled with 0.0.
%11 = linalg.fill ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) -> tensor<64xf32>
// Sum over d1 (the "reduction" iterator) for every d0; the output map keeps d0 only.
// NOTE(review): thread_basis lists [64, 32] while workgroup_size is [64, 1, 1] - confirm how these are meant to relate.
%12 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%10 : tensor<64x1280xf32>) outs(%11 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %out: f32):
// Running sum: add the current element to the accumulator value.
%14 = arith.addf %in, %out : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
// Elementwise division of every sum by %cst (1280.0); %8 is reused as the destination operand.
%13 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%12 : tensor<64xf32>) outs(%8 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.divf %in, %cst : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
// Write the 64 results to binding 1.
flow.dispatch.tensor.store %13, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After LoopCoalescing (affine-loop-coalescing) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%cst = arith.constant 1.280000e+03 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
%8 = tensor.empty() : tensor<64xf32>
%9 = tensor.empty() : tensor<64x1280xf32>
%10 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%7 : tensor<64x1280xf16>) outs(%9 : tensor<64x1280xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%14 = arith.extf %in : f16 to f32
linalg.yield %14 : f32
} -> tensor<64x1280xf32>
%11 = linalg.fill ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) -> tensor<64xf32>
%12 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%10 : tensor<64x1280xf32>) outs(%11 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.addf %in, %out : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
%13 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%12 : tensor<64xf32>) outs(%8 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.divf %in, %cst : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
flow.dispatch.tensor.store %13, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After ConfigTrackingCanonicalizerPass (iree-codegen-config-tracking-canonicalize) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%cst = arith.constant 1.280000e+03 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
%8 = tensor.empty() : tensor<64xf32>
%9 = tensor.empty() : tensor<64x1280xf32>
%10 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%7 : tensor<64x1280xf16>) outs(%9 : tensor<64x1280xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%14 = arith.extf %in : f16 to f32
linalg.yield %14 : f32
} -> tensor<64x1280xf32>
%11 = linalg.fill ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) -> tensor<64xf32>
%12 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%10 : tensor<64x1280xf32>) outs(%11 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.addf %in, %out : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
%13 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%12 : tensor<64xf32>) outs(%8 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.divf %in, %cst : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
flow.dispatch.tensor.store %13, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After CSE (cse) //----- //
// @encode as printed after CSE. It takes no arguments and returns nothing.
// Visible data flow: load a 64x1280 f16 tensor from binding(0), widen every
// element to f32, sum along d1 into a 64xf32 tensor, divide each sum by
// 1280.0, and store the 64xf32 result to binding(1).
// translation_info selects pipeline LLVMGPUVectorDistribute with
// workgroup_size = [64, 1, 1] and subgroup_size = 64.
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// %cst (1280.0) is the divisor used by the last linalg.generic; %cst_0 (0.0)
// is the value written by linalg.fill into the reduction's init tensor.
%cst = arith.constant 1.280000e+03 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
// Two i32 values are loaded from constant ordinals 0 and 1 and cast
// (unsigned) to index.
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
// util.assume.int attaches a list of <umin, umax, udiv> entries to %2 and to
// %3; every entry listed below uses one identical value for all three fields.
// The results %4#0 and %4#1 are the offset(...) operands of the two subspans.
// NOTE(review): why the same entries repeat so many times is not visible in
// this dump - confirm against whatever produced these assumptions.
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// %5 is binding(0), typed as a read-only 64x1280 f16 dispatch tensor at
// offset %4#0; %6 is binding(1), typed as a write-only 64xf32 dispatch tensor
// at offset %4#1.
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
// Load the whole 64x1280 input (offsets [0, 0], unit strides).
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
// %8 is used both as the fill destination (%11) and as the outs operand of
// the final divide (%13); %9 is the outs operand of the extf generic (%10).
%8 = tensor.empty() : tensor<64xf32>
%9 = tensor.empty() : tensor<64x1280xf32>
// Elementwise widening: every f16 element of %7 is extended to f32.
%10 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%7 : tensor<64x1280xf16>) outs(%9 : tensor<64x1280xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%14 = arith.extf %in : f16 to f32
linalg.yield %14 : f32
} -> tensor<64x1280xf32>
// Zero-initialise the 64xf32 accumulator for the reduction below.
%11 = linalg.fill ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) -> tensor<64xf32>
// Sum reduction: d0 is parallel, d1 is the reduction iterator, and the output
// map drops d1, so each of the 64 outputs accumulates its 1280 inputs with
// addf. This op's lowering_config carries partial_reduction = [0, 256].
%12 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%10 : tensor<64x1280xf32>) outs(%11 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.addf %in, %out : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
// Divide every sum by %cst (1280.0, the size of the reduced dimension), which
// yields the mean along d1. The %out block argument is not read.
%13 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%12 : tensor<64xf32>) outs(%8 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.divf %in, %cst : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
// Write all 64 results to the write-only output binding.
flow.dispatch.tensor.store %13, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After GPUApplyTilingLevelPass (iree-codegen-gpu-apply-tiling-level) //----- //
// @encode as printed after GPUApplyTilingLevelPass. It takes no arguments and
// returns nothing. Compared with the single reduction generic printed in the
// preceding CSE dump, the sum along d1 is expressed here as an scf.for over
// 256-wide column slices that accumulates elementwise into a 64x256 f32
// tensor, followed by a linalg.reduce over dimension 1 of that accumulator.
// The result is then divided by 1280.0 and stored to binding(1).
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// Loop bounds for the scf.for below: step %c256, upper bound %c1280, lower
// bound %c0.
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
// %cst (1280.0) is the divisor in the final generic; %cst_0 (0.0) is the fill
// value for both accumulators.
%cst = arith.constant 1.280000e+03 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
// Two i32 values are loaded from constant ordinals 0 and 1 and cast
// (unsigned) to index.
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
// util.assume.int attaches a list of <umin, umax, udiv> entries to %2 and to
// %3; every entry listed below uses one identical value for all three fields.
// The results %4#0 and %4#1 are the offset(...) operands of the two subspans.
// NOTE(review): why the same entries repeat so many times is not visible in
// this dump - confirm against whatever produced these assumptions.
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// %5 is binding(0), typed as a read-only 64x1280 f16 dispatch tensor at
// offset %4#0; %6 is binding(1), typed as a write-only 64xf32 dispatch tensor
// at offset %4#1.
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
// Load the whole 64x1280 input (offsets [0, 0], unit strides).
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
// %9 is the zero-filled 64xf32 init of the final linalg.reduce; %8 is also
// reused as the outs operand of the divide generic (%13).
%8 = tensor.empty() : tensor<64xf32>
%9 = linalg.fill ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) -> tensor<64xf32>
// %11 is the zero-filled 64x256 f32 accumulator carried through the loop.
%10 = tensor.empty() : tensor<64x256xf32>
%11 = linalg.fill ins(%cst_0 : f32) outs(%10 : tensor<64x256xf32>) -> tensor<64x256xf32>
// Iterate %arg0 over 0, 256, ..., 1024 (1280 / 256 = 5 iterations), carrying
// the 64x256 accumulator as %arg1.
%12 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %11) -> (tensor<64x256xf32>) {
// Take the 64x256 column slice of the input that starts at column %arg0.
%extracted_slice = tensor.extract_slice %7[0, %arg0] [64, 256] [1, 1] : tensor<64x1280xf16> to tensor<64x256xf16>
%14 = tensor.empty() : tensor<64x256xf32>
// Widen the slice elementwise from f16 to f32.
%15 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%extracted_slice : tensor<64x256xf16>) outs(%14 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%17 = arith.extf %in : f16 to f32
linalg.yield %17 : f32
} -> tensor<64x256xf32>
// Elementwise accumulate: both iterators are parallel and the body adds the
// widened slice element to the matching element of the carried accumulator
// (%arg1). This op's lowering_config carries partial_reduction = [0, 256].
%16 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%15 : tensor<64x256xf32>) outs(%arg1 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %out: f32):
%17 = arith.addf %in, %out : f32
linalg.yield %17 : f32
} -> tensor<64x256xf32>
scf.yield %16 : tensor<64x256xf32>
}
// Collapse the 64x256 accumulator along dimension 1 with addf, starting from
// the zero-filled %9, to obtain the 64 per-row sums.
%reduced = linalg.reduce ins(%12 : tensor<64x256xf32>) outs(%9 : tensor<64xf32>) dimensions = [1]
(%in: f32, %init: f32) {
%14 = arith.addf %in, %init : f32
linalg.yield %14 : f32
}
// Divide every sum by %cst (1280.0, the size of the reduced input dimension),
// which yields the mean along d1. The %out block argument is not read.
%13 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%reduced : tensor<64xf32>) outs(%8 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.divf %in, %cst : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
// Write all 64 results to the write-only output binding.
flow.dispatch.tensor.store %13, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After LoopCoalescing (affine-loop-coalescing) //----- //
// @encode: reads a 64x1280 f16 tensor from binding 0, sums every row in f32,
// divides each row sum by 1280.0 (the row length, so the result is a row-wise
// mean) and writes the 64 f32 results to binding 1.
// The row sum is formed in two stages: an scf.for loop accumulates five
// 256-column chunks elementwise into a 64x256 f32 buffer, then a linalg.reduce
// collapses that buffer along dimension 1.
// translation_info requests the LLVMGPUVectorDistribute pipeline with
// workgroup_size = [64, 1, 1] and subgroup_size = 64.
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// Step, upper bound and lower bound of the column-chunk loop below.
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
// %cst (1280.0) is the divisor applied after the reduction; %cst_0 (0.0) is the
// fill value for both accumulators.
%cst = arith.constant 1.280000e+03 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
// Two i32 interface constants (ordinals 0 and 1), cast unsigned to index. After
// the assume op they serve as the offsets of binding 0 and binding 1.
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
// Attaches a list of <umin, umax, udiv> assumptions to %2 and to %3 and yields
// them as %4#0 and %4#1. In every entry umin == umax == udiv.
// NOTE(review): the entries presumably correspond to the individual call sites
// of this dispatch - confirm against the host-side IR.
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// Binding 0: read-only 64x1280 f16 input at offset %4#0.
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
// Binding 1: write-only 64-element f32 output at offset %4#1.
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
// Load the whole 64x1280 input as a tensor value.
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
// %8 is used twice: as the destination of the zero fill %9 (the init of the
// final reduction) and as the outs operand of the final divide.
%8 = tensor.empty() : tensor<64xf32>
%9 = linalg.fill ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) -> tensor<64xf32>
// Zero-filled 64x256 f32 accumulator carried through the loop as %arg1.
%10 = tensor.empty() : tensor<64x256xf32>
%11 = linalg.fill ins(%cst_0 : f32) outs(%10 : tensor<64x256xf32>) -> tensor<64x256xf32>
// Walk the 1280 columns in steps of 256 (offsets 0, 256, 512, 768, 1024).
%12 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %11) -> (tensor<64x256xf32>) {
// Current chunk: all 64 rows, 256 columns starting at column %arg0.
%extracted_slice = tensor.extract_slice %7[0, %arg0] [64, 256] [1, 1] : tensor<64x1280xf16> to tensor<64x256xf16>
%14 = tensor.empty() : tensor<64x256xf32>
// Elementwise widening of the chunk from f16 to f32.
%15 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%extracted_slice : tensor<64x256xf16>) outs(%14 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%17 = arith.extf %in : f16 to f32
linalg.yield %17 : f32
} -> tensor<64x256xf32>
// Elementwise accumulate: new accumulator = widened chunk + %arg1. Both
// iterators are parallel, so no reduction happens inside this op.
// NOTE(review): partial_reduction = [0, 256] in the lowering_config presumably
// records the tile size that produced this loop (it equals the step) - confirm.
%16 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%15 : tensor<64x256xf32>) outs(%arg1 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %out: f32):
%17 = arith.addf %in, %out : f32
linalg.yield %17 : f32
} -> tensor<64x256xf32>
scf.yield %16 : tensor<64x256xf32>
}
// Collapse the 256 partial sums of each row (dimension 1) into one f32 per row,
// starting from the zero-filled %9.
%reduced = linalg.reduce ins(%12 : tensor<64x256xf32>) outs(%9 : tensor<64xf32>) dimensions = [1]
(%in: f32, %init: f32) {
%14 = arith.addf %in, %init : f32
linalg.yield %14 : f32
}
// Divide each row sum by %cst (1280.0). The %out block argument is unused, so
// %8 only supplies the result shape here.
%13 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%reduced : tensor<64xf32>) outs(%8 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.divf %in, %cst : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
// Write all 64 results to the output binding.
flow.dispatch.tensor.store %13, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After ConfigTrackingCanonicalizerPass (iree-codegen-config-tracking-canonicalize) //----- //
// @encode: computes, for each of the 64 rows of a 64x1280 f16 input (binding 0),
// the f32 sum of the row divided by 1280.0, and stores the 64 results to
// binding 1. Since 1280 is the row length this is a row-wise mean.
// Structure: an scf.for loop adds five 256-column chunks elementwise into a
// 64x256 f32 accumulator; linalg.reduce then sums that accumulator along
// dimension 1; a final linalg.generic performs the division.
// translation_info selects pipeline LLVMGPUVectorDistribute with
// workgroup_size = [64, 1, 1] and subgroup_size = 64.
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// Index constants for the chunk loop: step 256, upper bound 1280, lower bound 0.
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
// f32 constants: %cst = 1280.0 is the final divisor, %cst_0 = 0.0 initialises
// the accumulators.
%cst = arith.constant 1.280000e+03 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
// Interface constants 0 and 1 are loaded as i32 and cast (unsigned) to index;
// they become the binding offsets once passed through util.assume.int.
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
// Per-operand lists of <umin, umax, udiv> assumptions for %2 and %3; the op's
// two results are %4#0 and %4#1. Each entry has umin == umax == udiv.
// NOTE(review): the entries presumably enumerate the values seen at each call
// site of this dispatch - confirm against the host-side IR.
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// Input view: binding 0, read-only, tensor<64x1280xf16>, offset %4#0.
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
// Output view: binding 1, write-only, tensor<64xf32>, offset %4#1.
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
// Materialise the full input as a tensor value.
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
// %8 backs both the zero-filled reduction init %9 and the outs operand of the
// final divide.
%8 = tensor.empty() : tensor<64xf32>
%9 = linalg.fill ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) -> tensor<64xf32>
// Zero-initialised 64x256 accumulator; enters the loop as %arg1.
%10 = tensor.empty() : tensor<64x256xf32>
%11 = linalg.fill ins(%cst_0 : f32) outs(%10 : tensor<64x256xf32>) -> tensor<64x256xf32>
// Five iterations: column offsets 0, 256, 512, 768 and 1024.
%12 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %11) -> (tensor<64x256xf32>) {
// Slice out the 64x256 chunk that starts at column %arg0.
%extracted_slice = tensor.extract_slice %7[0, %arg0] [64, 256] [1, 1] : tensor<64x1280xf16> to tensor<64x256xf16>
%14 = tensor.empty() : tensor<64x256xf32>
// Convert the chunk from f16 to f32, element by element.
%15 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%extracted_slice : tensor<64x256xf16>) outs(%14 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%17 = arith.extf %in : f16 to f32
linalg.yield %17 : f32
} -> tensor<64x256xf32>
// Add the converted chunk into the running accumulator %arg1 (elementwise; both
// iterators are parallel).
// NOTE(review): partial_reduction = [0, 256] presumably marks this op as the
// partial-reduction step and matches the loop step of 256 - confirm.
%16 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%15 : tensor<64x256xf32>) outs(%arg1 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %out: f32):
%17 = arith.addf %in, %out : f32
linalg.yield %17 : f32
} -> tensor<64x256xf32>
scf.yield %16 : tensor<64x256xf32>
}
// Final reduction: sum the accumulator along dimension 1 into one value per row,
// seeded with the zeros in %9.
%reduced = linalg.reduce ins(%12 : tensor<64x256xf32>) outs(%9 : tensor<64xf32>) dimensions = [1]
(%in: f32, %init: f32) {
%14 = arith.addf %in, %init : f32
linalg.yield %14 : f32
}
// Scale each row sum by 1/1280 via a division by %cst. The body never reads
// %out, so %8 only provides the result shape.
%13 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%reduced : tensor<64xf32>) outs(%8 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.divf %in, %cst : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
// Store the 64 results to the output binding.
flow.dispatch.tensor.store %13, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 1.280000e+03 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
%8 = tensor.empty() : tensor<64xf32>
%9 = linalg.fill ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) -> tensor<64xf32>
%10 = tensor.empty() : tensor<64x256xf32>
%11 = linalg.fill ins(%cst_0 : f32) outs(%10 : tensor<64x256xf32>) -> tensor<64x256xf32>
%12 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %11) -> (tensor<64x256xf32>) {
%extracted_slice = tensor.extract_slice %7[0, %arg0] [64, 256] [1, 1] : tensor<64x1280xf16> to tensor<64x256xf16>
%14 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%extracted_slice : tensor<64x256xf16>) outs(%10 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%16 = arith.extf %in : f16 to f32
linalg.yield %16 : f32
} -> tensor<64x256xf32>
%15 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%14 : tensor<64x256xf32>) outs(%arg1 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %out: f32):
%16 = arith.addf %in, %out : f32
linalg.yield %16 : f32
} -> tensor<64x256xf32>
scf.yield %15 : tensor<64x256xf32>
}
%reduced = linalg.reduce ins(%12 : tensor<64x256xf32>) outs(%9 : tensor<64xf32>) dimensions = [1]
(%in: f32, %init: f32) {
%14 = arith.addf %in, %init : f32
linalg.yield %14 : f32
}
%13 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%reduced : tensor<64xf32>) outs(%8 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.divf %in, %cst : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
flow.dispatch.tensor.store %13, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After DecomposeAttentionPass (iree-linalg-ext-decompose-attention) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 1.280000e+03 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
%8 = tensor.empty() : tensor<64xf32>
%9 = linalg.fill ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) -> tensor<64xf32>
%10 = tensor.empty() : tensor<64x256xf32>
%11 = linalg.fill ins(%cst_0 : f32) outs(%10 : tensor<64x256xf32>) -> tensor<64x256xf32>
%12 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %11) -> (tensor<64x256xf32>) {
%extracted_slice = tensor.extract_slice %7[0, %arg0] [64, 256] [1, 1] : tensor<64x1280xf16> to tensor<64x256xf16>
%14 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%extracted_slice : tensor<64x256xf16>) outs(%10 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%16 = arith.extf %in : f16 to f32
linalg.yield %16 : f32
} -> tensor<64x256xf32>
%15 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%14 : tensor<64x256xf32>) outs(%arg1 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %out: f32):
%16 = arith.addf %in, %out : f32
linalg.yield %16 : f32
} -> tensor<64x256xf32>
scf.yield %15 : tensor<64x256xf32>
}
%reduced = linalg.reduce ins(%12 : tensor<64x256xf32>) outs(%9 : tensor<64xf32>) dimensions = [1]
(%in: f32, %init: f32) {
%14 = arith.addf %in, %init : f32
linalg.yield %14 : f32
}
%13 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%reduced : tensor<64xf32>) outs(%8 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.divf %in, %cst : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
flow.dispatch.tensor.store %13, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After ConfigTrackingCanonicalizerPass (iree-codegen-config-tracking-canonicalize) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 1.280000e+03 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
%8 = tensor.empty() : tensor<64xf32>
%9 = linalg.fill ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) -> tensor<64xf32>
%10 = tensor.empty() : tensor<64x256xf32>
%11 = linalg.fill ins(%cst_0 : f32) outs(%10 : tensor<64x256xf32>) -> tensor<64x256xf32>
%12 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %11) -> (tensor<64x256xf32>) {
%extracted_slice = tensor.extract_slice %7[0, %arg0] [64, 256] [1, 1] : tensor<64x1280xf16> to tensor<64x256xf16>
%14 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%extracted_slice : tensor<64x256xf16>) outs(%10 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%16 = arith.extf %in : f16 to f32
linalg.yield %16 : f32
} -> tensor<64x256xf32>
%15 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%14 : tensor<64x256xf32>) outs(%arg1 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %out: f32):
%16 = arith.addf %in, %out : f32
linalg.yield %16 : f32
} -> tensor<64x256xf32>
scf.yield %15 : tensor<64x256xf32>
}
%reduced = linalg.reduce ins(%12 : tensor<64x256xf32>) outs(%9 : tensor<64xf32>) dimensions = [1]
(%in: f32, %init: f32) {
%14 = arith.addf %in, %init : f32
linalg.yield %14 : f32
}
%13 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%reduced : tensor<64xf32>) outs(%8 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.divf %in, %cst : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
flow.dispatch.tensor.store %13, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 1.280000e+03 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
%8 = tensor.empty() : tensor<64xf32>
%9 = linalg.fill ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) -> tensor<64xf32>
%10 = tensor.empty() : tensor<64x256xf32>
%11 = linalg.fill ins(%cst_0 : f32) outs(%10 : tensor<64x256xf32>) -> tensor<64x256xf32>
%12 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %11) -> (tensor<64x256xf32>) {
%extracted_slice = tensor.extract_slice %7[0, %arg0] [64, 256] [1, 1] : tensor<64x1280xf16> to tensor<64x256xf16>
%14 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%extracted_slice : tensor<64x256xf16>) outs(%10 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%16 = arith.extf %in : f16 to f32
linalg.yield %16 : f32
} -> tensor<64x256xf32>
%15 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%14 : tensor<64x256xf32>) outs(%arg1 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %out: f32):
%16 = arith.addf %in, %out : f32
linalg.yield %16 : f32
} -> tensor<64x256xf32>
scf.yield %15 : tensor<64x256xf32>
}
%reduced = linalg.reduce ins(%12 : tensor<64x256xf32>) outs(%9 : tensor<64xf32>) dimensions = [1]
(%in: f32, %init: f32) {
%14 = arith.addf %in, %init : f32
linalg.yield %14 : f32
}
%13 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%reduced : tensor<64xf32>) outs(%8 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.divf %in, %cst : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
flow.dispatch.tensor.store %13, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After DecomposeHorizontallyFusedGemmsPass (iree-codegen-gpu-decompose-horizontally-fused-gemms) //----- //
// @encode: per-row mean of a 64x1280 f16 input, written out as 64 f32 values.
//
// Visible data flow in this snapshot:
//   1. Two i32 push constants are loaded and used as the offsets of the input
//      (binding 0, read-only) and output (binding 1, write-only) subspans.
//   2. The whole 64x1280xf16 input is loaded.
//   3. An scf.for walks the second dimension in five 256-wide chunks
//      (0 to 1280 step 256), extending each chunk to f32 and adding it
//      elementwise into a 64x256xf32 accumulator that starts at zero.
//   4. linalg.reduce sums the accumulator along dimension 1 into 64xf32.
//   5. Each sum is divided by 1280.0 and the result is stored to binding 1.
//
// The function attribute selects the LLVMGPUVectorDistribute pipeline with
// workgroup_size = [64, 1, 1] and subgroup_size = 64.
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// Loop step, loop upper bound and loop lower bound for the chunked scf.for below.
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
// Divisor used by the final elementwise op (equals the extent of the reduced dimension, 1280).
%cst = arith.constant 1.280000e+03 : f32
// Zero used to initialise both the 64xf32 reduction init and the 64x256xf32 accumulator.
%cst_0 = arith.constant 0.000000e+00 : f32
// Push constants ordinal(0) and ordinal(1), loaded as i32.
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
// Unsigned cast of both constants to index so they can be used as subspan offsets.
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
// Attaches a list of <umin, umax, udiv> assumption tuples to %2 and to %3 and
// returns them as %4#0 and %4#1. In every tuple umin == umax == udiv.
// NOTE(review): presumably one tuple per call site of this dispatch - confirm
// against the producer of these assumptions.
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// Input: binding(0), read-only 64x1280xf16, located at offset %4#0.
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
// Output: binding(1), write-only 64xf32, located at offset %4#1.
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
// Load the full 64x1280 input as a tensor value.
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
// %8 is reused twice: as the destination of the zero fill (%9, the init of the
// final linalg.reduce) and as the outs operand of the final divide.
%8 = tensor.empty() : tensor<64xf32>
%9 = linalg.fill ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) -> tensor<64xf32>
// %10 is likewise reused: as the destination of the zero fill (%11, the initial
// loop-carried accumulator) and as the outs operand of the f16->f32 extension.
%10 = tensor.empty() : tensor<64x256xf32>
%11 = linalg.fill ins(%cst_0 : f32) outs(%10 : tensor<64x256xf32>) -> tensor<64x256xf32>
// Chunked accumulation: %arg0 takes the values 0, 256, 512, 768, 1024 and
// %arg1 carries the running 64x256xf32 partial sums.
%12 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %11) -> (tensor<64x256xf32>) {
// Take the 64x256 window of the input starting at column %arg0.
%extracted_slice = tensor.extract_slice %7[0, %arg0] [64, 256] [1, 1] : tensor<64x1280xf16> to tensor<64x256xf16>
// Elementwise f16 -> f32 extension of the window; the body does not read %out.
%14 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%extracted_slice : tensor<64x256xf16>) outs(%10 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%16 = arith.extf %in : f16 to f32
linalg.yield %16 : f32
} -> tensor<64x256xf32>
// Elementwise add of the extended window into the loop-carried accumulator.
// Both iterators are parallel; this op's lowering_config additionally carries
// partial_reduction = [0, 256].
%15 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%14 : tensor<64x256xf32>) outs(%arg1 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %out: f32):
%16 = arith.addf %in, %out : f32
linalg.yield %16 : f32
} -> tensor<64x256xf32>
// Hand the updated accumulator to the next iteration.
scf.yield %15 : tensor<64x256xf32>
}
// Sum the 64x256 partial sums along dimension 1, starting from the zero-filled %9.
%reduced = linalg.reduce ins(%12 : tensor<64x256xf32>) outs(%9 : tensor<64xf32>) dimensions = [1]
(%in: f32, %init: f32) {
%14 = arith.addf %in, %init : f32
linalg.yield %14 : f32
}
// Divide each of the 64 sums by 1280.0 (%cst); the body does not read %out.
%13 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%reduced : tensor<64xf32>) outs(%8 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%14 = arith.divf %in, %cst : f32
linalg.yield %14 : f32
} -> tensor<64xf32>
// Write the 64 results to the output binding.
flow.dispatch.tensor.store %13, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After LLVMGPUConfigureTensorLayoutsPass (iree-llvmgpu-configure-tensor-layouts) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 1.280000e+03 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
%8 = tensor.empty() : tensor<64xf32>
%9 = linalg.fill ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) -> tensor<64xf32>
%10 = tensor.empty() : tensor<64x256xf32>
%11 = linalg.fill ins(%cst_0 : f32) outs(%10 : tensor<64x256xf32>) -> tensor<64x256xf32>
%12 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %11) -> (tensor<64x256xf32>) {
%extracted_slice = tensor.extract_slice %7[0, %arg0] [64, 256] [1, 1] : tensor<64x1280xf16> to tensor<64x256xf16>
%17 = iree_vector_ext.to_layout %extracted_slice to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf16>
%18 = iree_vector_ext.to_layout %10 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%19 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%17 : tensor<64x256xf16>) outs(%18 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%25 = arith.extf %in : f16 to f32
linalg.yield %25 : f32
} -> tensor<64x256xf32>
%20 = iree_vector_ext.to_layout %19 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%21 = iree_vector_ext.to_layout %20 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%22 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%23 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%21 : tensor<64x256xf32>) outs(%22 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %out: f32):
%25 = arith.addf %in, %out : f32
linalg.yield %25 : f32
} -> tensor<64x256xf32>
%24 = iree_vector_ext.to_layout %23 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
scf.yield %24 : tensor<64x256xf32>
}
%reduced = linalg.reduce ins(%12 : tensor<64x256xf32>) outs(%9 : tensor<64xf32>) dimensions = [1]
(%in: f32, %init: f32) {
%17 = arith.addf %in, %init : f32
linalg.yield %17 : f32
}
%13 = iree_vector_ext.to_layout %reduced to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
%14 = iree_vector_ext.to_layout %8 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
%15 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%13 : tensor<64xf32>) outs(%14 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%17 = arith.divf %in, %cst : f32
linalg.yield %17 : f32
} -> tensor<64xf32>
%16 = iree_vector_ext.to_layout %15 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
flow.dispatch.tensor.store %16, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After IREELoopInvariantCodeMotionPass (iree-loop-invariant-code-motion) //----- //
// Dispatch function @encode as printed after the pass named in the dump header
// directly above. It takes no arguments and returns nothing; input and output
// are reached through the two HAL interface bindings set up below.
// Visible computation: for each of the 64 rows of a 64x1280 f16 tensor, the
// values are extended to f32, summed, divided by 1280.0, and the 64 f32
// results are stored.
// NOTE(review): translation_info declares workgroup_size = [64, 1, 1] and
// subgroup_size = 64, while the 2-D layouts / lowering configs below use
// thread_tile = [64, 32] and thread_basis = [[64, 32], [0, 1]] (64 * 32 = 2048).
// Confirm that this combination is intended.
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// Bounds of the column-tiled loop further down: 0 to 1280 in steps of 256.
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
// Divisor applied after the row reduction; numerically equal to the size of
// the reduced dimension (1280).
%cst = arith.constant 1.280000e+03 : f32
// Zero used to initialise both accumulator tensors.
%cst_0 = arith.constant 0.000000e+00 : f32
// Two i32 values read with hal.interface.constant.load (ordinals 0 and 1),
// then converted to index with an unsigned cast.
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
// util.assume.int attaches a list of <umin, umax, udiv> triples to each of the
// two index values and yields them as %4#0 / %4#1.
// NOTE(review): the operand lists below are hard-wrapped in the middle of an
// entry in this capture (e.g. a line ending in "udiv = " followed by a line
// starting with "98304>"); they are kept exactly as captured.
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// %4#0 and %4#1 are used as the offsets of the two binding subspans:
// binding(0) is the read-only 64x1280 f16 input, binding(1) the write-only
// 64xf32 output.
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
// Load the entire 64x1280 f16 input tensor.
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
// %9: zero-filled 64xf32 tensor, used as the init of the linalg.reduce below.
// %8 itself (unfilled) is reused later as the outs of the division generic.
%8 = tensor.empty() : tensor<64xf32>
%9 = linalg.fill ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) -> tensor<64xf32>
// %11: zero-filled 64x256 f32 tensor, the initial value of the loop iter_arg.
%10 = tensor.empty() : tensor<64x256xf32>
%11 = linalg.fill ins(%cst_0 : f32) outs(%10 : tensor<64x256xf32>) -> tensor<64x256xf32>
// to_layout of the unfilled %10. It depends on neither %arg0 nor %arg1 and is
// placed ahead of the loop; inside the loop it is the outs operand of the extf
// generic. In the dump printed just before this one in the file, the
// equivalent to_layout of %10 appears inside the loop body.
%12 = iree_vector_ext.to_layout %10 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
// Five iterations (%arg0 = 0, 256, 512, 768, 1024). %arg1 carries the running
// 64x256 f32 partial sums from one iteration to the next.
%13 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %11) -> (tensor<64x256xf32>) {
// Columns [%arg0, %arg0 + 256) of all 64 rows of the input.
%extracted_slice = tensor.extract_slice %7[0, %arg0] [64, 256] [1, 1] : tensor<64x1280xf16> to tensor<64x256xf16>
%18 = iree_vector_ext.to_layout %extracted_slice to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf16>
// Element-wise f16 -> f32 extension of the slice; the %out block argument is unused.
%19 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%18 : tensor<64x256xf16>) outs(%12 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%25 = arith.extf %in : f16 to f32
linalg.yield %25 : f32
} -> tensor<64x256xf32>
// Two consecutive to_layout ops carrying the identical layout attribute.
%20 = iree_vector_ext.to_layout %19 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%21 = iree_vector_ext.to_layout %20 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%22 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
// Element-wise add of the extended slice into the accumulator. Both iterators
// are "parallel"; the lowering_config carries partial_reduction = [0, 256].
%23 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%21 : tensor<64x256xf32>) outs(%22 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %out: f32):
%25 = arith.addf %in, %out : f32
linalg.yield %25 : f32
} -> tensor<64x256xf32>
%24 = iree_vector_ext.to_layout %23 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
scf.yield %24 : tensor<64x256xf32>
}
// Sum the 256 partial-sum columns of each row (dimension 1) into the
// zero-initialised 64xf32 tensor %9.
%reduced = linalg.reduce ins(%13 : tensor<64x256xf32>) outs(%9 : tensor<64xf32>) dimensions = [1]
(%in: f32, %init: f32) {
%18 = arith.addf %in, %init : f32
linalg.yield %18 : f32
}
%14 = iree_vector_ext.to_layout %reduced to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
// to_layout of the unfilled %8; only used as the outs of the generic below,
// whose body does not read %out.
%15 = iree_vector_ext.to_layout %8 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
// Divide each row sum by %cst (1280.0). The sum covers 5 x 256 = 1280 input
// columns, so each result is the mean of one input row.
%16 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%14 : tensor<64xf32>) outs(%15 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%18 = arith.divf %in, %cst : f32
linalg.yield %18 : f32
} -> tensor<64xf32>
%17 = iree_vector_ext.to_layout %16 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
// Store the 64 results through binding 1.
flow.dispatch.tensor.store %17, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After LinalgGeneralizeNamedOpsPass (linalg-generalize-named-ops) //----- //
// @encode, as printed after linalg-generalize-named-ops.
// Reading the body: it loads a 64x1280 f16 tensor, widens it to f32, sums each
// row (first into a 64x256 partial accumulator, then along the remaining
// dimension), divides the 64 sums by 1.28e3 and stores them as f32.
// translation_info selects the LLVMGPUVectorDistribute pipeline with
// workgroup_size = [64, 1, 1] and subgroup_size = 64.
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// Step (256), upper bound (1280) and lower bound (0) of the scf.for below.
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
// f32 divisor used by the final arith.divf.
%cst = arith.constant 1.280000e+03 : f32
// f32 zero used to fill both accumulators.
%cst_0 = arith.constant 0.000000e+00 : f32
// Two i32 interface constants (ordinals 0 and 1).
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
// Unsigned casts of both constants to index; they feed util.assume.int, whose
// results are used as the binding offsets further down.
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// Binding 0: read-only input viewed as tensor<64x1280xf16>, at byte offset
// %4#0 (first result of the util.assume.int above).
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
// Binding 1: write-only output viewed as tensor<64xf32>, at offset %4#1.
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
// Load the complete 64x1280 input (offsets [0, 0], unit strides).
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
// %9: 64-element f32 tensor in which every element is %cst_0 (0.0); the body
// just yields the scalar input. Used as the init of the final reduction.
%8 = tensor.empty() : tensor<64xf32>
%9 = linalg.generic {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<64xf32>
// %11: 64x256 f32 tensor filled with %cst_0 the same way; it is the initial
// value of the loop-carried accumulator (iter_args(%arg1 = %11)).
%10 = tensor.empty() : tensor<64x256xf32>
%11 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%cst_0 : f32) outs(%10 : tensor<64x256xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<64x256xf32>
// Layout anchor on %10, the *unfilled* empty tensor (not %11). %12 is only
// used as the outs operand of the extf generic in the loop, whose body never
// reads %out.
// NOTE(review): thread_tile = [64, 32] multiplies to 2048, while
// translation_info declares workgroup_size = [64, 1, 1] - confirm intended.
%12 = iree_vector_ext.to_layout %10 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
// Accumulation loop: %arg0 takes the values 0, 256, 512, 768, 1024
// (0 to 1280, step 256). %arg1 carries a 64x256 f32 partial-sum tensor that
// starts as %11; the loop result %13 is the last yielded accumulator.
%13 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %11) -> (tensor<64x256xf32>) {
// 64x256 window of the f16 input starting at column %arg0.
%extracted_slice = tensor.extract_slice %7[0, %arg0] [64, 256] [1, 1] : tensor<64x1280xf16> to tensor<64x256xf16>
// Layout anchor on the f16 slice.
// NOTE(review): thread_tile = [64, 32] multiplies to 2048, while the enclosing
// function declares workgroup_size = [64, 1, 1] - confirm intended.
%19 = iree_vector_ext.to_layout %extracted_slice to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf16>
// Element-wise widening f16 -> f32. outs(%12) only supplies the destination;
// %out is not read in the body.
%20 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%19 : tensor<64x256xf16>) outs(%12 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%26 = arith.extf %in : f16 to f32
linalg.yield %26 : f32
} -> tensor<64x256xf32>
// %21 and %22 apply the identical layout attribute back to back
// (%20 -> %21 -> %22).
// NOTE(review): the second anchor looks redundant; the dump does not show
// whether a later pass folds it.
%21 = iree_vector_ext.to_layout %20 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%22 = iree_vector_ext.to_layout %21 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
// Same layout on the loop-carried accumulator.
%23 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
// Element-wise add of the widened slice into the accumulator. Both iterators
// are parallel; the reduction intent is only recorded in the lowering_config
// (partial_reduction = [0, 256]).
%24 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%22 : tensor<64x256xf32>) outs(%23 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %out: f32):
%26 = arith.addf %in, %out : f32
linalg.yield %26 : f32
} -> tensor<64x256xf32>
// Layout anchor on the updated accumulator, which becomes %arg1 of the next
// iteration.
%25 = iree_vector_ext.to_layout %24 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
scf.yield %25 : tensor<64x256xf32>
}
// Final reduction: d1 is a "reduction" iterator and the output map is
// (d0, d1) -> (d0), so each row of the 64x256 partial sums %13 is added into
// the matching element of %9 (the zero-filled init). This is the generic form
// of the linalg.reduce that occupies this position in the preceding dump.
// Unlike the neighbouring generics, this op carries no lowering_config.
%14 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%13 : tensor<64x256xf32>) outs(%9 : tensor<64xf32>) {
^bb0(%in: f32, %out: f32):
%19 = arith.addf %in, %out : f32
linalg.yield %19 : f32
} -> tensor<64xf32>
// 1-D layout anchor (thread_tile = [64], element_tile = [1]) on the row sums.
%15 = iree_vector_ext.to_layout %14 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
// Same layout on %8, the unfilled empty tensor; it only serves as the
// destination of the division below.
%16 = iree_vector_ext.to_layout %8 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
// Element-wise division of each row sum by %cst (1.28e3). Since 1280 columns
// were summed per row, the result is the per-row mean. %out is not read.
%17 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%15 : tensor<64xf32>) outs(%16 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%19 = arith.divf %in, %cst : f32
linalg.yield %19 : f32
} -> tensor<64xf32>
// Layout anchor on the final values.
%18 = iree_vector_ext.to_layout %17 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
// Write all 64 results to the write-only output binding %6 at offset 0.
flow.dispatch.tensor.store %18, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After VectorExtFoldUnitExtentDimsPass (iree-vector-ext-fold-unit-extent-dims) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 1.280000e+03 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
%8 = tensor.empty() : tensor<64xf32>
%9 = linalg.generic {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<64xf32>
%10 = tensor.empty() : tensor<64x256xf32>
%11 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%cst_0 : f32) outs(%10 : tensor<64x256xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<64x256xf32>
%12 = iree_vector_ext.to_layout %10 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%13 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %11) -> (tensor<64x256xf32>) {
%extracted_slice = tensor.extract_slice %7[0, %arg0] [64, 256] [1, 1] : tensor<64x1280xf16> to tensor<64x256xf16>
%19 = iree_vector_ext.to_layout %extracted_slice to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf16>
%20 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%19 : tensor<64x256xf16>) outs(%12 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%26 = arith.extf %in : f16 to f32
linalg.yield %26 : f32
} -> tensor<64x256xf32>
%21 = iree_vector_ext.to_layout %20 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%22 = iree_vector_ext.to_layout %21 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%23 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%24 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%22 : tensor<64x256xf32>) outs(%23 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %out: f32):
%26 = arith.addf %in, %out : f32
linalg.yield %26 : f32
} -> tensor<64x256xf32>
%25 = iree_vector_ext.to_layout %24 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
scf.yield %25 : tensor<64x256xf32>
}
%14 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%13 : tensor<64x256xf32>) outs(%9 : tensor<64xf32>) {
^bb0(%in: f32, %out: f32):
%19 = arith.addf %in, %out : f32
linalg.yield %19 : f32
} -> tensor<64xf32>
%15 = iree_vector_ext.to_layout %14 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
%16 = iree_vector_ext.to_layout %8 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
%17 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%15 : tensor<64xf32>) outs(%16 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%19 = arith.divf %in, %cst : f32
linalg.yield %19 : f32
} -> tensor<64xf32>
%18 = iree_vector_ext.to_layout %17 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
flow.dispatch.tensor.store %18, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After LinalgFoldUnitExtentDimsPass (linalg-fold-unit-extent-dims) //----- //
// @encode as printed after the pass named in the dump banner directly above.
// Read off the ops below, the function:
//   1. loads a 64x1280 f16 tensor from binding(0);
//   2. walks its columns in five 256-wide slices (0 to 1280 step 256), extends
//      each slice to f32 and adds it element-wise into a 64x256 f32 accumulator
//      that starts at 0.0;
//   3. sums that accumulator along its second dimension into 64 f32 values;
//   4. divides each value by 1280.0 and stores the 64 results to binding(1).
// Net effect: every output element is the sum of one 1280-element row divided
// by 1280, i.e. a row-wise mean.
// The translation_info attribute selects the LLVMGPUVectorDistribute pipeline
// with workgroup_size = [64, 1, 1] and subgroup_size = 64.
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// Loop step (%c256), upper bound (%c1280) and lower bound (%c0) of the scf.for
// below; %cst is the divisor used in the final generic and %cst_0 is the fill
// value for both accumulators.
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 1.280000e+03 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
// Two i32 interface constants (ordinals 0 and 1), cast unsigned to index. After
// the util.assume.int below they become the offsets of the input and output
// binding subspans respectively.
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
// Attaches a list of <umin, umax, udiv> assumptions to %2 and to %3 and
// returns the annotated values as %4#0 and %4#1. In every entry listed the
// three fields carry the same number. The two operand lists are hard-wrapped
// across several physical lines in this dump.
// NOTE(review): presumably one entry per call site of this dispatch - confirm
// against the util.assume.int documentation.
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// binding(0): read-only 64x1280 f16 input at offset %4#0.
// binding(1): write-only 64-element f32 output at offset %4#1.
// Both subspans declare alignment(64).
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
// Load the whole input (offsets [0, 0], sizes [64, 1280], unit strides).
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
// %9: 64-element f32 tensor filled with %cst_0 (0.0); it is the init value of
// the final reduction (%14).
%8 = tensor.empty() : tensor<64xf32>
%9 = linalg.generic {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<64xf32>
// %11: 64x256 f32 tensor filled with 0.0; it seeds the loop-carried
// accumulator %arg1 of the scf.for below.
%10 = tensor.empty() : tensor<64x256xf32>
%11 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%cst_0 : f32) outs(%10 : tensor<64x256xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<64x256xf32>
// Layout annotation on the unfilled %10 (not the zero-filled %11). %12 is only
// used as the `outs` operand of the extf generic inside the loop, whose body
// does not read %out.
%12 = iree_vector_ext.to_layout %10 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
// Column loop: %arg0 takes the values 0, 256, 512, 768, 1024 (five
// iterations). %arg1 carries the running 64x256 f32 partial sums, and the
// loop result %13 is the accumulator after the last slice.
%13 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %11) -> (tensor<64x256xf32>) {
// Current 64x256 f16 slice, starting at column %arg0.
%extracted_slice = tensor.extract_slice %7[0, %arg0] [64, 256] [1, 1] : tensor<64x1280xf16> to tensor<64x256xf16>
%19 = iree_vector_ext.to_layout %extracted_slice to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf16>
// Element-wise f16 -> f32 extension of the slice.
%20 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%19 : tensor<64x256xf16>) outs(%12 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%27 = arith.extf %in : f16 to f32
linalg.yield %27 : f32
} -> tensor<64x256xf32>
// %21 and %22 apply the same layout back to back (%22 is to_layout of %21).
// NOTE(review): the second one looks redundant; presumably a later pass
// folds it - confirm.
%21 = iree_vector_ext.to_layout %20 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%22 = iree_vector_ext.to_layout %21 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
// Same layout applied to the incoming accumulator.
%23 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
// Element-wise add of the converted slice (%22) and the accumulator (%23).
// Both are passed as `ins`; the result is written into a fresh tensor.empty
// (%24) and %out is not read in the body. The lowering_config on this op is
// the one that carries partial_reduction = [0, 256].
%24 = tensor.empty() : tensor<64x256xf32>
%25 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%22, %23 : tensor<64x256xf32>, tensor<64x256xf32>) outs(%24 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %in_1: f32, %out: f32):
%27 = arith.addf %in, %in_1 : f32
linalg.yield %27 : f32
} -> tensor<64x256xf32>
// Re-annotate the sum and hand it to the next iteration.
%26 = iree_vector_ext.to_layout %25 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
scf.yield %26 : tensor<64x256xf32>
}
// Final reduction: d1 is the "reduction" iterator, so the 64x256 partial sums
// are added along their second dimension into the zero-initialised %9, giving
// one f32 per row.
%14 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%13 : tensor<64x256xf32>) outs(%9 : tensor<64xf32>) {
^bb0(%in: f32, %out: f32):
%19 = arith.addf %in, %out : f32
linalg.yield %19 : f32
} -> tensor<64xf32>
// 1-D layout annotations (thread_tile = [64]) on the row sums (%15) and on the
// unfilled %8 (%16). %16 is only the `outs` destination of the division below,
// whose body does not read %out.
%15 = iree_vector_ext.to_layout %14 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
%16 = iree_vector_ext.to_layout %8 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
// Divide every row sum by %cst (1280.0, the number of columns summed).
%17 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%15 : tensor<64xf32>) outs(%16 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%19 = arith.divf %in, %cst : f32
linalg.yield %19 : f32
} -> tensor<64xf32>
%18 = iree_vector_ext.to_layout %17 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
// Write all 64 results to the output binding.
flow.dispatch.tensor.store %18, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 1.280000e+03 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
%8 = tensor.empty() : tensor<64xf32>
%9 = linalg.generic {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<64xf32>
%10 = tensor.empty() : tensor<64x256xf32>
%11 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%cst_0 : f32) outs(%10 : tensor<64x256xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<64x256xf32>
%12 = iree_vector_ext.to_layout %10 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%13 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %11) -> (tensor<64x256xf32>) {
%extracted_slice = tensor.extract_slice %7[0, %arg0] [64, 256] [1, 1] : tensor<64x1280xf16> to tensor<64x256xf16>
%19 = iree_vector_ext.to_layout %extracted_slice to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf16>
%20 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%19 : tensor<64x256xf16>) outs(%12 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%27 = arith.extf %in : f16 to f32
linalg.yield %27 : f32
} -> tensor<64x256xf32>
%21 = iree_vector_ext.to_layout %20 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%22 = iree_vector_ext.to_layout %21 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%23 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%24 = tensor.empty() : tensor<64x256xf32>
%25 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%22, %23 : tensor<64x256xf32>, tensor<64x256xf32>) outs(%24 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %in_1: f32, %out: f32):
%27 = arith.addf %in, %in_1 : f32
linalg.yield %27 : f32
} -> tensor<64x256xf32>
%26 = iree_vector_ext.to_layout %25 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
scf.yield %26 : tensor<64x256xf32>
}
%14 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%13 : tensor<64x256xf32>) outs(%9 : tensor<64xf32>) {
^bb0(%in: f32, %out: f32):
%19 = arith.addf %in, %out : f32
linalg.yield %19 : f32
} -> tensor<64xf32>
%15 = iree_vector_ext.to_layout %14 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
%16 = iree_vector_ext.to_layout %8 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
%17 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%15 : tensor<64xf32>) outs(%16 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%19 = arith.divf %in, %cst : f32
linalg.yield %19 : f32
} -> tensor<64xf32>
%18 = iree_vector_ext.to_layout %17 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
flow.dispatch.tensor.store %18, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 1.280000e+03 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
%8 = tensor.empty() : tensor<64xf32>
%9 = linalg.generic {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<64xf32>
%10 = tensor.empty() : tensor<64x256xf32>
%11 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%cst_0 : f32) outs(%10 : tensor<64x256xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<64x256xf32>
%12 = iree_vector_ext.to_layout %10 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%13 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %11) -> (tensor<64x256xf32>) {
%extracted_slice = tensor.extract_slice %7[0, %arg0] [64, 256] [1, 1] : tensor<64x1280xf16> to tensor<64x256xf16>
%19 = iree_vector_ext.to_layout %extracted_slice to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf16>
%20 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%19 : tensor<64x256xf16>) outs(%12 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%26 = arith.extf %in : f16 to f32
linalg.yield %26 : f32
} -> tensor<64x256xf32>
%21 = iree_vector_ext.to_layout %20 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%22 = iree_vector_ext.to_layout %21 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%23 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%24 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%22, %23 : tensor<64x256xf32>, tensor<64x256xf32>) outs(%10 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %in_1: f32, %out: f32):
%26 = arith.addf %in, %in_1 : f32
linalg.yield %26 : f32
} -> tensor<64x256xf32>
%25 = iree_vector_ext.to_layout %24 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
scf.yield %25 : tensor<64x256xf32>
}
%14 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%13 : tensor<64x256xf32>) outs(%9 : tensor<64xf32>) {
^bb0(%in: f32, %out: f32):
%19 = arith.addf %in, %out : f32
linalg.yield %19 : f32
} -> tensor<64xf32>
%15 = iree_vector_ext.to_layout %14 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
%16 = iree_vector_ext.to_layout %8 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
%17 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%15 : tensor<64xf32>) outs(%16 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%19 = arith.divf %in, %cst : f32
linalg.yield %19 : f32
} -> tensor<64xf32>
%18 = iree_vector_ext.to_layout %17 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
flow.dispatch.tensor.store %18, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After OptimizeTensorInsertExtractSlicesPass (iree-codegen-optimize-tensor-insert-extract-slices) //----- //
// @encode: reads a 64x1280 f16 tensor from binding 0, sums each row in f32 and
// divides by 1280.0, then writes the resulting 64 f32 values to binding 1.
// The translation_info attribute selects the LLVMGPUVectorDistribute pipeline
// with workgroup_size = [64, 1, 1] and subgroup_size = 64.
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// Loop bounds for the column-chunk loop below: 0 .. 1280 in steps of 256.
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
// %cst (1280.0) is the divisor of the final division; %cst_0 (0.0) is the fill value for both accumulators.
%cst = arith.constant 1.280000e+03 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
// Load the two i32 constants (ordinals 0 and 1) declared by the layout and cast them to index (unsigned).
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
// Attach <umin, umax, udiv> assumption lists to %2 and %3; %4#0 / %4#1 are the annotated values.
// In every entry listed here the three fields carry the same value.
// NOTE(review): presumably one entry per call site of this dispatch — confirm against the util dialect docs.
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// Bind the input (binding 0, read-only, offset %4#0) and the output (binding 1, write-only, offset %4#1).
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
// Load the whole 64x1280 f16 input as a tensor value.
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
// %9: 64-element f32 tensor filled with 0.0; used as the init of the final row reduction (%14).
%8 = tensor.empty() : tensor<64xf32>
%9 = linalg.generic {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<64xf32>
// %11: 64x256 f32 tensor filled with 0.0; initial value of the loop-carried partial-sum accumulator.
%10 = tensor.empty() : tensor<64x256xf32>
%11 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%cst_0 : f32) outs(%10 : tensor<64x256xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<64x256xf32>
// %12 applies the 2-D nested layout to the *empty* tensor %10 (not the zero-filled %11);
// it is only used as the outs operand of the extf generic below, whose body never reads %out.
%12 = iree_vector_ext.to_layout %10 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
// Walk the 1280 columns in chunks of 256 (offsets 0, 256, 512, 768, 1024 -> five iterations),
// carrying a 64x256 f32 tensor of element-wise partial sums in %arg1.
%13 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %11) -> (tensor<64x256xf32>) {
// Current 64x256 f16 column chunk of the input, starting at column %arg0.
%extracted_slice = tensor.extract_slice %7[0, %arg0] [64, 256] [1, 1] : tensor<64x1280xf16> to tensor<64x256xf16>
%19 = iree_vector_ext.to_layout %extracted_slice to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf16>
// Element-wise widening of the chunk from f16 to f32.
%20 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%19 : tensor<64x256xf16>) outs(%12 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%26 = arith.extf %in : f16 to f32
linalg.yield %26 : f32
} -> tensor<64x256xf32>
// NOTE(review): %21 and %22 apply the identical layout back-to-back; the second looks redundant at this stage of the pipeline.
%21 = iree_vector_ext.to_layout %20 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%22 = iree_vector_ext.to_layout %21 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
// Same layout applied to the loop-carried accumulator.
%23 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
// Element-wise accumulate: new partial sums = widened chunk (%22) + previous partial sums (%23).
// Both iterators are parallel and the body ignores %out; the lowering_config carries partial_reduction = [0, 256].
%24 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%22, %23 : tensor<64x256xf32>, tensor<64x256xf32>) outs(%10 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %in_1: f32, %out: f32):
%26 = arith.addf %in, %in_1 : f32
linalg.yield %26 : f32
} -> tensor<64x256xf32>
%25 = iree_vector_ext.to_layout %24 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
scf.yield %25 : tensor<64x256xf32>
}
// Final reduction: sum the 256 partial-sum columns of each row (d1 is the reduction iterator)
// into the zero-initialized %9, giving one f32 sum per row.
%14 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%13 : tensor<64x256xf32>) outs(%9 : tensor<64xf32>) {
^bb0(%in: f32, %out: f32):
%19 = arith.addf %in, %out : f32
linalg.yield %19 : f32
} -> tensor<64xf32>
// 1-D nested layout (thread_tile = [64], element_tile = [1]) on the row sums and on the empty output tensor %8.
%15 = iree_vector_ext.to_layout %14 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
%16 = iree_vector_ext.to_layout %8 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
// Divide each row sum by %cst (1280.0); with 1280 summed columns per row this yields the row mean.
%17 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%15 : tensor<64xf32>) outs(%16 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%19 = arith.divf %in, %cst : f32
linalg.yield %19 : f32
} -> tensor<64xf32>
%18 = iree_vector_ext.to_layout %17 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
// Write the 64 results to the output binding.
flow.dispatch.tensor.store %18, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After DecomposeConvolutionToLowerDimOpsPass (iree-codegen-decompose-convolution-to-lower-dim-ops) //----- //
// @encode: reads a 64x1280 f16 tensor from binding 0, sums each row in f32, divides the
// row sums by 1280.0 (the column count, so each output element is a row mean) and stores
// the resulting 64 f32 values to binding 1. The function takes no arguments and returns
// nothing; all inputs and outputs go through the HAL interface ops below.
// translation_info selects the LLVMGPUVectorDistribute pipeline with
// workgroup_size = [64, 1, 1] and subgroup_size = 64.
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// Loop bounds for the column-chunk loop: start 0, end 1280, step 256.
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
// %cst (1280.0) is the divisor of the final division; %cst_0 (0.0) is the fill value
// for both accumulators.
%cst = arith.constant 1.280000e+03 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
// Two i32 constants (ordinals 0 and 1) are loaded and cast (unsigned) to index; after
// util.assume.int they become the offsets of binding 0 (%4#0) and binding 1 (%4#1).
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
// Attaches a list of <umin, umax, udiv> assumptions to each of %2 and %3. Every entry has
// umin == umax == udiv, i.e. it names a single value.
// NOTE(review): presumably each entry corresponds to one dispatch site of this function
// and the two lists are matched position by position -- confirm against util.assume.int docs.
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// Binding 0: read-only 64x1280 f16 input at offset %4#0.
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
// Binding 1: write-only 64-element f32 output at offset %4#1.
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
// Load the whole input (offsets [0, 0], sizes [64, 1280], unit strides).
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
// %9: 64-element f32 tensor filled with 0.0 (the body yields the scalar input %cst_0);
// it is the init value of the final row reduction (%14).
%8 = tensor.empty() : tensor<64xf32>
%9 = linalg.generic {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<64xf32>
// %11: 64x256 f32 tensor filled with 0.0; it is the initial value of the loop-carried
// partial-sum accumulator.
%10 = tensor.empty() : tensor<64x256xf32>
%11 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%cst_0 : f32) outs(%10 : tensor<64x256xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<64x256xf32>
// Note: the layout is applied to the *uninitialized* %10, not to the zero-filled %11.
// %12 is only used as the outs operand of %20, whose body never reads %out.
%12 = iree_vector_ext.to_layout %10 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
// Walk the 1280 columns in chunks of 256 (%arg0 = 0, 256, ..., 1024; five iterations),
// carrying the 64x256 f32 partial sums in %arg1.
%13 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %11) -> (tensor<64x256xf32>) {
// Current chunk: all 64 rows, 256 columns starting at column %arg0.
%extracted_slice = tensor.extract_slice %7[0, %arg0] [64, 256] [1, 1] : tensor<64x1280xf16> to tensor<64x256xf16>
%19 = iree_vector_ext.to_layout %extracted_slice to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf16>
// Element-wise extension of the chunk from f16 to f32.
%20 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%19 : tensor<64x256xf16>) outs(%12 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%26 = arith.extf %in : f16 to f32
linalg.yield %26 : f32
} -> tensor<64x256xf32>
// %21 and %22 apply the same nested_layout twice in a row to the extended chunk.
// NOTE(review): the second conversion looks redundant -- presumably one anchors the
// producer's result and the other the consumer's operand; confirm.
%21 = iree_vector_ext.to_layout %20 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%22 = iree_vector_ext.to_layout %21 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%23 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
// Element-wise accumulate: new partial sums = extended chunk (%22) + previous partial
// sums (%23). The outs operand %10 is uninitialized but %out is not read by the body.
// The lowering_config carries partial_reduction = [0, 256].
%24 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%22, %23 : tensor<64x256xf32>, tensor<64x256xf32>) outs(%10 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %in_1: f32, %out: f32):
%26 = arith.addf %in, %in_1 : f32
linalg.yield %26 : f32
} -> tensor<64x256xf32>
%25 = iree_vector_ext.to_layout %24 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
scf.yield %25 : tensor<64x256xf32>
}
// Final reduction: sum the 256 partial sums of each row (d1 is the "reduction" iterator)
// into a 64-element tensor, starting from the zero-filled %9.
%14 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%13 : tensor<64x256xf32>) outs(%9 : tensor<64xf32>) {
^bb0(%in: f32, %out: f32):
%19 = arith.addf %in, %out : f32
linalg.yield %19 : f32
} -> tensor<64xf32>
%15 = iree_vector_ext.to_layout %14 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
// %16 wraps the uninitialized %8; it only serves as the outs operand of %17, whose body
// does not read %out.
%16 = iree_vector_ext.to_layout %8 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
// Divide every row sum by %cst (1280.0), turning the sums into row means.
%17 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%15 : tensor<64xf32>) outs(%16 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%19 = arith.divf %in, %cst : f32
linalg.yield %19 : f32
} -> tensor<64xf32>
%18 = iree_vector_ext.to_layout %17 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
// Write the 64 results to the output binding.
flow.dispatch.tensor.store %18, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After DecomposeIm2colPass (iree-linalg-ext-decompose-im2col) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 1.280000e+03 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
%8 = tensor.empty() : tensor<64xf32>
%9 = linalg.generic {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<64xf32>
%10 = tensor.empty() : tensor<64x256xf32>
%11 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%cst_0 : f32) outs(%10 : tensor<64x256xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<64x256xf32>
%12 = iree_vector_ext.to_layout %10 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%13 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %11) -> (tensor<64x256xf32>) {
%extracted_slice = tensor.extract_slice %7[0, %arg0] [64, 256] [1, 1] : tensor<64x1280xf16> to tensor<64x256xf16>
%19 = iree_vector_ext.to_layout %extracted_slice to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf16>
%20 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%19 : tensor<64x256xf16>) outs(%12 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%26 = arith.extf %in : f16 to f32
linalg.yield %26 : f32
} -> tensor<64x256xf32>
%21 = iree_vector_ext.to_layout %20 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%22 = iree_vector_ext.to_layout %21 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%23 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
%24 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%22, %23 : tensor<64x256xf32>, tensor<64x256xf32>) outs(%10 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %in_1: f32, %out: f32):
%26 = arith.addf %in, %in_1 : f32
linalg.yield %26 : f32
} -> tensor<64x256xf32>
%25 = iree_vector_ext.to_layout %24 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : tensor<64x256xf32>
scf.yield %25 : tensor<64x256xf32>
}
%14 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%13 : tensor<64x256xf32>) outs(%9 : tensor<64xf32>) {
^bb0(%in: f32, %out: f32):
%19 = arith.addf %in, %out : f32
linalg.yield %19 : f32
} -> tensor<64xf32>
%15 = iree_vector_ext.to_layout %14 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
%16 = iree_vector_ext.to_layout %8 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
%17 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%15 : tensor<64xf32>) outs(%16 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%19 = arith.divf %in, %cst : f32
linalg.yield %19 : f32
} -> tensor<64xf32>
%18 = iree_vector_ext.to_layout %17 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : tensor<64xf32>
flow.dispatch.tensor.store %18, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After VectorizeIREEVectorExtOpsPass (iree-vector-ext-vectorize-ops) //----- //
// Dispatch function @encode: for each of the 64 rows of the 64x1280 f16 input
// (binding 0) it sums the row in f32, divides the sum by 1.28e+03, and stores
// the 64 f32 results to binding 1.
// In this form every iree_vector_ext.to_layout operates on a vector value that
// is produced by a vector.transfer_read and handed on through a
// vector.transfer_write into a fresh tensor.empty; the linalg.generic ops are
// still expressed on tensors.
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// %cst (f16 0.0) is the padding operand of the f16 transfer_read inside the loop.
%cst = arith.constant 0.000000e+00 : f16
// Loop step, loop upper bound and loop lower bound of the scf.for below.
// %c0 is also the index operand of every transfer_read / transfer_write.
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
// Divisor of the final linalg.generic; numerically equal to the size of the
// reduced input dimension (1280).
%cst_0 = arith.constant 1.280000e+03 : f32
// f32 0.0: fill value for %9 and %11 and padding operand of the f32 transfer_reads.
%cst_1 = arith.constant 0.000000e+00 : f32
// Two i32 interface constants (ordinals 0 and 1), cast unsigned to index.
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
// Attaches one list of <umin, umax, udiv> assumptions to %2 and one to %3.
// Every entry shown here carries the same number in all three fields.
// The results %4#0 and %4#1 are used as the offset(...) operands of the two
// binding subspans below.
// NOTE(review): presumably each list entry corresponds to one call site of
// this dispatch -- confirm against the util dialect documentation.
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// Binding 0: read-only 64x1280 f16 input at offset %4#0.
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
// Binding 1: write-only 64-element f32 output at offset %4#1.
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
// Load the whole input as a tensor value.
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
// %9: 64xf32 filled with 0.0; it is the init (outs) of the row reduction %17.
%8 = tensor.empty() : tensor<64xf32>
%9 = linalg.generic {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%cst_1 : f32) outs(%8 : tensor<64xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<64xf32>
// %11: 64x256xf32 filled with 0.0; it is the initial value of the loop-carried
// accumulator %arg1.
%10 = tensor.empty() : tensor<64x256xf32>
%11 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%cst_1 : f32) outs(%10 : tensor<64x256xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<64x256xf32>
// Read / to_layout / write round trip on %10 (the tensor.empty, not the filled
// %11). The result %15 is used only as the outs operand of the extf generic
// inside the loop, whose body does not read %out.
// In the layout attribute, thread_tile [64, 32] times element_tile [1, 8] gives
// [64, 256], the shape of the vector it is applied to.
%12 = vector.transfer_read %10[%c0, %c0], %cst_1 {in_bounds = [true, true]} : tensor<64x256xf32>, vector<64x256xf32>
%13 = iree_vector_ext.to_layout %12 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%14 = tensor.empty() : tensor<64x256xf32>
%15 = vector.transfer_write %13, %14[%c0, %c0] {in_bounds = [true, true]} : vector<64x256xf32>, tensor<64x256xf32>
// Walk the 1280 input columns in steps of 256 (offsets 0, 256, 512, 768, 1024),
// carrying a 64x256 f32 tensor of element-wise partial sums in %arg1.
%16 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %11) -> (tensor<64x256xf32>) {
// Current 64x256 f16 column chunk, starting at column %arg0.
%extracted_slice = tensor.extract_slice %7[0, %arg0] [64, 256] [1, 1] : tensor<64x1280xf16> to tensor<64x256xf16>
// Round trip the chunk through a vector to attach the layout.
%31 = vector.transfer_read %extracted_slice[%c0, %c0], %cst {in_bounds = [true, true]} : tensor<64x256xf16>, vector<64x256xf16>
%32 = iree_vector_ext.to_layout %31 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
%33 = tensor.empty() : tensor<64x256xf16>
%34 = vector.transfer_write %32, %33[%c0, %c0] {in_bounds = [true, true]} : vector<64x256xf16>, tensor<64x256xf16>
// Element-wise f16 -> f32 extension of the chunk.
%35 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%34 : tensor<64x256xf16>) outs(%15 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f16, %out: f32):
%50 = arith.extf %in : f16 to f32
linalg.yield %50 : f32
} -> tensor<64x256xf32>
// Round trip of the extended chunk; %37 and %38 are two consecutive to_layout
// ops that carry the same layout attribute.
%36 = vector.transfer_read %35[%c0, %c0], %cst_1 {in_bounds = [true, true]} : tensor<64x256xf32>, vector<64x256xf32>
%37 = iree_vector_ext.to_layout %36 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%38 = iree_vector_ext.to_layout %37 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%39 = tensor.empty() : tensor<64x256xf32>
%40 = vector.transfer_write %38, %39[%c0, %c0] {in_bounds = [true, true]} : vector<64x256xf32>, tensor<64x256xf32>
// Round trip of the loop-carried accumulator %arg1.
%41 = vector.transfer_read %arg1[%c0, %c0], %cst_1 {in_bounds = [true, true]} : tensor<64x256xf32>, vector<64x256xf32>
%42 = iree_vector_ext.to_layout %41 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%43 = tensor.empty() : tensor<64x256xf32>
%44 = vector.transfer_write %42, %43[%c0, %c0] {in_bounds = [true, true]} : vector<64x256xf32>, tensor<64x256xf32>
// Element-wise add of the extended chunk and the accumulator. Both iterators
// are parallel and the body does not read %out, so outs(%10) only supplies the
// result tensor.
%45 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%40, %44 : tensor<64x256xf32>, tensor<64x256xf32>) outs(%10 : tensor<64x256xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{partial_reduction = [0, 256], subgroup_basis = [[1, 1], [0, 1]], thread = [0, 8], thread_basis = [[64, 32], [0, 1]], workgroup = [64, 0]}>} {
^bb0(%in: f32, %in_2: f32, %out: f32):
%50 = arith.addf %in, %in_2 : f32
linalg.yield %50 : f32
} -> tensor<64x256xf32>
// Round trip of the new partial sums, which become the next iteration's %arg1.
%46 = vector.transfer_read %45[%c0, %c0], %cst_1 {in_bounds = [true, true]} : tensor<64x256xf32>, vector<64x256xf32>
%47 = iree_vector_ext.to_layout %46 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%48 = tensor.empty() : tensor<64x256xf32>
%49 = vector.transfer_write %47, %48[%c0, %c0] {in_bounds = [true, true]} : vector<64x256xf32>, tensor<64x256xf32>
scf.yield %49 : tensor<64x256xf32>
}
// Reduce the 64x256 partial sums along d1 (addf) into the zero-initialised %9,
// giving one f32 sum per row.
%17 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%16 : tensor<64x256xf32>) outs(%9 : tensor<64xf32>) {
^bb0(%in: f32, %out: f32):
%31 = arith.addf %in, %out : f32
linalg.yield %31 : f32
} -> tensor<64xf32>
// Round trip of the row sums with the 1-D layout (thread_tile = [64]).
%18 = vector.transfer_read %17[%c0], %cst_1 {in_bounds = [true]} : tensor<64xf32>, vector<64xf32>
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%20 = tensor.empty() : tensor<64xf32>
%21 = vector.transfer_write %19, %20[%c0] {in_bounds = [true]} : vector<64xf32>, tensor<64xf32>
// Round trip of %8 (a tensor.empty). %25 is used only as the outs operand of
// the divf generic below, whose body does not read %out.
%22 = vector.transfer_read %8[%c0], %cst_1 {in_bounds = [true]} : tensor<64xf32>, vector<64xf32>
%23 = iree_vector_ext.to_layout %22 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%24 = tensor.empty() : tensor<64xf32>
%25 = vector.transfer_write %23, %24[%c0] {in_bounds = [true]} : vector<64xf32>, tensor<64xf32>
// Divide every row sum by %cst_0 (1.28e+03).
%26 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%21 : tensor<64xf32>) outs(%25 : tensor<64xf32>) attrs = {lowering_config = #iree_gpu.lowering_config<{subgroup_basis = [[1], [0]], thread = [0], thread_basis = [[64], [0]], workgroup = [64]}>} {
^bb0(%in: f32, %out: f32):
%31 = arith.divf %in, %cst_0 : f32
linalg.yield %31 : f32
} -> tensor<64xf32>
// Final round trip, then store the 64 results to binding 1.
%27 = vector.transfer_read %26[%c0], %cst_1 {in_bounds = [true]} : tensor<64xf32>, vector<64xf32>
%28 = iree_vector_ext.to_layout %27 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%29 = tensor.empty() : tensor<64xf32>
%30 = vector.transfer_write %28, %29[%c0] {in_bounds = [true]} : vector<64xf32>, tensor<64xf32>
flow.dispatch.tensor.store %30, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After GenericVectorizationPass (iree-codegen-generic-vectorization) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
%cst_2 = arith.constant 0.000000e+00 : f16
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%cst_3 = arith.constant 0.000000e+00 : f32
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
%8 = tensor.empty() : tensor<64x256xf32>
%9 = vector.transfer_write %cst_0, %8[%c0, %c0] {in_bounds = [true, true]} : vector<64x256xf32>, tensor<64x256xf32>
%10 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %9) -> (tensor<64x256xf32>) {
%extracted_slice = tensor.extract_slice %7[0, %arg0] [64, 256] [1, 1] : tensor<64x1280xf16> to tensor<64x256xf16>
%18 = vector.transfer_read %extracted_slice[%c0, %c0], %cst_2 {in_bounds = [true, true]} : tensor<64x256xf16>, vector<64x256xf16>
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
%20 = arith.extf %19 : vector<64x256xf16> to vector<64x256xf32>
%21 = iree_vector_ext.to_layout %20 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%22 = iree_vector_ext.to_layout %21 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%23 = vector.transfer_read %arg1[%c0, %c0], %cst_3 {in_bounds = [true, true]} : tensor<64x256xf32>, vector<64x256xf32>
%24 = iree_vector_ext.to_layout %23 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%25 = arith.addf %22, %24 : vector<64x256xf32>
%26 = iree_vector_ext.to_layout %25 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%27 = tensor.empty() : tensor<64x256xf32>
%28 = vector.transfer_write %26, %27[%c0, %c0] {in_bounds = [true, true]} : vector<64x256xf32>, tensor<64x256xf32>
scf.yield %28 : tensor<64x256xf32>
}
%11 = vector.transfer_read %10[%c0, %c0], %cst_3 {in_bounds = [true, true]} : tensor<64x256xf32>, vector<64x256xf32>
%12 = vector.multi_reduction <add>, %11, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
%13 = iree_vector_ext.to_layout %12 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%14 = arith.divf %13, %cst : vector<64xf32>
%15 = iree_vector_ext.to_layout %14 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%16 = tensor.empty() : tensor<64xf32>
%17 = vector.transfer_write %15, %16[%c0] {in_bounds = [true]} : vector<64xf32>, tensor<64xf32>
flow.dispatch.tensor.store %17, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
%cst_2 = arith.constant 0.000000e+00 : f16
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%cst_3 = arith.constant 0.000000e+00 : f32
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
%8 = tensor.empty() : tensor<64x256xf32>
%9 = vector.transfer_write %cst_0, %8[%c0, %c0] {in_bounds = [true, true]} : vector<64x256xf32>, tensor<64x256xf32>
%10 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %9) -> (tensor<64x256xf32>) {
%extracted_slice = tensor.extract_slice %7[0, %arg0] [64, 256] [1, 1] : tensor<64x1280xf16> to tensor<64x256xf16>
%18 = vector.transfer_read %extracted_slice[%c0, %c0], %cst_2 {in_bounds = [true, true]} : tensor<64x256xf16>, vector<64x256xf16>
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
%20 = arith.extf %19 : vector<64x256xf16> to vector<64x256xf32>
%21 = iree_vector_ext.to_layout %20 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%22 = iree_vector_ext.to_layout %21 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%23 = vector.transfer_read %arg1[%c0, %c0], %cst_3 {in_bounds = [true, true]} : tensor<64x256xf32>, vector<64x256xf32>
%24 = iree_vector_ext.to_layout %23 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%25 = arith.addf %22, %24 : vector<64x256xf32>
%26 = iree_vector_ext.to_layout %25 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%27 = tensor.empty() : tensor<64x256xf32>
%28 = vector.transfer_write %26, %27[%c0, %c0] {in_bounds = [true, true]} : vector<64x256xf32>, tensor<64x256xf32>
scf.yield %28 : tensor<64x256xf32>
}
%11 = vector.transfer_read %10[%c0, %c0], %cst_3 {in_bounds = [true, true]} : tensor<64x256xf32>, vector<64x256xf32>
%12 = vector.multi_reduction <add>, %11, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
%13 = iree_vector_ext.to_layout %12 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%14 = arith.divf %13, %cst : vector<64xf32>
%15 = iree_vector_ext.to_layout %14 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%16 = tensor.empty() : tensor<64xf32>
%17 = vector.transfer_write %15, %16[%c0] {in_bounds = [true]} : vector<64xf32>, tensor<64xf32>
flow.dispatch.tensor.store %17, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After CSE (cse) //----- //
// @encode as printed after CSE. The function reads a 64x1280 f16 tensor from
// binding 0, accumulates it in f32 over five 256-column slices (loop 0..1280
// step 256), add-reduces the 64x256 partial sums along dimension 1, divides by
// 1280.0 and stores the resulting 64 f32 values to binding 1 -- i.e. a per-row
// mean of the input.
// In the visible part of the dump printed just before this one, the loop body
// wrote into its own tensor.empty; here it writes into %8, which is defined
// before the loop.
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// Constants:
//   %cst    - splat 1280.0, the divisor of the final arith.divf.
//   %cst_0  - zero splat written into the accumulator tensor before the loop.
//   %cst_1  - zero splat passed as the accumulator operand of vector.multi_reduction.
//   %cst_2  - f16 zero, padding value of the f16 vector.transfer_read.
//   %c256 / %c1280 / %c0 - loop step / upper bound / lower bound (%c0 is also the transfer index).
//   %cst_3  - f32 zero, padding value of the f32 vector.transfer_read ops.
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
%cst_2 = arith.constant 0.000000e+00 : f16
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%cst_3 = arith.constant 0.000000e+00 : f32
// Load the two i32 push constants (ordinals 0 and 1) and cast them to index as unsigned values.
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
// util.assume.int attaches a list of <umin, umax, udiv> assumptions to each of %2 and %3.
// Its results %4#0 and %4#1 are used below as the offsets of the two binding subspans.
// The operand lists that follow are printed hard-wrapped across several lines.
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// Binding 0: read-only 64x1280 f16 input at offset %4#0.
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
// Binding 1: write-only 64-element f32 output at offset %4#1.
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
// Load the whole 64x1280 input tensor.
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
// Accumulator tensor (64x256 f32), filled with zeros from %cst_0.
%8 = tensor.empty() : tensor<64x256xf32>
%9 = vector.transfer_write %cst_0, %8[%c0, %c0] {in_bounds = [true, true]} : vector<64x256xf32>, tensor<64x256xf32>
// Loop over the 1280 input columns in steps of 256; %arg1 carries the 64x256 f32 partial sums.
%10 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %9) -> (tensor<64x256xf32>) {
// Take the 64x256 f16 slice that starts at column %arg0 and read it as a vector.
%extracted_slice = tensor.extract_slice %7[0, %arg0] [64, 256] [1, 1] : tensor<64x1280xf16> to tensor<64x256xf16>
%18 = vector.transfer_read %extracted_slice[%c0, %c0], %cst_2 {in_bounds = [true, true]} : tensor<64x256xf16>, vector<64x256xf16>
// Annotate with the nested layout (thread_tile = [64, 32], element_tile = [1, 8]), then widen f16 -> f32.
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
%20 = arith.extf %19 : vector<64x256xf16> to vector<64x256xf32>
// Two back-to-back to_layout ops carrying the same layout attribute (%22 re-annotates %21).
%21 = iree_vector_ext.to_layout %20 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%22 = iree_vector_ext.to_layout %21 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
// Read the partial sums carried in %arg1 and give them the same layout.
%23 = vector.transfer_read %arg1[%c0, %c0], %cst_3 {in_bounds = [true, true]} : tensor<64x256xf32>, vector<64x256xf32>
%24 = iree_vector_ext.to_layout %23 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
// Element-wise accumulate: widened slice + carried partial sums.
%25 = arith.addf %22, %24 : vector<64x256xf32>
%26 = iree_vector_ext.to_layout %25 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
// Write the updated sums into %8 (the tensor.empty defined before the loop) and yield them.
%27 = vector.transfer_write %26, %8[%c0, %c0] {in_bounds = [true, true]} : vector<64x256xf32>, tensor<64x256xf32>
scf.yield %27 : tensor<64x256xf32>
}
// Read the final 64x256 partial sums and add-reduce dimension 1, starting from %cst_1, to get one value per row.
%11 = vector.transfer_read %10[%c0, %c0], %cst_3 {in_bounds = [true, true]} : tensor<64x256xf32>, vector<64x256xf32>
%12 = vector.multi_reduction <add>, %11, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
%13 = iree_vector_ext.to_layout %12 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
// Divide every row sum by 1280.0, which equals the number of input columns.
%14 = arith.divf %13, %cst : vector<64xf32>
%15 = iree_vector_ext.to_layout %14 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
// Materialise the 64 results as a tensor and store them to the output binding.
%16 = tensor.empty() : tensor<64xf32>
%17 = vector.transfer_write %15, %16[%c0] {in_bounds = [true]} : vector<64xf32>, tensor<64xf32>
flow.dispatch.tensor.store %17, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After OptimizeTensorInsertExtractSlicesPass (iree-codegen-optimize-tensor-insert-extract-slices) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
%cst_2 = arith.constant 0.000000e+00 : f16
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
%8 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %cst_0) -> (vector<64x256xf32>) {
%15 = vector.transfer_read %7[%c0, %arg0], %cst_2 {in_bounds = [true, true]} : tensor<64x1280xf16>, vector<64x256xf16>
%16 = iree_vector_ext.to_layout %15 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
%17 = arith.extf %16 : vector<64x256xf16> to vector<64x256xf32>
%18 = iree_vector_ext.to_layout %17 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%20 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%21 = arith.addf %19, %20 : vector<64x256xf32>
%22 = iree_vector_ext.to_layout %21 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
scf.yield %22 : vector<64x256xf32>
}
%9 = vector.multi_reduction <add>, %8, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
%10 = iree_vector_ext.to_layout %9 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%11 = arith.divf %10, %cst : vector<64xf32>
%12 = iree_vector_ext.to_layout %11 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%13 = tensor.empty() : tensor<64xf32>
%14 = vector.transfer_write %12, %13[%c0] {in_bounds = [true]} : vector<64xf32>, tensor<64xf32>
flow.dispatch.tensor.store %14, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
// @encode (state after the canonicalize pass): reads a 64x1280 f16 tensor from
// binding 0 and writes a 64-element f32 tensor to binding 1. Each output element
// is the f32 sum of the 1280 values of one row divided by 1280.0, i.e. the
// per-row mean. The function takes no arguments and returns nothing; all I/O goes
// through the hal.interface bindings. translation_info selects the
// LLVMGPUVectorDistribute pipeline with workgroup_size = [64, 1, 1] and
// subgroup_size = 64.
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// %cst   : splat 1280.0, the divisor applied after the reduction.
// %cst_0 : all-zero 64x256 f32 vector, initial value of the loop-carried sum.
// %cst_1 : all-zero 64 f32 vector, accumulator operand of the multi_reduction.
// %cst_2 : f16 zero, padding operand of vector.transfer_read.
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
%cst_2 = arith.constant 0.000000e+00 : f16
// Loop step, loop upper bound and zero index used by the scf.for / transfers below.
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
// Two i32 constants (ordinals 0 and 1) are loaded and cast to index as unsigned
// values; after util.assume.int they become the offsets of the binding 0 and
// binding 1 subspans respectively.
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
// util.assume.int attaches a list of <umin, umax, udiv> tuples to each of %2 and
// %3; in every tuple listed here the three fields carry the same value.
// NOTE(review): presumably one tuple per call site of this dispatch -- confirm
// against the host-side IR. The lists are line-wrapped mid-tuple in this dump and
// are kept exactly as printed.
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// Binding 0: read-only 64x1280 f16 input at offset %4#0.
// Binding 1: write-only 64 f32 output at offset %4#1.
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
// Load the complete 64x1280 input (offsets [0, 0], unit strides).
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
// Walk the columns in steps of 256 (offsets 0, 256, 512, 768, 1024: five
// iterations). %arg1 is the loop-carried 64x256 f32 element-wise running sum,
// starting from the zero vector %cst_0.
%8 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %cst_0) -> (vector<64x256xf32>) {
// Read the 64x256 f16 tile that starts at column %arg0; both dims are marked in-bounds.
%15 = vector.transfer_read %7[%c0, %arg0], %cst_2 {in_bounds = [true, true]} : tensor<64x1280xf16>, vector<64x256xf16>
// Every to_layout in this loop carries the same nested_layout attribute. Its
// per-dimension tile products are 64 (dim 0) and 32 * 8 = 256 (dim 1), matching
// the 64x256 vector shape.
// NOTE(review): thread_tile = [64, 32] amounts to 64 * 32 = 2048 thread
// positions, while translation_info declares workgroup_size = [64, 1, 1] and
// subgroup_size = 64 -- confirm this layout is what the pipeline intends.
%16 = iree_vector_ext.to_layout %15 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
// Widen f16 -> f32 so the accumulation below happens in f32.
%17 = arith.extf %16 : vector<64x256xf16> to vector<64x256xf32>
%18 = iree_vector_ext.to_layout %17 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
// %19 applies the identical layout attribute to %18 a second time.
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
// Same layout attribute applied to the incoming running sum.
%20 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
// Element-wise accumulate this tile into the running sum.
%21 = arith.addf %19, %20 : vector<64x256xf32>
%22 = iree_vector_ext.to_layout %21 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
scf.yield %22 : vector<64x256xf32>
}
// Collapse dimension 1 (the 256 partial sums per row) with an add reduction on
// top of the zero accumulator %cst_1, giving one f32 row sum per row.
%9 = vector.multi_reduction <add>, %8, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
// 1-D nested_layout: thread_tile = [64], element_tile = [1].
%10 = iree_vector_ext.to_layout %9 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
// Divide each row sum by 1280.0 (the number of columns summed) -> per-row mean.
%11 = arith.divf %10, %cst : vector<64xf32>
%12 = iree_vector_ext.to_layout %11 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
// Materialize the 64 results into a fresh tensor and store all of it at offset 0
// of the output binding.
%13 = tensor.empty() : tensor<64xf32>
%14 = vector.transfer_write %12, %13[%c0] {in_bounds = [true]} : vector<64xf32>, tensor<64xf32>
flow.dispatch.tensor.store %14, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
%cst_2 = arith.constant 0.000000e+00 : f16
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
%8 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %cst_0) -> (vector<64x256xf32>) {
%15 = vector.transfer_read %7[%c0, %arg0], %cst_2 {in_bounds = [true, true]} : tensor<64x1280xf16>, vector<64x256xf16>
%16 = iree_vector_ext.to_layout %15 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
%17 = arith.extf %16 : vector<64x256xf16> to vector<64x256xf32>
%18 = iree_vector_ext.to_layout %17 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%20 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%21 = arith.addf %19, %20 : vector<64x256xf32>
%22 = iree_vector_ext.to_layout %21 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
scf.yield %22 : vector<64x256xf32>
}
%9 = vector.multi_reduction <add>, %8, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
%10 = iree_vector_ext.to_layout %9 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%11 = arith.divf %10, %cst : vector<64xf32>
%12 = iree_vector_ext.to_layout %11 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%13 = tensor.empty() : tensor<64xf32>
%14 = vector.transfer_write %12, %13[%c0] {in_bounds = [true]} : vector<64xf32>, tensor<64xf32>
flow.dispatch.tensor.store %14, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After GPUVectorAllocPass (iree-codegen-gpu-vector-alloc) //----- //
// @encode: row-wise mean of a 64x1280 f16 input, produced as 64 f32 values.
// The input is consumed in five 64x256 column slices that are widened to f32 and
// accumulated element-wise; the 64x256 partial sums are then add-reduced along
// dim 1 and divided by 1280.
// translation_info selects the LLVMGPUVectorDistribute pipeline with
// workgroup_size = [64, 1, 1] and subgroup_size = 64.
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// Splat of 1280.0 (one lane per row): divisor of the final arith.divf.
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
// Zero splat: initial value of the loop-carried 64x256 accumulator.
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
// Zero splat: accumulator operand of the vector.multi_reduction after the loop.
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
// f16 zero: padding operand of vector.transfer_read.
%cst_2 = arith.constant 0.000000e+00 : f16
// Loop step (256), loop upper bound (1280), and loop lower bound / base index (0).
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
// Two i32 interface constants (ordinals 0 and 1), cast unsigned to index.
// After util.assume.int they become the offsets of binding 0 and binding 1.
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
// util.assume.int attaches a list of <umin, umax, udiv> assumptions to %2 and to %3
// and returns two index results (%4#0, %4#1), used below as the binding offsets.
// In every entry printed here umin, umax and udiv are the same number.
// NOTE(review): both lists repeat the same few entries many times — presumably one
// entry per call site of this dispatch; confirm against whatever produces them.
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// Binding 0: read-only 64x1280 f16 input, at offset %4#0.
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
// Binding 1: write-only 64-element f32 output, at offset %4#1.
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
// Load the whole input: offsets [0, 0], sizes [64, 1280], unit strides.
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
// Column loop: %arg0 = 0, 256, 512, 768, 1024 (five iterations).
// %arg1 is the loop-carried 64x256 f32 running sum, starting from %cst_0 (zeros).
%8 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %cst_0) -> (vector<64x256xf32>) {
// Read the 64x256 f16 slice that starts at column %arg0; both dims are marked in_bounds.
%15 = vector.transfer_read %7[%c0, %arg0], %cst_2 {in_bounds = [true, true]} : tensor<64x1280xf16>, vector<64x256xf16>
// Every to_layout in this loop carries the same nested_layout attribute:
// thread_tile = [64, 32], element_tile = [1, 8], all other tiles [1, 1]
// (tile products 64 x 256 match the vector shape).
// NOTE(review): thread_tile = [64, 32] names 64*32 thread positions while
// workgroup_size is [64, 1, 1] — confirm this layout is what is intended.
%16 = iree_vector_ext.to_layout %15 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
// Widen f16 -> f32 before accumulating.
%17 = arith.extf %16 : vector<64x256xf16> to vector<64x256xf32>
// NOTE(review): %18 and %19 apply the identical layout back to back; the second
// looks redundant but is kept exactly as the compiler printed it.
%18 = iree_vector_ext.to_layout %17 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
// Layout-tag the running sum, add the new slice element-wise, layout-tag the result.
%20 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%21 = arith.addf %19, %20 : vector<64x256xf32>
%22 = iree_vector_ext.to_layout %21 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
// Carry the updated running sum into the next iteration.
scf.yield %22 : vector<64x256xf32>
}
// Add-reduce the 64x256 partial sums along dim 1, with %cst_1 (zeros) as the
// accumulator operand: one f32 sum per row.
%9 = vector.multi_reduction <add>, %8, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
// 1-D layout for the per-row values: thread_tile = [64], element_tile = [1].
%10 = iree_vector_ext.to_layout %9 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
// Divide each row sum by 1280.0 (the number of input columns) to get the row mean.
%11 = arith.divf %10, %cst : vector<64xf32>
%12 = iree_vector_ext.to_layout %11 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
// Write the 64 results into a fresh tensor and store it to the output binding.
%13 = tensor.empty() : tensor<64xf32>
%14 = vector.transfer_write %12, %13[%c0] {in_bounds = [true]} : vector<64xf32>, tensor<64xf32>
flow.dispatch.tensor.store %14, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
%cst_2 = arith.constant 0.000000e+00 : f16
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
%8 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %cst_0) -> (vector<64x256xf32>) {
%15 = vector.transfer_read %7[%c0, %arg0], %cst_2 {in_bounds = [true, true]} : tensor<64x1280xf16>, vector<64x256xf16>
%16 = iree_vector_ext.to_layout %15 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
%17 = arith.extf %16 : vector<64x256xf16> to vector<64x256xf32>
%18 = iree_vector_ext.to_layout %17 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%20 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%21 = arith.addf %19, %20 : vector<64x256xf32>
%22 = iree_vector_ext.to_layout %21 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
scf.yield %22 : vector<64x256xf32>
}
%9 = vector.multi_reduction <add>, %8, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
%10 = iree_vector_ext.to_layout %9 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%11 = arith.divf %10, %cst : vector<64xf32>
%12 = iree_vector_ext.to_layout %11 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%13 = tensor.empty() : tensor<64xf32>
%14 = vector.transfer_write %12, %13[%c0] {in_bounds = [true]} : vector<64xf32>, tensor<64xf32>
flow.dispatch.tensor.store %14, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After CSE (cse) //----- //
// Dispatch function `@encode` as printed after the CSE pass.
// For the 64x1280 f16 input on binding 0 it accumulates each row in f32,
// divides the per-row sum by 1280.0 (i.e. the mean over the 1280 columns),
// and stores the 64 f32 results to binding 1.
// translation_info selects the LLVMGPUVectorDistribute pipeline with
// workgroup_size = [64, 1, 1] and subgroup_size = 64.
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// Splat of 1280.0: the divisor used by arith.divf after the reduction.
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
// Zero splat: initial value of the 64x256 loop-carried accumulator.
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
// Zero splat: accumulator operand of vector.multi_reduction.
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
// f16 zero: padding operand of vector.transfer_read.
%cst_2 = arith.constant 0.000000e+00 : f16
// Loop step (256), loop upper bound (1280) and lower bound / base index (0).
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
// Two i32 interface constants (ordinals 0 and 1), cast unsigned to index below.
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
// util.assume.int attaches one list of <umin, umax, udiv> assumptions to %2
// and one to %3; results %4#0 and %4#1 are used below as the offsets of
// binding 0 and binding 1 respectively.
// NOTE(review): the operand lists are wrapped mid-token by the printer/paste;
// the line breaks inside them carry no meaning.
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// Binding 0: read-only 64x1280 f16 dispatch tensor at offset %4#0.
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
// Binding 1: write-only 64-element f32 dispatch tensor at offset %4#1.
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
// Load the entire 64x1280 input (offsets [0, 0], unit strides) as a tensor value.
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
// Walk the columns in chunks of 256 (%arg0 = 0, 256, ..., 1024), carrying a
// 64x256 f32 running sum in %arg1 that starts from the zero splat %cst_0.
// Every to_layout in the loop names the same nested layout
// (thread_tile = [64, 32], element_tile = [1, 8], thread_strides = [32, 1]).
%8 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %cst_0) -> (vector<64x256xf32>) {
// Read the 64x256 f16 chunk whose first column is %arg0 (marked fully in-bounds).
%15 = vector.transfer_read %7[%c0, %arg0], %cst_2 {in_bounds = [true, true]} : tensor<64x1280xf16>, vector<64x256xf16>
%16 = iree_vector_ext.to_layout %15 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
// Widen the chunk from f16 to f32 before accumulating.
%17 = arith.extf %16 : vector<64x256xf16> to vector<64x256xf32>
// NOTE(review): %18 and %19 apply the identical layout back to back; this is
// how the pass emitted it, so it is kept verbatim.
%18 = iree_vector_ext.to_layout %17 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%20 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
// Elementwise add of the widened chunk and the running sum.
%21 = arith.addf %19, %20 : vector<64x256xf32>
%22 = iree_vector_ext.to_layout %21 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
// Carry the updated sum into the next iteration.
scf.yield %22 : vector<64x256xf32>
}
// Add-reduce the 64x256 partial sums along dimension 1 (starting from the
// zero splat %cst_1), leaving one f32 sum per row.
%9 = vector.multi_reduction <add>, %8, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
%10 = iree_vector_ext.to_layout %9 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
// Divide every row sum by 1280.0.
%11 = arith.divf %10, %cst : vector<64xf32>
%12 = iree_vector_ext.to_layout %11 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
// Write the 64 results into a fresh tensor and store it to binding 1.
%13 = tensor.empty() : tensor<64xf32>
%14 = vector.transfer_write %12, %13[%c0] {in_bounds = [true]} : vector<64xf32>, tensor<64xf32>
flow.dispatch.tensor.store %14, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After GPUCombineValueBarriersPass (iree-codegen-gpu-combine-value-barriers) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
%cst_2 = arith.constant 0.000000e+00 : f16
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
%8 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %cst_0) -> (vector<64x256xf32>) {
%15 = vector.transfer_read %7[%c0, %arg0], %cst_2 {in_bounds = [true, true]} : tensor<64x1280xf16>, vector<64x256xf16>
%16 = iree_vector_ext.to_layout %15 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
%17 = arith.extf %16 : vector<64x256xf16> to vector<64x256xf32>
%18 = iree_vector_ext.to_layout %17 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%20 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%21 = arith.addf %19, %20 : vector<64x256xf32>
%22 = iree_vector_ext.to_layout %21 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
scf.yield %22 : vector<64x256xf32>
}
%9 = vector.multi_reduction <add>, %8, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
%10 = iree_vector_ext.to_layout %9 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%11 = arith.divf %10, %cst : vector<64xf32>
%12 = iree_vector_ext.to_layout %11 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%13 = tensor.empty() : tensor<64xf32>
%14 = vector.transfer_write %12, %13[%c0] {in_bounds = [true]} : vector<64xf32>, tensor<64xf32>
flow.dispatch.tensor.store %14, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After EliminateEmptyTensorsPass (iree-eliminate-empty-tensors) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
%cst_2 = arith.constant 0.000000e+00 : f16
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
%8 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %cst_0) -> (vector<64x256xf32>) {
%16 = vector.transfer_read %7[%c0, %arg0], %cst_2 {in_bounds = [true, true]} : tensor<64x1280xf16>, vector<64x256xf16>
%17 = iree_vector_ext.to_layout %16 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
%18 = arith.extf %17 : vector<64x256xf16> to vector<64x256xf32>
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%20 = iree_vector_ext.to_layout %19 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%21 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%22 = arith.addf %20, %21 : vector<64x256xf32>
%23 = iree_vector_ext.to_layout %22 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
scf.yield %23 : vector<64x256xf32>
}
%9 = vector.multi_reduction <add>, %8, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
%10 = iree_vector_ext.to_layout %9 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%11 = arith.divf %10, %cst : vector<64xf32>
%12 = iree_vector_ext.to_layout %11 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%13 = flow.dispatch.tensor.load %6, offsets = [0], sizes = [64], strides = [1] : !flow.dispatch.tensor<writeonly:tensor<64xf32>> -> tensor<64xf32>
%14 = tensor.empty() : tensor<64xf32>
%15 = vector.transfer_write %12, %13[%c0] {in_bounds = [true]} : vector<64xf32>, tensor<64xf32>
flow.dispatch.tensor.store %15, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After EmptyTensorToAllocTensor (empty-tensor-to-alloc-tensor) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
%cst_2 = arith.constant 0.000000e+00 : f16
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16>
%8 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %cst_0) -> (vector<64x256xf32>) {
%15 = vector.transfer_read %7[%c0, %arg0], %cst_2 {in_bounds = [true, true]} : tensor<64x1280xf16>, vector<64x256xf16>
%16 = iree_vector_ext.to_layout %15 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
%17 = arith.extf %16 : vector<64x256xf16> to vector<64x256xf32>
%18 = iree_vector_ext.to_layout %17 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%20 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%21 = arith.addf %19, %20 : vector<64x256xf32>
%22 = iree_vector_ext.to_layout %21 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
scf.yield %22 : vector<64x256xf32>
}
%9 = vector.multi_reduction <add>, %8, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
%10 = iree_vector_ext.to_layout %9 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%11 = arith.divf %10, %cst : vector<64xf32>
%12 = iree_vector_ext.to_layout %11 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%13 = flow.dispatch.tensor.load %6, offsets = [0], sizes = [64], strides = [1] : !flow.dispatch.tensor<writeonly:tensor<64xf32>> -> tensor<64xf32>
%14 = vector.transfer_write %12, %13[%c0] {in_bounds = [true]} : vector<64xf32>, tensor<64xf32>
flow.dispatch.tensor.store %14, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>>
return
}
// -----// IR Dump After IREEComprehensiveBufferizePass (iree-codegen-iree-comprehensive-bufferize) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// Snapshot of @encode in memref form: both bindings below are memrefs and the
// result is written with vector.transfer_write directly into %6.
// Computation visible in this body: for each of the 64 rows of a 64x1280 f16
// input, accumulate the 1280 columns in f32 and divide by 1280.0 (a per-row
// mean), producing 64 f32 values.

// Splat divisor 1280.0, consumed by the arith.divf after the reduction.
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
// Zero-filled 64x256 f32 vector: initial value of the loop-carried accumulator.
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
// Zero-filled 64xf32 vector: accumulator operand of vector.multi_reduction.
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
// f16 zero passed as the padding operand of vector.transfer_read.
%cst_2 = arith.constant 0.000000e+00 : f16
// Loop step (256), loop upper bound (1280) and the zero index.
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
// Two i32 values loaded from ordinals 0 and 1 of the interface constants, then
// cast (unsigned) to index.
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
// util.assume.int attaches a list of <umin, umax, udiv> assumptions to each of
// %2 and %3; within every entry the three fields carry the same value. The
// results %4#0 and %4#1 are used only as the offset(...) operands of the two
// binding subspans below. The operand lists are very long and are broken
// across physical lines mid-entry in this dump.
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// Binding 0, flagged "ReadOnly|Indirect": the 64x1280 f16 input viewed as a
// memref with row stride 1280 and a dynamic offset.
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %5, 64 : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
// Binding 1: the 64-element f32 output memref.
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %6, 64 : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
// Walk the column dimension in steps of 256 (0, 256, ..., 1024: five
// iterations), carrying a 64x256 f32 running sum in %arg1.
%7 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %cst_0) -> (vector<64x256xf32>) {
// Read the 64x256 f16 tile whose first column is %arg0.
%12 = vector.transfer_read %5[%c0, %arg0], %cst_2 {in_bounds = [true, true]} : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<64x256xf16>
// Every to_layout inside this loop carries the same nested_layout attribute
// (thread_tile = [64, 32], element_tile = [1, 8], thread_strides = [32, 1]).
// NOTE(review): thread_tile = [64, 32] covers 64 * 32 = 2048 positions while
// translation_info declares workgroup_size = [64, 1, 1] — confirm this layout
// is what the configuration intended.
%13 = iree_vector_ext.to_layout %12 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
// Widen f16 -> f32 before accumulating.
%14 = arith.extf %13 : vector<64x256xf16> to vector<64x256xf32>
// %15 and %16 apply the identical layout back to back.
%15 = iree_vector_ext.to_layout %14 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%16 = iree_vector_ext.to_layout %15 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%17 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
// Elementwise accumulate: current tile + running sum.
%18 = arith.addf %16, %17 : vector<64x256xf32>
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
scf.yield %19 : vector<64x256xf32>
}
// Add-reduce dimension 1 of the 64x256 partial sums, giving one sum per row.
%8 = vector.multi_reduction <add>, %7, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
%9 = iree_vector_ext.to_layout %8 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
// Divide every row sum by the splat 1280.0 constant.
%10 = arith.divf %9, %cst : vector<64xf32>
%11 = iree_vector_ext.to_layout %10 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
// Write the 64 results into the output binding starting at index 0.
vector.transfer_write %11, %6[%c0] {in_bounds = [true]} : vector<64xf32>, memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
// Source and destination of this copy are the same SSA value (%6).
// NOTE(review): presumably left over from bufferizing the tensor store seen in
// the preceding dump; confirm a later cleanup pass removes it.
memref.copy %6, %6 : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
return
}
// -----// IR Dump After ResolveShapedTypeResultDims (resolve-shaped-type-result-dims) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
%cst_2 = arith.constant 0.000000e+00 : f16
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %5, 64 : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %6, 64 : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%7 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %cst_0) -> (vector<64x256xf32>) {
%12 = vector.transfer_read %5[%c0, %arg0], %cst_2 {in_bounds = [true, true]} : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<64x256xf16>
%13 = iree_vector_ext.to_layout %12 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
%14 = arith.extf %13 : vector<64x256xf16> to vector<64x256xf32>
%15 = iree_vector_ext.to_layout %14 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%16 = iree_vector_ext.to_layout %15 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%17 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%18 = arith.addf %16, %17 : vector<64x256xf32>
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
scf.yield %19 : vector<64x256xf32>
}
%8 = vector.multi_reduction <add>, %7, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
%9 = iree_vector_ext.to_layout %8 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%10 = arith.divf %9, %cst : vector<64xf32>
%11 = iree_vector_ext.to_layout %10 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
vector.transfer_write %11, %6[%c0] {in_bounds = [true]} : vector<64xf32>, memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.copy %6, %6 : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
%cst_2 = arith.constant 0.000000e+00 : f16
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %5, 64 : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %6, 64 : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%7 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %cst_0) -> (vector<64x256xf32>) {
%12 = vector.transfer_read %5[%c0, %arg0], %cst_2 {in_bounds = [true, true]} : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<64x256xf16>
%13 = iree_vector_ext.to_layout %12 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
%14 = arith.extf %13 : vector<64x256xf16> to vector<64x256xf32>
%15 = iree_vector_ext.to_layout %14 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%16 = iree_vector_ext.to_layout %15 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%17 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%18 = arith.addf %16, %17 : vector<64x256xf32>
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
scf.yield %19 : vector<64x256xf32>
}
%8 = vector.multi_reduction <add>, %7, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
%9 = iree_vector_ext.to_layout %8 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%10 = arith.divf %9, %cst : vector<64xf32>
%11 = iree_vector_ext.to_layout %10 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
vector.transfer_write %11, %6[%c0] {in_bounds = [true]} : vector<64xf32>, memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
%cst_2 = arith.constant 0.000000e+00 : f16
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %5, 64 : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %6, 64 : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%7 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %cst_0) -> (vector<64x256xf32>) {
%12 = vector.transfer_read %5[%c0, %arg0], %cst_2 {in_bounds = [true, true]} : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<64x256xf16>
%13 = iree_vector_ext.to_layout %12 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
%14 = arith.extf %13 : vector<64x256xf16> to vector<64x256xf32>
%15 = iree_vector_ext.to_layout %14 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%16 = iree_vector_ext.to_layout %15 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%17 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%18 = arith.addf %16, %17 : vector<64x256xf32>
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
scf.yield %19 : vector<64x256xf32>
}
%8 = vector.multi_reduction <add>, %7, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
%9 = iree_vector_ext.to_layout %8 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%10 = arith.divf %9, %cst : vector<64xf32>
%11 = iree_vector_ext.to_layout %10 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
vector.transfer_write %11, %6[%c0] {in_bounds = [true]} : vector<64xf32>, memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
%cst_2 = arith.constant 0.000000e+00 : f16
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %5, 64 : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %6, 64 : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%7 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %cst_0) -> (vector<64x256xf32>) {
%12 = vector.transfer_read %5[%c0, %arg0], %cst_2 {in_bounds = [true, true]} : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<64x256xf16>
%13 = iree_vector_ext.to_layout %12 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
%14 = arith.extf %13 : vector<64x256xf16> to vector<64x256xf32>
%15 = iree_vector_ext.to_layout %14 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%16 = iree_vector_ext.to_layout %15 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%17 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%18 = arith.addf %16, %17 : vector<64x256xf32>
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
scf.yield %19 : vector<64x256xf32>
}
%8 = vector.multi_reduction <add>, %7, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
%9 = iree_vector_ext.to_layout %8 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%10 = arith.divf %9, %cst : vector<64xf32>
%11 = iree_vector_ext.to_layout %10 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
vector.transfer_write %11, %6[%c0] {in_bounds = [true]} : vector<64xf32>, memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
return
}
// -----// IR Dump After CleanupBufferAllocViewPass (iree-codegen-cleanup-buffer-alloc-view) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
%cst_2 = arith.constant 0.000000e+00 : f16
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %5, 64 : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %6, 64 : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%7 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %cst_0) -> (vector<64x256xf32>) {
%12 = vector.transfer_read %5[%c0, %arg0], %cst_2 {in_bounds = [true, true]} : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<64x256xf16>
%13 = iree_vector_ext.to_layout %12 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
%14 = arith.extf %13 : vector<64x256xf16> to vector<64x256xf32>
%15 = iree_vector_ext.to_layout %14 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%16 = iree_vector_ext.to_layout %15 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%17 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%18 = arith.addf %16, %17 : vector<64x256xf32>
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
scf.yield %19 : vector<64x256xf32>
}
%8 = vector.multi_reduction <add>, %7, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
%9 = iree_vector_ext.to_layout %8 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%10 = arith.divf %9, %cst : vector<64xf32>
%11 = iree_vector_ext.to_layout %10 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
vector.transfer_write %11, %6[%c0] {in_bounds = [true]} : vector<64xf32>, memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
// @encode takes no arguments and has no results; its data is reached through the
// hal.interface bindings below. translation_info selects the LLVMGPUVectorDistribute
// pipeline with workgroup_size = [64, 1, 1] and subgroup_size = 64.
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// Splat of 1280.0: divisor applied to the 64 row sums after the loop (arith.divf below).
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
// Zero splat: initial value of the loop-carried 64x256 accumulator (iter_args of scf.for).
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
// Zero splat: accumulator operand of the final vector.multi_reduction.
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
// f16 zero passed as the padding operand of vector.transfer_read.
%cst_2 = arith.constant 0.000000e+00 : f16
// Loop step (%c256), loop upper bound (%c1280), and loop lower bound / base index (%c0).
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
// Two i32 constants loaded from the interface (ordinals 0 and 1) ...
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
// ... and cast (as unsigned) from i32 to index.
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
// Attaches a list of <umin, umax, udiv> tuples to each of %2 and %3 and yields them as
// %4#0 / %4#1, which are later used as the offset(...) operands of the two binding
// subspans. In every tuple of the lists that follow, umin, umax and udiv are equal.
// NOTE(review): presumably each tuple corresponds to one dispatch site of this
// executable -- confirm against the util.assume.int documentation.
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// Binding 0 (flags "ReadOnly|Indirect"), only read in this function: viewed as a
// 64x1280 f16 memref with strides [1280, 1] (row-major) at dynamic offset %4#0.
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
// Alignment assumption of 64 on %5, matching alignment(64) on the subspan.
memref.assume_alignment %5, 64 : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
// Binding 1 (flags Indirect), only written in this function: viewed as a 64-element
// f32 memref with unit stride at dynamic offset %4#1.
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
// Alignment assumption of 64 on %6, matching alignment(64) on the subspan.
memref.assume_alignment %6, 64 : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
// Accumulation loop: %arg0 = 0, 256, ..., 1024 (5 iterations) walks the 1280 columns
// in tiles of 256. %arg1 is the loop-carried 64x256 f32 accumulator, initialised to
// zeros (%cst_0); the final accumulator is returned as %7.
%7 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %cst_0) -> (vector<64x256xf32>) {
// Read the 64x256 f16 tile starting at [0, %arg0]; both dimensions are marked in_bounds.
%12 = vector.transfer_read %5[%c0, %arg0], %cst_2 {in_bounds = [true, true]} : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<64x256xf16>
// Every to_layout in this loop uses the same nested layout: thread_tile = [64, 32] and
// element_tile = [1, 8] (64*1 x 32*8 = 64x256), all other tiles [1, 1],
// thread_strides = [32, 1].
// NOTE(review): thread_tile = [64, 32] covers 64*32 = 2048 thread positions, while the
// function's translation_info declares workgroup_size = [64, 1, 1] -- confirm this
// layout is intended for a 64-thread workgroup.
%13 = iree_vector_ext.to_layout %12 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
// Widen the tile from f16 to f32 before accumulating.
%14 = arith.extf %13 : vector<64x256xf16> to vector<64x256xf32>
%15 = iree_vector_ext.to_layout %14 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
// %16 re-applies to %15 the same layout that %15 already carries.
%16 = iree_vector_ext.to_layout %15 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
// Same layout applied to the incoming accumulator.
%17 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
// Elementwise accumulate: new tile + running accumulator.
%18 = arith.addf %16, %17 : vector<64x256xf32>
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
// Carry the updated accumulator into the next iteration.
scf.yield %19 : vector<64x256xf32>
}
// Add-reduce the 64x256 accumulator along dimension 1, starting from zeros (%cst_1),
// giving one sum per row. Together with the 5 loop iterations this sums all
// 5 * 256 = 1280 columns of each row.
%8 = vector.multi_reduction <add>, %7, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
// 1-D nested layout for the row sums: thread_tile = [64], element_tile = [1].
%9 = iree_vector_ext.to_layout %8 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
// Divide each row sum by 1280.0 (%cst), i.e. the mean over the 1280 columns.
%10 = arith.divf %9, %cst : vector<64xf32>
%11 = iree_vector_ext.to_layout %10 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
// Store the 64 results into binding 1 (%6) starting at index 0.
vector.transfer_write %11, %6[%c0] {in_bounds = [true]} : vector<64xf32>, memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
%cst_2 = arith.constant 0.000000e+00 : f16
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %5, 64 : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %6, 64 : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%7 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %cst_0) -> (vector<64x256xf32>) {
%12 = vector.transfer_read %5[%c0, %arg0], %cst_2 {in_bounds = [true, true]} : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<64x256xf16>
%13 = iree_vector_ext.to_layout %12 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
%14 = arith.extf %13 : vector<64x256xf16> to vector<64x256xf32>
%15 = iree_vector_ext.to_layout %14 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%16 = iree_vector_ext.to_layout %15 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%17 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%18 = arith.addf %16, %17 : vector<64x256xf32>
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
scf.yield %19 : vector<64x256xf32>
}
%8 = vector.multi_reduction <add>, %7, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
%9 = iree_vector_ext.to_layout %8 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%10 = arith.divf %9, %cst : vector<64xf32>
%11 = iree_vector_ext.to_layout %10 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
vector.transfer_write %11, %6[%c0] {in_bounds = [true]} : vector<64xf32>, memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
return
}
// -----// IR Dump After Canonicalizer (canonicalize) //----- //
// @encode (state after canonicalization): computes, for each of the 64 rows of a
// 64x1280 f16 input buffer, the sum of the row in f32 divided by 1280, and stores
// the 64 f32 results to the output buffer.
// translation_info selects the LLVMGPUVectorDistribute pipeline with
// workgroup_size = [64, 1, 1] and subgroup_size = 64.
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// Splat/scalar constants used below:
//   %cst   - 1280.0 divisor applied to the reduced row sums,
//   %cst_0 - zero initial value of the 64x256 loop accumulator,
//   %cst_1 - zero accumulator operand of the multi_reduction,
//   %cst_2 - f16 padding value passed to vector.transfer_read.
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
%cst_2 = arith.constant 0.000000e+00 : f16
// Loop step, loop upper bound, and loop lower bound (%c0 is also the start index
// for the read rows and the write).
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
// Two i32 interface constants (ordinals 0 and 1) are loaded and cast (unsigned)
// to index; they end up as the offsets of the two binding subspans below.
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
// util.assume.int attaches a list of <umin, umax, udiv> assumptions to %2 and to
// %3 and yields %4#0 / %4#1, which are used in place of %2 / %3 from here on.
// NOTE(review): presumably each list entry corresponds to one call site of this
// dispatch - confirm against the util dialect documentation.
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// Binding 0: read-only 64x1280 f16 input located at offset %4#0;
// memref.assume_alignment asserts an alignment of 64 on it.
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %5, 64 : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
// Binding 1: 64-element f32 output located at offset %4#1, same alignment assertion.
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %6, 64 : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
// Accumulation loop: %arg0 walks the column offset 0, 256, ..., 1024 (five
// iterations); %arg1 carries the 64x256 f32 running sum, starting from zero.
%7 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %cst_0) -> (vector<64x256xf32>) {
// Read the 64x256 f16 tile starting at row 0, column %arg0 (both dims marked in-bounds).
%12 = vector.transfer_read %5[%c0, %arg0], %cst_2 {in_bounds = [true, true]} : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<64x256xf16>
// NOTE(review): every 2-D to_layout in this loop uses thread_tile = [64, 32]
// (64 * 32 = 2048) while the function declares workgroup_size = [64, 1, 1] and
// subgroup_size = 64 - confirm this layout is what the configuration intended.
%13 = iree_vector_ext.to_layout %12 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
// Widen the tile from f16 to f32 before accumulating.
%14 = arith.extf %13 : vector<64x256xf16> to vector<64x256xf32>
// %15 and %16 apply the same layout back to back (%14 -> %15 -> %16); the second
// looks redundant - presumably left for a later pass to fold (TODO confirm).
%15 = iree_vector_ext.to_layout %14 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%16 = iree_vector_ext.to_layout %15 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
// The carried accumulator is given the same layout as the freshly read tile.
%17 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
// Element-wise add of the new tile into the running sum; the result is yielded
// as the next iteration's %arg1.
%18 = arith.addf %16, %17 : vector<64x256xf32>
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
scf.yield %19 : vector<64x256xf32>
}
// Add-reduce the accumulated 64x256 tile along dim 1, giving one f32 sum per row
// (each sum covers all 1280 input columns of that row).
%8 = vector.multi_reduction <add>, %7, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
%9 = iree_vector_ext.to_layout %8 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
// Divide each row sum by 1280 (the number of columns summed) -> row mean.
%10 = arith.divf %9, %cst : vector<64xf32>
%11 = iree_vector_ext.to_layout %10 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
// Store the 64 results to the output binding starting at element 0.
vector.transfer_write %11, %6[%c0] {in_bounds = [true]} : vector<64xf32>, memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
return
}
// -----// IR Dump After CSE (cse) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
%cst_2 = arith.constant 0.000000e+00 : f16
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %5, 64 : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %6, 64 : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%7 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %cst_0) -> (vector<64x256xf32>) {
%12 = vector.transfer_read %5[%c0, %arg0], %cst_2 {in_bounds = [true, true]} : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<64x256xf16>
%13 = iree_vector_ext.to_layout %12 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
%14 = arith.extf %13 : vector<64x256xf16> to vector<64x256xf32>
%15 = iree_vector_ext.to_layout %14 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%16 = iree_vector_ext.to_layout %15 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%17 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%18 = arith.addf %16, %17 : vector<64x256xf32>
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
scf.yield %19 : vector<64x256xf32>
}
%8 = vector.multi_reduction <add>, %7, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
%9 = iree_vector_ext.to_layout %8 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%10 = arith.divf %9, %cst : vector<64xf32>
%11 = iree_vector_ext.to_layout %10 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
vector.transfer_write %11, %6[%c0] {in_bounds = [true]} : vector<64xf32>, memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
return
}
// -----// IR Dump After HoistStaticallyBoundAllocationsPass (iree-codegen-hoist-statically-bound-allocations) //----- //
// Kernel @encode: takes no arguments and returns nothing; all data flows through the two
// HAL buffer bindings materialized below. The translation_info attribute selects the
// LLVMGPUVectorDistribute pipeline with workgroup_size = [64, 1, 1] and subgroup_size = 64.
// Net effect of the body: for each of the 64 rows of the 64x1280 f16 input, accumulate the
// 1280 elements in f32 and divide the sum by 1280 (a per-row mean), then store the 64 results.
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
// %cst: splat 1280.0, the divisor used by the final arith.divf.
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
// %cst_0: all-zero 64x256 tile, the initial value of the loop-carried accumulator.
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
// %cst_1: all-zero 64-element vector, the accumulator operand of vector.multi_reduction.
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
// %cst_2: f16 zero passed as the padding operand of vector.transfer_read.
%cst_2 = arith.constant 0.000000e+00 : f16
// Loop step, loop upper bound, and loop lower bound / base index, respectively.
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
// Load the two i32 interface constants (ordinals 0 and 1) and cast them, as unsigned, to index.
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
// util.assume.int attaches a list of <umin, umax, udiv> assumptions to %2 and to %3 and
// returns them as %4#0 and %4#1, which are used as the binding offsets below.
// NOTE(review): each list carries many entries; presumably one per call site of this
// dispatch -- confirm against the util dialect documentation.
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
// %5: binding(0), flagged "ReadOnly|Indirect", viewed as a 64x1280 f16 memref at offset %4#0.
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %5, 64 : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
// %6: binding(1), viewed as a 64-element f32 memref at offset %4#1; this is the buffer written at the end.
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %6, 64 : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
// Column-tile loop: %arg0 takes the values 0, 256, 512, 768, 1024 (five iterations).
// %arg1 is the loop-carried 64x256 f32 running sum, seeded with zeros from %cst_0.
%7 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %cst_0) -> (vector<64x256xf32>) {
// Read the 64x256 f16 tile whose first column is %arg0; both dimensions are marked in-bounds.
%12 = vector.transfer_read %5[%c0, %arg0], %cst_2 {in_bounds = [true, true]} : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<64x256xf16>
// Each to_layout below tags its operand with the same 2-D nested layout
// (thread_tile = [64, 32], element_tile = [1, 8], thread_strides = [32, 1]).
%13 = iree_vector_ext.to_layout %12 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
// Widen the tile from f16 to f32 before accumulating.
%14 = arith.extf %13 : vector<64x256xf16> to vector<64x256xf32>
// NOTE(review): %15 and %16 apply the identical layout back to back; this looks redundant
// in this dump and is presumably folded by a later pass -- confirm.
%15 = iree_vector_ext.to_layout %14 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%16 = iree_vector_ext.to_layout %15 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
// Same layout applied to the incoming accumulator so both addf operands agree.
%17 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
// Elementwise accumulate: new running sum = current tile + previous running sum.
%18 = arith.addf %16, %17 : vector<64x256xf32>
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
// Carry the updated running sum into the next iteration.
scf.yield %19 : vector<64x256xf32>
}
// Add-reduce the accumulated 64x256 tile along dimension 1 (on top of the zero vector %cst_1),
// producing one f32 sum per row.
%8 = vector.multi_reduction <add>, %7, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
// The 1-D results carry a nested layout with thread_tile = [64], element_tile = [1].
%9 = iree_vector_ext.to_layout %8 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
// Divide each row sum by 1280 (the number of columns summed per row).
%10 = arith.divf %9, %cst : vector<64xf32>
%11 = iree_vector_ext.to_layout %10 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
// Store the 64 results to the output binding starting at index 0.
vector.transfer_write %11, %6[%c0] {in_bounds = [true]} : vector<64xf32>, memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
return
}
// -----// IR Dump After LLVMGPUCastTypeToFitMMAPass (iree-llvmgpu-cast-type-to-fit-mma) //----- //
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
%cst = arith.constant dense<1.280000e+03> : vector<64xf32>
%cst_0 = arith.constant dense<0.000000e+00> : vector<64x256xf32>
%cst_1 = arith.constant dense<0.000000e+00> : vector<64xf32>
%cst_2 = arith.constant 0.000000e+00 : f16
%c256 = arith.constant 256 : index
%c1280 = arith.constant 1280 : index
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
%4:2 = util.assume.int
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>],
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>]
: index, index
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %5, 64 : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %6, 64 : memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%7 = scf.for %arg0 = %c0 to %c1280 step %c256 iter_args(%arg1 = %cst_0) -> (vector<64x256xf32>) {
%12 = vector.transfer_read %5[%c0, %arg0], %cst_2 {in_bounds = [true, true]} : memref<64x1280xf16, strided<[1280, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<64x256xf16>
%13 = iree_vector_ext.to_layout %12 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf16>
%14 = arith.extf %13 : vector<64x256xf16> to vector<64x256xf32>
%15 = iree_vector_ext.to_layout %14 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%16 = iree_vector_ext.to_layout %15 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%17 = iree_vector_ext.to_layout %arg1 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
%18 = arith.addf %16, %17 : vector<64x256xf32>
%19 = iree_vector_ext.to_layout %18 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [64, 32], element_tile = [1, 8], subgroup_strides = [0, 0], thread_strides = [32, 1]>) : vector<64x256xf32>
scf.yield %19 : vector<64x256xf32>
}
%8 = vector.multi_reduction <add>, %7, %cst_1 [1] : vector<64x256xf32> to vector<64xf32>
%9 = iree_vector_ext.to_layout %8 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
%10 = arith.divf %9, %cst : vector<64xf32>
%11 = iree_vector_ext.to_layout %10 to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1], batch_tile = [1], outer_tile = [1], thread_tile = [64], element_tile = [1], subgroup_strides = [0], thread_strides = [1]>) : vector<64xf32>
vector.transfer_write %11, %6[%c0] {in_bounds = [true]} : vector<64xf32>, memref<64xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
return
}
within split at test_this_now.mlir:1 offset :10:3: error: 'func.func' op failed to distribute
func.func @encode() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute workgroup_size = [64, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true, use_igemm_convolution = false>}>} {
^
within split at test_this_now.mlir:1 offset :10:3: note: see current operation:
"func.func"() <{function_type = () -> (), sym_name = "encode"}> ({
%0 = "arith.constant"() <{value = dense<0.000000e+00> : vector<1xf32>}> : () -> vector<1xf32>
%1 = "arith.constant"() <{value = dense<0.000000e+00> : vector<1x1x1x1x1x8xf16>}> : () -> vector<1x1x1x1x1x8xf16>
%2 = "arith.constant"() <{value = 0 : index}> : () -> index
%3 = "arith.constant"() <{value = 1280 : index}> : () -> index
%4 = "arith.constant"() <{value = 256 : index}> : () -> index
%5 = "arith.constant"() <{value = 0.000000e+00 : f16}> : () -> f16
%6 = "arith.constant"() <{value = dense<0.000000e+00> : vector<1x1x1xf32>}> : () -> vector<1x1x1xf32>
%7 = "arith.constant"() <{value = dense<0.000000e+00> : vector<1x1x1x1x1x8xf32>}> : () -> vector<1x1x1x1x1x8xf32>
%8 = "arith.constant"() <{value = dense<1.280000e+03> : vector<1x1x1xf32>}> : () -> vector<1x1x1xf32>
%9 = "gpu.thread_id"() <{dimension = #gpu<dim z>}> : () -> index
%10 = "gpu.thread_id"() <{dimension = #gpu<dim y>}> : () -> index
%11 = "gpu.thread_id"() <{dimension = #gpu<dim x>}> : () -> index
%12 = "affine.linearize_index"(%9, %10, %11) <{disjoint, operandSegmentSizes = array<i32: 3, 0>, static_basis = array<i64: 1, 1, 64>}> : (index, index, index) -> index
%13 = "hal.interface.constant.load"() {layout = #hal.pipeline.layout<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>, ordinal = 0 : index} : () -> i32
%14 = "hal.interface.constant.load"() {layout = #hal.pipeline.layout<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>, ordinal = 1 : index} : () -> i32
%15 = "arith.index_castui"(%13) : (i32) -> index
%16 = "arith.index_castui"(%14) : (i32) -> index
%17:2 = "util.assume.int"(%15, %16) <{assumptions = [[#util.int.assumption<umin = 688128, umax = 688128, udiv = 688128>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 
98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, 
#util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 262144, umax = 262144, udiv = 262144>, #util.int.assumption<umin = 98304, umax = 98304, udiv = 98304>, #util.int.assumption<umin = 425984, umax = 425984, udiv = 425984>, #util.int.assumption<umin = 425984, umax = 425984, udiv = 425984>, #util.int.assumption<umin = 589824, umax = 589824, udiv = 589824>, #util.int.assumption<umin = 425984, umax = 425984, udiv = 425984>, #util.int.assumption<umin = 589824, umax = 589824, udiv = 589824>, #util.int.assumption<umin = 425984, umax = 425984, udiv = 425984>, #util.int.assumption<umin = 589824, umax = 589824, udiv = 589824>, #util.int.assumption<umin = 425984, umax = 425984, udiv = 425984>, #util.int.assumption<umin = 589824, umax = 589824, udiv = 589824>, #util.int.assumption<umin = 425984, umax = 425984, udiv = 425984>, #util.int.assumption<umin = 589824, umax = 589824, udiv = 589824>, #util.int.assumption<umin = 425984, umax = 425984, udiv = 425984>, #util.int.assumption<umin = 589824, umax = 589824, udiv = 589824>, #util.int.assumption<umin = 425984, umax = 425984, udiv = 425984>, #util.int.assumption<umin = 589824, umax = 589824, udiv = 589824>, #util.int.assumption<umin = 425984, umax = 425984, udiv = 425984>, #util.int.assumption<umin = 589824, umax = 589824, udiv
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment