Skip to content

Instantly share code, notes, and snippets.

@banach-space
Last active September 21, 2023 15:05
Show Gist options
  • Save banach-space/3e2c8154a19d075efb609a03f25bd743 to your computer and use it in GitHub Desktop.
Save banach-space/3e2c8154a19d075efb609a03f25bd743 to your computer and use it in GitHub Desktop.
// -----// IR Dump After CleanupBufferAllocView (iree-codegen-cleanup-buffer-alloc-view) //----- //
func.func @pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32() {
%c0_i32 = arith.constant 0 : i32
%c10 = arith.constant 10 : index
%c20 = arith.constant 20 : index
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c5 = arith.constant 5 : index
%c3 = arith.constant 3 : index
%c9 = arith.constant 9 : index
%cst = arith.constant dense<0> : vector<1xi32>
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>>
%alloca_0 = memref.alloca() {alignment = 64 : i64} : memref<1x3x1xi32, #hal.descriptor_type<storage_buffer>>
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<1x10x28x1xi32, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %0, 64 : memref<1x10x28x1xi32, #hal.descriptor_type<storage_buffer>>
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<1x9x1xi32, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %1, 64 : memref<1x9x1xi32, #hal.descriptor_type<storage_buffer>>
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) alignment(64) offset(%c0) : memref<1x10x20x1xi32, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %2, 64 : memref<1x10x20x1xi32, #hal.descriptor_type<storage_buffer>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
%workgroup_count_y = hal.interface.workgroup.count[1] : index
%3 = affine.apply affine_map<()[s0] -> (s0 * 2)>()[%workgroup_id_y]
%4 = affine.apply affine_map<()[s0] -> (s0 * 2)>()[%workgroup_count_y]
%5 = affine.apply affine_map<()[s0] -> (s0 * 5)>()[%workgroup_id_x]
%6 = affine.apply affine_map<()[s0] -> (s0 * 5)>()[%workgroup_count_x]
scf.for %arg0 = %3 to %c10 step %4 {
scf.for %arg1 = %5 to %c20 step %6 {
%subview = memref.subview %2[0, %arg0, %arg1, 0] [1, 2, 5, 1] [1, 1, 1, 1] : memref<1x10x20x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x2x5x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%subview_1 = memref.subview %0[0, %arg0, %arg1, 0] [1, 2, 13, 1] [1, 1, 1, 1] : memref<1x10x28x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x2x13x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
scf.for %arg2 = %c0 to %c2 step %c1 {
scf.for %arg3 = %c0 to %c5 step %c1 {
%subview_2 = memref.subview %subview_1[0, %arg2, %arg3, 0] [1, 1, 9, 1] [1, 1, 1, 1] : memref<1x2x13x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x9x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%subview_3 = memref.subview %subview[0, %arg2, %arg3, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x2x5x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x1x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
vector.transfer_write %cst, %subview_3[%c0, %c0, %c0, %c0] {in_bounds = [true]} : vector<1xi32>, memref<1x1x1x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%subview_4 = memref.subview %subview_3[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x1xi32, strided<[200, 20, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%cast = memref.cast %subview_4 : memref<1x1x1xi32, strided<[200, 20, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>
%7 = scf.for %arg4 = %c0 to %c9 step %c3 iter_args(%arg5 = %cast) -> (memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>) {
%subview_5 = memref.subview %subview_2[0, 0, %arg4, 0] [1, 1, 3, 1] [1, 1, 1, 1] : memref<1x1x9x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x3x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%subview_6 = memref.subview %1[0, %arg4, 0] [1, 3, 1] [1, 1, 1] : memref<1x9x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x3x1xi32, strided<[9, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%subview_7 = memref.subview %subview_5[0, 0, 0, 0] [1, 1, 3, 1] [1, 1, 1, 1] : memref<1x1x3x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x3x1xi32, strided<[280, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%subview_8 = memref.subview %subview_6[0, 0, 0] [1, 3, 1] [1, 1, 1] : memref<1x3x1xi32, strided<[9, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<3x1xi32, strided<[1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%subview_7 : memref<1x3x1xi32, strided<[280, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%alloca_0 : memref<1x3x1xi32, #hal.descriptor_type<storage_buffer>>) {
^bb0(%in: i32, %out: i32):
linalg.yield %in : i32
}
%collapse_shape = memref.collapse_shape %alloca_0 [[0, 1, 2]] : memref<1x3x1xi32, #hal.descriptor_type<storage_buffer>> into memref<3xi32, #hal.descriptor_type<storage_buffer>>
%collapse_shape_9 = memref.collapse_shape %subview_8 [[0, 1]] : memref<3x1xi32, strided<[1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> into memref<3xi32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg5 : memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%alloca : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>>) {
^bb0(%in: i32, %out: i32):
linalg.yield %in : i32
}
%collapse_shape_10 = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>> into memref<1xi32, #hal.descriptor_type<storage_buffer>>
%8 = vector.transfer_read %collapse_shape[%c0], %c0_i32 {in_bounds = [true]} : memref<3xi32, #hal.descriptor_type<storage_buffer>>, vector<3xi32>
%9 = vector.transfer_read %collapse_shape_9[%c0], %c0_i32 {in_bounds = [true]} : memref<3xi32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<3xi32>
%10 = vector.transfer_read %collapse_shape_10[%c0], %c0_i32 {in_bounds = [true]} : memref<1xi32, #hal.descriptor_type<storage_buffer>>, vector<1xi32>
%11 = vector.extract_strided_slice %8 {offsets = [0], sizes = [1], strides = [1]} : vector<3xi32> to vector<1xi32>
%12 = vector.extract_strided_slice %8 {offsets = [1], sizes = [1], strides = [1]} : vector<3xi32> to vector<1xi32>
%13 = vector.extract_strided_slice %8 {offsets = [2], sizes = [1], strides = [1]} : vector<3xi32> to vector<1xi32>
%14 = vector.extract %9[0] : vector<3xi32>
%15 = vector.extract %9[1] : vector<3xi32>
%16 = vector.extract %9[2] : vector<3xi32>
%17 = vector.outerproduct %11, %14, %10 {kind = #vector.kind<add>} : vector<1xi32>, i32
%18 = vector.outerproduct %12, %15, %17 {kind = #vector.kind<add>} : vector<1xi32>, i32
%19 = vector.outerproduct %13, %16, %18 {kind = #vector.kind<add>} : vector<1xi32>, i32
vector.transfer_write %19, %collapse_shape_10[%c0] {in_bounds = [true]} : vector<1xi32>, memref<1xi32, #hal.descriptor_type<storage_buffer>>
%cast_11 = memref.cast %alloca : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>
scf.yield %cast_11 : memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>
}
linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%7 : memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%subview_4 : memref<1x1x1xi32, strided<[200, 20, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) {
^bb0(%in: i32, %out: i32):
linalg.yield %in : i32
}
}
}
}
}
return
}
// -----// IR Dump After OptimizeVectorTransfer (iree-codegen-optimize-vector-transfer) //----- //
func.func @pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32() {
%c0_i32 = arith.constant 0 : i32
%c10 = arith.constant 10 : index
%c20 = arith.constant 20 : index
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c5 = arith.constant 5 : index
%c3 = arith.constant 3 : index
%c9 = arith.constant 9 : index
%cst = arith.constant dense<0> : vector<1xi32>
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>>
%alloca_0 = memref.alloca() {alignment = 64 : i64} : memref<1x3x1xi32, #hal.descriptor_type<storage_buffer>>
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<1x10x28x1xi32, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %0, 64 : memref<1x10x28x1xi32, #hal.descriptor_type<storage_buffer>>
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<1x9x1xi32, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %1, 64 : memref<1x9x1xi32, #hal.descriptor_type<storage_buffer>>
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) alignment(64) offset(%c0) : memref<1x10x20x1xi32, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %2, 64 : memref<1x10x20x1xi32, #hal.descriptor_type<storage_buffer>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
%workgroup_count_y = hal.interface.workgroup.count[1] : index
%3 = affine.apply affine_map<()[s0] -> (s0 * 2)>()[%workgroup_id_y]
%4 = affine.apply affine_map<()[s0] -> (s0 * 2)>()[%workgroup_count_y]
%5 = affine.apply affine_map<()[s0] -> (s0 * 5)>()[%workgroup_id_x]
%6 = affine.apply affine_map<()[s0] -> (s0 * 5)>()[%workgroup_count_x]
%collapse_shape = memref.collapse_shape %alloca_0 [[0, 1, 2]] : memref<1x3x1xi32, #hal.descriptor_type<storage_buffer>> into memref<3xi32, #hal.descriptor_type<storage_buffer>>
%collapse_shape_1 = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>> into memref<1xi32, #hal.descriptor_type<storage_buffer>>
%cast = memref.cast %alloca : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>
%subview = memref.subview %collapse_shape_1[0] [1] [1] : memref<1xi32, #hal.descriptor_type<storage_buffer>> to memref<i32, #hal.descriptor_type<storage_buffer>>
%7 = vector.transfer_read %subview[], %c0_i32 : memref<i32, #hal.descriptor_type<storage_buffer>>, vector<i32>
%8 = vector.shape_cast %7 : vector<i32> to vector<1xi32>
%9 = scf.for %arg0 = %3 to %c10 step %4 iter_args(%arg1 = %8) -> (vector<1xi32>) {
%11 = scf.for %arg2 = %5 to %c20 step %6 iter_args(%arg3 = %arg1) -> (vector<1xi32>) {
%subview_3 = memref.subview %2[0, %arg0, %arg2, 0] [1, 2, 5, 1] [1, 1, 1, 1] : memref<1x10x20x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x2x5x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%subview_4 = memref.subview %0[0, %arg0, %arg2, 0] [1, 2, 13, 1] [1, 1, 1, 1] : memref<1x10x28x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x2x13x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%12 = scf.for %arg4 = %c0 to %c2 step %c1 iter_args(%arg5 = %arg3) -> (vector<1xi32>) {
%13 = scf.for %arg6 = %c0 to %c5 step %c1 iter_args(%arg7 = %arg5) -> (vector<1xi32>) {
%subview_5 = memref.subview %subview_4[0, %arg4, %arg6, 0] [1, 1, 9, 1] [1, 1, 1, 1] : memref<1x2x13x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x9x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%subview_6 = memref.subview %subview_3[0, %arg4, %arg6, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x2x5x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x1x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%subview_7 = memref.subview %subview_6[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<i32, strided<[], offset: ?>, #hal.descriptor_type<storage_buffer>>
%14 = vector.shape_cast %cst : vector<1xi32> to vector<i32>
vector.transfer_write %14, %subview_7[] : vector<i32>, memref<i32, strided<[], offset: ?>, #hal.descriptor_type<storage_buffer>>
%subview_8 = memref.subview %subview_6[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x1xi32, strided<[200, 20, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%cast_9 = memref.cast %subview_8 : memref<1x1x1xi32, strided<[200, 20, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>
%15:2 = scf.for %arg8 = %c0 to %c9 step %c3 iter_args(%arg9 = %cast_9, %arg10 = %arg7) -> (memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<1xi32>) {
%subview_10 = memref.subview %subview_5[0, 0, %arg8, 0] [1, 1, 3, 1] [1, 1, 1, 1] : memref<1x1x9x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x3x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%subview_11 = memref.subview %1[0, %arg8, 0] [1, 3, 1] [1, 1, 1] : memref<1x9x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x3x1xi32, strided<[9, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%subview_12 = memref.subview %subview_10[0, 0, 0, 0] [1, 1, 3, 1] [1, 1, 1, 1] : memref<1x1x3x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x3x1xi32, strided<[280, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%subview_13 = memref.subview %subview_11[0, 0, 0] [1, 3, 1] [1, 1, 1] : memref<1x3x1xi32, strided<[9, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<3x1xi32, strided<[1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%subview_12 : memref<1x3x1xi32, strided<[280, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%alloca_0 : memref<1x3x1xi32, #hal.descriptor_type<storage_buffer>>) {
^bb0(%in: i32, %out: i32):
linalg.yield %in : i32
}
%collapse_shape_14 = memref.collapse_shape %subview_13 [[0, 1]] : memref<3x1xi32, strided<[1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> into memref<3xi32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg9 : memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%alloca : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>>) {
^bb0(%in: i32, %out: i32):
linalg.yield %in : i32
}
%16 = vector.transfer_read %collapse_shape[%c0], %c0_i32 {in_bounds = [true]} : memref<3xi32, #hal.descriptor_type<storage_buffer>>, vector<3xi32>
%17 = vector.transfer_read %collapse_shape_14[%c0], %c0_i32 {in_bounds = [true]} : memref<3xi32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<3xi32>
%18 = vector.extract_strided_slice %16 {offsets = [0], sizes = [1], strides = [1]} : vector<3xi32> to vector<1xi32>
%19 = vector.extract_strided_slice %16 {offsets = [1], sizes = [1], strides = [1]} : vector<3xi32> to vector<1xi32>
%20 = vector.extract_strided_slice %16 {offsets = [2], sizes = [1], strides = [1]} : vector<3xi32> to vector<1xi32>
%21 = vector.extract %17[0] : vector<3xi32>
%22 = vector.extract %17[1] : vector<3xi32>
%23 = vector.extract %17[2] : vector<3xi32>
%24 = vector.outerproduct %18, %21, %arg10 {kind = #vector.kind<add>} : vector<1xi32>, i32
%25 = vector.outerproduct %19, %22, %24 {kind = #vector.kind<add>} : vector<1xi32>, i32
%26 = vector.outerproduct %20, %23, %25 {kind = #vector.kind<add>} : vector<1xi32>, i32
scf.yield %cast, %26 : memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<1xi32>
}
linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%15#0 : memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%subview_8 : memref<1x1x1xi32, strided<[200, 20, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) {
^bb0(%in: i32, %out: i32):
linalg.yield %in : i32
}
scf.yield %15#1 : vector<1xi32>
}
scf.yield %13 : vector<1xi32>
}
scf.yield %12 : vector<1xi32>
}
scf.yield %11 : vector<1xi32>
}
%subview_2 = memref.subview %collapse_shape_1[0] [1] [1] : memref<1xi32, #hal.descriptor_type<storage_buffer>> to memref<i32, #hal.descriptor_type<storage_buffer>>
%10 = vector.shape_cast %9 : vector<1xi32> to vector<i32>
vector.transfer_write %10, %subview_2[] : vector<i32>, memref<i32, #hal.descriptor_type<storage_buffer>>
return
}
; ModuleID = 'files/module_pipeline_dispatch_0_embedded_elf_arm_64.codegen.bc'
source_filename = "pipeline_dispatch_0"
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-unknown-eabi-elf"
%iree_hal_executable_library_header_t = type { i32, ptr, i32, i32 }
%iree_hal_executable_dispatch_attrs_v0_t = type { i16, i16 }
%iree_hal_executable_src_loc_v0_t = type { i32, i32, ptr }
%iree_hal_executable_library_v0_t = type { ptr, %iree_hal_executable_import_table_v0_t, %iree_hal_executable_export_table_v0_t, %iree_hal_executable_constant_table_v0_t }
%iree_hal_executable_import_table_v0_t = type { i32, ptr }
%iree_hal_executable_export_table_v0_t = type { i32, ptr, ptr, ptr, ptr, ptr }
%iree_hal_executable_constant_table_v0_t = type { i32 }
%iree_hal_executable_dispatch_state_v0_t = type { i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr }
%iree_hal_executable_workgroup_state_v0_t = type { i32, i32, i16, i16, i32, ptr, i32 }
@0 = private constant [20 x i8] c"pipeline_dispatch_0\00", align 1
@iree_hal_executable_library_query_v0_header = private constant %iree_hal_executable_library_header_t { i32 3, ptr @0, i32 0, i32 0 }
@iree_hal_executable_library_query_v0_funcs = private constant [1 x ptr] [ptr @pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32]
@iree_hal_executable_library_query_v0_attrs = private constant [1 x %iree_hal_executable_dispatch_attrs_v0_t] zeroinitializer
@1 = private constant [65 x i8] c"pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32\00", align 1
@iree_hal_executable_library_query_v0_names = private constant [1 x ptr] [ptr @1]
@2 = private constant [1 x i8] zeroinitializer, align 1
@iree_hal_executable_library_query_v0_tags = private constant [1 x ptr] [ptr @2]
@3 = private constant [64 x i8] c"/home/andwar02/work/VOSA//test_standalone_ops/conv2d_plain.mlir\00", align 1
@iree_hal_executable_library_query_v0_src_locs = private constant [1 x %iree_hal_executable_src_loc_v0_t] [%iree_hal_executable_src_loc_v0_t { i32 2, i32 63, ptr @3 }]
@iree_hal_executable_library_query_v0 = private constant %iree_hal_executable_library_v0_t { ptr @iree_hal_executable_library_query_v0_header, %iree_hal_executable_import_table_v0_t zeroinitializer, %iree_hal_executable_export_table_v0_t { i32 1, ptr @iree_hal_executable_library_query_v0_funcs, ptr @iree_hal_executable_library_query_v0_attrs, ptr @iree_hal_executable_library_query_v0_names, ptr @iree_hal_executable_library_query_v0_tags, ptr @iree_hal_executable_library_query_v0_src_locs }, %iree_hal_executable_constant_table_v0_t zeroinitializer }
declare ptr @malloc(i64) #0
declare void @free(ptr) #0
define internal i32 @pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32(ptr noalias nonnull align 16 %0, ptr noalias nonnull align 16 %1, ptr noalias nonnull align 16 %2) #0 !dbg !3 {
%4 = alloca i32, i64 1, align 64, !dbg !79
%5 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } undef, ptr %4, 0, !dbg !79
%6 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %5, ptr %4, 1, !dbg !79
%7 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %6, i64 0, 2, !dbg !79
%8 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %7, i64 1, 3, 0, !dbg !79
%9 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %8, i64 1, 3, 1, !dbg !79
%10 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %9, i64 1, 3, 2, !dbg !79
%11 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %10, i64 1, 4, 0, !dbg !79
%12 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %11, i64 1, 4, 1, !dbg !79
%13 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %12, i64 1, 4, 2, !dbg !79
%14 = alloca i32, i64 3, align 64, !dbg !79
%15 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8, !dbg !80
%16 = extractvalue %iree_hal_executable_dispatch_state_v0_t %15, 10, !dbg !80
%17 = load ptr, ptr %16, align 8, !dbg !80
%18 = ptrtoint ptr %17 to i64, !dbg !80
%19 = and i64 %18, 63, !dbg !80
%20 = icmp eq i64 %19, 0, !dbg !80
call void @llvm.assume(i1 %20), !dbg !80
%21 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8, !dbg !80
%22 = extractvalue %iree_hal_executable_dispatch_state_v0_t %21, 10, !dbg !80
%23 = getelementptr ptr, ptr %22, i32 1, !dbg !80
%24 = load ptr, ptr %23, align 8, !dbg !80
%25 = ptrtoint ptr %24 to i64, !dbg !80
%26 = and i64 %25, 63, !dbg !80
%27 = icmp eq i64 %26, 0, !dbg !80
call void @llvm.assume(i1 %27), !dbg !80
%28 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8, !dbg !79
%29 = extractvalue %iree_hal_executable_dispatch_state_v0_t %28, 10, !dbg !79
%30 = getelementptr ptr, ptr %29, i32 2, !dbg !79
%31 = load ptr, ptr %30, align 8, !dbg !79
%32 = ptrtoint ptr %31 to i64, !dbg !79
%33 = and i64 %32, 63, !dbg !79
%34 = icmp eq i64 %33, 0, !dbg !79
call void @llvm.assume(i1 %34), !dbg !79
%35 = load %iree_hal_executable_workgroup_state_v0_t, ptr %2, align 8, !dbg !79
%36 = extractvalue %iree_hal_executable_workgroup_state_v0_t %35, 0, !dbg !79
%37 = zext i32 %36 to i64, !dbg !79
%38 = extractvalue %iree_hal_executable_workgroup_state_v0_t %35, 1, !dbg !79
%39 = zext i32 %38 to i64, !dbg !79
%40 = getelementptr i32, ptr %4, i64 0, !dbg !79
%41 = load i32, ptr %40, align 4, !dbg !79
%42 = insertelement <1 x i32> undef, i32 %41, i32 0, !dbg !79
%43 = extractelement <1 x i32> %42, i64 0, !dbg !79
%44 = insertelement <1 x i32> zeroinitializer, i32 %43, i64 0, !dbg !79
br label %45, !dbg !79
45: ; preds = %159, %3
%46 = phi i64 [ %160, %159 ], [ 0, %3 ]
%47 = phi <1 x i32> [ %51, %159 ], [ %44, %3 ]
%48 = icmp slt i64 %46, 2, !dbg !79
br i1 %48, label %49, label %161, !dbg !79
49: ; preds = %144, %45
%50 = phi i64 [ %158, %144 ], [ 0, %45 ]
%51 = phi <1 x i32> [ %80, %144 ], [ %47, %45 ]
%52 = icmp slt i64 %50, 5, !dbg !79
br i1 %52, label %53, label %159, !dbg !79
53: ; preds = %49
%54 = mul i64 %39, 2, !dbg !79
%55 = add i64 %46, %54, !dbg !79
%56 = mul i64 %37, 5, !dbg !79
%57 = add i64 %50, %56, !dbg !79
%58 = mul i64 %55, 20, !dbg !79
%59 = add i64 0, %58, !dbg !79
%60 = add i64 %59, %57, !dbg !79
%61 = add i64 %60, 0, !dbg !79
%62 = getelementptr i32, ptr %31, i64 %61, !dbg !79
store i32 0, ptr %62, align 4, !dbg !79
%63 = mul i64 %46, 20, !dbg !79
%64 = mul i64 %39, 40, !dbg !79
%65 = add i64 %63, %64, !dbg !79
%66 = add i64 %65, %50, !dbg !79
%67 = add i64 %66, %56, !dbg !79
%68 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } undef, ptr %31, 0, !dbg !79
%69 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %68, ptr %31, 1, !dbg !79
%70 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %69, i64 %67, 2, !dbg !79
%71 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %70, i64 1, 3, 0, !dbg !79
%72 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %71, i64 200, 4, 0, !dbg !79
%73 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, i64 1, 3, 1, !dbg !79
%74 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %73, i64 20, 4, 1, !dbg !79
%75 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %74, i64 1, 3, 2, !dbg !79
%76 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %75, i64 1, 4, 2, !dbg !79
br label %77, !dbg !79
77: ; preds = %99, %53
%78 = phi i64 [ %143, %99 ], [ 0, %53 ]
%79 = phi { ptr, ptr, i64, [3 x i64], [3 x i64] } [ %13, %99 ], [ %76, %53 ]
%80 = phi <1 x i32> [ %142, %99 ], [ %51, %53 ]
%81 = icmp slt i64 %78, 9, !dbg !79
br i1 %81, label %82, label %144, !dbg !79
82: ; preds = %85, %77
%83 = phi i64 [ %98, %85 ], [ 0, %77 ]
%84 = icmp slt i64 %83, 3, !dbg !79
br i1 %84, label %85, label %99, !dbg !79
85: ; preds = %82
%86 = add i64 %56, %50, !dbg !79
%87 = add i64 %86, %78, !dbg !79
%88 = add i64 %87, %83, !dbg !79
%89 = mul i64 %55, 28, !dbg !79
%90 = add i64 0, %89, !dbg !79
%91 = add i64 %90, %88, !dbg !79
%92 = add i64 %91, 0, !dbg !79
%93 = getelementptr i32, ptr %17, i64 %92, !dbg !79
%94 = load i32, ptr %93, align 4, !dbg !79
%95 = add i64 0, %83, !dbg !79
%96 = add i64 %95, 0, !dbg !79
%97 = getelementptr i32, ptr %14, i64 %96, !dbg !79
store i32 %94, ptr %97, align 4, !dbg !79
%98 = add i64 %83, 1, !dbg !79
br label %82, !dbg !79
99: ; preds = %82
%100 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 1, !dbg !79
%101 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 2, !dbg !79
%102 = getelementptr i32, ptr %100, i64 %101, !dbg !79
%103 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 4, 0, !dbg !79
%104 = mul i64 %103, 0, !dbg !79
%105 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 4, 1, !dbg !79
%106 = mul i64 %105, 0, !dbg !79
%107 = add i64 %104, %106, !dbg !79
%108 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 4, 2, !dbg !79
%109 = mul i64 %108, 0, !dbg !79
%110 = add i64 %107, %109, !dbg !79
%111 = getelementptr i32, ptr %102, i64 %110, !dbg !79
%112 = load i32, ptr %111, align 4, !dbg !79
store i32 %112, ptr %40, align 4, !dbg !79
%113 = load <3 x i32>, ptr %14, align 4, !dbg !79
%114 = shufflevector <3 x i32> %113, <3 x i32> %113, <1 x i32> zeroinitializer, !dbg !79
%115 = shufflevector <3 x i32> %113, <3 x i32> %113, <1 x i32> <i32 1>, !dbg !79
%116 = shufflevector <3 x i32> %113, <3 x i32> %113, <1 x i32> <i32 2>, !dbg !79
%117 = add i64 0, %78, !dbg !79
%118 = add i64 %117, 0, !dbg !79
%119 = getelementptr i32, ptr %24, i64 %118, !dbg !79
%120 = load i32, ptr %119, align 4, !dbg !79
%121 = add i64 %78, 1, !dbg !79
%122 = add i64 0, %121, !dbg !79
%123 = add i64 %122, 0, !dbg !79
%124 = getelementptr i32, ptr %24, i64 %123, !dbg !79
%125 = load i32, ptr %124, align 4, !dbg !79
%126 = add i64 %78, 2, !dbg !79
%127 = add i64 0, %126, !dbg !79
%128 = add i64 %127, 0, !dbg !79
%129 = getelementptr i32, ptr %24, i64 %128, !dbg !79
%130 = load i32, ptr %129, align 4, !dbg !79
%131 = insertelement <1 x i32> undef, i32 %120, i32 0, !dbg !79
%132 = shufflevector <1 x i32> %131, <1 x i32> undef, <1 x i32> zeroinitializer, !dbg !79
%133 = mul <1 x i32> %114, %132, !dbg !79
%134 = add <1 x i32> %133, %80, !dbg !79
%135 = insertelement <1 x i32> undef, i32 %125, i32 0, !dbg !79
%136 = shufflevector <1 x i32> %135, <1 x i32> undef, <1 x i32> zeroinitializer, !dbg !79
%137 = mul <1 x i32> %115, %136, !dbg !79
%138 = add <1 x i32> %137, %134, !dbg !79
%139 = insertelement <1 x i32> undef, i32 %130, i32 0, !dbg !79
%140 = shufflevector <1 x i32> %139, <1 x i32> undef, <1 x i32> zeroinitializer, !dbg !79
%141 = mul <1 x i32> %116, %140, !dbg !79
%142 = add <1 x i32> %141, %138, !dbg !79
%143 = add i64 %78, 3, !dbg !79
br label %77, !dbg !79
144: ; preds = %77
%145 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 1, !dbg !79
%146 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 2, !dbg !79
%147 = getelementptr i32, ptr %145, i64 %146, !dbg !79
%148 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 4, 0, !dbg !79
%149 = mul i64 %148, 0, !dbg !79
%150 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 4, 1, !dbg !79
%151 = mul i64 %150, 0, !dbg !79
%152 = add i64 %149, %151, !dbg !79
%153 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 4, 2, !dbg !79
%154 = mul i64 %153, 0, !dbg !79
%155 = add i64 %152, %154, !dbg !79
%156 = getelementptr i32, ptr %147, i64 %155, !dbg !79
%157 = load i32, ptr %156, align 4, !dbg !79
store i32 %157, ptr %62, align 4, !dbg !79
%158 = add i64 %50, 1, !dbg !79
br label %49, !dbg !79
159: ; preds = %49
%160 = add i64 %46, 1, !dbg !79
br label %45, !dbg !79
161: ; preds = %45
%162 = extractelement <1 x i32> %47, i64 0, !dbg !79
%163 = insertelement <1 x i32> zeroinitializer, i32 %162, i64 0, !dbg !79
%164 = extractelement <1 x i32> %163, i64 0, !dbg !79
store i32 %164, ptr %40, align 4, !dbg !79
ret i32 0, !dbg !79
}
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
declare void @llvm.assume(i1 noundef) #1
; Function Attrs: uwtable
define dso_local dllexport ptr @iree_hal_executable_library_query(i32 %0, ptr %1) #2 {
entry:
%2 = icmp eq i32 %0, 3
%3 = select i1 %2, ptr @iree_hal_executable_library_query_v0, ptr null
ret ptr %3
}
attributes #0 = { "frame-pointer"="all" "hot" "no-builtins" "nonlazybind" }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
attributes #2 = { uwtable "nonlazybind" }
!llvm.module.flags = !{!0}
!llvm.dbg.cu = !{!1}
!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = distinct !DICompileUnit(language: DW_LANG_C17, file: !2, producer: "IREE", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
!2 = !DIFile(filename: "-", directory: "")
!3 = distinct !DISubprogram(name: "pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32", linkageName: "pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32", scope: !2, file: !2, line: 1, type: !4, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !1)
!4 = !DISubroutineType(cc: DW_CC_normal, types: !5)
!5 = !{!6, !7, !38, !67}
!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!7 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64)
!8 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !9)
!9 = !DIDerivedType(tag: DW_TAG_typedef, name: "iree_hal_executable_environment_v0_t", baseType: !10)
!10 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_executable_environment_v0_t", scope: !11, file: !11, line: 246, size: 768, elements: !12)
!11 = !DIFile(filename: "runtime/src/iree/hal/local/executable_library.h", directory: ".")
!12 = !{!13, !21, !24, !27, !29}
!13 = !DIDerivedType(tag: DW_TAG_member, name: "constants", baseType: !14, size: 64)
!14 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !15, size: 64)
!15 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !16)
!16 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !17, size: 2048, elements: !19)
!17 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint32_t", baseType: !18)
!18 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
!19 = !{!20}
!20 = !DISubrange(count: 64)
!21 = !DIDerivedType(tag: DW_TAG_member, name: "import_thunk", baseType: !22, size: 64, offset: 64)
!22 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !23, size: 64)
!23 = !DIBasicType(name: "void", encoding: DW_ATE_address)
!24 = !DIDerivedType(tag: DW_TAG_member, name: "import_funcs", baseType: !25, size: 64, offset: 128)
!25 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !26, size: 64)
!26 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !22)
!27 = !DIDerivedType(tag: DW_TAG_member, name: "import_contexts", baseType: !28, size: 64, offset: 192)
!28 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !25, size: 64)
!29 = !DIDerivedType(tag: DW_TAG_member, name: "processor", baseType: !30, offset: 256)
!30 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_processor_v0_t", scope: !11, file: !11, line: 227, size: 512, elements: !31)
!31 = !{!32}
!32 = !DIDerivedType(tag: DW_TAG_member, name: "data", baseType: !33)
!33 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !34, size: 512, elements: !36)
!34 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint64_t", baseType: !35)
!35 = !DIBasicType(name: "long long unsigned int", size: 64, encoding: DW_ATE_unsigned)
!36 = !{!37}
!37 = !DISubrange(count: 8)
!38 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !39, size: 64)
!39 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !40)
!40 = !DIDerivedType(tag: DW_TAG_typedef, name: "iree_hal_executable_dispatch_state_v0_t", baseType: !41)
!41 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_executable_dispatch_state_v0_t", scope: !11, file: !11, line: 275, size: 384, elements: !42)
!42 = !{!43, !44, !45, !48, !49, !50, !51, !52, !55, !56, !57, !62}
!43 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_size_x", baseType: !17, size: 32)
!44 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_size_y", baseType: !17, size: 32, offset: 32)
!45 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_size_z", baseType: !46, size: 16, offset: 64)
!46 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint16_t", baseType: !47)
!47 = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned)
!48 = !DIDerivedType(tag: DW_TAG_member, name: "push_constant_count", baseType: !46, size: 16, offset: 80)
!49 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_count_x", baseType: !17, size: 32, offset: 96)
!50 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_count_y", baseType: !17, size: 32, offset: 128)
!51 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_count_z", baseType: !46, size: 16, offset: 160)
!52 = !DIDerivedType(tag: DW_TAG_member, name: "max_concurrency", baseType: !53, size: 8, offset: 176)
!53 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint8_t", baseType: !54)
!54 = !DIBasicType(name: "unsigned char", size: 8, encoding: DW_ATE_unsigned_char)
!55 = !DIDerivedType(tag: DW_TAG_member, name: "binding_count", baseType: !53, size: 8, offset: 184)
!56 = !DIDerivedType(tag: DW_TAG_member, name: "push_constants", baseType: !14, size: 64, offset: 192)
!57 = !DIDerivedType(tag: DW_TAG_member, name: "binding_ptrs", baseType: !58, size: 64, offset: 256)
!58 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !59, size: 64)
!59 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !60)
!60 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !61, size: 4096, elements: !19)
!61 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !53, size: 64)
!62 = !DIDerivedType(tag: DW_TAG_member, name: "binding_lengths", baseType: !63, size: 64, offset: 320)
!63 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !64, size: 64)
!64 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !65)
!65 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !66, size: 4096, elements: !19)
!66 = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", baseType: !34)
!67 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !68, size: 64)
!68 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !69)
!69 = !DIDerivedType(tag: DW_TAG_typedef, name: "iree_hal_executable_workgroup_state_v0_t", baseType: !70)
!70 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_executable_workgroup_state_v0_t", scope: !11, file: !11, line: 321, size: 256, elements: !71)
!71 = !{!72, !73, !74, !75, !76, !77, !78}
!72 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_id_x", baseType: !17, size: 32)
!73 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_id_y", baseType: !17, size: 32, offset: 32)
!74 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_id_z", baseType: !46, size: 16, offset: 64)
!75 = !DIDerivedType(tag: DW_TAG_member, name: "reserved", baseType: !46, size: 16, offset: 80)
!76 = !DIDerivedType(tag: DW_TAG_member, name: "processor_id", baseType: !17, size: 32, offset: 96)
!77 = !DIDerivedType(tag: DW_TAG_member, name: "local_memory", baseType: !22, size: 64, offset: 128)
!78 = !DIDerivedType(tag: DW_TAG_member, name: "local_memory_size", baseType: !17, size: 32, offset: 192)
!79 = !DILocation(line: 7, column: 10, scope: !3, inlinedAt: !80)
!80 = !DILocation(line: 2, column: 3, scope: !3)
; ModuleID = 'files/module_pipeline_dispatch_0_embedded_elf_arm_64.codegen.bc'
source_filename = "pipeline_dispatch_0"
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-unknown-eabi-elf"
%iree_hal_executable_library_header_t = type { i32, ptr, i32, i32 }
%iree_hal_executable_dispatch_attrs_v0_t = type { i16, i16 }
%iree_hal_executable_src_loc_v0_t = type { i32, i32, ptr }
%iree_hal_executable_library_v0_t = type { ptr, %iree_hal_executable_import_table_v0_t, %iree_hal_executable_export_table_v0_t, %iree_hal_executable_constant_table_v0_t }
%iree_hal_executable_import_table_v0_t = type { i32, ptr }
%iree_hal_executable_export_table_v0_t = type { i32, ptr, ptr, ptr, ptr, ptr }
%iree_hal_executable_constant_table_v0_t = type { i32 }
%iree_hal_executable_dispatch_state_v0_t = type { i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr }
%iree_hal_executable_workgroup_state_v0_t = type { i32, i32, i16, i16, i32, ptr, i32 }
@0 = private constant [20 x i8] c"pipeline_dispatch_0\00", align 1
@iree_hal_executable_library_query_v0_header = private constant %iree_hal_executable_library_header_t { i32 3, ptr @0, i32 0, i32 0 }
@iree_hal_executable_library_query_v0_funcs = private constant [1 x ptr] [ptr @pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32]
@iree_hal_executable_library_query_v0_attrs = private constant [1 x %iree_hal_executable_dispatch_attrs_v0_t] zeroinitializer
@1 = private constant [65 x i8] c"pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32\00", align 1
@iree_hal_executable_library_query_v0_names = private constant [1 x ptr] [ptr @1]
@2 = private constant [1 x i8] zeroinitializer, align 1
@iree_hal_executable_library_query_v0_tags = private constant [1 x ptr] [ptr @2]
@3 = private constant [64 x i8] c"/home/andwar02/work/VOSA//test_standalone_ops/conv2d_plain.mlir\00", align 1
@iree_hal_executable_library_query_v0_src_locs = private constant [1 x %iree_hal_executable_src_loc_v0_t] [%iree_hal_executable_src_loc_v0_t { i32 2, i32 63, ptr @3 }]
@iree_hal_executable_library_query_v0 = private constant %iree_hal_executable_library_v0_t { ptr @iree_hal_executable_library_query_v0_header, %iree_hal_executable_import_table_v0_t zeroinitializer, %iree_hal_executable_export_table_v0_t { i32 1, ptr @iree_hal_executable_library_query_v0_funcs, ptr @iree_hal_executable_library_query_v0_attrs, ptr @iree_hal_executable_library_query_v0_names, ptr @iree_hal_executable_library_query_v0_tags, ptr @iree_hal_executable_library_query_v0_src_locs }, %iree_hal_executable_constant_table_v0_t zeroinitializer }
declare ptr @malloc(i64) #0
declare void @free(ptr) #0
define internal i32 @pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32(ptr noalias nonnull align 16 %0, ptr noalias nonnull align 16 %1, ptr noalias nonnull align 16 %2) #0 !dbg !3 {
%4 = alloca i32, i64 1, align 64, !dbg !79
%5 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } undef, ptr %4, 0, !dbg !79
%6 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %5, ptr %4, 1, !dbg !79
%7 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %6, i64 0, 2, !dbg !79
%8 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %7, i64 1, 3, 0, !dbg !79
%9 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %8, i64 1, 3, 1, !dbg !79
%10 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %9, i64 1, 3, 2, !dbg !79
%11 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %10, i64 1, 4, 0, !dbg !79
%12 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %11, i64 1, 4, 1, !dbg !79
%13 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %12, i64 1, 4, 2, !dbg !79
%14 = alloca i32, i64 3, align 64, !dbg !79
%15 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8, !dbg !80
%16 = extractvalue %iree_hal_executable_dispatch_state_v0_t %15, 10, !dbg !80
%17 = load ptr, ptr %16, align 8, !dbg !80
%18 = ptrtoint ptr %17 to i64, !dbg !80
%19 = and i64 %18, 63, !dbg !80
%20 = icmp eq i64 %19, 0, !dbg !80
call void @llvm.assume(i1 %20), !dbg !80
%21 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8, !dbg !80
%22 = extractvalue %iree_hal_executable_dispatch_state_v0_t %21, 10, !dbg !80
%23 = getelementptr ptr, ptr %22, i32 1, !dbg !80
%24 = load ptr, ptr %23, align 8, !dbg !80
%25 = ptrtoint ptr %24 to i64, !dbg !80
%26 = and i64 %25, 63, !dbg !80
%27 = icmp eq i64 %26, 0, !dbg !80
call void @llvm.assume(i1 %27), !dbg !80
%28 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8, !dbg !79
%29 = extractvalue %iree_hal_executable_dispatch_state_v0_t %28, 10, !dbg !79
%30 = getelementptr ptr, ptr %29, i32 2, !dbg !79
%31 = load ptr, ptr %30, align 8, !dbg !79
%32 = ptrtoint ptr %31 to i64, !dbg !79
%33 = and i64 %32, 63, !dbg !79
%34 = icmp eq i64 %33, 0, !dbg !79
call void @llvm.assume(i1 %34), !dbg !79
%35 = load %iree_hal_executable_workgroup_state_v0_t, ptr %2, align 8, !dbg !79
%36 = extractvalue %iree_hal_executable_workgroup_state_v0_t %35, 0, !dbg !79
%37 = zext i32 %36 to i64, !dbg !79
%38 = extractvalue %iree_hal_executable_workgroup_state_v0_t %35, 1, !dbg !79
%39 = zext i32 %38 to i64, !dbg !79
br label %40, !dbg !79
40: ; preds = %159, %3
%41 = phi i64 [ %160, %159 ], [ 0, %3 ]
%42 = icmp slt i64 %41, 2, !dbg !79
br i1 %42, label %43, label %161, !dbg !79
43: ; preds = %144, %40
%44 = phi i64 [ %158, %144 ], [ 0, %40 ]
%45 = icmp slt i64 %44, 5, !dbg !79
br i1 %45, label %46, label %159, !dbg !79
46: ; preds = %43
%47 = mul i64 %39, 2, !dbg !79
%48 = add i64 %41, %47, !dbg !79
%49 = mul i64 %37, 5, !dbg !79
%50 = add i64 %44, %49, !dbg !79
%51 = mul i64 %48, 20, !dbg !79
%52 = add i64 0, %51, !dbg !79
%53 = add i64 %52, %50, !dbg !79
%54 = add i64 %53, 0, !dbg !79
%55 = getelementptr i32, ptr %31, i64 %54, !dbg !79
store i32 0, ptr %55, align 4, !dbg !79
%56 = mul i64 %41, 20, !dbg !79
%57 = mul i64 %39, 40, !dbg !79
%58 = add i64 %56, %57, !dbg !79
%59 = add i64 %58, %44, !dbg !79
%60 = add i64 %59, %49, !dbg !79
%61 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } undef, ptr %31, 0, !dbg !79
%62 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %61, ptr %31, 1, !dbg !79
%63 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %62, i64 %60, 2, !dbg !79
%64 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %63, i64 1, 3, 0, !dbg !79
%65 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %64, i64 200, 4, 0, !dbg !79
%66 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %65, i64 1, 3, 1, !dbg !79
%67 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %66, i64 20, 4, 1, !dbg !79
%68 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %67, i64 1, 3, 2, !dbg !79
%69 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %68, i64 1, 4, 2, !dbg !79
br label %70, !dbg !79
70: ; preds = %91, %46
%71 = phi i64 [ %143, %91 ], [ 0, %46 ]
%72 = phi { ptr, ptr, i64, [3 x i64], [3 x i64] } [ %13, %91 ], [ %69, %46 ]
%73 = icmp slt i64 %71, 9, !dbg !79
br i1 %73, label %74, label %144, !dbg !79
74: ; preds = %77, %70
%75 = phi i64 [ %90, %77 ], [ 0, %70 ]
%76 = icmp slt i64 %75, 3, !dbg !79
br i1 %76, label %77, label %91, !dbg !79
77: ; preds = %74
%78 = add i64 %49, %44, !dbg !79
%79 = add i64 %78, %71, !dbg !79
%80 = add i64 %79, %75, !dbg !79
%81 = mul i64 %48, 28, !dbg !79
%82 = add i64 0, %81, !dbg !79
%83 = add i64 %82, %80, !dbg !79
%84 = add i64 %83, 0, !dbg !79
%85 = getelementptr i32, ptr %17, i64 %84, !dbg !79
%86 = load i32, ptr %85, align 4, !dbg !79
%87 = add i64 0, %75, !dbg !79
%88 = add i64 %87, 0, !dbg !79
%89 = getelementptr i32, ptr %14, i64 %88, !dbg !79
store i32 %86, ptr %89, align 4, !dbg !79
%90 = add i64 %75, 1, !dbg !79
br label %74, !dbg !79
91: ; preds = %74
%92 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 1, !dbg !79
%93 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 2, !dbg !79
%94 = getelementptr i32, ptr %92, i64 %93, !dbg !79
%95 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 4, 0, !dbg !79
%96 = mul i64 %95, 0, !dbg !79
%97 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 4, 1, !dbg !79
%98 = mul i64 %97, 0, !dbg !79
%99 = add i64 %96, %98, !dbg !79
%100 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 4, 2, !dbg !79
%101 = mul i64 %100, 0, !dbg !79
%102 = add i64 %99, %101, !dbg !79
%103 = getelementptr i32, ptr %94, i64 %102, !dbg !79
%104 = load i32, ptr %103, align 4, !dbg !79
%105 = getelementptr i32, ptr %4, i64 0, !dbg !79
store i32 %104, ptr %105, align 4, !dbg !79
%106 = load <3 x i32>, ptr %14, align 4, !dbg !79
%107 = load i32, ptr %105, align 4, !dbg !79
%108 = insertelement <1 x i32> undef, i32 %107, i32 0, !dbg !79
%109 = extractelement <1 x i32> %108, i64 0, !dbg !79
%110 = insertelement <1 x i32> zeroinitializer, i32 %109, i64 0, !dbg !79
%111 = shufflevector <3 x i32> %106, <3 x i32> %106, <1 x i32> zeroinitializer, !dbg !79
%112 = shufflevector <3 x i32> %106, <3 x i32> %106, <1 x i32> <i32 1>, !dbg !79
%113 = shufflevector <3 x i32> %106, <3 x i32> %106, <1 x i32> <i32 2>, !dbg !79
%114 = add i64 0, %71, !dbg !79
%115 = add i64 %114, 0, !dbg !79
%116 = getelementptr i32, ptr %24, i64 %115, !dbg !79
%117 = load i32, ptr %116, align 4, !dbg !79
%118 = add i64 %71, 1, !dbg !79
%119 = add i64 0, %118, !dbg !79
%120 = add i64 %119, 0, !dbg !79
%121 = getelementptr i32, ptr %24, i64 %120, !dbg !79
%122 = load i32, ptr %121, align 4, !dbg !79
%123 = add i64 %71, 2, !dbg !79
%124 = add i64 0, %123, !dbg !79
%125 = add i64 %124, 0, !dbg !79
%126 = getelementptr i32, ptr %24, i64 %125, !dbg !79
%127 = load i32, ptr %126, align 4, !dbg !79
%128 = insertelement <1 x i32> undef, i32 %117, i32 0, !dbg !79
%129 = shufflevector <1 x i32> %128, <1 x i32> undef, <1 x i32> zeroinitializer, !dbg !79
%130 = mul <1 x i32> %111, %129, !dbg !79
%131 = add <1 x i32> %130, %110, !dbg !79
%132 = insertelement <1 x i32> undef, i32 %122, i32 0, !dbg !79
%133 = shufflevector <1 x i32> %132, <1 x i32> undef, <1 x i32> zeroinitializer, !dbg !79
%134 = mul <1 x i32> %112, %133, !dbg !79
%135 = add <1 x i32> %134, %131, !dbg !79
%136 = insertelement <1 x i32> undef, i32 %127, i32 0, !dbg !79
%137 = shufflevector <1 x i32> %136, <1 x i32> undef, <1 x i32> zeroinitializer, !dbg !79
%138 = mul <1 x i32> %113, %137, !dbg !79
%139 = add <1 x i32> %138, %135, !dbg !79
%140 = extractelement <1 x i32> %139, i64 0, !dbg !79
%141 = insertelement <1 x i32> zeroinitializer, i32 %140, i64 0, !dbg !79
%142 = extractelement <1 x i32> %141, i64 0, !dbg !79
store i32 %142, ptr %105, align 4, !dbg !79
%143 = add i64 %71, 3, !dbg !79
br label %70, !dbg !79
144: ; preds = %70
%145 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 1, !dbg !79
%146 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 2, !dbg !79
%147 = getelementptr i32, ptr %145, i64 %146, !dbg !79
%148 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 4, 0, !dbg !79
%149 = mul i64 %148, 0, !dbg !79
%150 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 4, 1, !dbg !79
%151 = mul i64 %150, 0, !dbg !79
%152 = add i64 %149, %151, !dbg !79
%153 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 4, 2, !dbg !79
%154 = mul i64 %153, 0, !dbg !79
%155 = add i64 %152, %154, !dbg !79
%156 = getelementptr i32, ptr %147, i64 %155, !dbg !79
%157 = load i32, ptr %156, align 4, !dbg !79
store i32 %157, ptr %55, align 4, !dbg !79
%158 = add i64 %44, 1, !dbg !79
br label %43, !dbg !79
159: ; preds = %43
%160 = add i64 %41, 1, !dbg !79
br label %40, !dbg !79
161: ; preds = %40
ret i32 0, !dbg !79
}
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
declare void @llvm.assume(i1 noundef) #1
; Function Attrs: uwtable
define dso_local dllexport ptr @iree_hal_executable_library_query(i32 %0, ptr %1) #2 {
entry:
%2 = icmp eq i32 %0, 3
%3 = select i1 %2, ptr @iree_hal_executable_library_query_v0, ptr null
ret ptr %3
}
attributes #0 = { "frame-pointer"="all" "hot" "no-builtins" "nonlazybind" }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
attributes #2 = { uwtable "nonlazybind" }
!llvm.module.flags = !{!0}
!llvm.dbg.cu = !{!1}
!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = distinct !DICompileUnit(language: DW_LANG_C17, file: !2, producer: "IREE", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
!2 = !DIFile(filename: "-", directory: "")
!3 = distinct !DISubprogram(name: "pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32", linkageName: "pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32", scope: !2, file: !2, line: 1, type: !4, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !1)
!4 = !DISubroutineType(cc: DW_CC_normal, types: !5)
!5 = !{!6, !7, !38, !67}
!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!7 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64)
!8 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !9)
!9 = !DIDerivedType(tag: DW_TAG_typedef, name: "iree_hal_executable_environment_v0_t", baseType: !10)
!10 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_executable_environment_v0_t", scope: !11, file: !11, line: 246, size: 768, elements: !12)
!11 = !DIFile(filename: "runtime/src/iree/hal/local/executable_library.h", directory: ".")
!12 = !{!13, !21, !24, !27, !29}
!13 = !DIDerivedType(tag: DW_TAG_member, name: "constants", baseType: !14, size: 64)
!14 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !15, size: 64)
!15 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !16)
!16 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !17, size: 2048, elements: !19)
!17 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint32_t", baseType: !18)
!18 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
!19 = !{!20}
!20 = !DISubrange(count: 64)
!21 = !DIDerivedType(tag: DW_TAG_member, name: "import_thunk", baseType: !22, size: 64, offset: 64)
!22 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !23, size: 64)
!23 = !DIBasicType(name: "void", encoding: DW_ATE_address)
!24 = !DIDerivedType(tag: DW_TAG_member, name: "import_funcs", baseType: !25, size: 64, offset: 128)
!25 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !26, size: 64)
!26 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !22)
!27 = !DIDerivedType(tag: DW_TAG_member, name: "import_contexts", baseType: !28, size: 64, offset: 192)
!28 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !25, size: 64)
!29 = !DIDerivedType(tag: DW_TAG_member, name: "processor", baseType: !30, offset: 256)
!30 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_processor_v0_t", scope: !11, file: !11, line: 227, size: 512, elements: !31)
!31 = !{!32}
!32 = !DIDerivedType(tag: DW_TAG_member, name: "data", baseType: !33)
!33 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !34, size: 512, elements: !36)
!34 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint64_t", baseType: !35)
!35 = !DIBasicType(name: "long long unsigned int", size: 64, encoding: DW_ATE_unsigned)
!36 = !{!37}
!37 = !DISubrange(count: 8)
!38 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !39, size: 64)
!39 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !40)
!40 = !DIDerivedType(tag: DW_TAG_typedef, name: "iree_hal_executable_dispatch_state_v0_t", baseType: !41)
!41 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_executable_dispatch_state_v0_t", scope: !11, file: !11, line: 275, size: 384, elements: !42)
!42 = !{!43, !44, !45, !48, !49, !50, !51, !52, !55, !56, !57, !62}
!43 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_size_x", baseType: !17, size: 32)
!44 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_size_y", baseType: !17, size: 32, offset: 32)
!45 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_size_z", baseType: !46, size: 16, offset: 64)
!46 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint16_t", baseType: !47)
!47 = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned)
!48 = !DIDerivedType(tag: DW_TAG_member, name: "push_constant_count", baseType: !46, size: 16, offset: 80)
!49 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_count_x", baseType: !17, size: 32, offset: 96)
!50 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_count_y", baseType: !17, size: 32, offset: 128)
!51 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_count_z", baseType: !46, size: 16, offset: 160)
!52 = !DIDerivedType(tag: DW_TAG_member, name: "max_concurrency", baseType: !53, size: 8, offset: 176)
!53 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint8_t", baseType: !54)
!54 = !DIBasicType(name: "unsigned char", size: 8, encoding: DW_ATE_unsigned_char)
!55 = !DIDerivedType(tag: DW_TAG_member, name: "binding_count", baseType: !53, size: 8, offset: 184)
!56 = !DIDerivedType(tag: DW_TAG_member, name: "push_constants", baseType: !14, size: 64, offset: 192)
!57 = !DIDerivedType(tag: DW_TAG_member, name: "binding_ptrs", baseType: !58, size: 64, offset: 256)
!58 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !59, size: 64)
!59 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !60)
!60 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !61, size: 4096, elements: !19)
!61 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !53, size: 64)
!62 = !DIDerivedType(tag: DW_TAG_member, name: "binding_lengths", baseType: !63, size: 64, offset: 320)
!63 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !64, size: 64)
!64 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !65)
!65 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !66, size: 4096, elements: !19)
!66 = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", baseType: !34)
!67 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !68, size: 64)
!68 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !69)
!69 = !DIDerivedType(tag: DW_TAG_typedef, name: "iree_hal_executable_workgroup_state_v0_t", baseType: !70)
!70 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_executable_workgroup_state_v0_t", scope: !11, file: !11, line: 321, size: 256, elements: !71)
!71 = !{!72, !73, !74, !75, !76, !77, !78}
!72 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_id_x", baseType: !17, size: 32)
!73 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_id_y", baseType: !17, size: 32, offset: 32)
!74 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_id_z", baseType: !46, size: 16, offset: 64)
!75 = !DIDerivedType(tag: DW_TAG_member, name: "reserved", baseType: !46, size: 16, offset: 80)
!76 = !DIDerivedType(tag: DW_TAG_member, name: "processor_id", baseType: !17, size: 32, offset: 96)
!77 = !DIDerivedType(tag: DW_TAG_member, name: "local_memory", baseType: !22, size: 64, offset: 128)
!78 = !DIDerivedType(tag: DW_TAG_member, name: "local_memory_size", baseType: !17, size: 32, offset: 192)
!79 = !DILocation(line: 7, column: 10, scope: !3, inlinedAt: !80)
!80 = !DILocation(line: 2, column: 3, scope: !3)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment