Last active
September 21, 2023 15:05
-
-
Save banach-space/3e2c8154a19d075efb609a03f25bd743 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -----// IR Dump After CleanupBufferAllocView (iree-codegen-cleanup-buffer-alloc-view) //----- // | |
func.func @pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32() { | |
%c0_i32 = arith.constant 0 : i32 | |
%c10 = arith.constant 10 : index | |
%c20 = arith.constant 20 : index | |
%c0 = arith.constant 0 : index | |
%c1 = arith.constant 1 : index | |
%c2 = arith.constant 2 : index | |
%c5 = arith.constant 5 : index | |
%c3 = arith.constant 3 : index | |
%c9 = arith.constant 9 : index | |
%cst = arith.constant dense<0> : vector<1xi32> | |
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>> | |
%alloca_0 = memref.alloca() {alignment = 64 : i64} : memref<1x3x1xi32, #hal.descriptor_type<storage_buffer>> | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<1x10x28x1xi32, #hal.descriptor_type<storage_buffer>> | |
memref.assume_alignment %0, 64 : memref<1x10x28x1xi32, #hal.descriptor_type<storage_buffer>> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<1x9x1xi32, #hal.descriptor_type<storage_buffer>> | |
memref.assume_alignment %1, 64 : memref<1x9x1xi32, #hal.descriptor_type<storage_buffer>> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) alignment(64) offset(%c0) : memref<1x10x20x1xi32, #hal.descriptor_type<storage_buffer>> | |
memref.assume_alignment %2, 64 : memref<1x10x20x1xi32, #hal.descriptor_type<storage_buffer>> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 2)>()[%workgroup_id_y] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 2)>()[%workgroup_count_y] | |
%5 = affine.apply affine_map<()[s0] -> (s0 * 5)>()[%workgroup_id_x] | |
%6 = affine.apply affine_map<()[s0] -> (s0 * 5)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c10 step %4 { | |
scf.for %arg1 = %5 to %c20 step %6 { | |
%subview = memref.subview %2[0, %arg0, %arg1, 0] [1, 2, 5, 1] [1, 1, 1, 1] : memref<1x10x20x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x2x5x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_1 = memref.subview %0[0, %arg0, %arg1, 0] [1, 2, 13, 1] [1, 1, 1, 1] : memref<1x10x28x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x2x13x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
scf.for %arg2 = %c0 to %c2 step %c1 { | |
scf.for %arg3 = %c0 to %c5 step %c1 { | |
%subview_2 = memref.subview %subview_1[0, %arg2, %arg3, 0] [1, 1, 9, 1] [1, 1, 1, 1] : memref<1x2x13x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x9x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_3 = memref.subview %subview[0, %arg2, %arg3, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x2x5x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x1x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
vector.transfer_write %cst, %subview_3[%c0, %c0, %c0, %c0] {in_bounds = [true]} : vector<1xi32>, memref<1x1x1x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_4 = memref.subview %subview_3[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x1xi32, strided<[200, 20, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%cast = memref.cast %subview_4 : memref<1x1x1xi32, strided<[200, 20, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%7 = scf.for %arg4 = %c0 to %c9 step %c3 iter_args(%arg5 = %cast) -> (memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>) { | |
%subview_5 = memref.subview %subview_2[0, 0, %arg4, 0] [1, 1, 3, 1] [1, 1, 1, 1] : memref<1x1x9x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x3x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_6 = memref.subview %1[0, %arg4, 0] [1, 3, 1] [1, 1, 1] : memref<1x9x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x3x1xi32, strided<[9, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_7 = memref.subview %subview_5[0, 0, 0, 0] [1, 1, 3, 1] [1, 1, 1, 1] : memref<1x1x3x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x3x1xi32, strided<[280, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_8 = memref.subview %subview_6[0, 0, 0] [1, 3, 1] [1, 1, 1] : memref<1x3x1xi32, strided<[9, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<3x1xi32, strided<[1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%subview_7 : memref<1x3x1xi32, strided<[280, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%alloca_0 : memref<1x3x1xi32, #hal.descriptor_type<storage_buffer>>) { | |
^bb0(%in: i32, %out: i32): | |
linalg.yield %in : i32 | |
} | |
%collapse_shape = memref.collapse_shape %alloca_0 [[0, 1, 2]] : memref<1x3x1xi32, #hal.descriptor_type<storage_buffer>> into memref<3xi32, #hal.descriptor_type<storage_buffer>> | |
%collapse_shape_9 = memref.collapse_shape %subview_8 [[0, 1]] : memref<3x1xi32, strided<[1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> into memref<3xi32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg5 : memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%alloca : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>>) { | |
^bb0(%in: i32, %out: i32): | |
linalg.yield %in : i32 | |
} | |
%collapse_shape_10 = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>> into memref<1xi32, #hal.descriptor_type<storage_buffer>> | |
%8 = vector.transfer_read %collapse_shape[%c0], %c0_i32 {in_bounds = [true]} : memref<3xi32, #hal.descriptor_type<storage_buffer>>, vector<3xi32> | |
%9 = vector.transfer_read %collapse_shape_9[%c0], %c0_i32 {in_bounds = [true]} : memref<3xi32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<3xi32> | |
%10 = vector.transfer_read %collapse_shape_10[%c0], %c0_i32 {in_bounds = [true]} : memref<1xi32, #hal.descriptor_type<storage_buffer>>, vector<1xi32> | |
%11 = vector.extract_strided_slice %8 {offsets = [0], sizes = [1], strides = [1]} : vector<3xi32> to vector<1xi32> | |
%12 = vector.extract_strided_slice %8 {offsets = [1], sizes = [1], strides = [1]} : vector<3xi32> to vector<1xi32> | |
%13 = vector.extract_strided_slice %8 {offsets = [2], sizes = [1], strides = [1]} : vector<3xi32> to vector<1xi32> | |
%14 = vector.extract %9[0] : vector<3xi32> | |
%15 = vector.extract %9[1] : vector<3xi32> | |
%16 = vector.extract %9[2] : vector<3xi32> | |
%17 = vector.outerproduct %11, %14, %10 {kind = #vector.kind<add>} : vector<1xi32>, i32 | |
%18 = vector.outerproduct %12, %15, %17 {kind = #vector.kind<add>} : vector<1xi32>, i32 | |
%19 = vector.outerproduct %13, %16, %18 {kind = #vector.kind<add>} : vector<1xi32>, i32 | |
vector.transfer_write %19, %collapse_shape_10[%c0] {in_bounds = [true]} : vector<1xi32>, memref<1xi32, #hal.descriptor_type<storage_buffer>> | |
%cast_11 = memref.cast %alloca : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
scf.yield %cast_11 : memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
} | |
linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%7 : memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%subview_4 : memref<1x1x1xi32, strided<[200, 20, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) { | |
^bb0(%in: i32, %out: i32): | |
linalg.yield %in : i32 | |
} | |
} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After OptimizeVectorTransfer (iree-codegen-optimize-vector-transfer) //----- // | |
func.func @pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32() { | |
%c0_i32 = arith.constant 0 : i32 | |
%c10 = arith.constant 10 : index | |
%c20 = arith.constant 20 : index | |
%c0 = arith.constant 0 : index | |
%c1 = arith.constant 1 : index | |
%c2 = arith.constant 2 : index | |
%c5 = arith.constant 5 : index | |
%c3 = arith.constant 3 : index | |
%c9 = arith.constant 9 : index | |
%cst = arith.constant dense<0> : vector<1xi32> | |
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>> | |
%alloca_0 = memref.alloca() {alignment = 64 : i64} : memref<1x3x1xi32, #hal.descriptor_type<storage_buffer>> | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<1x10x28x1xi32, #hal.descriptor_type<storage_buffer>> | |
memref.assume_alignment %0, 64 : memref<1x10x28x1xi32, #hal.descriptor_type<storage_buffer>> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<1x9x1xi32, #hal.descriptor_type<storage_buffer>> | |
memref.assume_alignment %1, 64 : memref<1x9x1xi32, #hal.descriptor_type<storage_buffer>> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) alignment(64) offset(%c0) : memref<1x10x20x1xi32, #hal.descriptor_type<storage_buffer>> | |
memref.assume_alignment %2, 64 : memref<1x10x20x1xi32, #hal.descriptor_type<storage_buffer>> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 2)>()[%workgroup_id_y] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 2)>()[%workgroup_count_y] | |
%5 = affine.apply affine_map<()[s0] -> (s0 * 5)>()[%workgroup_id_x] | |
%6 = affine.apply affine_map<()[s0] -> (s0 * 5)>()[%workgroup_count_x] | |
%collapse_shape = memref.collapse_shape %alloca_0 [[0, 1, 2]] : memref<1x3x1xi32, #hal.descriptor_type<storage_buffer>> into memref<3xi32, #hal.descriptor_type<storage_buffer>> | |
%collapse_shape_1 = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>> into memref<1xi32, #hal.descriptor_type<storage_buffer>> | |
%cast = memref.cast %alloca : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview = memref.subview %collapse_shape_1[0] [1] [1] : memref<1xi32, #hal.descriptor_type<storage_buffer>> to memref<i32, #hal.descriptor_type<storage_buffer>> | |
%7 = vector.transfer_read %subview[], %c0_i32 : memref<i32, #hal.descriptor_type<storage_buffer>>, vector<i32> | |
%8 = vector.shape_cast %7 : vector<i32> to vector<1xi32> | |
%9 = scf.for %arg0 = %3 to %c10 step %4 iter_args(%arg1 = %8) -> (vector<1xi32>) { | |
%11 = scf.for %arg2 = %5 to %c20 step %6 iter_args(%arg3 = %arg1) -> (vector<1xi32>) { | |
%subview_3 = memref.subview %2[0, %arg0, %arg2, 0] [1, 2, 5, 1] [1, 1, 1, 1] : memref<1x10x20x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x2x5x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_4 = memref.subview %0[0, %arg0, %arg2, 0] [1, 2, 13, 1] [1, 1, 1, 1] : memref<1x10x28x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x2x13x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%12 = scf.for %arg4 = %c0 to %c2 step %c1 iter_args(%arg5 = %arg3) -> (vector<1xi32>) { | |
%13 = scf.for %arg6 = %c0 to %c5 step %c1 iter_args(%arg7 = %arg5) -> (vector<1xi32>) { | |
%subview_5 = memref.subview %subview_4[0, %arg4, %arg6, 0] [1, 1, 9, 1] [1, 1, 1, 1] : memref<1x2x13x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x9x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_6 = memref.subview %subview_3[0, %arg4, %arg6, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x2x5x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x1x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_7 = memref.subview %subview_6[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<i32, strided<[], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%14 = vector.shape_cast %cst : vector<1xi32> to vector<i32> | |
vector.transfer_write %14, %subview_7[] : vector<i32>, memref<i32, strided<[], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_8 = memref.subview %subview_6[0, 0, 0, 0] [1, 1, 1, 1] [1, 1, 1, 1] : memref<1x1x1x1xi32, strided<[200, 20, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x1xi32, strided<[200, 20, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%cast_9 = memref.cast %subview_8 : memref<1x1x1xi32, strided<[200, 20, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%15:2 = scf.for %arg8 = %c0 to %c9 step %c3 iter_args(%arg9 = %cast_9, %arg10 = %arg7) -> (memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<1xi32>) { | |
%subview_10 = memref.subview %subview_5[0, 0, %arg8, 0] [1, 1, 3, 1] [1, 1, 1, 1] : memref<1x1x9x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x1x3x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_11 = memref.subview %1[0, %arg8, 0] [1, 3, 1] [1, 1, 1] : memref<1x9x1xi32, #hal.descriptor_type<storage_buffer>> to memref<1x3x1xi32, strided<[9, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_12 = memref.subview %subview_10[0, 0, 0, 0] [1, 1, 3, 1] [1, 1, 1, 1] : memref<1x1x3x1xi32, strided<[280, 28, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x3x1xi32, strided<[280, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_13 = memref.subview %subview_11[0, 0, 0] [1, 3, 1] [1, 1, 1] : memref<1x3x1xi32, strided<[9, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<3x1xi32, strided<[1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%subview_12 : memref<1x3x1xi32, strided<[280, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%alloca_0 : memref<1x3x1xi32, #hal.descriptor_type<storage_buffer>>) { | |
^bb0(%in: i32, %out: i32): | |
linalg.yield %in : i32 | |
} | |
%collapse_shape_14 = memref.collapse_shape %subview_13 [[0, 1]] : memref<3x1xi32, strided<[1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> into memref<3xi32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg9 : memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%alloca : memref<1x1x1xi32, #hal.descriptor_type<storage_buffer>>) { | |
^bb0(%in: i32, %out: i32): | |
linalg.yield %in : i32 | |
} | |
%16 = vector.transfer_read %collapse_shape[%c0], %c0_i32 {in_bounds = [true]} : memref<3xi32, #hal.descriptor_type<storage_buffer>>, vector<3xi32> | |
%17 = vector.transfer_read %collapse_shape_14[%c0], %c0_i32 {in_bounds = [true]} : memref<3xi32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<3xi32> | |
%18 = vector.extract_strided_slice %16 {offsets = [0], sizes = [1], strides = [1]} : vector<3xi32> to vector<1xi32> | |
%19 = vector.extract_strided_slice %16 {offsets = [1], sizes = [1], strides = [1]} : vector<3xi32> to vector<1xi32> | |
%20 = vector.extract_strided_slice %16 {offsets = [2], sizes = [1], strides = [1]} : vector<3xi32> to vector<1xi32> | |
%21 = vector.extract %17[0] : vector<3xi32> | |
%22 = vector.extract %17[1] : vector<3xi32> | |
%23 = vector.extract %17[2] : vector<3xi32> | |
%24 = vector.outerproduct %18, %21, %arg10 {kind = #vector.kind<add>} : vector<1xi32>, i32 | |
%25 = vector.outerproduct %19, %22, %24 {kind = #vector.kind<add>} : vector<1xi32>, i32 | |
%26 = vector.outerproduct %20, %23, %25 {kind = #vector.kind<add>} : vector<1xi32>, i32 | |
scf.yield %cast, %26 : memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<1xi32> | |
} | |
linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%15#0 : memref<1x1x1xi32, strided<[?, ?, ?], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%subview_8 : memref<1x1x1xi32, strided<[200, 20, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) { | |
^bb0(%in: i32, %out: i32): | |
linalg.yield %in : i32 | |
} | |
scf.yield %15#1 : vector<1xi32> | |
} | |
scf.yield %13 : vector<1xi32> | |
} | |
scf.yield %12 : vector<1xi32> | |
} | |
scf.yield %11 : vector<1xi32> | |
} | |
%subview_2 = memref.subview %collapse_shape_1[0] [1] [1] : memref<1xi32, #hal.descriptor_type<storage_buffer>> to memref<i32, #hal.descriptor_type<storage_buffer>> | |
%10 = vector.shape_cast %9 : vector<1xi32> to vector<i32> | |
vector.transfer_write %10, %subview_2[] : vector<i32>, memref<i32, #hal.descriptor_type<storage_buffer>> | |
return | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; ModuleID = 'files/module_pipeline_dispatch_0_embedded_elf_arm_64.codegen.bc' | |
source_filename = "pipeline_dispatch_0" | |
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" | |
target triple = "aarch64-unknown-unknown-eabi-elf" | |
%iree_hal_executable_library_header_t = type { i32, ptr, i32, i32 } | |
%iree_hal_executable_dispatch_attrs_v0_t = type { i16, i16 } | |
%iree_hal_executable_src_loc_v0_t = type { i32, i32, ptr } | |
%iree_hal_executable_library_v0_t = type { ptr, %iree_hal_executable_import_table_v0_t, %iree_hal_executable_export_table_v0_t, %iree_hal_executable_constant_table_v0_t } | |
%iree_hal_executable_import_table_v0_t = type { i32, ptr } | |
%iree_hal_executable_export_table_v0_t = type { i32, ptr, ptr, ptr, ptr, ptr } | |
%iree_hal_executable_constant_table_v0_t = type { i32 } | |
%iree_hal_executable_dispatch_state_v0_t = type { i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr } | |
%iree_hal_executable_workgroup_state_v0_t = type { i32, i32, i16, i16, i32, ptr, i32 } | |
@0 = private constant [20 x i8] c"pipeline_dispatch_0\00", align 1 | |
@iree_hal_executable_library_query_v0_header = private constant %iree_hal_executable_library_header_t { i32 3, ptr @0, i32 0, i32 0 } | |
@iree_hal_executable_library_query_v0_funcs = private constant [1 x ptr] [ptr @pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32] | |
@iree_hal_executable_library_query_v0_attrs = private constant [1 x %iree_hal_executable_dispatch_attrs_v0_t] zeroinitializer | |
@1 = private constant [65 x i8] c"pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32\00", align 1 | |
@iree_hal_executable_library_query_v0_names = private constant [1 x ptr] [ptr @1] | |
@2 = private constant [1 x i8] zeroinitializer, align 1 | |
@iree_hal_executable_library_query_v0_tags = private constant [1 x ptr] [ptr @2] | |
@3 = private constant [64 x i8] c"/home/andwar02/work/VOSA//test_standalone_ops/conv2d_plain.mlir\00", align 1 | |
@iree_hal_executable_library_query_v0_src_locs = private constant [1 x %iree_hal_executable_src_loc_v0_t] [%iree_hal_executable_src_loc_v0_t { i32 2, i32 63, ptr @3 }] | |
@iree_hal_executable_library_query_v0 = private constant %iree_hal_executable_library_v0_t { ptr @iree_hal_executable_library_query_v0_header, %iree_hal_executable_import_table_v0_t zeroinitializer, %iree_hal_executable_export_table_v0_t { i32 1, ptr @iree_hal_executable_library_query_v0_funcs, ptr @iree_hal_executable_library_query_v0_attrs, ptr @iree_hal_executable_library_query_v0_names, ptr @iree_hal_executable_library_query_v0_tags, ptr @iree_hal_executable_library_query_v0_src_locs }, %iree_hal_executable_constant_table_v0_t zeroinitializer } | |
declare ptr @malloc(i64) #0 | |
declare void @free(ptr) #0 | |
define internal i32 @pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32(ptr noalias nonnull align 16 %0, ptr noalias nonnull align 16 %1, ptr noalias nonnull align 16 %2) #0 !dbg !3 { | |
%4 = alloca i32, i64 1, align 64, !dbg !79 | |
%5 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } undef, ptr %4, 0, !dbg !79 | |
%6 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %5, ptr %4, 1, !dbg !79 | |
%7 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %6, i64 0, 2, !dbg !79 | |
%8 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %7, i64 1, 3, 0, !dbg !79 | |
%9 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %8, i64 1, 3, 1, !dbg !79 | |
%10 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %9, i64 1, 3, 2, !dbg !79 | |
%11 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %10, i64 1, 4, 0, !dbg !79 | |
%12 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %11, i64 1, 4, 1, !dbg !79 | |
%13 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %12, i64 1, 4, 2, !dbg !79 | |
%14 = alloca i32, i64 3, align 64, !dbg !79 | |
%15 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8, !dbg !80 | |
%16 = extractvalue %iree_hal_executable_dispatch_state_v0_t %15, 10, !dbg !80 | |
%17 = load ptr, ptr %16, align 8, !dbg !80 | |
%18 = ptrtoint ptr %17 to i64, !dbg !80 | |
%19 = and i64 %18, 63, !dbg !80 | |
%20 = icmp eq i64 %19, 0, !dbg !80 | |
call void @llvm.assume(i1 %20), !dbg !80 | |
%21 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8, !dbg !80 | |
%22 = extractvalue %iree_hal_executable_dispatch_state_v0_t %21, 10, !dbg !80 | |
%23 = getelementptr ptr, ptr %22, i32 1, !dbg !80 | |
%24 = load ptr, ptr %23, align 8, !dbg !80 | |
%25 = ptrtoint ptr %24 to i64, !dbg !80 | |
%26 = and i64 %25, 63, !dbg !80 | |
%27 = icmp eq i64 %26, 0, !dbg !80 | |
call void @llvm.assume(i1 %27), !dbg !80 | |
%28 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8, !dbg !79 | |
%29 = extractvalue %iree_hal_executable_dispatch_state_v0_t %28, 10, !dbg !79 | |
%30 = getelementptr ptr, ptr %29, i32 2, !dbg !79 | |
%31 = load ptr, ptr %30, align 8, !dbg !79 | |
%32 = ptrtoint ptr %31 to i64, !dbg !79 | |
%33 = and i64 %32, 63, !dbg !79 | |
%34 = icmp eq i64 %33, 0, !dbg !79 | |
call void @llvm.assume(i1 %34), !dbg !79 | |
%35 = load %iree_hal_executable_workgroup_state_v0_t, ptr %2, align 8, !dbg !79 | |
%36 = extractvalue %iree_hal_executable_workgroup_state_v0_t %35, 0, !dbg !79 | |
%37 = zext i32 %36 to i64, !dbg !79 | |
%38 = extractvalue %iree_hal_executable_workgroup_state_v0_t %35, 1, !dbg !79 | |
%39 = zext i32 %38 to i64, !dbg !79 | |
%40 = getelementptr i32, ptr %4, i64 0, !dbg !79 | |
%41 = load i32, ptr %40, align 4, !dbg !79 | |
%42 = insertelement <1 x i32> undef, i32 %41, i32 0, !dbg !79 | |
%43 = extractelement <1 x i32> %42, i64 0, !dbg !79 | |
%44 = insertelement <1 x i32> zeroinitializer, i32 %43, i64 0, !dbg !79 | |
br label %45, !dbg !79 | |
45: ; preds = %159, %3 | |
%46 = phi i64 [ %160, %159 ], [ 0, %3 ] | |
%47 = phi <1 x i32> [ %51, %159 ], [ %44, %3 ] | |
%48 = icmp slt i64 %46, 2, !dbg !79 | |
br i1 %48, label %49, label %161, !dbg !79 | |
49: ; preds = %144, %45 | |
%50 = phi i64 [ %158, %144 ], [ 0, %45 ] | |
%51 = phi <1 x i32> [ %80, %144 ], [ %47, %45 ] | |
%52 = icmp slt i64 %50, 5, !dbg !79 | |
br i1 %52, label %53, label %159, !dbg !79 | |
53: ; preds = %49 | |
%54 = mul i64 %39, 2, !dbg !79 | |
%55 = add i64 %46, %54, !dbg !79 | |
%56 = mul i64 %37, 5, !dbg !79 | |
%57 = add i64 %50, %56, !dbg !79 | |
%58 = mul i64 %55, 20, !dbg !79 | |
%59 = add i64 0, %58, !dbg !79 | |
%60 = add i64 %59, %57, !dbg !79 | |
%61 = add i64 %60, 0, !dbg !79 | |
%62 = getelementptr i32, ptr %31, i64 %61, !dbg !79 | |
store i32 0, ptr %62, align 4, !dbg !79 | |
%63 = mul i64 %46, 20, !dbg !79 | |
%64 = mul i64 %39, 40, !dbg !79 | |
%65 = add i64 %63, %64, !dbg !79 | |
%66 = add i64 %65, %50, !dbg !79 | |
%67 = add i64 %66, %56, !dbg !79 | |
%68 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } undef, ptr %31, 0, !dbg !79 | |
%69 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %68, ptr %31, 1, !dbg !79 | |
%70 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %69, i64 %67, 2, !dbg !79 | |
%71 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %70, i64 1, 3, 0, !dbg !79 | |
%72 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %71, i64 200, 4, 0, !dbg !79 | |
%73 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, i64 1, 3, 1, !dbg !79 | |
%74 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %73, i64 20, 4, 1, !dbg !79 | |
%75 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %74, i64 1, 3, 2, !dbg !79 | |
%76 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %75, i64 1, 4, 2, !dbg !79 | |
br label %77, !dbg !79 | |
77: ; preds = %99, %53 | |
%78 = phi i64 [ %143, %99 ], [ 0, %53 ] | |
%79 = phi { ptr, ptr, i64, [3 x i64], [3 x i64] } [ %13, %99 ], [ %76, %53 ] | |
%80 = phi <1 x i32> [ %142, %99 ], [ %51, %53 ] | |
%81 = icmp slt i64 %78, 9, !dbg !79 | |
br i1 %81, label %82, label %144, !dbg !79 | |
82: ; preds = %85, %77 | |
%83 = phi i64 [ %98, %85 ], [ 0, %77 ] | |
%84 = icmp slt i64 %83, 3, !dbg !79 | |
br i1 %84, label %85, label %99, !dbg !79 | |
85: ; preds = %82 | |
%86 = add i64 %56, %50, !dbg !79 | |
%87 = add i64 %86, %78, !dbg !79 | |
%88 = add i64 %87, %83, !dbg !79 | |
%89 = mul i64 %55, 28, !dbg !79 | |
%90 = add i64 0, %89, !dbg !79 | |
%91 = add i64 %90, %88, !dbg !79 | |
%92 = add i64 %91, 0, !dbg !79 | |
%93 = getelementptr i32, ptr %17, i64 %92, !dbg !79 | |
%94 = load i32, ptr %93, align 4, !dbg !79 | |
%95 = add i64 0, %83, !dbg !79 | |
%96 = add i64 %95, 0, !dbg !79 | |
%97 = getelementptr i32, ptr %14, i64 %96, !dbg !79 | |
store i32 %94, ptr %97, align 4, !dbg !79 | |
%98 = add i64 %83, 1, !dbg !79 | |
br label %82, !dbg !79 | |
99: ; preds = %82 | |
%100 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 1, !dbg !79 | |
%101 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 2, !dbg !79 | |
%102 = getelementptr i32, ptr %100, i64 %101, !dbg !79 | |
%103 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 4, 0, !dbg !79 | |
%104 = mul i64 %103, 0, !dbg !79 | |
%105 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 4, 1, !dbg !79 | |
%106 = mul i64 %105, 0, !dbg !79 | |
%107 = add i64 %104, %106, !dbg !79 | |
%108 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 4, 2, !dbg !79 | |
%109 = mul i64 %108, 0, !dbg !79 | |
%110 = add i64 %107, %109, !dbg !79 | |
%111 = getelementptr i32, ptr %102, i64 %110, !dbg !79 | |
%112 = load i32, ptr %111, align 4, !dbg !79 | |
store i32 %112, ptr %40, align 4, !dbg !79 | |
%113 = load <3 x i32>, ptr %14, align 4, !dbg !79 | |
%114 = shufflevector <3 x i32> %113, <3 x i32> %113, <1 x i32> zeroinitializer, !dbg !79 | |
%115 = shufflevector <3 x i32> %113, <3 x i32> %113, <1 x i32> <i32 1>, !dbg !79 | |
%116 = shufflevector <3 x i32> %113, <3 x i32> %113, <1 x i32> <i32 2>, !dbg !79 | |
%117 = add i64 0, %78, !dbg !79 | |
%118 = add i64 %117, 0, !dbg !79 | |
%119 = getelementptr i32, ptr %24, i64 %118, !dbg !79 | |
%120 = load i32, ptr %119, align 4, !dbg !79 | |
%121 = add i64 %78, 1, !dbg !79 | |
%122 = add i64 0, %121, !dbg !79 | |
%123 = add i64 %122, 0, !dbg !79 | |
%124 = getelementptr i32, ptr %24, i64 %123, !dbg !79 | |
%125 = load i32, ptr %124, align 4, !dbg !79 | |
%126 = add i64 %78, 2, !dbg !79 | |
%127 = add i64 0, %126, !dbg !79 | |
%128 = add i64 %127, 0, !dbg !79 | |
%129 = getelementptr i32, ptr %24, i64 %128, !dbg !79 | |
%130 = load i32, ptr %129, align 4, !dbg !79 | |
%131 = insertelement <1 x i32> undef, i32 %120, i32 0, !dbg !79 | |
%132 = shufflevector <1 x i32> %131, <1 x i32> undef, <1 x i32> zeroinitializer, !dbg !79 | |
%133 = mul <1 x i32> %114, %132, !dbg !79 | |
%134 = add <1 x i32> %133, %80, !dbg !79 | |
%135 = insertelement <1 x i32> undef, i32 %125, i32 0, !dbg !79 | |
%136 = shufflevector <1 x i32> %135, <1 x i32> undef, <1 x i32> zeroinitializer, !dbg !79 | |
%137 = mul <1 x i32> %115, %136, !dbg !79 | |
%138 = add <1 x i32> %137, %134, !dbg !79 | |
%139 = insertelement <1 x i32> undef, i32 %130, i32 0, !dbg !79 | |
%140 = shufflevector <1 x i32> %139, <1 x i32> undef, <1 x i32> zeroinitializer, !dbg !79 | |
%141 = mul <1 x i32> %116, %140, !dbg !79 | |
%142 = add <1 x i32> %141, %138, !dbg !79 | |
%143 = add i64 %78, 3, !dbg !79 | |
br label %77, !dbg !79 | |
144: ; preds = %77 | |
%145 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 1, !dbg !79 | |
%146 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 2, !dbg !79 | |
%147 = getelementptr i32, ptr %145, i64 %146, !dbg !79 | |
%148 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 4, 0, !dbg !79 | |
%149 = mul i64 %148, 0, !dbg !79 | |
%150 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 4, 1, !dbg !79 | |
%151 = mul i64 %150, 0, !dbg !79 | |
%152 = add i64 %149, %151, !dbg !79 | |
%153 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %79, 4, 2, !dbg !79 | |
%154 = mul i64 %153, 0, !dbg !79 | |
%155 = add i64 %152, %154, !dbg !79 | |
%156 = getelementptr i32, ptr %147, i64 %155, !dbg !79 | |
%157 = load i32, ptr %156, align 4, !dbg !79 | |
store i32 %157, ptr %62, align 4, !dbg !79 | |
%158 = add i64 %50, 1, !dbg !79 | |
br label %49, !dbg !79 | |
159: ; preds = %49 | |
%160 = add i64 %46, 1, !dbg !79 | |
br label %45, !dbg !79 | |
161: ; preds = %45 | |
%162 = extractelement <1 x i32> %47, i64 0, !dbg !79 | |
%163 = insertelement <1 x i32> zeroinitializer, i32 %162, i64 0, !dbg !79 | |
%164 = extractelement <1 x i32> %163, i64 0, !dbg !79 | |
store i32 %164, ptr %40, align 4, !dbg !79 | |
ret i32 0, !dbg !79 | |
} | |
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) | |
declare void @llvm.assume(i1 noundef) #1 | |
; Function Attrs: uwtable | |
define dso_local dllexport ptr @iree_hal_executable_library_query(i32 %0, ptr %1) #2 { | |
entry: | |
%2 = icmp eq i32 %0, 3 | |
%3 = select i1 %2, ptr @iree_hal_executable_library_query_v0, ptr null | |
ret ptr %3 | |
} | |
attributes #0 = { "frame-pointer"="all" "hot" "no-builtins" "nonlazybind" } | |
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } | |
attributes #2 = { uwtable "nonlazybind" } | |
!llvm.module.flags = !{!0} | |
!llvm.dbg.cu = !{!1} | |
!0 = !{i32 2, !"Debug Info Version", i32 3} | |
!1 = distinct !DICompileUnit(language: DW_LANG_C17, file: !2, producer: "IREE", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) | |
!2 = !DIFile(filename: "-", directory: "") | |
!3 = distinct !DISubprogram(name: "pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32", linkageName: "pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32", scope: !2, file: !2, line: 1, type: !4, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !1) | |
!4 = !DISubroutineType(cc: DW_CC_normal, types: !5) | |
!5 = !{!6, !7, !38, !67} | |
!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) | |
!7 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64) | |
!8 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !9) | |
!9 = !DIDerivedType(tag: DW_TAG_typedef, name: "iree_hal_executable_environment_v0_t", baseType: !10) | |
!10 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_executable_environment_v0_t", scope: !11, file: !11, line: 246, size: 768, elements: !12) | |
!11 = !DIFile(filename: "runtime/src/iree/hal/local/executable_library.h", directory: ".") | |
!12 = !{!13, !21, !24, !27, !29} | |
!13 = !DIDerivedType(tag: DW_TAG_member, name: "constants", baseType: !14, size: 64) | |
!14 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !15, size: 64) | |
!15 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !16) | |
!16 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !17, size: 2048, elements: !19) | |
!17 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint32_t", baseType: !18) | |
!18 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) | |
!19 = !{!20} | |
!20 = !DISubrange(count: 64) | |
!21 = !DIDerivedType(tag: DW_TAG_member, name: "import_thunk", baseType: !22, size: 64, offset: 64) | |
!22 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !23, size: 64) | |
!23 = !DIBasicType(name: "void", encoding: DW_ATE_address) | |
!24 = !DIDerivedType(tag: DW_TAG_member, name: "import_funcs", baseType: !25, size: 64, offset: 128) | |
!25 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !26, size: 64) | |
!26 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !22) | |
!27 = !DIDerivedType(tag: DW_TAG_member, name: "import_contexts", baseType: !28, size: 64, offset: 192) | |
!28 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !25, size: 64) | |
!29 = !DIDerivedType(tag: DW_TAG_member, name: "processor", baseType: !30, offset: 256) | |
!30 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_processor_v0_t", scope: !11, file: !11, line: 227, size: 512, elements: !31) | |
!31 = !{!32} | |
!32 = !DIDerivedType(tag: DW_TAG_member, name: "data", baseType: !33) | |
!33 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !34, size: 512, elements: !36) | |
!34 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint64_t", baseType: !35) | |
!35 = !DIBasicType(name: "long long unsigned int", size: 64, encoding: DW_ATE_unsigned) | |
!36 = !{!37} | |
!37 = !DISubrange(count: 8) | |
!38 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !39, size: 64) | |
!39 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !40) | |
!40 = !DIDerivedType(tag: DW_TAG_typedef, name: "iree_hal_executable_dispatch_state_v0_t", baseType: !41) | |
!41 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_executable_dispatch_state_v0_t", scope: !11, file: !11, line: 275, size: 384, elements: !42) | |
!42 = !{!43, !44, !45, !48, !49, !50, !51, !52, !55, !56, !57, !62} | |
!43 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_size_x", baseType: !17, size: 32) | |
!44 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_size_y", baseType: !17, size: 32, offset: 32) | |
!45 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_size_z", baseType: !46, size: 16, offset: 64) | |
!46 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint16_t", baseType: !47) | |
!47 = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned) | |
!48 = !DIDerivedType(tag: DW_TAG_member, name: "push_constant_count", baseType: !46, size: 16, offset: 80) | |
!49 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_count_x", baseType: !17, size: 32, offset: 96) | |
!50 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_count_y", baseType: !17, size: 32, offset: 128) | |
!51 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_count_z", baseType: !46, size: 16, offset: 160) | |
!52 = !DIDerivedType(tag: DW_TAG_member, name: "max_concurrency", baseType: !53, size: 8, offset: 176) | |
!53 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint8_t", baseType: !54) | |
!54 = !DIBasicType(name: "unsigned char", size: 8, encoding: DW_ATE_unsigned_char) | |
!55 = !DIDerivedType(tag: DW_TAG_member, name: "binding_count", baseType: !53, size: 8, offset: 184) | |
!56 = !DIDerivedType(tag: DW_TAG_member, name: "push_constants", baseType: !14, size: 64, offset: 192) | |
!57 = !DIDerivedType(tag: DW_TAG_member, name: "binding_ptrs", baseType: !58, size: 64, offset: 256) | |
!58 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !59, size: 64) | |
!59 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !60) | |
!60 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !61, size: 4096, elements: !19) | |
!61 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !53, size: 64) | |
!62 = !DIDerivedType(tag: DW_TAG_member, name: "binding_lengths", baseType: !63, size: 64, offset: 320) | |
!63 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !64, size: 64) | |
!64 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !65) | |
!65 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !66, size: 4096, elements: !19) | |
!66 = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", baseType: !34) | |
!67 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !68, size: 64) | |
!68 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !69) | |
!69 = !DIDerivedType(tag: DW_TAG_typedef, name: "iree_hal_executable_workgroup_state_v0_t", baseType: !70) | |
!70 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_executable_workgroup_state_v0_t", scope: !11, file: !11, line: 321, size: 256, elements: !71) | |
!71 = !{!72, !73, !74, !75, !76, !77, !78} | |
!72 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_id_x", baseType: !17, size: 32) | |
!73 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_id_y", baseType: !17, size: 32, offset: 32) | |
!74 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_id_z", baseType: !46, size: 16, offset: 64) | |
!75 = !DIDerivedType(tag: DW_TAG_member, name: "reserved", baseType: !46, size: 16, offset: 80) | |
!76 = !DIDerivedType(tag: DW_TAG_member, name: "processor_id", baseType: !17, size: 32, offset: 96) | |
!77 = !DIDerivedType(tag: DW_TAG_member, name: "local_memory", baseType: !22, size: 64, offset: 128) | |
!78 = !DIDerivedType(tag: DW_TAG_member, name: "local_memory_size", baseType: !17, size: 32, offset: 192) | |
!79 = !DILocation(line: 7, column: 10, scope: !3, inlinedAt: !80) | |
!80 = !DILocation(line: 2, column: 3, scope: !3) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; ModuleID = 'files/module_pipeline_dispatch_0_embedded_elf_arm_64.codegen.bc' | |
source_filename = "pipeline_dispatch_0" | |
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" | |
target triple = "aarch64-unknown-unknown-eabi-elf" | |
%iree_hal_executable_library_header_t = type { i32, ptr, i32, i32 } | |
%iree_hal_executable_dispatch_attrs_v0_t = type { i16, i16 } | |
%iree_hal_executable_src_loc_v0_t = type { i32, i32, ptr } | |
%iree_hal_executable_library_v0_t = type { ptr, %iree_hal_executable_import_table_v0_t, %iree_hal_executable_export_table_v0_t, %iree_hal_executable_constant_table_v0_t } | |
%iree_hal_executable_import_table_v0_t = type { i32, ptr } | |
%iree_hal_executable_export_table_v0_t = type { i32, ptr, ptr, ptr, ptr, ptr } | |
%iree_hal_executable_constant_table_v0_t = type { i32 } | |
%iree_hal_executable_dispatch_state_v0_t = type { i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr } | |
%iree_hal_executable_workgroup_state_v0_t = type { i32, i32, i16, i16, i32, ptr, i32 } | |
@0 = private constant [20 x i8] c"pipeline_dispatch_0\00", align 1 | |
@iree_hal_executable_library_query_v0_header = private constant %iree_hal_executable_library_header_t { i32 3, ptr @0, i32 0, i32 0 } | |
@iree_hal_executable_library_query_v0_funcs = private constant [1 x ptr] [ptr @pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32] | |
@iree_hal_executable_library_query_v0_attrs = private constant [1 x %iree_hal_executable_dispatch_attrs_v0_t] zeroinitializer | |
@1 = private constant [65 x i8] c"pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32\00", align 1 | |
@iree_hal_executable_library_query_v0_names = private constant [1 x ptr] [ptr @1] | |
@2 = private constant [1 x i8] zeroinitializer, align 1 | |
@iree_hal_executable_library_query_v0_tags = private constant [1 x ptr] [ptr @2] | |
@3 = private constant [64 x i8] c"/home/andwar02/work/VOSA//test_standalone_ops/conv2d_plain.mlir\00", align 1 | |
@iree_hal_executable_library_query_v0_src_locs = private constant [1 x %iree_hal_executable_src_loc_v0_t] [%iree_hal_executable_src_loc_v0_t { i32 2, i32 63, ptr @3 }] | |
@iree_hal_executable_library_query_v0 = private constant %iree_hal_executable_library_v0_t { ptr @iree_hal_executable_library_query_v0_header, %iree_hal_executable_import_table_v0_t zeroinitializer, %iree_hal_executable_export_table_v0_t { i32 1, ptr @iree_hal_executable_library_query_v0_funcs, ptr @iree_hal_executable_library_query_v0_attrs, ptr @iree_hal_executable_library_query_v0_names, ptr @iree_hal_executable_library_query_v0_tags, ptr @iree_hal_executable_library_query_v0_src_locs }, %iree_hal_executable_constant_table_v0_t zeroinitializer } | |
declare ptr @malloc(i64) #0 | |
declare void @free(ptr) #0 | |
define internal i32 @pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32(ptr noalias nonnull align 16 %0, ptr noalias nonnull align 16 %1, ptr noalias nonnull align 16 %2) #0 !dbg !3 { | |
%4 = alloca i32, i64 1, align 64, !dbg !79 | |
%5 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } undef, ptr %4, 0, !dbg !79 | |
%6 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %5, ptr %4, 1, !dbg !79 | |
%7 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %6, i64 0, 2, !dbg !79 | |
%8 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %7, i64 1, 3, 0, !dbg !79 | |
%9 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %8, i64 1, 3, 1, !dbg !79 | |
%10 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %9, i64 1, 3, 2, !dbg !79 | |
%11 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %10, i64 1, 4, 0, !dbg !79 | |
%12 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %11, i64 1, 4, 1, !dbg !79 | |
%13 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %12, i64 1, 4, 2, !dbg !79 | |
%14 = alloca i32, i64 3, align 64, !dbg !79 | |
%15 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8, !dbg !80 | |
%16 = extractvalue %iree_hal_executable_dispatch_state_v0_t %15, 10, !dbg !80 | |
%17 = load ptr, ptr %16, align 8, !dbg !80 | |
%18 = ptrtoint ptr %17 to i64, !dbg !80 | |
%19 = and i64 %18, 63, !dbg !80 | |
%20 = icmp eq i64 %19, 0, !dbg !80 | |
call void @llvm.assume(i1 %20), !dbg !80 | |
%21 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8, !dbg !80 | |
%22 = extractvalue %iree_hal_executable_dispatch_state_v0_t %21, 10, !dbg !80 | |
%23 = getelementptr ptr, ptr %22, i32 1, !dbg !80 | |
%24 = load ptr, ptr %23, align 8, !dbg !80 | |
%25 = ptrtoint ptr %24 to i64, !dbg !80 | |
%26 = and i64 %25, 63, !dbg !80 | |
%27 = icmp eq i64 %26, 0, !dbg !80 | |
call void @llvm.assume(i1 %27), !dbg !80 | |
%28 = load %iree_hal_executable_dispatch_state_v0_t, ptr %1, align 8, !dbg !79 | |
%29 = extractvalue %iree_hal_executable_dispatch_state_v0_t %28, 10, !dbg !79 | |
%30 = getelementptr ptr, ptr %29, i32 2, !dbg !79 | |
%31 = load ptr, ptr %30, align 8, !dbg !79 | |
%32 = ptrtoint ptr %31 to i64, !dbg !79 | |
%33 = and i64 %32, 63, !dbg !79 | |
%34 = icmp eq i64 %33, 0, !dbg !79 | |
call void @llvm.assume(i1 %34), !dbg !79 | |
%35 = load %iree_hal_executable_workgroup_state_v0_t, ptr %2, align 8, !dbg !79 | |
%36 = extractvalue %iree_hal_executable_workgroup_state_v0_t %35, 0, !dbg !79 | |
%37 = zext i32 %36 to i64, !dbg !79 | |
%38 = extractvalue %iree_hal_executable_workgroup_state_v0_t %35, 1, !dbg !79 | |
%39 = zext i32 %38 to i64, !dbg !79 | |
br label %40, !dbg !79 | |
40: ; preds = %159, %3 | |
%41 = phi i64 [ %160, %159 ], [ 0, %3 ] | |
%42 = icmp slt i64 %41, 2, !dbg !79 | |
br i1 %42, label %43, label %161, !dbg !79 | |
43: ; preds = %144, %40 | |
%44 = phi i64 [ %158, %144 ], [ 0, %40 ] | |
%45 = icmp slt i64 %44, 5, !dbg !79 | |
br i1 %45, label %46, label %159, !dbg !79 | |
46: ; preds = %43 | |
%47 = mul i64 %39, 2, !dbg !79 | |
%48 = add i64 %41, %47, !dbg !79 | |
%49 = mul i64 %37, 5, !dbg !79 | |
%50 = add i64 %44, %49, !dbg !79 | |
%51 = mul i64 %48, 20, !dbg !79 | |
%52 = add i64 0, %51, !dbg !79 | |
%53 = add i64 %52, %50, !dbg !79 | |
%54 = add i64 %53, 0, !dbg !79 | |
%55 = getelementptr i32, ptr %31, i64 %54, !dbg !79 | |
store i32 0, ptr %55, align 4, !dbg !79 | |
%56 = mul i64 %41, 20, !dbg !79 | |
%57 = mul i64 %39, 40, !dbg !79 | |
%58 = add i64 %56, %57, !dbg !79 | |
%59 = add i64 %58, %44, !dbg !79 | |
%60 = add i64 %59, %49, !dbg !79 | |
%61 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } undef, ptr %31, 0, !dbg !79 | |
%62 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %61, ptr %31, 1, !dbg !79 | |
%63 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %62, i64 %60, 2, !dbg !79 | |
%64 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %63, i64 1, 3, 0, !dbg !79 | |
%65 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %64, i64 200, 4, 0, !dbg !79 | |
%66 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %65, i64 1, 3, 1, !dbg !79 | |
%67 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %66, i64 20, 4, 1, !dbg !79 | |
%68 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %67, i64 1, 3, 2, !dbg !79 | |
%69 = insertvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %68, i64 1, 4, 2, !dbg !79 | |
br label %70, !dbg !79 | |
70: ; preds = %91, %46 | |
%71 = phi i64 [ %143, %91 ], [ 0, %46 ] | |
%72 = phi { ptr, ptr, i64, [3 x i64], [3 x i64] } [ %13, %91 ], [ %69, %46 ] | |
%73 = icmp slt i64 %71, 9, !dbg !79 | |
br i1 %73, label %74, label %144, !dbg !79 | |
74: ; preds = %77, %70 | |
%75 = phi i64 [ %90, %77 ], [ 0, %70 ] | |
%76 = icmp slt i64 %75, 3, !dbg !79 | |
br i1 %76, label %77, label %91, !dbg !79 | |
77: ; preds = %74 | |
%78 = add i64 %49, %44, !dbg !79 | |
%79 = add i64 %78, %71, !dbg !79 | |
%80 = add i64 %79, %75, !dbg !79 | |
%81 = mul i64 %48, 28, !dbg !79 | |
%82 = add i64 0, %81, !dbg !79 | |
%83 = add i64 %82, %80, !dbg !79 | |
%84 = add i64 %83, 0, !dbg !79 | |
%85 = getelementptr i32, ptr %17, i64 %84, !dbg !79 | |
%86 = load i32, ptr %85, align 4, !dbg !79 | |
%87 = add i64 0, %75, !dbg !79 | |
%88 = add i64 %87, 0, !dbg !79 | |
%89 = getelementptr i32, ptr %14, i64 %88, !dbg !79 | |
store i32 %86, ptr %89, align 4, !dbg !79 | |
%90 = add i64 %75, 1, !dbg !79 | |
br label %74, !dbg !79 | |
91: ; preds = %74 | |
%92 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 1, !dbg !79 | |
%93 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 2, !dbg !79 | |
%94 = getelementptr i32, ptr %92, i64 %93, !dbg !79 | |
%95 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 4, 0, !dbg !79 | |
%96 = mul i64 %95, 0, !dbg !79 | |
%97 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 4, 1, !dbg !79 | |
%98 = mul i64 %97, 0, !dbg !79 | |
%99 = add i64 %96, %98, !dbg !79 | |
%100 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 4, 2, !dbg !79 | |
%101 = mul i64 %100, 0, !dbg !79 | |
%102 = add i64 %99, %101, !dbg !79 | |
%103 = getelementptr i32, ptr %94, i64 %102, !dbg !79 | |
%104 = load i32, ptr %103, align 4, !dbg !79 | |
%105 = getelementptr i32, ptr %4, i64 0, !dbg !79 | |
store i32 %104, ptr %105, align 4, !dbg !79 | |
%106 = load <3 x i32>, ptr %14, align 4, !dbg !79 | |
%107 = load i32, ptr %105, align 4, !dbg !79 | |
%108 = insertelement <1 x i32> undef, i32 %107, i32 0, !dbg !79 | |
%109 = extractelement <1 x i32> %108, i64 0, !dbg !79 | |
%110 = insertelement <1 x i32> zeroinitializer, i32 %109, i64 0, !dbg !79 | |
%111 = shufflevector <3 x i32> %106, <3 x i32> %106, <1 x i32> zeroinitializer, !dbg !79 | |
%112 = shufflevector <3 x i32> %106, <3 x i32> %106, <1 x i32> <i32 1>, !dbg !79 | |
%113 = shufflevector <3 x i32> %106, <3 x i32> %106, <1 x i32> <i32 2>, !dbg !79 | |
%114 = add i64 0, %71, !dbg !79 | |
%115 = add i64 %114, 0, !dbg !79 | |
%116 = getelementptr i32, ptr %24, i64 %115, !dbg !79 | |
%117 = load i32, ptr %116, align 4, !dbg !79 | |
%118 = add i64 %71, 1, !dbg !79 | |
%119 = add i64 0, %118, !dbg !79 | |
%120 = add i64 %119, 0, !dbg !79 | |
%121 = getelementptr i32, ptr %24, i64 %120, !dbg !79 | |
%122 = load i32, ptr %121, align 4, !dbg !79 | |
%123 = add i64 %71, 2, !dbg !79 | |
%124 = add i64 0, %123, !dbg !79 | |
%125 = add i64 %124, 0, !dbg !79 | |
%126 = getelementptr i32, ptr %24, i64 %125, !dbg !79 | |
%127 = load i32, ptr %126, align 4, !dbg !79 | |
%128 = insertelement <1 x i32> undef, i32 %117, i32 0, !dbg !79 | |
%129 = shufflevector <1 x i32> %128, <1 x i32> undef, <1 x i32> zeroinitializer, !dbg !79 | |
%130 = mul <1 x i32> %111, %129, !dbg !79 | |
%131 = add <1 x i32> %130, %110, !dbg !79 | |
%132 = insertelement <1 x i32> undef, i32 %122, i32 0, !dbg !79 | |
%133 = shufflevector <1 x i32> %132, <1 x i32> undef, <1 x i32> zeroinitializer, !dbg !79 | |
%134 = mul <1 x i32> %112, %133, !dbg !79 | |
%135 = add <1 x i32> %134, %131, !dbg !79 | |
%136 = insertelement <1 x i32> undef, i32 %127, i32 0, !dbg !79 | |
%137 = shufflevector <1 x i32> %136, <1 x i32> undef, <1 x i32> zeroinitializer, !dbg !79 | |
%138 = mul <1 x i32> %113, %137, !dbg !79 | |
%139 = add <1 x i32> %138, %135, !dbg !79 | |
%140 = extractelement <1 x i32> %139, i64 0, !dbg !79 | |
%141 = insertelement <1 x i32> zeroinitializer, i32 %140, i64 0, !dbg !79 | |
%142 = extractelement <1 x i32> %141, i64 0, !dbg !79 | |
store i32 %142, ptr %105, align 4, !dbg !79 | |
%143 = add i64 %71, 3, !dbg !79 | |
br label %70, !dbg !79 | |
144: ; preds = %70 | |
%145 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 1, !dbg !79 | |
%146 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 2, !dbg !79 | |
%147 = getelementptr i32, ptr %145, i64 %146, !dbg !79 | |
%148 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 4, 0, !dbg !79 | |
%149 = mul i64 %148, 0, !dbg !79 | |
%150 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 4, 1, !dbg !79 | |
%151 = mul i64 %150, 0, !dbg !79 | |
%152 = add i64 %149, %151, !dbg !79 | |
%153 = extractvalue { ptr, ptr, i64, [3 x i64], [3 x i64] } %72, 4, 2, !dbg !79 | |
%154 = mul i64 %153, 0, !dbg !79 | |
%155 = add i64 %152, %154, !dbg !79 | |
%156 = getelementptr i32, ptr %147, i64 %155, !dbg !79 | |
%157 = load i32, ptr %156, align 4, !dbg !79 | |
store i32 %157, ptr %55, align 4, !dbg !79 | |
%158 = add i64 %44, 1, !dbg !79 | |
br label %43, !dbg !79 | |
159: ; preds = %43 | |
%160 = add i64 %41, 1, !dbg !79 | |
br label %40, !dbg !79 | |
161: ; preds = %40 | |
ret i32 0, !dbg !79 | |
} | |
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) | |
declare void @llvm.assume(i1 noundef) #1 | |
; Function Attrs: uwtable | |
define dso_local dllexport ptr @iree_hal_executable_library_query(i32 %0, ptr %1) #2 { | |
entry: | |
%2 = icmp eq i32 %0, 3 | |
%3 = select i1 %2, ptr @iree_hal_executable_library_query_v0, ptr null | |
ret ptr %3 | |
} | |
attributes #0 = { "frame-pointer"="all" "hot" "no-builtins" "nonlazybind" } | |
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } | |
attributes #2 = { uwtable "nonlazybind" } | |
!llvm.module.flags = !{!0} | |
!llvm.dbg.cu = !{!1} | |
!0 = !{i32 2, !"Debug Info Version", i32 3} | |
!1 = distinct !DICompileUnit(language: DW_LANG_C17, file: !2, producer: "IREE", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) | |
!2 = !DIFile(filename: "-", directory: "") | |
!3 = distinct !DISubprogram(name: "pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32", linkageName: "pipeline_dispatch_0_depthwise_conv_2d_nhwc_hwc_1x10x20x1x1x9_i32", scope: !2, file: !2, line: 1, type: !4, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !1) | |
!4 = !DISubroutineType(cc: DW_CC_normal, types: !5) | |
!5 = !{!6, !7, !38, !67} | |
!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) | |
!7 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64) | |
!8 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !9) | |
!9 = !DIDerivedType(tag: DW_TAG_typedef, name: "iree_hal_executable_environment_v0_t", baseType: !10) | |
!10 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_executable_environment_v0_t", scope: !11, file: !11, line: 246, size: 768, elements: !12) | |
!11 = !DIFile(filename: "runtime/src/iree/hal/local/executable_library.h", directory: ".") | |
!12 = !{!13, !21, !24, !27, !29} | |
!13 = !DIDerivedType(tag: DW_TAG_member, name: "constants", baseType: !14, size: 64) | |
!14 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !15, size: 64) | |
!15 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !16) | |
!16 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !17, size: 2048, elements: !19) | |
!17 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint32_t", baseType: !18) | |
!18 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) | |
!19 = !{!20} | |
!20 = !DISubrange(count: 64) | |
!21 = !DIDerivedType(tag: DW_TAG_member, name: "import_thunk", baseType: !22, size: 64, offset: 64) | |
!22 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !23, size: 64) | |
!23 = !DIBasicType(name: "void", encoding: DW_ATE_address) | |
!24 = !DIDerivedType(tag: DW_TAG_member, name: "import_funcs", baseType: !25, size: 64, offset: 128) | |
!25 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !26, size: 64) | |
!26 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !22) | |
!27 = !DIDerivedType(tag: DW_TAG_member, name: "import_contexts", baseType: !28, size: 64, offset: 192) | |
!28 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !25, size: 64) | |
!29 = !DIDerivedType(tag: DW_TAG_member, name: "processor", baseType: !30, offset: 256) | |
!30 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_processor_v0_t", scope: !11, file: !11, line: 227, size: 512, elements: !31) | |
!31 = !{!32} | |
!32 = !DIDerivedType(tag: DW_TAG_member, name: "data", baseType: !33) | |
!33 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !34, size: 512, elements: !36) | |
!34 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint64_t", baseType: !35) | |
!35 = !DIBasicType(name: "long long unsigned int", size: 64, encoding: DW_ATE_unsigned) | |
!36 = !{!37} | |
!37 = !DISubrange(count: 8) | |
!38 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !39, size: 64) | |
!39 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !40) | |
!40 = !DIDerivedType(tag: DW_TAG_typedef, name: "iree_hal_executable_dispatch_state_v0_t", baseType: !41) | |
!41 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_executable_dispatch_state_v0_t", scope: !11, file: !11, line: 275, size: 384, elements: !42) | |
!42 = !{!43, !44, !45, !48, !49, !50, !51, !52, !55, !56, !57, !62} | |
!43 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_size_x", baseType: !17, size: 32) | |
!44 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_size_y", baseType: !17, size: 32, offset: 32) | |
!45 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_size_z", baseType: !46, size: 16, offset: 64) | |
!46 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint16_t", baseType: !47) | |
!47 = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned) | |
!48 = !DIDerivedType(tag: DW_TAG_member, name: "push_constant_count", baseType: !46, size: 16, offset: 80) | |
!49 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_count_x", baseType: !17, size: 32, offset: 96) | |
!50 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_count_y", baseType: !17, size: 32, offset: 128) | |
!51 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_count_z", baseType: !46, size: 16, offset: 160) | |
!52 = !DIDerivedType(tag: DW_TAG_member, name: "max_concurrency", baseType: !53, size: 8, offset: 176) | |
!53 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint8_t", baseType: !54) | |
!54 = !DIBasicType(name: "unsigned char", size: 8, encoding: DW_ATE_unsigned_char) | |
!55 = !DIDerivedType(tag: DW_TAG_member, name: "binding_count", baseType: !53, size: 8, offset: 184) | |
!56 = !DIDerivedType(tag: DW_TAG_member, name: "push_constants", baseType: !14, size: 64, offset: 192) | |
!57 = !DIDerivedType(tag: DW_TAG_member, name: "binding_ptrs", baseType: !58, size: 64, offset: 256) | |
!58 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !59, size: 64) | |
!59 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !60) | |
!60 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !61, size: 4096, elements: !19) | |
!61 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !53, size: 64) | |
!62 = !DIDerivedType(tag: DW_TAG_member, name: "binding_lengths", baseType: !63, size: 64, offset: 320) | |
!63 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !64, size: 64) | |
!64 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !65) | |
!65 = !DICompositeType(tag: DW_TAG_array_type, scope: !11, file: !11, line: 227, baseType: !66, size: 4096, elements: !19) | |
!66 = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", baseType: !34) | |
!67 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !68, size: 64) | |
!68 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !69) | |
!69 = !DIDerivedType(tag: DW_TAG_typedef, name: "iree_hal_executable_workgroup_state_v0_t", baseType: !70) | |
!70 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "iree_hal_executable_workgroup_state_v0_t", scope: !11, file: !11, line: 321, size: 256, elements: !71) | |
!71 = !{!72, !73, !74, !75, !76, !77, !78} | |
!72 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_id_x", baseType: !17, size: 32) | |
!73 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_id_y", baseType: !17, size: 32, offset: 32) | |
!74 = !DIDerivedType(tag: DW_TAG_member, name: "workgroup_id_z", baseType: !46, size: 16, offset: 64) | |
!75 = !DIDerivedType(tag: DW_TAG_member, name: "reserved", baseType: !46, size: 16, offset: 80) | |
!76 = !DIDerivedType(tag: DW_TAG_member, name: "processor_id", baseType: !17, size: 32, offset: 96) | |
!77 = !DIDerivedType(tag: DW_TAG_member, name: "local_memory", baseType: !22, size: 64, offset: 128) | |
!78 = !DIDerivedType(tag: DW_TAG_member, name: "local_memory_size", baseType: !17, size: 32, offset: 192) | |
!79 = !DILocation(line: 7, column: 10, scope: !3, inlinedAt: !80) | |
!80 = !DILocation(line: 2, column: 3, scope: !3) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment