Last active
October 28, 2021 20:50
-
-
Save antiagainst/49acb0ab6171df9acf3bba433ee5daef to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -----// IR Dump Before LinalgBufferize //----- // | |
func @UpSampling3D__2x8x8x8x3__f32__uniform_dispatch_32() { | |
%c3 = arith.constant 3 : index | |
%c16 = arith.constant 16 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan @io::@s0b0_rw_external[%c0] : !flow.dispatch.tensor<readwrite:2x16x16x16x3xf32> | |
%1 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : !flow.dispatch.tensor<readonly:2x16x16x8x3xf32> | |
%2 = flow.dispatch.tensor.load %1, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:2x16x16x8x3xf32> -> tensor<2x16x16x8x3xf32> | |
%3 = tensor.extract_slice %2[0, 0, 0, 0, 0] [2, 16, 16, 1, 3] [1, 1, 1, 1, 1] : tensor<2x16x16x8x3xf32> to tensor<2x16x16x3xf32> | |
%workgroup_size_x = hal.interface.workgroup.size[0] : index | |
%workgroup_size_y = hal.interface.workgroup.size[1] : index | |
%workgroup_size_z = hal.interface.workgroup.size[2] : index | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%4 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_id_z] | |
%5 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_count_z] | |
scf.for %arg0 = %4 to %c16 step %5 { | |
%6 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg0)[%workgroup_size_z] | |
%7 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_id_y] | |
%8 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_count_y] | |
scf.for %arg1 = %7 to %c16 step %8 { | |
%9 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg1)[%workgroup_size_y] | |
%10 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_id_x] | |
%11 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_count_x] | |
scf.for %arg2 = %10 to %c3 step %11 { | |
%12 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 3)>(%arg2)[%workgroup_size_x] | |
%13 = tensor.extract_slice %3[0, %arg0, %arg1, %arg2] [2, %6, %9, %12] [1, 1, 1, 1] : tensor<2x16x16x3xf32> to tensor<2x?x?x?xf32> | |
flow.dispatch.tensor.store %13, %0, offsets = [0, %arg0, %arg1, 0, %arg2], sizes = [2, %6, %9, 1, %12], strides = [1, 1, 1, 1, 1] : tensor<2x?x?x?xf32> -> !flow.dispatch.tensor<readwrite:2x16x16x16x3xf32> | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After LinalgBufferize //----- // | |
func @UpSampling3D__2x8x8x8x3__f32__uniform_dispatch_32() { | |
%c3 = arith.constant 3 : index | |
%c16 = arith.constant 16 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan @io::@s0b0_rw_external[%c0] : memref<2x16x16x16x3xf32> | |
%1 = hal.interface.binding.subspan @io::@s0b0_rw_external[%c0] : !flow.dispatch.tensor<readwrite:2x16x16x16x3xf32> | |
%2 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : memref<2x16x16x8x3xf32> | |
%3 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : !flow.dispatch.tensor<readonly:2x16x16x8x3xf32> | |
%4 = flow.dispatch.tensor.load %3, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:2x16x16x8x3xf32> -> tensor<2x16x16x8x3xf32> | |
%5 = memref.subview %2[0, 0, 0, 0, 0] [2, 16, 16, 1, 3] [1, 1, 1, 1, 1] : memref<2x16x16x8x3xf32> to memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> | |
%6 = tensor.extract_slice %4[0, 0, 0, 0, 0] [2, 16, 16, 1, 3] [1, 1, 1, 1, 1] : tensor<2x16x16x8x3xf32> to tensor<2x16x16x3xf32> | |
%workgroup_size_x = hal.interface.workgroup.size[0] : index | |
%workgroup_size_y = hal.interface.workgroup.size[1] : index | |
%workgroup_size_z = hal.interface.workgroup.size[2] : index | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%7 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_id_z] | |
%8 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_count_z] | |
scf.for %arg0 = %7 to %c16 step %8 { | |
%9 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg0)[%workgroup_size_z] | |
%10 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_id_y] | |
%11 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_count_y] | |
scf.for %arg1 = %10 to %c16 step %11 { | |
%12 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg1)[%workgroup_size_y] | |
%13 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_id_x] | |
%14 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_count_x] | |
scf.for %arg2 = %13 to %c3 step %14 { | |
%15 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 3)>(%arg2)[%workgroup_size_x] | |
%16 = memref.subview %5[0, %arg0, %arg1, %arg2] [2, %9, %12, %15] [1, 1, 1, 1] : memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>> | |
%17 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg0)[%workgroup_size_z] | |
%18 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg1)[%workgroup_size_y] | |
%19 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 3)>(%arg2)[%workgroup_size_x] | |
%20 = memref.subview %0[0, %arg0, %arg1, 0, %arg2] [2, %17, %18, 1, %19] [1, 1, 1, 1, 1] : memref<2x16x16x16x3xf32> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
linalg.copy(%16, %20) : memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>>, memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
%21 = tensor.extract_slice %6[0, %arg0, %arg1, %arg2] [2, %9, %12, %15] [1, 1, 1, 1] : tensor<2x16x16x3xf32> to tensor<2x?x?x?xf32> | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump Before ResolveShapedTypeResultDims //----- // | |
module { | |
func @UpSampling3D__2x8x8x8x3__f32__uniform_dispatch_32() { | |
%c3 = arith.constant 3 : index | |
%c16 = arith.constant 16 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan @io::@s0b0_rw_external[%c0] : memref<2x16x16x16x3xf32> | |
%1 = hal.interface.binding.subspan @io::@s0b0_rw_external[%c0] : !flow.dispatch.tensor<readwrite:2x16x16x16x3xf32> | |
%2 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : memref<2x16x16x8x3xf32> | |
%3 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : !flow.dispatch.tensor<readonly:2x16x16x8x3xf32> | |
%4 = flow.dispatch.tensor.load %3, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:2x16x16x8x3xf32> -> tensor<2x16x16x8x3xf32> | |
%5 = memref.subview %2[0, 0, 0, 0, 0] [2, 16, 16, 1, 3] [1, 1, 1, 1, 1] : memref<2x16x16x8x3xf32> to memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> | |
%6 = tensor.extract_slice %4[0, 0, 0, 0, 0] [2, 16, 16, 1, 3] [1, 1, 1, 1, 1] : tensor<2x16x16x8x3xf32> to tensor<2x16x16x3xf32> | |
%workgroup_size_x = hal.interface.workgroup.size[0] : index | |
%workgroup_size_y = hal.interface.workgroup.size[1] : index | |
%workgroup_size_z = hal.interface.workgroup.size[2] : index | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%7 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_id_z] | |
%8 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_count_z] | |
scf.for %arg0 = %7 to %c16 step %8 { | |
%9 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg0)[%workgroup_size_z] | |
%10 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_id_y] | |
%11 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_count_y] | |
scf.for %arg1 = %10 to %c16 step %11 { | |
%12 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg1)[%workgroup_size_y] | |
%13 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_id_x] | |
%14 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_count_x] | |
scf.for %arg2 = %13 to %c3 step %14 { | |
%15 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 3)>(%arg2)[%workgroup_size_x] | |
%16 = memref.subview %5[0, %arg0, %arg1, %arg2] [2, %9, %12, %15] [1, 1, 1, 1] : memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>> | |
%17 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg0)[%workgroup_size_z] | |
%18 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg1)[%workgroup_size_y] | |
%19 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 3)>(%arg2)[%workgroup_size_x] | |
%20 = memref.subview %0[0, %arg0, %arg1, 0, %arg2] [2, %17, %18, 1, %19] [1, 1, 1, 1, 1] : memref<2x16x16x16x3xf32> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
linalg.copy(%16, %20) : memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>>, memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
%21 = tensor.extract_slice %6[0, %arg0, %arg1, %arg2] [2, %9, %12, %15] [1, 1, 1, 1] : tensor<2x16x16x3xf32> to tensor<2x?x?x?xf32> | |
} | |
} | |
} | |
return | |
} | |
hal.interface private @io { | |
hal.interface.binding public @s0b0_rw_external, set=0, binding=0, type="StorageBuffer", access="Read|Write" | |
hal.interface.binding public @s0b1_ro_external, set=0, binding=1, type="StorageBuffer", access="Read" | |
} | |
} | |
// -----// IR Dump After ResolveShapedTypeResultDims //----- // | |
module { | |
func @UpSampling3D__2x8x8x8x3__f32__uniform_dispatch_32() { | |
%c3 = arith.constant 3 : index | |
%c16 = arith.constant 16 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan @io::@s0b0_rw_external[%c0] : memref<2x16x16x16x3xf32> | |
%1 = hal.interface.binding.subspan @io::@s0b0_rw_external[%c0] : !flow.dispatch.tensor<readwrite:2x16x16x16x3xf32> | |
%2 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : memref<2x16x16x8x3xf32> | |
%3 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : !flow.dispatch.tensor<readonly:2x16x16x8x3xf32> | |
%4 = memref.subview %2[0, 0, 0, 0, 0] [2, 16, 16, 1, 3] [1, 1, 1, 1, 1] : memref<2x16x16x8x3xf32> to memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> | |
%workgroup_size_x = hal.interface.workgroup.size[0] : index | |
%workgroup_size_y = hal.interface.workgroup.size[1] : index | |
%workgroup_size_z = hal.interface.workgroup.size[2] : index | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%5 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_id_z] | |
%6 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_count_z] | |
scf.for %arg0 = %5 to %c16 step %6 { | |
%7 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg0)[%workgroup_size_z] | |
%8 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_id_y] | |
%9 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_count_y] | |
scf.for %arg1 = %8 to %c16 step %9 { | |
%10 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg1)[%workgroup_size_y] | |
%11 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_id_x] | |
%12 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_count_x] | |
scf.for %arg2 = %11 to %c3 step %12 { | |
%13 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 3)>(%arg2)[%workgroup_size_x] | |
%14 = memref.subview %4[0, %arg0, %arg1, %arg2] [2, %7, %10, %13] [1, 1, 1, 1] : memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>> | |
%15 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg0)[%workgroup_size_z] | |
%16 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg1)[%workgroup_size_y] | |
%17 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 3)>(%arg2)[%workgroup_size_x] | |
%18 = memref.subview %0[0, %arg0, %arg1, 0, %arg2] [2, %15, %16, 1, %17] [1, 1, 1, 1, 1] : memref<2x16x16x16x3xf32> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
linalg.copy(%14, %18) : memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>>, memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
} | |
} | |
} | |
return | |
} | |
hal.interface private @io { | |
hal.interface.binding public @s0b0_rw_external, set=0, binding=0, type="StorageBuffer", access="Read|Write" | |
hal.interface.binding public @s0b1_ro_external, set=0, binding=1, type="StorageBuffer", access="Read" | |
} | |
} | |
// -----// IR Dump Before Canonicalizer //----- // | |
func @UpSampling3D__2x8x8x8x3__f32__uniform_dispatch_32() { | |
%c3 = arith.constant 3 : index | |
%c16 = arith.constant 16 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan @io::@s0b0_rw_external[%c0] : memref<2x16x16x16x3xf32> | |
%1 = hal.interface.binding.subspan @io::@s0b0_rw_external[%c0] : !flow.dispatch.tensor<readwrite:2x16x16x16x3xf32> | |
%2 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : memref<2x16x16x8x3xf32> | |
%3 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : !flow.dispatch.tensor<readonly:2x16x16x8x3xf32> | |
%4 = memref.subview %2[0, 0, 0, 0, 0] [2, 16, 16, 1, 3] [1, 1, 1, 1, 1] : memref<2x16x16x8x3xf32> to memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> | |
%workgroup_size_x = hal.interface.workgroup.size[0] : index | |
%workgroup_size_y = hal.interface.workgroup.size[1] : index | |
%workgroup_size_z = hal.interface.workgroup.size[2] : index | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%5 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_id_z] | |
%6 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_count_z] | |
scf.for %arg0 = %5 to %c16 step %6 { | |
%7 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg0)[%workgroup_size_z] | |
%8 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_id_y] | |
%9 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_count_y] | |
scf.for %arg1 = %8 to %c16 step %9 { | |
%10 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg1)[%workgroup_size_y] | |
%11 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_id_x] | |
%12 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_count_x] | |
scf.for %arg2 = %11 to %c3 step %12 { | |
%13 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 3)>(%arg2)[%workgroup_size_x] | |
%14 = memref.subview %4[0, %arg0, %arg1, %arg2] [2, %7, %10, %13] [1, 1, 1, 1] : memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>> | |
%15 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg0)[%workgroup_size_z] | |
%16 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg1)[%workgroup_size_y] | |
%17 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 3)>(%arg2)[%workgroup_size_x] | |
%18 = memref.subview %0[0, %arg0, %arg1, 0, %arg2] [2, %15, %16, 1, %17] [1, 1, 1, 1, 1] : memref<2x16x16x16x3xf32> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
linalg.copy(%14, %18) : memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>>, memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func @UpSampling3D__2x8x8x8x3__f32__uniform_dispatch_32() { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c3 = arith.constant 3 : index | |
%0 = hal.interface.binding.subspan @io::@s0b0_rw_external[%c0] : memref<2x16x16x16x3xf32> | |
%1 = hal.interface.binding.subspan @io::@s0b0_rw_external[%c0] : !flow.dispatch.tensor<readwrite:2x16x16x16x3xf32> | |
%2 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : memref<2x16x16x8x3xf32> | |
%3 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : !flow.dispatch.tensor<readonly:2x16x16x8x3xf32> | |
%4 = memref.subview %2[0, 0, 0, 0, 0] [2, 16, 16, 1, 3] [1, 1, 1, 1, 1] : memref<2x16x16x8x3xf32> to memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> | |
%workgroup_size_x = hal.interface.workgroup.size[0] : index | |
%workgroup_size_y = hal.interface.workgroup.size[1] : index | |
%workgroup_size_z = hal.interface.workgroup.size[2] : index | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%5 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_id_z] | |
%6 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_count_z] | |
scf.for %arg0 = %5 to %c16 step %6 { | |
%7 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg0)[%workgroup_size_z] | |
%8 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_id_y] | |
%9 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_count_y] | |
scf.for %arg1 = %8 to %c16 step %9 { | |
%10 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg1)[%workgroup_size_y] | |
%11 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_id_x] | |
%12 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_count_x] | |
scf.for %arg2 = %11 to %c3 step %12 { | |
%13 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 3)>(%arg2)[%workgroup_size_x] | |
%14 = memref.subview %4[0, %arg0, %arg1, %arg2] [2, %7, %10, %13] [1, 1, 1, 1] : memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>> | |
%15 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg0)[%workgroup_size_z] | |
%16 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg1)[%workgroup_size_y] | |
%17 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 3)>(%arg2)[%workgroup_size_x] | |
%18 = memref.subview %0[0, %arg0, %arg1, 0, %arg2] [2, %15, %16, 1, %17] [1, 1, 1, 1, 1] : memref<2x16x16x16x3xf32> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
linalg.copy(%14, %18) : memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>>, memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump Before CSE //----- // | |
func @UpSampling3D__2x8x8x8x3__f32__uniform_dispatch_32() { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c3 = arith.constant 3 : index | |
%0 = hal.interface.binding.subspan @io::@s0b0_rw_external[%c0] : memref<2x16x16x16x3xf32> | |
%1 = hal.interface.binding.subspan @io::@s0b0_rw_external[%c0] : !flow.dispatch.tensor<readwrite:2x16x16x16x3xf32> | |
%2 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : memref<2x16x16x8x3xf32> | |
%3 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : !flow.dispatch.tensor<readonly:2x16x16x8x3xf32> | |
%4 = memref.subview %2[0, 0, 0, 0, 0] [2, 16, 16, 1, 3] [1, 1, 1, 1, 1] : memref<2x16x16x8x3xf32> to memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> | |
%workgroup_size_x = hal.interface.workgroup.size[0] : index | |
%workgroup_size_y = hal.interface.workgroup.size[1] : index | |
%workgroup_size_z = hal.interface.workgroup.size[2] : index | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%5 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_id_z] | |
%6 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_count_z] | |
scf.for %arg0 = %5 to %c16 step %6 { | |
%7 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg0)[%workgroup_size_z] | |
%8 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_id_y] | |
%9 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_count_y] | |
scf.for %arg1 = %8 to %c16 step %9 { | |
%10 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg1)[%workgroup_size_y] | |
%11 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_id_x] | |
%12 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_count_x] | |
scf.for %arg2 = %11 to %c3 step %12 { | |
%13 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 3)>(%arg2)[%workgroup_size_x] | |
%14 = memref.subview %4[0, %arg0, %arg1, %arg2] [2, %7, %10, %13] [1, 1, 1, 1] : memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>> | |
%15 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg0)[%workgroup_size_z] | |
%16 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg1)[%workgroup_size_y] | |
%17 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 3)>(%arg2)[%workgroup_size_x] | |
%18 = memref.subview %0[0, %arg0, %arg1, 0, %arg2] [2, %15, %16, 1, %17] [1, 1, 1, 1, 1] : memref<2x16x16x16x3xf32> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
linalg.copy(%14, %18) : memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>>, memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After CSE //----- // | |
func @UpSampling3D__2x8x8x8x3__f32__uniform_dispatch_32() { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c3 = arith.constant 3 : index | |
%0 = hal.interface.binding.subspan @io::@s0b0_rw_external[%c0] : memref<2x16x16x16x3xf32> | |
%1 = hal.interface.binding.subspan @io::@s0b0_rw_external[%c0] : !flow.dispatch.tensor<readwrite:2x16x16x16x3xf32> | |
%2 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : memref<2x16x16x8x3xf32> | |
%3 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : !flow.dispatch.tensor<readonly:2x16x16x8x3xf32> | |
%4 = memref.subview %2[0, 0, 0, 0, 0] [2, 16, 16, 1, 3] [1, 1, 1, 1, 1] : memref<2x16x16x8x3xf32> to memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> | |
%workgroup_size_x = hal.interface.workgroup.size[0] : index | |
%workgroup_size_y = hal.interface.workgroup.size[1] : index | |
%workgroup_size_z = hal.interface.workgroup.size[2] : index | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%5 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_id_z] | |
%6 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_count_z] | |
scf.for %arg0 = %5 to %c16 step %6 { | |
%7 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg0)[%workgroup_size_z] | |
%8 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_id_y] | |
%9 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_count_y] | |
scf.for %arg1 = %8 to %c16 step %9 { | |
%10 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg1)[%workgroup_size_y] | |
%11 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_id_x] | |
%12 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_count_x] | |
scf.for %arg2 = %11 to %c3 step %12 { | |
%13 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 3)>(%arg2)[%workgroup_size_x] | |
%14 = memref.subview %4[0, %arg0, %arg1, %arg2] [2, %7, %10, %13] [1, 1, 1, 1] : memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>> | |
%15 = memref.subview %0[0, %arg0, %arg1, 0, %arg2] [2, %7, %10, 1, %13] [1, 1, 1, 1, 1] : memref<2x16x16x16x3xf32> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
linalg.copy(%14, %15) : memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>>, memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump Before CleanupBufferAllocView //----- // | |
func @UpSampling3D__2x8x8x8x3__f32__uniform_dispatch_32() { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c3 = arith.constant 3 : index | |
%0 = hal.interface.binding.subspan @io::@s0b0_rw_external[%c0] : memref<2x16x16x16x3xf32> | |
%1 = hal.interface.binding.subspan @io::@s0b0_rw_external[%c0] : !flow.dispatch.tensor<readwrite:2x16x16x16x3xf32> | |
%2 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : memref<2x16x16x8x3xf32> | |
%3 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : !flow.dispatch.tensor<readonly:2x16x16x8x3xf32> | |
%4 = memref.subview %2[0, 0, 0, 0, 0] [2, 16, 16, 1, 3] [1, 1, 1, 1, 1] : memref<2x16x16x8x3xf32> to memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> | |
%workgroup_size_x = hal.interface.workgroup.size[0] : index | |
%workgroup_size_y = hal.interface.workgroup.size[1] : index | |
%workgroup_size_z = hal.interface.workgroup.size[2] : index | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%5 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_id_z] | |
%6 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_count_z] | |
scf.for %arg0 = %5 to %c16 step %6 { | |
%7 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg0)[%workgroup_size_z] | |
%8 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_id_y] | |
%9 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_count_y] | |
scf.for %arg1 = %8 to %c16 step %9 { | |
%10 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg1)[%workgroup_size_y] | |
%11 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_id_x] | |
%12 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_count_x] | |
scf.for %arg2 = %11 to %c3 step %12 { | |
%13 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 3)>(%arg2)[%workgroup_size_x] | |
%14 = memref.subview %4[0, %arg0, %arg1, %arg2] [2, %7, %10, %13] [1, 1, 1, 1] : memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>> | |
%15 = memref.subview %0[0, %arg0, %arg1, 0, %arg2] [2, %7, %10, 1, %13] [1, 1, 1, 1, 1] : memref<2x16x16x16x3xf32> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
linalg.copy(%14, %15) : memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>>, memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After CleanupBufferAllocView //----- // | |
func @UpSampling3D__2x8x8x8x3__f32__uniform_dispatch_32() { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c3 = arith.constant 3 : index | |
%0 = hal.interface.binding.subspan @io::@s0b0_rw_external[%c0] : memref<2x16x16x16x3xf32> | |
%1 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : memref<2x16x16x8x3xf32> | |
%2 = memref.subview %1[0, 0, 0, 0, 0] [2, 16, 16, 1, 3] [1, 1, 1, 1, 1] : memref<2x16x16x8x3xf32> to memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> | |
%workgroup_size_x = hal.interface.workgroup.size[0] : index | |
%workgroup_size_y = hal.interface.workgroup.size[1] : index | |
%workgroup_size_z = hal.interface.workgroup.size[2] : index | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%3 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_id_z] | |
%4 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_count_z] | |
scf.for %arg0 = %3 to %c16 step %4 { | |
%5 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg0)[%workgroup_size_z] | |
%6 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_id_y] | |
%7 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_count_y] | |
scf.for %arg1 = %6 to %c16 step %7 { | |
%8 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg1)[%workgroup_size_y] | |
%9 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_id_x] | |
%10 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_count_x] | |
scf.for %arg2 = %9 to %c3 step %10 { | |
%11 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 3)>(%arg2)[%workgroup_size_x] | |
%12 = memref.subview %2[0, %arg0, %arg1, %arg2] [2, %5, %8, %11] [1, 1, 1, 1] : memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>> | |
%13 = memref.subview %0[0, %arg0, %arg1, 0, %arg2] [2, %5, %8, 1, %11] [1, 1, 1, 1, 1] : memref<2x16x16x16x3xf32> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
linalg.copy(%12, %13) : memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>>, memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump Before SPIRVLowerExecutableTarget //----- // | |
hal.executable.variant public @vulkan_spirv_fb, target = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative], [SPV_KHR_storage_buffer_storage_class]>, SwiftShader:CPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 16384 : i32, max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>, subgroup_size = 4 : i32}>}> { | |
hal.executable.entry_point public @UpSampling3D__2x8x8x8x3__f32__uniform_dispatch_32 attributes {interface = @io, ordinal = 0 : index} | |
builtin.module { | |
func @UpSampling3D__2x8x8x8x3__f32__uniform_dispatch_32() { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c3 = arith.constant 3 : index | |
%0 = hal.interface.binding.subspan @io::@s0b0_rw_external[%c0] : memref<2x16x16x16x3xf32> | |
%1 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : memref<2x16x16x8x3xf32> | |
%2 = memref.subview %1[0, 0, 0, 0, 0] [2, 16, 16, 1, 3] [1, 1, 1, 1, 1] : memref<2x16x16x8x3xf32> to memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> | |
%workgroup_size_x = hal.interface.workgroup.size[0] : index | |
%workgroup_size_y = hal.interface.workgroup.size[1] : index | |
%workgroup_size_z = hal.interface.workgroup.size[2] : index | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%3 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_id_z] | |
%4 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_count_z] | |
scf.for %arg0 = %3 to %c16 step %4 { | |
%5 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg0)[%workgroup_size_z] | |
%6 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_id_y] | |
%7 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_count_y] | |
scf.for %arg1 = %6 to %c16 step %7 { | |
%8 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg1)[%workgroup_size_y] | |
%9 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_id_x] | |
%10 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_count_x] | |
scf.for %arg2 = %9 to %c3 step %10 { | |
%11 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 3)>(%arg2)[%workgroup_size_x] | |
%12 = memref.subview %2[0, %arg0, %arg1, %arg2] [2, %5, %8, %11] [1, 1, 1, 1] : memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>> | |
%13 = memref.subview %0[0, %arg0, %arg1, 0, %arg2] [2, %5, %8, 1, %11] [1, 1, 1, 1, 1] : memref<2x16x16x16x3xf32> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
linalg.copy(%12, %13) : memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>>, memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
} | |
} | |
} | |
return | |
} | |
hal.interface private @io { | |
hal.interface.binding public @s0b0_rw_external, set=0, binding=0, type="StorageBuffer", access="Read|Write" | |
hal.interface.binding public @s0b1_ro_external, set=0, binding=1, type="StorageBuffer", access="Read" | |
} | |
} | |
} | |
// -----// IR Dump Before SetNumWorkgroups //----- // | |
hal.executable.variant public @vulkan_spirv_fb, target = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative], [SPV_KHR_storage_buffer_storage_class]>, SwiftShader:CPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 16384 : i32, max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>, subgroup_size = 4 : i32}>}> { | |
hal.executable.entry_point public @UpSampling3D__2x8x8x8x3__f32__uniform_dispatch_32 attributes {interface = @io, ordinal = 0 : index, translation.info = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [1, 4, 1]>, workgroup_size = [1 : index, 4 : index, 1 : index]} | |
builtin.module { | |
func @UpSampling3D__2x8x8x8x3__f32__uniform_dispatch_32() { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c3 = arith.constant 3 : index | |
%0 = hal.interface.binding.subspan @io::@s0b0_rw_external[%c0] : memref<2x16x16x16x3xf32> | |
%1 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : memref<2x16x16x8x3xf32> | |
%2 = memref.subview %1[0, 0, 0, 0, 0] [2, 16, 16, 1, 3] [1, 1, 1, 1, 1] : memref<2x16x16x8x3xf32> to memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> | |
%workgroup_size_x = hal.interface.workgroup.size[0] : index | |
%workgroup_size_y = hal.interface.workgroup.size[1] : index | |
%workgroup_size_z = hal.interface.workgroup.size[2] : index | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%3 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_id_z] | |
%4 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_z, %workgroup_count_z] | |
scf.for %arg0 = %3 to %c16 step %4 { | |
%5 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg0)[%workgroup_size_z] | |
%6 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_id_y] | |
%7 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_y, %workgroup_count_y] | |
scf.for %arg1 = %6 to %c16 step %7 { | |
%8 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 16)>(%arg1)[%workgroup_size_y] | |
%9 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_id_x] | |
%10 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%workgroup_size_x, %workgroup_count_x] | |
scf.for %arg2 = %9 to %c3 step %10 { | |
%11 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 3)>(%arg2)[%workgroup_size_x] | |
%12 = memref.subview %2[0, %arg0, %arg1, %arg2] [2, %5, %8, %11] [1, 1, 1, 1] : memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>> | |
%13 = memref.subview %0[0, %arg0, %arg1, 0, %arg2] [2, %5, %8, 1, %11] [1, 1, 1, 1, 1] : memref<2x16x16x16x3xf32> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
linalg.copy(%12, %13) {__internal_linalg_transform__ = "workgroup", lowering.config = #iree_codegen.lowering.config<tile_sizes = [[0, 1, 4, 1], [0, 1, 1, 1]], native_vector_size = []>} : memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>>, memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
} | |
} | |
} | |
return | |
} | |
hal.interface private @io { | |
hal.interface.binding public @s0b0_rw_external, set=0, binding=0, type="StorageBuffer", access="Read|Write" | |
hal.interface.binding public @s0b1_ro_external, set=0, binding=1, type="StorageBuffer", access="Read" | |
} | |
} | |
} | |
// -----// IR Dump After SetNumWorkgroups //----- // | |
hal.executable.variant public @vulkan_spirv_fb, target = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative], [SPV_KHR_storage_buffer_storage_class]>, SwiftShader:CPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 16384 : i32, max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>, subgroup_size = 4 : i32}>}> { | |
hal.executable.entry_point public @UpSampling3D__2x8x8x8x3__f32__uniform_dispatch_32 attributes {interface = @io, ordinal = 0 : index, translation.info = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [1, 4, 1]>, workgroup_size = [1 : index, 4 : index, 1 : index]} { | |
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%c1 = arith.constant 1 : index | |
%0 = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%arg1] | |
hal.return %arg0, %0, %arg2 : index, index, index | |
} | |
builtin.module { | |
func @UpSampling3D__2x8x8x8x3__f32__uniform_dispatch_32() { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c3 = arith.constant 3 : index | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%0 = hal.interface.binding.subspan @io::@s0b0_rw_external[%c0] : memref<2x16x16x16x3xf32> | |
%1 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : memref<2x16x16x8x3xf32> | |
%2 = memref.subview %1[0, 0, 0, 0, 0] [2, 16, 16, 1, 3] [1, 1, 1, 1, 1] : memref<2x16x16x8x3xf32> to memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%3 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%c1, %workgroup_id_z] | |
%4 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%c1, %workgroup_count_z] | |
scf.for %arg0 = %3 to %c16 step %4 { | |
%5 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%c4, %workgroup_id_y] | |
%6 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%c4, %workgroup_count_y] | |
scf.for %arg1 = %5 to %c16 step %6 { | |
%7 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%c1, %workgroup_id_x] | |
%8 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%c1, %workgroup_count_x] | |
scf.for %arg2 = %7 to %c3 step %8 { | |
%9 = memref.subview %2[0, %arg0, %arg1, %arg2] [2, %c1, %c4, %c1] [1, 1, 1, 1] : memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>> | |
%10 = memref.subview %0[0, %arg0, %arg1, 0, %arg2] [2, %c1, %c4, 1, %c1] [1, 1, 1, 1, 1] : memref<2x16x16x16x3xf32> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
linalg.copy(%9, %10) {__internal_linalg_transform__ = "workgroup", lowering.config = #iree_codegen.lowering.config<tile_sizes = [[0, 1, 4, 1], [0, 1, 1, 1]], native_vector_size = []>} : memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>>, memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
} | |
} | |
} | |
return | |
} | |
hal.interface private @io { | |
hal.interface.binding public @s0b0_rw_external, set=0, binding=0, type="StorageBuffer", access="Read|Write" | |
hal.interface.binding public @s0b1_ro_external, set=0, binding=1, type="StorageBuffer", access="Read" | |
} | |
} | |
} | |
// -----// IR Dump Before Canonicalizer //----- // | |
hal.executable.variant public @vulkan_spirv_fb, target = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative], [SPV_KHR_storage_buffer_storage_class]>, SwiftShader:CPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 16384 : i32, max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>, subgroup_size = 4 : i32}>}> { | |
hal.executable.entry_point public @UpSampling3D__2x8x8x8x3__f32__uniform_dispatch_32 attributes {interface = @io, ordinal = 0 : index, translation.info = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [1, 4, 1]>, workgroup_size = [1 : index, 4 : index, 1 : index]} { | |
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%c1 = arith.constant 1 : index | |
%0 = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%arg1] | |
hal.return %arg0, %0, %arg2 : index, index, index | |
} | |
builtin.module { | |
func @UpSampling3D__2x8x8x8x3__f32__uniform_dispatch_32() { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c3 = arith.constant 3 : index | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%0 = hal.interface.binding.subspan @io::@s0b0_rw_external[%c0] : memref<2x16x16x16x3xf32> | |
%1 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : memref<2x16x16x8x3xf32> | |
%2 = memref.subview %1[0, 0, 0, 0, 0] [2, 16, 16, 1, 3] [1, 1, 1, 1, 1] : memref<2x16x16x8x3xf32> to memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%3 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%c1, %workgroup_id_z] | |
%4 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%c1, %workgroup_count_z] | |
scf.for %arg0 = %3 to %c16 step %4 { | |
%5 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%c4, %workgroup_id_y] | |
%6 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%c4, %workgroup_count_y] | |
scf.for %arg1 = %5 to %c16 step %6 { | |
%7 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%c1, %workgroup_id_x] | |
%8 = affine.apply affine_map<()[s0, s1] -> (s1 * s0)>()[%c1, %workgroup_count_x] | |
scf.for %arg2 = %7 to %c3 step %8 { | |
%9 = memref.subview %2[0, %arg0, %arg1, %arg2] [2, %c1, %c4, %c1] [1, 1, 1, 1] : memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>> | |
%10 = memref.subview %0[0, %arg0, %arg1, 0, %arg2] [2, %c1, %c4, 1, %c1] [1, 1, 1, 1, 1] : memref<2x16x16x16x3xf32> to memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
linalg.copy(%9, %10) {__internal_linalg_transform__ = "workgroup", lowering.config = #iree_codegen.lowering.config<tile_sizes = [[0, 1, 4, 1], [0, 1, 1, 1]], native_vector_size = []>} : memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>>, memref<2x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 768 + d2 * 48 + d3)>> | |
} | |
} | |
} | |
return | |
} | |
hal.interface private @io { | |
hal.interface.binding public @s0b0_rw_external, set=0, binding=0, type="StorageBuffer", access="Read|Write" | |
hal.interface.binding public @s0b1_ro_external, set=0, binding=1, type="StorageBuffer", access="Read" | |
} | |
} | |
} | |
/usr/local/google/home/antiagainst/.pyenv/versions/3.9.4/lib/python3.9/site-packages/tensorflow/python/keras/engine/functional.py:420:0: error: 'linalg.copy' op inferred input/output operand #1 has shape's dimension #1 to be 1, but found 4 | |
/usr/local/google/home/antiagainst/.pyenv/versions/3.9.4/lib/python3.9/site-packages/tensorflow/python/keras/engine/base_layer.py:1031:0: note: called from | |
/usr/local/google/home/antiagainst/iree/integrations/tensorflow/e2e/keras/layers/layers_test.py:512:0: note: called from | |
/usr/local/google/home/antiagainst/.pyenv/versions/3.9.4/lib/python3.9/site-packages/tensorflow/python/autograph/core/function_wrappers.py:117:0: note: called from | |
/usr/local/google/home/antiagainst/iree/integrations/tensorflow/e2e/keras/layers/layers_test.py:512:0: note: called from | |
/usr/local/google/home/antiagainst/.pyenv/versions/3.9.4/lib/python3.9/site-packages/tensorflow/python/framework/func_graph.py:975:0: note: called from | |
/usr/local/google/home/antiagainst/.pyenv/versions/3.9.4/lib/python3.9/site-packages/tensorflow/python/eager/def_function.py:668:0: note: called from | |
/usr/local/google/home/antiagainst/.pyenv/versions/3.9.4/lib/python3.9/site-packages/tensorflow/python/framework/func_graph.py:999:0: note: called from | |
/usr/local/google/home/antiagainst/.pyenv/versions/3.9.4/lib/python3.9/site-packages/tensorflow/python/eager/function.py:3291:0: note: called from | |
/usr/local/google/home/antiagainst/.pyenv/versions/3.9.4/lib/python3.9/site-packages/tensorflow/python/eager/function.py:3456:0: note: called from | |
/usr/local/google/home/antiagainst/.pyenv/versions/3.9.4/lib/python3.9/site-packages/tensorflow/python/keras/engine/functional.py:420:0: note: see current operation: "linalg.copy"(%14, %15) ( { | |
^bb0(%arg3: f32, %arg4: f32): // no predecessors | |
"linalg.yield"(%arg3) : (f32) -> () | |
}) {__internal_linalg_transform__ = "workgroup", lowering.config = #iree_codegen.lowering.config<tile_sizes = [[0, 1, 4, 1], [0, 1, 1, 1]], native_vector_size = []>} : (memref<2x1x4x1xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>>, memref<2x4x1x1xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 48 + d2 * 3 + d3)>>) -> () | |
// -----// IR Dump After Canonicalizer Failed //----- // | |
"hal.executable.variant"() ( { | |
"hal.executable.entry_point"() ( { | |
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%0 = "affine.apply"(%arg1) {map = affine_map<()[s0] -> (s0 ceildiv 4)>} : (index) -> index | |
"hal.return"(%arg0, %0, %arg2) : (index, index, index) -> () | |
}) {interface = @io, ordinal = 0 : index, sym_name = "UpSampling3D__2x8x8x8x3__f32__uniform_dispatch_32", translation.info = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [1, 4, 1]>, workgroup_size = [1 : index, 4 : index, 1 : index]} : () -> () | |
"builtin.module"() ( { | |
"builtin.func"() ( { | |
%0 = "arith.constant"() {value = 3 : index} : () -> index | |
%1 = "arith.constant"() {value = 16 : index} : () -> index | |
%2 = "arith.constant"() {value = 0 : index} : () -> index | |
%3 = "hal.interface.binding.subspan"(%2) {binding = @io::@s0b0_rw_external, operand_segment_sizes = dense<[1, 0, 0]> : vector<3xi32>} : (index) -> memref<2x16x16x16x3xf32> | |
%4 = "hal.interface.binding.subspan"(%2) {binding = @io::@s0b1_ro_external, operand_segment_sizes = dense<[1, 0, 0]> : vector<3xi32>} : (index) -> memref<2x16x16x8x3xf32> | |
%5 = "memref.subview"(%4) {operand_segment_sizes = dense<[1, 0, 0, 0]> : vector<4xi32>, static_offsets = [0, 0, 0, 0, 0], static_sizes = [2, 16, 16, 1, 3], static_strides = [1, 1, 1, 1, 1]} : (memref<2x16x16x8x3xf32>) -> memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>> | |
%6 = "hal.interface.workgroup.id"() {dimension = 0 : index} : () -> index | |
%7 = "hal.interface.workgroup.count"() {dimension = 0 : index} : () -> index | |
%8 = "hal.interface.workgroup.id"() {dimension = 1 : index} : () -> index | |
%9 = "hal.interface.workgroup.count"() {dimension = 1 : index} : () -> index | |
%10 = "hal.interface.workgroup.id"() {dimension = 2 : index} : () -> index | |
%11 = "hal.interface.workgroup.count"() {dimension = 2 : index} : () -> index | |
"scf.for"(%10, %1, %11) ( { | |
^bb0(%arg0: index): // no predecessors | |
%12 = "affine.apply"(%8) {map = affine_map<()[s0] -> (s0 * 4)>} : (index) -> index | |
%13 = "affine.apply"(%9) {map = affine_map<()[s0] -> (s0 * 4)>} : (index) -> index | |
"scf.for"(%12, %1, %13) ( { | |
^bb0(%arg1: index): // no predecessors | |
"scf.for"(%6, %0, %7) ( { | |
^bb0(%arg2: index): // no predecessors | |
%14 = "memref.subview"(%5, %arg0, %arg1, %arg2) {operand_segment_sizes = dense<[1, 3, 0, 0]> : vector<4xi32>, static_offsets = [0, -9223372036854775808, -9223372036854775808, -9223372036854775808], static_sizes = [2, 1, 4, 1], static_strides = [1, 1, 1, 1]} : (memref<2x16x16x3xf32, affine_map<(d0, d1, d2, d3) -> (d0 * 6144 + d1 * 384 + d2 * 24 + d3)>>, index, index, index) -> memref<2x1x4x1xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>> | |
%15 = "memref.subview"(%3, %arg0, %arg1, %arg2) {operand_segment_sizes = dense<[1, 3, 0, 0]> : vector<4xi32>, static_offsets = [0, -9223372036854775808, -9223372036854775808, 0, -9223372036854775808], static_sizes = [2, 1, 4, 1, 1], static_strides = [1, 1, 1, 1, 1]} : (memref<2x16x16x16x3xf32>, index, index, index) -> memref<2x4x1x1xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 48 + d2 * 3 + d3)>> | |
"linalg.copy"(%14, %15) ( { | |
^bb0(%arg3: f32, %arg4: f32): // no predecessors | |
"linalg.yield"(%arg3) : (f32) -> () | |
}) {__internal_linalg_transform__ = "workgroup", lowering.config = #iree_codegen.lowering.config<tile_sizes = [[0, 1, 4, 1], [0, 1, 1, 1]], native_vector_size = []>} : (memref<2x1x4x1xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 6144 + s0 + d1 * 384 + d2 * 24 + d3)>>, memref<2x4x1x1xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 12288 + s0 + d1 * 48 + d2 * 3 + d3)>>) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"std.return"() : () -> () | |
}) {sym_name = "UpSampling3D__2x8x8x8x3__f32__uniform_dispatch_32", type = () -> ()} : () -> () | |
"hal.interface"() ( { | |
"hal.interface.binding"() {access = 3 : i32, binding = 0 : index, set = 0 : index, sym_name = "s0b0_rw_external", type = 7 : i32} : () -> () | |
"hal.interface.binding"() {access = 1 : i32, binding = 1 : index, set = 0 : index, sym_name = "s0b1_ro_external", type = 7 : i32} : () -> () | |
"hal.interface_end"() : () -> () | |
}) {sym_name = "io", sym_visibility = "private"} : () -> () | |
}) : () -> () | |
"hal.executable.variant_end"() : () -> () | |
}) {sym_name = "vulkan_spirv_fb", target = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative], [SPV_KHR_storage_buffer_storage_class]>, SwiftShader:CPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 16384 : i32, max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>, subgroup_size = 4 : i32}>}>} : () -> () |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment