Created
April 22, 2024 16:19
-
-
Save pashu123/530bcfea69329faccaf6633931dab239 to your computer and use it in GitHub Desktop.
This file has been truncated, but you can view the full file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -----// IR Dump After TypePropagation (iree-codegen-type-propagation) //----- // | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() { | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = flow.dispatch.workload.ordinal %13, 0 : index | |
%16 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
%17 = flow.dispatch.tensor.load %14, offsets = [0, 0], sizes = [8640, 3200], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<8640x3200xf16> | |
%18 = tensor.empty(%15) : tensor<?x540x3200x16x1xf16> | |
%19 = tensor.empty(%15) : tensor<?x8640x3200xf16> | |
%20 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%17 : tensor<8640x3200xf16>) outs(%19 : tensor<?x8640x3200xf16>) { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<?x8640x3200xf16> | |
%pack = tensor.pack %20 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %18 : tensor<?x8640x3200xf16> -> tensor<?x540x3200x16x1xf16> | |
flow.dispatch.tensor.store %pack, %16, offsets = [0, 0, 0, 0, 0], sizes = [%15, 540, 3200, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x540x3200x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
return | |
} | |
// -----// IR Dump After BubbleUpOrdinalOps (iree-codegen-bubble-up-ordinal-ops) //----- // | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() { | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = flow.dispatch.workload.ordinal %13, 0 : index | |
%16 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
%17 = flow.dispatch.tensor.load %14, offsets = [0, 0], sizes = [8640, 3200], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<8640x3200xf16> | |
%18 = tensor.empty(%15) : tensor<?x540x3200x16x1xf16> | |
%19 = tensor.empty(%15) : tensor<?x8640x3200xf16> | |
%20 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%17 : tensor<8640x3200xf16>) outs(%19 : tensor<?x8640x3200xf16>) { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<?x8640x3200xf16> | |
%pack = tensor.pack %20 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %18 : tensor<?x8640x3200xf16> -> tensor<?x540x3200x16x1xf16> | |
flow.dispatch.tensor.store %pack, %16, offsets = [0, 0, 0, 0, 0], sizes = [%15, 540, 3200, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x540x3200x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
return | |
} | |
// -----// IR Dump After BufferizeCopyOnlyDispatches (iree-codegen-bufferize-copy-only-dispatches) //----- // | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() { | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = flow.dispatch.workload.ordinal %13, 0 : index | |
%16 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
%17 = flow.dispatch.tensor.load %14, offsets = [0, 0], sizes = [8640, 3200], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<8640x3200xf16> | |
%18 = tensor.empty(%15) : tensor<?x540x3200x16x1xf16> | |
%19 = tensor.empty(%15) : tensor<?x8640x3200xf16> | |
%20 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%17 : tensor<8640x3200xf16>) outs(%19 : tensor<?x8640x3200xf16>) { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<?x8640x3200xf16> | |
%pack = tensor.pack %20 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %18 : tensor<?x8640x3200xf16> -> tensor<?x540x3200x16x1xf16> | |
flow.dispatch.tensor.store %pack, %16, offsets = [0, 0, 0, 0, 0], sizes = [%15, 540, 3200, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x540x3200x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
return | |
} | |
// -----// IR Dump After DecomposeSoftmax (iree-codegen-decompose-softmax) //----- // | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() { | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = flow.dispatch.workload.ordinal %13, 0 : index | |
%16 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
%17 = flow.dispatch.tensor.load %14, offsets = [0, 0], sizes = [8640, 3200], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<8640x3200xf16> | |
%18 = tensor.empty(%15) : tensor<?x540x3200x16x1xf16> | |
%19 = tensor.empty(%15) : tensor<?x8640x3200xf16> | |
%20 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%17 : tensor<8640x3200xf16>) outs(%19 : tensor<?x8640x3200xf16>) { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<?x8640x3200xf16> | |
%pack = tensor.pack %20 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %18 : tensor<?x8640x3200xf16> -> tensor<?x540x3200x16x1xf16> | |
flow.dispatch.tensor.store %pack, %16, offsets = [0, 0, 0, 0, 0], sizes = [%15, 540, 3200, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x540x3200x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
return | |
} | |
// -----// IR Dump After MaterializeUserConfigs (iree-codegen-materialize-user-configs) //----- // | |
module { | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() { | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = flow.dispatch.workload.ordinal %13, 0 : index | |
%16 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
%17 = flow.dispatch.tensor.load %14, offsets = [0, 0], sizes = [8640, 3200], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<8640x3200xf16> | |
%18 = tensor.empty(%15) : tensor<?x540x3200x16x1xf16> | |
%19 = tensor.empty(%15) : tensor<?x8640x3200xf16> | |
%20 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%17 : tensor<8640x3200xf16>) outs(%19 : tensor<?x8640x3200xf16>) { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<?x8640x3200xf16> | |
%pack = tensor.pack %20 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %18 : tensor<?x8640x3200xf16> -> tensor<?x540x3200x16x1xf16> | |
flow.dispatch.tensor.store %pack, %16, offsets = [0, 0, 0, 0, 0], sizes = [%15, 540, 3200, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x540x3200x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
return | |
} | |
} | |
// -----// IR Dump After RematerializeParallelOps (iree-codegen-rematerialize-parallel-ops) //----- // | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() { | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = flow.dispatch.workload.ordinal %13, 0 : index | |
%16 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
%17 = flow.dispatch.tensor.load %14, offsets = [0, 0], sizes = [8640, 3200], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<8640x3200xf16> | |
%18 = tensor.empty(%15) : tensor<?x540x3200x16x1xf16> | |
%19 = tensor.empty(%15) : tensor<?x8640x3200xf16> | |
%20 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%17 : tensor<8640x3200xf16>) outs(%19 : tensor<?x8640x3200xf16>) { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<?x8640x3200xf16> | |
%pack = tensor.pack %20 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %18 : tensor<?x8640x3200xf16> -> tensor<?x540x3200x16x1xf16> | |
flow.dispatch.tensor.store %pack, %16, offsets = [0, 0, 0, 0, 0], sizes = [%15, 540, 3200, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x540x3200x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
return | |
} | |
// -----// IR Dump After ExpandF16OpToF32 (iree-llvmcpu-expand-f16-op-to-f32) //----- // | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() { | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = flow.dispatch.workload.ordinal %13, 0 : index | |
%16 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
%17 = flow.dispatch.tensor.load %14, offsets = [0, 0], sizes = [8640, 3200], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<8640x3200xf16> | |
%18 = tensor.empty(%15) : tensor<?x540x3200x16x1xf16> | |
%19 = tensor.empty(%15) : tensor<?x8640x3200xf16> | |
%20 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%17 : tensor<8640x3200xf16>) outs(%19 : tensor<?x8640x3200xf16>) { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<?x8640x3200xf16> | |
%pack = tensor.pack %20 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %18 : tensor<?x8640x3200xf16> -> tensor<?x540x3200x16x1xf16> | |
flow.dispatch.tensor.store %pack, %16, offsets = [0, 0, 0, 0, 0], sizes = [%15, 540, 3200, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x540x3200x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
return | |
} | |
// -----// IR Dump After CPUMaterializeEncoding (iree-codegen-cpu-materialize-encoding) //----- // | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() { | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = flow.dispatch.workload.ordinal %13, 0 : index | |
%16 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
%17 = flow.dispatch.tensor.load %14, offsets = [0, 0], sizes = [8640, 3200], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<8640x3200xf16> | |
%18 = tensor.empty(%15) : tensor<?x540x3200x16x1xf16> | |
%19 = tensor.empty(%15) : tensor<?x8640x3200xf16> | |
%20 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%17 : tensor<8640x3200xf16>) outs(%19 : tensor<?x8640x3200xf16>) { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<?x8640x3200xf16> | |
%pack = tensor.pack %20 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %18 : tensor<?x8640x3200xf16> -> tensor<?x540x3200x16x1xf16> | |
flow.dispatch.tensor.store %pack, %16, offsets = [0, 0, 0, 0, 0], sizes = [%15, 540, 3200, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x540x3200x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
return | |
} | |
// -----// IR Dump After EraseHALDescriptorTypeFromMemRef (iree-codegen-erase-hal-descriptor-type-from-memref) //----- // | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() { | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = flow.dispatch.workload.ordinal %13, 0 : index | |
%16 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
%17 = flow.dispatch.tensor.load %14, offsets = [0, 0], sizes = [8640, 3200], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<8640x3200xf16> | |
%18 = tensor.empty(%15) : tensor<?x540x3200x16x1xf16> | |
%19 = tensor.empty(%15) : tensor<?x8640x3200xf16> | |
%20 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%17 : tensor<8640x3200xf16>) outs(%19 : tensor<?x8640x3200xf16>) { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<?x8640x3200xf16> | |
%pack = tensor.pack %20 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %18 : tensor<?x8640x3200xf16> -> tensor<?x540x3200x16x1xf16> | |
flow.dispatch.tensor.store %pack, %16, offsets = [0, 0, 0, 0, 0], sizes = [%15, 540, 3200, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x540x3200x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
return | |
} | |
// -----// IR Dump After LLVMCPUSelectLoweringStrategy (iree-llvmcpu-select-lowering-strategy) //----- // | |
module { | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = flow.dispatch.workload.ordinal %13, 0 : index | |
%16 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
%17 = flow.dispatch.tensor.load %14, offsets = [0, 0], sizes = [8640, 3200], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<8640x3200xf16> | |
%18 = tensor.empty(%15) : tensor<?x540x3200x16x1xf16> | |
%19 = tensor.empty(%15) : tensor<?x8640x3200xf16> | |
%20 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%17 : tensor<8640x3200xf16>) outs(%19 : tensor<?x8640x3200xf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64], [1, 1, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<?x8640x3200xf16> | |
%pack = tensor.pack %20 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %18 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 4, 64], [1, 0, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<?x8640x3200xf16> -> tensor<?x540x3200x16x1xf16> | |
flow.dispatch.tensor.store %pack, %16, offsets = [0, 0, 0, 0, 0], sizes = [%15, 540, 3200, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x540x3200x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
return | |
} | |
} | |
// -----// IR Dump After ConfigureTargetExecutableVariantsPass (iree-hal-configure-target-executable-variants) //----- // | |
hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {cpu = "znver4", cpu_features = "+prfchw,-cldemote,+avx,+aes,+sahf,+pclmul,-xop,+crc32,+xsaves,-avx512fp16,-usermsr,-sm4,-egpr,+sse4.1,+avx512ifma,+xsave,-avx512pf,+sse4.2,-tsxldtrk,-ptwrite,-widekl,-sm3,+invpcid,+64bit,+xsavec,-avx10.1-512,+avx512vpopcntdq,+cmov,-avx512vp2intersect,+avx512cd,+movbe,-avxvnniint8,-avx512er,-ccmp,-amx-int8,-kl,-avx10.1-256,-sha512,-avxvnni,-rtm,+adx,+avx2,-hreset,-movdiri,-serialize,+vpclmulqdq,+avx512vl,-uintr,-cf,+clflushopt,-raoint,-cmpccxadd,+bmi,-amx-tile,+sse,+gfni,-avxvnniint16,-amx-fp16,-ndd,+xsaveopt,+rdrnd,+avx512f,-amx-bf16,+avx512bf16,+avx512vnni,-push2pop2,+cx8,+avx512bw,+sse3,+pku,+fsgsbase,+clzero,+mwaitx,-lwp,+lzcnt,+sha,-movdir64b,-ppx,+wbnoinvd,-enqcmd,-prefetchwt1,-avxneconvert,-tbm,-pconfig,-amx-complex,+ssse3,+cx16,+bmi2,+fma,+popcnt,-avxifma,+f16c,+avx512bitalg,+rdpru,+clwb,+mmx,+sse2,+rdseed,+avx512vbmi2,-prefetchi,+rdpid,-fma4,+avx512vbmi,+shstk,+vaes,-waitpkg,-sgx,+fxsr,+avx512dq,+sse4a", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 64 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}>) { | |
hal.executable.export public @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack ordinal(0) layout(#hal.pipeline.layout<push_constants = 4, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) attributes {hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>]} { | |
^bb0(%arg0: !hal.device, %arg1: index): | |
%x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg1 | |
hal.return %x, %y, %z : index, index, index | |
} | |
builtin.module { | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = flow.dispatch.workload.ordinal %13, 0 : index | |
%16 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
%17 = flow.dispatch.tensor.load %14, offsets = [0, 0], sizes = [8640, 3200], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<8640x3200xf16> | |
%18 = tensor.empty(%15) : tensor<?x540x3200x16x1xf16> | |
%19 = tensor.empty(%15) : tensor<?x8640x3200xf16> | |
%20 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%17 : tensor<8640x3200xf16>) outs(%19 : tensor<?x8640x3200xf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64], [1, 1, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<?x8640x3200xf16> | |
%pack = tensor.pack %20 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %18 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 4, 64], [1, 0, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<?x8640x3200xf16> -> tensor<?x540x3200x16x1xf16> | |
flow.dispatch.tensor.store %pack, %16, offsets = [0, 0, 0, 0, 0], sizes = [%15, 540, 3200, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x540x3200x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
return | |
} | |
} | |
} | |
// -----// IR Dump After ConfigureExecutablesPass (iree-hal-configure-executables) //----- // | |
hal.executable public @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1 { | |
hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {cpu = "znver4", cpu_features = "+prfchw,-cldemote,+avx,+aes,+sahf,+pclmul,-xop,+crc32,+xsaves,-avx512fp16,-usermsr,-sm4,-egpr,+sse4.1,+avx512ifma,+xsave,-avx512pf,+sse4.2,-tsxldtrk,-ptwrite,-widekl,-sm3,+invpcid,+64bit,+xsavec,-avx10.1-512,+avx512vpopcntdq,+cmov,-avx512vp2intersect,+avx512cd,+movbe,-avxvnniint8,-avx512er,-ccmp,-amx-int8,-kl,-avx10.1-256,-sha512,-avxvnni,-rtm,+adx,+avx2,-hreset,-movdiri,-serialize,+vpclmulqdq,+avx512vl,-uintr,-cf,+clflushopt,-raoint,-cmpccxadd,+bmi,-amx-tile,+sse,+gfni,-avxvnniint16,-amx-fp16,-ndd,+xsaveopt,+rdrnd,+avx512f,-amx-bf16,+avx512bf16,+avx512vnni,-push2pop2,+cx8,+avx512bw,+sse3,+pku,+fsgsbase,+clzero,+mwaitx,-lwp,+lzcnt,+sha,-movdir64b,-ppx,+wbnoinvd,-enqcmd,-prefetchwt1,-avxneconvert,-tbm,-pconfig,-amx-complex,+ssse3,+cx16,+bmi2,+fma,+popcnt,-avxifma,+f16c,+avx512bitalg,+rdpru,+clwb,+mmx,+sse2,+rdseed,+avx512vbmi2,-prefetchi,+rdpid,-fma4,+avx512vbmi,+shstk,+vaes,-waitpkg,-sgx,+fxsr,+avx512dq,+sse4a", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 64 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}>) { | |
hal.executable.export public @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack ordinal(0) layout(#hal.pipeline.layout<push_constants = 4, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) attributes {hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>]} { | |
^bb0(%arg0: !hal.device, %arg1: index): | |
%x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg1 | |
hal.return %x, %y, %z : index, index, index | |
} | |
builtin.module { | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = flow.dispatch.workload.ordinal %13, 0 : index | |
%16 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
%17 = flow.dispatch.tensor.load %14, offsets = [0, 0], sizes = [8640, 3200], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<8640x3200xf16> | |
%18 = tensor.empty(%15) : tensor<?x540x3200x16x1xf16> | |
%19 = tensor.empty(%15) : tensor<?x8640x3200xf16> | |
%20 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%17 : tensor<8640x3200xf16>) outs(%19 : tensor<?x8640x3200xf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64], [1, 1, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<?x8640x3200xf16> | |
%pack = tensor.pack %20 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %18 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 4, 64], [1, 0, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<?x8640x3200xf16> -> tensor<?x540x3200x16x1xf16> | |
flow.dispatch.tensor.store %pack, %16, offsets = [0, 0, 0, 0, 0], sizes = [%15, 540, 3200, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x540x3200x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
return | |
} | |
} | |
} | |
} | |
// -----// IR Dump After LowerExecutableUsingTransformDialect (iree-codegen-lower-executable-using-transform-dialect) //----- // | |
// Untiled form of the dispatch: broadcast a static 8640x3200 f16 tensor along a
// new dynamic leading dimension, then pack it into a ?x540x3200x16x1 layout.
// The function carries translation_info = CPUDoubleTilingExpert, and both the
// linalg.generic and the tensor.pack carry a lowering_config with tile sizes.
module { | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
// Four i32 interface constants are combined pairwise into two 64-bit values.
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
// %8 = index((%1 << 32) | %0); used below as the offset of binding(1).
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
// %13 = index((%3 << 32) | %2); becomes the dynamic leading dimension via %15.
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
// binding(0): read-only 8640x3200 f16 source; binding(1): write-only packed result.
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = flow.dispatch.workload.ordinal %13, 0 : index | |
%16 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
%17 = flow.dispatch.tensor.load %14, offsets = [0, 0], sizes = [8640, 3200], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<8640x3200xf16> | |
%18 = tensor.empty(%15) : tensor<?x540x3200x16x1xf16> | |
%19 = tensor.empty(%15) : tensor<?x8640x3200xf16> | |
// Broadcast: the input is indexed by (d1, d2) only, the output by (d0, d1, d2),
// and the body yields the input element unchanged.
%20 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%17 : tensor<8640x3200xf16>) outs(%19 : tensor<?x8640x3200xf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64], [1, 1, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<?x8640x3200xf16> | |
// Pack dims 1 and 2 with inner tiles 16 and 1: 8640x3200 -> 540x3200x16x1.
%pack = tensor.pack %20 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %18 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 4, 64], [1, 0, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<?x8640x3200xf16> -> tensor<?x540x3200x16x1xf16> | |
flow.dispatch.tensor.store %pack, %16, offsets = [0, 0, 0, 0, 0], sizes = [%15, 540, 3200, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x540x3200x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%15} | |
return | |
} | |
} | |
// -----// IR Dump After TileAndDistributeToWorkgroups (iree-codegen-tile-and-distribute-to-workgroups) //----- // | |
// Workgroup-distributed form of the broadcast + pack dispatch.
// Three nested scf.for loops walk the packed result's outer dims
// (dynamic dim, 540, 3200) in steps of 64, 4 and 64, each scaled by the
// workgroup count and started at the workgroup id on z, y and x respectively.
// The flow.dispatch.workload.ordinal op is no longer present; %13 is used directly.
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c64 = arith.constant 64 : index | |
%c4 = arith.constant 4 : index | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
// %8 = index((%1 << 32) | %0): offset of binding(1).
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
// %13 = index((%3 << 32) | %2): dynamic leading dimension and bound of the outer loop.
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
scf.for %arg0 = %16 to %13 step %17 { | |
// %18 = min(%13 - %arg0, 64): extent of this tile along the dynamic dimension.
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
scf.for %arg1 = %19 to %c540 step %20 { | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%22 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg2 = %21 to %c3200 step %22 { | |
// %arg1 indexes the packed 540 dim; multiplying by the inner tile (16) gives
// the row offset into the unpacked 8640x3200 source.
%23 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
%24 = flow.dispatch.tensor.load %14, offsets = [%23, %arg2], sizes = [%c64, %c64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<?x?xf16> | |
%25 = tensor.empty(%18) : tensor<?x64x64xf16> | |
%cast = tensor.cast %24 : tensor<?x?xf16> to tensor<64x64xf16> | |
// Broadcast of the 64x64 source tile along the tiled dynamic dimension.
%26 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%cast : tensor<64x64xf16>) outs(%25 : tensor<?x64x64xf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64], [1, 1, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<?x64x64xf16> | |
%27 = tensor.empty(%18) : tensor<?x4x64x16x1xf16> | |
%pack = tensor.pack %26 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %27 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 4, 64], [1, 0, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<?x64x64xf16> -> tensor<?x4x64x16x1xf16> | |
%cast_0 = tensor.cast %pack : tensor<?x4x64x16x1xf16> to tensor<?x?x?x16x1xf16> | |
// %28..%32 repeat the %9..%13 computation inside the loop body; %32 is used
// only as the dynamic dim of the store's target type.
%28 = arith.extui %2 : i32 to i64 | |
%29 = arith.extui %3 : i32 to i64 | |
%30 = arith.shli %29, %c32_i64 : i64 | |
%31 = arith.ori %28, %30 : i64 | |
%32 = arith.index_castui %31 : i64 to index | |
flow.dispatch.tensor.store %cast_0, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, %c4, %c64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x?x?x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%32} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After ConvertToDestinationPassingStyle (iree-codegen-convert-to-destination-passing-style) //----- // | |
// Destination-passing form of the tiled broadcast + pack dispatch.
// The tensor.pack destination is now a tile loaded from the output binding (%15)
// rather than a tensor.empty; the linalg.generic still writes into a tensor.empty.
// The loop body contains three separate recomputations of the value held in %13.
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c64 = arith.constant 64 : index | |
%c4 = arith.constant 4 : index | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
// %8 = index((%1 << 32) | %0): offset of binding(1).
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
// %13 = index((%3 << 32) | %2): dynamic leading dimension and bound of the outer loop.
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
scf.for %arg0 = %16 to %13 step %17 { | |
// %18 = min(%13 - %arg0, 64): extent of this tile along the dynamic dimension.
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
scf.for %arg1 = %19 to %c540 step %20 { | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%22 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg2 = %21 to %c3200 step %22 { | |
// First in-loop copy of the %9..%13 computation; %27 is the dynamic dim of the
// destination load's source type.
%23 = arith.extui %2 : i32 to i64 | |
%24 = arith.extui %3 : i32 to i64 | |
%25 = arith.shli %24, %c32_i64 : i64 | |
%26 = arith.ori %23, %25 : i64 | |
%27 = arith.index_castui %26 : i64 to index | |
// Second copy; %32 feeds the affine.min below, which mirrors %18.
%28 = arith.extui %2 : i32 to i64 | |
%29 = arith.extui %3 : i32 to i64 | |
%30 = arith.shli %29, %c32_i64 : i64 | |
%31 = arith.ori %28, %30 : i64 | |
%32 = arith.index_castui %31 : i64 to index | |
%33 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%32] | |
// Destination tile for the pack, loaded from the output binding at the same
// offsets the final store writes to.
%34 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%33, %c4, %c64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%27} -> tensor<?x?x?x16x1xf16> | |
%cast = tensor.cast %34 : tensor<?x?x?x16x1xf16> to tensor<?x4x64x16x1xf16> | |
// Row offset into the unpacked source: packed index %arg1 times the inner tile 16.
%35 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
%36 = flow.dispatch.tensor.load %14, offsets = [%35, %arg2], sizes = [%c64, %c64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<?x?xf16> | |
%37 = tensor.empty(%18) : tensor<?x64x64xf16> | |
%cast_0 = tensor.cast %36 : tensor<?x?xf16> to tensor<64x64xf16> | |
%38 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%cast_0 : tensor<64x64xf16>) outs(%37 : tensor<?x64x64xf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64], [1, 1, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<?x64x64xf16> | |
%pack = tensor.pack %38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %cast {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 4, 64], [1, 0, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<?x64x64xf16> -> tensor<?x4x64x16x1xf16> | |
%cast_1 = tensor.cast %pack : tensor<?x4x64x16x1xf16> to tensor<?x?x?x16x1xf16> | |
// Third copy; %43 is the dynamic dim of the store's target type.
%39 = arith.extui %2 : i32 to i64 | |
%40 = arith.extui %3 : i32 to i64 | |
%41 = arith.shli %40, %c32_i64 : i64 | |
%42 = arith.ori %39, %41 : i64 | |
%43 = arith.index_castui %42 : i64 to index | |
flow.dispatch.tensor.store %cast_1, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, %c4, %c64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x?x?x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%43} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After FoldAffineMinInDistributedLoops (iree-codegen-fold-affinemin-in-distributed-loops) //----- // | |
// Tiled, destination-passing broadcast + pack dispatch as printed after the
// fold-affine-min pass. Both affine.min ops (%18 and %33) are still present here;
// their bound depends on the runtime value %13 / %32.
// NOTE(review): this body looks unchanged relative to the dump printed just
// before it in this log — confirm with a diff if that matters.
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c64 = arith.constant 64 : index | |
%c4 = arith.constant 4 : index | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
// %8 = index((%1 << 32) | %0): offset of binding(1).
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
// %13 = index((%3 << 32) | %2): dynamic leading dimension and bound of the outer loop.
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
scf.for %arg0 = %16 to %13 step %17 { | |
// %18 = min(%13 - %arg0, 64): extent of this tile along the dynamic dimension.
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
scf.for %arg1 = %19 to %c540 step %20 { | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%22 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg2 = %21 to %c3200 step %22 { | |
// %23..%27 and %28..%32 each repeat the %9..%13 computation.
%23 = arith.extui %2 : i32 to i64 | |
%24 = arith.extui %3 : i32 to i64 | |
%25 = arith.shli %24, %c32_i64 : i64 | |
%26 = arith.ori %23, %25 : i64 | |
%27 = arith.index_castui %26 : i64 to index | |
%28 = arith.extui %2 : i32 to i64 | |
%29 = arith.extui %3 : i32 to i64 | |
%30 = arith.shli %29, %c32_i64 : i64 | |
%31 = arith.ori %28, %30 : i64 | |
%32 = arith.index_castui %31 : i64 to index | |
%33 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%32] | |
// Destination tile for the pack, loaded from the output binding.
%34 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%33, %c4, %c64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%27} -> tensor<?x?x?x16x1xf16> | |
%cast = tensor.cast %34 : tensor<?x?x?x16x1xf16> to tensor<?x4x64x16x1xf16> | |
// Row offset into the unpacked source: packed index %arg1 times the inner tile 16.
%35 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
%36 = flow.dispatch.tensor.load %14, offsets = [%35, %arg2], sizes = [%c64, %c64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<?x?xf16> | |
%37 = tensor.empty(%18) : tensor<?x64x64xf16> | |
%cast_0 = tensor.cast %36 : tensor<?x?xf16> to tensor<64x64xf16> | |
%38 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%cast_0 : tensor<64x64xf16>) outs(%37 : tensor<?x64x64xf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64], [1, 1, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<?x64x64xf16> | |
%pack = tensor.pack %38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %cast {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 4, 64], [1, 0, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<?x64x64xf16> -> tensor<?x4x64x16x1xf16> | |
%cast_1 = tensor.cast %pack : tensor<?x4x64x16x1xf16> to tensor<?x?x?x16x1xf16> | |
// Another copy of the %9..%13 computation; %43 is the store target's dynamic dim.
%39 = arith.extui %2 : i32 to i64 | |
%40 = arith.extui %3 : i32 to i64 | |
%41 = arith.shli %40, %c32_i64 : i64 | |
%42 = arith.ori %39, %41 : i64 | |
%43 = arith.index_castui %42 : i64 to index | |
flow.dispatch.tensor.store %cast_1, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, %c4, %c64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x?x?x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%43} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After Canonicalizer (canonicalize) //----- // | |
// Canonicalized form of the tiled broadcast + pack dispatch.
// In this dump the tile sizes 4 and 64 appear as static attributes (no %c4/%c64
// constants), there are no tensor.cast ops, and the load/store types use %13 as
// their dynamic dim. One in-loop recomputation of %13 (%23..%27) remains.
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
// %8 = index((%1 << 32) | %0): offset of binding(1).
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
// %13 = index((%3 << 32) | %2): dynamic leading dimension and bound of the outer loop.
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
scf.for %arg0 = %16 to %13 step %17 { | |
// %18 = min(%13 - %arg0, 64): extent of this tile along the dynamic dimension.
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
scf.for %arg1 = %19 to %c540 step %20 { | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%22 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg2 = %21 to %c3200 step %22 { | |
// Remaining in-loop copy of the %9..%13 computation; %28 mirrors %18 and is
// used as the leading size of the destination load, while the store uses %18.
%23 = arith.extui %2 : i32 to i64 | |
%24 = arith.extui %3 : i32 to i64 | |
%25 = arith.shli %24, %c32_i64 : i64 | |
%26 = arith.ori %23, %25 : i64 | |
%27 = arith.index_castui %26 : i64 to index | |
%28 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%27] | |
%29 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%28, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} -> tensor<?x4x64x16x1xf16> | |
// Row offset into the unpacked source: packed index %arg1 times the inner tile 16.
%30 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
%31 = flow.dispatch.tensor.load %14, offsets = [%30, %arg2], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<64x64xf16> | |
%32 = tensor.empty(%18) : tensor<?x64x64xf16> | |
%33 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%31 : tensor<64x64xf16>) outs(%32 : tensor<?x64x64xf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64], [1, 1, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<?x64x64xf16> | |
%pack = tensor.pack %33 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 4, 64], [1, 0, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<?x64x64xf16> -> tensor<?x4x64x16x1xf16> | |
flow.dispatch.tensor.store %pack, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After CSE (cse) //----- // | |
// Tiled broadcast + pack dispatch after common-subexpression elimination.
// The innermost loop body no longer recomputes the dynamic dimension: the
// destination load and the final store both use %18 as their leading size and
// %13 as the dynamic dim of the binding type.
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
// %8 = index((%1 << 32) | %0): offset of binding(1).
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
// %13 = index((%3 << 32) | %2): dynamic leading dimension and bound of the outer loop.
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
// Outer loop over the dynamic dimension: start = id_z * 64, step = count_z * 64.
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
scf.for %arg0 = %16 to %13 step %17 { | |
// %18 = min(%13 - %arg0, 64): extent of this tile along the dynamic dimension.
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
// Middle loop over the packed 540 dim: start = id_y * 4, step = count_y * 4.
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
scf.for %arg1 = %19 to %c540 step %20 { | |
// Inner loop over the 3200 dim: start = id_x * 64, step = count_x * 64.
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%22 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg2 = %21 to %c3200 step %22 { | |
// Destination tile for the pack, loaded from the output binding.
%23 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} -> tensor<?x4x64x16x1xf16> | |
// Row offset into the unpacked source: packed index %arg1 times the inner tile 16.
%24 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
%25 = flow.dispatch.tensor.load %14, offsets = [%24, %arg2], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<64x64xf16> | |
%26 = tensor.empty(%18) : tensor<?x64x64xf16> | |
// Broadcast of the 64x64 source tile along the tiled dynamic dimension.
%27 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%25 : tensor<64x64xf16>) outs(%26 : tensor<?x64x64xf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64], [1, 1, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<?x64x64xf16> | |
%pack = tensor.pack %27 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %23 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 4, 64], [1, 0, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<?x64x64xf16> -> tensor<?x4x64x16x1xf16> | |
flow.dispatch.tensor.store %pack, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After FuseTensorPadWithConsumer (iree-codegen-fuse-tensor-pad-with-consumer) //----- // | |
// Tiled broadcast + pack dispatch as printed after the pad-fusion pass.
// No tensor.pad op appears anywhere in this function.
// NOTE(review): presumably the pass was a no-op here for that reason — confirm
// by diffing against the dump printed just before it in this log.
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
// %8 = index((%1 << 32) | %0): offset of binding(1).
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
// %13 = index((%3 << 32) | %2): dynamic leading dimension and bound of the outer loop.
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
scf.for %arg0 = %16 to %13 step %17 { | |
// %18 = min(%13 - %arg0, 64): extent of this tile along the dynamic dimension.
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
scf.for %arg1 = %19 to %c540 step %20 { | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%22 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg2 = %21 to %c3200 step %22 { | |
// Destination tile for the pack, loaded from the output binding.
%23 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} -> tensor<?x4x64x16x1xf16> | |
// Row offset into the unpacked source: packed index %arg1 times the inner tile 16.
%24 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
%25 = flow.dispatch.tensor.load %14, offsets = [%24, %arg2], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<64x64xf16> | |
%26 = tensor.empty(%18) : tensor<?x64x64xf16> | |
%27 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%25 : tensor<64x64xf16>) outs(%26 : tensor<?x64x64xf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64], [1, 1, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<?x64x64xf16> | |
%pack = tensor.pack %27 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %23 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 4, 64], [1, 0, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<?x64x64xf16> -> tensor<?x4x64x16x1xf16> | |
flow.dispatch.tensor.store %pack, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After ConcretizePadResultShape (iree-codegen-concretize-pad-result-shape) //----- // | |
// Tiled broadcast + pack dispatch as printed after the pad-result-shape pass.
// No tensor.pad op appears anywhere in this function; the only dynamically
// shaped values are the ?x64x64 and ?x4x64x16x1 tiles sized by %18.
// NOTE(review): presumably unchanged from the dump printed just before it in
// this log — confirm with a diff if that matters.
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
// %8 = index((%1 << 32) | %0): offset of binding(1).
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
// %13 = index((%3 << 32) | %2): dynamic leading dimension and bound of the outer loop.
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
scf.for %arg0 = %16 to %13 step %17 { | |
// %18 = min(%13 - %arg0, 64): extent of this tile along the dynamic dimension.
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
scf.for %arg1 = %19 to %c540 step %20 { | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%22 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg2 = %21 to %c3200 step %22 { | |
// Destination tile for the pack, loaded from the output binding.
%23 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} -> tensor<?x4x64x16x1xf16> | |
// Row offset into the unpacked source: packed index %arg1 times the inner tile 16.
%24 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
%25 = flow.dispatch.tensor.load %14, offsets = [%24, %arg2], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<64x64xf16> | |
%26 = tensor.empty(%18) : tensor<?x64x64xf16> | |
%27 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%25 : tensor<64x64xf16>) outs(%26 : tensor<?x64x64xf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64], [1, 1, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<?x64x64xf16> | |
%pack = tensor.pack %27 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %23 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 4, 64], [1, 0, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<?x64x64xf16> -> tensor<?x4x64x16x1xf16> | |
flow.dispatch.tensor.store %pack, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After LLVMCPUTileAndFuse (iree-llvmcpu-tile-and-fuse) //----- // | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c16 = arith.constant 16 : index | |
%c64 = arith.constant 64 : index | |
%c1 = arith.constant 1 : index | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
scf.for %arg0 = %16 to %13 step %17 { | |
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
scf.for %arg1 = %19 to %c540 step %20 { | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%22 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg2 = %21 to %c3200 step %22 { | |
%23 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} -> tensor<?x4x64x16x1xf16> | |
%24 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
%25 = flow.dispatch.tensor.load %14, offsets = [%24, %arg2], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<64x64xf16> | |
%26 = scf.for %arg3 = %c0 to %18 step %c1 iter_args(%arg4 = %23) -> (tensor<?x4x64x16x1xf16>) { | |
%27 = scf.for %arg5 = %c0 to %c64 step %c16 iter_args(%arg6 = %arg4) -> (tensor<?x4x64x16x1xf16>) { | |
%extracted_slice = tensor.extract_slice %25[0, %arg5] [64, 16] [1, 1] : tensor<64x64xf16> to tensor<64x16xf16> | |
%28 = tensor.empty() : tensor<1x64x16xf16> | |
%29 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice : tensor<64x16xf16>) outs(%28 : tensor<1x64x16xf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64], [1, 1, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<1x64x16xf16> | |
%extracted_slice_0 = tensor.extract_slice %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> to tensor<1x4x16x16x1xf16> | |
%pack = tensor.pack %29 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %extracted_slice_0 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 4, 64], [1, 0, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x64x16xf16> -> tensor<1x4x16x16x1xf16> | |
%inserted_slice = tensor.insert_slice %pack into %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<1x4x16x16x1xf16> into tensor<?x4x64x16x1xf16> | |
scf.yield %inserted_slice : tensor<?x4x64x16x1xf16> | |
} | |
scf.yield %27 : tensor<?x4x64x16x1xf16> | |
} | |
flow.dispatch.tensor.store %26, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After FuseTensorPadWithConsumer (iree-codegen-fuse-tensor-pad-with-consumer) //----- // | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c16 = arith.constant 16 : index | |
%c64 = arith.constant 64 : index | |
%c1 = arith.constant 1 : index | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
scf.for %arg0 = %16 to %13 step %17 { | |
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
scf.for %arg1 = %19 to %c540 step %20 { | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%22 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg2 = %21 to %c3200 step %22 { | |
%23 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} -> tensor<?x4x64x16x1xf16> | |
%24 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
%25 = flow.dispatch.tensor.load %14, offsets = [%24, %arg2], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<64x64xf16> | |
%26 = scf.for %arg3 = %c0 to %18 step %c1 iter_args(%arg4 = %23) -> (tensor<?x4x64x16x1xf16>) { | |
%27 = scf.for %arg5 = %c0 to %c64 step %c16 iter_args(%arg6 = %arg4) -> (tensor<?x4x64x16x1xf16>) { | |
%extracted_slice = tensor.extract_slice %25[0, %arg5] [64, 16] [1, 1] : tensor<64x64xf16> to tensor<64x16xf16> | |
%28 = tensor.empty() : tensor<1x64x16xf16> | |
%29 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice : tensor<64x16xf16>) outs(%28 : tensor<1x64x16xf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64], [1, 1, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<1x64x16xf16> | |
%extracted_slice_0 = tensor.extract_slice %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> to tensor<1x4x16x16x1xf16> | |
%pack = tensor.pack %29 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %extracted_slice_0 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 4, 64], [1, 0, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x64x16xf16> -> tensor<1x4x16x16x1xf16> | |
%inserted_slice = tensor.insert_slice %pack into %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<1x4x16x16x1xf16> into tensor<?x4x64x16x1xf16> | |
scf.yield %inserted_slice : tensor<?x4x64x16x1xf16> | |
} | |
scf.yield %27 : tensor<?x4x64x16x1xf16> | |
} | |
flow.dispatch.tensor.store %26, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After ConcretizePadResultShape (iree-codegen-concretize-pad-result-shape) //----- // | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c16 = arith.constant 16 : index | |
%c64 = arith.constant 64 : index | |
%c1 = arith.constant 1 : index | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
scf.for %arg0 = %16 to %13 step %17 { | |
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
scf.for %arg1 = %19 to %c540 step %20 { | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%22 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg2 = %21 to %c3200 step %22 { | |
%23 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} -> tensor<?x4x64x16x1xf16> | |
%24 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
%25 = flow.dispatch.tensor.load %14, offsets = [%24, %arg2], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<64x64xf16> | |
%26 = scf.for %arg3 = %c0 to %18 step %c1 iter_args(%arg4 = %23) -> (tensor<?x4x64x16x1xf16>) { | |
%27 = scf.for %arg5 = %c0 to %c64 step %c16 iter_args(%arg6 = %arg4) -> (tensor<?x4x64x16x1xf16>) { | |
%extracted_slice = tensor.extract_slice %25[0, %arg5] [64, 16] [1, 1] : tensor<64x64xf16> to tensor<64x16xf16> | |
%28 = tensor.empty() : tensor<1x64x16xf16> | |
%29 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice : tensor<64x16xf16>) outs(%28 : tensor<1x64x16xf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64], [1, 1, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<1x64x16xf16> | |
%extracted_slice_0 = tensor.extract_slice %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> to tensor<1x4x16x16x1xf16> | |
%pack = tensor.pack %29 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %extracted_slice_0 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 4, 64], [1, 0, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x64x16xf16> -> tensor<1x4x16x16x1xf16> | |
%inserted_slice = tensor.insert_slice %pack into %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<1x4x16x16x1xf16> into tensor<?x4x64x16x1xf16> | |
scf.yield %inserted_slice : tensor<?x4x64x16x1xf16> | |
} | |
scf.yield %27 : tensor<?x4x64x16x1xf16> | |
} | |
flow.dispatch.tensor.store %26, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After LLVMCPUSplitReduction (iree-llvmcpu-split-reduction) //----- // | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c16 = arith.constant 16 : index | |
%c64 = arith.constant 64 : index | |
%c1 = arith.constant 1 : index | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
scf.for %arg0 = %16 to %13 step %17 { | |
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
scf.for %arg1 = %19 to %c540 step %20 { | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%22 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg2 = %21 to %c3200 step %22 { | |
%23 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} -> tensor<?x4x64x16x1xf16> | |
%24 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
%25 = flow.dispatch.tensor.load %14, offsets = [%24, %arg2], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<64x64xf16> | |
%26 = scf.for %arg3 = %c0 to %18 step %c1 iter_args(%arg4 = %23) -> (tensor<?x4x64x16x1xf16>) { | |
%27 = scf.for %arg5 = %c0 to %c64 step %c16 iter_args(%arg6 = %arg4) -> (tensor<?x4x64x16x1xf16>) { | |
%extracted_slice = tensor.extract_slice %25[0, %arg5] [64, 16] [1, 1] : tensor<64x64xf16> to tensor<64x16xf16> | |
%28 = tensor.empty() : tensor<1x64x16xf16> | |
%29 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice : tensor<64x16xf16>) outs(%28 : tensor<1x64x16xf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64], [1, 1, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<1x64x16xf16> | |
%extracted_slice_0 = tensor.extract_slice %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> to tensor<1x4x16x16x1xf16> | |
%pack = tensor.pack %29 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %extracted_slice_0 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 4, 64], [1, 0, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x64x16xf16> -> tensor<1x4x16x16x1xf16> | |
%inserted_slice = tensor.insert_slice %pack into %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<1x4x16x16x1xf16> into tensor<?x4x64x16x1xf16> | |
scf.yield %inserted_slice : tensor<?x4x64x16x1xf16> | |
} | |
scf.yield %27 : tensor<?x4x64x16x1xf16> | |
} | |
flow.dispatch.tensor.store %26, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After LLVMCPUTile (iree-llvmcpu-tile) //----- // | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c16 = arith.constant 16 : index | |
%c64 = arith.constant 64 : index | |
%c1 = arith.constant 1 : index | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
scf.for %arg0 = %16 to %13 step %17 { | |
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
scf.for %arg1 = %19 to %c540 step %20 { | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%22 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg2 = %21 to %c3200 step %22 { | |
%23 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} -> tensor<?x4x64x16x1xf16> | |
%24 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
%25 = flow.dispatch.tensor.load %14, offsets = [%24, %arg2], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<64x64xf16> | |
%26 = scf.for %arg3 = %c0 to %18 step %c1 iter_args(%arg4 = %23) -> (tensor<?x4x64x16x1xf16>) { | |
%27 = scf.for %arg5 = %c0 to %c64 step %c16 iter_args(%arg6 = %arg4) -> (tensor<?x4x64x16x1xf16>) { | |
%extracted_slice = tensor.extract_slice %25[0, %arg5] [64, 16] [1, 1] : tensor<64x64xf16> to tensor<64x16xf16> | |
%28 = tensor.empty() : tensor<1x64x16xf16> | |
%29 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice : tensor<64x16xf16>) outs(%28 : tensor<1x64x16xf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64], [1, 1, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<1x64x16xf16> | |
%extracted_slice_0 = tensor.extract_slice %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> to tensor<1x4x16x16x1xf16> | |
%pack = tensor.pack %29 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %extracted_slice_0 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 4, 64], [1, 0, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x64x16xf16> -> tensor<1x4x16x16x1xf16> | |
%inserted_slice = tensor.insert_slice %pack into %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<1x4x16x16x1xf16> into tensor<?x4x64x16x1xf16> | |
scf.yield %inserted_slice : tensor<?x4x64x16x1xf16> | |
} | |
scf.yield %27 : tensor<?x4x64x16x1xf16> | |
} | |
flow.dispatch.tensor.store %26, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After LLVMCPUTileAndFuse (iree-llvmcpu-tile-and-fuse) //----- // | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c16 = arith.constant 16 : index | |
%c64 = arith.constant 64 : index | |
%c1 = arith.constant 1 : index | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
scf.for %arg0 = %16 to %13 step %17 { | |
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
scf.for %arg1 = %19 to %c540 step %20 { | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%22 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg2 = %21 to %c3200 step %22 { | |
%23 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} -> tensor<?x4x64x16x1xf16> | |
%24 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
%25 = flow.dispatch.tensor.load %14, offsets = [%24, %arg2], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<64x64xf16> | |
%26 = scf.for %arg3 = %c0 to %18 step %c1 iter_args(%arg4 = %23) -> (tensor<?x4x64x16x1xf16>) { | |
%27 = scf.for %arg5 = %c0 to %c64 step %c16 iter_args(%arg6 = %arg4) -> (tensor<?x4x64x16x1xf16>) { | |
%extracted_slice = tensor.extract_slice %25[0, %arg5] [64, 16] [1, 1] : tensor<64x64xf16> to tensor<64x16xf16> | |
%28 = tensor.empty() : tensor<1x64x16xf16> | |
%29 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice : tensor<64x16xf16>) outs(%28 : tensor<1x64x16xf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64], [1, 1, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<1x64x16xf16> | |
%extracted_slice_0 = tensor.extract_slice %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> to tensor<1x4x16x16x1xf16> | |
%pack = tensor.pack %29 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %extracted_slice_0 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 4, 64], [1, 0, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x64x16xf16> -> tensor<1x4x16x16x1xf16> | |
%inserted_slice = tensor.insert_slice %pack into %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<1x4x16x16x1xf16> into tensor<?x4x64x16x1xf16> | |
scf.yield %inserted_slice : tensor<?x4x64x16x1xf16> | |
} | |
scf.yield %27 : tensor<?x4x64x16x1xf16> | |
} | |
flow.dispatch.tensor.store %26, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After FuseTensorPadWithConsumer (iree-codegen-fuse-tensor-pad-with-consumer) //----- // | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c16 = arith.constant 16 : index | |
%c64 = arith.constant 64 : index | |
%c1 = arith.constant 1 : index | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
scf.for %arg0 = %16 to %13 step %17 { | |
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
scf.for %arg1 = %19 to %c540 step %20 { | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%22 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg2 = %21 to %c3200 step %22 { | |
%23 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} -> tensor<?x4x64x16x1xf16> | |
%24 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
%25 = flow.dispatch.tensor.load %14, offsets = [%24, %arg2], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<64x64xf16> | |
%26 = scf.for %arg3 = %c0 to %18 step %c1 iter_args(%arg4 = %23) -> (tensor<?x4x64x16x1xf16>) { | |
%27 = scf.for %arg5 = %c0 to %c64 step %c16 iter_args(%arg6 = %arg4) -> (tensor<?x4x64x16x1xf16>) { | |
%extracted_slice = tensor.extract_slice %25[0, %arg5] [64, 16] [1, 1] : tensor<64x64xf16> to tensor<64x16xf16> | |
%28 = tensor.empty() : tensor<1x64x16xf16> | |
%29 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice : tensor<64x16xf16>) outs(%28 : tensor<1x64x16xf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64], [1, 1, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<1x64x16xf16> | |
%extracted_slice_0 = tensor.extract_slice %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> to tensor<1x4x16x16x1xf16> | |
%pack = tensor.pack %29 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %extracted_slice_0 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 4, 64], [1, 0, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x64x16xf16> -> tensor<1x4x16x16x1xf16> | |
%inserted_slice = tensor.insert_slice %pack into %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<1x4x16x16x1xf16> into tensor<?x4x64x16x1xf16> | |
scf.yield %inserted_slice : tensor<?x4x64x16x1xf16> | |
} | |
scf.yield %27 : tensor<?x4x64x16x1xf16> | |
} | |
flow.dispatch.tensor.store %26, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After ConcretizePadResultShape (iree-codegen-concretize-pad-result-shape) //----- // | |
// Tiled and workgroup-distributed dispatch: the 8640x3200 f16 input (binding 0)
// is broadcast along a dynamic leading dimension and written to binding 1 in the
// packed layout ?x540x3200x16x1 (inner tiles [16, 1] on source dims [1, 2]).
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c16 = arith.constant 16 : index | |
%c64 = arith.constant 64 : index | |
%c1 = arith.constant 1 : index | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
// Four i32 push constants; each pair is zero-extended and merged as
// (low | high << 32) into one 64-bit value, then cast to index.
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
// %8 (constants 0/1) is used below as the offset of the output binding.
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
// %13 (constants 2/3) is the size of the dynamic leading output dimension.
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
// z: dynamic leading dimension, tiles of 64; each workgroup starts at id_z * 64
// and strides by count_z * 64.
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
scf.for %arg0 = %16 to %13 step %17 { | |
// Extent of this tile along the dynamic dimension: min(%13 - %arg0, 64).
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
// y: the 540 outer row tiles, 4 per step.
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
scf.for %arg1 = %19 to %c540 step %20 { | |
// x: the 3200 columns, 64 per step.
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%22 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg2 = %21 to %c3200 step %22 { | |
// Destination tile used to seed the loop-carried tensor below.
// NOTE(review): this loads from a binding typed writeonly; the loops below
// appear to overwrite every element of the tile - confirm.
%23 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} -> tensor<?x4x64x16x1xf16> | |
// Outer row-tile index -> source row offset (16 source rows per outer tile),
// so a step of 4 outer tiles matches the 64 source rows loaded next.
%24 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
%25 = flow.dispatch.tensor.load %14, offsets = [%24, %arg2], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<64x64xf16> | |
// %arg3 walks the dynamic-dimension slice one entry at a time; %arg5 walks
// the 64 columns of the source tile in chunks of 16.
%26 = scf.for %arg3 = %c0 to %18 step %c1 iter_args(%arg4 = %23) -> (tensor<?x4x64x16x1xf16>) { | |
%27 = scf.for %arg5 = %c0 to %c64 step %c16 iter_args(%arg6 = %arg4) -> (tensor<?x4x64x16x1xf16>) { | |
%extracted_slice = tensor.extract_slice %25[0, %arg5] [64, 16] [1, 1] : tensor<64x64xf16> to tensor<64x16xf16> | |
%28 = tensor.empty() : tensor<1x64x16xf16> | |
// Broadcast: the input map drops d0, so the 64x16 slice is copied into a
// tensor with a unit leading dimension.
%29 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice : tensor<64x16xf16>) outs(%28 : tensor<1x64x16xf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64], [1, 1, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<1x64x16xf16> | |
%extracted_slice_0 = tensor.extract_slice %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> to tensor<1x4x16x16x1xf16> | |
// Pack 1x64x16 into 1x4x16x16x1: 64 rows become 4 outer tiles of 16, and
// the 16 columns become 16 outer tiles of 1.
%pack = tensor.pack %29 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %extracted_slice_0 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 4, 64], [1, 0, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x64x16xf16> -> tensor<1x4x16x16x1xf16> | |
%inserted_slice = tensor.insert_slice %pack into %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<1x4x16x16x1xf16> into tensor<?x4x64x16x1xf16> | |
scf.yield %inserted_slice : tensor<?x4x64x16x1xf16> | |
} | |
scf.yield %27 : tensor<?x4x64x16x1xf16> | |
} | |
// Write the finished tile back at the offsets it was loaded from.
flow.dispatch.tensor.store %26, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After TensorToVectorVectorizePad (iree-codegen-vectorize-tensor-pad) //----- // | |
// Tiled and workgroup-distributed dispatch: the 8640x3200 f16 input (binding 0)
// is broadcast along a dynamic leading dimension and written to binding 1 in the
// packed layout ?x540x3200x16x1 (inner tiles [16, 1] on source dims [1, 2]).
// This dump contains no tensor.pad or vector ops.
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c16 = arith.constant 16 : index | |
%c64 = arith.constant 64 : index | |
%c1 = arith.constant 1 : index | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
// Four i32 push constants; each pair is zero-extended and merged as
// (low | high << 32) into one 64-bit value, then cast to index.
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
// %8 (constants 0/1) is used below as the offset of the output binding.
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
// %13 (constants 2/3) is the size of the dynamic leading output dimension.
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
// z: dynamic leading dimension, tiles of 64; each workgroup starts at id_z * 64
// and strides by count_z * 64.
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
scf.for %arg0 = %16 to %13 step %17 { | |
// Extent of this tile along the dynamic dimension: min(%13 - %arg0, 64).
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
// y: the 540 outer row tiles, 4 per step.
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
scf.for %arg1 = %19 to %c540 step %20 { | |
// x: the 3200 columns, 64 per step.
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%22 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg2 = %21 to %c3200 step %22 { | |
// Destination tile used to seed the loop-carried tensor below.
// NOTE(review): this loads from a binding typed writeonly; the loops below
// appear to overwrite every element of the tile - confirm.
%23 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} -> tensor<?x4x64x16x1xf16> | |
// Outer row-tile index -> source row offset (16 source rows per outer tile),
// so a step of 4 outer tiles matches the 64 source rows loaded next.
%24 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
%25 = flow.dispatch.tensor.load %14, offsets = [%24, %arg2], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<64x64xf16> | |
// %arg3 walks the dynamic-dimension slice one entry at a time; %arg5 walks
// the 64 columns of the source tile in chunks of 16.
%26 = scf.for %arg3 = %c0 to %18 step %c1 iter_args(%arg4 = %23) -> (tensor<?x4x64x16x1xf16>) { | |
%27 = scf.for %arg5 = %c0 to %c64 step %c16 iter_args(%arg6 = %arg4) -> (tensor<?x4x64x16x1xf16>) { | |
%extracted_slice = tensor.extract_slice %25[0, %arg5] [64, 16] [1, 1] : tensor<64x64xf16> to tensor<64x16xf16> | |
%28 = tensor.empty() : tensor<1x64x16xf16> | |
// Broadcast: the input map drops d0, so the 64x16 slice is copied into a
// tensor with a unit leading dimension.
%29 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice : tensor<64x16xf16>) outs(%28 : tensor<1x64x16xf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64], [1, 1, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<1x64x16xf16> | |
%extracted_slice_0 = tensor.extract_slice %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> to tensor<1x4x16x16x1xf16> | |
// Pack 1x64x16 into 1x4x16x16x1: 64 rows become 4 outer tiles of 16, and
// the 16 columns become 16 outer tiles of 1.
%pack = tensor.pack %29 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %extracted_slice_0 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 4, 64], [1, 0, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x64x16xf16> -> tensor<1x4x16x16x1xf16> | |
%inserted_slice = tensor.insert_slice %pack into %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<1x4x16x16x1xf16> into tensor<?x4x64x16x1xf16> | |
scf.yield %inserted_slice : tensor<?x4x64x16x1xf16> | |
} | |
scf.yield %27 : tensor<?x4x64x16x1xf16> | |
} | |
// Write the finished tile back at the offsets it was loaded from.
flow.dispatch.tensor.store %26, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After DecomposePackUnPackOps (iree-codegen-decompose-pack-unpack-ops) //----- // | |
// Tiled and workgroup-distributed dispatch: the 8640x3200 f16 input (binding 0)
// is broadcast along a dynamic leading dimension and written to binding 1 in the
// layout ?x540x3200x16x1. In this dump the per-tile relayout is expressed as
// tensor.expand_shape followed by linalg.transpose.
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c16 = arith.constant 16 : index | |
%c64 = arith.constant 64 : index | |
%c1 = arith.constant 1 : index | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
// Four i32 push constants; each pair is zero-extended and merged as
// (low | high << 32) into one 64-bit value, then cast to index.
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
// %8 (constants 0/1) is used below as the offset of the output binding.
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
// %13 (constants 2/3) is the size of the dynamic leading output dimension.
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
// z: dynamic leading dimension, tiles of 64; each workgroup starts at id_z * 64
// and strides by count_z * 64.
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
scf.for %arg0 = %16 to %13 step %17 { | |
// Extent of this tile along the dynamic dimension: min(%13 - %arg0, 64).
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
// y: the 540 outer row tiles, 4 per step.
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
scf.for %arg1 = %19 to %c540 step %20 { | |
// x: the 3200 columns, 64 per step.
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%22 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg2 = %21 to %c3200 step %22 { | |
// Destination tile used to seed the loop-carried tensor below.
// NOTE(review): this loads from a binding typed writeonly; the loops below
// appear to overwrite every element of the tile - confirm.
%23 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} -> tensor<?x4x64x16x1xf16> | |
// Outer row-tile index -> source row offset (%arg1 * 16); a step of 4 outer
// tiles matches the 64 source rows loaded next.
%24 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
%25 = flow.dispatch.tensor.load %14, offsets = [%24, %arg2], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<64x64xf16> | |
// %arg3 walks the dynamic-dimension slice one entry at a time; %arg5 walks
// the 64 columns of the source tile in chunks of 16.
%26 = scf.for %arg3 = %c0 to %18 step %c1 iter_args(%arg4 = %23) -> (tensor<?x4x64x16x1xf16>) { | |
%27 = scf.for %arg5 = %c0 to %c64 step %c16 iter_args(%arg6 = %arg4) -> (tensor<?x4x64x16x1xf16>) { | |
%extracted_slice = tensor.extract_slice %25[0, %arg5] [64, 16] [1, 1] : tensor<64x64xf16> to tensor<64x16xf16> | |
%28 = tensor.empty() : tensor<1x64x16xf16> | |
// Broadcast: the input map drops d0, so the 64x16 slice is copied into a
// tensor with a unit leading dimension.
%29 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice : tensor<64x16xf16>) outs(%28 : tensor<1x64x16xf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64], [1, 1, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<1x64x16xf16> | |
%extracted_slice_0 = tensor.extract_slice %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> to tensor<1x4x16x16x1xf16> | |
// Reshape 1x64x16 -> 1x4x16x16x1: the 64 rows split into 4x16 and the 16
// columns into 16x1.
%expanded = tensor.expand_shape %29 [[0], [1, 2], [3, 4]] : tensor<1x64x16xf16> into tensor<1x4x16x16x1xf16> | |
// Swap dims 2 and 3 so the 16-row inner tile lands in dim 3 of the
// destination slice.
%transposed = linalg.transpose ins(%expanded : tensor<1x4x16x16x1xf16>) outs(%extracted_slice_0 : tensor<1x4x16x16x1xf16>) permutation = [0, 1, 3, 2, 4] | |
%inserted_slice = tensor.insert_slice %transposed into %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<1x4x16x16x1xf16> into tensor<?x4x64x16x1xf16> | |
scf.yield %inserted_slice : tensor<?x4x64x16x1xf16> | |
} | |
scf.yield %27 : tensor<?x4x64x16x1xf16> | |
} | |
// Write the finished tile back at the offsets it was loaded from.
flow.dispatch.tensor.store %26, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After Canonicalizer (canonicalize) //----- // | |
// Tiled and workgroup-distributed dispatch: the 8640x3200 f16 input (binding 0)
// is broadcast along a dynamic leading dimension and written to binding 1 in the
// layout ?x540x3200x16x1. The per-tile relayout is expressed as
// tensor.expand_shape followed by linalg.transpose.
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c16 = arith.constant 16 : index | |
%c64 = arith.constant 64 : index | |
%c1 = arith.constant 1 : index | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
// Four i32 push constants; each pair is zero-extended and merged as
// (low | high << 32) into one 64-bit value, then cast to index.
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
// %8 (constants 0/1) is used below as the offset of the output binding.
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
// %13 (constants 2/3) is the size of the dynamic leading output dimension.
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
// z: dynamic leading dimension, tiles of 64; each workgroup starts at id_z * 64
// and strides by count_z * 64.
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
scf.for %arg0 = %16 to %13 step %17 { | |
// Extent of this tile along the dynamic dimension: min(%13 - %arg0, 64).
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
// y: the 540 outer row tiles, 4 per step.
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
scf.for %arg1 = %19 to %c540 step %20 { | |
// x: the 3200 columns, 64 per step.
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%22 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg2 = %21 to %c3200 step %22 { | |
// Destination tile used to seed the loop-carried tensor below.
// NOTE(review): this loads from a binding typed writeonly; the loops below
// appear to overwrite every element of the tile - confirm.
%23 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} -> tensor<?x4x64x16x1xf16> | |
// Outer row-tile index -> source row offset (%arg1 * 16); a step of 4 outer
// tiles matches the 64 source rows loaded next.
%24 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
%25 = flow.dispatch.tensor.load %14, offsets = [%24, %arg2], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<64x64xf16> | |
// %arg3 walks the dynamic-dimension slice one entry at a time; %arg5 walks
// the 64 columns of the source tile in chunks of 16.
%26 = scf.for %arg3 = %c0 to %18 step %c1 iter_args(%arg4 = %23) -> (tensor<?x4x64x16x1xf16>) { | |
%27 = scf.for %arg5 = %c0 to %c64 step %c16 iter_args(%arg6 = %arg4) -> (tensor<?x4x64x16x1xf16>) { | |
%extracted_slice = tensor.extract_slice %25[0, %arg5] [64, 16] [1, 1] : tensor<64x64xf16> to tensor<64x16xf16> | |
%28 = tensor.empty() : tensor<1x64x16xf16> | |
// Broadcast: the input map drops d0, so the 64x16 slice is copied into a
// tensor with a unit leading dimension.
%29 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice : tensor<64x16xf16>) outs(%28 : tensor<1x64x16xf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64], [1, 1, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<1x64x16xf16> | |
%extracted_slice_0 = tensor.extract_slice %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> to tensor<1x4x16x16x1xf16> | |
// Reshape 1x64x16 -> 1x4x16x16x1: the 64 rows split into 4x16 and the 16
// columns into 16x1.
%expanded = tensor.expand_shape %29 [[0], [1, 2], [3, 4]] : tensor<1x64x16xf16> into tensor<1x4x16x16x1xf16> | |
// Swap dims 2 and 3 so the 16-row inner tile lands in dim 3 of the
// destination slice.
%transposed = linalg.transpose ins(%expanded : tensor<1x4x16x16x1xf16>) outs(%extracted_slice_0 : tensor<1x4x16x16x1xf16>) permutation = [0, 1, 3, 2, 4] | |
%inserted_slice = tensor.insert_slice %transposed into %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<1x4x16x16x1xf16> into tensor<?x4x64x16x1xf16> | |
scf.yield %inserted_slice : tensor<?x4x64x16x1xf16> | |
} | |
scf.yield %27 : tensor<?x4x64x16x1xf16> | |
} | |
// Write the finished tile back at the offsets it was loaded from.
flow.dispatch.tensor.store %26, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After CSE (cse) //----- // | |
// Tiled and workgroup-distributed dispatch: the 8640x3200 f16 input (binding 0)
// is broadcast along a dynamic leading dimension and written to binding 1 in the
// layout ?x540x3200x16x1. The per-tile relayout is expressed as
// tensor.expand_shape followed by linalg.transpose.
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c16 = arith.constant 16 : index | |
%c64 = arith.constant 64 : index | |
%c1 = arith.constant 1 : index | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
// Four i32 push constants; each pair is zero-extended and merged as
// (low | high << 32) into one 64-bit value, then cast to index.
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
// %8 (constants 0/1) is used below as the offset of the output binding.
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
// %13 (constants 2/3) is the size of the dynamic leading output dimension.
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
// z: dynamic leading dimension, tiles of 64; each workgroup starts at id_z * 64
// and strides by count_z * 64.
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
scf.for %arg0 = %16 to %13 step %17 { | |
// Extent of this tile along the dynamic dimension: min(%13 - %arg0, 64).
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
// y: the 540 outer row tiles, 4 per step.
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
scf.for %arg1 = %19 to %c540 step %20 { | |
// x: the 3200 columns, 64 per step.
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%22 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg2 = %21 to %c3200 step %22 { | |
// Destination tile used to seed the loop-carried tensor below.
// NOTE(review): this loads from a binding typed writeonly; the loops below
// appear to overwrite every element of the tile - confirm.
%23 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} -> tensor<?x4x64x16x1xf16> | |
// Outer row-tile index -> source row offset (%arg1 * 16); a step of 4 outer
// tiles matches the 64 source rows loaded next.
%24 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
%25 = flow.dispatch.tensor.load %14, offsets = [%24, %arg2], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<64x64xf16> | |
// %arg3 walks the dynamic-dimension slice one entry at a time; %arg5 walks
// the 64 columns of the source tile in chunks of 16.
%26 = scf.for %arg3 = %c0 to %18 step %c1 iter_args(%arg4 = %23) -> (tensor<?x4x64x16x1xf16>) { | |
%27 = scf.for %arg5 = %c0 to %c64 step %c16 iter_args(%arg6 = %arg4) -> (tensor<?x4x64x16x1xf16>) { | |
%extracted_slice = tensor.extract_slice %25[0, %arg5] [64, 16] [1, 1] : tensor<64x64xf16> to tensor<64x16xf16> | |
%28 = tensor.empty() : tensor<1x64x16xf16> | |
// Broadcast: the input map drops d0, so the 64x16 slice is copied into a
// tensor with a unit leading dimension.
%29 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice : tensor<64x16xf16>) outs(%28 : tensor<1x64x16xf16>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64], [1, 1, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} -> tensor<1x64x16xf16> | |
%extracted_slice_0 = tensor.extract_slice %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> to tensor<1x4x16x16x1xf16> | |
// Reshape 1x64x16 -> 1x4x16x16x1: the 64 rows split into 4x16 and the 16
// columns into 16x1.
%expanded = tensor.expand_shape %29 [[0], [1, 2], [3, 4]] : tensor<1x64x16xf16> into tensor<1x4x16x16x1xf16> | |
// Swap dims 2 and 3 so the 16-row inner tile lands in dim 3 of the
// destination slice.
%transposed = linalg.transpose ins(%expanded : tensor<1x4x16x16x1xf16>) outs(%extracted_slice_0 : tensor<1x4x16x16x1xf16>) permutation = [0, 1, 3, 2, 4] | |
%inserted_slice = tensor.insert_slice %transposed into %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<1x4x16x16x1xf16> into tensor<?x4x64x16x1xf16> | |
scf.yield %inserted_slice : tensor<?x4x64x16x1xf16> | |
} | |
scf.yield %27 : tensor<?x4x64x16x1xf16> | |
} | |
// Write the finished tile back at the offsets it was loaded from.
flow.dispatch.tensor.store %26, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After GenericVectorization (iree-codegen-generic-vectorization) //----- // | |
// Tiled and workgroup-distributed dispatch: the 8640x3200 f16 input (binding 0)
// is broadcast along a dynamic leading dimension and written to binding 1 in the
// layout ?x540x3200x16x1. In this dump the per-tile copy and transpose are
// expressed with vector.transfer_read / vector.broadcast / vector.transpose /
// vector.transfer_write.
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
// Padding operand of the vector.transfer_read ops below; every read is marked
// fully in_bounds.
%cst = arith.constant 0.000000e+00 : f16 | |
%c16 = arith.constant 16 : index | |
%c64 = arith.constant 64 : index | |
%c1 = arith.constant 1 : index | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
// Four i32 push constants; each pair is zero-extended and merged as
// (low | high << 32) into one 64-bit value, then cast to index.
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
// %8 (constants 0/1) is used below as the offset of the output binding.
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
// %13 (constants 2/3) is the size of the dynamic leading output dimension.
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
// z: dynamic leading dimension, tiles of 64; each workgroup starts at id_z * 64
// and strides by count_z * 64.
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
scf.for %arg0 = %16 to %13 step %17 { | |
// Extent of this tile along the dynamic dimension: min(%13 - %arg0, 64).
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
// y: the 540 outer row tiles, 4 per step.
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
scf.for %arg1 = %19 to %c540 step %20 { | |
// x: the 3200 columns, 64 per step.
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%22 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg2 = %21 to %c3200 step %22 { | |
// Destination tile used to seed the loop-carried tensor below.
// NOTE(review): this loads from a binding typed writeonly; the loops below
// appear to overwrite every element of the tile - confirm.
%23 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} -> tensor<?x4x64x16x1xf16> | |
// Outer row-tile index -> source row offset (%arg1 * 16); a step of 4 outer
// tiles matches the 64 source rows loaded next.
%24 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
%25 = flow.dispatch.tensor.load %14, offsets = [%24, %arg2], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<64x64xf16> | |
// %arg3 walks the dynamic-dimension slice one entry at a time; %arg5 walks
// the 64 columns of the source tile in chunks of 16.
%26 = scf.for %arg3 = %c0 to %18 step %c1 iter_args(%arg4 = %23) -> (tensor<?x4x64x16x1xf16>) { | |
%27 = scf.for %arg5 = %c0 to %c64 step %c16 iter_args(%arg6 = %arg4) -> (tensor<?x4x64x16x1xf16>) { | |
%extracted_slice = tensor.extract_slice %25[0, %arg5] [64, 16] [1, 1] : tensor<64x64xf16> to tensor<64x16xf16> | |
%28 = tensor.empty() : tensor<1x64x16xf16> | |
// Broadcast step: read the 64x16 slice as a vector, add a unit leading
// dimension, and write it into the 1x64x16 staging tensor.
%29 = vector.transfer_read %extracted_slice[%c0, %c0], %cst {in_bounds = [true, true]} : tensor<64x16xf16>, vector<64x16xf16> | |
%30 = vector.broadcast %29 : vector<64x16xf16> to vector<1x64x16xf16> | |
%31 = vector.transfer_write %30, %28[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x64x16xf16>, tensor<1x64x16xf16> | |
%extracted_slice_0 = tensor.extract_slice %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> to tensor<1x4x16x16x1xf16> | |
// Reshape 1x64x16 -> 1x4x16x16x1: the 64 rows split into 4x16 and the 16
// columns into 16x1.
%expanded = tensor.expand_shape %31 [[0], [1, 2], [3, 4]] : tensor<1x64x16xf16> into tensor<1x4x16x16x1xf16> | |
// Transpose step: read the whole reshaped tile, swap dims 2 and 3, and
// write the result into the destination slice.
%32 = vector.transfer_read %expanded[%c0, %c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true, true, true]} : tensor<1x4x16x16x1xf16>, vector<1x4x16x16x1xf16> | |
%33 = vector.transpose %32, [0, 1, 3, 2, 4] : vector<1x4x16x16x1xf16> to vector<1x4x16x16x1xf16> | |
%34 = vector.transfer_write %33, %extracted_slice_0[%c0, %c0, %c0, %c0, %c0] {in_bounds = [true, true, true, true, true]} : vector<1x4x16x16x1xf16>, tensor<1x4x16x16x1xf16> | |
%inserted_slice = tensor.insert_slice %34 into %arg6[%arg3, 0, %arg5, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : tensor<1x4x16x16x1xf16> into tensor<?x4x64x16x1xf16> | |
scf.yield %inserted_slice : tensor<?x4x64x16x1xf16> | |
} | |
scf.yield %27 : tensor<?x4x64x16x1xf16> | |
} | |
// Write the finished tile back at the offsets it was loaded from.
flow.dispatch.tensor.store %26, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%18, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After OptimizeTensorInsertExtractSlices (iree-codegen-optimize-tensor-insert-extract-slices) //----- // | |
// Tiled and workgroup-distributed dispatch: the 8640x3200 f16 input (binding 0)
// is broadcast along a dynamic leading dimension and written to binding 1 in the
// layout ?x540x3200x16x1. In this dump the vector transfers index the source
// tile and the loop-carried destination tensor directly; the function contains
// no tensor.extract_slice / tensor.insert_slice ops.
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
// Padding operand of the vector.transfer_read ops below; every read is marked
// fully in_bounds.
%cst = arith.constant 0.000000e+00 : f16 | |
%c16 = arith.constant 16 : index | |
%c64 = arith.constant 64 : index | |
%c1 = arith.constant 1 : index | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
// Four i32 push constants; each pair is zero-extended and merged as
// (low | high << 32) into one 64-bit value, then cast to index.
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
// %8 (constants 0/1) is used below as the offset of the output binding.
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
// %13 (constants 2/3) is the size of the dynamic leading output dimension.
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
// Per-workgroup start offsets and strides for all three distributed loops are
// computed once here, ahead of the loop nest: z and x use tiles of 64, y uses
// tiles of 4.
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
%18 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
// Single 1x64x16 staging tensor; every inner iteration writes into this value.
%22 = tensor.empty() : tensor<1x64x16xf16> | |
// z loop over the dynamic leading dimension.
scf.for %arg0 = %16 to %13 step %17 { | |
// Extent of this tile along the dynamic dimension: min(%13 - %arg0, 64).
%23 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
// y loop over the 540 outer row tiles.
scf.for %arg1 = %18 to %c540 step %19 { | |
// Outer row-tile index -> source row offset (%arg1 * 16).
%24 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
// x loop over the 3200 columns.
scf.for %arg2 = %20 to %c3200 step %21 { | |
// Destination tile used to seed the loop-carried tensor below.
// NOTE(review): this loads from a binding typed writeonly; the loops below
// appear to overwrite every element of the tile - confirm.
%25 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%23, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} -> tensor<?x4x64x16x1xf16> | |
%26 = flow.dispatch.tensor.load %14, offsets = [%24, %arg2], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<64x64xf16> | |
// %arg3 walks the dynamic-dimension slice one entry at a time; %arg5 walks
// the 64 columns of the source tile in chunks of 16.
%27 = scf.for %arg3 = %c0 to %23 step %c1 iter_args(%arg4 = %25) -> (tensor<?x4x64x16x1xf16>) { | |
%28 = scf.for %arg5 = %c0 to %c64 step %c16 iter_args(%arg6 = %arg4) -> (tensor<?x4x64x16x1xf16>) { | |
// Read the 64x16 window at column %arg5 straight from the 64x64 source
// tile, add a unit leading dimension, and stage it in %22.
%29 = vector.transfer_read %26[%c0, %arg5], %cst {in_bounds = [true, true]} : tensor<64x64xf16>, vector<64x16xf16> | |
%30 = vector.broadcast %29 : vector<64x16xf16> to vector<1x64x16xf16> | |
%31 = vector.transfer_write %30, %22[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x64x16xf16>, tensor<1x64x16xf16> | |
// Reshape 1x64x16 -> 1x4x16x16x1: the 64 rows split into 4x16 and the 16
// columns into 16x1.
%expanded = tensor.expand_shape %31 [[0], [1, 2], [3, 4]] : tensor<1x64x16xf16> into tensor<1x4x16x16x1xf16> | |
%32 = vector.transfer_read %expanded[%c0, %c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true, true, true]} : tensor<1x4x16x16x1xf16>, vector<1x4x16x16x1xf16> | |
// Swap dims 2 and 3, then write directly into the loop-carried tile at
// [%arg3, 0, %arg5, 0, 0].
%33 = vector.transpose %32, [0, 1, 3, 2, 4] : vector<1x4x16x16x1xf16> to vector<1x4x16x16x1xf16> | |
%34 = vector.transfer_write %33, %arg6[%arg3, %c0, %arg5, %c0, %c0] {in_bounds = [true, true, true, true, true]} : vector<1x4x16x16x1xf16>, tensor<?x4x64x16x1xf16> | |
scf.yield %34 : tensor<?x4x64x16x1xf16> | |
} | |
scf.yield %28 : tensor<?x4x64x16x1xf16> | |
} | |
// Write the finished tile back at the offsets it was loaded from.
flow.dispatch.tensor.store %27, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%23, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After Canonicalizer (canonicalize) //----- // | |
// Dispatch function as printed after the Canonicalizer pass (tensor form).
// It takes no arguments: it reads four i32 interface constants and two
// storage-buffer bindings, then writes tiles of the read-only 8640x3200 f16
// input into the ?x540x3200x16x1 f16 output. The data read from the input does
// not depend on the leading-dimension indices (%arg0 / %arg3), so the same
// source tile is written at every leading index this workgroup covers.
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%cst = arith.constant 0.000000e+00 : f16 | |
%c16 = arith.constant 16 : index | |
%c64 = arith.constant 64 : index | |
%c1 = arith.constant 1 : index | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
// Constants 0 (low half) and 1 (high half, shifted left by 32) are OR-ed into
// one i64 and cast to index; %8 is the offset of the binding(1) subspan below.
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
// Constants 2 and 3 are combined the same way; %13 is the dynamic leading
// dimension of the output type and the upper bound of the outermost loop.
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
// %14: read-only input binding; %15: write-only output binding.
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
// Loop start = workgroup id * tile size, loop step = workgroup count * tile size.
// Tile sizes are 64 (z), 4 (y) and 64 (x).
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
%18 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
// Scratch 1x64x16 tensor used inside the innermost loop to reshape a slice.
%22 = tensor.empty() : tensor<1x64x16xf16> | |
scf.for %arg0 = %16 to %13 step %17 { | |
// Leading-dimension tile size: min(%13 - %arg0, 64), i.e. clamped at the upper bound.
%23 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
scf.for %arg1 = %18 to %c540 step %19 { | |
// Row offset into the 8640-row input: %arg1 * 16.
%24 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
scf.for %arg2 = %20 to %c3200 step %21 { | |
// %25: destination tile loaded from the output binding (used as the loop-carried init value).
// %26: 64x64 source tile at [%24, %arg2].
%25 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%23, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} -> tensor<?x4x64x16x1xf16> | |
%26 = flow.dispatch.tensor.load %14, offsets = [%24, %arg2], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<64x64xf16> | |
// %arg3 walks the leading-dimension tile one index at a time; %arg5 walks the
// 64 source columns in steps of 16.
%27 = scf.for %arg3 = %c0 to %23 step %c1 iter_args(%arg4 = %25) -> (tensor<?x4x64x16x1xf16>) { | |
%28 = scf.for %arg5 = %c0 to %c64 step %c16 iter_args(%arg6 = %arg4) -> (tensor<?x4x64x16x1xf16>) { | |
// Read a 64x16 column slice, add a unit leading dim, and stage it in the scratch tensor.
%29 = vector.transfer_read %26[%c0, %arg5], %cst {in_bounds = [true, true]} : tensor<64x64xf16>, vector<64x16xf16> | |
%30 = vector.broadcast %29 : vector<64x16xf16> to vector<1x64x16xf16> | |
%31 = vector.transfer_write %30, %22[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x64x16xf16>, tensor<1x64x16xf16> | |
// Split 64 -> 4x16 and 16 -> 16x1, then swap dims 2 and 3.
%expanded = tensor.expand_shape %31 [[0], [1, 2], [3, 4]] : tensor<1x64x16xf16> into tensor<1x4x16x16x1xf16> | |
%32 = vector.transfer_read %expanded[%c0, %c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true, true, true]} : tensor<1x4x16x16x1xf16>, vector<1x4x16x16x1xf16> | |
%33 = vector.transpose %32, [0, 1, 3, 2, 4] : vector<1x4x16x16x1xf16> to vector<1x4x16x16x1xf16> | |
// Write the transposed slice into the destination tile at [%arg3, 0, %arg5, 0, 0].
%34 = vector.transfer_write %33, %arg6[%arg3, %c0, %arg5, %c0, %c0] {in_bounds = [true, true, true, true, true]} : vector<1x4x16x16x1xf16>, tensor<?x4x64x16x1xf16> | |
scf.yield %34 : tensor<?x4x64x16x1xf16> | |
} | |
scf.yield %28 : tensor<?x4x64x16x1xf16> | |
} | |
// Store the filled tile back at the same offsets/sizes it was loaded from.
flow.dispatch.tensor.store %27, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%23, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After CSE (cse) //----- // | |
// Dispatch function as printed after the CSE pass (tensor form).
// No arguments: inputs are four i32 interface constants plus a read-only
// 8640x3200 f16 binding; the result goes to a write-only ?x540x3200x16x1 f16
// binding. Work is distributed over workgroups by the three outer scf.for loops.
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%cst = arith.constant 0.000000e+00 : f16 | |
%c16 = arith.constant 16 : index | |
%c64 = arith.constant 64 : index | |
%c1 = arith.constant 1 : index | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
// %8 = index((constant1 << 32) | constant0): offset of the output subspan.
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
// %13 = index((constant3 << 32) | constant2): dynamic leading output dimension.
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
// Input (binding 0, ReadOnly) and output (binding 1, writeonly, sized by %13).
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
// Per-dimension loop lower bounds (id * tile) and steps (count * tile);
// tiles are 64 along z, 4 along y, 64 along x.
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
%18 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
// 1x64x16 staging tensor for the reshape performed in the innermost loop.
%22 = tensor.empty() : tensor<1x64x16xf16> | |
scf.for %arg0 = %16 to %13 step %17 { | |
// Size of this leading-dimension tile, clamped to the remaining extent (at most 64).
%23 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
scf.for %arg1 = %18 to %c540 step %19 { | |
// Input row offset corresponding to output index %arg1 (16 rows per index).
%24 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
scf.for %arg2 = %20 to %c3200 step %21 { | |
// Destination tile (initial value of the loop-carried tensor) and 64x64 source tile.
%25 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%23, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} -> tensor<?x4x64x16x1xf16> | |
%26 = flow.dispatch.tensor.load %14, offsets = [%24, %arg2], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<64x64xf16> | |
// Outer inner-loop: one leading index at a time. The source read below does not
// use %arg3, so each leading index receives the same data.
%27 = scf.for %arg3 = %c0 to %23 step %c1 iter_args(%arg4 = %25) -> (tensor<?x4x64x16x1xf16>) { | |
// 16 source columns per iteration.
%28 = scf.for %arg5 = %c0 to %c64 step %c16 iter_args(%arg6 = %arg4) -> (tensor<?x4x64x16x1xf16>) { | |
%29 = vector.transfer_read %26[%c0, %arg5], %cst {in_bounds = [true, true]} : tensor<64x64xf16>, vector<64x16xf16> | |
%30 = vector.broadcast %29 : vector<64x16xf16> to vector<1x64x16xf16> | |
// Round-trip through the staging tensor so the slice can be viewed as 1x4x16x16x1.
%31 = vector.transfer_write %30, %22[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x64x16xf16>, tensor<1x64x16xf16> | |
%expanded = tensor.expand_shape %31 [[0], [1, 2], [3, 4]] : tensor<1x64x16xf16> into tensor<1x4x16x16x1xf16> | |
%32 = vector.transfer_read %expanded[%c0, %c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true, true, true]} : tensor<1x4x16x16x1xf16>, vector<1x4x16x16x1xf16> | |
// Permutation [0, 1, 3, 2, 4] exchanges dimensions 2 and 3.
%33 = vector.transpose %32, [0, 1, 3, 2, 4] : vector<1x4x16x16x1xf16> to vector<1x4x16x16x1xf16> | |
%34 = vector.transfer_write %33, %arg6[%arg3, %c0, %arg5, %c0, %c0] {in_bounds = [true, true, true, true, true]} : vector<1x4x16x16x1xf16>, tensor<?x4x64x16x1xf16> | |
scf.yield %34 : tensor<?x4x64x16x1xf16> | |
} | |
scf.yield %28 : tensor<?x4x64x16x1xf16> | |
} | |
// Write the completed tile to the output binding at the offsets used for the load.
flow.dispatch.tensor.store %27, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%23, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After EliminateEmptyTensors (iree-eliminate-empty-tensors) //----- // | |
// Dispatch function as printed after the EliminateEmptyTensors pass (tensor form).
// The scratch value %22 is still a tensor.empty in this snapshot.
// Overall shape of the computation: three workgroup-distributed loops select a
// [%23, 4, 64, 16, 1] output tile and a 64x64 input tile; two inner loops fill
// the output tile from the input tile 16 columns at a time.
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%cst = arith.constant 0.000000e+00 : f16 | |
%c16 = arith.constant 16 : index | |
%c64 = arith.constant 64 : index | |
%c1 = arith.constant 1 : index | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
// Two i32 constants -> one 64-bit index (%8), used as the output subspan offset.
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
// Two i32 constants -> one 64-bit index (%13), the dynamic output dimension.
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
// Binding 0 is the read-only source; binding 1 is the write-only destination.
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
// Workgroup-cyclic distribution: start at id * tile, advance by count * tile
// (tile = 64 for z and x, 4 for y).
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
%18 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
// Scratch tensor written and re-read inside the innermost loop.
%22 = tensor.empty() : tensor<1x64x16xf16> | |
scf.for %arg0 = %16 to %13 step %17 { | |
// min(%13 - %arg0, 64): extent of this tile along the dynamic dimension.
%23 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
scf.for %arg1 = %18 to %c540 step %19 { | |
// %arg1 * 16 gives the first input row for this tile.
%24 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
scf.for %arg2 = %20 to %c3200 step %21 { | |
// Load the destination tile (loop-carried init) and the matching 64x64 source tile.
%25 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%23, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} -> tensor<?x4x64x16x1xf16> | |
%26 = flow.dispatch.tensor.load %14, offsets = [%24, %arg2], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<64x64xf16> | |
// Fill the tile: %arg3 indexes the leading dim, %arg5 the source column block.
%27 = scf.for %arg3 = %c0 to %23 step %c1 iter_args(%arg4 = %25) -> (tensor<?x4x64x16x1xf16>) { | |
%28 = scf.for %arg5 = %c0 to %c64 step %c16 iter_args(%arg6 = %arg4) -> (tensor<?x4x64x16x1xf16>) { | |
// 64x16 slice of the source tile starting at column %arg5; %cst is the padding value operand.
%29 = vector.transfer_read %26[%c0, %arg5], %cst {in_bounds = [true, true]} : tensor<64x64xf16>, vector<64x16xf16> | |
%30 = vector.broadcast %29 : vector<64x16xf16> to vector<1x64x16xf16> | |
%31 = vector.transfer_write %30, %22[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x64x16xf16>, tensor<1x64x16xf16> | |
// Reassociation [[0], [1, 2], [3, 4]]: 1x64x16 -> 1x(4x16)x(16x1).
%expanded = tensor.expand_shape %31 [[0], [1, 2], [3, 4]] : tensor<1x64x16xf16> into tensor<1x4x16x16x1xf16> | |
%32 = vector.transfer_read %expanded[%c0, %c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true, true, true]} : tensor<1x4x16x16x1xf16>, vector<1x4x16x16x1xf16> | |
// Swap the two 16-sized dims (positions 2 and 3) before writing to the tile.
%33 = vector.transpose %32, [0, 1, 3, 2, 4] : vector<1x4x16x16x1xf16> to vector<1x4x16x16x1xf16> | |
%34 = vector.transfer_write %33, %arg6[%arg3, %c0, %arg5, %c0, %c0] {in_bounds = [true, true, true, true, true]} : vector<1x4x16x16x1xf16>, tensor<?x4x64x16x1xf16> | |
scf.yield %34 : tensor<?x4x64x16x1xf16> | |
} | |
scf.yield %28 : tensor<?x4x64x16x1xf16> | |
} | |
// Commit the tile to the output binding.
flow.dispatch.tensor.store %27, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%23, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After EmptyTensorToAllocTensor (empty-tensor-to-alloc-tensor) //----- // | |
// Dispatch function as printed after the EmptyTensorToAllocTensor pass (tensor form).
// In this snapshot the 1x64x16 scratch value %22 is a bufferization.alloc_tensor.
// The function reads a 8640x3200 f16 input binding and fills a
// ?x540x3200x16x1 f16 output binding tile by tile.
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%cst = arith.constant 0.000000e+00 : f16 | |
%c16 = arith.constant 16 : index | |
%c64 = arith.constant 64 : index | |
%c1 = arith.constant 1 : index | |
%c3200 = arith.constant 3200 : index | |
%c540 = arith.constant 540 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
// %8: 64-bit value assembled from constants 0 (low) and 1 (high); output subspan offset.
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
// %13: 64-bit value assembled from constants 2 (low) and 3 (high); dynamic output dim.
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
// Source (read-only) and destination (write-only) bindings.
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
// Loop bounds derived from workgroup ids/counts: z and x scale by 64, y by 4.
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
%18 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
// Scratch tensor, defined once outside all loops and written in the innermost loop.
%22 = bufferization.alloc_tensor() : tensor<1x64x16xf16> | |
scf.for %arg0 = %16 to %13 step %17 { | |
// Tile extent along the dynamic dimension: min(%13 - %arg0, 64).
%23 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
scf.for %arg1 = %18 to %c540 step %19 { | |
// First input row for this iteration: %arg1 * 16.
%24 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
scf.for %arg2 = %20 to %c3200 step %21 { | |
// %25 seeds the loop-carried destination tile; %26 is the 64x64 source tile.
%25 = flow.dispatch.tensor.load %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%23, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} -> tensor<?x4x64x16x1xf16> | |
%26 = flow.dispatch.tensor.load %14, offsets = [%24, %arg2], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8640x3200xf16>> -> tensor<64x64xf16> | |
// For each leading index (%arg3) and each block of 16 source columns (%arg5) ...
%27 = scf.for %arg3 = %c0 to %23 step %c1 iter_args(%arg4 = %25) -> (tensor<?x4x64x16x1xf16>) { | |
%28 = scf.for %arg5 = %c0 to %c64 step %c16 iter_args(%arg6 = %arg4) -> (tensor<?x4x64x16x1xf16>) { | |
// ... read the 64x16 slice and stage it in the scratch tensor with a unit leading dim,
%29 = vector.transfer_read %26[%c0, %arg5], %cst {in_bounds = [true, true]} : tensor<64x64xf16>, vector<64x16xf16> | |
%30 = vector.broadcast %29 : vector<64x16xf16> to vector<1x64x16xf16> | |
%31 = vector.transfer_write %30, %22[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x64x16xf16>, tensor<1x64x16xf16> | |
// ... reinterpret it as 1x4x16x16x1 and swap dims 2 and 3,
%expanded = tensor.expand_shape %31 [[0], [1, 2], [3, 4]] : tensor<1x64x16xf16> into tensor<1x4x16x16x1xf16> | |
%32 = vector.transfer_read %expanded[%c0, %c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true, true, true]} : tensor<1x4x16x16x1xf16>, vector<1x4x16x16x1xf16> | |
%33 = vector.transpose %32, [0, 1, 3, 2, 4] : vector<1x4x16x16x1xf16> to vector<1x4x16x16x1xf16> | |
// ... and write it into the destination tile at [%arg3, 0, %arg5, 0, 0].
%34 = vector.transfer_write %33, %arg6[%arg3, %c0, %arg5, %c0, %c0] {in_bounds = [true, true, true, true, true]} : vector<1x4x16x16x1xf16>, tensor<?x4x64x16x1xf16> | |
scf.yield %34 : tensor<?x4x64x16x1xf16> | |
} | |
scf.yield %28 : tensor<?x4x64x16x1xf16> | |
} | |
// Store the finished tile back to the output binding.
flow.dispatch.tensor.store %27, %15, offsets = [%arg0, %arg1, %arg2, 0, 0], sizes = [%23, 4, 64, 16, 1], strides = [1, 1, 1, 1, 1] : tensor<?x4x64x16x1xf16> -> !flow.dispatch.tensor<writeonly:tensor<?x540x3200x16x1xf16>>{%13} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After IREEComprehensiveBufferize (iree-codegen-iree-comprehensive-bufferize) //----- // | |
// Dispatch function as printed after the IREEComprehensiveBufferize pass (memref form).
// Bindings are now memrefs, the scratch buffer is a memref.alloca, tiles are
// memref.subview ops, and the inner loops carry a memref through iter_args.
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c32_i64 = arith.constant 32 : i64 | |
%c0 = arith.constant 0 : index | |
%c540 = arith.constant 540 : index | |
%c3200 = arith.constant 3200 : index | |
%c1 = arith.constant 1 : index | |
%c64 = arith.constant 64 : index | |
%c16 = arith.constant 16 : index | |
%cst = arith.constant 0.000000e+00 : f16 | |
// 1x64x16 f16 scratch buffer, 64-aligned, allocated once at function entry.
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x64x16xf16> | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
// %8 = index((constant1 << 32) | constant0): offset of the output subspan.
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
// %13 = index((constant3 << 32) | constant2): dynamic leading output dimension.
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
// Read-only 8640x3200 input; assumed 64-aligned.
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
memref.assume_alignment %14, 64 : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
// Output memref with explicit strides and a dynamic offset; only 1-alignment is assumed.
// NOTE(review): presumably the weak alignment follows from the dynamic offset %8 - confirm.
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>{%13} | |
memref.assume_alignment %15, 1 : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
// Loop start = workgroup id * tile, loop step = workgroup count * tile
// (tile = 64 for z, 4 for y, 64 for x).
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
%18 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg0 = %16 to %13 step %17 { | |
// Tile extent along the dynamic dimension: min(%13 - %arg0, 64).
%22 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
scf.for %arg1 = %18 to %c540 step %19 { | |
// First input row for this tile: %arg1 * 16.
%23 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
scf.for %arg2 = %20 to %c3200 step %21 { | |
// %subview: destination tile inside the output; %subview_0: 64x64 source tile.
%subview = memref.subview %15[%arg0, %arg1, %arg2, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_0 = memref.subview %14[%23, %arg2] [64, 64] [1, 1] : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> to memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
// Both loops yield their iter_arg unchanged, so %24 is the same memref as %subview;
// the writes below happen in place.
%24 = scf.for %arg3 = %c0 to %22 step %c1 iter_args(%arg4 = %subview) -> (memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) { | |
%25 = scf.for %arg5 = %c0 to %c64 step %c16 iter_args(%arg6 = %arg4) -> (memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) { | |
// Read a 64x16 slice of the source tile and stage it in the scratch buffer.
%26 = vector.transfer_read %subview_0[%c0, %arg5], %cst {in_bounds = [true, true]} : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<64x16xf16> | |
%27 = vector.broadcast %26 : vector<64x16xf16> to vector<1x64x16xf16> | |
vector.transfer_write %27, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x64x16xf16>, memref<1x64x16xf16> | |
// View the scratch buffer as 1x4x16x16x1, reload it, and swap dims 2 and 3.
%expand_shape = memref.expand_shape %alloca [[0], [1, 2], [3, 4]] : memref<1x64x16xf16> into memref<1x4x16x16x1xf16> | |
%28 = vector.transfer_read %expand_shape[%c0, %c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true, true, true]} : memref<1x4x16x16x1xf16>, vector<1x4x16x16x1xf16> | |
%29 = vector.transpose %28, [0, 1, 3, 2, 4] : vector<1x4x16x16x1xf16> to vector<1x4x16x16x1xf16> | |
// Write into the destination tile at [%arg3, 0, %arg5, 0, 0].
vector.transfer_write %29, %arg6[%arg3, %c0, %arg5, %c0, %c0] {in_bounds = [true, true, true, true, true]} : vector<1x4x16x16x1xf16>, memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
scf.yield %arg6 : memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
} | |
scf.yield %25 : memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
} | |
// %subview_1 has the same offsets, sizes and strides as %subview, and the
// identity-mapped linalg.generic copies %24 into it element by element.
// NOTE(review): source and destination appear to be the same memory region, so this
// looks like a self-copy; presumably a later pass removes it - confirm.
%subview_1 = memref.subview %15[%arg0, %arg1, %arg2, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>, affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%24 : memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%subview_1 : memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After ResolveShapedTypeResultDims (resolve-shaped-type-result-dims) //----- // | |
// Dispatch function as printed after the ResolveShapedTypeResultDims pass (memref form).
// The function reads tiles of the 8640x3200 f16 input memref and writes them,
// reshaped and transposed, into subviews of the ?x540x3200x16x1 f16 output memref.
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c32_i64 = arith.constant 32 : i64 | |
%c0 = arith.constant 0 : index | |
%c540 = arith.constant 540 : index | |
%c3200 = arith.constant 3200 : index | |
%c1 = arith.constant 1 : index | |
%c64 = arith.constant 64 : index | |
%c16 = arith.constant 16 : index | |
%cst = arith.constant 0.000000e+00 : f16 | |
// Scratch buffer (1x64x16 f16, alignment 64) shared by every inner-loop iteration.
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x64x16xf16> | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
// Constants 0/1 -> %8 (low/high 32 bits): offset operand of the output subspan.
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
// Constants 2/3 -> %13 (low/high 32 bits): dynamic size of the output's leading dim.
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
// Input binding (ReadOnly) with an assumed alignment of 64.
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
memref.assume_alignment %14, 64 : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
// Output binding: strided layout with dynamic offset, assumed alignment of 1.
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>{%13} | |
memref.assume_alignment %15, 1 : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
// Per-dimension loop lower bounds (id * tile) and steps (count * tile);
// tiles are 64 along z, 4 along y, 64 along x.
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
%18 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg0 = %16 to %13 step %17 { | |
// Size of this leading-dimension tile: min(%13 - %arg0, 64).
%22 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
scf.for %arg1 = %18 to %c540 step %19 { | |
// Input row offset for output index %arg1 (16 rows per index).
%23 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
scf.for %arg2 = %20 to %c3200 step %21 { | |
// Destination tile view and 64x64 source tile view.
%subview = memref.subview %15[%arg0, %arg1, %arg2, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_0 = memref.subview %14[%23, %arg2] [64, 64] [1, 1] : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> to memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
// The loops thread %subview through iter_args and yield it unchanged; the
// transfer_write below mutates the destination tile in place.
%24 = scf.for %arg3 = %c0 to %22 step %c1 iter_args(%arg4 = %subview) -> (memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) { | |
%25 = scf.for %arg5 = %c0 to %c64 step %c16 iter_args(%arg6 = %arg4) -> (memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) { | |
// 64x16 slice at column %arg5; the read does not depend on %arg3.
%26 = vector.transfer_read %subview_0[%c0, %arg5], %cst {in_bounds = [true, true]} : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<64x16xf16> | |
%27 = vector.broadcast %26 : vector<64x16xf16> to vector<1x64x16xf16> | |
// Stage in the scratch buffer so it can be re-read with a 1x4x16x16x1 shape.
vector.transfer_write %27, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x64x16xf16>, memref<1x64x16xf16> | |
%expand_shape = memref.expand_shape %alloca [[0], [1, 2], [3, 4]] : memref<1x64x16xf16> into memref<1x4x16x16x1xf16> | |
%28 = vector.transfer_read %expand_shape[%c0, %c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true, true, true]} : memref<1x4x16x16x1xf16>, vector<1x4x16x16x1xf16> | |
// Permutation [0, 1, 3, 2, 4] exchanges dimensions 2 and 3.
%29 = vector.transpose %28, [0, 1, 3, 2, 4] : vector<1x4x16x16x1xf16> to vector<1x4x16x16x1xf16> | |
vector.transfer_write %29, %arg6[%arg3, %c0, %arg5, %c0, %c0] {in_bounds = [true, true, true, true, true]} : vector<1x4x16x16x1xf16>, memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
scf.yield %arg6 : memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
} | |
scf.yield %25 : memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
} | |
// Element-wise identity copy from the loop result %24 into %subview_1, which is
// built with the same offsets/sizes/strides as %subview.
// NOTE(review): this looks like a copy of a region onto itself - confirm that a
// later canonicalization folds it away.
%subview_1 = memref.subview %15[%arg0, %arg1, %arg2, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>, affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%24 : memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%subview_1 : memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After Canonicalizer (canonicalize) //----- // | |
// Pack dispatch as it stands after canonicalization. Each 64x64 tile of the 8640x3200 f16 | |
// input (%14) is read in 64x16 column strips, transposed within 16x16 sub-tiles through a | |
// scratch alloca, and written into the ?x540x3200x16x1 output (%15) once for every index | |
// %arg3 of the dynamic leading dimension. | |
// Relative to the tail of the preceding dump, the two innermost loops no longer yield a | |
// memref; they write straight into %subview. | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c32_i64 = arith.constant 32 : i64 | |
%c0 = arith.constant 0 : index | |
%c540 = arith.constant 540 : index | |
%c3200 = arith.constant 3200 : index | |
%c1 = arith.constant 1 : index | |
%c64 = arith.constant 64 : index | |
%c16 = arith.constant 16 : index | |
%cst = arith.constant 0.000000e+00 : f16 | |
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x64x16xf16> | |
// Four i32 constants are loaded and recombined pairwise as (low | high << 32): | |
// %8 (from %0, %1) is the byte offset of binding 1; %13 (from %2, %3) is its dynamic dim. | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
// %14 is the read-only source; %15 is the destination with a dynamic offset and leading dim. | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
memref.assume_alignment %14, 64 : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>{%13} | |
// NOTE(review): only alignment 1 is assumed for %15 although the subspan declares | |
// alignment(64); presumably a consequence of the dynamic offset %8 - confirm. | |
memref.assume_alignment %15, 1 : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
// Workgroup distribution: each of the three outer loops starts at id * tile and steps by | |
// count * tile, with tile = 64 (z), 4 (y) and 64 (x). | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
%18 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg0 = %16 to %13 step %17 { | |
// %22 = min(%13 - %arg0, 64): extent of this tile along the dynamic dimension. | |
%22 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
scf.for %arg1 = %18 to %c540 step %19 { | |
// %23 = %arg1 * 16: first source row covered by the four 16-row tiles starting at %arg1. | |
%23 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
scf.for %arg2 = %20 to %c3200 step %21 { | |
%subview = memref.subview %15[%arg0, %arg1, %arg2, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_0 = memref.subview %14[%23, %arg2] [64, 64] [1, 1] : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> to memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
// %arg3 only selects the destination slice; the value read and transposed below depends | |
// solely on %arg4, so the same data is written for every %arg3. | |
scf.for %arg3 = %c0 to %22 step %c1 { | |
scf.for %arg4 = %c0 to %c64 step %c16 { | |
// Read a 64x16 strip, stage it in %alloca, view it as 1x4x16x16x1, swap dims 2 and 3. | |
%24 = vector.transfer_read %subview_0[%c0, %arg4], %cst {in_bounds = [true, true]} : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<64x16xf16> | |
%25 = vector.broadcast %24 : vector<64x16xf16> to vector<1x64x16xf16> | |
vector.transfer_write %25, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x64x16xf16>, memref<1x64x16xf16> | |
%expand_shape = memref.expand_shape %alloca [[0], [1, 2], [3, 4]] : memref<1x64x16xf16> into memref<1x4x16x16x1xf16> | |
%26 = vector.transfer_read %expand_shape[%c0, %c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true, true, true]} : memref<1x4x16x16x1xf16>, vector<1x4x16x16x1xf16> | |
%27 = vector.transpose %26, [0, 1, 3, 2, 4] : vector<1x4x16x16x1xf16> to vector<1x4x16x16x1xf16> | |
vector.transfer_write %27, %subview[%arg3, %c0, %arg4, %c0, %c0] {in_bounds = [true, true, true, true, true]} : vector<1x4x16x16x1xf16>, memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
} | |
} | |
// %subview_1 is built from exactly the same operands as %subview, so the generic below | |
// copies the tile onto the same region of %15. | |
%subview_1 = memref.subview %15[%arg0, %arg1, %arg2, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>, affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%subview : memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%subview_1 : memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After CSE (cse) //----- // | |
// Pack dispatch after CSE. The loop nest tiles the 8640x3200 f16 input (%14) into 64x64 | |
// blocks, transposes 16x16 sub-tiles through %alloca, and writes them into the | |
// ?x540x3200x16x1 output (%15) for every index %arg3 of the dynamic leading dimension. | |
// Difference from the preceding dump: the second, operand-identical subview of %15 has been | |
// eliminated, so the trailing linalg.generic now names %subview as both its input and output. | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c32_i64 = arith.constant 32 : i64 | |
%c0 = arith.constant 0 : index | |
%c540 = arith.constant 540 : index | |
%c3200 = arith.constant 3200 : index | |
%c1 = arith.constant 1 : index | |
%c64 = arith.constant 64 : index | |
%c16 = arith.constant 16 : index | |
%cst = arith.constant 0.000000e+00 : f16 | |
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x64x16xf16> | |
// Four i32 constants are recombined pairwise as (low | high << 32): | |
// %8 (from %0, %1) is the offset of binding 1; %13 (from %2, %3) is its dynamic dim. | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
// %14 is the read-only source; %15 is the destination with a dynamic offset and leading dim. | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
memref.assume_alignment %14, 64 : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>{%13} | |
memref.assume_alignment %15, 1 : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
// Workgroup distribution: each outer loop starts at id * tile and steps by count * tile, | |
// with tile = 64 (z), 4 (y) and 64 (x). | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
%18 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg0 = %16 to %13 step %17 { | |
// %22 = min(%13 - %arg0, 64): extent of this tile along the dynamic dimension. | |
%22 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
scf.for %arg1 = %18 to %c540 step %19 { | |
// %23 = %arg1 * 16: first source row for this group of four 16-row tiles. | |
%23 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
scf.for %arg2 = %20 to %c3200 step %21 { | |
%subview = memref.subview %15[%arg0, %arg1, %arg2, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_0 = memref.subview %14[%23, %arg2] [64, 64] [1, 1] : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> to memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
// %arg3 only selects the destination slice; the data written depends solely on %arg4. | |
scf.for %arg3 = %c0 to %22 step %c1 { | |
scf.for %arg4 = %c0 to %c64 step %c16 { | |
// Read a 64x16 strip, stage it in %alloca, view it as 1x4x16x16x1, swap dims 2 and 3. | |
%24 = vector.transfer_read %subview_0[%c0, %arg4], %cst {in_bounds = [true, true]} : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<64x16xf16> | |
%25 = vector.broadcast %24 : vector<64x16xf16> to vector<1x64x16xf16> | |
vector.transfer_write %25, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x64x16xf16>, memref<1x64x16xf16> | |
%expand_shape = memref.expand_shape %alloca [[0], [1, 2], [3, 4]] : memref<1x64x16xf16> into memref<1x4x16x16x1xf16> | |
%26 = vector.transfer_read %expand_shape[%c0, %c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true, true, true]} : memref<1x4x16x16x1xf16>, vector<1x4x16x16x1xf16> | |
%27 = vector.transpose %26, [0, 1, 3, 2, 4] : vector<1x4x16x16x1xf16> to vector<1x4x16x16x1xf16> | |
vector.transfer_write %27, %subview[%arg3, %c0, %arg4, %c0, %c0] {in_bounds = [true, true, true, true, true]} : vector<1x4x16x16x1xf16>, memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
} | |
} | |
// Identity-mapped copy whose input and output are the same value (%subview). | |
linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>, affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%subview : memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%subview : memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) { | |
^bb0(%in: f16, %out: f16): | |
linalg.yield %in : f16 | |
} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After Canonicalizer (canonicalize) //----- // | |
// Pack dispatch after the second canonicalization. The loop nest tiles the 8640x3200 f16 | |
// input (%14) into 64x64 blocks, transposes 16x16 sub-tiles through %alloca, and writes them | |
// into the ?x540x3200x16x1 output (%15) for every index %arg3 of the dynamic leading dim. | |
// Difference from the preceding dump: the trailing linalg.generic that copied %subview onto | |
// itself is no longer present; the vector.transfer_write is now the only store into %15. | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c32_i64 = arith.constant 32 : i64 | |
%c0 = arith.constant 0 : index | |
%c540 = arith.constant 540 : index | |
%c3200 = arith.constant 3200 : index | |
%c1 = arith.constant 1 : index | |
%c64 = arith.constant 64 : index | |
%c16 = arith.constant 16 : index | |
%cst = arith.constant 0.000000e+00 : f16 | |
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x64x16xf16> | |
// Four i32 constants are recombined pairwise as (low | high << 32): | |
// %8 (from %0, %1) is the offset of binding 1; %13 (from %2, %3) is its dynamic dim. | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
// %14 is the read-only source; %15 is the destination with a dynamic offset and leading dim. | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
memref.assume_alignment %14, 64 : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>{%13} | |
memref.assume_alignment %15, 1 : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
// Workgroup distribution: each outer loop starts at id * tile and steps by count * tile, | |
// with tile = 64 (z), 4 (y) and 64 (x). | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
%18 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg0 = %16 to %13 step %17 { | |
// %22 = min(%13 - %arg0, 64): extent of this tile along the dynamic dimension. | |
%22 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
scf.for %arg1 = %18 to %c540 step %19 { | |
// %23 = %arg1 * 16: first source row for this group of four 16-row tiles. | |
%23 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
scf.for %arg2 = %20 to %c3200 step %21 { | |
%subview = memref.subview %15[%arg0, %arg1, %arg2, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_0 = memref.subview %14[%23, %arg2] [64, 64] [1, 1] : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> to memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
// %arg3 only selects the destination slice; the data written depends solely on %arg4. | |
scf.for %arg3 = %c0 to %22 step %c1 { | |
scf.for %arg4 = %c0 to %c64 step %c16 { | |
// Read a 64x16 strip, stage it in %alloca, view it as 1x4x16x16x1, swap dims 2 and 3. | |
%24 = vector.transfer_read %subview_0[%c0, %arg4], %cst {in_bounds = [true, true]} : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<64x16xf16> | |
%25 = vector.broadcast %24 : vector<64x16xf16> to vector<1x64x16xf16> | |
vector.transfer_write %25, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x64x16xf16>, memref<1x64x16xf16> | |
%expand_shape = memref.expand_shape %alloca [[0], [1, 2], [3, 4]] : memref<1x64x16xf16> into memref<1x4x16x16x1xf16> | |
%26 = vector.transfer_read %expand_shape[%c0, %c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true, true, true]} : memref<1x4x16x16x1xf16>, vector<1x4x16x16x1xf16> | |
%27 = vector.transpose %26, [0, 1, 3, 2, 4] : vector<1x4x16x16x1xf16> to vector<1x4x16x16x1xf16> | |
vector.transfer_write %27, %subview[%arg3, %c0, %arg4, %c0, %c0] {in_bounds = [true, true, true, true, true]} : vector<1x4x16x16x1xf16>, memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
} | |
} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After CleanupBufferAllocView (iree-codegen-cleanup-buffer-alloc-view) //----- // | |
// Pack dispatch after CleanupBufferAllocView. The loop nest tiles the 8640x3200 f16 input | |
// (%14) into 64x64 blocks, transposes 16x16 sub-tiles through %alloca, and writes them into | |
// the ?x540x3200x16x1 output (%15) for every index %arg3 of the dynamic leading dimension. | |
// The IR shows no textual change from the preceding dump: %alloca and both subviews are | |
// still in use and remain. | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c32_i64 = arith.constant 32 : i64 | |
%c0 = arith.constant 0 : index | |
%c540 = arith.constant 540 : index | |
%c3200 = arith.constant 3200 : index | |
%c1 = arith.constant 1 : index | |
%c64 = arith.constant 64 : index | |
%c16 = arith.constant 16 : index | |
%cst = arith.constant 0.000000e+00 : f16 | |
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x64x16xf16> | |
// Four i32 constants are recombined pairwise as (low | high << 32): | |
// %8 (from %0, %1) is the offset of binding 1; %13 (from %2, %3) is its dynamic dim. | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
// %14 is the read-only source; %15 is the destination with a dynamic offset and leading dim. | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
memref.assume_alignment %14, 64 : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>{%13} | |
memref.assume_alignment %15, 1 : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
// Workgroup distribution: each outer loop starts at id * tile and steps by count * tile, | |
// with tile = 64 (z), 4 (y) and 64 (x). | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
%18 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg0 = %16 to %13 step %17 { | |
// %22 = min(%13 - %arg0, 64): extent of this tile along the dynamic dimension. | |
%22 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
scf.for %arg1 = %18 to %c540 step %19 { | |
// %23 = %arg1 * 16: first source row for this group of four 16-row tiles. | |
%23 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
scf.for %arg2 = %20 to %c3200 step %21 { | |
%subview = memref.subview %15[%arg0, %arg1, %arg2, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_0 = memref.subview %14[%23, %arg2] [64, 64] [1, 1] : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> to memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
// %arg3 only selects the destination slice; the data written depends solely on %arg4. | |
scf.for %arg3 = %c0 to %22 step %c1 { | |
scf.for %arg4 = %c0 to %c64 step %c16 { | |
// Read a 64x16 strip, stage it in %alloca, view it as 1x4x16x16x1, swap dims 2 and 3. | |
%24 = vector.transfer_read %subview_0[%c0, %arg4], %cst {in_bounds = [true, true]} : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<64x16xf16> | |
%25 = vector.broadcast %24 : vector<64x16xf16> to vector<1x64x16xf16> | |
vector.transfer_write %25, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x64x16xf16>, memref<1x64x16xf16> | |
%expand_shape = memref.expand_shape %alloca [[0], [1, 2], [3, 4]] : memref<1x64x16xf16> into memref<1x4x16x16x1xf16> | |
%26 = vector.transfer_read %expand_shape[%c0, %c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true, true, true]} : memref<1x4x16x16x1xf16>, vector<1x4x16x16x1xf16> | |
%27 = vector.transpose %26, [0, 1, 3, 2, 4] : vector<1x4x16x16x1xf16> to vector<1x4x16x16x1xf16> | |
vector.transfer_write %27, %subview[%arg3, %c0, %arg4, %c0, %c0] {in_bounds = [true, true, true, true, true]} : vector<1x4x16x16x1xf16>, memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
} | |
} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After RemoveSingleIterationLoop (iree-codegen-remove-single-iteration-loop) //----- // | |
// Pack dispatch after RemoveSingleIterationLoop. The loop nest tiles the 8640x3200 f16 input | |
// (%14) into 64x64 blocks, transposes 16x16 sub-tiles through %alloca, and writes them into | |
// the ?x540x3200x16x1 output (%15) for every index %arg3 of the dynamic leading dimension. | |
// All five scf.for loops of the preceding dump are still present; the IR shows no textual | |
// change. NOTE(review): presumably none of the trip counts could be proven equal to one | |
// (bounds depend on workgroup counts or on the dynamic %13/%22) - confirm against the pass. | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c32_i64 = arith.constant 32 : i64 | |
%c0 = arith.constant 0 : index | |
%c540 = arith.constant 540 : index | |
%c3200 = arith.constant 3200 : index | |
%c1 = arith.constant 1 : index | |
%c64 = arith.constant 64 : index | |
%c16 = arith.constant 16 : index | |
%cst = arith.constant 0.000000e+00 : f16 | |
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x64x16xf16> | |
// Four i32 constants are recombined pairwise as (low | high << 32): | |
// %8 (from %0, %1) is the offset of binding 1; %13 (from %2, %3) is its dynamic dim. | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
// %14 is the read-only source; %15 is the destination with a dynamic offset and leading dim. | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
memref.assume_alignment %14, 64 : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>{%13} | |
memref.assume_alignment %15, 1 : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
// Workgroup distribution: each outer loop starts at id * tile and steps by count * tile, | |
// with tile = 64 (z), 4 (y) and 64 (x). | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
%18 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg0 = %16 to %13 step %17 { | |
// %22 = min(%13 - %arg0, 64): extent of this tile along the dynamic dimension. | |
%22 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
scf.for %arg1 = %18 to %c540 step %19 { | |
// %23 = %arg1 * 16: first source row for this group of four 16-row tiles. | |
%23 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
scf.for %arg2 = %20 to %c3200 step %21 { | |
%subview = memref.subview %15[%arg0, %arg1, %arg2, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_0 = memref.subview %14[%23, %arg2] [64, 64] [1, 1] : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> to memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
// %arg3 only selects the destination slice; the data written depends solely on %arg4. | |
scf.for %arg3 = %c0 to %22 step %c1 { | |
// Constant bounds 0..64 step 16: four iterations, one per 16-column strip of the tile. | |
scf.for %arg4 = %c0 to %c64 step %c16 { | |
%24 = vector.transfer_read %subview_0[%c0, %arg4], %cst {in_bounds = [true, true]} : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<64x16xf16> | |
%25 = vector.broadcast %24 : vector<64x16xf16> to vector<1x64x16xf16> | |
vector.transfer_write %25, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x64x16xf16>, memref<1x64x16xf16> | |
%expand_shape = memref.expand_shape %alloca [[0], [1, 2], [3, 4]] : memref<1x64x16xf16> into memref<1x4x16x16x1xf16> | |
%26 = vector.transfer_read %expand_shape[%c0, %c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true, true, true]} : memref<1x4x16x16x1xf16>, vector<1x4x16x16x1xf16> | |
%27 = vector.transpose %26, [0, 1, 3, 2, 4] : vector<1x4x16x16x1xf16> to vector<1x4x16x16x1xf16> | |
vector.transfer_write %27, %subview[%arg3, %c0, %arg4, %c0, %c0] {in_bounds = [true, true, true, true, true]} : vector<1x4x16x16x1xf16>, memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
} | |
} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After LLVMCPUDropVectorUnitDims (iree-llvmcpu-drop-vector-unit-dims) //----- // | |
// Pack dispatch after LLVMCPUDropVectorUnitDims. The loop nest is unchanged from the | |
// preceding dump, but the vector transfers in the innermost body now operate on rank-reduced | |
// subviews: the 1x64x16 write to %alloca became a 64x16 write, the 1x4x16x16x1 read became a | |
// 4x16x16 read, and the final store became a 4x16x16 write into a view of %subview without | |
// its trailing unit dimension. The transpose itself still works on the 5-D vector, so it is | |
// bracketed by shape_cast/broadcast before and extract/shape_cast after. | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c32_i64 = arith.constant 32 : i64 | |
%c0 = arith.constant 0 : index | |
%c540 = arith.constant 540 : index | |
%c3200 = arith.constant 3200 : index | |
%c1 = arith.constant 1 : index | |
%c64 = arith.constant 64 : index | |
%c16 = arith.constant 16 : index | |
%cst = arith.constant 0.000000e+00 : f16 | |
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x64x16xf16> | |
// Four i32 constants are recombined pairwise as (low | high << 32): | |
// %8 (from %0, %1) is the offset of binding 1; %13 (from %2, %3) is its dynamic dim. | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
// %14 is the read-only source; %15 is the destination with a dynamic offset and leading dim. | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
memref.assume_alignment %14, 64 : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>{%13} | |
memref.assume_alignment %15, 1 : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
// Workgroup distribution: each outer loop starts at id * tile and steps by count * tile, | |
// with tile = 64 (z), 4 (y) and 64 (x). | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
%18 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg0 = %16 to %13 step %17 { | |
// %22 = min(%13 - %arg0, 64): extent of this tile along the dynamic dimension. | |
%22 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
scf.for %arg1 = %18 to %c540 step %19 { | |
// %23 = %arg1 * 16: first source row for this group of four 16-row tiles. | |
%23 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
scf.for %arg2 = %20 to %c3200 step %21 { | |
%subview = memref.subview %15[%arg0, %arg1, %arg2, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_0 = memref.subview %14[%23, %arg2] [64, 64] [1, 1] : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> to memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
// %arg3 only selects the destination slice; the data written depends solely on %arg4. | |
scf.for %arg3 = %c0 to %22 step %c1 { | |
scf.for %arg4 = %c0 to %c64 step %c16 { | |
%24 = vector.transfer_read %subview_0[%c0, %arg4], %cst {in_bounds = [true, true]} : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<64x16xf16> | |
// Stage the strip through a 64x16 view of %alloca (leading unit dim dropped), so the | |
// vector.broadcast to 1x64x16 seen in the preceding dump is no longer needed. | |
%subview_1 = memref.subview %alloca[0, 0, 0] [1, 64, 16] [1, 1, 1] : memref<1x64x16xf16> to memref<64x16xf16> | |
vector.transfer_write %24, %subview_1[%c0, %c0] {in_bounds = [true, true]} : vector<64x16xf16>, memref<64x16xf16> | |
%expand_shape = memref.expand_shape %alloca [[0], [1, 2], [3, 4]] : memref<1x64x16xf16> into memref<1x4x16x16x1xf16> | |
// Two successive rank-reducing subviews drop first the trailing, then the leading unit dim. | |
%subview_2 = memref.subview %expand_shape[0, 0, 0, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : memref<1x4x16x16x1xf16> to memref<1x4x16x16xf16, strided<[1024, 256, 16, 1]>> | |
%subview_3 = memref.subview %subview_2[0, 0, 0, 0] [1, 4, 16, 16] [1, 1, 1, 1] : memref<1x4x16x16xf16, strided<[1024, 256, 16, 1]>> to memref<4x16x16xf16> | |
%25 = vector.transfer_read %subview_3[%c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<4x16x16xf16>, vector<4x16x16xf16> | |
// Restore the unit dims so the 5-D transpose keeps its original permutation. | |
%26 = vector.shape_cast %25 : vector<4x16x16xf16> to vector<4x16x16x1xf16> | |
%27 = vector.broadcast %26 : vector<4x16x16x1xf16> to vector<1x4x16x16x1xf16> | |
%28 = vector.transpose %27, [0, 1, 3, 2, 4] : vector<1x4x16x16x1xf16> to vector<1x4x16x16x1xf16> | |
// Strip the unit dims again (extract the leading one, shape_cast away the trailing one) | |
// and store through %subview_4, a view of %subview without its trailing unit dim. | |
%29 = vector.extract %28[0] : vector<4x16x16x1xf16> from vector<1x4x16x16x1xf16> | |
%subview_4 = memref.subview %subview[0, 0, 0, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%30 = vector.shape_cast %29 : vector<4x16x16x1xf16> to vector<4x16x16xf16> | |
vector.transfer_write %30, %subview_4[%arg3, %c0, %arg4, %c0] {in_bounds = [true, true, true]} : vector<4x16x16xf16>, memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
} | |
} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After LLVMCPUVirtualVectorLowering (iree-llvmcpu-virtual-vector-lowering) //----- // | |
// Dispatch function as printed after the LLVMCPUVirtualVectorLowering pass.
// It loops over tiles of the ?x540x3200x16x1 f16 buffer (binding 1) and, per
// tile, moves a 64x64 f16 window of the 8640x3200 buffer (binding 0) through a
// 1x64x16 scratch buffer, swapping the two 16-wide dims before the final store.
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c32_i64 = arith.constant 32 : i64 | |
%c0 = arith.constant 0 : index | |
%c540 = arith.constant 540 : index | |
%c3200 = arith.constant 3200 : index | |
%c1 = arith.constant 1 : index | |
%c64 = arith.constant 64 : index | |
%c16 = arith.constant 16 : index | |
%cst = arith.constant 0.000000e+00 : f16 | |
// Scratch buffer used inside the innermost loop to reshape a 64x16 slab.
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x64x16xf16> | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
// %8 = index(zext(%0) | (zext(%1) << 32)); used below as the offset of binding 1.
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
// %13 = index(zext(%2) | (zext(%3) << 32)); used below as the dynamic leading
// dimension of binding 1 and as the upper bound of the outermost loop.
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
// Binding 0: read-only 8640x3200 f16 source.
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
memref.assume_alignment %14, 64 : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
// Binding 1: ?x540x3200x16x1 f16 destination at dynamic offset %8.
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>{%13} | |
memref.assume_alignment %15, 1 : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
// Loop starts are workgroup id * tile size and loop steps are workgroup
// count * tile size, with tile sizes 64 (z), 4 (y) and 64 (x).
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
%18 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg0 = %16 to %13 step %17 { | |
// %22 = min(%13 - %arg0, 64): extent of this tile along the dynamic dim.
%22 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
scf.for %arg1 = %18 to %c540 step %19 { | |
// %23 = %arg1 * 16: first row of the source window for this tile.
%23 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
scf.for %arg2 = %20 to %c3200 step %21 { | |
// Destination tile [%22, 4, 64, 16, 1] and source window [64, 64].
%subview = memref.subview %15[%arg0, %arg1, %arg2, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_0 = memref.subview %14[%23, %arg2] [64, 64] [1, 1] : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> to memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
// The values loaded below do not depend on %arg3; each %arg3 iteration
// stores the same data at a different leading index of the destination.
scf.for %arg3 = %c0 to %22 step %c1 { | |
scf.for %arg4 = %c0 to %c64 step %c16 { | |
// Load a 64x16 slab of the source window and stage it in the scratch buffer.
%24 = vector.transfer_read %subview_0[%c0, %arg4], %cst {in_bounds = [true, true]} : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<64x16xf16> | |
%subview_1 = memref.subview %alloca[0, 0, 0] [1, 64, 16] [1, 1, 1] : memref<1x64x16xf16> to memref<64x16xf16> | |
vector.transfer_write %24, %subview_1[%c0, %c0] {in_bounds = [true, true]} : vector<64x16xf16>, memref<64x16xf16> | |
// Reinterpret the scratch buffer as 1x4x16x16x1 and read it back as 4x16x16.
%expand_shape = memref.expand_shape %alloca [[0], [1, 2], [3, 4]] : memref<1x64x16xf16> into memref<1x4x16x16x1xf16> | |
%subview_2 = memref.subview %expand_shape[0, 0, 0, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : memref<1x4x16x16x1xf16> to memref<1x4x16x16xf16, strided<[1024, 256, 16, 1]>> | |
%subview_3 = memref.subview %subview_2[0, 0, 0, 0] [1, 4, 16, 16] [1, 1, 1, 1] : memref<1x4x16x16xf16, strided<[1024, 256, 16, 1]>> to memref<4x16x16xf16> | |
%25 = vector.transfer_read %subview_3[%c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<4x16x16xf16>, vector<4x16x16xf16> | |
// Add unit dims, swap dims 2 and 3 (both of size 16), then drop the unit dims again.
%26 = vector.shape_cast %25 : vector<4x16x16xf16> to vector<4x16x16x1xf16> | |
%27 = vector.broadcast %26 : vector<4x16x16x1xf16> to vector<1x4x16x16x1xf16> | |
%28 = vector.transpose %27, [0, 1, 3, 2, 4] : vector<1x4x16x16x1xf16> to vector<1x4x16x16x1xf16> | |
%29 = vector.extract %28[0] : vector<4x16x16x1xf16> from vector<1x4x16x16x1xf16> | |
// Rank-reduced view of the destination tile; store the 4x16x16 result at [%arg3, 0, %arg4, 0].
%subview_4 = memref.subview %subview[0, 0, 0, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%30 = vector.shape_cast %29 : vector<4x16x16x1xf16> to vector<4x16x16xf16> | |
vector.transfer_write %30, %subview_4[%arg3, %c0, %arg4, %c0] {in_bounds = [true, true, true]} : vector<4x16x16xf16>, memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
} | |
} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After Canonicalizer (canonicalize) //----- // | |
// Dispatch function as printed after the Canonicalizer pass. No visible change
// relative to the dump that precedes it in this file.
// Per tile of the ?x540x3200x16x1 f16 buffer (binding 1), a 64x64 f16 window of
// the 8640x3200 buffer (binding 0) is staged through a 1x64x16 scratch buffer
// and stored with its two 16-wide dims swapped.
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c32_i64 = arith.constant 32 : i64 | |
%c0 = arith.constant 0 : index | |
%c540 = arith.constant 540 : index | |
%c3200 = arith.constant 3200 : index | |
%c1 = arith.constant 1 : index | |
%c64 = arith.constant 64 : index | |
%c16 = arith.constant 16 : index | |
%cst = arith.constant 0.000000e+00 : f16 | |
// Scratch buffer reused by every innermost-loop iteration.
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x64x16xf16> | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
// Constants 0 (low 32 bits) and 1 (high 32 bits) form %8, the offset of binding 1.
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
// Constants 2 (low 32 bits) and 3 (high 32 bits) form %13, the dynamic leading
// dimension of binding 1 and the upper bound of the outermost loop.
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
// Source (binding 0, read-only) and destination (binding 1) buffers.
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
memref.assume_alignment %14, 64 : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>{%13} | |
memref.assume_alignment %15, 1 : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
// Per-dimension loop start (id * tile) and step (count * tile); tiles are
// 64 along z, 4 along y and 64 along x.
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
%18 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg0 = %16 to %13 step %17 { | |
// Clamp the tile extent along the dynamic dim to at most 64.
%22 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
scf.for %arg1 = %18 to %c540 step %19 { | |
// Source row offset is 16 * %arg1.
%23 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
scf.for %arg2 = %20 to %c3200 step %21 { | |
// %subview: destination tile; %subview_0: 64x64 source window.
%subview = memref.subview %15[%arg0, %arg1, %arg2, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_0 = memref.subview %14[%23, %arg2] [64, 64] [1, 1] : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> to memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
// %arg3 only selects the leading destination index; the loads below are
// independent of it.
scf.for %arg3 = %c0 to %22 step %c1 { | |
scf.for %arg4 = %c0 to %c64 step %c16 { | |
// Stage a 64x16 slab (columns %arg4 .. %arg4+15 of the window) in the scratch buffer.
%24 = vector.transfer_read %subview_0[%c0, %arg4], %cst {in_bounds = [true, true]} : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<64x16xf16> | |
%subview_1 = memref.subview %alloca[0, 0, 0] [1, 64, 16] [1, 1, 1] : memref<1x64x16xf16> to memref<64x16xf16> | |
vector.transfer_write %24, %subview_1[%c0, %c0] {in_bounds = [true, true]} : vector<64x16xf16>, memref<64x16xf16> | |
// View the scratch buffer as 1x4x16x16x1, then as 4x16x16, and reload it.
%expand_shape = memref.expand_shape %alloca [[0], [1, 2], [3, 4]] : memref<1x64x16xf16> into memref<1x4x16x16x1xf16> | |
%subview_2 = memref.subview %expand_shape[0, 0, 0, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : memref<1x4x16x16x1xf16> to memref<1x4x16x16xf16, strided<[1024, 256, 16, 1]>> | |
%subview_3 = memref.subview %subview_2[0, 0, 0, 0] [1, 4, 16, 16] [1, 1, 1, 1] : memref<1x4x16x16xf16, strided<[1024, 256, 16, 1]>> to memref<4x16x16xf16> | |
%25 = vector.transfer_read %subview_3[%c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<4x16x16xf16>, vector<4x16x16xf16> | |
// Transpose with permutation [0, 1, 3, 2, 4]; the surrounding casts only add/remove unit dims.
%26 = vector.shape_cast %25 : vector<4x16x16xf16> to vector<4x16x16x1xf16> | |
%27 = vector.broadcast %26 : vector<4x16x16x1xf16> to vector<1x4x16x16x1xf16> | |
%28 = vector.transpose %27, [0, 1, 3, 2, 4] : vector<1x4x16x16x1xf16> to vector<1x4x16x16x1xf16> | |
%29 = vector.extract %28[0] : vector<4x16x16x1xf16> from vector<1x4x16x16x1xf16> | |
// Drop the trailing unit dim of the destination tile and store the result.
%subview_4 = memref.subview %subview[0, 0, 0, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%30 = vector.shape_cast %29 : vector<4x16x16x1xf16> to vector<4x16x16xf16> | |
vector.transfer_write %30, %subview_4[%arg3, %c0, %arg4, %c0] {in_bounds = [true, true, true]} : vector<4x16x16xf16>, memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
} | |
} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After LLVMCPUVectorTransferLowering (iree-llvmcpu-vector-transfer-lowering) //----- // | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%cst = arith.constant dense<0.000000e+00> : vector<4x16x16xf16> | |
%c63 = arith.constant 63 : index | |
%c62 = arith.constant 62 : index | |
%c61 = arith.constant 61 : index | |
%c60 = arith.constant 60 : index | |
%c59 = arith.constant 59 : index | |
%c58 = arith.constant 58 : index | |
%c57 = arith.constant 57 : index | |
%c56 = arith.constant 56 : index | |
%c55 = arith.constant 55 : index | |
%c54 = arith.constant 54 : index | |
%c53 = arith.constant 53 : index | |
%c52 = arith.constant 52 : index | |
%c51 = arith.constant 51 : index | |
%c50 = arith.constant 50 : index | |
%c49 = arith.constant 49 : index | |
%c48 = arith.constant 48 : index | |
%c47 = arith.constant 47 : index | |
%c46 = arith.constant 46 : index | |
%c45 = arith.constant 45 : index | |
%c44 = arith.constant 44 : index | |
%c43 = arith.constant 43 : index | |
%c42 = arith.constant 42 : index | |
%c41 = arith.constant 41 : index | |
%c40 = arith.constant 40 : index | |
%c39 = arith.constant 39 : index | |
%c38 = arith.constant 38 : index | |
%c37 = arith.constant 37 : index | |
%c36 = arith.constant 36 : index | |
%c35 = arith.constant 35 : index | |
%c34 = arith.constant 34 : index | |
%c33 = arith.constant 33 : index | |
%c32 = arith.constant 32 : index | |
%c31 = arith.constant 31 : index | |
%c30 = arith.constant 30 : index | |
%c29 = arith.constant 29 : index | |
%c28 = arith.constant 28 : index | |
%c27 = arith.constant 27 : index | |
%c26 = arith.constant 26 : index | |
%c25 = arith.constant 25 : index | |
%c24 = arith.constant 24 : index | |
%c23 = arith.constant 23 : index | |
%c22 = arith.constant 22 : index | |
%c21 = arith.constant 21 : index | |
%c20 = arith.constant 20 : index | |
%c19 = arith.constant 19 : index | |
%c18 = arith.constant 18 : index | |
%c17 = arith.constant 17 : index | |
%c15 = arith.constant 15 : index | |
%c14 = arith.constant 14 : index | |
%c13 = arith.constant 13 : index | |
%c12 = arith.constant 12 : index | |
%c11 = arith.constant 11 : index | |
%c10 = arith.constant 10 : index | |
%c9 = arith.constant 9 : index | |
%c8 = arith.constant 8 : index | |
%c7 = arith.constant 7 : index | |
%c6 = arith.constant 6 : index | |
%c5 = arith.constant 5 : index | |
%c4 = arith.constant 4 : index | |
%c3 = arith.constant 3 : index | |
%c2 = arith.constant 2 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%c0 = arith.constant 0 : index | |
%c540 = arith.constant 540 : index | |
%c3200 = arith.constant 3200 : index | |
%c1 = arith.constant 1 : index | |
%c64 = arith.constant 64 : index | |
%c16 = arith.constant 16 : index | |
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x64x16xf16> | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
memref.assume_alignment %14, 64 : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>{%13} | |
memref.assume_alignment %15, 1 : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
%18 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg0 = %16 to %13 step %17 { | |
%22 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
scf.for %arg1 = %18 to %c540 step %19 { | |
%23 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
scf.for %arg2 = %20 to %c3200 step %21 { | |
%subview = memref.subview %15[%arg0, %arg1, %arg2, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_0 = memref.subview %14[%23, %arg2] [64, 64] [1, 1] : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> to memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
scf.for %arg3 = %c0 to %22 step %c1 { | |
scf.for %arg4 = %c0 to %c64 step %c16 { | |
%24 = vector.load %subview_0[%c0, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%25 = vector.load %subview_0[%c1, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%26 = vector.load %subview_0[%c2, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%27 = vector.load %subview_0[%c3, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%28 = vector.load %subview_0[%c4, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%29 = vector.load %subview_0[%c5, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%30 = vector.load %subview_0[%c6, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%31 = vector.load %subview_0[%c7, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%32 = vector.load %subview_0[%c8, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%33 = vector.load %subview_0[%c9, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%34 = vector.load %subview_0[%c10, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%35 = vector.load %subview_0[%c11, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%36 = vector.load %subview_0[%c12, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%37 = vector.load %subview_0[%c13, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%38 = vector.load %subview_0[%c14, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%39 = vector.load %subview_0[%c15, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%40 = vector.load %subview_0[%c16, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%41 = vector.load %subview_0[%c17, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%42 = vector.load %subview_0[%c18, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%43 = vector.load %subview_0[%c19, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%44 = vector.load %subview_0[%c20, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%45 = vector.load %subview_0[%c21, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%46 = vector.load %subview_0[%c22, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%47 = vector.load %subview_0[%c23, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%48 = vector.load %subview_0[%c24, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%49 = vector.load %subview_0[%c25, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%50 = vector.load %subview_0[%c26, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%51 = vector.load %subview_0[%c27, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%52 = vector.load %subview_0[%c28, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%53 = vector.load %subview_0[%c29, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%54 = vector.load %subview_0[%c30, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%55 = vector.load %subview_0[%c31, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%56 = vector.load %subview_0[%c32, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%57 = vector.load %subview_0[%c33, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%58 = vector.load %subview_0[%c34, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%59 = vector.load %subview_0[%c35, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%60 = vector.load %subview_0[%c36, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%61 = vector.load %subview_0[%c37, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%62 = vector.load %subview_0[%c38, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%63 = vector.load %subview_0[%c39, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%64 = vector.load %subview_0[%c40, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%65 = vector.load %subview_0[%c41, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%66 = vector.load %subview_0[%c42, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%67 = vector.load %subview_0[%c43, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%68 = vector.load %subview_0[%c44, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%69 = vector.load %subview_0[%c45, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%70 = vector.load %subview_0[%c46, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%71 = vector.load %subview_0[%c47, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%72 = vector.load %subview_0[%c48, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%73 = vector.load %subview_0[%c49, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%74 = vector.load %subview_0[%c50, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%75 = vector.load %subview_0[%c51, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%76 = vector.load %subview_0[%c52, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%77 = vector.load %subview_0[%c53, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%78 = vector.load %subview_0[%c54, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%79 = vector.load %subview_0[%c55, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%80 = vector.load %subview_0[%c56, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%81 = vector.load %subview_0[%c57, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%82 = vector.load %subview_0[%c58, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%83 = vector.load %subview_0[%c59, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%84 = vector.load %subview_0[%c60, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%85 = vector.load %subview_0[%c61, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%86 = vector.load %subview_0[%c62, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%87 = vector.load %subview_0[%c63, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%subview_1 = memref.subview %alloca[0, 0, 0] [1, 64, 16] [1, 1, 1] : memref<1x64x16xf16> to memref<64x16xf16> | |
vector.store %24, %subview_1[%c0, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %25, %subview_1[%c1, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %26, %subview_1[%c2, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %27, %subview_1[%c3, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %28, %subview_1[%c4, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %29, %subview_1[%c5, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %30, %subview_1[%c6, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %31, %subview_1[%c7, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %32, %subview_1[%c8, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %33, %subview_1[%c9, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %34, %subview_1[%c10, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %35, %subview_1[%c11, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %36, %subview_1[%c12, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %37, %subview_1[%c13, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %38, %subview_1[%c14, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %39, %subview_1[%c15, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %40, %subview_1[%c16, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %41, %subview_1[%c17, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %42, %subview_1[%c18, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %43, %subview_1[%c19, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %44, %subview_1[%c20, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %45, %subview_1[%c21, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %46, %subview_1[%c22, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %47, %subview_1[%c23, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %48, %subview_1[%c24, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %49, %subview_1[%c25, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %50, %subview_1[%c26, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %51, %subview_1[%c27, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %52, %subview_1[%c28, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %53, %subview_1[%c29, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %54, %subview_1[%c30, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %55, %subview_1[%c31, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %56, %subview_1[%c32, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %57, %subview_1[%c33, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %58, %subview_1[%c34, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %59, %subview_1[%c35, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %60, %subview_1[%c36, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %61, %subview_1[%c37, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %62, %subview_1[%c38, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %63, %subview_1[%c39, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %64, %subview_1[%c40, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %65, %subview_1[%c41, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %66, %subview_1[%c42, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %67, %subview_1[%c43, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %68, %subview_1[%c44, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %69, %subview_1[%c45, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %70, %subview_1[%c46, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %71, %subview_1[%c47, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %72, %subview_1[%c48, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %73, %subview_1[%c49, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %74, %subview_1[%c50, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %75, %subview_1[%c51, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %76, %subview_1[%c52, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %77, %subview_1[%c53, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %78, %subview_1[%c54, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %79, %subview_1[%c55, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %80, %subview_1[%c56, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %81, %subview_1[%c57, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %82, %subview_1[%c58, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %83, %subview_1[%c59, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %84, %subview_1[%c60, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %85, %subview_1[%c61, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %86, %subview_1[%c62, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %87, %subview_1[%c63, %c0] : memref<64x16xf16>, vector<16xf16> | |
%expand_shape = memref.expand_shape %alloca [[0], [1, 2], [3, 4]] : memref<1x64x16xf16> into memref<1x4x16x16x1xf16> | |
%subview_2 = memref.subview %expand_shape[0, 0, 0, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : memref<1x4x16x16x1xf16> to memref<1x4x16x16xf16, strided<[1024, 256, 16, 1]>> | |
%subview_3 = memref.subview %subview_2[0, 0, 0, 0] [1, 4, 16, 16] [1, 1, 1, 1] : memref<1x4x16x16xf16, strided<[1024, 256, 16, 1]>> to memref<4x16x16xf16> | |
%88 = vector.load %subview_3[%c0, %c0, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%89 = vector.insert %88, %cst [0, 0] : vector<16xf16> into vector<4x16x16xf16> | |
%90 = vector.load %subview_3[%c0, %c1, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%91 = vector.insert %90, %89 [0, 1] : vector<16xf16> into vector<4x16x16xf16> | |
%92 = vector.load %subview_3[%c0, %c2, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%93 = vector.insert %92, %91 [0, 2] : vector<16xf16> into vector<4x16x16xf16> | |
%94 = vector.load %subview_3[%c0, %c3, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%95 = vector.insert %94, %93 [0, 3] : vector<16xf16> into vector<4x16x16xf16> | |
%96 = vector.load %subview_3[%c0, %c4, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%97 = vector.insert %96, %95 [0, 4] : vector<16xf16> into vector<4x16x16xf16> | |
%98 = vector.load %subview_3[%c0, %c5, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%99 = vector.insert %98, %97 [0, 5] : vector<16xf16> into vector<4x16x16xf16> | |
%100 = vector.load %subview_3[%c0, %c6, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%101 = vector.insert %100, %99 [0, 6] : vector<16xf16> into vector<4x16x16xf16> | |
%102 = vector.load %subview_3[%c0, %c7, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%103 = vector.insert %102, %101 [0, 7] : vector<16xf16> into vector<4x16x16xf16> | |
%104 = vector.load %subview_3[%c0, %c8, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%105 = vector.insert %104, %103 [0, 8] : vector<16xf16> into vector<4x16x16xf16> | |
%106 = vector.load %subview_3[%c0, %c9, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%107 = vector.insert %106, %105 [0, 9] : vector<16xf16> into vector<4x16x16xf16> | |
%108 = vector.load %subview_3[%c0, %c10, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%109 = vector.insert %108, %107 [0, 10] : vector<16xf16> into vector<4x16x16xf16> | |
%110 = vector.load %subview_3[%c0, %c11, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%111 = vector.insert %110, %109 [0, 11] : vector<16xf16> into vector<4x16x16xf16> | |
%112 = vector.load %subview_3[%c0, %c12, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%113 = vector.insert %112, %111 [0, 12] : vector<16xf16> into vector<4x16x16xf16> | |
%114 = vector.load %subview_3[%c0, %c13, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%115 = vector.insert %114, %113 [0, 13] : vector<16xf16> into vector<4x16x16xf16> | |
%116 = vector.load %subview_3[%c0, %c14, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%117 = vector.insert %116, %115 [0, 14] : vector<16xf16> into vector<4x16x16xf16> | |
%118 = vector.load %subview_3[%c0, %c15, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%119 = vector.insert %118, %117 [0, 15] : vector<16xf16> into vector<4x16x16xf16> | |
%120 = vector.load %subview_3[%c1, %c0, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%121 = vector.insert %120, %119 [1, 0] : vector<16xf16> into vector<4x16x16xf16> | |
%122 = vector.load %subview_3[%c1, %c1, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%123 = vector.insert %122, %121 [1, 1] : vector<16xf16> into vector<4x16x16xf16> | |
%124 = vector.load %subview_3[%c1, %c2, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%125 = vector.insert %124, %123 [1, 2] : vector<16xf16> into vector<4x16x16xf16> | |
%126 = vector.load %subview_3[%c1, %c3, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%127 = vector.insert %126, %125 [1, 3] : vector<16xf16> into vector<4x16x16xf16> | |
%128 = vector.load %subview_3[%c1, %c4, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%129 = vector.insert %128, %127 [1, 4] : vector<16xf16> into vector<4x16x16xf16> | |
%130 = vector.load %subview_3[%c1, %c5, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%131 = vector.insert %130, %129 [1, 5] : vector<16xf16> into vector<4x16x16xf16> | |
%132 = vector.load %subview_3[%c1, %c6, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%133 = vector.insert %132, %131 [1, 6] : vector<16xf16> into vector<4x16x16xf16> | |
%134 = vector.load %subview_3[%c1, %c7, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%135 = vector.insert %134, %133 [1, 7] : vector<16xf16> into vector<4x16x16xf16> | |
%136 = vector.load %subview_3[%c1, %c8, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%137 = vector.insert %136, %135 [1, 8] : vector<16xf16> into vector<4x16x16xf16> | |
%138 = vector.load %subview_3[%c1, %c9, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%139 = vector.insert %138, %137 [1, 9] : vector<16xf16> into vector<4x16x16xf16> | |
%140 = vector.load %subview_3[%c1, %c10, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%141 = vector.insert %140, %139 [1, 10] : vector<16xf16> into vector<4x16x16xf16> | |
%142 = vector.load %subview_3[%c1, %c11, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%143 = vector.insert %142, %141 [1, 11] : vector<16xf16> into vector<4x16x16xf16> | |
%144 = vector.load %subview_3[%c1, %c12, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%145 = vector.insert %144, %143 [1, 12] : vector<16xf16> into vector<4x16x16xf16> | |
%146 = vector.load %subview_3[%c1, %c13, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%147 = vector.insert %146, %145 [1, 13] : vector<16xf16> into vector<4x16x16xf16> | |
%148 = vector.load %subview_3[%c1, %c14, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%149 = vector.insert %148, %147 [1, 14] : vector<16xf16> into vector<4x16x16xf16> | |
%150 = vector.load %subview_3[%c1, %c15, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%151 = vector.insert %150, %149 [1, 15] : vector<16xf16> into vector<4x16x16xf16> | |
%152 = vector.load %subview_3[%c2, %c0, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%153 = vector.insert %152, %151 [2, 0] : vector<16xf16> into vector<4x16x16xf16> | |
%154 = vector.load %subview_3[%c2, %c1, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%155 = vector.insert %154, %153 [2, 1] : vector<16xf16> into vector<4x16x16xf16> | |
%156 = vector.load %subview_3[%c2, %c2, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%157 = vector.insert %156, %155 [2, 2] : vector<16xf16> into vector<4x16x16xf16> | |
%158 = vector.load %subview_3[%c2, %c3, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%159 = vector.insert %158, %157 [2, 3] : vector<16xf16> into vector<4x16x16xf16> | |
%160 = vector.load %subview_3[%c2, %c4, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%161 = vector.insert %160, %159 [2, 4] : vector<16xf16> into vector<4x16x16xf16> | |
%162 = vector.load %subview_3[%c2, %c5, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%163 = vector.insert %162, %161 [2, 5] : vector<16xf16> into vector<4x16x16xf16> | |
%164 = vector.load %subview_3[%c2, %c6, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%165 = vector.insert %164, %163 [2, 6] : vector<16xf16> into vector<4x16x16xf16> | |
%166 = vector.load %subview_3[%c2, %c7, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%167 = vector.insert %166, %165 [2, 7] : vector<16xf16> into vector<4x16x16xf16> | |
%168 = vector.load %subview_3[%c2, %c8, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%169 = vector.insert %168, %167 [2, 8] : vector<16xf16> into vector<4x16x16xf16> | |
%170 = vector.load %subview_3[%c2, %c9, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%171 = vector.insert %170, %169 [2, 9] : vector<16xf16> into vector<4x16x16xf16> | |
%172 = vector.load %subview_3[%c2, %c10, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%173 = vector.insert %172, %171 [2, 10] : vector<16xf16> into vector<4x16x16xf16> | |
%174 = vector.load %subview_3[%c2, %c11, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%175 = vector.insert %174, %173 [2, 11] : vector<16xf16> into vector<4x16x16xf16> | |
%176 = vector.load %subview_3[%c2, %c12, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%177 = vector.insert %176, %175 [2, 12] : vector<16xf16> into vector<4x16x16xf16> | |
%178 = vector.load %subview_3[%c2, %c13, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%179 = vector.insert %178, %177 [2, 13] : vector<16xf16> into vector<4x16x16xf16> | |
%180 = vector.load %subview_3[%c2, %c14, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%181 = vector.insert %180, %179 [2, 14] : vector<16xf16> into vector<4x16x16xf16> | |
%182 = vector.load %subview_3[%c2, %c15, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%183 = vector.insert %182, %181 [2, 15] : vector<16xf16> into vector<4x16x16xf16> | |
%184 = vector.load %subview_3[%c3, %c0, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%185 = vector.insert %184, %183 [3, 0] : vector<16xf16> into vector<4x16x16xf16> | |
%186 = vector.load %subview_3[%c3, %c1, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%187 = vector.insert %186, %185 [3, 1] : vector<16xf16> into vector<4x16x16xf16> | |
%188 = vector.load %subview_3[%c3, %c2, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%189 = vector.insert %188, %187 [3, 2] : vector<16xf16> into vector<4x16x16xf16> | |
%190 = vector.load %subview_3[%c3, %c3, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%191 = vector.insert %190, %189 [3, 3] : vector<16xf16> into vector<4x16x16xf16> | |
%192 = vector.load %subview_3[%c3, %c4, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%193 = vector.insert %192, %191 [3, 4] : vector<16xf16> into vector<4x16x16xf16> | |
%194 = vector.load %subview_3[%c3, %c5, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%195 = vector.insert %194, %193 [3, 5] : vector<16xf16> into vector<4x16x16xf16> | |
%196 = vector.load %subview_3[%c3, %c6, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%197 = vector.insert %196, %195 [3, 6] : vector<16xf16> into vector<4x16x16xf16> | |
%198 = vector.load %subview_3[%c3, %c7, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%199 = vector.insert %198, %197 [3, 7] : vector<16xf16> into vector<4x16x16xf16> | |
%200 = vector.load %subview_3[%c3, %c8, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%201 = vector.insert %200, %199 [3, 8] : vector<16xf16> into vector<4x16x16xf16> | |
%202 = vector.load %subview_3[%c3, %c9, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%203 = vector.insert %202, %201 [3, 9] : vector<16xf16> into vector<4x16x16xf16> | |
%204 = vector.load %subview_3[%c3, %c10, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%205 = vector.insert %204, %203 [3, 10] : vector<16xf16> into vector<4x16x16xf16> | |
%206 = vector.load %subview_3[%c3, %c11, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%207 = vector.insert %206, %205 [3, 11] : vector<16xf16> into vector<4x16x16xf16> | |
%208 = vector.load %subview_3[%c3, %c12, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%209 = vector.insert %208, %207 [3, 12] : vector<16xf16> into vector<4x16x16xf16> | |
%210 = vector.load %subview_3[%c3, %c13, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%211 = vector.insert %210, %209 [3, 13] : vector<16xf16> into vector<4x16x16xf16> | |
%212 = vector.load %subview_3[%c3, %c14, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%213 = vector.insert %212, %211 [3, 14] : vector<16xf16> into vector<4x16x16xf16> | |
%214 = vector.load %subview_3[%c3, %c15, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%215 = vector.insert %214, %213 [3, 15] : vector<16xf16> into vector<4x16x16xf16> | |
%216 = vector.shape_cast %215 : vector<4x16x16xf16> to vector<4x16x16x1xf16> | |
%217 = vector.broadcast %216 : vector<4x16x16x1xf16> to vector<1x4x16x16x1xf16> | |
%218 = vector.transpose %217, [0, 1, 3, 2, 4] : vector<1x4x16x16x1xf16> to vector<1x4x16x16x1xf16> | |
%219 = vector.extract %218[0] : vector<4x16x16x1xf16> from vector<1x4x16x16x1xf16> | |
%subview_4 = memref.subview %subview[0, 0, 0, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%220 = vector.shape_cast %219 : vector<4x16x16x1xf16> to vector<4x16x16xf16> | |
%221 = vector.extract %220[0, 0] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %221, %subview_4[%arg3, %c0, %arg4, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%222 = affine.apply affine_map<(d0) -> (d0 + 1)>(%arg4) | |
%223 = vector.extract %220[0, 1] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %223, %subview_4[%arg3, %c0, %222, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%224 = affine.apply affine_map<(d0) -> (d0 + 2)>(%arg4) | |
%225 = vector.extract %220[0, 2] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %225, %subview_4[%arg3, %c0, %224, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%226 = affine.apply affine_map<(d0) -> (d0 + 3)>(%arg4) | |
%227 = vector.extract %220[0, 3] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %227, %subview_4[%arg3, %c0, %226, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%228 = affine.apply affine_map<(d0) -> (d0 + 4)>(%arg4) | |
%229 = vector.extract %220[0, 4] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %229, %subview_4[%arg3, %c0, %228, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%230 = affine.apply affine_map<(d0) -> (d0 + 5)>(%arg4) | |
%231 = vector.extract %220[0, 5] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %231, %subview_4[%arg3, %c0, %230, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%232 = affine.apply affine_map<(d0) -> (d0 + 6)>(%arg4) | |
%233 = vector.extract %220[0, 6] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %233, %subview_4[%arg3, %c0, %232, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%234 = affine.apply affine_map<(d0) -> (d0 + 7)>(%arg4) | |
%235 = vector.extract %220[0, 7] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %235, %subview_4[%arg3, %c0, %234, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%236 = affine.apply affine_map<(d0) -> (d0 + 8)>(%arg4) | |
%237 = vector.extract %220[0, 8] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %237, %subview_4[%arg3, %c0, %236, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%238 = affine.apply affine_map<(d0) -> (d0 + 9)>(%arg4) | |
%239 = vector.extract %220[0, 9] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %239, %subview_4[%arg3, %c0, %238, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%240 = affine.apply affine_map<(d0) -> (d0 + 10)>(%arg4) | |
%241 = vector.extract %220[0, 10] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %241, %subview_4[%arg3, %c0, %240, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%242 = affine.apply affine_map<(d0) -> (d0 + 11)>(%arg4) | |
%243 = vector.extract %220[0, 11] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %243, %subview_4[%arg3, %c0, %242, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%244 = affine.apply affine_map<(d0) -> (d0 + 12)>(%arg4) | |
%245 = vector.extract %220[0, 12] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %245, %subview_4[%arg3, %c0, %244, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%246 = affine.apply affine_map<(d0) -> (d0 + 13)>(%arg4) | |
%247 = vector.extract %220[0, 13] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %247, %subview_4[%arg3, %c0, %246, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%248 = affine.apply affine_map<(d0) -> (d0 + 14)>(%arg4) | |
%249 = vector.extract %220[0, 14] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %249, %subview_4[%arg3, %c0, %248, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%250 = affine.apply affine_map<(d0) -> (d0 + 15)>(%arg4) | |
%251 = vector.extract %220[0, 15] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %251, %subview_4[%arg3, %c0, %250, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%252 = vector.extract %220[1, 0] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %252, %subview_4[%arg3, %c1, %arg4, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%253 = affine.apply affine_map<(d0) -> (d0 + 1)>(%arg4) | |
%254 = vector.extract %220[1, 1] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %254, %subview_4[%arg3, %c1, %253, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%255 = affine.apply affine_map<(d0) -> (d0 + 2)>(%arg4) | |
%256 = vector.extract %220[1, 2] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %256, %subview_4[%arg3, %c1, %255, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%257 = affine.apply affine_map<(d0) -> (d0 + 3)>(%arg4) | |
%258 = vector.extract %220[1, 3] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %258, %subview_4[%arg3, %c1, %257, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%259 = affine.apply affine_map<(d0) -> (d0 + 4)>(%arg4) | |
%260 = vector.extract %220[1, 4] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %260, %subview_4[%arg3, %c1, %259, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%261 = affine.apply affine_map<(d0) -> (d0 + 5)>(%arg4) | |
%262 = vector.extract %220[1, 5] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %262, %subview_4[%arg3, %c1, %261, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%263 = affine.apply affine_map<(d0) -> (d0 + 6)>(%arg4) | |
%264 = vector.extract %220[1, 6] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %264, %subview_4[%arg3, %c1, %263, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%265 = affine.apply affine_map<(d0) -> (d0 + 7)>(%arg4) | |
%266 = vector.extract %220[1, 7] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %266, %subview_4[%arg3, %c1, %265, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%267 = affine.apply affine_map<(d0) -> (d0 + 8)>(%arg4) | |
%268 = vector.extract %220[1, 8] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %268, %subview_4[%arg3, %c1, %267, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%269 = affine.apply affine_map<(d0) -> (d0 + 9)>(%arg4) | |
%270 = vector.extract %220[1, 9] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %270, %subview_4[%arg3, %c1, %269, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%271 = affine.apply affine_map<(d0) -> (d0 + 10)>(%arg4) | |
%272 = vector.extract %220[1, 10] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %272, %subview_4[%arg3, %c1, %271, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%273 = affine.apply affine_map<(d0) -> (d0 + 11)>(%arg4) | |
%274 = vector.extract %220[1, 11] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %274, %subview_4[%arg3, %c1, %273, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%275 = affine.apply affine_map<(d0) -> (d0 + 12)>(%arg4) | |
%276 = vector.extract %220[1, 12] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %276, %subview_4[%arg3, %c1, %275, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%277 = affine.apply affine_map<(d0) -> (d0 + 13)>(%arg4) | |
%278 = vector.extract %220[1, 13] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %278, %subview_4[%arg3, %c1, %277, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%279 = affine.apply affine_map<(d0) -> (d0 + 14)>(%arg4) | |
%280 = vector.extract %220[1, 14] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %280, %subview_4[%arg3, %c1, %279, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%281 = affine.apply affine_map<(d0) -> (d0 + 15)>(%arg4) | |
%282 = vector.extract %220[1, 15] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %282, %subview_4[%arg3, %c1, %281, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%283 = vector.extract %220[2, 0] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %283, %subview_4[%arg3, %c2, %arg4, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%284 = affine.apply affine_map<(d0) -> (d0 + 1)>(%arg4) | |
%285 = vector.extract %220[2, 1] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %285, %subview_4[%arg3, %c2, %284, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%286 = affine.apply affine_map<(d0) -> (d0 + 2)>(%arg4) | |
%287 = vector.extract %220[2, 2] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %287, %subview_4[%arg3, %c2, %286, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%288 = affine.apply affine_map<(d0) -> (d0 + 3)>(%arg4) | |
%289 = vector.extract %220[2, 3] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %289, %subview_4[%arg3, %c2, %288, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%290 = affine.apply affine_map<(d0) -> (d0 + 4)>(%arg4) | |
%291 = vector.extract %220[2, 4] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %291, %subview_4[%arg3, %c2, %290, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%292 = affine.apply affine_map<(d0) -> (d0 + 5)>(%arg4) | |
%293 = vector.extract %220[2, 5] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %293, %subview_4[%arg3, %c2, %292, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%294 = affine.apply affine_map<(d0) -> (d0 + 6)>(%arg4) | |
%295 = vector.extract %220[2, 6] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %295, %subview_4[%arg3, %c2, %294, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%296 = affine.apply affine_map<(d0) -> (d0 + 7)>(%arg4) | |
%297 = vector.extract %220[2, 7] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %297, %subview_4[%arg3, %c2, %296, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%298 = affine.apply affine_map<(d0) -> (d0 + 8)>(%arg4) | |
%299 = vector.extract %220[2, 8] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %299, %subview_4[%arg3, %c2, %298, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%300 = affine.apply affine_map<(d0) -> (d0 + 9)>(%arg4) | |
%301 = vector.extract %220[2, 9] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %301, %subview_4[%arg3, %c2, %300, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%302 = affine.apply affine_map<(d0) -> (d0 + 10)>(%arg4) | |
%303 = vector.extract %220[2, 10] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %303, %subview_4[%arg3, %c2, %302, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%304 = affine.apply affine_map<(d0) -> (d0 + 11)>(%arg4) | |
%305 = vector.extract %220[2, 11] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %305, %subview_4[%arg3, %c2, %304, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%306 = affine.apply affine_map<(d0) -> (d0 + 12)>(%arg4) | |
%307 = vector.extract %220[2, 12] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %307, %subview_4[%arg3, %c2, %306, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%308 = affine.apply affine_map<(d0) -> (d0 + 13)>(%arg4) | |
%309 = vector.extract %220[2, 13] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %309, %subview_4[%arg3, %c2, %308, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%310 = affine.apply affine_map<(d0) -> (d0 + 14)>(%arg4) | |
%311 = vector.extract %220[2, 14] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %311, %subview_4[%arg3, %c2, %310, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%312 = affine.apply affine_map<(d0) -> (d0 + 15)>(%arg4) | |
%313 = vector.extract %220[2, 15] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %313, %subview_4[%arg3, %c2, %312, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%314 = vector.extract %220[3, 0] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %314, %subview_4[%arg3, %c3, %arg4, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%315 = affine.apply affine_map<(d0) -> (d0 + 1)>(%arg4) | |
%316 = vector.extract %220[3, 1] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %316, %subview_4[%arg3, %c3, %315, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%317 = affine.apply affine_map<(d0) -> (d0 + 2)>(%arg4) | |
%318 = vector.extract %220[3, 2] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %318, %subview_4[%arg3, %c3, %317, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%319 = affine.apply affine_map<(d0) -> (d0 + 3)>(%arg4) | |
%320 = vector.extract %220[3, 3] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %320, %subview_4[%arg3, %c3, %319, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%321 = affine.apply affine_map<(d0) -> (d0 + 4)>(%arg4) | |
%322 = vector.extract %220[3, 4] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %322, %subview_4[%arg3, %c3, %321, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%323 = affine.apply affine_map<(d0) -> (d0 + 5)>(%arg4) | |
%324 = vector.extract %220[3, 5] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %324, %subview_4[%arg3, %c3, %323, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%325 = affine.apply affine_map<(d0) -> (d0 + 6)>(%arg4) | |
%326 = vector.extract %220[3, 6] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %326, %subview_4[%arg3, %c3, %325, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%327 = affine.apply affine_map<(d0) -> (d0 + 7)>(%arg4) | |
%328 = vector.extract %220[3, 7] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %328, %subview_4[%arg3, %c3, %327, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%329 = affine.apply affine_map<(d0) -> (d0 + 8)>(%arg4) | |
%330 = vector.extract %220[3, 8] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %330, %subview_4[%arg3, %c3, %329, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%331 = affine.apply affine_map<(d0) -> (d0 + 9)>(%arg4) | |
%332 = vector.extract %220[3, 9] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %332, %subview_4[%arg3, %c3, %331, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%333 = affine.apply affine_map<(d0) -> (d0 + 10)>(%arg4) | |
%334 = vector.extract %220[3, 10] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %334, %subview_4[%arg3, %c3, %333, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%335 = affine.apply affine_map<(d0) -> (d0 + 11)>(%arg4) | |
%336 = vector.extract %220[3, 11] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %336, %subview_4[%arg3, %c3, %335, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%337 = affine.apply affine_map<(d0) -> (d0 + 12)>(%arg4) | |
%338 = vector.extract %220[3, 12] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %338, %subview_4[%arg3, %c3, %337, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%339 = affine.apply affine_map<(d0) -> (d0 + 13)>(%arg4) | |
%340 = vector.extract %220[3, 13] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %340, %subview_4[%arg3, %c3, %339, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%341 = affine.apply affine_map<(d0) -> (d0 + 14)>(%arg4) | |
%342 = vector.extract %220[3, 14] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %342, %subview_4[%arg3, %c3, %341, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%343 = affine.apply affine_map<(d0) -> (d0 + 15)>(%arg4) | |
%344 = vector.extract %220[3, 15] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %344, %subview_4[%arg3, %c3, %343, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
} | |
} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After LLVMCPUVectorTransposeLowering (iree-llvmcpu-vector-transpose-lowering) //----- // | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%cst = arith.constant dense<0.000000e+00> : vector<1x4x16x16x1xf16> | |
%cst_0 = arith.constant dense<0.000000e+00> : vector<4x16x16xf16> | |
%c63 = arith.constant 63 : index | |
%c62 = arith.constant 62 : index | |
%c61 = arith.constant 61 : index | |
%c60 = arith.constant 60 : index | |
%c59 = arith.constant 59 : index | |
%c58 = arith.constant 58 : index | |
%c57 = arith.constant 57 : index | |
%c56 = arith.constant 56 : index | |
%c55 = arith.constant 55 : index | |
%c54 = arith.constant 54 : index | |
%c53 = arith.constant 53 : index | |
%c52 = arith.constant 52 : index | |
%c51 = arith.constant 51 : index | |
%c50 = arith.constant 50 : index | |
%c49 = arith.constant 49 : index | |
%c48 = arith.constant 48 : index | |
%c47 = arith.constant 47 : index | |
%c46 = arith.constant 46 : index | |
%c45 = arith.constant 45 : index | |
%c44 = arith.constant 44 : index | |
%c43 = arith.constant 43 : index | |
%c42 = arith.constant 42 : index | |
%c41 = arith.constant 41 : index | |
%c40 = arith.constant 40 : index | |
%c39 = arith.constant 39 : index | |
%c38 = arith.constant 38 : index | |
%c37 = arith.constant 37 : index | |
%c36 = arith.constant 36 : index | |
%c35 = arith.constant 35 : index | |
%c34 = arith.constant 34 : index | |
%c33 = arith.constant 33 : index | |
%c32 = arith.constant 32 : index | |
%c31 = arith.constant 31 : index | |
%c30 = arith.constant 30 : index | |
%c29 = arith.constant 29 : index | |
%c28 = arith.constant 28 : index | |
%c27 = arith.constant 27 : index | |
%c26 = arith.constant 26 : index | |
%c25 = arith.constant 25 : index | |
%c24 = arith.constant 24 : index | |
%c23 = arith.constant 23 : index | |
%c22 = arith.constant 22 : index | |
%c21 = arith.constant 21 : index | |
%c20 = arith.constant 20 : index | |
%c19 = arith.constant 19 : index | |
%c18 = arith.constant 18 : index | |
%c17 = arith.constant 17 : index | |
%c15 = arith.constant 15 : index | |
%c14 = arith.constant 14 : index | |
%c13 = arith.constant 13 : index | |
%c12 = arith.constant 12 : index | |
%c11 = arith.constant 11 : index | |
%c10 = arith.constant 10 : index | |
%c9 = arith.constant 9 : index | |
%c8 = arith.constant 8 : index | |
%c7 = arith.constant 7 : index | |
%c6 = arith.constant 6 : index | |
%c5 = arith.constant 5 : index | |
%c4 = arith.constant 4 : index | |
%c3 = arith.constant 3 : index | |
%c2 = arith.constant 2 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%c0 = arith.constant 0 : index | |
%c540 = arith.constant 540 : index | |
%c3200 = arith.constant 3200 : index | |
%c1 = arith.constant 1 : index | |
%c64 = arith.constant 64 : index | |
%c16 = arith.constant 16 : index | |
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x64x16xf16> | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
memref.assume_alignment %14, 64 : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>{%13} | |
memref.assume_alignment %15, 1 : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
%18 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg0 = %16 to %13 step %17 { | |
%22 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
scf.for %arg1 = %18 to %c540 step %19 { | |
%23 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
scf.for %arg2 = %20 to %c3200 step %21 { | |
%subview = memref.subview %15[%arg0, %arg1, %arg2, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_1 = memref.subview %14[%23, %arg2] [64, 64] [1, 1] : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> to memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
scf.for %arg3 = %c0 to %22 step %c1 { | |
scf.for %arg4 = %c0 to %c64 step %c16 { | |
%24 = vector.load %subview_1[%c0, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%25 = vector.load %subview_1[%c1, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%26 = vector.load %subview_1[%c2, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%27 = vector.load %subview_1[%c3, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%28 = vector.load %subview_1[%c4, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%29 = vector.load %subview_1[%c5, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%30 = vector.load %subview_1[%c6, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%31 = vector.load %subview_1[%c7, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%32 = vector.load %subview_1[%c8, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%33 = vector.load %subview_1[%c9, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%34 = vector.load %subview_1[%c10, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%35 = vector.load %subview_1[%c11, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%36 = vector.load %subview_1[%c12, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%37 = vector.load %subview_1[%c13, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%38 = vector.load %subview_1[%c14, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%39 = vector.load %subview_1[%c15, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%40 = vector.load %subview_1[%c16, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%41 = vector.load %subview_1[%c17, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%42 = vector.load %subview_1[%c18, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%43 = vector.load %subview_1[%c19, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%44 = vector.load %subview_1[%c20, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%45 = vector.load %subview_1[%c21, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%46 = vector.load %subview_1[%c22, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%47 = vector.load %subview_1[%c23, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%48 = vector.load %subview_1[%c24, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%49 = vector.load %subview_1[%c25, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%50 = vector.load %subview_1[%c26, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%51 = vector.load %subview_1[%c27, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%52 = vector.load %subview_1[%c28, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%53 = vector.load %subview_1[%c29, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%54 = vector.load %subview_1[%c30, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%55 = vector.load %subview_1[%c31, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%56 = vector.load %subview_1[%c32, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%57 = vector.load %subview_1[%c33, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%58 = vector.load %subview_1[%c34, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%59 = vector.load %subview_1[%c35, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%60 = vector.load %subview_1[%c36, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%61 = vector.load %subview_1[%c37, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%62 = vector.load %subview_1[%c38, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%63 = vector.load %subview_1[%c39, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%64 = vector.load %subview_1[%c40, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%65 = vector.load %subview_1[%c41, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%66 = vector.load %subview_1[%c42, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%67 = vector.load %subview_1[%c43, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%68 = vector.load %subview_1[%c44, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%69 = vector.load %subview_1[%c45, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%70 = vector.load %subview_1[%c46, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%71 = vector.load %subview_1[%c47, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%72 = vector.load %subview_1[%c48, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%73 = vector.load %subview_1[%c49, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%74 = vector.load %subview_1[%c50, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%75 = vector.load %subview_1[%c51, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%76 = vector.load %subview_1[%c52, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%77 = vector.load %subview_1[%c53, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%78 = vector.load %subview_1[%c54, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%79 = vector.load %subview_1[%c55, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%80 = vector.load %subview_1[%c56, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%81 = vector.load %subview_1[%c57, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%82 = vector.load %subview_1[%c58, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%83 = vector.load %subview_1[%c59, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%84 = vector.load %subview_1[%c60, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%85 = vector.load %subview_1[%c61, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%86 = vector.load %subview_1[%c62, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%87 = vector.load %subview_1[%c63, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%subview_2 = memref.subview %alloca[0, 0, 0] [1, 64, 16] [1, 1, 1] : memref<1x64x16xf16> to memref<64x16xf16> | |
vector.store %24, %subview_2[%c0, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %25, %subview_2[%c1, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %26, %subview_2[%c2, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %27, %subview_2[%c3, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %28, %subview_2[%c4, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %29, %subview_2[%c5, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %30, %subview_2[%c6, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %31, %subview_2[%c7, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %32, %subview_2[%c8, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %33, %subview_2[%c9, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %34, %subview_2[%c10, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %35, %subview_2[%c11, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %36, %subview_2[%c12, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %37, %subview_2[%c13, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %38, %subview_2[%c14, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %39, %subview_2[%c15, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %40, %subview_2[%c16, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %41, %subview_2[%c17, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %42, %subview_2[%c18, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %43, %subview_2[%c19, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %44, %subview_2[%c20, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %45, %subview_2[%c21, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %46, %subview_2[%c22, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %47, %subview_2[%c23, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %48, %subview_2[%c24, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %49, %subview_2[%c25, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %50, %subview_2[%c26, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %51, %subview_2[%c27, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %52, %subview_2[%c28, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %53, %subview_2[%c29, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %54, %subview_2[%c30, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %55, %subview_2[%c31, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %56, %subview_2[%c32, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %57, %subview_2[%c33, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %58, %subview_2[%c34, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %59, %subview_2[%c35, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %60, %subview_2[%c36, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %61, %subview_2[%c37, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %62, %subview_2[%c38, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %63, %subview_2[%c39, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %64, %subview_2[%c40, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %65, %subview_2[%c41, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %66, %subview_2[%c42, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %67, %subview_2[%c43, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %68, %subview_2[%c44, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %69, %subview_2[%c45, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %70, %subview_2[%c46, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %71, %subview_2[%c47, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %72, %subview_2[%c48, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %73, %subview_2[%c49, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %74, %subview_2[%c50, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %75, %subview_2[%c51, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %76, %subview_2[%c52, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %77, %subview_2[%c53, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %78, %subview_2[%c54, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %79, %subview_2[%c55, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %80, %subview_2[%c56, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %81, %subview_2[%c57, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %82, %subview_2[%c58, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %83, %subview_2[%c59, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %84, %subview_2[%c60, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %85, %subview_2[%c61, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %86, %subview_2[%c62, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %87, %subview_2[%c63, %c0] : memref<64x16xf16>, vector<16xf16> | |
%expand_shape = memref.expand_shape %alloca [[0], [1, 2], [3, 4]] : memref<1x64x16xf16> into memref<1x4x16x16x1xf16> | |
%subview_3 = memref.subview %expand_shape[0, 0, 0, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : memref<1x4x16x16x1xf16> to memref<1x4x16x16xf16, strided<[1024, 256, 16, 1]>> | |
%subview_4 = memref.subview %subview_3[0, 0, 0, 0] [1, 4, 16, 16] [1, 1, 1, 1] : memref<1x4x16x16xf16, strided<[1024, 256, 16, 1]>> to memref<4x16x16xf16> | |
%88 = vector.load %subview_4[%c0, %c0, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%89 = vector.insert %88, %cst_0 [0, 0] : vector<16xf16> into vector<4x16x16xf16> | |
%90 = vector.load %subview_4[%c0, %c1, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%91 = vector.insert %90, %89 [0, 1] : vector<16xf16> into vector<4x16x16xf16> | |
%92 = vector.load %subview_4[%c0, %c2, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%93 = vector.insert %92, %91 [0, 2] : vector<16xf16> into vector<4x16x16xf16> | |
%94 = vector.load %subview_4[%c0, %c3, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%95 = vector.insert %94, %93 [0, 3] : vector<16xf16> into vector<4x16x16xf16> | |
%96 = vector.load %subview_4[%c0, %c4, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%97 = vector.insert %96, %95 [0, 4] : vector<16xf16> into vector<4x16x16xf16> | |
%98 = vector.load %subview_4[%c0, %c5, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%99 = vector.insert %98, %97 [0, 5] : vector<16xf16> into vector<4x16x16xf16> | |
%100 = vector.load %subview_4[%c0, %c6, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%101 = vector.insert %100, %99 [0, 6] : vector<16xf16> into vector<4x16x16xf16> | |
%102 = vector.load %subview_4[%c0, %c7, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%103 = vector.insert %102, %101 [0, 7] : vector<16xf16> into vector<4x16x16xf16> | |
%104 = vector.load %subview_4[%c0, %c8, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%105 = vector.insert %104, %103 [0, 8] : vector<16xf16> into vector<4x16x16xf16> | |
%106 = vector.load %subview_4[%c0, %c9, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%107 = vector.insert %106, %105 [0, 9] : vector<16xf16> into vector<4x16x16xf16> | |
%108 = vector.load %subview_4[%c0, %c10, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%109 = vector.insert %108, %107 [0, 10] : vector<16xf16> into vector<4x16x16xf16> | |
%110 = vector.load %subview_4[%c0, %c11, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%111 = vector.insert %110, %109 [0, 11] : vector<16xf16> into vector<4x16x16xf16> | |
%112 = vector.load %subview_4[%c0, %c12, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%113 = vector.insert %112, %111 [0, 12] : vector<16xf16> into vector<4x16x16xf16> | |
%114 = vector.load %subview_4[%c0, %c13, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%115 = vector.insert %114, %113 [0, 13] : vector<16xf16> into vector<4x16x16xf16> | |
%116 = vector.load %subview_4[%c0, %c14, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%117 = vector.insert %116, %115 [0, 14] : vector<16xf16> into vector<4x16x16xf16> | |
%118 = vector.load %subview_4[%c0, %c15, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%119 = vector.insert %118, %117 [0, 15] : vector<16xf16> into vector<4x16x16xf16> | |
%120 = vector.load %subview_4[%c1, %c0, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%121 = vector.insert %120, %119 [1, 0] : vector<16xf16> into vector<4x16x16xf16> | |
%122 = vector.load %subview_4[%c1, %c1, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%123 = vector.insert %122, %121 [1, 1] : vector<16xf16> into vector<4x16x16xf16> | |
%124 = vector.load %subview_4[%c1, %c2, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%125 = vector.insert %124, %123 [1, 2] : vector<16xf16> into vector<4x16x16xf16> | |
%126 = vector.load %subview_4[%c1, %c3, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%127 = vector.insert %126, %125 [1, 3] : vector<16xf16> into vector<4x16x16xf16> | |
%128 = vector.load %subview_4[%c1, %c4, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%129 = vector.insert %128, %127 [1, 4] : vector<16xf16> into vector<4x16x16xf16> | |
%130 = vector.load %subview_4[%c1, %c5, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%131 = vector.insert %130, %129 [1, 5] : vector<16xf16> into vector<4x16x16xf16> | |
%132 = vector.load %subview_4[%c1, %c6, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%133 = vector.insert %132, %131 [1, 6] : vector<16xf16> into vector<4x16x16xf16> | |
%134 = vector.load %subview_4[%c1, %c7, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%135 = vector.insert %134, %133 [1, 7] : vector<16xf16> into vector<4x16x16xf16> | |
%136 = vector.load %subview_4[%c1, %c8, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%137 = vector.insert %136, %135 [1, 8] : vector<16xf16> into vector<4x16x16xf16> | |
%138 = vector.load %subview_4[%c1, %c9, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%139 = vector.insert %138, %137 [1, 9] : vector<16xf16> into vector<4x16x16xf16> | |
%140 = vector.load %subview_4[%c1, %c10, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%141 = vector.insert %140, %139 [1, 10] : vector<16xf16> into vector<4x16x16xf16> | |
%142 = vector.load %subview_4[%c1, %c11, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%143 = vector.insert %142, %141 [1, 11] : vector<16xf16> into vector<4x16x16xf16> | |
%144 = vector.load %subview_4[%c1, %c12, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%145 = vector.insert %144, %143 [1, 12] : vector<16xf16> into vector<4x16x16xf16> | |
%146 = vector.load %subview_4[%c1, %c13, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%147 = vector.insert %146, %145 [1, 13] : vector<16xf16> into vector<4x16x16xf16> | |
%148 = vector.load %subview_4[%c1, %c14, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%149 = vector.insert %148, %147 [1, 14] : vector<16xf16> into vector<4x16x16xf16> | |
%150 = vector.load %subview_4[%c1, %c15, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%151 = vector.insert %150, %149 [1, 15] : vector<16xf16> into vector<4x16x16xf16> | |
%152 = vector.load %subview_4[%c2, %c0, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%153 = vector.insert %152, %151 [2, 0] : vector<16xf16> into vector<4x16x16xf16> | |
%154 = vector.load %subview_4[%c2, %c1, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%155 = vector.insert %154, %153 [2, 1] : vector<16xf16> into vector<4x16x16xf16> | |
%156 = vector.load %subview_4[%c2, %c2, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%157 = vector.insert %156, %155 [2, 2] : vector<16xf16> into vector<4x16x16xf16> | |
%158 = vector.load %subview_4[%c2, %c3, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%159 = vector.insert %158, %157 [2, 3] : vector<16xf16> into vector<4x16x16xf16> | |
%160 = vector.load %subview_4[%c2, %c4, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%161 = vector.insert %160, %159 [2, 4] : vector<16xf16> into vector<4x16x16xf16> | |
%162 = vector.load %subview_4[%c2, %c5, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%163 = vector.insert %162, %161 [2, 5] : vector<16xf16> into vector<4x16x16xf16> | |
%164 = vector.load %subview_4[%c2, %c6, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%165 = vector.insert %164, %163 [2, 6] : vector<16xf16> into vector<4x16x16xf16> | |
%166 = vector.load %subview_4[%c2, %c7, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%167 = vector.insert %166, %165 [2, 7] : vector<16xf16> into vector<4x16x16xf16> | |
%168 = vector.load %subview_4[%c2, %c8, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%169 = vector.insert %168, %167 [2, 8] : vector<16xf16> into vector<4x16x16xf16> | |
%170 = vector.load %subview_4[%c2, %c9, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%171 = vector.insert %170, %169 [2, 9] : vector<16xf16> into vector<4x16x16xf16> | |
%172 = vector.load %subview_4[%c2, %c10, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%173 = vector.insert %172, %171 [2, 10] : vector<16xf16> into vector<4x16x16xf16> | |
%174 = vector.load %subview_4[%c2, %c11, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%175 = vector.insert %174, %173 [2, 11] : vector<16xf16> into vector<4x16x16xf16> | |
%176 = vector.load %subview_4[%c2, %c12, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%177 = vector.insert %176, %175 [2, 12] : vector<16xf16> into vector<4x16x16xf16> | |
%178 = vector.load %subview_4[%c2, %c13, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%179 = vector.insert %178, %177 [2, 13] : vector<16xf16> into vector<4x16x16xf16> | |
%180 = vector.load %subview_4[%c2, %c14, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%181 = vector.insert %180, %179 [2, 14] : vector<16xf16> into vector<4x16x16xf16> | |
%182 = vector.load %subview_4[%c2, %c15, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%183 = vector.insert %182, %181 [2, 15] : vector<16xf16> into vector<4x16x16xf16> | |
%184 = vector.load %subview_4[%c3, %c0, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%185 = vector.insert %184, %183 [3, 0] : vector<16xf16> into vector<4x16x16xf16> | |
%186 = vector.load %subview_4[%c3, %c1, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%187 = vector.insert %186, %185 [3, 1] : vector<16xf16> into vector<4x16x16xf16> | |
%188 = vector.load %subview_4[%c3, %c2, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%189 = vector.insert %188, %187 [3, 2] : vector<16xf16> into vector<4x16x16xf16> | |
%190 = vector.load %subview_4[%c3, %c3, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%191 = vector.insert %190, %189 [3, 3] : vector<16xf16> into vector<4x16x16xf16> | |
%192 = vector.load %subview_4[%c3, %c4, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%193 = vector.insert %192, %191 [3, 4] : vector<16xf16> into vector<4x16x16xf16> | |
%194 = vector.load %subview_4[%c3, %c5, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%195 = vector.insert %194, %193 [3, 5] : vector<16xf16> into vector<4x16x16xf16> | |
%196 = vector.load %subview_4[%c3, %c6, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%197 = vector.insert %196, %195 [3, 6] : vector<16xf16> into vector<4x16x16xf16> | |
%198 = vector.load %subview_4[%c3, %c7, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%199 = vector.insert %198, %197 [3, 7] : vector<16xf16> into vector<4x16x16xf16> | |
%200 = vector.load %subview_4[%c3, %c8, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%201 = vector.insert %200, %199 [3, 8] : vector<16xf16> into vector<4x16x16xf16> | |
%202 = vector.load %subview_4[%c3, %c9, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%203 = vector.insert %202, %201 [3, 9] : vector<16xf16> into vector<4x16x16xf16> | |
%204 = vector.load %subview_4[%c3, %c10, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%205 = vector.insert %204, %203 [3, 10] : vector<16xf16> into vector<4x16x16xf16> | |
%206 = vector.load %subview_4[%c3, %c11, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%207 = vector.insert %206, %205 [3, 11] : vector<16xf16> into vector<4x16x16xf16> | |
%208 = vector.load %subview_4[%c3, %c12, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%209 = vector.insert %208, %207 [3, 12] : vector<16xf16> into vector<4x16x16xf16> | |
%210 = vector.load %subview_4[%c3, %c13, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%211 = vector.insert %210, %209 [3, 13] : vector<16xf16> into vector<4x16x16xf16> | |
%212 = vector.load %subview_4[%c3, %c14, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%213 = vector.insert %212, %211 [3, 14] : vector<16xf16> into vector<4x16x16xf16> | |
%214 = vector.load %subview_4[%c3, %c15, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%215 = vector.insert %214, %213 [3, 15] : vector<16xf16> into vector<4x16x16xf16> | |
%216 = vector.shape_cast %215 : vector<4x16x16xf16> to vector<4x16x16x1xf16> | |
%217 = vector.extract %216[0, 0, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%218 = vector.insert %217, %cst [0, 0, 0, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%219 = vector.extract %216[0, 0, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%220 = vector.insert %219, %218 [0, 0, 1, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%221 = vector.extract %216[0, 0, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%222 = vector.insert %221, %220 [0, 0, 2, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%223 = vector.extract %216[0, 0, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%224 = vector.insert %223, %222 [0, 0, 3, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%225 = vector.extract %216[0, 0, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%226 = vector.insert %225, %224 [0, 0, 4, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%227 = vector.extract %216[0, 0, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%228 = vector.insert %227, %226 [0, 0, 5, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%229 = vector.extract %216[0, 0, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%230 = vector.insert %229, %228 [0, 0, 6, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%231 = vector.extract %216[0, 0, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%232 = vector.insert %231, %230 [0, 0, 7, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%233 = vector.extract %216[0, 0, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%234 = vector.insert %233, %232 [0, 0, 8, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%235 = vector.extract %216[0, 0, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%236 = vector.insert %235, %234 [0, 0, 9, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%237 = vector.extract %216[0, 0, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%238 = vector.insert %237, %236 [0, 0, 10, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%239 = vector.extract %216[0, 0, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%240 = vector.insert %239, %238 [0, 0, 11, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%241 = vector.extract %216[0, 0, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%242 = vector.insert %241, %240 [0, 0, 12, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%243 = vector.extract %216[0, 0, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%244 = vector.insert %243, %242 [0, 0, 13, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%245 = vector.extract %216[0, 0, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%246 = vector.insert %245, %244 [0, 0, 14, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%247 = vector.extract %216[0, 0, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%248 = vector.insert %247, %246 [0, 0, 15, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%249 = vector.extract %216[0, 1, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%250 = vector.insert %249, %248 [0, 0, 0, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%251 = vector.extract %216[0, 1, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%252 = vector.insert %251, %250 [0, 0, 1, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%253 = vector.extract %216[0, 1, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%254 = vector.insert %253, %252 [0, 0, 2, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%255 = vector.extract %216[0, 1, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%256 = vector.insert %255, %254 [0, 0, 3, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%257 = vector.extract %216[0, 1, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%258 = vector.insert %257, %256 [0, 0, 4, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%259 = vector.extract %216[0, 1, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%260 = vector.insert %259, %258 [0, 0, 5, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%261 = vector.extract %216[0, 1, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%262 = vector.insert %261, %260 [0, 0, 6, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%263 = vector.extract %216[0, 1, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%264 = vector.insert %263, %262 [0, 0, 7, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%265 = vector.extract %216[0, 1, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%266 = vector.insert %265, %264 [0, 0, 8, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%267 = vector.extract %216[0, 1, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%268 = vector.insert %267, %266 [0, 0, 9, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%269 = vector.extract %216[0, 1, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%270 = vector.insert %269, %268 [0, 0, 10, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%271 = vector.extract %216[0, 1, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%272 = vector.insert %271, %270 [0, 0, 11, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%273 = vector.extract %216[0, 1, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%274 = vector.insert %273, %272 [0, 0, 12, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%275 = vector.extract %216[0, 1, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%276 = vector.insert %275, %274 [0, 0, 13, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%277 = vector.extract %216[0, 1, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%278 = vector.insert %277, %276 [0, 0, 14, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%279 = vector.extract %216[0, 1, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%280 = vector.insert %279, %278 [0, 0, 15, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%281 = vector.extract %216[0, 2, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%282 = vector.insert %281, %280 [0, 0, 0, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%283 = vector.extract %216[0, 2, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%284 = vector.insert %283, %282 [0, 0, 1, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%285 = vector.extract %216[0, 2, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%286 = vector.insert %285, %284 [0, 0, 2, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%287 = vector.extract %216[0, 2, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%288 = vector.insert %287, %286 [0, 0, 3, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%289 = vector.extract %216[0, 2, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%290 = vector.insert %289, %288 [0, 0, 4, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%291 = vector.extract %216[0, 2, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%292 = vector.insert %291, %290 [0, 0, 5, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%293 = vector.extract %216[0, 2, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%294 = vector.insert %293, %292 [0, 0, 6, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%295 = vector.extract %216[0, 2, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%296 = vector.insert %295, %294 [0, 0, 7, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%297 = vector.extract %216[0, 2, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%298 = vector.insert %297, %296 [0, 0, 8, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%299 = vector.extract %216[0, 2, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%300 = vector.insert %299, %298 [0, 0, 9, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%301 = vector.extract %216[0, 2, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%302 = vector.insert %301, %300 [0, 0, 10, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%303 = vector.extract %216[0, 2, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%304 = vector.insert %303, %302 [0, 0, 11, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%305 = vector.extract %216[0, 2, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%306 = vector.insert %305, %304 [0, 0, 12, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%307 = vector.extract %216[0, 2, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%308 = vector.insert %307, %306 [0, 0, 13, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%309 = vector.extract %216[0, 2, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%310 = vector.insert %309, %308 [0, 0, 14, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%311 = vector.extract %216[0, 2, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%312 = vector.insert %311, %310 [0, 0, 15, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%313 = vector.extract %216[0, 3, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%314 = vector.insert %313, %312 [0, 0, 0, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%315 = vector.extract %216[0, 3, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%316 = vector.insert %315, %314 [0, 0, 1, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%317 = vector.extract %216[0, 3, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%318 = vector.insert %317, %316 [0, 0, 2, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%319 = vector.extract %216[0, 3, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%320 = vector.insert %319, %318 [0, 0, 3, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%321 = vector.extract %216[0, 3, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%322 = vector.insert %321, %320 [0, 0, 4, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%323 = vector.extract %216[0, 3, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%324 = vector.insert %323, %322 [0, 0, 5, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%325 = vector.extract %216[0, 3, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%326 = vector.insert %325, %324 [0, 0, 6, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%327 = vector.extract %216[0, 3, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%328 = vector.insert %327, %326 [0, 0, 7, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%329 = vector.extract %216[0, 3, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%330 = vector.insert %329, %328 [0, 0, 8, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%331 = vector.extract %216[0, 3, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%332 = vector.insert %331, %330 [0, 0, 9, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%333 = vector.extract %216[0, 3, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%334 = vector.insert %333, %332 [0, 0, 10, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%335 = vector.extract %216[0, 3, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%336 = vector.insert %335, %334 [0, 0, 11, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%337 = vector.extract %216[0, 3, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%338 = vector.insert %337, %336 [0, 0, 12, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%339 = vector.extract %216[0, 3, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%340 = vector.insert %339, %338 [0, 0, 13, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%341 = vector.extract %216[0, 3, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%342 = vector.insert %341, %340 [0, 0, 14, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%343 = vector.extract %216[0, 3, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%344 = vector.insert %343, %342 [0, 0, 15, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%345 = vector.extract %216[0, 4, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%346 = vector.insert %345, %344 [0, 0, 0, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%347 = vector.extract %216[0, 4, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%348 = vector.insert %347, %346 [0, 0, 1, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%349 = vector.extract %216[0, 4, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%350 = vector.insert %349, %348 [0, 0, 2, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%351 = vector.extract %216[0, 4, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%352 = vector.insert %351, %350 [0, 0, 3, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%353 = vector.extract %216[0, 4, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%354 = vector.insert %353, %352 [0, 0, 4, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%355 = vector.extract %216[0, 4, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%356 = vector.insert %355, %354 [0, 0, 5, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%357 = vector.extract %216[0, 4, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%358 = vector.insert %357, %356 [0, 0, 6, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%359 = vector.extract %216[0, 4, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%360 = vector.insert %359, %358 [0, 0, 7, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%361 = vector.extract %216[0, 4, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%362 = vector.insert %361, %360 [0, 0, 8, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%363 = vector.extract %216[0, 4, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%364 = vector.insert %363, %362 [0, 0, 9, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%365 = vector.extract %216[0, 4, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%366 = vector.insert %365, %364 [0, 0, 10, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%367 = vector.extract %216[0, 4, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%368 = vector.insert %367, %366 [0, 0, 11, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%369 = vector.extract %216[0, 4, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%370 = vector.insert %369, %368 [0, 0, 12, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%371 = vector.extract %216[0, 4, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%372 = vector.insert %371, %370 [0, 0, 13, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%373 = vector.extract %216[0, 4, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%374 = vector.insert %373, %372 [0, 0, 14, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%375 = vector.extract %216[0, 4, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%376 = vector.insert %375, %374 [0, 0, 15, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%377 = vector.extract %216[0, 5, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%378 = vector.insert %377, %376 [0, 0, 0, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%379 = vector.extract %216[0, 5, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%380 = vector.insert %379, %378 [0, 0, 1, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%381 = vector.extract %216[0, 5, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%382 = vector.insert %381, %380 [0, 0, 2, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%383 = vector.extract %216[0, 5, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%384 = vector.insert %383, %382 [0, 0, 3, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%385 = vector.extract %216[0, 5, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%386 = vector.insert %385, %384 [0, 0, 4, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%387 = vector.extract %216[0, 5, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%388 = vector.insert %387, %386 [0, 0, 5, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%389 = vector.extract %216[0, 5, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%390 = vector.insert %389, %388 [0, 0, 6, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%391 = vector.extract %216[0, 5, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%392 = vector.insert %391, %390 [0, 0, 7, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%393 = vector.extract %216[0, 5, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%394 = vector.insert %393, %392 [0, 0, 8, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%395 = vector.extract %216[0, 5, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%396 = vector.insert %395, %394 [0, 0, 9, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%397 = vector.extract %216[0, 5, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%398 = vector.insert %397, %396 [0, 0, 10, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%399 = vector.extract %216[0, 5, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%400 = vector.insert %399, %398 [0, 0, 11, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%401 = vector.extract %216[0, 5, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%402 = vector.insert %401, %400 [0, 0, 12, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%403 = vector.extract %216[0, 5, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%404 = vector.insert %403, %402 [0, 0, 13, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%405 = vector.extract %216[0, 5, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%406 = vector.insert %405, %404 [0, 0, 14, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%407 = vector.extract %216[0, 5, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%408 = vector.insert %407, %406 [0, 0, 15, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%409 = vector.extract %216[0, 6, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%410 = vector.insert %409, %408 [0, 0, 0, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%411 = vector.extract %216[0, 6, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%412 = vector.insert %411, %410 [0, 0, 1, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%413 = vector.extract %216[0, 6, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%414 = vector.insert %413, %412 [0, 0, 2, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%415 = vector.extract %216[0, 6, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%416 = vector.insert %415, %414 [0, 0, 3, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%417 = vector.extract %216[0, 6, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%418 = vector.insert %417, %416 [0, 0, 4, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%419 = vector.extract %216[0, 6, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%420 = vector.insert %419, %418 [0, 0, 5, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%421 = vector.extract %216[0, 6, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%422 = vector.insert %421, %420 [0, 0, 6, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%423 = vector.extract %216[0, 6, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%424 = vector.insert %423, %422 [0, 0, 7, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%425 = vector.extract %216[0, 6, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%426 = vector.insert %425, %424 [0, 0, 8, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%427 = vector.extract %216[0, 6, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%428 = vector.insert %427, %426 [0, 0, 9, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%429 = vector.extract %216[0, 6, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%430 = vector.insert %429, %428 [0, 0, 10, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%431 = vector.extract %216[0, 6, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%432 = vector.insert %431, %430 [0, 0, 11, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%433 = vector.extract %216[0, 6, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%434 = vector.insert %433, %432 [0, 0, 12, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%435 = vector.extract %216[0, 6, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%436 = vector.insert %435, %434 [0, 0, 13, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%437 = vector.extract %216[0, 6, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%438 = vector.insert %437, %436 [0, 0, 14, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%439 = vector.extract %216[0, 6, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%440 = vector.insert %439, %438 [0, 0, 15, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%441 = vector.extract %216[0, 7, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%442 = vector.insert %441, %440 [0, 0, 0, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%443 = vector.extract %216[0, 7, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%444 = vector.insert %443, %442 [0, 0, 1, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%445 = vector.extract %216[0, 7, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%446 = vector.insert %445, %444 [0, 0, 2, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%447 = vector.extract %216[0, 7, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%448 = vector.insert %447, %446 [0, 0, 3, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%449 = vector.extract %216[0, 7, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%450 = vector.insert %449, %448 [0, 0, 4, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%451 = vector.extract %216[0, 7, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%452 = vector.insert %451, %450 [0, 0, 5, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%453 = vector.extract %216[0, 7, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%454 = vector.insert %453, %452 [0, 0, 6, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%455 = vector.extract %216[0, 7, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%456 = vector.insert %455, %454 [0, 0, 7, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%457 = vector.extract %216[0, 7, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%458 = vector.insert %457, %456 [0, 0, 8, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%459 = vector.extract %216[0, 7, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%460 = vector.insert %459, %458 [0, 0, 9, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%461 = vector.extract %216[0, 7, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%462 = vector.insert %461, %460 [0, 0, 10, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%463 = vector.extract %216[0, 7, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%464 = vector.insert %463, %462 [0, 0, 11, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%465 = vector.extract %216[0, 7, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%466 = vector.insert %465, %464 [0, 0, 12, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%467 = vector.extract %216[0, 7, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%468 = vector.insert %467, %466 [0, 0, 13, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%469 = vector.extract %216[0, 7, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%470 = vector.insert %469, %468 [0, 0, 14, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%471 = vector.extract %216[0, 7, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%472 = vector.insert %471, %470 [0, 0, 15, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%473 = vector.extract %216[0, 8, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%474 = vector.insert %473, %472 [0, 0, 0, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%475 = vector.extract %216[0, 8, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%476 = vector.insert %475, %474 [0, 0, 1, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%477 = vector.extract %216[0, 8, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%478 = vector.insert %477, %476 [0, 0, 2, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%479 = vector.extract %216[0, 8, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%480 = vector.insert %479, %478 [0, 0, 3, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%481 = vector.extract %216[0, 8, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%482 = vector.insert %481, %480 [0, 0, 4, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%483 = vector.extract %216[0, 8, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%484 = vector.insert %483, %482 [0, 0, 5, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%485 = vector.extract %216[0, 8, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%486 = vector.insert %485, %484 [0, 0, 6, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%487 = vector.extract %216[0, 8, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%488 = vector.insert %487, %486 [0, 0, 7, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%489 = vector.extract %216[0, 8, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%490 = vector.insert %489, %488 [0, 0, 8, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%491 = vector.extract %216[0, 8, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%492 = vector.insert %491, %490 [0, 0, 9, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%493 = vector.extract %216[0, 8, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%494 = vector.insert %493, %492 [0, 0, 10, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%495 = vector.extract %216[0, 8, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%496 = vector.insert %495, %494 [0, 0, 11, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%497 = vector.extract %216[0, 8, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%498 = vector.insert %497, %496 [0, 0, 12, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%499 = vector.extract %216[0, 8, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%500 = vector.insert %499, %498 [0, 0, 13, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%501 = vector.extract %216[0, 8, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%502 = vector.insert %501, %500 [0, 0, 14, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%503 = vector.extract %216[0, 8, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%504 = vector.insert %503, %502 [0, 0, 15, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%505 = vector.extract %216[0, 9, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%506 = vector.insert %505, %504 [0, 0, 0, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%507 = vector.extract %216[0, 9, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%508 = vector.insert %507, %506 [0, 0, 1, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%509 = vector.extract %216[0, 9, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%510 = vector.insert %509, %508 [0, 0, 2, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%511 = vector.extract %216[0, 9, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%512 = vector.insert %511, %510 [0, 0, 3, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%513 = vector.extract %216[0, 9, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%514 = vector.insert %513, %512 [0, 0, 4, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%515 = vector.extract %216[0, 9, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%516 = vector.insert %515, %514 [0, 0, 5, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%517 = vector.extract %216[0, 9, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%518 = vector.insert %517, %516 [0, 0, 6, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%519 = vector.extract %216[0, 9, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%520 = vector.insert %519, %518 [0, 0, 7, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%521 = vector.extract %216[0, 9, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%522 = vector.insert %521, %520 [0, 0, 8, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%523 = vector.extract %216[0, 9, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%524 = vector.insert %523, %522 [0, 0, 9, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%525 = vector.extract %216[0, 9, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%526 = vector.insert %525, %524 [0, 0, 10, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%527 = vector.extract %216[0, 9, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%528 = vector.insert %527, %526 [0, 0, 11, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%529 = vector.extract %216[0, 9, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%530 = vector.insert %529, %528 [0, 0, 12, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%531 = vector.extract %216[0, 9, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%532 = vector.insert %531, %530 [0, 0, 13, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%533 = vector.extract %216[0, 9, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%534 = vector.insert %533, %532 [0, 0, 14, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%535 = vector.extract %216[0, 9, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%536 = vector.insert %535, %534 [0, 0, 15, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%537 = vector.extract %216[0, 10, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%538 = vector.insert %537, %536 [0, 0, 0, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%539 = vector.extract %216[0, 10, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%540 = vector.insert %539, %538 [0, 0, 1, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%541 = vector.extract %216[0, 10, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%542 = vector.insert %541, %540 [0, 0, 2, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%543 = vector.extract %216[0, 10, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%544 = vector.insert %543, %542 [0, 0, 3, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%545 = vector.extract %216[0, 10, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%546 = vector.insert %545, %544 [0, 0, 4, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%547 = vector.extract %216[0, 10, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%548 = vector.insert %547, %546 [0, 0, 5, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%549 = vector.extract %216[0, 10, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%550 = vector.insert %549, %548 [0, 0, 6, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%551 = vector.extract %216[0, 10, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%552 = vector.insert %551, %550 [0, 0, 7, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%553 = vector.extract %216[0, 10, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%554 = vector.insert %553, %552 [0, 0, 8, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%555 = vector.extract %216[0, 10, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%556 = vector.insert %555, %554 [0, 0, 9, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%557 = vector.extract %216[0, 10, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%558 = vector.insert %557, %556 [0, 0, 10, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%559 = vector.extract %216[0, 10, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%560 = vector.insert %559, %558 [0, 0, 11, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%561 = vector.extract %216[0, 10, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%562 = vector.insert %561, %560 [0, 0, 12, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%563 = vector.extract %216[0, 10, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%564 = vector.insert %563, %562 [0, 0, 13, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%565 = vector.extract %216[0, 10, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%566 = vector.insert %565, %564 [0, 0, 14, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%567 = vector.extract %216[0, 10, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%568 = vector.insert %567, %566 [0, 0, 15, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%569 = vector.extract %216[0, 11, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%570 = vector.insert %569, %568 [0, 0, 0, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%571 = vector.extract %216[0, 11, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%572 = vector.insert %571, %570 [0, 0, 1, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%573 = vector.extract %216[0, 11, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%574 = vector.insert %573, %572 [0, 0, 2, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%575 = vector.extract %216[0, 11, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%576 = vector.insert %575, %574 [0, 0, 3, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%577 = vector.extract %216[0, 11, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%578 = vector.insert %577, %576 [0, 0, 4, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%579 = vector.extract %216[0, 11, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%580 = vector.insert %579, %578 [0, 0, 5, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%581 = vector.extract %216[0, 11, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%582 = vector.insert %581, %580 [0, 0, 6, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%583 = vector.extract %216[0, 11, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%584 = vector.insert %583, %582 [0, 0, 7, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%585 = vector.extract %216[0, 11, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%586 = vector.insert %585, %584 [0, 0, 8, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%587 = vector.extract %216[0, 11, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%588 = vector.insert %587, %586 [0, 0, 9, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%589 = vector.extract %216[0, 11, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%590 = vector.insert %589, %588 [0, 0, 10, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%591 = vector.extract %216[0, 11, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%592 = vector.insert %591, %590 [0, 0, 11, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%593 = vector.extract %216[0, 11, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%594 = vector.insert %593, %592 [0, 0, 12, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%595 = vector.extract %216[0, 11, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%596 = vector.insert %595, %594 [0, 0, 13, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%597 = vector.extract %216[0, 11, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%598 = vector.insert %597, %596 [0, 0, 14, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%599 = vector.extract %216[0, 11, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%600 = vector.insert %599, %598 [0, 0, 15, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%601 = vector.extract %216[0, 12, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%602 = vector.insert %601, %600 [0, 0, 0, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%603 = vector.extract %216[0, 12, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%604 = vector.insert %603, %602 [0, 0, 1, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%605 = vector.extract %216[0, 12, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%606 = vector.insert %605, %604 [0, 0, 2, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%607 = vector.extract %216[0, 12, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%608 = vector.insert %607, %606 [0, 0, 3, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%609 = vector.extract %216[0, 12, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%610 = vector.insert %609, %608 [0, 0, 4, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%611 = vector.extract %216[0, 12, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%612 = vector.insert %611, %610 [0, 0, 5, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%613 = vector.extract %216[0, 12, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%614 = vector.insert %613, %612 [0, 0, 6, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%615 = vector.extract %216[0, 12, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%616 = vector.insert %615, %614 [0, 0, 7, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%617 = vector.extract %216[0, 12, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%618 = vector.insert %617, %616 [0, 0, 8, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%619 = vector.extract %216[0, 12, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%620 = vector.insert %619, %618 [0, 0, 9, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%621 = vector.extract %216[0, 12, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%622 = vector.insert %621, %620 [0, 0, 10, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%623 = vector.extract %216[0, 12, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%624 = vector.insert %623, %622 [0, 0, 11, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%625 = vector.extract %216[0, 12, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%626 = vector.insert %625, %624 [0, 0, 12, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%627 = vector.extract %216[0, 12, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%628 = vector.insert %627, %626 [0, 0, 13, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%629 = vector.extract %216[0, 12, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%630 = vector.insert %629, %628 [0, 0, 14, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%631 = vector.extract %216[0, 12, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%632 = vector.insert %631, %630 [0, 0, 15, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%633 = vector.extract %216[0, 13, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%634 = vector.insert %633, %632 [0, 0, 0, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%635 = vector.extract %216[0, 13, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%636 = vector.insert %635, %634 [0, 0, 1, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%637 = vector.extract %216[0, 13, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%638 = vector.insert %637, %636 [0, 0, 2, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%639 = vector.extract %216[0, 13, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%640 = vector.insert %639, %638 [0, 0, 3, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%641 = vector.extract %216[0, 13, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%642 = vector.insert %641, %640 [0, 0, 4, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%643 = vector.extract %216[0, 13, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%644 = vector.insert %643, %642 [0, 0, 5, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%645 = vector.extract %216[0, 13, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%646 = vector.insert %645, %644 [0, 0, 6, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%647 = vector.extract %216[0, 13, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%648 = vector.insert %647, %646 [0, 0, 7, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%649 = vector.extract %216[0, 13, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%650 = vector.insert %649, %648 [0, 0, 8, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%651 = vector.extract %216[0, 13, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%652 = vector.insert %651, %650 [0, 0, 9, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%653 = vector.extract %216[0, 13, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%654 = vector.insert %653, %652 [0, 0, 10, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%655 = vector.extract %216[0, 13, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%656 = vector.insert %655, %654 [0, 0, 11, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%657 = vector.extract %216[0, 13, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%658 = vector.insert %657, %656 [0, 0, 12, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%659 = vector.extract %216[0, 13, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%660 = vector.insert %659, %658 [0, 0, 13, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%661 = vector.extract %216[0, 13, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%662 = vector.insert %661, %660 [0, 0, 14, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%663 = vector.extract %216[0, 13, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%664 = vector.insert %663, %662 [0, 0, 15, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%665 = vector.extract %216[0, 14, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%666 = vector.insert %665, %664 [0, 0, 0, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%667 = vector.extract %216[0, 14, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%668 = vector.insert %667, %666 [0, 0, 1, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%669 = vector.extract %216[0, 14, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%670 = vector.insert %669, %668 [0, 0, 2, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%671 = vector.extract %216[0, 14, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%672 = vector.insert %671, %670 [0, 0, 3, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%673 = vector.extract %216[0, 14, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%674 = vector.insert %673, %672 [0, 0, 4, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%675 = vector.extract %216[0, 14, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%676 = vector.insert %675, %674 [0, 0, 5, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%677 = vector.extract %216[0, 14, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%678 = vector.insert %677, %676 [0, 0, 6, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%679 = vector.extract %216[0, 14, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%680 = vector.insert %679, %678 [0, 0, 7, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%681 = vector.extract %216[0, 14, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%682 = vector.insert %681, %680 [0, 0, 8, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%683 = vector.extract %216[0, 14, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%684 = vector.insert %683, %682 [0, 0, 9, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%685 = vector.extract %216[0, 14, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%686 = vector.insert %685, %684 [0, 0, 10, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%687 = vector.extract %216[0, 14, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%688 = vector.insert %687, %686 [0, 0, 11, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%689 = vector.extract %216[0, 14, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%690 = vector.insert %689, %688 [0, 0, 12, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%691 = vector.extract %216[0, 14, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%692 = vector.insert %691, %690 [0, 0, 13, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%693 = vector.extract %216[0, 14, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%694 = vector.insert %693, %692 [0, 0, 14, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%695 = vector.extract %216[0, 14, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%696 = vector.insert %695, %694 [0, 0, 15, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%697 = vector.extract %216[0, 15, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%698 = vector.insert %697, %696 [0, 0, 0, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%699 = vector.extract %216[0, 15, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%700 = vector.insert %699, %698 [0, 0, 1, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%701 = vector.extract %216[0, 15, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%702 = vector.insert %701, %700 [0, 0, 2, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%703 = vector.extract %216[0, 15, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%704 = vector.insert %703, %702 [0, 0, 3, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%705 = vector.extract %216[0, 15, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%706 = vector.insert %705, %704 [0, 0, 4, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%707 = vector.extract %216[0, 15, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%708 = vector.insert %707, %706 [0, 0, 5, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%709 = vector.extract %216[0, 15, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%710 = vector.insert %709, %708 [0, 0, 6, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%711 = vector.extract %216[0, 15, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%712 = vector.insert %711, %710 [0, 0, 7, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%713 = vector.extract %216[0, 15, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%714 = vector.insert %713, %712 [0, 0, 8, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%715 = vector.extract %216[0, 15, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%716 = vector.insert %715, %714 [0, 0, 9, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%717 = vector.extract %216[0, 15, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%718 = vector.insert %717, %716 [0, 0, 10, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%719 = vector.extract %216[0, 15, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%720 = vector.insert %719, %718 [0, 0, 11, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%721 = vector.extract %216[0, 15, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%722 = vector.insert %721, %720 [0, 0, 12, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%723 = vector.extract %216[0, 15, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%724 = vector.insert %723, %722 [0, 0, 13, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%725 = vector.extract %216[0, 15, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%726 = vector.insert %725, %724 [0, 0, 14, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%727 = vector.extract %216[0, 15, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%728 = vector.insert %727, %726 [0, 0, 15, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%729 = vector.extract %216[1, 0, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%730 = vector.insert %729, %728 [0, 1, 0, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%731 = vector.extract %216[1, 0, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%732 = vector.insert %731, %730 [0, 1, 1, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%733 = vector.extract %216[1, 0, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%734 = vector.insert %733, %732 [0, 1, 2, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%735 = vector.extract %216[1, 0, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%736 = vector.insert %735, %734 [0, 1, 3, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%737 = vector.extract %216[1, 0, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%738 = vector.insert %737, %736 [0, 1, 4, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%739 = vector.extract %216[1, 0, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%740 = vector.insert %739, %738 [0, 1, 5, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%741 = vector.extract %216[1, 0, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%742 = vector.insert %741, %740 [0, 1, 6, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%743 = vector.extract %216[1, 0, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%744 = vector.insert %743, %742 [0, 1, 7, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%745 = vector.extract %216[1, 0, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%746 = vector.insert %745, %744 [0, 1, 8, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%747 = vector.extract %216[1, 0, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%748 = vector.insert %747, %746 [0, 1, 9, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%749 = vector.extract %216[1, 0, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%750 = vector.insert %749, %748 [0, 1, 10, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%751 = vector.extract %216[1, 0, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%752 = vector.insert %751, %750 [0, 1, 11, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%753 = vector.extract %216[1, 0, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%754 = vector.insert %753, %752 [0, 1, 12, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%755 = vector.extract %216[1, 0, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%756 = vector.insert %755, %754 [0, 1, 13, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%757 = vector.extract %216[1, 0, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%758 = vector.insert %757, %756 [0, 1, 14, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%759 = vector.extract %216[1, 0, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%760 = vector.insert %759, %758 [0, 1, 15, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%761 = vector.extract %216[1, 1, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%762 = vector.insert %761, %760 [0, 1, 0, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%763 = vector.extract %216[1, 1, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%764 = vector.insert %763, %762 [0, 1, 1, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%765 = vector.extract %216[1, 1, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%766 = vector.insert %765, %764 [0, 1, 2, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%767 = vector.extract %216[1, 1, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%768 = vector.insert %767, %766 [0, 1, 3, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%769 = vector.extract %216[1, 1, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%770 = vector.insert %769, %768 [0, 1, 4, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%771 = vector.extract %216[1, 1, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%772 = vector.insert %771, %770 [0, 1, 5, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%773 = vector.extract %216[1, 1, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%774 = vector.insert %773, %772 [0, 1, 6, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%775 = vector.extract %216[1, 1, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%776 = vector.insert %775, %774 [0, 1, 7, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%777 = vector.extract %216[1, 1, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%778 = vector.insert %777, %776 [0, 1, 8, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%779 = vector.extract %216[1, 1, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%780 = vector.insert %779, %778 [0, 1, 9, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%781 = vector.extract %216[1, 1, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%782 = vector.insert %781, %780 [0, 1, 10, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%783 = vector.extract %216[1, 1, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%784 = vector.insert %783, %782 [0, 1, 11, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%785 = vector.extract %216[1, 1, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%786 = vector.insert %785, %784 [0, 1, 12, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%787 = vector.extract %216[1, 1, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%788 = vector.insert %787, %786 [0, 1, 13, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%789 = vector.extract %216[1, 1, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%790 = vector.insert %789, %788 [0, 1, 14, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%791 = vector.extract %216[1, 1, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%792 = vector.insert %791, %790 [0, 1, 15, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%793 = vector.extract %216[1, 2, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%794 = vector.insert %793, %792 [0, 1, 0, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%795 = vector.extract %216[1, 2, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%796 = vector.insert %795, %794 [0, 1, 1, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%797 = vector.extract %216[1, 2, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%798 = vector.insert %797, %796 [0, 1, 2, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%799 = vector.extract %216[1, 2, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%800 = vector.insert %799, %798 [0, 1, 3, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%801 = vector.extract %216[1, 2, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%802 = vector.insert %801, %800 [0, 1, 4, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%803 = vector.extract %216[1, 2, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%804 = vector.insert %803, %802 [0, 1, 5, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%805 = vector.extract %216[1, 2, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%806 = vector.insert %805, %804 [0, 1, 6, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%807 = vector.extract %216[1, 2, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%808 = vector.insert %807, %806 [0, 1, 7, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%809 = vector.extract %216[1, 2, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%810 = vector.insert %809, %808 [0, 1, 8, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%811 = vector.extract %216[1, 2, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%812 = vector.insert %811, %810 [0, 1, 9, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%813 = vector.extract %216[1, 2, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%814 = vector.insert %813, %812 [0, 1, 10, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%815 = vector.extract %216[1, 2, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%816 = vector.insert %815, %814 [0, 1, 11, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%817 = vector.extract %216[1, 2, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%818 = vector.insert %817, %816 [0, 1, 12, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%819 = vector.extract %216[1, 2, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%820 = vector.insert %819, %818 [0, 1, 13, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%821 = vector.extract %216[1, 2, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%822 = vector.insert %821, %820 [0, 1, 14, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%823 = vector.extract %216[1, 2, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%824 = vector.insert %823, %822 [0, 1, 15, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%825 = vector.extract %216[1, 3, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%826 = vector.insert %825, %824 [0, 1, 0, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%827 = vector.extract %216[1, 3, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%828 = vector.insert %827, %826 [0, 1, 1, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%829 = vector.extract %216[1, 3, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%830 = vector.insert %829, %828 [0, 1, 2, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%831 = vector.extract %216[1, 3, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%832 = vector.insert %831, %830 [0, 1, 3, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%833 = vector.extract %216[1, 3, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%834 = vector.insert %833, %832 [0, 1, 4, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%835 = vector.extract %216[1, 3, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%836 = vector.insert %835, %834 [0, 1, 5, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%837 = vector.extract %216[1, 3, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%838 = vector.insert %837, %836 [0, 1, 6, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%839 = vector.extract %216[1, 3, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%840 = vector.insert %839, %838 [0, 1, 7, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%841 = vector.extract %216[1, 3, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%842 = vector.insert %841, %840 [0, 1, 8, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%843 = vector.extract %216[1, 3, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%844 = vector.insert %843, %842 [0, 1, 9, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%845 = vector.extract %216[1, 3, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%846 = vector.insert %845, %844 [0, 1, 10, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%847 = vector.extract %216[1, 3, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%848 = vector.insert %847, %846 [0, 1, 11, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%849 = vector.extract %216[1, 3, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%850 = vector.insert %849, %848 [0, 1, 12, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%851 = vector.extract %216[1, 3, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%852 = vector.insert %851, %850 [0, 1, 13, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%853 = vector.extract %216[1, 3, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%854 = vector.insert %853, %852 [0, 1, 14, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%855 = vector.extract %216[1, 3, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%856 = vector.insert %855, %854 [0, 1, 15, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%857 = vector.extract %216[1, 4, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%858 = vector.insert %857, %856 [0, 1, 0, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%859 = vector.extract %216[1, 4, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%860 = vector.insert %859, %858 [0, 1, 1, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%861 = vector.extract %216[1, 4, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%862 = vector.insert %861, %860 [0, 1, 2, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%863 = vector.extract %216[1, 4, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%864 = vector.insert %863, %862 [0, 1, 3, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%865 = vector.extract %216[1, 4, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%866 = vector.insert %865, %864 [0, 1, 4, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%867 = vector.extract %216[1, 4, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%868 = vector.insert %867, %866 [0, 1, 5, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%869 = vector.extract %216[1, 4, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%870 = vector.insert %869, %868 [0, 1, 6, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%871 = vector.extract %216[1, 4, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%872 = vector.insert %871, %870 [0, 1, 7, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%873 = vector.extract %216[1, 4, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%874 = vector.insert %873, %872 [0, 1, 8, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%875 = vector.extract %216[1, 4, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%876 = vector.insert %875, %874 [0, 1, 9, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%877 = vector.extract %216[1, 4, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%878 = vector.insert %877, %876 [0, 1, 10, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%879 = vector.extract %216[1, 4, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%880 = vector.insert %879, %878 [0, 1, 11, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%881 = vector.extract %216[1, 4, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%882 = vector.insert %881, %880 [0, 1, 12, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%883 = vector.extract %216[1, 4, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%884 = vector.insert %883, %882 [0, 1, 13, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%885 = vector.extract %216[1, 4, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%886 = vector.insert %885, %884 [0, 1, 14, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%887 = vector.extract %216[1, 4, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%888 = vector.insert %887, %886 [0, 1, 15, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%889 = vector.extract %216[1, 5, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%890 = vector.insert %889, %888 [0, 1, 0, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%891 = vector.extract %216[1, 5, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%892 = vector.insert %891, %890 [0, 1, 1, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%893 = vector.extract %216[1, 5, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%894 = vector.insert %893, %892 [0, 1, 2, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%895 = vector.extract %216[1, 5, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%896 = vector.insert %895, %894 [0, 1, 3, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%897 = vector.extract %216[1, 5, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%898 = vector.insert %897, %896 [0, 1, 4, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%899 = vector.extract %216[1, 5, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%900 = vector.insert %899, %898 [0, 1, 5, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%901 = vector.extract %216[1, 5, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%902 = vector.insert %901, %900 [0, 1, 6, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%903 = vector.extract %216[1, 5, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%904 = vector.insert %903, %902 [0, 1, 7, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%905 = vector.extract %216[1, 5, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%906 = vector.insert %905, %904 [0, 1, 8, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%907 = vector.extract %216[1, 5, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%908 = vector.insert %907, %906 [0, 1, 9, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%909 = vector.extract %216[1, 5, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%910 = vector.insert %909, %908 [0, 1, 10, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%911 = vector.extract %216[1, 5, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%912 = vector.insert %911, %910 [0, 1, 11, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%913 = vector.extract %216[1, 5, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%914 = vector.insert %913, %912 [0, 1, 12, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%915 = vector.extract %216[1, 5, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%916 = vector.insert %915, %914 [0, 1, 13, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%917 = vector.extract %216[1, 5, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%918 = vector.insert %917, %916 [0, 1, 14, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%919 = vector.extract %216[1, 5, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%920 = vector.insert %919, %918 [0, 1, 15, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%921 = vector.extract %216[1, 6, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%922 = vector.insert %921, %920 [0, 1, 0, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%923 = vector.extract %216[1, 6, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%924 = vector.insert %923, %922 [0, 1, 1, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%925 = vector.extract %216[1, 6, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%926 = vector.insert %925, %924 [0, 1, 2, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%927 = vector.extract %216[1, 6, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%928 = vector.insert %927, %926 [0, 1, 3, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%929 = vector.extract %216[1, 6, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%930 = vector.insert %929, %928 [0, 1, 4, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%931 = vector.extract %216[1, 6, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%932 = vector.insert %931, %930 [0, 1, 5, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%933 = vector.extract %216[1, 6, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%934 = vector.insert %933, %932 [0, 1, 6, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%935 = vector.extract %216[1, 6, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%936 = vector.insert %935, %934 [0, 1, 7, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%937 = vector.extract %216[1, 6, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%938 = vector.insert %937, %936 [0, 1, 8, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%939 = vector.extract %216[1, 6, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%940 = vector.insert %939, %938 [0, 1, 9, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%941 = vector.extract %216[1, 6, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%942 = vector.insert %941, %940 [0, 1, 10, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%943 = vector.extract %216[1, 6, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%944 = vector.insert %943, %942 [0, 1, 11, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%945 = vector.extract %216[1, 6, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%946 = vector.insert %945, %944 [0, 1, 12, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%947 = vector.extract %216[1, 6, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%948 = vector.insert %947, %946 [0, 1, 13, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%949 = vector.extract %216[1, 6, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%950 = vector.insert %949, %948 [0, 1, 14, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%951 = vector.extract %216[1, 6, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%952 = vector.insert %951, %950 [0, 1, 15, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%953 = vector.extract %216[1, 7, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%954 = vector.insert %953, %952 [0, 1, 0, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%955 = vector.extract %216[1, 7, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%956 = vector.insert %955, %954 [0, 1, 1, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%957 = vector.extract %216[1, 7, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%958 = vector.insert %957, %956 [0, 1, 2, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%959 = vector.extract %216[1, 7, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%960 = vector.insert %959, %958 [0, 1, 3, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%961 = vector.extract %216[1, 7, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%962 = vector.insert %961, %960 [0, 1, 4, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%963 = vector.extract %216[1, 7, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%964 = vector.insert %963, %962 [0, 1, 5, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%965 = vector.extract %216[1, 7, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%966 = vector.insert %965, %964 [0, 1, 6, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%967 = vector.extract %216[1, 7, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%968 = vector.insert %967, %966 [0, 1, 7, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%969 = vector.extract %216[1, 7, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%970 = vector.insert %969, %968 [0, 1, 8, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%971 = vector.extract %216[1, 7, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%972 = vector.insert %971, %970 [0, 1, 9, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%973 = vector.extract %216[1, 7, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%974 = vector.insert %973, %972 [0, 1, 10, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%975 = vector.extract %216[1, 7, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%976 = vector.insert %975, %974 [0, 1, 11, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%977 = vector.extract %216[1, 7, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%978 = vector.insert %977, %976 [0, 1, 12, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%979 = vector.extract %216[1, 7, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%980 = vector.insert %979, %978 [0, 1, 13, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%981 = vector.extract %216[1, 7, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%982 = vector.insert %981, %980 [0, 1, 14, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%983 = vector.extract %216[1, 7, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%984 = vector.insert %983, %982 [0, 1, 15, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%985 = vector.extract %216[1, 8, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%986 = vector.insert %985, %984 [0, 1, 0, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%987 = vector.extract %216[1, 8, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%988 = vector.insert %987, %986 [0, 1, 1, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%989 = vector.extract %216[1, 8, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%990 = vector.insert %989, %988 [0, 1, 2, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%991 = vector.extract %216[1, 8, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%992 = vector.insert %991, %990 [0, 1, 3, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%993 = vector.extract %216[1, 8, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%994 = vector.insert %993, %992 [0, 1, 4, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%995 = vector.extract %216[1, 8, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%996 = vector.insert %995, %994 [0, 1, 5, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%997 = vector.extract %216[1, 8, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%998 = vector.insert %997, %996 [0, 1, 6, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%999 = vector.extract %216[1, 8, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1000 = vector.insert %999, %998 [0, 1, 7, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1001 = vector.extract %216[1, 8, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1002 = vector.insert %1001, %1000 [0, 1, 8, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1003 = vector.extract %216[1, 8, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1004 = vector.insert %1003, %1002 [0, 1, 9, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1005 = vector.extract %216[1, 8, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1006 = vector.insert %1005, %1004 [0, 1, 10, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1007 = vector.extract %216[1, 8, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1008 = vector.insert %1007, %1006 [0, 1, 11, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1009 = vector.extract %216[1, 8, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1010 = vector.insert %1009, %1008 [0, 1, 12, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1011 = vector.extract %216[1, 8, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1012 = vector.insert %1011, %1010 [0, 1, 13, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1013 = vector.extract %216[1, 8, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1014 = vector.insert %1013, %1012 [0, 1, 14, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1015 = vector.extract %216[1, 8, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1016 = vector.insert %1015, %1014 [0, 1, 15, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1017 = vector.extract %216[1, 9, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1018 = vector.insert %1017, %1016 [0, 1, 0, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1019 = vector.extract %216[1, 9, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1020 = vector.insert %1019, %1018 [0, 1, 1, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1021 = vector.extract %216[1, 9, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1022 = vector.insert %1021, %1020 [0, 1, 2, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1023 = vector.extract %216[1, 9, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1024 = vector.insert %1023, %1022 [0, 1, 3, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1025 = vector.extract %216[1, 9, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1026 = vector.insert %1025, %1024 [0, 1, 4, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1027 = vector.extract %216[1, 9, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1028 = vector.insert %1027, %1026 [0, 1, 5, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1029 = vector.extract %216[1, 9, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1030 = vector.insert %1029, %1028 [0, 1, 6, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1031 = vector.extract %216[1, 9, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1032 = vector.insert %1031, %1030 [0, 1, 7, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1033 = vector.extract %216[1, 9, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1034 = vector.insert %1033, %1032 [0, 1, 8, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1035 = vector.extract %216[1, 9, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1036 = vector.insert %1035, %1034 [0, 1, 9, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1037 = vector.extract %216[1, 9, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1038 = vector.insert %1037, %1036 [0, 1, 10, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1039 = vector.extract %216[1, 9, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1040 = vector.insert %1039, %1038 [0, 1, 11, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1041 = vector.extract %216[1, 9, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1042 = vector.insert %1041, %1040 [0, 1, 12, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1043 = vector.extract %216[1, 9, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1044 = vector.insert %1043, %1042 [0, 1, 13, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1045 = vector.extract %216[1, 9, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1046 = vector.insert %1045, %1044 [0, 1, 14, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1047 = vector.extract %216[1, 9, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1048 = vector.insert %1047, %1046 [0, 1, 15, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1049 = vector.extract %216[1, 10, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1050 = vector.insert %1049, %1048 [0, 1, 0, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1051 = vector.extract %216[1, 10, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1052 = vector.insert %1051, %1050 [0, 1, 1, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1053 = vector.extract %216[1, 10, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1054 = vector.insert %1053, %1052 [0, 1, 2, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1055 = vector.extract %216[1, 10, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1056 = vector.insert %1055, %1054 [0, 1, 3, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1057 = vector.extract %216[1, 10, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1058 = vector.insert %1057, %1056 [0, 1, 4, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1059 = vector.extract %216[1, 10, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1060 = vector.insert %1059, %1058 [0, 1, 5, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1061 = vector.extract %216[1, 10, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1062 = vector.insert %1061, %1060 [0, 1, 6, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1063 = vector.extract %216[1, 10, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1064 = vector.insert %1063, %1062 [0, 1, 7, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1065 = vector.extract %216[1, 10, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1066 = vector.insert %1065, %1064 [0, 1, 8, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1067 = vector.extract %216[1, 10, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1068 = vector.insert %1067, %1066 [0, 1, 9, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1069 = vector.extract %216[1, 10, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1070 = vector.insert %1069, %1068 [0, 1, 10, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1071 = vector.extract %216[1, 10, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1072 = vector.insert %1071, %1070 [0, 1, 11, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1073 = vector.extract %216[1, 10, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1074 = vector.insert %1073, %1072 [0, 1, 12, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1075 = vector.extract %216[1, 10, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1076 = vector.insert %1075, %1074 [0, 1, 13, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1077 = vector.extract %216[1, 10, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1078 = vector.insert %1077, %1076 [0, 1, 14, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1079 = vector.extract %216[1, 10, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1080 = vector.insert %1079, %1078 [0, 1, 15, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1081 = vector.extract %216[1, 11, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1082 = vector.insert %1081, %1080 [0, 1, 0, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1083 = vector.extract %216[1, 11, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1084 = vector.insert %1083, %1082 [0, 1, 1, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1085 = vector.extract %216[1, 11, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1086 = vector.insert %1085, %1084 [0, 1, 2, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1087 = vector.extract %216[1, 11, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1088 = vector.insert %1087, %1086 [0, 1, 3, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1089 = vector.extract %216[1, 11, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1090 = vector.insert %1089, %1088 [0, 1, 4, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1091 = vector.extract %216[1, 11, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1092 = vector.insert %1091, %1090 [0, 1, 5, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1093 = vector.extract %216[1, 11, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1094 = vector.insert %1093, %1092 [0, 1, 6, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1095 = vector.extract %216[1, 11, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1096 = vector.insert %1095, %1094 [0, 1, 7, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1097 = vector.extract %216[1, 11, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1098 = vector.insert %1097, %1096 [0, 1, 8, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1099 = vector.extract %216[1, 11, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1100 = vector.insert %1099, %1098 [0, 1, 9, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1101 = vector.extract %216[1, 11, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1102 = vector.insert %1101, %1100 [0, 1, 10, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1103 = vector.extract %216[1, 11, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1104 = vector.insert %1103, %1102 [0, 1, 11, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1105 = vector.extract %216[1, 11, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1106 = vector.insert %1105, %1104 [0, 1, 12, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1107 = vector.extract %216[1, 11, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1108 = vector.insert %1107, %1106 [0, 1, 13, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1109 = vector.extract %216[1, 11, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1110 = vector.insert %1109, %1108 [0, 1, 14, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1111 = vector.extract %216[1, 11, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1112 = vector.insert %1111, %1110 [0, 1, 15, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1113 = vector.extract %216[1, 12, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1114 = vector.insert %1113, %1112 [0, 1, 0, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1115 = vector.extract %216[1, 12, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1116 = vector.insert %1115, %1114 [0, 1, 1, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1117 = vector.extract %216[1, 12, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1118 = vector.insert %1117, %1116 [0, 1, 2, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1119 = vector.extract %216[1, 12, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1120 = vector.insert %1119, %1118 [0, 1, 3, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1121 = vector.extract %216[1, 12, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1122 = vector.insert %1121, %1120 [0, 1, 4, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1123 = vector.extract %216[1, 12, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1124 = vector.insert %1123, %1122 [0, 1, 5, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1125 = vector.extract %216[1, 12, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1126 = vector.insert %1125, %1124 [0, 1, 6, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1127 = vector.extract %216[1, 12, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1128 = vector.insert %1127, %1126 [0, 1, 7, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1129 = vector.extract %216[1, 12, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1130 = vector.insert %1129, %1128 [0, 1, 8, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1131 = vector.extract %216[1, 12, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1132 = vector.insert %1131, %1130 [0, 1, 9, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1133 = vector.extract %216[1, 12, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1134 = vector.insert %1133, %1132 [0, 1, 10, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1135 = vector.extract %216[1, 12, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1136 = vector.insert %1135, %1134 [0, 1, 11, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1137 = vector.extract %216[1, 12, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1138 = vector.insert %1137, %1136 [0, 1, 12, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1139 = vector.extract %216[1, 12, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1140 = vector.insert %1139, %1138 [0, 1, 13, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1141 = vector.extract %216[1, 12, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1142 = vector.insert %1141, %1140 [0, 1, 14, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1143 = vector.extract %216[1, 12, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1144 = vector.insert %1143, %1142 [0, 1, 15, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1145 = vector.extract %216[1, 13, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1146 = vector.insert %1145, %1144 [0, 1, 0, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1147 = vector.extract %216[1, 13, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1148 = vector.insert %1147, %1146 [0, 1, 1, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1149 = vector.extract %216[1, 13, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1150 = vector.insert %1149, %1148 [0, 1, 2, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1151 = vector.extract %216[1, 13, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1152 = vector.insert %1151, %1150 [0, 1, 3, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1153 = vector.extract %216[1, 13, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1154 = vector.insert %1153, %1152 [0, 1, 4, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1155 = vector.extract %216[1, 13, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1156 = vector.insert %1155, %1154 [0, 1, 5, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1157 = vector.extract %216[1, 13, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1158 = vector.insert %1157, %1156 [0, 1, 6, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1159 = vector.extract %216[1, 13, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1160 = vector.insert %1159, %1158 [0, 1, 7, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1161 = vector.extract %216[1, 13, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1162 = vector.insert %1161, %1160 [0, 1, 8, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1163 = vector.extract %216[1, 13, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1164 = vector.insert %1163, %1162 [0, 1, 9, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1165 = vector.extract %216[1, 13, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1166 = vector.insert %1165, %1164 [0, 1, 10, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1167 = vector.extract %216[1, 13, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1168 = vector.insert %1167, %1166 [0, 1, 11, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1169 = vector.extract %216[1, 13, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1170 = vector.insert %1169, %1168 [0, 1, 12, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1171 = vector.extract %216[1, 13, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1172 = vector.insert %1171, %1170 [0, 1, 13, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1173 = vector.extract %216[1, 13, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1174 = vector.insert %1173, %1172 [0, 1, 14, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1175 = vector.extract %216[1, 13, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1176 = vector.insert %1175, %1174 [0, 1, 15, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1177 = vector.extract %216[1, 14, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1178 = vector.insert %1177, %1176 [0, 1, 0, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1179 = vector.extract %216[1, 14, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1180 = vector.insert %1179, %1178 [0, 1, 1, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1181 = vector.extract %216[1, 14, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1182 = vector.insert %1181, %1180 [0, 1, 2, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1183 = vector.extract %216[1, 14, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1184 = vector.insert %1183, %1182 [0, 1, 3, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1185 = vector.extract %216[1, 14, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1186 = vector.insert %1185, %1184 [0, 1, 4, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1187 = vector.extract %216[1, 14, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1188 = vector.insert %1187, %1186 [0, 1, 5, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1189 = vector.extract %216[1, 14, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1190 = vector.insert %1189, %1188 [0, 1, 6, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1191 = vector.extract %216[1, 14, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1192 = vector.insert %1191, %1190 [0, 1, 7, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1193 = vector.extract %216[1, 14, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1194 = vector.insert %1193, %1192 [0, 1, 8, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1195 = vector.extract %216[1, 14, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1196 = vector.insert %1195, %1194 [0, 1, 9, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1197 = vector.extract %216[1, 14, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1198 = vector.insert %1197, %1196 [0, 1, 10, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1199 = vector.extract %216[1, 14, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1200 = vector.insert %1199, %1198 [0, 1, 11, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1201 = vector.extract %216[1, 14, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1202 = vector.insert %1201, %1200 [0, 1, 12, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1203 = vector.extract %216[1, 14, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1204 = vector.insert %1203, %1202 [0, 1, 13, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1205 = vector.extract %216[1, 14, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1206 = vector.insert %1205, %1204 [0, 1, 14, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1207 = vector.extract %216[1, 14, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1208 = vector.insert %1207, %1206 [0, 1, 15, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1209 = vector.extract %216[1, 15, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1210 = vector.insert %1209, %1208 [0, 1, 0, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1211 = vector.extract %216[1, 15, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1212 = vector.insert %1211, %1210 [0, 1, 1, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1213 = vector.extract %216[1, 15, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1214 = vector.insert %1213, %1212 [0, 1, 2, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1215 = vector.extract %216[1, 15, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1216 = vector.insert %1215, %1214 [0, 1, 3, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1217 = vector.extract %216[1, 15, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1218 = vector.insert %1217, %1216 [0, 1, 4, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1219 = vector.extract %216[1, 15, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1220 = vector.insert %1219, %1218 [0, 1, 5, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1221 = vector.extract %216[1, 15, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1222 = vector.insert %1221, %1220 [0, 1, 6, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1223 = vector.extract %216[1, 15, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1224 = vector.insert %1223, %1222 [0, 1, 7, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1225 = vector.extract %216[1, 15, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1226 = vector.insert %1225, %1224 [0, 1, 8, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1227 = vector.extract %216[1, 15, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1228 = vector.insert %1227, %1226 [0, 1, 9, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1229 = vector.extract %216[1, 15, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1230 = vector.insert %1229, %1228 [0, 1, 10, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1231 = vector.extract %216[1, 15, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1232 = vector.insert %1231, %1230 [0, 1, 11, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1233 = vector.extract %216[1, 15, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1234 = vector.insert %1233, %1232 [0, 1, 12, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1235 = vector.extract %216[1, 15, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1236 = vector.insert %1235, %1234 [0, 1, 13, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1237 = vector.extract %216[1, 15, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1238 = vector.insert %1237, %1236 [0, 1, 14, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1239 = vector.extract %216[1, 15, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1240 = vector.insert %1239, %1238 [0, 1, 15, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1241 = vector.extract %216[2, 0, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1242 = vector.insert %1241, %1240 [0, 2, 0, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1243 = vector.extract %216[2, 0, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1244 = vector.insert %1243, %1242 [0, 2, 1, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1245 = vector.extract %216[2, 0, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1246 = vector.insert %1245, %1244 [0, 2, 2, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1247 = vector.extract %216[2, 0, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1248 = vector.insert %1247, %1246 [0, 2, 3, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1249 = vector.extract %216[2, 0, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1250 = vector.insert %1249, %1248 [0, 2, 4, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1251 = vector.extract %216[2, 0, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1252 = vector.insert %1251, %1250 [0, 2, 5, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1253 = vector.extract %216[2, 0, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1254 = vector.insert %1253, %1252 [0, 2, 6, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1255 = vector.extract %216[2, 0, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1256 = vector.insert %1255, %1254 [0, 2, 7, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1257 = vector.extract %216[2, 0, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1258 = vector.insert %1257, %1256 [0, 2, 8, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1259 = vector.extract %216[2, 0, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1260 = vector.insert %1259, %1258 [0, 2, 9, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1261 = vector.extract %216[2, 0, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1262 = vector.insert %1261, %1260 [0, 2, 10, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1263 = vector.extract %216[2, 0, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1264 = vector.insert %1263, %1262 [0, 2, 11, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1265 = vector.extract %216[2, 0, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1266 = vector.insert %1265, %1264 [0, 2, 12, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1267 = vector.extract %216[2, 0, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1268 = vector.insert %1267, %1266 [0, 2, 13, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1269 = vector.extract %216[2, 0, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1270 = vector.insert %1269, %1268 [0, 2, 14, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1271 = vector.extract %216[2, 0, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1272 = vector.insert %1271, %1270 [0, 2, 15, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1273 = vector.extract %216[2, 1, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1274 = vector.insert %1273, %1272 [0, 2, 0, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1275 = vector.extract %216[2, 1, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1276 = vector.insert %1275, %1274 [0, 2, 1, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1277 = vector.extract %216[2, 1, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1278 = vector.insert %1277, %1276 [0, 2, 2, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1279 = vector.extract %216[2, 1, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1280 = vector.insert %1279, %1278 [0, 2, 3, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1281 = vector.extract %216[2, 1, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1282 = vector.insert %1281, %1280 [0, 2, 4, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1283 = vector.extract %216[2, 1, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1284 = vector.insert %1283, %1282 [0, 2, 5, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1285 = vector.extract %216[2, 1, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1286 = vector.insert %1285, %1284 [0, 2, 6, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1287 = vector.extract %216[2, 1, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1288 = vector.insert %1287, %1286 [0, 2, 7, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1289 = vector.extract %216[2, 1, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1290 = vector.insert %1289, %1288 [0, 2, 8, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1291 = vector.extract %216[2, 1, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1292 = vector.insert %1291, %1290 [0, 2, 9, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1293 = vector.extract %216[2, 1, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1294 = vector.insert %1293, %1292 [0, 2, 10, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1295 = vector.extract %216[2, 1, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1296 = vector.insert %1295, %1294 [0, 2, 11, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1297 = vector.extract %216[2, 1, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1298 = vector.insert %1297, %1296 [0, 2, 12, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1299 = vector.extract %216[2, 1, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1300 = vector.insert %1299, %1298 [0, 2, 13, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1301 = vector.extract %216[2, 1, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1302 = vector.insert %1301, %1300 [0, 2, 14, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1303 = vector.extract %216[2, 1, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1304 = vector.insert %1303, %1302 [0, 2, 15, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1305 = vector.extract %216[2, 2, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1306 = vector.insert %1305, %1304 [0, 2, 0, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1307 = vector.extract %216[2, 2, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1308 = vector.insert %1307, %1306 [0, 2, 1, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1309 = vector.extract %216[2, 2, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1310 = vector.insert %1309, %1308 [0, 2, 2, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1311 = vector.extract %216[2, 2, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1312 = vector.insert %1311, %1310 [0, 2, 3, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1313 = vector.extract %216[2, 2, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1314 = vector.insert %1313, %1312 [0, 2, 4, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1315 = vector.extract %216[2, 2, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1316 = vector.insert %1315, %1314 [0, 2, 5, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1317 = vector.extract %216[2, 2, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1318 = vector.insert %1317, %1316 [0, 2, 6, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1319 = vector.extract %216[2, 2, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1320 = vector.insert %1319, %1318 [0, 2, 7, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1321 = vector.extract %216[2, 2, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1322 = vector.insert %1321, %1320 [0, 2, 8, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1323 = vector.extract %216[2, 2, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1324 = vector.insert %1323, %1322 [0, 2, 9, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1325 = vector.extract %216[2, 2, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1326 = vector.insert %1325, %1324 [0, 2, 10, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1327 = vector.extract %216[2, 2, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1328 = vector.insert %1327, %1326 [0, 2, 11, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1329 = vector.extract %216[2, 2, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1330 = vector.insert %1329, %1328 [0, 2, 12, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1331 = vector.extract %216[2, 2, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1332 = vector.insert %1331, %1330 [0, 2, 13, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1333 = vector.extract %216[2, 2, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1334 = vector.insert %1333, %1332 [0, 2, 14, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1335 = vector.extract %216[2, 2, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1336 = vector.insert %1335, %1334 [0, 2, 15, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1337 = vector.extract %216[2, 3, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1338 = vector.insert %1337, %1336 [0, 2, 0, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1339 = vector.extract %216[2, 3, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1340 = vector.insert %1339, %1338 [0, 2, 1, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1341 = vector.extract %216[2, 3, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1342 = vector.insert %1341, %1340 [0, 2, 2, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1343 = vector.extract %216[2, 3, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1344 = vector.insert %1343, %1342 [0, 2, 3, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1345 = vector.extract %216[2, 3, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1346 = vector.insert %1345, %1344 [0, 2, 4, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1347 = vector.extract %216[2, 3, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1348 = vector.insert %1347, %1346 [0, 2, 5, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1349 = vector.extract %216[2, 3, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1350 = vector.insert %1349, %1348 [0, 2, 6, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1351 = vector.extract %216[2, 3, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1352 = vector.insert %1351, %1350 [0, 2, 7, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1353 = vector.extract %216[2, 3, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1354 = vector.insert %1353, %1352 [0, 2, 8, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1355 = vector.extract %216[2, 3, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1356 = vector.insert %1355, %1354 [0, 2, 9, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1357 = vector.extract %216[2, 3, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1358 = vector.insert %1357, %1356 [0, 2, 10, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1359 = vector.extract %216[2, 3, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1360 = vector.insert %1359, %1358 [0, 2, 11, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1361 = vector.extract %216[2, 3, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1362 = vector.insert %1361, %1360 [0, 2, 12, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1363 = vector.extract %216[2, 3, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1364 = vector.insert %1363, %1362 [0, 2, 13, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1365 = vector.extract %216[2, 3, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1366 = vector.insert %1365, %1364 [0, 2, 14, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1367 = vector.extract %216[2, 3, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1368 = vector.insert %1367, %1366 [0, 2, 15, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1369 = vector.extract %216[2, 4, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1370 = vector.insert %1369, %1368 [0, 2, 0, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1371 = vector.extract %216[2, 4, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1372 = vector.insert %1371, %1370 [0, 2, 1, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1373 = vector.extract %216[2, 4, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1374 = vector.insert %1373, %1372 [0, 2, 2, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1375 = vector.extract %216[2, 4, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1376 = vector.insert %1375, %1374 [0, 2, 3, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1377 = vector.extract %216[2, 4, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1378 = vector.insert %1377, %1376 [0, 2, 4, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1379 = vector.extract %216[2, 4, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1380 = vector.insert %1379, %1378 [0, 2, 5, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1381 = vector.extract %216[2, 4, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1382 = vector.insert %1381, %1380 [0, 2, 6, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1383 = vector.extract %216[2, 4, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1384 = vector.insert %1383, %1382 [0, 2, 7, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1385 = vector.extract %216[2, 4, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1386 = vector.insert %1385, %1384 [0, 2, 8, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1387 = vector.extract %216[2, 4, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1388 = vector.insert %1387, %1386 [0, 2, 9, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1389 = vector.extract %216[2, 4, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1390 = vector.insert %1389, %1388 [0, 2, 10, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1391 = vector.extract %216[2, 4, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1392 = vector.insert %1391, %1390 [0, 2, 11, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1393 = vector.extract %216[2, 4, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1394 = vector.insert %1393, %1392 [0, 2, 12, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1395 = vector.extract %216[2, 4, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1396 = vector.insert %1395, %1394 [0, 2, 13, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1397 = vector.extract %216[2, 4, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1398 = vector.insert %1397, %1396 [0, 2, 14, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1399 = vector.extract %216[2, 4, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1400 = vector.insert %1399, %1398 [0, 2, 15, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1401 = vector.extract %216[2, 5, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1402 = vector.insert %1401, %1400 [0, 2, 0, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1403 = vector.extract %216[2, 5, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1404 = vector.insert %1403, %1402 [0, 2, 1, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1405 = vector.extract %216[2, 5, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1406 = vector.insert %1405, %1404 [0, 2, 2, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1407 = vector.extract %216[2, 5, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1408 = vector.insert %1407, %1406 [0, 2, 3, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1409 = vector.extract %216[2, 5, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1410 = vector.insert %1409, %1408 [0, 2, 4, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1411 = vector.extract %216[2, 5, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1412 = vector.insert %1411, %1410 [0, 2, 5, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1413 = vector.extract %216[2, 5, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1414 = vector.insert %1413, %1412 [0, 2, 6, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1415 = vector.extract %216[2, 5, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1416 = vector.insert %1415, %1414 [0, 2, 7, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1417 = vector.extract %216[2, 5, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1418 = vector.insert %1417, %1416 [0, 2, 8, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1419 = vector.extract %216[2, 5, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1420 = vector.insert %1419, %1418 [0, 2, 9, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1421 = vector.extract %216[2, 5, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1422 = vector.insert %1421, %1420 [0, 2, 10, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1423 = vector.extract %216[2, 5, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1424 = vector.insert %1423, %1422 [0, 2, 11, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1425 = vector.extract %216[2, 5, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1426 = vector.insert %1425, %1424 [0, 2, 12, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1427 = vector.extract %216[2, 5, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1428 = vector.insert %1427, %1426 [0, 2, 13, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1429 = vector.extract %216[2, 5, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1430 = vector.insert %1429, %1428 [0, 2, 14, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1431 = vector.extract %216[2, 5, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1432 = vector.insert %1431, %1430 [0, 2, 15, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1433 = vector.extract %216[2, 6, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1434 = vector.insert %1433, %1432 [0, 2, 0, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1435 = vector.extract %216[2, 6, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1436 = vector.insert %1435, %1434 [0, 2, 1, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1437 = vector.extract %216[2, 6, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1438 = vector.insert %1437, %1436 [0, 2, 2, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1439 = vector.extract %216[2, 6, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1440 = vector.insert %1439, %1438 [0, 2, 3, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1441 = vector.extract %216[2, 6, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1442 = vector.insert %1441, %1440 [0, 2, 4, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1443 = vector.extract %216[2, 6, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1444 = vector.insert %1443, %1442 [0, 2, 5, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1445 = vector.extract %216[2, 6, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1446 = vector.insert %1445, %1444 [0, 2, 6, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1447 = vector.extract %216[2, 6, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1448 = vector.insert %1447, %1446 [0, 2, 7, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1449 = vector.extract %216[2, 6, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1450 = vector.insert %1449, %1448 [0, 2, 8, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1451 = vector.extract %216[2, 6, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1452 = vector.insert %1451, %1450 [0, 2, 9, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1453 = vector.extract %216[2, 6, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1454 = vector.insert %1453, %1452 [0, 2, 10, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1455 = vector.extract %216[2, 6, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1456 = vector.insert %1455, %1454 [0, 2, 11, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1457 = vector.extract %216[2, 6, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1458 = vector.insert %1457, %1456 [0, 2, 12, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1459 = vector.extract %216[2, 6, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1460 = vector.insert %1459, %1458 [0, 2, 13, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1461 = vector.extract %216[2, 6, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1462 = vector.insert %1461, %1460 [0, 2, 14, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1463 = vector.extract %216[2, 6, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1464 = vector.insert %1463, %1462 [0, 2, 15, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1465 = vector.extract %216[2, 7, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1466 = vector.insert %1465, %1464 [0, 2, 0, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1467 = vector.extract %216[2, 7, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1468 = vector.insert %1467, %1466 [0, 2, 1, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1469 = vector.extract %216[2, 7, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1470 = vector.insert %1469, %1468 [0, 2, 2, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1471 = vector.extract %216[2, 7, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1472 = vector.insert %1471, %1470 [0, 2, 3, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1473 = vector.extract %216[2, 7, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1474 = vector.insert %1473, %1472 [0, 2, 4, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1475 = vector.extract %216[2, 7, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1476 = vector.insert %1475, %1474 [0, 2, 5, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1477 = vector.extract %216[2, 7, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1478 = vector.insert %1477, %1476 [0, 2, 6, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1479 = vector.extract %216[2, 7, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1480 = vector.insert %1479, %1478 [0, 2, 7, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1481 = vector.extract %216[2, 7, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1482 = vector.insert %1481, %1480 [0, 2, 8, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1483 = vector.extract %216[2, 7, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1484 = vector.insert %1483, %1482 [0, 2, 9, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1485 = vector.extract %216[2, 7, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1486 = vector.insert %1485, %1484 [0, 2, 10, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1487 = vector.extract %216[2, 7, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1488 = vector.insert %1487, %1486 [0, 2, 11, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1489 = vector.extract %216[2, 7, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1490 = vector.insert %1489, %1488 [0, 2, 12, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1491 = vector.extract %216[2, 7, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1492 = vector.insert %1491, %1490 [0, 2, 13, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1493 = vector.extract %216[2, 7, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1494 = vector.insert %1493, %1492 [0, 2, 14, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1495 = vector.extract %216[2, 7, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1496 = vector.insert %1495, %1494 [0, 2, 15, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1497 = vector.extract %216[2, 8, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1498 = vector.insert %1497, %1496 [0, 2, 0, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1499 = vector.extract %216[2, 8, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1500 = vector.insert %1499, %1498 [0, 2, 1, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1501 = vector.extract %216[2, 8, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1502 = vector.insert %1501, %1500 [0, 2, 2, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1503 = vector.extract %216[2, 8, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1504 = vector.insert %1503, %1502 [0, 2, 3, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1505 = vector.extract %216[2, 8, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1506 = vector.insert %1505, %1504 [0, 2, 4, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1507 = vector.extract %216[2, 8, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1508 = vector.insert %1507, %1506 [0, 2, 5, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1509 = vector.extract %216[2, 8, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1510 = vector.insert %1509, %1508 [0, 2, 6, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1511 = vector.extract %216[2, 8, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1512 = vector.insert %1511, %1510 [0, 2, 7, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1513 = vector.extract %216[2, 8, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1514 = vector.insert %1513, %1512 [0, 2, 8, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1515 = vector.extract %216[2, 8, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1516 = vector.insert %1515, %1514 [0, 2, 9, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1517 = vector.extract %216[2, 8, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1518 = vector.insert %1517, %1516 [0, 2, 10, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1519 = vector.extract %216[2, 8, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1520 = vector.insert %1519, %1518 [0, 2, 11, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1521 = vector.extract %216[2, 8, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1522 = vector.insert %1521, %1520 [0, 2, 12, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1523 = vector.extract %216[2, 8, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1524 = vector.insert %1523, %1522 [0, 2, 13, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1525 = vector.extract %216[2, 8, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1526 = vector.insert %1525, %1524 [0, 2, 14, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1527 = vector.extract %216[2, 8, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1528 = vector.insert %1527, %1526 [0, 2, 15, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1529 = vector.extract %216[2, 9, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1530 = vector.insert %1529, %1528 [0, 2, 0, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1531 = vector.extract %216[2, 9, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1532 = vector.insert %1531, %1530 [0, 2, 1, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1533 = vector.extract %216[2, 9, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1534 = vector.insert %1533, %1532 [0, 2, 2, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1535 = vector.extract %216[2, 9, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1536 = vector.insert %1535, %1534 [0, 2, 3, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1537 = vector.extract %216[2, 9, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1538 = vector.insert %1537, %1536 [0, 2, 4, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1539 = vector.extract %216[2, 9, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1540 = vector.insert %1539, %1538 [0, 2, 5, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1541 = vector.extract %216[2, 9, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1542 = vector.insert %1541, %1540 [0, 2, 6, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1543 = vector.extract %216[2, 9, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1544 = vector.insert %1543, %1542 [0, 2, 7, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1545 = vector.extract %216[2, 9, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1546 = vector.insert %1545, %1544 [0, 2, 8, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1547 = vector.extract %216[2, 9, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1548 = vector.insert %1547, %1546 [0, 2, 9, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1549 = vector.extract %216[2, 9, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1550 = vector.insert %1549, %1548 [0, 2, 10, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1551 = vector.extract %216[2, 9, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1552 = vector.insert %1551, %1550 [0, 2, 11, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1553 = vector.extract %216[2, 9, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1554 = vector.insert %1553, %1552 [0, 2, 12, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1555 = vector.extract %216[2, 9, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1556 = vector.insert %1555, %1554 [0, 2, 13, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1557 = vector.extract %216[2, 9, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1558 = vector.insert %1557, %1556 [0, 2, 14, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1559 = vector.extract %216[2, 9, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1560 = vector.insert %1559, %1558 [0, 2, 15, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1561 = vector.extract %216[2, 10, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1562 = vector.insert %1561, %1560 [0, 2, 0, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1563 = vector.extract %216[2, 10, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1564 = vector.insert %1563, %1562 [0, 2, 1, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1565 = vector.extract %216[2, 10, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1566 = vector.insert %1565, %1564 [0, 2, 2, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1567 = vector.extract %216[2, 10, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1568 = vector.insert %1567, %1566 [0, 2, 3, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1569 = vector.extract %216[2, 10, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1570 = vector.insert %1569, %1568 [0, 2, 4, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1571 = vector.extract %216[2, 10, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1572 = vector.insert %1571, %1570 [0, 2, 5, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1573 = vector.extract %216[2, 10, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1574 = vector.insert %1573, %1572 [0, 2, 6, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1575 = vector.extract %216[2, 10, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1576 = vector.insert %1575, %1574 [0, 2, 7, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1577 = vector.extract %216[2, 10, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1578 = vector.insert %1577, %1576 [0, 2, 8, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1579 = vector.extract %216[2, 10, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1580 = vector.insert %1579, %1578 [0, 2, 9, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1581 = vector.extract %216[2, 10, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1582 = vector.insert %1581, %1580 [0, 2, 10, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1583 = vector.extract %216[2, 10, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1584 = vector.insert %1583, %1582 [0, 2, 11, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1585 = vector.extract %216[2, 10, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1586 = vector.insert %1585, %1584 [0, 2, 12, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1587 = vector.extract %216[2, 10, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1588 = vector.insert %1587, %1586 [0, 2, 13, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1589 = vector.extract %216[2, 10, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1590 = vector.insert %1589, %1588 [0, 2, 14, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1591 = vector.extract %216[2, 10, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1592 = vector.insert %1591, %1590 [0, 2, 15, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1593 = vector.extract %216[2, 11, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1594 = vector.insert %1593, %1592 [0, 2, 0, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1595 = vector.extract %216[2, 11, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1596 = vector.insert %1595, %1594 [0, 2, 1, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1597 = vector.extract %216[2, 11, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1598 = vector.insert %1597, %1596 [0, 2, 2, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1599 = vector.extract %216[2, 11, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1600 = vector.insert %1599, %1598 [0, 2, 3, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1601 = vector.extract %216[2, 11, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1602 = vector.insert %1601, %1600 [0, 2, 4, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1603 = vector.extract %216[2, 11, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1604 = vector.insert %1603, %1602 [0, 2, 5, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1605 = vector.extract %216[2, 11, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1606 = vector.insert %1605, %1604 [0, 2, 6, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1607 = vector.extract %216[2, 11, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1608 = vector.insert %1607, %1606 [0, 2, 7, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1609 = vector.extract %216[2, 11, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1610 = vector.insert %1609, %1608 [0, 2, 8, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1611 = vector.extract %216[2, 11, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1612 = vector.insert %1611, %1610 [0, 2, 9, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1613 = vector.extract %216[2, 11, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1614 = vector.insert %1613, %1612 [0, 2, 10, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1615 = vector.extract %216[2, 11, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1616 = vector.insert %1615, %1614 [0, 2, 11, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1617 = vector.extract %216[2, 11, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1618 = vector.insert %1617, %1616 [0, 2, 12, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1619 = vector.extract %216[2, 11, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1620 = vector.insert %1619, %1618 [0, 2, 13, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1621 = vector.extract %216[2, 11, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1622 = vector.insert %1621, %1620 [0, 2, 14, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1623 = vector.extract %216[2, 11, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1624 = vector.insert %1623, %1622 [0, 2, 15, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1625 = vector.extract %216[2, 12, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1626 = vector.insert %1625, %1624 [0, 2, 0, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1627 = vector.extract %216[2, 12, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1628 = vector.insert %1627, %1626 [0, 2, 1, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1629 = vector.extract %216[2, 12, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1630 = vector.insert %1629, %1628 [0, 2, 2, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1631 = vector.extract %216[2, 12, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1632 = vector.insert %1631, %1630 [0, 2, 3, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1633 = vector.extract %216[2, 12, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1634 = vector.insert %1633, %1632 [0, 2, 4, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1635 = vector.extract %216[2, 12, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1636 = vector.insert %1635, %1634 [0, 2, 5, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1637 = vector.extract %216[2, 12, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1638 = vector.insert %1637, %1636 [0, 2, 6, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1639 = vector.extract %216[2, 12, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1640 = vector.insert %1639, %1638 [0, 2, 7, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1641 = vector.extract %216[2, 12, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1642 = vector.insert %1641, %1640 [0, 2, 8, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1643 = vector.extract %216[2, 12, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1644 = vector.insert %1643, %1642 [0, 2, 9, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1645 = vector.extract %216[2, 12, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1646 = vector.insert %1645, %1644 [0, 2, 10, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1647 = vector.extract %216[2, 12, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1648 = vector.insert %1647, %1646 [0, 2, 11, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1649 = vector.extract %216[2, 12, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1650 = vector.insert %1649, %1648 [0, 2, 12, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1651 = vector.extract %216[2, 12, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1652 = vector.insert %1651, %1650 [0, 2, 13, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1653 = vector.extract %216[2, 12, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1654 = vector.insert %1653, %1652 [0, 2, 14, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1655 = vector.extract %216[2, 12, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1656 = vector.insert %1655, %1654 [0, 2, 15, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1657 = vector.extract %216[2, 13, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1658 = vector.insert %1657, %1656 [0, 2, 0, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1659 = vector.extract %216[2, 13, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1660 = vector.insert %1659, %1658 [0, 2, 1, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1661 = vector.extract %216[2, 13, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1662 = vector.insert %1661, %1660 [0, 2, 2, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1663 = vector.extract %216[2, 13, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1664 = vector.insert %1663, %1662 [0, 2, 3, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1665 = vector.extract %216[2, 13, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1666 = vector.insert %1665, %1664 [0, 2, 4, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1667 = vector.extract %216[2, 13, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1668 = vector.insert %1667, %1666 [0, 2, 5, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1669 = vector.extract %216[2, 13, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1670 = vector.insert %1669, %1668 [0, 2, 6, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1671 = vector.extract %216[2, 13, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1672 = vector.insert %1671, %1670 [0, 2, 7, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1673 = vector.extract %216[2, 13, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1674 = vector.insert %1673, %1672 [0, 2, 8, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1675 = vector.extract %216[2, 13, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1676 = vector.insert %1675, %1674 [0, 2, 9, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1677 = vector.extract %216[2, 13, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1678 = vector.insert %1677, %1676 [0, 2, 10, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1679 = vector.extract %216[2, 13, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1680 = vector.insert %1679, %1678 [0, 2, 11, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1681 = vector.extract %216[2, 13, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1682 = vector.insert %1681, %1680 [0, 2, 12, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1683 = vector.extract %216[2, 13, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1684 = vector.insert %1683, %1682 [0, 2, 13, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1685 = vector.extract %216[2, 13, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1686 = vector.insert %1685, %1684 [0, 2, 14, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1687 = vector.extract %216[2, 13, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1688 = vector.insert %1687, %1686 [0, 2, 15, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1689 = vector.extract %216[2, 14, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1690 = vector.insert %1689, %1688 [0, 2, 0, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1691 = vector.extract %216[2, 14, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1692 = vector.insert %1691, %1690 [0, 2, 1, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1693 = vector.extract %216[2, 14, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1694 = vector.insert %1693, %1692 [0, 2, 2, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1695 = vector.extract %216[2, 14, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1696 = vector.insert %1695, %1694 [0, 2, 3, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1697 = vector.extract %216[2, 14, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1698 = vector.insert %1697, %1696 [0, 2, 4, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1699 = vector.extract %216[2, 14, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1700 = vector.insert %1699, %1698 [0, 2, 5, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1701 = vector.extract %216[2, 14, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1702 = vector.insert %1701, %1700 [0, 2, 6, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1703 = vector.extract %216[2, 14, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1704 = vector.insert %1703, %1702 [0, 2, 7, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1705 = vector.extract %216[2, 14, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1706 = vector.insert %1705, %1704 [0, 2, 8, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1707 = vector.extract %216[2, 14, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1708 = vector.insert %1707, %1706 [0, 2, 9, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1709 = vector.extract %216[2, 14, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1710 = vector.insert %1709, %1708 [0, 2, 10, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1711 = vector.extract %216[2, 14, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1712 = vector.insert %1711, %1710 [0, 2, 11, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1713 = vector.extract %216[2, 14, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1714 = vector.insert %1713, %1712 [0, 2, 12, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1715 = vector.extract %216[2, 14, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1716 = vector.insert %1715, %1714 [0, 2, 13, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1717 = vector.extract %216[2, 14, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1718 = vector.insert %1717, %1716 [0, 2, 14, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1719 = vector.extract %216[2, 14, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1720 = vector.insert %1719, %1718 [0, 2, 15, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1721 = vector.extract %216[2, 15, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1722 = vector.insert %1721, %1720 [0, 2, 0, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1723 = vector.extract %216[2, 15, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1724 = vector.insert %1723, %1722 [0, 2, 1, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1725 = vector.extract %216[2, 15, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1726 = vector.insert %1725, %1724 [0, 2, 2, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1727 = vector.extract %216[2, 15, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1728 = vector.insert %1727, %1726 [0, 2, 3, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1729 = vector.extract %216[2, 15, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1730 = vector.insert %1729, %1728 [0, 2, 4, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1731 = vector.extract %216[2, 15, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1732 = vector.insert %1731, %1730 [0, 2, 5, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1733 = vector.extract %216[2, 15, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1734 = vector.insert %1733, %1732 [0, 2, 6, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1735 = vector.extract %216[2, 15, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1736 = vector.insert %1735, %1734 [0, 2, 7, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1737 = vector.extract %216[2, 15, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1738 = vector.insert %1737, %1736 [0, 2, 8, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1739 = vector.extract %216[2, 15, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1740 = vector.insert %1739, %1738 [0, 2, 9, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1741 = vector.extract %216[2, 15, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1742 = vector.insert %1741, %1740 [0, 2, 10, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1743 = vector.extract %216[2, 15, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1744 = vector.insert %1743, %1742 [0, 2, 11, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1745 = vector.extract %216[2, 15, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1746 = vector.insert %1745, %1744 [0, 2, 12, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1747 = vector.extract %216[2, 15, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1748 = vector.insert %1747, %1746 [0, 2, 13, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1749 = vector.extract %216[2, 15, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1750 = vector.insert %1749, %1748 [0, 2, 14, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1751 = vector.extract %216[2, 15, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1752 = vector.insert %1751, %1750 [0, 2, 15, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1753 = vector.extract %216[3, 0, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1754 = vector.insert %1753, %1752 [0, 3, 0, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1755 = vector.extract %216[3, 0, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1756 = vector.insert %1755, %1754 [0, 3, 1, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1757 = vector.extract %216[3, 0, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1758 = vector.insert %1757, %1756 [0, 3, 2, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1759 = vector.extract %216[3, 0, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1760 = vector.insert %1759, %1758 [0, 3, 3, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1761 = vector.extract %216[3, 0, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1762 = vector.insert %1761, %1760 [0, 3, 4, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1763 = vector.extract %216[3, 0, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1764 = vector.insert %1763, %1762 [0, 3, 5, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1765 = vector.extract %216[3, 0, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1766 = vector.insert %1765, %1764 [0, 3, 6, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1767 = vector.extract %216[3, 0, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1768 = vector.insert %1767, %1766 [0, 3, 7, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1769 = vector.extract %216[3, 0, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1770 = vector.insert %1769, %1768 [0, 3, 8, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1771 = vector.extract %216[3, 0, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1772 = vector.insert %1771, %1770 [0, 3, 9, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1773 = vector.extract %216[3, 0, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1774 = vector.insert %1773, %1772 [0, 3, 10, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1775 = vector.extract %216[3, 0, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1776 = vector.insert %1775, %1774 [0, 3, 11, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1777 = vector.extract %216[3, 0, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1778 = vector.insert %1777, %1776 [0, 3, 12, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1779 = vector.extract %216[3, 0, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1780 = vector.insert %1779, %1778 [0, 3, 13, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1781 = vector.extract %216[3, 0, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1782 = vector.insert %1781, %1780 [0, 3, 14, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1783 = vector.extract %216[3, 0, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1784 = vector.insert %1783, %1782 [0, 3, 15, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1785 = vector.extract %216[3, 1, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1786 = vector.insert %1785, %1784 [0, 3, 0, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1787 = vector.extract %216[3, 1, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1788 = vector.insert %1787, %1786 [0, 3, 1, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1789 = vector.extract %216[3, 1, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1790 = vector.insert %1789, %1788 [0, 3, 2, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1791 = vector.extract %216[3, 1, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1792 = vector.insert %1791, %1790 [0, 3, 3, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1793 = vector.extract %216[3, 1, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1794 = vector.insert %1793, %1792 [0, 3, 4, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1795 = vector.extract %216[3, 1, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1796 = vector.insert %1795, %1794 [0, 3, 5, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1797 = vector.extract %216[3, 1, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1798 = vector.insert %1797, %1796 [0, 3, 6, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1799 = vector.extract %216[3, 1, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1800 = vector.insert %1799, %1798 [0, 3, 7, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1801 = vector.extract %216[3, 1, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1802 = vector.insert %1801, %1800 [0, 3, 8, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1803 = vector.extract %216[3, 1, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1804 = vector.insert %1803, %1802 [0, 3, 9, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1805 = vector.extract %216[3, 1, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1806 = vector.insert %1805, %1804 [0, 3, 10, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1807 = vector.extract %216[3, 1, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1808 = vector.insert %1807, %1806 [0, 3, 11, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1809 = vector.extract %216[3, 1, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1810 = vector.insert %1809, %1808 [0, 3, 12, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1811 = vector.extract %216[3, 1, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1812 = vector.insert %1811, %1810 [0, 3, 13, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1813 = vector.extract %216[3, 1, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1814 = vector.insert %1813, %1812 [0, 3, 14, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1815 = vector.extract %216[3, 1, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1816 = vector.insert %1815, %1814 [0, 3, 15, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1817 = vector.extract %216[3, 2, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1818 = vector.insert %1817, %1816 [0, 3, 0, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1819 = vector.extract %216[3, 2, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1820 = vector.insert %1819, %1818 [0, 3, 1, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1821 = vector.extract %216[3, 2, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1822 = vector.insert %1821, %1820 [0, 3, 2, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1823 = vector.extract %216[3, 2, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1824 = vector.insert %1823, %1822 [0, 3, 3, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1825 = vector.extract %216[3, 2, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1826 = vector.insert %1825, %1824 [0, 3, 4, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1827 = vector.extract %216[3, 2, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1828 = vector.insert %1827, %1826 [0, 3, 5, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1829 = vector.extract %216[3, 2, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1830 = vector.insert %1829, %1828 [0, 3, 6, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1831 = vector.extract %216[3, 2, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1832 = vector.insert %1831, %1830 [0, 3, 7, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1833 = vector.extract %216[3, 2, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1834 = vector.insert %1833, %1832 [0, 3, 8, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1835 = vector.extract %216[3, 2, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1836 = vector.insert %1835, %1834 [0, 3, 9, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1837 = vector.extract %216[3, 2, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1838 = vector.insert %1837, %1836 [0, 3, 10, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1839 = vector.extract %216[3, 2, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1840 = vector.insert %1839, %1838 [0, 3, 11, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1841 = vector.extract %216[3, 2, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1842 = vector.insert %1841, %1840 [0, 3, 12, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1843 = vector.extract %216[3, 2, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1844 = vector.insert %1843, %1842 [0, 3, 13, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1845 = vector.extract %216[3, 2, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1846 = vector.insert %1845, %1844 [0, 3, 14, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1847 = vector.extract %216[3, 2, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1848 = vector.insert %1847, %1846 [0, 3, 15, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1849 = vector.extract %216[3, 3, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1850 = vector.insert %1849, %1848 [0, 3, 0, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1851 = vector.extract %216[3, 3, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1852 = vector.insert %1851, %1850 [0, 3, 1, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1853 = vector.extract %216[3, 3, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1854 = vector.insert %1853, %1852 [0, 3, 2, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1855 = vector.extract %216[3, 3, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1856 = vector.insert %1855, %1854 [0, 3, 3, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1857 = vector.extract %216[3, 3, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1858 = vector.insert %1857, %1856 [0, 3, 4, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1859 = vector.extract %216[3, 3, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1860 = vector.insert %1859, %1858 [0, 3, 5, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1861 = vector.extract %216[3, 3, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1862 = vector.insert %1861, %1860 [0, 3, 6, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1863 = vector.extract %216[3, 3, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1864 = vector.insert %1863, %1862 [0, 3, 7, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1865 = vector.extract %216[3, 3, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1866 = vector.insert %1865, %1864 [0, 3, 8, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1867 = vector.extract %216[3, 3, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1868 = vector.insert %1867, %1866 [0, 3, 9, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1869 = vector.extract %216[3, 3, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1870 = vector.insert %1869, %1868 [0, 3, 10, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1871 = vector.extract %216[3, 3, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1872 = vector.insert %1871, %1870 [0, 3, 11, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1873 = vector.extract %216[3, 3, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1874 = vector.insert %1873, %1872 [0, 3, 12, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1875 = vector.extract %216[3, 3, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1876 = vector.insert %1875, %1874 [0, 3, 13, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1877 = vector.extract %216[3, 3, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1878 = vector.insert %1877, %1876 [0, 3, 14, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1879 = vector.extract %216[3, 3, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1880 = vector.insert %1879, %1878 [0, 3, 15, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1881 = vector.extract %216[3, 4, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1882 = vector.insert %1881, %1880 [0, 3, 0, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1883 = vector.extract %216[3, 4, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1884 = vector.insert %1883, %1882 [0, 3, 1, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1885 = vector.extract %216[3, 4, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1886 = vector.insert %1885, %1884 [0, 3, 2, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1887 = vector.extract %216[3, 4, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1888 = vector.insert %1887, %1886 [0, 3, 3, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1889 = vector.extract %216[3, 4, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1890 = vector.insert %1889, %1888 [0, 3, 4, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1891 = vector.extract %216[3, 4, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1892 = vector.insert %1891, %1890 [0, 3, 5, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1893 = vector.extract %216[3, 4, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1894 = vector.insert %1893, %1892 [0, 3, 6, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1895 = vector.extract %216[3, 4, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1896 = vector.insert %1895, %1894 [0, 3, 7, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1897 = vector.extract %216[3, 4, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1898 = vector.insert %1897, %1896 [0, 3, 8, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1899 = vector.extract %216[3, 4, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1900 = vector.insert %1899, %1898 [0, 3, 9, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1901 = vector.extract %216[3, 4, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1902 = vector.insert %1901, %1900 [0, 3, 10, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1903 = vector.extract %216[3, 4, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1904 = vector.insert %1903, %1902 [0, 3, 11, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1905 = vector.extract %216[3, 4, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1906 = vector.insert %1905, %1904 [0, 3, 12, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1907 = vector.extract %216[3, 4, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1908 = vector.insert %1907, %1906 [0, 3, 13, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1909 = vector.extract %216[3, 4, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1910 = vector.insert %1909, %1908 [0, 3, 14, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1911 = vector.extract %216[3, 4, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1912 = vector.insert %1911, %1910 [0, 3, 15, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1913 = vector.extract %216[3, 5, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1914 = vector.insert %1913, %1912 [0, 3, 0, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1915 = vector.extract %216[3, 5, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1916 = vector.insert %1915, %1914 [0, 3, 1, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1917 = vector.extract %216[3, 5, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1918 = vector.insert %1917, %1916 [0, 3, 2, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1919 = vector.extract %216[3, 5, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1920 = vector.insert %1919, %1918 [0, 3, 3, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1921 = vector.extract %216[3, 5, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1922 = vector.insert %1921, %1920 [0, 3, 4, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1923 = vector.extract %216[3, 5, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1924 = vector.insert %1923, %1922 [0, 3, 5, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1925 = vector.extract %216[3, 5, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1926 = vector.insert %1925, %1924 [0, 3, 6, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1927 = vector.extract %216[3, 5, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1928 = vector.insert %1927, %1926 [0, 3, 7, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1929 = vector.extract %216[3, 5, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1930 = vector.insert %1929, %1928 [0, 3, 8, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1931 = vector.extract %216[3, 5, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1932 = vector.insert %1931, %1930 [0, 3, 9, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1933 = vector.extract %216[3, 5, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1934 = vector.insert %1933, %1932 [0, 3, 10, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1935 = vector.extract %216[3, 5, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1936 = vector.insert %1935, %1934 [0, 3, 11, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1937 = vector.extract %216[3, 5, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1938 = vector.insert %1937, %1936 [0, 3, 12, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1939 = vector.extract %216[3, 5, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1940 = vector.insert %1939, %1938 [0, 3, 13, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1941 = vector.extract %216[3, 5, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1942 = vector.insert %1941, %1940 [0, 3, 14, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1943 = vector.extract %216[3, 5, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1944 = vector.insert %1943, %1942 [0, 3, 15, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1945 = vector.extract %216[3, 6, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1946 = vector.insert %1945, %1944 [0, 3, 0, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1947 = vector.extract %216[3, 6, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1948 = vector.insert %1947, %1946 [0, 3, 1, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1949 = vector.extract %216[3, 6, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1950 = vector.insert %1949, %1948 [0, 3, 2, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1951 = vector.extract %216[3, 6, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1952 = vector.insert %1951, %1950 [0, 3, 3, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1953 = vector.extract %216[3, 6, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1954 = vector.insert %1953, %1952 [0, 3, 4, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1955 = vector.extract %216[3, 6, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1956 = vector.insert %1955, %1954 [0, 3, 5, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1957 = vector.extract %216[3, 6, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1958 = vector.insert %1957, %1956 [0, 3, 6, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1959 = vector.extract %216[3, 6, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1960 = vector.insert %1959, %1958 [0, 3, 7, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1961 = vector.extract %216[3, 6, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1962 = vector.insert %1961, %1960 [0, 3, 8, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1963 = vector.extract %216[3, 6, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1964 = vector.insert %1963, %1962 [0, 3, 9, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1965 = vector.extract %216[3, 6, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1966 = vector.insert %1965, %1964 [0, 3, 10, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1967 = vector.extract %216[3, 6, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1968 = vector.insert %1967, %1966 [0, 3, 11, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1969 = vector.extract %216[3, 6, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1970 = vector.insert %1969, %1968 [0, 3, 12, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1971 = vector.extract %216[3, 6, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1972 = vector.insert %1971, %1970 [0, 3, 13, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1973 = vector.extract %216[3, 6, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1974 = vector.insert %1973, %1972 [0, 3, 14, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1975 = vector.extract %216[3, 6, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1976 = vector.insert %1975, %1974 [0, 3, 15, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1977 = vector.extract %216[3, 7, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1978 = vector.insert %1977, %1976 [0, 3, 0, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1979 = vector.extract %216[3, 7, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1980 = vector.insert %1979, %1978 [0, 3, 1, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1981 = vector.extract %216[3, 7, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1982 = vector.insert %1981, %1980 [0, 3, 2, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1983 = vector.extract %216[3, 7, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1984 = vector.insert %1983, %1982 [0, 3, 3, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1985 = vector.extract %216[3, 7, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1986 = vector.insert %1985, %1984 [0, 3, 4, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1987 = vector.extract %216[3, 7, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1988 = vector.insert %1987, %1986 [0, 3, 5, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1989 = vector.extract %216[3, 7, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1990 = vector.insert %1989, %1988 [0, 3, 6, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1991 = vector.extract %216[3, 7, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1992 = vector.insert %1991, %1990 [0, 3, 7, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1993 = vector.extract %216[3, 7, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1994 = vector.insert %1993, %1992 [0, 3, 8, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1995 = vector.extract %216[3, 7, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1996 = vector.insert %1995, %1994 [0, 3, 9, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1997 = vector.extract %216[3, 7, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1998 = vector.insert %1997, %1996 [0, 3, 10, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1999 = vector.extract %216[3, 7, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2000 = vector.insert %1999, %1998 [0, 3, 11, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2001 = vector.extract %216[3, 7, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2002 = vector.insert %2001, %2000 [0, 3, 12, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2003 = vector.extract %216[3, 7, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2004 = vector.insert %2003, %2002 [0, 3, 13, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2005 = vector.extract %216[3, 7, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2006 = vector.insert %2005, %2004 [0, 3, 14, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2007 = vector.extract %216[3, 7, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2008 = vector.insert %2007, %2006 [0, 3, 15, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2009 = vector.extract %216[3, 8, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2010 = vector.insert %2009, %2008 [0, 3, 0, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2011 = vector.extract %216[3, 8, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2012 = vector.insert %2011, %2010 [0, 3, 1, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2013 = vector.extract %216[3, 8, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2014 = vector.insert %2013, %2012 [0, 3, 2, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2015 = vector.extract %216[3, 8, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2016 = vector.insert %2015, %2014 [0, 3, 3, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2017 = vector.extract %216[3, 8, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2018 = vector.insert %2017, %2016 [0, 3, 4, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2019 = vector.extract %216[3, 8, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2020 = vector.insert %2019, %2018 [0, 3, 5, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2021 = vector.extract %216[3, 8, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2022 = vector.insert %2021, %2020 [0, 3, 6, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2023 = vector.extract %216[3, 8, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2024 = vector.insert %2023, %2022 [0, 3, 7, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2025 = vector.extract %216[3, 8, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2026 = vector.insert %2025, %2024 [0, 3, 8, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2027 = vector.extract %216[3, 8, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2028 = vector.insert %2027, %2026 [0, 3, 9, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2029 = vector.extract %216[3, 8, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2030 = vector.insert %2029, %2028 [0, 3, 10, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2031 = vector.extract %216[3, 8, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2032 = vector.insert %2031, %2030 [0, 3, 11, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2033 = vector.extract %216[3, 8, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2034 = vector.insert %2033, %2032 [0, 3, 12, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2035 = vector.extract %216[3, 8, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2036 = vector.insert %2035, %2034 [0, 3, 13, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2037 = vector.extract %216[3, 8, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2038 = vector.insert %2037, %2036 [0, 3, 14, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2039 = vector.extract %216[3, 8, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2040 = vector.insert %2039, %2038 [0, 3, 15, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2041 = vector.extract %216[3, 9, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2042 = vector.insert %2041, %2040 [0, 3, 0, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2043 = vector.extract %216[3, 9, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2044 = vector.insert %2043, %2042 [0, 3, 1, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2045 = vector.extract %216[3, 9, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2046 = vector.insert %2045, %2044 [0, 3, 2, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2047 = vector.extract %216[3, 9, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2048 = vector.insert %2047, %2046 [0, 3, 3, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2049 = vector.extract %216[3, 9, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2050 = vector.insert %2049, %2048 [0, 3, 4, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2051 = vector.extract %216[3, 9, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2052 = vector.insert %2051, %2050 [0, 3, 5, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2053 = vector.extract %216[3, 9, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2054 = vector.insert %2053, %2052 [0, 3, 6, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2055 = vector.extract %216[3, 9, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2056 = vector.insert %2055, %2054 [0, 3, 7, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2057 = vector.extract %216[3, 9, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2058 = vector.insert %2057, %2056 [0, 3, 8, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2059 = vector.extract %216[3, 9, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2060 = vector.insert %2059, %2058 [0, 3, 9, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2061 = vector.extract %216[3, 9, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2062 = vector.insert %2061, %2060 [0, 3, 10, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2063 = vector.extract %216[3, 9, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2064 = vector.insert %2063, %2062 [0, 3, 11, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2065 = vector.extract %216[3, 9, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2066 = vector.insert %2065, %2064 [0, 3, 12, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2067 = vector.extract %216[3, 9, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2068 = vector.insert %2067, %2066 [0, 3, 13, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2069 = vector.extract %216[3, 9, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2070 = vector.insert %2069, %2068 [0, 3, 14, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2071 = vector.extract %216[3, 9, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2072 = vector.insert %2071, %2070 [0, 3, 15, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2073 = vector.extract %216[3, 10, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2074 = vector.insert %2073, %2072 [0, 3, 0, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2075 = vector.extract %216[3, 10, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2076 = vector.insert %2075, %2074 [0, 3, 1, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2077 = vector.extract %216[3, 10, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2078 = vector.insert %2077, %2076 [0, 3, 2, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2079 = vector.extract %216[3, 10, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2080 = vector.insert %2079, %2078 [0, 3, 3, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2081 = vector.extract %216[3, 10, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2082 = vector.insert %2081, %2080 [0, 3, 4, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2083 = vector.extract %216[3, 10, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2084 = vector.insert %2083, %2082 [0, 3, 5, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2085 = vector.extract %216[3, 10, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2086 = vector.insert %2085, %2084 [0, 3, 6, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2087 = vector.extract %216[3, 10, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2088 = vector.insert %2087, %2086 [0, 3, 7, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2089 = vector.extract %216[3, 10, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2090 = vector.insert %2089, %2088 [0, 3, 8, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2091 = vector.extract %216[3, 10, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2092 = vector.insert %2091, %2090 [0, 3, 9, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2093 = vector.extract %216[3, 10, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2094 = vector.insert %2093, %2092 [0, 3, 10, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2095 = vector.extract %216[3, 10, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2096 = vector.insert %2095, %2094 [0, 3, 11, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2097 = vector.extract %216[3, 10, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2098 = vector.insert %2097, %2096 [0, 3, 12, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2099 = vector.extract %216[3, 10, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2100 = vector.insert %2099, %2098 [0, 3, 13, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2101 = vector.extract %216[3, 10, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2102 = vector.insert %2101, %2100 [0, 3, 14, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2103 = vector.extract %216[3, 10, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2104 = vector.insert %2103, %2102 [0, 3, 15, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2105 = vector.extract %216[3, 11, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2106 = vector.insert %2105, %2104 [0, 3, 0, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2107 = vector.extract %216[3, 11, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2108 = vector.insert %2107, %2106 [0, 3, 1, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2109 = vector.extract %216[3, 11, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2110 = vector.insert %2109, %2108 [0, 3, 2, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2111 = vector.extract %216[3, 11, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2112 = vector.insert %2111, %2110 [0, 3, 3, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2113 = vector.extract %216[3, 11, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2114 = vector.insert %2113, %2112 [0, 3, 4, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2115 = vector.extract %216[3, 11, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2116 = vector.insert %2115, %2114 [0, 3, 5, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2117 = vector.extract %216[3, 11, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2118 = vector.insert %2117, %2116 [0, 3, 6, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2119 = vector.extract %216[3, 11, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2120 = vector.insert %2119, %2118 [0, 3, 7, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2121 = vector.extract %216[3, 11, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2122 = vector.insert %2121, %2120 [0, 3, 8, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2123 = vector.extract %216[3, 11, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2124 = vector.insert %2123, %2122 [0, 3, 9, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2125 = vector.extract %216[3, 11, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2126 = vector.insert %2125, %2124 [0, 3, 10, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2127 = vector.extract %216[3, 11, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2128 = vector.insert %2127, %2126 [0, 3, 11, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2129 = vector.extract %216[3, 11, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2130 = vector.insert %2129, %2128 [0, 3, 12, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2131 = vector.extract %216[3, 11, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2132 = vector.insert %2131, %2130 [0, 3, 13, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2133 = vector.extract %216[3, 11, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2134 = vector.insert %2133, %2132 [0, 3, 14, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2135 = vector.extract %216[3, 11, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2136 = vector.insert %2135, %2134 [0, 3, 15, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2137 = vector.extract %216[3, 12, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2138 = vector.insert %2137, %2136 [0, 3, 0, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2139 = vector.extract %216[3, 12, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2140 = vector.insert %2139, %2138 [0, 3, 1, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2141 = vector.extract %216[3, 12, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2142 = vector.insert %2141, %2140 [0, 3, 2, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2143 = vector.extract %216[3, 12, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2144 = vector.insert %2143, %2142 [0, 3, 3, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2145 = vector.extract %216[3, 12, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2146 = vector.insert %2145, %2144 [0, 3, 4, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2147 = vector.extract %216[3, 12, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2148 = vector.insert %2147, %2146 [0, 3, 5, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2149 = vector.extract %216[3, 12, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2150 = vector.insert %2149, %2148 [0, 3, 6, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2151 = vector.extract %216[3, 12, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2152 = vector.insert %2151, %2150 [0, 3, 7, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2153 = vector.extract %216[3, 12, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2154 = vector.insert %2153, %2152 [0, 3, 8, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2155 = vector.extract %216[3, 12, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2156 = vector.insert %2155, %2154 [0, 3, 9, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2157 = vector.extract %216[3, 12, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2158 = vector.insert %2157, %2156 [0, 3, 10, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2159 = vector.extract %216[3, 12, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2160 = vector.insert %2159, %2158 [0, 3, 11, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2161 = vector.extract %216[3, 12, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2162 = vector.insert %2161, %2160 [0, 3, 12, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2163 = vector.extract %216[3, 12, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2164 = vector.insert %2163, %2162 [0, 3, 13, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2165 = vector.extract %216[3, 12, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2166 = vector.insert %2165, %2164 [0, 3, 14, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2167 = vector.extract %216[3, 12, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2168 = vector.insert %2167, %2166 [0, 3, 15, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2169 = vector.extract %216[3, 13, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2170 = vector.insert %2169, %2168 [0, 3, 0, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2171 = vector.extract %216[3, 13, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2172 = vector.insert %2171, %2170 [0, 3, 1, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2173 = vector.extract %216[3, 13, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2174 = vector.insert %2173, %2172 [0, 3, 2, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2175 = vector.extract %216[3, 13, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2176 = vector.insert %2175, %2174 [0, 3, 3, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2177 = vector.extract %216[3, 13, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2178 = vector.insert %2177, %2176 [0, 3, 4, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2179 = vector.extract %216[3, 13, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2180 = vector.insert %2179, %2178 [0, 3, 5, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2181 = vector.extract %216[3, 13, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2182 = vector.insert %2181, %2180 [0, 3, 6, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2183 = vector.extract %216[3, 13, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2184 = vector.insert %2183, %2182 [0, 3, 7, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2185 = vector.extract %216[3, 13, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2186 = vector.insert %2185, %2184 [0, 3, 8, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2187 = vector.extract %216[3, 13, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2188 = vector.insert %2187, %2186 [0, 3, 9, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2189 = vector.extract %216[3, 13, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2190 = vector.insert %2189, %2188 [0, 3, 10, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2191 = vector.extract %216[3, 13, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2192 = vector.insert %2191, %2190 [0, 3, 11, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2193 = vector.extract %216[3, 13, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2194 = vector.insert %2193, %2192 [0, 3, 12, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2195 = vector.extract %216[3, 13, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2196 = vector.insert %2195, %2194 [0, 3, 13, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2197 = vector.extract %216[3, 13, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2198 = vector.insert %2197, %2196 [0, 3, 14, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2199 = vector.extract %216[3, 13, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2200 = vector.insert %2199, %2198 [0, 3, 15, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2201 = vector.extract %216[3, 14, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2202 = vector.insert %2201, %2200 [0, 3, 0, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2203 = vector.extract %216[3, 14, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2204 = vector.insert %2203, %2202 [0, 3, 1, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2205 = vector.extract %216[3, 14, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2206 = vector.insert %2205, %2204 [0, 3, 2, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2207 = vector.extract %216[3, 14, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2208 = vector.insert %2207, %2206 [0, 3, 3, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2209 = vector.extract %216[3, 14, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2210 = vector.insert %2209, %2208 [0, 3, 4, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2211 = vector.extract %216[3, 14, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2212 = vector.insert %2211, %2210 [0, 3, 5, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2213 = vector.extract %216[3, 14, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2214 = vector.insert %2213, %2212 [0, 3, 6, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2215 = vector.extract %216[3, 14, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2216 = vector.insert %2215, %2214 [0, 3, 7, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2217 = vector.extract %216[3, 14, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2218 = vector.insert %2217, %2216 [0, 3, 8, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2219 = vector.extract %216[3, 14, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2220 = vector.insert %2219, %2218 [0, 3, 9, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2221 = vector.extract %216[3, 14, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2222 = vector.insert %2221, %2220 [0, 3, 10, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2223 = vector.extract %216[3, 14, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2224 = vector.insert %2223, %2222 [0, 3, 11, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2225 = vector.extract %216[3, 14, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2226 = vector.insert %2225, %2224 [0, 3, 12, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2227 = vector.extract %216[3, 14, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2228 = vector.insert %2227, %2226 [0, 3, 13, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2229 = vector.extract %216[3, 14, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2230 = vector.insert %2229, %2228 [0, 3, 14, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2231 = vector.extract %216[3, 14, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2232 = vector.insert %2231, %2230 [0, 3, 15, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2233 = vector.extract %216[3, 15, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2234 = vector.insert %2233, %2232 [0, 3, 0, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2235 = vector.extract %216[3, 15, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2236 = vector.insert %2235, %2234 [0, 3, 1, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2237 = vector.extract %216[3, 15, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2238 = vector.insert %2237, %2236 [0, 3, 2, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2239 = vector.extract %216[3, 15, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2240 = vector.insert %2239, %2238 [0, 3, 3, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2241 = vector.extract %216[3, 15, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2242 = vector.insert %2241, %2240 [0, 3, 4, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2243 = vector.extract %216[3, 15, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2244 = vector.insert %2243, %2242 [0, 3, 5, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2245 = vector.extract %216[3, 15, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2246 = vector.insert %2245, %2244 [0, 3, 6, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2247 = vector.extract %216[3, 15, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2248 = vector.insert %2247, %2246 [0, 3, 7, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2249 = vector.extract %216[3, 15, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2250 = vector.insert %2249, %2248 [0, 3, 8, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2251 = vector.extract %216[3, 15, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2252 = vector.insert %2251, %2250 [0, 3, 9, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2253 = vector.extract %216[3, 15, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2254 = vector.insert %2253, %2252 [0, 3, 10, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2255 = vector.extract %216[3, 15, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2256 = vector.insert %2255, %2254 [0, 3, 11, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2257 = vector.extract %216[3, 15, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2258 = vector.insert %2257, %2256 [0, 3, 12, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2259 = vector.extract %216[3, 15, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2260 = vector.insert %2259, %2258 [0, 3, 13, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2261 = vector.extract %216[3, 15, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2262 = vector.insert %2261, %2260 [0, 3, 14, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2263 = vector.extract %216[3, 15, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2264 = vector.insert %2263, %2262 [0, 3, 15, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2265 = vector.extract %2264[0] : vector<4x16x16x1xf16> from vector<1x4x16x16x1xf16> | |
%subview_5 = memref.subview %subview[0, 0, 0, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%2266 = vector.shape_cast %2265 : vector<4x16x16x1xf16> to vector<4x16x16xf16> | |
%2267 = vector.extract %2266[0, 0] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2267, %subview_5[%arg3, %c0, %arg4, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2268 = affine.apply affine_map<(d0) -> (d0 + 1)>(%arg4) | |
%2269 = vector.extract %2266[0, 1] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2269, %subview_5[%arg3, %c0, %2268, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2270 = affine.apply affine_map<(d0) -> (d0 + 2)>(%arg4) | |
%2271 = vector.extract %2266[0, 2] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2271, %subview_5[%arg3, %c0, %2270, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2272 = affine.apply affine_map<(d0) -> (d0 + 3)>(%arg4) | |
%2273 = vector.extract %2266[0, 3] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2273, %subview_5[%arg3, %c0, %2272, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2274 = affine.apply affine_map<(d0) -> (d0 + 4)>(%arg4) | |
%2275 = vector.extract %2266[0, 4] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2275, %subview_5[%arg3, %c0, %2274, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2276 = affine.apply affine_map<(d0) -> (d0 + 5)>(%arg4) | |
%2277 = vector.extract %2266[0, 5] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2277, %subview_5[%arg3, %c0, %2276, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2278 = affine.apply affine_map<(d0) -> (d0 + 6)>(%arg4) | |
%2279 = vector.extract %2266[0, 6] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2279, %subview_5[%arg3, %c0, %2278, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2280 = affine.apply affine_map<(d0) -> (d0 + 7)>(%arg4) | |
%2281 = vector.extract %2266[0, 7] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2281, %subview_5[%arg3, %c0, %2280, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2282 = affine.apply affine_map<(d0) -> (d0 + 8)>(%arg4) | |
%2283 = vector.extract %2266[0, 8] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2283, %subview_5[%arg3, %c0, %2282, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2284 = affine.apply affine_map<(d0) -> (d0 + 9)>(%arg4) | |
%2285 = vector.extract %2266[0, 9] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2285, %subview_5[%arg3, %c0, %2284, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2286 = affine.apply affine_map<(d0) -> (d0 + 10)>(%arg4) | |
%2287 = vector.extract %2266[0, 10] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2287, %subview_5[%arg3, %c0, %2286, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2288 = affine.apply affine_map<(d0) -> (d0 + 11)>(%arg4) | |
%2289 = vector.extract %2266[0, 11] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2289, %subview_5[%arg3, %c0, %2288, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2290 = affine.apply affine_map<(d0) -> (d0 + 12)>(%arg4) | |
%2291 = vector.extract %2266[0, 12] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2291, %subview_5[%arg3, %c0, %2290, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2292 = affine.apply affine_map<(d0) -> (d0 + 13)>(%arg4) | |
%2293 = vector.extract %2266[0, 13] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2293, %subview_5[%arg3, %c0, %2292, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2294 = affine.apply affine_map<(d0) -> (d0 + 14)>(%arg4) | |
%2295 = vector.extract %2266[0, 14] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2295, %subview_5[%arg3, %c0, %2294, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2296 = affine.apply affine_map<(d0) -> (d0 + 15)>(%arg4) | |
%2297 = vector.extract %2266[0, 15] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2297, %subview_5[%arg3, %c0, %2296, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2298 = vector.extract %2266[1, 0] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2298, %subview_5[%arg3, %c1, %arg4, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2299 = affine.apply affine_map<(d0) -> (d0 + 1)>(%arg4) | |
%2300 = vector.extract %2266[1, 1] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2300, %subview_5[%arg3, %c1, %2299, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2301 = affine.apply affine_map<(d0) -> (d0 + 2)>(%arg4) | |
%2302 = vector.extract %2266[1, 2] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2302, %subview_5[%arg3, %c1, %2301, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2303 = affine.apply affine_map<(d0) -> (d0 + 3)>(%arg4) | |
%2304 = vector.extract %2266[1, 3] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2304, %subview_5[%arg3, %c1, %2303, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2305 = affine.apply affine_map<(d0) -> (d0 + 4)>(%arg4) | |
%2306 = vector.extract %2266[1, 4] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2306, %subview_5[%arg3, %c1, %2305, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2307 = affine.apply affine_map<(d0) -> (d0 + 5)>(%arg4) | |
%2308 = vector.extract %2266[1, 5] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2308, %subview_5[%arg3, %c1, %2307, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2309 = affine.apply affine_map<(d0) -> (d0 + 6)>(%arg4) | |
%2310 = vector.extract %2266[1, 6] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2310, %subview_5[%arg3, %c1, %2309, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2311 = affine.apply affine_map<(d0) -> (d0 + 7)>(%arg4) | |
%2312 = vector.extract %2266[1, 7] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2312, %subview_5[%arg3, %c1, %2311, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2313 = affine.apply affine_map<(d0) -> (d0 + 8)>(%arg4) | |
%2314 = vector.extract %2266[1, 8] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2314, %subview_5[%arg3, %c1, %2313, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2315 = affine.apply affine_map<(d0) -> (d0 + 9)>(%arg4) | |
%2316 = vector.extract %2266[1, 9] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2316, %subview_5[%arg3, %c1, %2315, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2317 = affine.apply affine_map<(d0) -> (d0 + 10)>(%arg4) | |
%2318 = vector.extract %2266[1, 10] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2318, %subview_5[%arg3, %c1, %2317, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2319 = affine.apply affine_map<(d0) -> (d0 + 11)>(%arg4) | |
%2320 = vector.extract %2266[1, 11] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2320, %subview_5[%arg3, %c1, %2319, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2321 = affine.apply affine_map<(d0) -> (d0 + 12)>(%arg4) | |
%2322 = vector.extract %2266[1, 12] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2322, %subview_5[%arg3, %c1, %2321, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2323 = affine.apply affine_map<(d0) -> (d0 + 13)>(%arg4) | |
%2324 = vector.extract %2266[1, 13] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2324, %subview_5[%arg3, %c1, %2323, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2325 = affine.apply affine_map<(d0) -> (d0 + 14)>(%arg4) | |
%2326 = vector.extract %2266[1, 14] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2326, %subview_5[%arg3, %c1, %2325, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2327 = affine.apply affine_map<(d0) -> (d0 + 15)>(%arg4) | |
%2328 = vector.extract %2266[1, 15] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2328, %subview_5[%arg3, %c1, %2327, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2329 = vector.extract %2266[2, 0] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2329, %subview_5[%arg3, %c2, %arg4, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2330 = affine.apply affine_map<(d0) -> (d0 + 1)>(%arg4) | |
%2331 = vector.extract %2266[2, 1] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2331, %subview_5[%arg3, %c2, %2330, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2332 = affine.apply affine_map<(d0) -> (d0 + 2)>(%arg4) | |
%2333 = vector.extract %2266[2, 2] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2333, %subview_5[%arg3, %c2, %2332, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2334 = affine.apply affine_map<(d0) -> (d0 + 3)>(%arg4) | |
%2335 = vector.extract %2266[2, 3] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2335, %subview_5[%arg3, %c2, %2334, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2336 = affine.apply affine_map<(d0) -> (d0 + 4)>(%arg4) | |
%2337 = vector.extract %2266[2, 4] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2337, %subview_5[%arg3, %c2, %2336, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2338 = affine.apply affine_map<(d0) -> (d0 + 5)>(%arg4) | |
%2339 = vector.extract %2266[2, 5] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2339, %subview_5[%arg3, %c2, %2338, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2340 = affine.apply affine_map<(d0) -> (d0 + 6)>(%arg4) | |
%2341 = vector.extract %2266[2, 6] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2341, %subview_5[%arg3, %c2, %2340, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2342 = affine.apply affine_map<(d0) -> (d0 + 7)>(%arg4) | |
%2343 = vector.extract %2266[2, 7] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2343, %subview_5[%arg3, %c2, %2342, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2344 = affine.apply affine_map<(d0) -> (d0 + 8)>(%arg4) | |
%2345 = vector.extract %2266[2, 8] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2345, %subview_5[%arg3, %c2, %2344, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2346 = affine.apply affine_map<(d0) -> (d0 + 9)>(%arg4) | |
%2347 = vector.extract %2266[2, 9] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2347, %subview_5[%arg3, %c2, %2346, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2348 = affine.apply affine_map<(d0) -> (d0 + 10)>(%arg4) | |
%2349 = vector.extract %2266[2, 10] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2349, %subview_5[%arg3, %c2, %2348, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2350 = affine.apply affine_map<(d0) -> (d0 + 11)>(%arg4) | |
%2351 = vector.extract %2266[2, 11] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2351, %subview_5[%arg3, %c2, %2350, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2352 = affine.apply affine_map<(d0) -> (d0 + 12)>(%arg4) | |
%2353 = vector.extract %2266[2, 12] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2353, %subview_5[%arg3, %c2, %2352, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2354 = affine.apply affine_map<(d0) -> (d0 + 13)>(%arg4) | |
%2355 = vector.extract %2266[2, 13] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2355, %subview_5[%arg3, %c2, %2354, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2356 = affine.apply affine_map<(d0) -> (d0 + 14)>(%arg4) | |
%2357 = vector.extract %2266[2, 14] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2357, %subview_5[%arg3, %c2, %2356, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2358 = affine.apply affine_map<(d0) -> (d0 + 15)>(%arg4) | |
%2359 = vector.extract %2266[2, 15] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2359, %subview_5[%arg3, %c2, %2358, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2360 = vector.extract %2266[3, 0] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2360, %subview_5[%arg3, %c3, %arg4, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2361 = affine.apply affine_map<(d0) -> (d0 + 1)>(%arg4) | |
%2362 = vector.extract %2266[3, 1] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2362, %subview_5[%arg3, %c3, %2361, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2363 = affine.apply affine_map<(d0) -> (d0 + 2)>(%arg4) | |
%2364 = vector.extract %2266[3, 2] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2364, %subview_5[%arg3, %c3, %2363, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2365 = affine.apply affine_map<(d0) -> (d0 + 3)>(%arg4) | |
%2366 = vector.extract %2266[3, 3] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2366, %subview_5[%arg3, %c3, %2365, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2367 = affine.apply affine_map<(d0) -> (d0 + 4)>(%arg4) | |
%2368 = vector.extract %2266[3, 4] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2368, %subview_5[%arg3, %c3, %2367, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2369 = affine.apply affine_map<(d0) -> (d0 + 5)>(%arg4) | |
%2370 = vector.extract %2266[3, 5] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2370, %subview_5[%arg3, %c3, %2369, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2371 = affine.apply affine_map<(d0) -> (d0 + 6)>(%arg4) | |
%2372 = vector.extract %2266[3, 6] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2372, %subview_5[%arg3, %c3, %2371, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2373 = affine.apply affine_map<(d0) -> (d0 + 7)>(%arg4) | |
%2374 = vector.extract %2266[3, 7] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2374, %subview_5[%arg3, %c3, %2373, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2375 = affine.apply affine_map<(d0) -> (d0 + 8)>(%arg4) | |
%2376 = vector.extract %2266[3, 8] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2376, %subview_5[%arg3, %c3, %2375, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2377 = affine.apply affine_map<(d0) -> (d0 + 9)>(%arg4) | |
%2378 = vector.extract %2266[3, 9] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2378, %subview_5[%arg3, %c3, %2377, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2379 = affine.apply affine_map<(d0) -> (d0 + 10)>(%arg4) | |
%2380 = vector.extract %2266[3, 10] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2380, %subview_5[%arg3, %c3, %2379, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2381 = affine.apply affine_map<(d0) -> (d0 + 11)>(%arg4) | |
%2382 = vector.extract %2266[3, 11] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2382, %subview_5[%arg3, %c3, %2381, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2383 = affine.apply affine_map<(d0) -> (d0 + 12)>(%arg4) | |
%2384 = vector.extract %2266[3, 12] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2384, %subview_5[%arg3, %c3, %2383, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2385 = affine.apply affine_map<(d0) -> (d0 + 13)>(%arg4) | |
%2386 = vector.extract %2266[3, 13] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2386, %subview_5[%arg3, %c3, %2385, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2387 = affine.apply affine_map<(d0) -> (d0 + 14)>(%arg4) | |
%2388 = vector.extract %2266[3, 14] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2388, %subview_5[%arg3, %c3, %2387, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2389 = affine.apply affine_map<(d0) -> (d0 + 15)>(%arg4) | |
%2390 = vector.extract %2266[3, 15] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2390, %subview_5[%arg3, %c3, %2389, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
} | |
} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After Canonicalizer (canonicalize) //----- // | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%cst = arith.constant dense<0.000000e+00> : vector<1x4x16x16x1xf16> | |
%cst_0 = arith.constant dense<0.000000e+00> : vector<4x16x16xf16> | |
%c63 = arith.constant 63 : index | |
%c62 = arith.constant 62 : index | |
%c61 = arith.constant 61 : index | |
%c60 = arith.constant 60 : index | |
%c59 = arith.constant 59 : index | |
%c58 = arith.constant 58 : index | |
%c57 = arith.constant 57 : index | |
%c56 = arith.constant 56 : index | |
%c55 = arith.constant 55 : index | |
%c54 = arith.constant 54 : index | |
%c53 = arith.constant 53 : index | |
%c52 = arith.constant 52 : index | |
%c51 = arith.constant 51 : index | |
%c50 = arith.constant 50 : index | |
%c49 = arith.constant 49 : index | |
%c48 = arith.constant 48 : index | |
%c47 = arith.constant 47 : index | |
%c46 = arith.constant 46 : index | |
%c45 = arith.constant 45 : index | |
%c44 = arith.constant 44 : index | |
%c43 = arith.constant 43 : index | |
%c42 = arith.constant 42 : index | |
%c41 = arith.constant 41 : index | |
%c40 = arith.constant 40 : index | |
%c39 = arith.constant 39 : index | |
%c38 = arith.constant 38 : index | |
%c37 = arith.constant 37 : index | |
%c36 = arith.constant 36 : index | |
%c35 = arith.constant 35 : index | |
%c34 = arith.constant 34 : index | |
%c33 = arith.constant 33 : index | |
%c32 = arith.constant 32 : index | |
%c31 = arith.constant 31 : index | |
%c30 = arith.constant 30 : index | |
%c29 = arith.constant 29 : index | |
%c28 = arith.constant 28 : index | |
%c27 = arith.constant 27 : index | |
%c26 = arith.constant 26 : index | |
%c25 = arith.constant 25 : index | |
%c24 = arith.constant 24 : index | |
%c23 = arith.constant 23 : index | |
%c22 = arith.constant 22 : index | |
%c21 = arith.constant 21 : index | |
%c20 = arith.constant 20 : index | |
%c19 = arith.constant 19 : index | |
%c18 = arith.constant 18 : index | |
%c17 = arith.constant 17 : index | |
%c15 = arith.constant 15 : index | |
%c14 = arith.constant 14 : index | |
%c13 = arith.constant 13 : index | |
%c12 = arith.constant 12 : index | |
%c11 = arith.constant 11 : index | |
%c10 = arith.constant 10 : index | |
%c9 = arith.constant 9 : index | |
%c8 = arith.constant 8 : index | |
%c7 = arith.constant 7 : index | |
%c6 = arith.constant 6 : index | |
%c5 = arith.constant 5 : index | |
%c4 = arith.constant 4 : index | |
%c3 = arith.constant 3 : index | |
%c2 = arith.constant 2 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%c0 = arith.constant 0 : index | |
%c540 = arith.constant 540 : index | |
%c3200 = arith.constant 3200 : index | |
%c1 = arith.constant 1 : index | |
%c64 = arith.constant 64 : index | |
%c16 = arith.constant 16 : index | |
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x64x16xf16> | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
memref.assume_alignment %14, 64 : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>{%13} | |
memref.assume_alignment %15, 1 : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
%18 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg0 = %16 to %13 step %17 { | |
%22 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
scf.for %arg1 = %18 to %c540 step %19 { | |
%23 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
scf.for %arg2 = %20 to %c3200 step %21 { | |
%subview = memref.subview %15[%arg0, %arg1, %arg2, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_1 = memref.subview %14[%23, %arg2] [64, 64] [1, 1] : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> to memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
scf.for %arg3 = %c0 to %22 step %c1 { | |
scf.for %arg4 = %c0 to %c64 step %c16 { | |
%24 = vector.load %subview_1[%c0, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%25 = vector.load %subview_1[%c1, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%26 = vector.load %subview_1[%c2, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%27 = vector.load %subview_1[%c3, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%28 = vector.load %subview_1[%c4, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%29 = vector.load %subview_1[%c5, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%30 = vector.load %subview_1[%c6, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%31 = vector.load %subview_1[%c7, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%32 = vector.load %subview_1[%c8, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%33 = vector.load %subview_1[%c9, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%34 = vector.load %subview_1[%c10, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%35 = vector.load %subview_1[%c11, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%36 = vector.load %subview_1[%c12, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%37 = vector.load %subview_1[%c13, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%38 = vector.load %subview_1[%c14, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%39 = vector.load %subview_1[%c15, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%40 = vector.load %subview_1[%c16, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%41 = vector.load %subview_1[%c17, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%42 = vector.load %subview_1[%c18, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%43 = vector.load %subview_1[%c19, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%44 = vector.load %subview_1[%c20, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%45 = vector.load %subview_1[%c21, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%46 = vector.load %subview_1[%c22, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%47 = vector.load %subview_1[%c23, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%48 = vector.load %subview_1[%c24, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%49 = vector.load %subview_1[%c25, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%50 = vector.load %subview_1[%c26, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%51 = vector.load %subview_1[%c27, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%52 = vector.load %subview_1[%c28, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%53 = vector.load %subview_1[%c29, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%54 = vector.load %subview_1[%c30, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%55 = vector.load %subview_1[%c31, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%56 = vector.load %subview_1[%c32, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%57 = vector.load %subview_1[%c33, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%58 = vector.load %subview_1[%c34, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%59 = vector.load %subview_1[%c35, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%60 = vector.load %subview_1[%c36, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%61 = vector.load %subview_1[%c37, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%62 = vector.load %subview_1[%c38, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%63 = vector.load %subview_1[%c39, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%64 = vector.load %subview_1[%c40, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%65 = vector.load %subview_1[%c41, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%66 = vector.load %subview_1[%c42, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%67 = vector.load %subview_1[%c43, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%68 = vector.load %subview_1[%c44, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%69 = vector.load %subview_1[%c45, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%70 = vector.load %subview_1[%c46, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%71 = vector.load %subview_1[%c47, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%72 = vector.load %subview_1[%c48, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%73 = vector.load %subview_1[%c49, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%74 = vector.load %subview_1[%c50, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%75 = vector.load %subview_1[%c51, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%76 = vector.load %subview_1[%c52, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%77 = vector.load %subview_1[%c53, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%78 = vector.load %subview_1[%c54, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%79 = vector.load %subview_1[%c55, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%80 = vector.load %subview_1[%c56, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%81 = vector.load %subview_1[%c57, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%82 = vector.load %subview_1[%c58, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%83 = vector.load %subview_1[%c59, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%84 = vector.load %subview_1[%c60, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%85 = vector.load %subview_1[%c61, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%86 = vector.load %subview_1[%c62, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%87 = vector.load %subview_1[%c63, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%subview_2 = memref.subview %alloca[0, 0, 0] [1, 64, 16] [1, 1, 1] : memref<1x64x16xf16> to memref<64x16xf16> | |
vector.store %24, %subview_2[%c0, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %25, %subview_2[%c1, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %26, %subview_2[%c2, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %27, %subview_2[%c3, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %28, %subview_2[%c4, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %29, %subview_2[%c5, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %30, %subview_2[%c6, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %31, %subview_2[%c7, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %32, %subview_2[%c8, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %33, %subview_2[%c9, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %34, %subview_2[%c10, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %35, %subview_2[%c11, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %36, %subview_2[%c12, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %37, %subview_2[%c13, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %38, %subview_2[%c14, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %39, %subview_2[%c15, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %40, %subview_2[%c16, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %41, %subview_2[%c17, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %42, %subview_2[%c18, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %43, %subview_2[%c19, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %44, %subview_2[%c20, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %45, %subview_2[%c21, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %46, %subview_2[%c22, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %47, %subview_2[%c23, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %48, %subview_2[%c24, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %49, %subview_2[%c25, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %50, %subview_2[%c26, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %51, %subview_2[%c27, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %52, %subview_2[%c28, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %53, %subview_2[%c29, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %54, %subview_2[%c30, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %55, %subview_2[%c31, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %56, %subview_2[%c32, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %57, %subview_2[%c33, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %58, %subview_2[%c34, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %59, %subview_2[%c35, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %60, %subview_2[%c36, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %61, %subview_2[%c37, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %62, %subview_2[%c38, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %63, %subview_2[%c39, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %64, %subview_2[%c40, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %65, %subview_2[%c41, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %66, %subview_2[%c42, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %67, %subview_2[%c43, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %68, %subview_2[%c44, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %69, %subview_2[%c45, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %70, %subview_2[%c46, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %71, %subview_2[%c47, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %72, %subview_2[%c48, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %73, %subview_2[%c49, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %74, %subview_2[%c50, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %75, %subview_2[%c51, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %76, %subview_2[%c52, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %77, %subview_2[%c53, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %78, %subview_2[%c54, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %79, %subview_2[%c55, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %80, %subview_2[%c56, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %81, %subview_2[%c57, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %82, %subview_2[%c58, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %83, %subview_2[%c59, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %84, %subview_2[%c60, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %85, %subview_2[%c61, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %86, %subview_2[%c62, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %87, %subview_2[%c63, %c0] : memref<64x16xf16>, vector<16xf16> | |
%expand_shape = memref.expand_shape %alloca [[0], [1, 2], [3, 4]] : memref<1x64x16xf16> into memref<1x4x16x16x1xf16> | |
%subview_3 = memref.subview %expand_shape[0, 0, 0, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : memref<1x4x16x16x1xf16> to memref<1x4x16x16xf16, strided<[1024, 256, 16, 1]>> | |
%subview_4 = memref.subview %subview_3[0, 0, 0, 0] [1, 4, 16, 16] [1, 1, 1, 1] : memref<1x4x16x16xf16, strided<[1024, 256, 16, 1]>> to memref<4x16x16xf16> | |
%88 = vector.load %subview_4[%c0, %c0, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%89 = vector.insert %88, %cst_0 [0, 0] : vector<16xf16> into vector<4x16x16xf16> | |
%90 = vector.load %subview_4[%c0, %c1, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%91 = vector.insert %90, %89 [0, 1] : vector<16xf16> into vector<4x16x16xf16> | |
%92 = vector.load %subview_4[%c0, %c2, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%93 = vector.insert %92, %91 [0, 2] : vector<16xf16> into vector<4x16x16xf16> | |
%94 = vector.load %subview_4[%c0, %c3, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%95 = vector.insert %94, %93 [0, 3] : vector<16xf16> into vector<4x16x16xf16> | |
%96 = vector.load %subview_4[%c0, %c4, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%97 = vector.insert %96, %95 [0, 4] : vector<16xf16> into vector<4x16x16xf16> | |
%98 = vector.load %subview_4[%c0, %c5, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%99 = vector.insert %98, %97 [0, 5] : vector<16xf16> into vector<4x16x16xf16> | |
%100 = vector.load %subview_4[%c0, %c6, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%101 = vector.insert %100, %99 [0, 6] : vector<16xf16> into vector<4x16x16xf16> | |
%102 = vector.load %subview_4[%c0, %c7, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%103 = vector.insert %102, %101 [0, 7] : vector<16xf16> into vector<4x16x16xf16> | |
%104 = vector.load %subview_4[%c0, %c8, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%105 = vector.insert %104, %103 [0, 8] : vector<16xf16> into vector<4x16x16xf16> | |
%106 = vector.load %subview_4[%c0, %c9, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%107 = vector.insert %106, %105 [0, 9] : vector<16xf16> into vector<4x16x16xf16> | |
%108 = vector.load %subview_4[%c0, %c10, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%109 = vector.insert %108, %107 [0, 10] : vector<16xf16> into vector<4x16x16xf16> | |
%110 = vector.load %subview_4[%c0, %c11, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%111 = vector.insert %110, %109 [0, 11] : vector<16xf16> into vector<4x16x16xf16> | |
%112 = vector.load %subview_4[%c0, %c12, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%113 = vector.insert %112, %111 [0, 12] : vector<16xf16> into vector<4x16x16xf16> | |
%114 = vector.load %subview_4[%c0, %c13, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%115 = vector.insert %114, %113 [0, 13] : vector<16xf16> into vector<4x16x16xf16> | |
%116 = vector.load %subview_4[%c0, %c14, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%117 = vector.insert %116, %115 [0, 14] : vector<16xf16> into vector<4x16x16xf16> | |
%118 = vector.load %subview_4[%c0, %c15, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%119 = vector.insert %118, %117 [0, 15] : vector<16xf16> into vector<4x16x16xf16> | |
%120 = vector.load %subview_4[%c1, %c0, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%121 = vector.insert %120, %119 [1, 0] : vector<16xf16> into vector<4x16x16xf16> | |
%122 = vector.load %subview_4[%c1, %c1, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%123 = vector.insert %122, %121 [1, 1] : vector<16xf16> into vector<4x16x16xf16> | |
%124 = vector.load %subview_4[%c1, %c2, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%125 = vector.insert %124, %123 [1, 2] : vector<16xf16> into vector<4x16x16xf16> | |
%126 = vector.load %subview_4[%c1, %c3, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%127 = vector.insert %126, %125 [1, 3] : vector<16xf16> into vector<4x16x16xf16> | |
%128 = vector.load %subview_4[%c1, %c4, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%129 = vector.insert %128, %127 [1, 4] : vector<16xf16> into vector<4x16x16xf16> | |
%130 = vector.load %subview_4[%c1, %c5, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%131 = vector.insert %130, %129 [1, 5] : vector<16xf16> into vector<4x16x16xf16> | |
%132 = vector.load %subview_4[%c1, %c6, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%133 = vector.insert %132, %131 [1, 6] : vector<16xf16> into vector<4x16x16xf16> | |
%134 = vector.load %subview_4[%c1, %c7, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%135 = vector.insert %134, %133 [1, 7] : vector<16xf16> into vector<4x16x16xf16> | |
%136 = vector.load %subview_4[%c1, %c8, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%137 = vector.insert %136, %135 [1, 8] : vector<16xf16> into vector<4x16x16xf16> | |
%138 = vector.load %subview_4[%c1, %c9, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%139 = vector.insert %138, %137 [1, 9] : vector<16xf16> into vector<4x16x16xf16> | |
%140 = vector.load %subview_4[%c1, %c10, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%141 = vector.insert %140, %139 [1, 10] : vector<16xf16> into vector<4x16x16xf16> | |
%142 = vector.load %subview_4[%c1, %c11, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%143 = vector.insert %142, %141 [1, 11] : vector<16xf16> into vector<4x16x16xf16> | |
%144 = vector.load %subview_4[%c1, %c12, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%145 = vector.insert %144, %143 [1, 12] : vector<16xf16> into vector<4x16x16xf16> | |
%146 = vector.load %subview_4[%c1, %c13, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%147 = vector.insert %146, %145 [1, 13] : vector<16xf16> into vector<4x16x16xf16> | |
%148 = vector.load %subview_4[%c1, %c14, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%149 = vector.insert %148, %147 [1, 14] : vector<16xf16> into vector<4x16x16xf16> | |
%150 = vector.load %subview_4[%c1, %c15, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%151 = vector.insert %150, %149 [1, 15] : vector<16xf16> into vector<4x16x16xf16> | |
%152 = vector.load %subview_4[%c2, %c0, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%153 = vector.insert %152, %151 [2, 0] : vector<16xf16> into vector<4x16x16xf16> | |
%154 = vector.load %subview_4[%c2, %c1, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%155 = vector.insert %154, %153 [2, 1] : vector<16xf16> into vector<4x16x16xf16> | |
%156 = vector.load %subview_4[%c2, %c2, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%157 = vector.insert %156, %155 [2, 2] : vector<16xf16> into vector<4x16x16xf16> | |
%158 = vector.load %subview_4[%c2, %c3, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%159 = vector.insert %158, %157 [2, 3] : vector<16xf16> into vector<4x16x16xf16> | |
%160 = vector.load %subview_4[%c2, %c4, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%161 = vector.insert %160, %159 [2, 4] : vector<16xf16> into vector<4x16x16xf16> | |
%162 = vector.load %subview_4[%c2, %c5, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%163 = vector.insert %162, %161 [2, 5] : vector<16xf16> into vector<4x16x16xf16> | |
%164 = vector.load %subview_4[%c2, %c6, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%165 = vector.insert %164, %163 [2, 6] : vector<16xf16> into vector<4x16x16xf16> | |
%166 = vector.load %subview_4[%c2, %c7, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%167 = vector.insert %166, %165 [2, 7] : vector<16xf16> into vector<4x16x16xf16> | |
%168 = vector.load %subview_4[%c2, %c8, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%169 = vector.insert %168, %167 [2, 8] : vector<16xf16> into vector<4x16x16xf16> | |
%170 = vector.load %subview_4[%c2, %c9, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%171 = vector.insert %170, %169 [2, 9] : vector<16xf16> into vector<4x16x16xf16> | |
%172 = vector.load %subview_4[%c2, %c10, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%173 = vector.insert %172, %171 [2, 10] : vector<16xf16> into vector<4x16x16xf16> | |
%174 = vector.load %subview_4[%c2, %c11, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%175 = vector.insert %174, %173 [2, 11] : vector<16xf16> into vector<4x16x16xf16> | |
%176 = vector.load %subview_4[%c2, %c12, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%177 = vector.insert %176, %175 [2, 12] : vector<16xf16> into vector<4x16x16xf16> | |
%178 = vector.load %subview_4[%c2, %c13, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%179 = vector.insert %178, %177 [2, 13] : vector<16xf16> into vector<4x16x16xf16> | |
%180 = vector.load %subview_4[%c2, %c14, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%181 = vector.insert %180, %179 [2, 14] : vector<16xf16> into vector<4x16x16xf16> | |
%182 = vector.load %subview_4[%c2, %c15, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%183 = vector.insert %182, %181 [2, 15] : vector<16xf16> into vector<4x16x16xf16> | |
%184 = vector.load %subview_4[%c3, %c0, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%185 = vector.insert %184, %183 [3, 0] : vector<16xf16> into vector<4x16x16xf16> | |
%186 = vector.load %subview_4[%c3, %c1, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%187 = vector.insert %186, %185 [3, 1] : vector<16xf16> into vector<4x16x16xf16> | |
%188 = vector.load %subview_4[%c3, %c2, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%189 = vector.insert %188, %187 [3, 2] : vector<16xf16> into vector<4x16x16xf16> | |
%190 = vector.load %subview_4[%c3, %c3, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%191 = vector.insert %190, %189 [3, 3] : vector<16xf16> into vector<4x16x16xf16> | |
%192 = vector.load %subview_4[%c3, %c4, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%193 = vector.insert %192, %191 [3, 4] : vector<16xf16> into vector<4x16x16xf16> | |
%194 = vector.load %subview_4[%c3, %c5, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%195 = vector.insert %194, %193 [3, 5] : vector<16xf16> into vector<4x16x16xf16> | |
%196 = vector.load %subview_4[%c3, %c6, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%197 = vector.insert %196, %195 [3, 6] : vector<16xf16> into vector<4x16x16xf16> | |
%198 = vector.load %subview_4[%c3, %c7, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%199 = vector.insert %198, %197 [3, 7] : vector<16xf16> into vector<4x16x16xf16> | |
%200 = vector.load %subview_4[%c3, %c8, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%201 = vector.insert %200, %199 [3, 8] : vector<16xf16> into vector<4x16x16xf16> | |
%202 = vector.load %subview_4[%c3, %c9, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%203 = vector.insert %202, %201 [3, 9] : vector<16xf16> into vector<4x16x16xf16> | |
%204 = vector.load %subview_4[%c3, %c10, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%205 = vector.insert %204, %203 [3, 10] : vector<16xf16> into vector<4x16x16xf16> | |
%206 = vector.load %subview_4[%c3, %c11, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%207 = vector.insert %206, %205 [3, 11] : vector<16xf16> into vector<4x16x16xf16> | |
%208 = vector.load %subview_4[%c3, %c12, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%209 = vector.insert %208, %207 [3, 12] : vector<16xf16> into vector<4x16x16xf16> | |
%210 = vector.load %subview_4[%c3, %c13, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%211 = vector.insert %210, %209 [3, 13] : vector<16xf16> into vector<4x16x16xf16> | |
%212 = vector.load %subview_4[%c3, %c14, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%213 = vector.insert %212, %211 [3, 14] : vector<16xf16> into vector<4x16x16xf16> | |
%214 = vector.load %subview_4[%c3, %c15, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%215 = vector.insert %214, %213 [3, 15] : vector<16xf16> into vector<4x16x16xf16> | |
%216 = vector.shape_cast %215 : vector<4x16x16xf16> to vector<4x16x16x1xf16> | |
%217 = vector.extract %216[0, 0, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%218 = vector.insert %217, %cst [0, 0, 0, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%219 = vector.extract %216[0, 0, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%220 = vector.insert %219, %218 [0, 0, 1, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%221 = vector.extract %216[0, 0, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%222 = vector.insert %221, %220 [0, 0, 2, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%223 = vector.extract %216[0, 0, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%224 = vector.insert %223, %222 [0, 0, 3, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%225 = vector.extract %216[0, 0, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%226 = vector.insert %225, %224 [0, 0, 4, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%227 = vector.extract %216[0, 0, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%228 = vector.insert %227, %226 [0, 0, 5, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%229 = vector.extract %216[0, 0, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%230 = vector.insert %229, %228 [0, 0, 6, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%231 = vector.extract %216[0, 0, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%232 = vector.insert %231, %230 [0, 0, 7, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%233 = vector.extract %216[0, 0, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%234 = vector.insert %233, %232 [0, 0, 8, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%235 = vector.extract %216[0, 0, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%236 = vector.insert %235, %234 [0, 0, 9, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%237 = vector.extract %216[0, 0, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%238 = vector.insert %237, %236 [0, 0, 10, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%239 = vector.extract %216[0, 0, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%240 = vector.insert %239, %238 [0, 0, 11, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%241 = vector.extract %216[0, 0, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%242 = vector.insert %241, %240 [0, 0, 12, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%243 = vector.extract %216[0, 0, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%244 = vector.insert %243, %242 [0, 0, 13, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%245 = vector.extract %216[0, 0, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%246 = vector.insert %245, %244 [0, 0, 14, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%247 = vector.extract %216[0, 0, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%248 = vector.insert %247, %246 [0, 0, 15, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%249 = vector.extract %216[0, 1, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%250 = vector.insert %249, %248 [0, 0, 0, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%251 = vector.extract %216[0, 1, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%252 = vector.insert %251, %250 [0, 0, 1, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%253 = vector.extract %216[0, 1, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%254 = vector.insert %253, %252 [0, 0, 2, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%255 = vector.extract %216[0, 1, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%256 = vector.insert %255, %254 [0, 0, 3, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%257 = vector.extract %216[0, 1, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%258 = vector.insert %257, %256 [0, 0, 4, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%259 = vector.extract %216[0, 1, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%260 = vector.insert %259, %258 [0, 0, 5, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%261 = vector.extract %216[0, 1, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%262 = vector.insert %261, %260 [0, 0, 6, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%263 = vector.extract %216[0, 1, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%264 = vector.insert %263, %262 [0, 0, 7, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%265 = vector.extract %216[0, 1, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%266 = vector.insert %265, %264 [0, 0, 8, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%267 = vector.extract %216[0, 1, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%268 = vector.insert %267, %266 [0, 0, 9, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%269 = vector.extract %216[0, 1, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%270 = vector.insert %269, %268 [0, 0, 10, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%271 = vector.extract %216[0, 1, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%272 = vector.insert %271, %270 [0, 0, 11, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%273 = vector.extract %216[0, 1, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%274 = vector.insert %273, %272 [0, 0, 12, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%275 = vector.extract %216[0, 1, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%276 = vector.insert %275, %274 [0, 0, 13, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%277 = vector.extract %216[0, 1, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%278 = vector.insert %277, %276 [0, 0, 14, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%279 = vector.extract %216[0, 1, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%280 = vector.insert %279, %278 [0, 0, 15, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%281 = vector.extract %216[0, 2, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%282 = vector.insert %281, %280 [0, 0, 0, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%283 = vector.extract %216[0, 2, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%284 = vector.insert %283, %282 [0, 0, 1, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%285 = vector.extract %216[0, 2, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%286 = vector.insert %285, %284 [0, 0, 2, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%287 = vector.extract %216[0, 2, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%288 = vector.insert %287, %286 [0, 0, 3, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%289 = vector.extract %216[0, 2, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%290 = vector.insert %289, %288 [0, 0, 4, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%291 = vector.extract %216[0, 2, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%292 = vector.insert %291, %290 [0, 0, 5, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%293 = vector.extract %216[0, 2, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%294 = vector.insert %293, %292 [0, 0, 6, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%295 = vector.extract %216[0, 2, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%296 = vector.insert %295, %294 [0, 0, 7, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%297 = vector.extract %216[0, 2, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%298 = vector.insert %297, %296 [0, 0, 8, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%299 = vector.extract %216[0, 2, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%300 = vector.insert %299, %298 [0, 0, 9, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%301 = vector.extract %216[0, 2, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%302 = vector.insert %301, %300 [0, 0, 10, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%303 = vector.extract %216[0, 2, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%304 = vector.insert %303, %302 [0, 0, 11, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%305 = vector.extract %216[0, 2, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%306 = vector.insert %305, %304 [0, 0, 12, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%307 = vector.extract %216[0, 2, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%308 = vector.insert %307, %306 [0, 0, 13, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%309 = vector.extract %216[0, 2, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%310 = vector.insert %309, %308 [0, 0, 14, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%311 = vector.extract %216[0, 2, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%312 = vector.insert %311, %310 [0, 0, 15, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%313 = vector.extract %216[0, 3, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%314 = vector.insert %313, %312 [0, 0, 0, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%315 = vector.extract %216[0, 3, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%316 = vector.insert %315, %314 [0, 0, 1, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%317 = vector.extract %216[0, 3, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%318 = vector.insert %317, %316 [0, 0, 2, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%319 = vector.extract %216[0, 3, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%320 = vector.insert %319, %318 [0, 0, 3, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%321 = vector.extract %216[0, 3, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%322 = vector.insert %321, %320 [0, 0, 4, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%323 = vector.extract %216[0, 3, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%324 = vector.insert %323, %322 [0, 0, 5, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%325 = vector.extract %216[0, 3, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%326 = vector.insert %325, %324 [0, 0, 6, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%327 = vector.extract %216[0, 3, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%328 = vector.insert %327, %326 [0, 0, 7, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%329 = vector.extract %216[0, 3, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%330 = vector.insert %329, %328 [0, 0, 8, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%331 = vector.extract %216[0, 3, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%332 = vector.insert %331, %330 [0, 0, 9, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%333 = vector.extract %216[0, 3, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%334 = vector.insert %333, %332 [0, 0, 10, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%335 = vector.extract %216[0, 3, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%336 = vector.insert %335, %334 [0, 0, 11, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%337 = vector.extract %216[0, 3, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%338 = vector.insert %337, %336 [0, 0, 12, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%339 = vector.extract %216[0, 3, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%340 = vector.insert %339, %338 [0, 0, 13, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%341 = vector.extract %216[0, 3, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%342 = vector.insert %341, %340 [0, 0, 14, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%343 = vector.extract %216[0, 3, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%344 = vector.insert %343, %342 [0, 0, 15, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%345 = vector.extract %216[0, 4, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%346 = vector.insert %345, %344 [0, 0, 0, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%347 = vector.extract %216[0, 4, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%348 = vector.insert %347, %346 [0, 0, 1, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%349 = vector.extract %216[0, 4, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%350 = vector.insert %349, %348 [0, 0, 2, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%351 = vector.extract %216[0, 4, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%352 = vector.insert %351, %350 [0, 0, 3, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%353 = vector.extract %216[0, 4, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%354 = vector.insert %353, %352 [0, 0, 4, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%355 = vector.extract %216[0, 4, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%356 = vector.insert %355, %354 [0, 0, 5, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%357 = vector.extract %216[0, 4, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%358 = vector.insert %357, %356 [0, 0, 6, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%359 = vector.extract %216[0, 4, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%360 = vector.insert %359, %358 [0, 0, 7, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%361 = vector.extract %216[0, 4, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%362 = vector.insert %361, %360 [0, 0, 8, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%363 = vector.extract %216[0, 4, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%364 = vector.insert %363, %362 [0, 0, 9, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%365 = vector.extract %216[0, 4, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%366 = vector.insert %365, %364 [0, 0, 10, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%367 = vector.extract %216[0, 4, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%368 = vector.insert %367, %366 [0, 0, 11, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%369 = vector.extract %216[0, 4, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%370 = vector.insert %369, %368 [0, 0, 12, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%371 = vector.extract %216[0, 4, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%372 = vector.insert %371, %370 [0, 0, 13, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%373 = vector.extract %216[0, 4, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%374 = vector.insert %373, %372 [0, 0, 14, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%375 = vector.extract %216[0, 4, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%376 = vector.insert %375, %374 [0, 0, 15, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%377 = vector.extract %216[0, 5, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%378 = vector.insert %377, %376 [0, 0, 0, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%379 = vector.extract %216[0, 5, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%380 = vector.insert %379, %378 [0, 0, 1, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%381 = vector.extract %216[0, 5, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%382 = vector.insert %381, %380 [0, 0, 2, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%383 = vector.extract %216[0, 5, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%384 = vector.insert %383, %382 [0, 0, 3, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%385 = vector.extract %216[0, 5, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%386 = vector.insert %385, %384 [0, 0, 4, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%387 = vector.extract %216[0, 5, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%388 = vector.insert %387, %386 [0, 0, 5, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%389 = vector.extract %216[0, 5, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%390 = vector.insert %389, %388 [0, 0, 6, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%391 = vector.extract %216[0, 5, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%392 = vector.insert %391, %390 [0, 0, 7, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%393 = vector.extract %216[0, 5, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%394 = vector.insert %393, %392 [0, 0, 8, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%395 = vector.extract %216[0, 5, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%396 = vector.insert %395, %394 [0, 0, 9, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%397 = vector.extract %216[0, 5, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%398 = vector.insert %397, %396 [0, 0, 10, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%399 = vector.extract %216[0, 5, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%400 = vector.insert %399, %398 [0, 0, 11, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%401 = vector.extract %216[0, 5, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%402 = vector.insert %401, %400 [0, 0, 12, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%403 = vector.extract %216[0, 5, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%404 = vector.insert %403, %402 [0, 0, 13, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%405 = vector.extract %216[0, 5, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%406 = vector.insert %405, %404 [0, 0, 14, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%407 = vector.extract %216[0, 5, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%408 = vector.insert %407, %406 [0, 0, 15, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%409 = vector.extract %216[0, 6, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%410 = vector.insert %409, %408 [0, 0, 0, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%411 = vector.extract %216[0, 6, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%412 = vector.insert %411, %410 [0, 0, 1, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%413 = vector.extract %216[0, 6, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%414 = vector.insert %413, %412 [0, 0, 2, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%415 = vector.extract %216[0, 6, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%416 = vector.insert %415, %414 [0, 0, 3, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%417 = vector.extract %216[0, 6, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%418 = vector.insert %417, %416 [0, 0, 4, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%419 = vector.extract %216[0, 6, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%420 = vector.insert %419, %418 [0, 0, 5, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%421 = vector.extract %216[0, 6, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%422 = vector.insert %421, %420 [0, 0, 6, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%423 = vector.extract %216[0, 6, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%424 = vector.insert %423, %422 [0, 0, 7, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%425 = vector.extract %216[0, 6, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%426 = vector.insert %425, %424 [0, 0, 8, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%427 = vector.extract %216[0, 6, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%428 = vector.insert %427, %426 [0, 0, 9, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%429 = vector.extract %216[0, 6, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%430 = vector.insert %429, %428 [0, 0, 10, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%431 = vector.extract %216[0, 6, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%432 = vector.insert %431, %430 [0, 0, 11, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%433 = vector.extract %216[0, 6, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%434 = vector.insert %433, %432 [0, 0, 12, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%435 = vector.extract %216[0, 6, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%436 = vector.insert %435, %434 [0, 0, 13, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%437 = vector.extract %216[0, 6, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%438 = vector.insert %437, %436 [0, 0, 14, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%439 = vector.extract %216[0, 6, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%440 = vector.insert %439, %438 [0, 0, 15, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%441 = vector.extract %216[0, 7, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%442 = vector.insert %441, %440 [0, 0, 0, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%443 = vector.extract %216[0, 7, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%444 = vector.insert %443, %442 [0, 0, 1, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%445 = vector.extract %216[0, 7, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%446 = vector.insert %445, %444 [0, 0, 2, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%447 = vector.extract %216[0, 7, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%448 = vector.insert %447, %446 [0, 0, 3, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%449 = vector.extract %216[0, 7, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%450 = vector.insert %449, %448 [0, 0, 4, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%451 = vector.extract %216[0, 7, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%452 = vector.insert %451, %450 [0, 0, 5, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%453 = vector.extract %216[0, 7, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%454 = vector.insert %453, %452 [0, 0, 6, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%455 = vector.extract %216[0, 7, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%456 = vector.insert %455, %454 [0, 0, 7, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%457 = vector.extract %216[0, 7, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%458 = vector.insert %457, %456 [0, 0, 8, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%459 = vector.extract %216[0, 7, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%460 = vector.insert %459, %458 [0, 0, 9, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%461 = vector.extract %216[0, 7, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%462 = vector.insert %461, %460 [0, 0, 10, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%463 = vector.extract %216[0, 7, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%464 = vector.insert %463, %462 [0, 0, 11, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%465 = vector.extract %216[0, 7, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%466 = vector.insert %465, %464 [0, 0, 12, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%467 = vector.extract %216[0, 7, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%468 = vector.insert %467, %466 [0, 0, 13, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%469 = vector.extract %216[0, 7, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%470 = vector.insert %469, %468 [0, 0, 14, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%471 = vector.extract %216[0, 7, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%472 = vector.insert %471, %470 [0, 0, 15, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%473 = vector.extract %216[0, 8, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%474 = vector.insert %473, %472 [0, 0, 0, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%475 = vector.extract %216[0, 8, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%476 = vector.insert %475, %474 [0, 0, 1, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%477 = vector.extract %216[0, 8, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%478 = vector.insert %477, %476 [0, 0, 2, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%479 = vector.extract %216[0, 8, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%480 = vector.insert %479, %478 [0, 0, 3, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%481 = vector.extract %216[0, 8, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%482 = vector.insert %481, %480 [0, 0, 4, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%483 = vector.extract %216[0, 8, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%484 = vector.insert %483, %482 [0, 0, 5, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%485 = vector.extract %216[0, 8, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%486 = vector.insert %485, %484 [0, 0, 6, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%487 = vector.extract %216[0, 8, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%488 = vector.insert %487, %486 [0, 0, 7, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%489 = vector.extract %216[0, 8, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%490 = vector.insert %489, %488 [0, 0, 8, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%491 = vector.extract %216[0, 8, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%492 = vector.insert %491, %490 [0, 0, 9, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%493 = vector.extract %216[0, 8, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%494 = vector.insert %493, %492 [0, 0, 10, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%495 = vector.extract %216[0, 8, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%496 = vector.insert %495, %494 [0, 0, 11, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%497 = vector.extract %216[0, 8, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%498 = vector.insert %497, %496 [0, 0, 12, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%499 = vector.extract %216[0, 8, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%500 = vector.insert %499, %498 [0, 0, 13, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%501 = vector.extract %216[0, 8, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%502 = vector.insert %501, %500 [0, 0, 14, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%503 = vector.extract %216[0, 8, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%504 = vector.insert %503, %502 [0, 0, 15, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%505 = vector.extract %216[0, 9, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%506 = vector.insert %505, %504 [0, 0, 0, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%507 = vector.extract %216[0, 9, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%508 = vector.insert %507, %506 [0, 0, 1, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%509 = vector.extract %216[0, 9, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%510 = vector.insert %509, %508 [0, 0, 2, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%511 = vector.extract %216[0, 9, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%512 = vector.insert %511, %510 [0, 0, 3, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%513 = vector.extract %216[0, 9, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%514 = vector.insert %513, %512 [0, 0, 4, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%515 = vector.extract %216[0, 9, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%516 = vector.insert %515, %514 [0, 0, 5, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%517 = vector.extract %216[0, 9, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%518 = vector.insert %517, %516 [0, 0, 6, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%519 = vector.extract %216[0, 9, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%520 = vector.insert %519, %518 [0, 0, 7, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%521 = vector.extract %216[0, 9, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%522 = vector.insert %521, %520 [0, 0, 8, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%523 = vector.extract %216[0, 9, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%524 = vector.insert %523, %522 [0, 0, 9, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%525 = vector.extract %216[0, 9, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%526 = vector.insert %525, %524 [0, 0, 10, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%527 = vector.extract %216[0, 9, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%528 = vector.insert %527, %526 [0, 0, 11, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%529 = vector.extract %216[0, 9, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%530 = vector.insert %529, %528 [0, 0, 12, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%531 = vector.extract %216[0, 9, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%532 = vector.insert %531, %530 [0, 0, 13, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%533 = vector.extract %216[0, 9, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%534 = vector.insert %533, %532 [0, 0, 14, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%535 = vector.extract %216[0, 9, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%536 = vector.insert %535, %534 [0, 0, 15, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%537 = vector.extract %216[0, 10, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%538 = vector.insert %537, %536 [0, 0, 0, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%539 = vector.extract %216[0, 10, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%540 = vector.insert %539, %538 [0, 0, 1, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%541 = vector.extract %216[0, 10, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%542 = vector.insert %541, %540 [0, 0, 2, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%543 = vector.extract %216[0, 10, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%544 = vector.insert %543, %542 [0, 0, 3, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%545 = vector.extract %216[0, 10, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%546 = vector.insert %545, %544 [0, 0, 4, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%547 = vector.extract %216[0, 10, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%548 = vector.insert %547, %546 [0, 0, 5, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%549 = vector.extract %216[0, 10, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%550 = vector.insert %549, %548 [0, 0, 6, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%551 = vector.extract %216[0, 10, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%552 = vector.insert %551, %550 [0, 0, 7, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%553 = vector.extract %216[0, 10, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%554 = vector.insert %553, %552 [0, 0, 8, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%555 = vector.extract %216[0, 10, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%556 = vector.insert %555, %554 [0, 0, 9, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%557 = vector.extract %216[0, 10, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%558 = vector.insert %557, %556 [0, 0, 10, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%559 = vector.extract %216[0, 10, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%560 = vector.insert %559, %558 [0, 0, 11, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%561 = vector.extract %216[0, 10, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%562 = vector.insert %561, %560 [0, 0, 12, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%563 = vector.extract %216[0, 10, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%564 = vector.insert %563, %562 [0, 0, 13, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%565 = vector.extract %216[0, 10, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%566 = vector.insert %565, %564 [0, 0, 14, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%567 = vector.extract %216[0, 10, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%568 = vector.insert %567, %566 [0, 0, 15, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%569 = vector.extract %216[0, 11, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%570 = vector.insert %569, %568 [0, 0, 0, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%571 = vector.extract %216[0, 11, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%572 = vector.insert %571, %570 [0, 0, 1, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%573 = vector.extract %216[0, 11, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%574 = vector.insert %573, %572 [0, 0, 2, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%575 = vector.extract %216[0, 11, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%576 = vector.insert %575, %574 [0, 0, 3, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%577 = vector.extract %216[0, 11, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%578 = vector.insert %577, %576 [0, 0, 4, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%579 = vector.extract %216[0, 11, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%580 = vector.insert %579, %578 [0, 0, 5, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%581 = vector.extract %216[0, 11, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%582 = vector.insert %581, %580 [0, 0, 6, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%583 = vector.extract %216[0, 11, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%584 = vector.insert %583, %582 [0, 0, 7, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%585 = vector.extract %216[0, 11, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%586 = vector.insert %585, %584 [0, 0, 8, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%587 = vector.extract %216[0, 11, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%588 = vector.insert %587, %586 [0, 0, 9, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%589 = vector.extract %216[0, 11, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%590 = vector.insert %589, %588 [0, 0, 10, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%591 = vector.extract %216[0, 11, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%592 = vector.insert %591, %590 [0, 0, 11, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%593 = vector.extract %216[0, 11, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%594 = vector.insert %593, %592 [0, 0, 12, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%595 = vector.extract %216[0, 11, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%596 = vector.insert %595, %594 [0, 0, 13, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%597 = vector.extract %216[0, 11, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%598 = vector.insert %597, %596 [0, 0, 14, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%599 = vector.extract %216[0, 11, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%600 = vector.insert %599, %598 [0, 0, 15, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%601 = vector.extract %216[0, 12, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%602 = vector.insert %601, %600 [0, 0, 0, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%603 = vector.extract %216[0, 12, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%604 = vector.insert %603, %602 [0, 0, 1, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%605 = vector.extract %216[0, 12, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%606 = vector.insert %605, %604 [0, 0, 2, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%607 = vector.extract %216[0, 12, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%608 = vector.insert %607, %606 [0, 0, 3, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%609 = vector.extract %216[0, 12, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%610 = vector.insert %609, %608 [0, 0, 4, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%611 = vector.extract %216[0, 12, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%612 = vector.insert %611, %610 [0, 0, 5, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%613 = vector.extract %216[0, 12, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%614 = vector.insert %613, %612 [0, 0, 6, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%615 = vector.extract %216[0, 12, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%616 = vector.insert %615, %614 [0, 0, 7, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%617 = vector.extract %216[0, 12, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%618 = vector.insert %617, %616 [0, 0, 8, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%619 = vector.extract %216[0, 12, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%620 = vector.insert %619, %618 [0, 0, 9, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%621 = vector.extract %216[0, 12, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%622 = vector.insert %621, %620 [0, 0, 10, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%623 = vector.extract %216[0, 12, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%624 = vector.insert %623, %622 [0, 0, 11, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%625 = vector.extract %216[0, 12, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%626 = vector.insert %625, %624 [0, 0, 12, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%627 = vector.extract %216[0, 12, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%628 = vector.insert %627, %626 [0, 0, 13, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%629 = vector.extract %216[0, 12, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%630 = vector.insert %629, %628 [0, 0, 14, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%631 = vector.extract %216[0, 12, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%632 = vector.insert %631, %630 [0, 0, 15, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%633 = vector.extract %216[0, 13, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%634 = vector.insert %633, %632 [0, 0, 0, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%635 = vector.extract %216[0, 13, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%636 = vector.insert %635, %634 [0, 0, 1, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%637 = vector.extract %216[0, 13, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%638 = vector.insert %637, %636 [0, 0, 2, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%639 = vector.extract %216[0, 13, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%640 = vector.insert %639, %638 [0, 0, 3, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%641 = vector.extract %216[0, 13, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%642 = vector.insert %641, %640 [0, 0, 4, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%643 = vector.extract %216[0, 13, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%644 = vector.insert %643, %642 [0, 0, 5, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%645 = vector.extract %216[0, 13, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%646 = vector.insert %645, %644 [0, 0, 6, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%647 = vector.extract %216[0, 13, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%648 = vector.insert %647, %646 [0, 0, 7, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%649 = vector.extract %216[0, 13, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%650 = vector.insert %649, %648 [0, 0, 8, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%651 = vector.extract %216[0, 13, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%652 = vector.insert %651, %650 [0, 0, 9, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%653 = vector.extract %216[0, 13, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%654 = vector.insert %653, %652 [0, 0, 10, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%655 = vector.extract %216[0, 13, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%656 = vector.insert %655, %654 [0, 0, 11, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%657 = vector.extract %216[0, 13, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%658 = vector.insert %657, %656 [0, 0, 12, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%659 = vector.extract %216[0, 13, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%660 = vector.insert %659, %658 [0, 0, 13, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%661 = vector.extract %216[0, 13, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%662 = vector.insert %661, %660 [0, 0, 14, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%663 = vector.extract %216[0, 13, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%664 = vector.insert %663, %662 [0, 0, 15, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%665 = vector.extract %216[0, 14, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%666 = vector.insert %665, %664 [0, 0, 0, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%667 = vector.extract %216[0, 14, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%668 = vector.insert %667, %666 [0, 0, 1, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%669 = vector.extract %216[0, 14, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%670 = vector.insert %669, %668 [0, 0, 2, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%671 = vector.extract %216[0, 14, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%672 = vector.insert %671, %670 [0, 0, 3, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%673 = vector.extract %216[0, 14, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%674 = vector.insert %673, %672 [0, 0, 4, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%675 = vector.extract %216[0, 14, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%676 = vector.insert %675, %674 [0, 0, 5, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%677 = vector.extract %216[0, 14, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%678 = vector.insert %677, %676 [0, 0, 6, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%679 = vector.extract %216[0, 14, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%680 = vector.insert %679, %678 [0, 0, 7, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%681 = vector.extract %216[0, 14, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%682 = vector.insert %681, %680 [0, 0, 8, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%683 = vector.extract %216[0, 14, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%684 = vector.insert %683, %682 [0, 0, 9, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%685 = vector.extract %216[0, 14, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%686 = vector.insert %685, %684 [0, 0, 10, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%687 = vector.extract %216[0, 14, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%688 = vector.insert %687, %686 [0, 0, 11, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%689 = vector.extract %216[0, 14, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%690 = vector.insert %689, %688 [0, 0, 12, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%691 = vector.extract %216[0, 14, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%692 = vector.insert %691, %690 [0, 0, 13, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%693 = vector.extract %216[0, 14, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%694 = vector.insert %693, %692 [0, 0, 14, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%695 = vector.extract %216[0, 14, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%696 = vector.insert %695, %694 [0, 0, 15, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%697 = vector.extract %216[0, 15, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%698 = vector.insert %697, %696 [0, 0, 0, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%699 = vector.extract %216[0, 15, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%700 = vector.insert %699, %698 [0, 0, 1, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%701 = vector.extract %216[0, 15, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%702 = vector.insert %701, %700 [0, 0, 2, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%703 = vector.extract %216[0, 15, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%704 = vector.insert %703, %702 [0, 0, 3, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%705 = vector.extract %216[0, 15, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%706 = vector.insert %705, %704 [0, 0, 4, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%707 = vector.extract %216[0, 15, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%708 = vector.insert %707, %706 [0, 0, 5, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%709 = vector.extract %216[0, 15, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%710 = vector.insert %709, %708 [0, 0, 6, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%711 = vector.extract %216[0, 15, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%712 = vector.insert %711, %710 [0, 0, 7, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%713 = vector.extract %216[0, 15, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%714 = vector.insert %713, %712 [0, 0, 8, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%715 = vector.extract %216[0, 15, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%716 = vector.insert %715, %714 [0, 0, 9, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%717 = vector.extract %216[0, 15, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%718 = vector.insert %717, %716 [0, 0, 10, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%719 = vector.extract %216[0, 15, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%720 = vector.insert %719, %718 [0, 0, 11, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%721 = vector.extract %216[0, 15, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%722 = vector.insert %721, %720 [0, 0, 12, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%723 = vector.extract %216[0, 15, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%724 = vector.insert %723, %722 [0, 0, 13, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%725 = vector.extract %216[0, 15, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%726 = vector.insert %725, %724 [0, 0, 14, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%727 = vector.extract %216[0, 15, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%728 = vector.insert %727, %726 [0, 0, 15, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%729 = vector.extract %216[1, 0, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%730 = vector.insert %729, %728 [0, 1, 0, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%731 = vector.extract %216[1, 0, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%732 = vector.insert %731, %730 [0, 1, 1, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%733 = vector.extract %216[1, 0, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%734 = vector.insert %733, %732 [0, 1, 2, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%735 = vector.extract %216[1, 0, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%736 = vector.insert %735, %734 [0, 1, 3, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%737 = vector.extract %216[1, 0, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%738 = vector.insert %737, %736 [0, 1, 4, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%739 = vector.extract %216[1, 0, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%740 = vector.insert %739, %738 [0, 1, 5, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%741 = vector.extract %216[1, 0, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%742 = vector.insert %741, %740 [0, 1, 6, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%743 = vector.extract %216[1, 0, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%744 = vector.insert %743, %742 [0, 1, 7, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%745 = vector.extract %216[1, 0, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%746 = vector.insert %745, %744 [0, 1, 8, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%747 = vector.extract %216[1, 0, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%748 = vector.insert %747, %746 [0, 1, 9, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%749 = vector.extract %216[1, 0, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%750 = vector.insert %749, %748 [0, 1, 10, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%751 = vector.extract %216[1, 0, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%752 = vector.insert %751, %750 [0, 1, 11, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%753 = vector.extract %216[1, 0, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%754 = vector.insert %753, %752 [0, 1, 12, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%755 = vector.extract %216[1, 0, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%756 = vector.insert %755, %754 [0, 1, 13, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%757 = vector.extract %216[1, 0, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%758 = vector.insert %757, %756 [0, 1, 14, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%759 = vector.extract %216[1, 0, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%760 = vector.insert %759, %758 [0, 1, 15, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%761 = vector.extract %216[1, 1, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%762 = vector.insert %761, %760 [0, 1, 0, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%763 = vector.extract %216[1, 1, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%764 = vector.insert %763, %762 [0, 1, 1, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%765 = vector.extract %216[1, 1, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%766 = vector.insert %765, %764 [0, 1, 2, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%767 = vector.extract %216[1, 1, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%768 = vector.insert %767, %766 [0, 1, 3, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%769 = vector.extract %216[1, 1, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%770 = vector.insert %769, %768 [0, 1, 4, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%771 = vector.extract %216[1, 1, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%772 = vector.insert %771, %770 [0, 1, 5, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%773 = vector.extract %216[1, 1, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%774 = vector.insert %773, %772 [0, 1, 6, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%775 = vector.extract %216[1, 1, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%776 = vector.insert %775, %774 [0, 1, 7, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%777 = vector.extract %216[1, 1, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%778 = vector.insert %777, %776 [0, 1, 8, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%779 = vector.extract %216[1, 1, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%780 = vector.insert %779, %778 [0, 1, 9, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%781 = vector.extract %216[1, 1, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%782 = vector.insert %781, %780 [0, 1, 10, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%783 = vector.extract %216[1, 1, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%784 = vector.insert %783, %782 [0, 1, 11, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%785 = vector.extract %216[1, 1, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%786 = vector.insert %785, %784 [0, 1, 12, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%787 = vector.extract %216[1, 1, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%788 = vector.insert %787, %786 [0, 1, 13, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%789 = vector.extract %216[1, 1, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%790 = vector.insert %789, %788 [0, 1, 14, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%791 = vector.extract %216[1, 1, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%792 = vector.insert %791, %790 [0, 1, 15, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%793 = vector.extract %216[1, 2, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%794 = vector.insert %793, %792 [0, 1, 0, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%795 = vector.extract %216[1, 2, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%796 = vector.insert %795, %794 [0, 1, 1, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%797 = vector.extract %216[1, 2, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%798 = vector.insert %797, %796 [0, 1, 2, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%799 = vector.extract %216[1, 2, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%800 = vector.insert %799, %798 [0, 1, 3, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%801 = vector.extract %216[1, 2, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%802 = vector.insert %801, %800 [0, 1, 4, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%803 = vector.extract %216[1, 2, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%804 = vector.insert %803, %802 [0, 1, 5, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%805 = vector.extract %216[1, 2, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%806 = vector.insert %805, %804 [0, 1, 6, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%807 = vector.extract %216[1, 2, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%808 = vector.insert %807, %806 [0, 1, 7, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%809 = vector.extract %216[1, 2, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%810 = vector.insert %809, %808 [0, 1, 8, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%811 = vector.extract %216[1, 2, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%812 = vector.insert %811, %810 [0, 1, 9, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%813 = vector.extract %216[1, 2, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%814 = vector.insert %813, %812 [0, 1, 10, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%815 = vector.extract %216[1, 2, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%816 = vector.insert %815, %814 [0, 1, 11, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%817 = vector.extract %216[1, 2, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%818 = vector.insert %817, %816 [0, 1, 12, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%819 = vector.extract %216[1, 2, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%820 = vector.insert %819, %818 [0, 1, 13, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%821 = vector.extract %216[1, 2, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%822 = vector.insert %821, %820 [0, 1, 14, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%823 = vector.extract %216[1, 2, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%824 = vector.insert %823, %822 [0, 1, 15, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%825 = vector.extract %216[1, 3, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%826 = vector.insert %825, %824 [0, 1, 0, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%827 = vector.extract %216[1, 3, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%828 = vector.insert %827, %826 [0, 1, 1, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%829 = vector.extract %216[1, 3, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%830 = vector.insert %829, %828 [0, 1, 2, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%831 = vector.extract %216[1, 3, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%832 = vector.insert %831, %830 [0, 1, 3, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%833 = vector.extract %216[1, 3, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%834 = vector.insert %833, %832 [0, 1, 4, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%835 = vector.extract %216[1, 3, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%836 = vector.insert %835, %834 [0, 1, 5, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%837 = vector.extract %216[1, 3, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%838 = vector.insert %837, %836 [0, 1, 6, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%839 = vector.extract %216[1, 3, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%840 = vector.insert %839, %838 [0, 1, 7, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%841 = vector.extract %216[1, 3, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%842 = vector.insert %841, %840 [0, 1, 8, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%843 = vector.extract %216[1, 3, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%844 = vector.insert %843, %842 [0, 1, 9, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%845 = vector.extract %216[1, 3, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%846 = vector.insert %845, %844 [0, 1, 10, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%847 = vector.extract %216[1, 3, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%848 = vector.insert %847, %846 [0, 1, 11, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%849 = vector.extract %216[1, 3, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%850 = vector.insert %849, %848 [0, 1, 12, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%851 = vector.extract %216[1, 3, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%852 = vector.insert %851, %850 [0, 1, 13, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%853 = vector.extract %216[1, 3, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%854 = vector.insert %853, %852 [0, 1, 14, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%855 = vector.extract %216[1, 3, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%856 = vector.insert %855, %854 [0, 1, 15, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%857 = vector.extract %216[1, 4, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%858 = vector.insert %857, %856 [0, 1, 0, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%859 = vector.extract %216[1, 4, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%860 = vector.insert %859, %858 [0, 1, 1, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%861 = vector.extract %216[1, 4, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%862 = vector.insert %861, %860 [0, 1, 2, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%863 = vector.extract %216[1, 4, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%864 = vector.insert %863, %862 [0, 1, 3, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%865 = vector.extract %216[1, 4, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%866 = vector.insert %865, %864 [0, 1, 4, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%867 = vector.extract %216[1, 4, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%868 = vector.insert %867, %866 [0, 1, 5, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%869 = vector.extract %216[1, 4, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%870 = vector.insert %869, %868 [0, 1, 6, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%871 = vector.extract %216[1, 4, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%872 = vector.insert %871, %870 [0, 1, 7, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%873 = vector.extract %216[1, 4, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%874 = vector.insert %873, %872 [0, 1, 8, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%875 = vector.extract %216[1, 4, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%876 = vector.insert %875, %874 [0, 1, 9, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%877 = vector.extract %216[1, 4, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%878 = vector.insert %877, %876 [0, 1, 10, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%879 = vector.extract %216[1, 4, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%880 = vector.insert %879, %878 [0, 1, 11, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%881 = vector.extract %216[1, 4, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%882 = vector.insert %881, %880 [0, 1, 12, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%883 = vector.extract %216[1, 4, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%884 = vector.insert %883, %882 [0, 1, 13, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%885 = vector.extract %216[1, 4, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%886 = vector.insert %885, %884 [0, 1, 14, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%887 = vector.extract %216[1, 4, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%888 = vector.insert %887, %886 [0, 1, 15, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%889 = vector.extract %216[1, 5, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%890 = vector.insert %889, %888 [0, 1, 0, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%891 = vector.extract %216[1, 5, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%892 = vector.insert %891, %890 [0, 1, 1, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%893 = vector.extract %216[1, 5, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%894 = vector.insert %893, %892 [0, 1, 2, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%895 = vector.extract %216[1, 5, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%896 = vector.insert %895, %894 [0, 1, 3, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%897 = vector.extract %216[1, 5, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%898 = vector.insert %897, %896 [0, 1, 4, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%899 = vector.extract %216[1, 5, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%900 = vector.insert %899, %898 [0, 1, 5, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%901 = vector.extract %216[1, 5, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%902 = vector.insert %901, %900 [0, 1, 6, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%903 = vector.extract %216[1, 5, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%904 = vector.insert %903, %902 [0, 1, 7, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%905 = vector.extract %216[1, 5, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%906 = vector.insert %905, %904 [0, 1, 8, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%907 = vector.extract %216[1, 5, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%908 = vector.insert %907, %906 [0, 1, 9, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%909 = vector.extract %216[1, 5, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%910 = vector.insert %909, %908 [0, 1, 10, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%911 = vector.extract %216[1, 5, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%912 = vector.insert %911, %910 [0, 1, 11, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%913 = vector.extract %216[1, 5, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%914 = vector.insert %913, %912 [0, 1, 12, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%915 = vector.extract %216[1, 5, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%916 = vector.insert %915, %914 [0, 1, 13, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%917 = vector.extract %216[1, 5, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%918 = vector.insert %917, %916 [0, 1, 14, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%919 = vector.extract %216[1, 5, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%920 = vector.insert %919, %918 [0, 1, 15, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%921 = vector.extract %216[1, 6, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%922 = vector.insert %921, %920 [0, 1, 0, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%923 = vector.extract %216[1, 6, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%924 = vector.insert %923, %922 [0, 1, 1, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%925 = vector.extract %216[1, 6, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%926 = vector.insert %925, %924 [0, 1, 2, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%927 = vector.extract %216[1, 6, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%928 = vector.insert %927, %926 [0, 1, 3, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%929 = vector.extract %216[1, 6, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%930 = vector.insert %929, %928 [0, 1, 4, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%931 = vector.extract %216[1, 6, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%932 = vector.insert %931, %930 [0, 1, 5, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%933 = vector.extract %216[1, 6, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%934 = vector.insert %933, %932 [0, 1, 6, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%935 = vector.extract %216[1, 6, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%936 = vector.insert %935, %934 [0, 1, 7, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%937 = vector.extract %216[1, 6, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%938 = vector.insert %937, %936 [0, 1, 8, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%939 = vector.extract %216[1, 6, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%940 = vector.insert %939, %938 [0, 1, 9, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%941 = vector.extract %216[1, 6, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%942 = vector.insert %941, %940 [0, 1, 10, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%943 = vector.extract %216[1, 6, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%944 = vector.insert %943, %942 [0, 1, 11, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%945 = vector.extract %216[1, 6, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%946 = vector.insert %945, %944 [0, 1, 12, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%947 = vector.extract %216[1, 6, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%948 = vector.insert %947, %946 [0, 1, 13, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%949 = vector.extract %216[1, 6, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%950 = vector.insert %949, %948 [0, 1, 14, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%951 = vector.extract %216[1, 6, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%952 = vector.insert %951, %950 [0, 1, 15, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%953 = vector.extract %216[1, 7, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%954 = vector.insert %953, %952 [0, 1, 0, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%955 = vector.extract %216[1, 7, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%956 = vector.insert %955, %954 [0, 1, 1, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%957 = vector.extract %216[1, 7, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%958 = vector.insert %957, %956 [0, 1, 2, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%959 = vector.extract %216[1, 7, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%960 = vector.insert %959, %958 [0, 1, 3, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%961 = vector.extract %216[1, 7, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%962 = vector.insert %961, %960 [0, 1, 4, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%963 = vector.extract %216[1, 7, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%964 = vector.insert %963, %962 [0, 1, 5, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%965 = vector.extract %216[1, 7, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%966 = vector.insert %965, %964 [0, 1, 6, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%967 = vector.extract %216[1, 7, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%968 = vector.insert %967, %966 [0, 1, 7, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%969 = vector.extract %216[1, 7, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%970 = vector.insert %969, %968 [0, 1, 8, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%971 = vector.extract %216[1, 7, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%972 = vector.insert %971, %970 [0, 1, 9, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%973 = vector.extract %216[1, 7, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%974 = vector.insert %973, %972 [0, 1, 10, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%975 = vector.extract %216[1, 7, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%976 = vector.insert %975, %974 [0, 1, 11, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%977 = vector.extract %216[1, 7, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%978 = vector.insert %977, %976 [0, 1, 12, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%979 = vector.extract %216[1, 7, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%980 = vector.insert %979, %978 [0, 1, 13, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%981 = vector.extract %216[1, 7, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%982 = vector.insert %981, %980 [0, 1, 14, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%983 = vector.extract %216[1, 7, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%984 = vector.insert %983, %982 [0, 1, 15, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%985 = vector.extract %216[1, 8, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%986 = vector.insert %985, %984 [0, 1, 0, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%987 = vector.extract %216[1, 8, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%988 = vector.insert %987, %986 [0, 1, 1, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%989 = vector.extract %216[1, 8, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%990 = vector.insert %989, %988 [0, 1, 2, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%991 = vector.extract %216[1, 8, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%992 = vector.insert %991, %990 [0, 1, 3, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%993 = vector.extract %216[1, 8, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%994 = vector.insert %993, %992 [0, 1, 4, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%995 = vector.extract %216[1, 8, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%996 = vector.insert %995, %994 [0, 1, 5, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%997 = vector.extract %216[1, 8, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%998 = vector.insert %997, %996 [0, 1, 6, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%999 = vector.extract %216[1, 8, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1000 = vector.insert %999, %998 [0, 1, 7, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1001 = vector.extract %216[1, 8, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1002 = vector.insert %1001, %1000 [0, 1, 8, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1003 = vector.extract %216[1, 8, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1004 = vector.insert %1003, %1002 [0, 1, 9, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1005 = vector.extract %216[1, 8, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1006 = vector.insert %1005, %1004 [0, 1, 10, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1007 = vector.extract %216[1, 8, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1008 = vector.insert %1007, %1006 [0, 1, 11, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1009 = vector.extract %216[1, 8, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1010 = vector.insert %1009, %1008 [0, 1, 12, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1011 = vector.extract %216[1, 8, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1012 = vector.insert %1011, %1010 [0, 1, 13, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1013 = vector.extract %216[1, 8, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1014 = vector.insert %1013, %1012 [0, 1, 14, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1015 = vector.extract %216[1, 8, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1016 = vector.insert %1015, %1014 [0, 1, 15, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1017 = vector.extract %216[1, 9, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1018 = vector.insert %1017, %1016 [0, 1, 0, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1019 = vector.extract %216[1, 9, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1020 = vector.insert %1019, %1018 [0, 1, 1, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1021 = vector.extract %216[1, 9, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1022 = vector.insert %1021, %1020 [0, 1, 2, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1023 = vector.extract %216[1, 9, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1024 = vector.insert %1023, %1022 [0, 1, 3, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1025 = vector.extract %216[1, 9, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1026 = vector.insert %1025, %1024 [0, 1, 4, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1027 = vector.extract %216[1, 9, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1028 = vector.insert %1027, %1026 [0, 1, 5, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1029 = vector.extract %216[1, 9, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1030 = vector.insert %1029, %1028 [0, 1, 6, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1031 = vector.extract %216[1, 9, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1032 = vector.insert %1031, %1030 [0, 1, 7, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1033 = vector.extract %216[1, 9, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1034 = vector.insert %1033, %1032 [0, 1, 8, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1035 = vector.extract %216[1, 9, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1036 = vector.insert %1035, %1034 [0, 1, 9, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1037 = vector.extract %216[1, 9, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1038 = vector.insert %1037, %1036 [0, 1, 10, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1039 = vector.extract %216[1, 9, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1040 = vector.insert %1039, %1038 [0, 1, 11, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1041 = vector.extract %216[1, 9, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1042 = vector.insert %1041, %1040 [0, 1, 12, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1043 = vector.extract %216[1, 9, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1044 = vector.insert %1043, %1042 [0, 1, 13, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1045 = vector.extract %216[1, 9, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1046 = vector.insert %1045, %1044 [0, 1, 14, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1047 = vector.extract %216[1, 9, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1048 = vector.insert %1047, %1046 [0, 1, 15, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1049 = vector.extract %216[1, 10, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1050 = vector.insert %1049, %1048 [0, 1, 0, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1051 = vector.extract %216[1, 10, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1052 = vector.insert %1051, %1050 [0, 1, 1, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1053 = vector.extract %216[1, 10, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1054 = vector.insert %1053, %1052 [0, 1, 2, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1055 = vector.extract %216[1, 10, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1056 = vector.insert %1055, %1054 [0, 1, 3, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1057 = vector.extract %216[1, 10, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1058 = vector.insert %1057, %1056 [0, 1, 4, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1059 = vector.extract %216[1, 10, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1060 = vector.insert %1059, %1058 [0, 1, 5, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1061 = vector.extract %216[1, 10, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1062 = vector.insert %1061, %1060 [0, 1, 6, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1063 = vector.extract %216[1, 10, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1064 = vector.insert %1063, %1062 [0, 1, 7, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1065 = vector.extract %216[1, 10, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1066 = vector.insert %1065, %1064 [0, 1, 8, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1067 = vector.extract %216[1, 10, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1068 = vector.insert %1067, %1066 [0, 1, 9, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1069 = vector.extract %216[1, 10, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1070 = vector.insert %1069, %1068 [0, 1, 10, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1071 = vector.extract %216[1, 10, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1072 = vector.insert %1071, %1070 [0, 1, 11, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1073 = vector.extract %216[1, 10, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1074 = vector.insert %1073, %1072 [0, 1, 12, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1075 = vector.extract %216[1, 10, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1076 = vector.insert %1075, %1074 [0, 1, 13, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1077 = vector.extract %216[1, 10, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1078 = vector.insert %1077, %1076 [0, 1, 14, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1079 = vector.extract %216[1, 10, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1080 = vector.insert %1079, %1078 [0, 1, 15, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1081 = vector.extract %216[1, 11, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1082 = vector.insert %1081, %1080 [0, 1, 0, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1083 = vector.extract %216[1, 11, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1084 = vector.insert %1083, %1082 [0, 1, 1, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1085 = vector.extract %216[1, 11, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1086 = vector.insert %1085, %1084 [0, 1, 2, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1087 = vector.extract %216[1, 11, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1088 = vector.insert %1087, %1086 [0, 1, 3, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1089 = vector.extract %216[1, 11, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1090 = vector.insert %1089, %1088 [0, 1, 4, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1091 = vector.extract %216[1, 11, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1092 = vector.insert %1091, %1090 [0, 1, 5, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1093 = vector.extract %216[1, 11, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1094 = vector.insert %1093, %1092 [0, 1, 6, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1095 = vector.extract %216[1, 11, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1096 = vector.insert %1095, %1094 [0, 1, 7, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1097 = vector.extract %216[1, 11, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1098 = vector.insert %1097, %1096 [0, 1, 8, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1099 = vector.extract %216[1, 11, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1100 = vector.insert %1099, %1098 [0, 1, 9, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1101 = vector.extract %216[1, 11, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1102 = vector.insert %1101, %1100 [0, 1, 10, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1103 = vector.extract %216[1, 11, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1104 = vector.insert %1103, %1102 [0, 1, 11, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1105 = vector.extract %216[1, 11, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1106 = vector.insert %1105, %1104 [0, 1, 12, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1107 = vector.extract %216[1, 11, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1108 = vector.insert %1107, %1106 [0, 1, 13, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1109 = vector.extract %216[1, 11, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1110 = vector.insert %1109, %1108 [0, 1, 14, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1111 = vector.extract %216[1, 11, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1112 = vector.insert %1111, %1110 [0, 1, 15, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1113 = vector.extract %216[1, 12, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1114 = vector.insert %1113, %1112 [0, 1, 0, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1115 = vector.extract %216[1, 12, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1116 = vector.insert %1115, %1114 [0, 1, 1, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1117 = vector.extract %216[1, 12, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1118 = vector.insert %1117, %1116 [0, 1, 2, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1119 = vector.extract %216[1, 12, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1120 = vector.insert %1119, %1118 [0, 1, 3, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1121 = vector.extract %216[1, 12, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1122 = vector.insert %1121, %1120 [0, 1, 4, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1123 = vector.extract %216[1, 12, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1124 = vector.insert %1123, %1122 [0, 1, 5, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1125 = vector.extract %216[1, 12, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1126 = vector.insert %1125, %1124 [0, 1, 6, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1127 = vector.extract %216[1, 12, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1128 = vector.insert %1127, %1126 [0, 1, 7, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1129 = vector.extract %216[1, 12, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1130 = vector.insert %1129, %1128 [0, 1, 8, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1131 = vector.extract %216[1, 12, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1132 = vector.insert %1131, %1130 [0, 1, 9, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1133 = vector.extract %216[1, 12, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1134 = vector.insert %1133, %1132 [0, 1, 10, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1135 = vector.extract %216[1, 12, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1136 = vector.insert %1135, %1134 [0, 1, 11, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1137 = vector.extract %216[1, 12, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1138 = vector.insert %1137, %1136 [0, 1, 12, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1139 = vector.extract %216[1, 12, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1140 = vector.insert %1139, %1138 [0, 1, 13, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1141 = vector.extract %216[1, 12, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1142 = vector.insert %1141, %1140 [0, 1, 14, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1143 = vector.extract %216[1, 12, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1144 = vector.insert %1143, %1142 [0, 1, 15, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1145 = vector.extract %216[1, 13, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1146 = vector.insert %1145, %1144 [0, 1, 0, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1147 = vector.extract %216[1, 13, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1148 = vector.insert %1147, %1146 [0, 1, 1, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1149 = vector.extract %216[1, 13, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1150 = vector.insert %1149, %1148 [0, 1, 2, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1151 = vector.extract %216[1, 13, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1152 = vector.insert %1151, %1150 [0, 1, 3, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1153 = vector.extract %216[1, 13, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1154 = vector.insert %1153, %1152 [0, 1, 4, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1155 = vector.extract %216[1, 13, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1156 = vector.insert %1155, %1154 [0, 1, 5, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1157 = vector.extract %216[1, 13, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1158 = vector.insert %1157, %1156 [0, 1, 6, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1159 = vector.extract %216[1, 13, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1160 = vector.insert %1159, %1158 [0, 1, 7, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1161 = vector.extract %216[1, 13, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1162 = vector.insert %1161, %1160 [0, 1, 8, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1163 = vector.extract %216[1, 13, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1164 = vector.insert %1163, %1162 [0, 1, 9, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1165 = vector.extract %216[1, 13, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1166 = vector.insert %1165, %1164 [0, 1, 10, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1167 = vector.extract %216[1, 13, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1168 = vector.insert %1167, %1166 [0, 1, 11, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1169 = vector.extract %216[1, 13, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1170 = vector.insert %1169, %1168 [0, 1, 12, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1171 = vector.extract %216[1, 13, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1172 = vector.insert %1171, %1170 [0, 1, 13, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1173 = vector.extract %216[1, 13, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1174 = vector.insert %1173, %1172 [0, 1, 14, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1175 = vector.extract %216[1, 13, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1176 = vector.insert %1175, %1174 [0, 1, 15, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1177 = vector.extract %216[1, 14, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1178 = vector.insert %1177, %1176 [0, 1, 0, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1179 = vector.extract %216[1, 14, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1180 = vector.insert %1179, %1178 [0, 1, 1, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1181 = vector.extract %216[1, 14, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1182 = vector.insert %1181, %1180 [0, 1, 2, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1183 = vector.extract %216[1, 14, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1184 = vector.insert %1183, %1182 [0, 1, 3, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1185 = vector.extract %216[1, 14, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1186 = vector.insert %1185, %1184 [0, 1, 4, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1187 = vector.extract %216[1, 14, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1188 = vector.insert %1187, %1186 [0, 1, 5, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1189 = vector.extract %216[1, 14, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1190 = vector.insert %1189, %1188 [0, 1, 6, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1191 = vector.extract %216[1, 14, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1192 = vector.insert %1191, %1190 [0, 1, 7, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1193 = vector.extract %216[1, 14, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1194 = vector.insert %1193, %1192 [0, 1, 8, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1195 = vector.extract %216[1, 14, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1196 = vector.insert %1195, %1194 [0, 1, 9, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1197 = vector.extract %216[1, 14, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1198 = vector.insert %1197, %1196 [0, 1, 10, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1199 = vector.extract %216[1, 14, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1200 = vector.insert %1199, %1198 [0, 1, 11, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1201 = vector.extract %216[1, 14, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1202 = vector.insert %1201, %1200 [0, 1, 12, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1203 = vector.extract %216[1, 14, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1204 = vector.insert %1203, %1202 [0, 1, 13, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1205 = vector.extract %216[1, 14, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1206 = vector.insert %1205, %1204 [0, 1, 14, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1207 = vector.extract %216[1, 14, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1208 = vector.insert %1207, %1206 [0, 1, 15, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1209 = vector.extract %216[1, 15, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1210 = vector.insert %1209, %1208 [0, 1, 0, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1211 = vector.extract %216[1, 15, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1212 = vector.insert %1211, %1210 [0, 1, 1, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1213 = vector.extract %216[1, 15, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1214 = vector.insert %1213, %1212 [0, 1, 2, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1215 = vector.extract %216[1, 15, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1216 = vector.insert %1215, %1214 [0, 1, 3, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1217 = vector.extract %216[1, 15, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1218 = vector.insert %1217, %1216 [0, 1, 4, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1219 = vector.extract %216[1, 15, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1220 = vector.insert %1219, %1218 [0, 1, 5, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1221 = vector.extract %216[1, 15, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1222 = vector.insert %1221, %1220 [0, 1, 6, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1223 = vector.extract %216[1, 15, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1224 = vector.insert %1223, %1222 [0, 1, 7, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1225 = vector.extract %216[1, 15, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1226 = vector.insert %1225, %1224 [0, 1, 8, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1227 = vector.extract %216[1, 15, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1228 = vector.insert %1227, %1226 [0, 1, 9, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1229 = vector.extract %216[1, 15, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1230 = vector.insert %1229, %1228 [0, 1, 10, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1231 = vector.extract %216[1, 15, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1232 = vector.insert %1231, %1230 [0, 1, 11, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1233 = vector.extract %216[1, 15, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1234 = vector.insert %1233, %1232 [0, 1, 12, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1235 = vector.extract %216[1, 15, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1236 = vector.insert %1235, %1234 [0, 1, 13, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1237 = vector.extract %216[1, 15, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1238 = vector.insert %1237, %1236 [0, 1, 14, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1239 = vector.extract %216[1, 15, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1240 = vector.insert %1239, %1238 [0, 1, 15, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1241 = vector.extract %216[2, 0, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1242 = vector.insert %1241, %1240 [0, 2, 0, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1243 = vector.extract %216[2, 0, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1244 = vector.insert %1243, %1242 [0, 2, 1, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1245 = vector.extract %216[2, 0, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1246 = vector.insert %1245, %1244 [0, 2, 2, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1247 = vector.extract %216[2, 0, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1248 = vector.insert %1247, %1246 [0, 2, 3, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1249 = vector.extract %216[2, 0, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1250 = vector.insert %1249, %1248 [0, 2, 4, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1251 = vector.extract %216[2, 0, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1252 = vector.insert %1251, %1250 [0, 2, 5, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1253 = vector.extract %216[2, 0, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1254 = vector.insert %1253, %1252 [0, 2, 6, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1255 = vector.extract %216[2, 0, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1256 = vector.insert %1255, %1254 [0, 2, 7, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1257 = vector.extract %216[2, 0, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1258 = vector.insert %1257, %1256 [0, 2, 8, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1259 = vector.extract %216[2, 0, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1260 = vector.insert %1259, %1258 [0, 2, 9, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1261 = vector.extract %216[2, 0, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1262 = vector.insert %1261, %1260 [0, 2, 10, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1263 = vector.extract %216[2, 0, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1264 = vector.insert %1263, %1262 [0, 2, 11, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1265 = vector.extract %216[2, 0, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1266 = vector.insert %1265, %1264 [0, 2, 12, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1267 = vector.extract %216[2, 0, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1268 = vector.insert %1267, %1266 [0, 2, 13, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1269 = vector.extract %216[2, 0, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1270 = vector.insert %1269, %1268 [0, 2, 14, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1271 = vector.extract %216[2, 0, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1272 = vector.insert %1271, %1270 [0, 2, 15, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1273 = vector.extract %216[2, 1, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1274 = vector.insert %1273, %1272 [0, 2, 0, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1275 = vector.extract %216[2, 1, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1276 = vector.insert %1275, %1274 [0, 2, 1, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1277 = vector.extract %216[2, 1, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1278 = vector.insert %1277, %1276 [0, 2, 2, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1279 = vector.extract %216[2, 1, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1280 = vector.insert %1279, %1278 [0, 2, 3, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1281 = vector.extract %216[2, 1, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1282 = vector.insert %1281, %1280 [0, 2, 4, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1283 = vector.extract %216[2, 1, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1284 = vector.insert %1283, %1282 [0, 2, 5, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1285 = vector.extract %216[2, 1, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1286 = vector.insert %1285, %1284 [0, 2, 6, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1287 = vector.extract %216[2, 1, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1288 = vector.insert %1287, %1286 [0, 2, 7, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1289 = vector.extract %216[2, 1, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1290 = vector.insert %1289, %1288 [0, 2, 8, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1291 = vector.extract %216[2, 1, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1292 = vector.insert %1291, %1290 [0, 2, 9, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1293 = vector.extract %216[2, 1, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1294 = vector.insert %1293, %1292 [0, 2, 10, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1295 = vector.extract %216[2, 1, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1296 = vector.insert %1295, %1294 [0, 2, 11, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1297 = vector.extract %216[2, 1, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1298 = vector.insert %1297, %1296 [0, 2, 12, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1299 = vector.extract %216[2, 1, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1300 = vector.insert %1299, %1298 [0, 2, 13, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1301 = vector.extract %216[2, 1, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1302 = vector.insert %1301, %1300 [0, 2, 14, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1303 = vector.extract %216[2, 1, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1304 = vector.insert %1303, %1302 [0, 2, 15, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1305 = vector.extract %216[2, 2, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1306 = vector.insert %1305, %1304 [0, 2, 0, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1307 = vector.extract %216[2, 2, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1308 = vector.insert %1307, %1306 [0, 2, 1, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1309 = vector.extract %216[2, 2, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1310 = vector.insert %1309, %1308 [0, 2, 2, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1311 = vector.extract %216[2, 2, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1312 = vector.insert %1311, %1310 [0, 2, 3, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1313 = vector.extract %216[2, 2, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1314 = vector.insert %1313, %1312 [0, 2, 4, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1315 = vector.extract %216[2, 2, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1316 = vector.insert %1315, %1314 [0, 2, 5, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1317 = vector.extract %216[2, 2, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1318 = vector.insert %1317, %1316 [0, 2, 6, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1319 = vector.extract %216[2, 2, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1320 = vector.insert %1319, %1318 [0, 2, 7, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1321 = vector.extract %216[2, 2, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1322 = vector.insert %1321, %1320 [0, 2, 8, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1323 = vector.extract %216[2, 2, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1324 = vector.insert %1323, %1322 [0, 2, 9, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1325 = vector.extract %216[2, 2, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1326 = vector.insert %1325, %1324 [0, 2, 10, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1327 = vector.extract %216[2, 2, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1328 = vector.insert %1327, %1326 [0, 2, 11, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1329 = vector.extract %216[2, 2, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1330 = vector.insert %1329, %1328 [0, 2, 12, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1331 = vector.extract %216[2, 2, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1332 = vector.insert %1331, %1330 [0, 2, 13, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1333 = vector.extract %216[2, 2, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1334 = vector.insert %1333, %1332 [0, 2, 14, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1335 = vector.extract %216[2, 2, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1336 = vector.insert %1335, %1334 [0, 2, 15, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1337 = vector.extract %216[2, 3, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1338 = vector.insert %1337, %1336 [0, 2, 0, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1339 = vector.extract %216[2, 3, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1340 = vector.insert %1339, %1338 [0, 2, 1, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1341 = vector.extract %216[2, 3, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1342 = vector.insert %1341, %1340 [0, 2, 2, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1343 = vector.extract %216[2, 3, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1344 = vector.insert %1343, %1342 [0, 2, 3, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1345 = vector.extract %216[2, 3, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1346 = vector.insert %1345, %1344 [0, 2, 4, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1347 = vector.extract %216[2, 3, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1348 = vector.insert %1347, %1346 [0, 2, 5, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1349 = vector.extract %216[2, 3, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1350 = vector.insert %1349, %1348 [0, 2, 6, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1351 = vector.extract %216[2, 3, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1352 = vector.insert %1351, %1350 [0, 2, 7, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1353 = vector.extract %216[2, 3, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1354 = vector.insert %1353, %1352 [0, 2, 8, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1355 = vector.extract %216[2, 3, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1356 = vector.insert %1355, %1354 [0, 2, 9, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1357 = vector.extract %216[2, 3, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1358 = vector.insert %1357, %1356 [0, 2, 10, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1359 = vector.extract %216[2, 3, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1360 = vector.insert %1359, %1358 [0, 2, 11, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1361 = vector.extract %216[2, 3, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1362 = vector.insert %1361, %1360 [0, 2, 12, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1363 = vector.extract %216[2, 3, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1364 = vector.insert %1363, %1362 [0, 2, 13, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1365 = vector.extract %216[2, 3, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1366 = vector.insert %1365, %1364 [0, 2, 14, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1367 = vector.extract %216[2, 3, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1368 = vector.insert %1367, %1366 [0, 2, 15, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1369 = vector.extract %216[2, 4, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1370 = vector.insert %1369, %1368 [0, 2, 0, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1371 = vector.extract %216[2, 4, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1372 = vector.insert %1371, %1370 [0, 2, 1, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1373 = vector.extract %216[2, 4, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1374 = vector.insert %1373, %1372 [0, 2, 2, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1375 = vector.extract %216[2, 4, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1376 = vector.insert %1375, %1374 [0, 2, 3, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1377 = vector.extract %216[2, 4, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1378 = vector.insert %1377, %1376 [0, 2, 4, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1379 = vector.extract %216[2, 4, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1380 = vector.insert %1379, %1378 [0, 2, 5, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1381 = vector.extract %216[2, 4, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1382 = vector.insert %1381, %1380 [0, 2, 6, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1383 = vector.extract %216[2, 4, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1384 = vector.insert %1383, %1382 [0, 2, 7, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1385 = vector.extract %216[2, 4, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1386 = vector.insert %1385, %1384 [0, 2, 8, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1387 = vector.extract %216[2, 4, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1388 = vector.insert %1387, %1386 [0, 2, 9, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1389 = vector.extract %216[2, 4, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1390 = vector.insert %1389, %1388 [0, 2, 10, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1391 = vector.extract %216[2, 4, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1392 = vector.insert %1391, %1390 [0, 2, 11, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1393 = vector.extract %216[2, 4, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1394 = vector.insert %1393, %1392 [0, 2, 12, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1395 = vector.extract %216[2, 4, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1396 = vector.insert %1395, %1394 [0, 2, 13, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1397 = vector.extract %216[2, 4, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1398 = vector.insert %1397, %1396 [0, 2, 14, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1399 = vector.extract %216[2, 4, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1400 = vector.insert %1399, %1398 [0, 2, 15, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1401 = vector.extract %216[2, 5, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1402 = vector.insert %1401, %1400 [0, 2, 0, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1403 = vector.extract %216[2, 5, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1404 = vector.insert %1403, %1402 [0, 2, 1, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1405 = vector.extract %216[2, 5, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1406 = vector.insert %1405, %1404 [0, 2, 2, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1407 = vector.extract %216[2, 5, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1408 = vector.insert %1407, %1406 [0, 2, 3, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1409 = vector.extract %216[2, 5, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1410 = vector.insert %1409, %1408 [0, 2, 4, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1411 = vector.extract %216[2, 5, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1412 = vector.insert %1411, %1410 [0, 2, 5, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1413 = vector.extract %216[2, 5, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1414 = vector.insert %1413, %1412 [0, 2, 6, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1415 = vector.extract %216[2, 5, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1416 = vector.insert %1415, %1414 [0, 2, 7, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1417 = vector.extract %216[2, 5, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1418 = vector.insert %1417, %1416 [0, 2, 8, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1419 = vector.extract %216[2, 5, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1420 = vector.insert %1419, %1418 [0, 2, 9, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1421 = vector.extract %216[2, 5, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1422 = vector.insert %1421, %1420 [0, 2, 10, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1423 = vector.extract %216[2, 5, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1424 = vector.insert %1423, %1422 [0, 2, 11, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1425 = vector.extract %216[2, 5, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1426 = vector.insert %1425, %1424 [0, 2, 12, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1427 = vector.extract %216[2, 5, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1428 = vector.insert %1427, %1426 [0, 2, 13, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1429 = vector.extract %216[2, 5, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1430 = vector.insert %1429, %1428 [0, 2, 14, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1431 = vector.extract %216[2, 5, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1432 = vector.insert %1431, %1430 [0, 2, 15, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1433 = vector.extract %216[2, 6, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1434 = vector.insert %1433, %1432 [0, 2, 0, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1435 = vector.extract %216[2, 6, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1436 = vector.insert %1435, %1434 [0, 2, 1, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1437 = vector.extract %216[2, 6, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1438 = vector.insert %1437, %1436 [0, 2, 2, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1439 = vector.extract %216[2, 6, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1440 = vector.insert %1439, %1438 [0, 2, 3, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1441 = vector.extract %216[2, 6, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1442 = vector.insert %1441, %1440 [0, 2, 4, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1443 = vector.extract %216[2, 6, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1444 = vector.insert %1443, %1442 [0, 2, 5, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1445 = vector.extract %216[2, 6, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1446 = vector.insert %1445, %1444 [0, 2, 6, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1447 = vector.extract %216[2, 6, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1448 = vector.insert %1447, %1446 [0, 2, 7, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1449 = vector.extract %216[2, 6, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1450 = vector.insert %1449, %1448 [0, 2, 8, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1451 = vector.extract %216[2, 6, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1452 = vector.insert %1451, %1450 [0, 2, 9, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1453 = vector.extract %216[2, 6, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1454 = vector.insert %1453, %1452 [0, 2, 10, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1455 = vector.extract %216[2, 6, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1456 = vector.insert %1455, %1454 [0, 2, 11, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1457 = vector.extract %216[2, 6, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1458 = vector.insert %1457, %1456 [0, 2, 12, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1459 = vector.extract %216[2, 6, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1460 = vector.insert %1459, %1458 [0, 2, 13, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1461 = vector.extract %216[2, 6, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1462 = vector.insert %1461, %1460 [0, 2, 14, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1463 = vector.extract %216[2, 6, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1464 = vector.insert %1463, %1462 [0, 2, 15, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1465 = vector.extract %216[2, 7, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1466 = vector.insert %1465, %1464 [0, 2, 0, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1467 = vector.extract %216[2, 7, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1468 = vector.insert %1467, %1466 [0, 2, 1, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1469 = vector.extract %216[2, 7, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1470 = vector.insert %1469, %1468 [0, 2, 2, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1471 = vector.extract %216[2, 7, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1472 = vector.insert %1471, %1470 [0, 2, 3, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1473 = vector.extract %216[2, 7, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1474 = vector.insert %1473, %1472 [0, 2, 4, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1475 = vector.extract %216[2, 7, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1476 = vector.insert %1475, %1474 [0, 2, 5, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1477 = vector.extract %216[2, 7, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1478 = vector.insert %1477, %1476 [0, 2, 6, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1479 = vector.extract %216[2, 7, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1480 = vector.insert %1479, %1478 [0, 2, 7, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1481 = vector.extract %216[2, 7, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1482 = vector.insert %1481, %1480 [0, 2, 8, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1483 = vector.extract %216[2, 7, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1484 = vector.insert %1483, %1482 [0, 2, 9, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1485 = vector.extract %216[2, 7, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1486 = vector.insert %1485, %1484 [0, 2, 10, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1487 = vector.extract %216[2, 7, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1488 = vector.insert %1487, %1486 [0, 2, 11, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1489 = vector.extract %216[2, 7, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1490 = vector.insert %1489, %1488 [0, 2, 12, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1491 = vector.extract %216[2, 7, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1492 = vector.insert %1491, %1490 [0, 2, 13, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1493 = vector.extract %216[2, 7, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1494 = vector.insert %1493, %1492 [0, 2, 14, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1495 = vector.extract %216[2, 7, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1496 = vector.insert %1495, %1494 [0, 2, 15, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1497 = vector.extract %216[2, 8, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1498 = vector.insert %1497, %1496 [0, 2, 0, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1499 = vector.extract %216[2, 8, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1500 = vector.insert %1499, %1498 [0, 2, 1, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1501 = vector.extract %216[2, 8, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1502 = vector.insert %1501, %1500 [0, 2, 2, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1503 = vector.extract %216[2, 8, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1504 = vector.insert %1503, %1502 [0, 2, 3, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1505 = vector.extract %216[2, 8, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1506 = vector.insert %1505, %1504 [0, 2, 4, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1507 = vector.extract %216[2, 8, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1508 = vector.insert %1507, %1506 [0, 2, 5, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1509 = vector.extract %216[2, 8, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1510 = vector.insert %1509, %1508 [0, 2, 6, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1511 = vector.extract %216[2, 8, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1512 = vector.insert %1511, %1510 [0, 2, 7, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1513 = vector.extract %216[2, 8, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1514 = vector.insert %1513, %1512 [0, 2, 8, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1515 = vector.extract %216[2, 8, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1516 = vector.insert %1515, %1514 [0, 2, 9, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1517 = vector.extract %216[2, 8, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1518 = vector.insert %1517, %1516 [0, 2, 10, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1519 = vector.extract %216[2, 8, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1520 = vector.insert %1519, %1518 [0, 2, 11, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1521 = vector.extract %216[2, 8, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1522 = vector.insert %1521, %1520 [0, 2, 12, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1523 = vector.extract %216[2, 8, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1524 = vector.insert %1523, %1522 [0, 2, 13, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1525 = vector.extract %216[2, 8, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1526 = vector.insert %1525, %1524 [0, 2, 14, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1527 = vector.extract %216[2, 8, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1528 = vector.insert %1527, %1526 [0, 2, 15, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1529 = vector.extract %216[2, 9, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1530 = vector.insert %1529, %1528 [0, 2, 0, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1531 = vector.extract %216[2, 9, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1532 = vector.insert %1531, %1530 [0, 2, 1, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1533 = vector.extract %216[2, 9, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1534 = vector.insert %1533, %1532 [0, 2, 2, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1535 = vector.extract %216[2, 9, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1536 = vector.insert %1535, %1534 [0, 2, 3, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1537 = vector.extract %216[2, 9, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1538 = vector.insert %1537, %1536 [0, 2, 4, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1539 = vector.extract %216[2, 9, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1540 = vector.insert %1539, %1538 [0, 2, 5, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1541 = vector.extract %216[2, 9, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1542 = vector.insert %1541, %1540 [0, 2, 6, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1543 = vector.extract %216[2, 9, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1544 = vector.insert %1543, %1542 [0, 2, 7, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1545 = vector.extract %216[2, 9, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1546 = vector.insert %1545, %1544 [0, 2, 8, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1547 = vector.extract %216[2, 9, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1548 = vector.insert %1547, %1546 [0, 2, 9, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1549 = vector.extract %216[2, 9, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1550 = vector.insert %1549, %1548 [0, 2, 10, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1551 = vector.extract %216[2, 9, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1552 = vector.insert %1551, %1550 [0, 2, 11, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1553 = vector.extract %216[2, 9, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1554 = vector.insert %1553, %1552 [0, 2, 12, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1555 = vector.extract %216[2, 9, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1556 = vector.insert %1555, %1554 [0, 2, 13, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1557 = vector.extract %216[2, 9, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1558 = vector.insert %1557, %1556 [0, 2, 14, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1559 = vector.extract %216[2, 9, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1560 = vector.insert %1559, %1558 [0, 2, 15, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1561 = vector.extract %216[2, 10, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1562 = vector.insert %1561, %1560 [0, 2, 0, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1563 = vector.extract %216[2, 10, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1564 = vector.insert %1563, %1562 [0, 2, 1, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1565 = vector.extract %216[2, 10, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1566 = vector.insert %1565, %1564 [0, 2, 2, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1567 = vector.extract %216[2, 10, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1568 = vector.insert %1567, %1566 [0, 2, 3, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1569 = vector.extract %216[2, 10, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1570 = vector.insert %1569, %1568 [0, 2, 4, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1571 = vector.extract %216[2, 10, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1572 = vector.insert %1571, %1570 [0, 2, 5, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1573 = vector.extract %216[2, 10, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1574 = vector.insert %1573, %1572 [0, 2, 6, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1575 = vector.extract %216[2, 10, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1576 = vector.insert %1575, %1574 [0, 2, 7, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1577 = vector.extract %216[2, 10, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1578 = vector.insert %1577, %1576 [0, 2, 8, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1579 = vector.extract %216[2, 10, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1580 = vector.insert %1579, %1578 [0, 2, 9, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1581 = vector.extract %216[2, 10, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1582 = vector.insert %1581, %1580 [0, 2, 10, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1583 = vector.extract %216[2, 10, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1584 = vector.insert %1583, %1582 [0, 2, 11, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1585 = vector.extract %216[2, 10, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1586 = vector.insert %1585, %1584 [0, 2, 12, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1587 = vector.extract %216[2, 10, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1588 = vector.insert %1587, %1586 [0, 2, 13, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1589 = vector.extract %216[2, 10, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1590 = vector.insert %1589, %1588 [0, 2, 14, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1591 = vector.extract %216[2, 10, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1592 = vector.insert %1591, %1590 [0, 2, 15, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1593 = vector.extract %216[2, 11, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1594 = vector.insert %1593, %1592 [0, 2, 0, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1595 = vector.extract %216[2, 11, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1596 = vector.insert %1595, %1594 [0, 2, 1, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1597 = vector.extract %216[2, 11, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1598 = vector.insert %1597, %1596 [0, 2, 2, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1599 = vector.extract %216[2, 11, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1600 = vector.insert %1599, %1598 [0, 2, 3, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1601 = vector.extract %216[2, 11, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1602 = vector.insert %1601, %1600 [0, 2, 4, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1603 = vector.extract %216[2, 11, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1604 = vector.insert %1603, %1602 [0, 2, 5, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1605 = vector.extract %216[2, 11, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1606 = vector.insert %1605, %1604 [0, 2, 6, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1607 = vector.extract %216[2, 11, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1608 = vector.insert %1607, %1606 [0, 2, 7, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1609 = vector.extract %216[2, 11, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1610 = vector.insert %1609, %1608 [0, 2, 8, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1611 = vector.extract %216[2, 11, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1612 = vector.insert %1611, %1610 [0, 2, 9, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1613 = vector.extract %216[2, 11, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1614 = vector.insert %1613, %1612 [0, 2, 10, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1615 = vector.extract %216[2, 11, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1616 = vector.insert %1615, %1614 [0, 2, 11, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1617 = vector.extract %216[2, 11, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1618 = vector.insert %1617, %1616 [0, 2, 12, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1619 = vector.extract %216[2, 11, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1620 = vector.insert %1619, %1618 [0, 2, 13, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1621 = vector.extract %216[2, 11, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1622 = vector.insert %1621, %1620 [0, 2, 14, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1623 = vector.extract %216[2, 11, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1624 = vector.insert %1623, %1622 [0, 2, 15, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1625 = vector.extract %216[2, 12, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1626 = vector.insert %1625, %1624 [0, 2, 0, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1627 = vector.extract %216[2, 12, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1628 = vector.insert %1627, %1626 [0, 2, 1, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1629 = vector.extract %216[2, 12, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1630 = vector.insert %1629, %1628 [0, 2, 2, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1631 = vector.extract %216[2, 12, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1632 = vector.insert %1631, %1630 [0, 2, 3, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1633 = vector.extract %216[2, 12, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1634 = vector.insert %1633, %1632 [0, 2, 4, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1635 = vector.extract %216[2, 12, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1636 = vector.insert %1635, %1634 [0, 2, 5, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1637 = vector.extract %216[2, 12, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1638 = vector.insert %1637, %1636 [0, 2, 6, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1639 = vector.extract %216[2, 12, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1640 = vector.insert %1639, %1638 [0, 2, 7, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1641 = vector.extract %216[2, 12, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1642 = vector.insert %1641, %1640 [0, 2, 8, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1643 = vector.extract %216[2, 12, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1644 = vector.insert %1643, %1642 [0, 2, 9, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1645 = vector.extract %216[2, 12, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1646 = vector.insert %1645, %1644 [0, 2, 10, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1647 = vector.extract %216[2, 12, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1648 = vector.insert %1647, %1646 [0, 2, 11, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1649 = vector.extract %216[2, 12, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1650 = vector.insert %1649, %1648 [0, 2, 12, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1651 = vector.extract %216[2, 12, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1652 = vector.insert %1651, %1650 [0, 2, 13, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1653 = vector.extract %216[2, 12, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1654 = vector.insert %1653, %1652 [0, 2, 14, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1655 = vector.extract %216[2, 12, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1656 = vector.insert %1655, %1654 [0, 2, 15, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1657 = vector.extract %216[2, 13, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1658 = vector.insert %1657, %1656 [0, 2, 0, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1659 = vector.extract %216[2, 13, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1660 = vector.insert %1659, %1658 [0, 2, 1, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1661 = vector.extract %216[2, 13, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1662 = vector.insert %1661, %1660 [0, 2, 2, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1663 = vector.extract %216[2, 13, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1664 = vector.insert %1663, %1662 [0, 2, 3, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1665 = vector.extract %216[2, 13, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1666 = vector.insert %1665, %1664 [0, 2, 4, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1667 = vector.extract %216[2, 13, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1668 = vector.insert %1667, %1666 [0, 2, 5, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1669 = vector.extract %216[2, 13, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1670 = vector.insert %1669, %1668 [0, 2, 6, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1671 = vector.extract %216[2, 13, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1672 = vector.insert %1671, %1670 [0, 2, 7, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1673 = vector.extract %216[2, 13, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1674 = vector.insert %1673, %1672 [0, 2, 8, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1675 = vector.extract %216[2, 13, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1676 = vector.insert %1675, %1674 [0, 2, 9, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1677 = vector.extract %216[2, 13, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1678 = vector.insert %1677, %1676 [0, 2, 10, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1679 = vector.extract %216[2, 13, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1680 = vector.insert %1679, %1678 [0, 2, 11, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1681 = vector.extract %216[2, 13, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1682 = vector.insert %1681, %1680 [0, 2, 12, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1683 = vector.extract %216[2, 13, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1684 = vector.insert %1683, %1682 [0, 2, 13, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1685 = vector.extract %216[2, 13, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1686 = vector.insert %1685, %1684 [0, 2, 14, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1687 = vector.extract %216[2, 13, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1688 = vector.insert %1687, %1686 [0, 2, 15, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1689 = vector.extract %216[2, 14, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1690 = vector.insert %1689, %1688 [0, 2, 0, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1691 = vector.extract %216[2, 14, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1692 = vector.insert %1691, %1690 [0, 2, 1, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1693 = vector.extract %216[2, 14, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1694 = vector.insert %1693, %1692 [0, 2, 2, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1695 = vector.extract %216[2, 14, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1696 = vector.insert %1695, %1694 [0, 2, 3, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1697 = vector.extract %216[2, 14, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1698 = vector.insert %1697, %1696 [0, 2, 4, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1699 = vector.extract %216[2, 14, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1700 = vector.insert %1699, %1698 [0, 2, 5, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1701 = vector.extract %216[2, 14, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1702 = vector.insert %1701, %1700 [0, 2, 6, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1703 = vector.extract %216[2, 14, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1704 = vector.insert %1703, %1702 [0, 2, 7, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1705 = vector.extract %216[2, 14, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1706 = vector.insert %1705, %1704 [0, 2, 8, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1707 = vector.extract %216[2, 14, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1708 = vector.insert %1707, %1706 [0, 2, 9, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1709 = vector.extract %216[2, 14, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1710 = vector.insert %1709, %1708 [0, 2, 10, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1711 = vector.extract %216[2, 14, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1712 = vector.insert %1711, %1710 [0, 2, 11, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1713 = vector.extract %216[2, 14, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1714 = vector.insert %1713, %1712 [0, 2, 12, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1715 = vector.extract %216[2, 14, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1716 = vector.insert %1715, %1714 [0, 2, 13, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1717 = vector.extract %216[2, 14, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1718 = vector.insert %1717, %1716 [0, 2, 14, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1719 = vector.extract %216[2, 14, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1720 = vector.insert %1719, %1718 [0, 2, 15, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1721 = vector.extract %216[2, 15, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1722 = vector.insert %1721, %1720 [0, 2, 0, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1723 = vector.extract %216[2, 15, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1724 = vector.insert %1723, %1722 [0, 2, 1, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1725 = vector.extract %216[2, 15, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1726 = vector.insert %1725, %1724 [0, 2, 2, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1727 = vector.extract %216[2, 15, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1728 = vector.insert %1727, %1726 [0, 2, 3, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1729 = vector.extract %216[2, 15, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1730 = vector.insert %1729, %1728 [0, 2, 4, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1731 = vector.extract %216[2, 15, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1732 = vector.insert %1731, %1730 [0, 2, 5, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1733 = vector.extract %216[2, 15, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1734 = vector.insert %1733, %1732 [0, 2, 6, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1735 = vector.extract %216[2, 15, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1736 = vector.insert %1735, %1734 [0, 2, 7, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1737 = vector.extract %216[2, 15, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1738 = vector.insert %1737, %1736 [0, 2, 8, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1739 = vector.extract %216[2, 15, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1740 = vector.insert %1739, %1738 [0, 2, 9, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1741 = vector.extract %216[2, 15, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1742 = vector.insert %1741, %1740 [0, 2, 10, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1743 = vector.extract %216[2, 15, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1744 = vector.insert %1743, %1742 [0, 2, 11, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1745 = vector.extract %216[2, 15, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1746 = vector.insert %1745, %1744 [0, 2, 12, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1747 = vector.extract %216[2, 15, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1748 = vector.insert %1747, %1746 [0, 2, 13, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1749 = vector.extract %216[2, 15, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1750 = vector.insert %1749, %1748 [0, 2, 14, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1751 = vector.extract %216[2, 15, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1752 = vector.insert %1751, %1750 [0, 2, 15, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1753 = vector.extract %216[3, 0, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1754 = vector.insert %1753, %1752 [0, 3, 0, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1755 = vector.extract %216[3, 0, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1756 = vector.insert %1755, %1754 [0, 3, 1, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1757 = vector.extract %216[3, 0, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1758 = vector.insert %1757, %1756 [0, 3, 2, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1759 = vector.extract %216[3, 0, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1760 = vector.insert %1759, %1758 [0, 3, 3, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1761 = vector.extract %216[3, 0, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1762 = vector.insert %1761, %1760 [0, 3, 4, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1763 = vector.extract %216[3, 0, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1764 = vector.insert %1763, %1762 [0, 3, 5, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1765 = vector.extract %216[3, 0, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1766 = vector.insert %1765, %1764 [0, 3, 6, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1767 = vector.extract %216[3, 0, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1768 = vector.insert %1767, %1766 [0, 3, 7, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1769 = vector.extract %216[3, 0, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1770 = vector.insert %1769, %1768 [0, 3, 8, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1771 = vector.extract %216[3, 0, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1772 = vector.insert %1771, %1770 [0, 3, 9, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1773 = vector.extract %216[3, 0, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1774 = vector.insert %1773, %1772 [0, 3, 10, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1775 = vector.extract %216[3, 0, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1776 = vector.insert %1775, %1774 [0, 3, 11, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1777 = vector.extract %216[3, 0, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1778 = vector.insert %1777, %1776 [0, 3, 12, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1779 = vector.extract %216[3, 0, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1780 = vector.insert %1779, %1778 [0, 3, 13, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1781 = vector.extract %216[3, 0, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1782 = vector.insert %1781, %1780 [0, 3, 14, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1783 = vector.extract %216[3, 0, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1784 = vector.insert %1783, %1782 [0, 3, 15, 0] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1785 = vector.extract %216[3, 1, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1786 = vector.insert %1785, %1784 [0, 3, 0, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1787 = vector.extract %216[3, 1, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1788 = vector.insert %1787, %1786 [0, 3, 1, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1789 = vector.extract %216[3, 1, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1790 = vector.insert %1789, %1788 [0, 3, 2, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1791 = vector.extract %216[3, 1, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1792 = vector.insert %1791, %1790 [0, 3, 3, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1793 = vector.extract %216[3, 1, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1794 = vector.insert %1793, %1792 [0, 3, 4, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1795 = vector.extract %216[3, 1, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1796 = vector.insert %1795, %1794 [0, 3, 5, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1797 = vector.extract %216[3, 1, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1798 = vector.insert %1797, %1796 [0, 3, 6, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1799 = vector.extract %216[3, 1, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1800 = vector.insert %1799, %1798 [0, 3, 7, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1801 = vector.extract %216[3, 1, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1802 = vector.insert %1801, %1800 [0, 3, 8, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1803 = vector.extract %216[3, 1, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1804 = vector.insert %1803, %1802 [0, 3, 9, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1805 = vector.extract %216[3, 1, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1806 = vector.insert %1805, %1804 [0, 3, 10, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1807 = vector.extract %216[3, 1, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1808 = vector.insert %1807, %1806 [0, 3, 11, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1809 = vector.extract %216[3, 1, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1810 = vector.insert %1809, %1808 [0, 3, 12, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1811 = vector.extract %216[3, 1, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1812 = vector.insert %1811, %1810 [0, 3, 13, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1813 = vector.extract %216[3, 1, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1814 = vector.insert %1813, %1812 [0, 3, 14, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1815 = vector.extract %216[3, 1, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1816 = vector.insert %1815, %1814 [0, 3, 15, 1] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1817 = vector.extract %216[3, 2, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1818 = vector.insert %1817, %1816 [0, 3, 0, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1819 = vector.extract %216[3, 2, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1820 = vector.insert %1819, %1818 [0, 3, 1, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1821 = vector.extract %216[3, 2, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1822 = vector.insert %1821, %1820 [0, 3, 2, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1823 = vector.extract %216[3, 2, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1824 = vector.insert %1823, %1822 [0, 3, 3, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1825 = vector.extract %216[3, 2, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1826 = vector.insert %1825, %1824 [0, 3, 4, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1827 = vector.extract %216[3, 2, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1828 = vector.insert %1827, %1826 [0, 3, 5, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1829 = vector.extract %216[3, 2, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1830 = vector.insert %1829, %1828 [0, 3, 6, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1831 = vector.extract %216[3, 2, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1832 = vector.insert %1831, %1830 [0, 3, 7, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1833 = vector.extract %216[3, 2, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1834 = vector.insert %1833, %1832 [0, 3, 8, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1835 = vector.extract %216[3, 2, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1836 = vector.insert %1835, %1834 [0, 3, 9, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1837 = vector.extract %216[3, 2, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1838 = vector.insert %1837, %1836 [0, 3, 10, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1839 = vector.extract %216[3, 2, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1840 = vector.insert %1839, %1838 [0, 3, 11, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1841 = vector.extract %216[3, 2, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1842 = vector.insert %1841, %1840 [0, 3, 12, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1843 = vector.extract %216[3, 2, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1844 = vector.insert %1843, %1842 [0, 3, 13, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1845 = vector.extract %216[3, 2, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1846 = vector.insert %1845, %1844 [0, 3, 14, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1847 = vector.extract %216[3, 2, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1848 = vector.insert %1847, %1846 [0, 3, 15, 2] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1849 = vector.extract %216[3, 3, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1850 = vector.insert %1849, %1848 [0, 3, 0, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1851 = vector.extract %216[3, 3, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1852 = vector.insert %1851, %1850 [0, 3, 1, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1853 = vector.extract %216[3, 3, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1854 = vector.insert %1853, %1852 [0, 3, 2, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1855 = vector.extract %216[3, 3, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1856 = vector.insert %1855, %1854 [0, 3, 3, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1857 = vector.extract %216[3, 3, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1858 = vector.insert %1857, %1856 [0, 3, 4, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1859 = vector.extract %216[3, 3, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1860 = vector.insert %1859, %1858 [0, 3, 5, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1861 = vector.extract %216[3, 3, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1862 = vector.insert %1861, %1860 [0, 3, 6, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1863 = vector.extract %216[3, 3, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1864 = vector.insert %1863, %1862 [0, 3, 7, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1865 = vector.extract %216[3, 3, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1866 = vector.insert %1865, %1864 [0, 3, 8, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1867 = vector.extract %216[3, 3, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1868 = vector.insert %1867, %1866 [0, 3, 9, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1869 = vector.extract %216[3, 3, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1870 = vector.insert %1869, %1868 [0, 3, 10, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1871 = vector.extract %216[3, 3, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1872 = vector.insert %1871, %1870 [0, 3, 11, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1873 = vector.extract %216[3, 3, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1874 = vector.insert %1873, %1872 [0, 3, 12, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1875 = vector.extract %216[3, 3, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1876 = vector.insert %1875, %1874 [0, 3, 13, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1877 = vector.extract %216[3, 3, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1878 = vector.insert %1877, %1876 [0, 3, 14, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1879 = vector.extract %216[3, 3, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1880 = vector.insert %1879, %1878 [0, 3, 15, 3] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1881 = vector.extract %216[3, 4, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1882 = vector.insert %1881, %1880 [0, 3, 0, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1883 = vector.extract %216[3, 4, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1884 = vector.insert %1883, %1882 [0, 3, 1, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1885 = vector.extract %216[3, 4, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1886 = vector.insert %1885, %1884 [0, 3, 2, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1887 = vector.extract %216[3, 4, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1888 = vector.insert %1887, %1886 [0, 3, 3, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1889 = vector.extract %216[3, 4, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1890 = vector.insert %1889, %1888 [0, 3, 4, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1891 = vector.extract %216[3, 4, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1892 = vector.insert %1891, %1890 [0, 3, 5, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1893 = vector.extract %216[3, 4, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1894 = vector.insert %1893, %1892 [0, 3, 6, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1895 = vector.extract %216[3, 4, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1896 = vector.insert %1895, %1894 [0, 3, 7, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1897 = vector.extract %216[3, 4, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1898 = vector.insert %1897, %1896 [0, 3, 8, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1899 = vector.extract %216[3, 4, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1900 = vector.insert %1899, %1898 [0, 3, 9, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1901 = vector.extract %216[3, 4, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1902 = vector.insert %1901, %1900 [0, 3, 10, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1903 = vector.extract %216[3, 4, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1904 = vector.insert %1903, %1902 [0, 3, 11, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1905 = vector.extract %216[3, 4, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1906 = vector.insert %1905, %1904 [0, 3, 12, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1907 = vector.extract %216[3, 4, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1908 = vector.insert %1907, %1906 [0, 3, 13, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1909 = vector.extract %216[3, 4, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1910 = vector.insert %1909, %1908 [0, 3, 14, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1911 = vector.extract %216[3, 4, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1912 = vector.insert %1911, %1910 [0, 3, 15, 4] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1913 = vector.extract %216[3, 5, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1914 = vector.insert %1913, %1912 [0, 3, 0, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1915 = vector.extract %216[3, 5, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1916 = vector.insert %1915, %1914 [0, 3, 1, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1917 = vector.extract %216[3, 5, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1918 = vector.insert %1917, %1916 [0, 3, 2, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1919 = vector.extract %216[3, 5, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1920 = vector.insert %1919, %1918 [0, 3, 3, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1921 = vector.extract %216[3, 5, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1922 = vector.insert %1921, %1920 [0, 3, 4, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1923 = vector.extract %216[3, 5, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1924 = vector.insert %1923, %1922 [0, 3, 5, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1925 = vector.extract %216[3, 5, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1926 = vector.insert %1925, %1924 [0, 3, 6, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1927 = vector.extract %216[3, 5, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1928 = vector.insert %1927, %1926 [0, 3, 7, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1929 = vector.extract %216[3, 5, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1930 = vector.insert %1929, %1928 [0, 3, 8, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1931 = vector.extract %216[3, 5, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1932 = vector.insert %1931, %1930 [0, 3, 9, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1933 = vector.extract %216[3, 5, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1934 = vector.insert %1933, %1932 [0, 3, 10, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1935 = vector.extract %216[3, 5, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1936 = vector.insert %1935, %1934 [0, 3, 11, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1937 = vector.extract %216[3, 5, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1938 = vector.insert %1937, %1936 [0, 3, 12, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1939 = vector.extract %216[3, 5, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1940 = vector.insert %1939, %1938 [0, 3, 13, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1941 = vector.extract %216[3, 5, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1942 = vector.insert %1941, %1940 [0, 3, 14, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1943 = vector.extract %216[3, 5, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1944 = vector.insert %1943, %1942 [0, 3, 15, 5] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1945 = vector.extract %216[3, 6, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1946 = vector.insert %1945, %1944 [0, 3, 0, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1947 = vector.extract %216[3, 6, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1948 = vector.insert %1947, %1946 [0, 3, 1, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1949 = vector.extract %216[3, 6, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1950 = vector.insert %1949, %1948 [0, 3, 2, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1951 = vector.extract %216[3, 6, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1952 = vector.insert %1951, %1950 [0, 3, 3, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1953 = vector.extract %216[3, 6, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1954 = vector.insert %1953, %1952 [0, 3, 4, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1955 = vector.extract %216[3, 6, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1956 = vector.insert %1955, %1954 [0, 3, 5, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1957 = vector.extract %216[3, 6, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1958 = vector.insert %1957, %1956 [0, 3, 6, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1959 = vector.extract %216[3, 6, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1960 = vector.insert %1959, %1958 [0, 3, 7, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1961 = vector.extract %216[3, 6, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1962 = vector.insert %1961, %1960 [0, 3, 8, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1963 = vector.extract %216[3, 6, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1964 = vector.insert %1963, %1962 [0, 3, 9, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1965 = vector.extract %216[3, 6, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1966 = vector.insert %1965, %1964 [0, 3, 10, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1967 = vector.extract %216[3, 6, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1968 = vector.insert %1967, %1966 [0, 3, 11, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1969 = vector.extract %216[3, 6, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1970 = vector.insert %1969, %1968 [0, 3, 12, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1971 = vector.extract %216[3, 6, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1972 = vector.insert %1971, %1970 [0, 3, 13, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1973 = vector.extract %216[3, 6, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1974 = vector.insert %1973, %1972 [0, 3, 14, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1975 = vector.extract %216[3, 6, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1976 = vector.insert %1975, %1974 [0, 3, 15, 6] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1977 = vector.extract %216[3, 7, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1978 = vector.insert %1977, %1976 [0, 3, 0, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1979 = vector.extract %216[3, 7, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1980 = vector.insert %1979, %1978 [0, 3, 1, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1981 = vector.extract %216[3, 7, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1982 = vector.insert %1981, %1980 [0, 3, 2, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1983 = vector.extract %216[3, 7, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1984 = vector.insert %1983, %1982 [0, 3, 3, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1985 = vector.extract %216[3, 7, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1986 = vector.insert %1985, %1984 [0, 3, 4, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1987 = vector.extract %216[3, 7, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1988 = vector.insert %1987, %1986 [0, 3, 5, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1989 = vector.extract %216[3, 7, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1990 = vector.insert %1989, %1988 [0, 3, 6, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1991 = vector.extract %216[3, 7, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1992 = vector.insert %1991, %1990 [0, 3, 7, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1993 = vector.extract %216[3, 7, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1994 = vector.insert %1993, %1992 [0, 3, 8, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1995 = vector.extract %216[3, 7, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1996 = vector.insert %1995, %1994 [0, 3, 9, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1997 = vector.extract %216[3, 7, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%1998 = vector.insert %1997, %1996 [0, 3, 10, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%1999 = vector.extract %216[3, 7, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2000 = vector.insert %1999, %1998 [0, 3, 11, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2001 = vector.extract %216[3, 7, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2002 = vector.insert %2001, %2000 [0, 3, 12, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2003 = vector.extract %216[3, 7, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2004 = vector.insert %2003, %2002 [0, 3, 13, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2005 = vector.extract %216[3, 7, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2006 = vector.insert %2005, %2004 [0, 3, 14, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2007 = vector.extract %216[3, 7, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2008 = vector.insert %2007, %2006 [0, 3, 15, 7] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2009 = vector.extract %216[3, 8, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2010 = vector.insert %2009, %2008 [0, 3, 0, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2011 = vector.extract %216[3, 8, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2012 = vector.insert %2011, %2010 [0, 3, 1, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2013 = vector.extract %216[3, 8, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2014 = vector.insert %2013, %2012 [0, 3, 2, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2015 = vector.extract %216[3, 8, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2016 = vector.insert %2015, %2014 [0, 3, 3, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2017 = vector.extract %216[3, 8, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2018 = vector.insert %2017, %2016 [0, 3, 4, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2019 = vector.extract %216[3, 8, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2020 = vector.insert %2019, %2018 [0, 3, 5, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2021 = vector.extract %216[3, 8, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2022 = vector.insert %2021, %2020 [0, 3, 6, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2023 = vector.extract %216[3, 8, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2024 = vector.insert %2023, %2022 [0, 3, 7, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2025 = vector.extract %216[3, 8, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2026 = vector.insert %2025, %2024 [0, 3, 8, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2027 = vector.extract %216[3, 8, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2028 = vector.insert %2027, %2026 [0, 3, 9, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2029 = vector.extract %216[3, 8, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2030 = vector.insert %2029, %2028 [0, 3, 10, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2031 = vector.extract %216[3, 8, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2032 = vector.insert %2031, %2030 [0, 3, 11, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2033 = vector.extract %216[3, 8, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2034 = vector.insert %2033, %2032 [0, 3, 12, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2035 = vector.extract %216[3, 8, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2036 = vector.insert %2035, %2034 [0, 3, 13, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2037 = vector.extract %216[3, 8, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2038 = vector.insert %2037, %2036 [0, 3, 14, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2039 = vector.extract %216[3, 8, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2040 = vector.insert %2039, %2038 [0, 3, 15, 8] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2041 = vector.extract %216[3, 9, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2042 = vector.insert %2041, %2040 [0, 3, 0, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2043 = vector.extract %216[3, 9, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2044 = vector.insert %2043, %2042 [0, 3, 1, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2045 = vector.extract %216[3, 9, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2046 = vector.insert %2045, %2044 [0, 3, 2, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2047 = vector.extract %216[3, 9, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2048 = vector.insert %2047, %2046 [0, 3, 3, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2049 = vector.extract %216[3, 9, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2050 = vector.insert %2049, %2048 [0, 3, 4, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2051 = vector.extract %216[3, 9, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2052 = vector.insert %2051, %2050 [0, 3, 5, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2053 = vector.extract %216[3, 9, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2054 = vector.insert %2053, %2052 [0, 3, 6, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2055 = vector.extract %216[3, 9, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2056 = vector.insert %2055, %2054 [0, 3, 7, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2057 = vector.extract %216[3, 9, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2058 = vector.insert %2057, %2056 [0, 3, 8, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2059 = vector.extract %216[3, 9, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2060 = vector.insert %2059, %2058 [0, 3, 9, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2061 = vector.extract %216[3, 9, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2062 = vector.insert %2061, %2060 [0, 3, 10, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2063 = vector.extract %216[3, 9, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2064 = vector.insert %2063, %2062 [0, 3, 11, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2065 = vector.extract %216[3, 9, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2066 = vector.insert %2065, %2064 [0, 3, 12, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2067 = vector.extract %216[3, 9, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2068 = vector.insert %2067, %2066 [0, 3, 13, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2069 = vector.extract %216[3, 9, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2070 = vector.insert %2069, %2068 [0, 3, 14, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2071 = vector.extract %216[3, 9, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2072 = vector.insert %2071, %2070 [0, 3, 15, 9] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2073 = vector.extract %216[3, 10, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2074 = vector.insert %2073, %2072 [0, 3, 0, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2075 = vector.extract %216[3, 10, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2076 = vector.insert %2075, %2074 [0, 3, 1, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2077 = vector.extract %216[3, 10, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2078 = vector.insert %2077, %2076 [0, 3, 2, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2079 = vector.extract %216[3, 10, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2080 = vector.insert %2079, %2078 [0, 3, 3, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2081 = vector.extract %216[3, 10, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2082 = vector.insert %2081, %2080 [0, 3, 4, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2083 = vector.extract %216[3, 10, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2084 = vector.insert %2083, %2082 [0, 3, 5, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2085 = vector.extract %216[3, 10, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2086 = vector.insert %2085, %2084 [0, 3, 6, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2087 = vector.extract %216[3, 10, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2088 = vector.insert %2087, %2086 [0, 3, 7, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2089 = vector.extract %216[3, 10, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2090 = vector.insert %2089, %2088 [0, 3, 8, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2091 = vector.extract %216[3, 10, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2092 = vector.insert %2091, %2090 [0, 3, 9, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2093 = vector.extract %216[3, 10, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2094 = vector.insert %2093, %2092 [0, 3, 10, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2095 = vector.extract %216[3, 10, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2096 = vector.insert %2095, %2094 [0, 3, 11, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2097 = vector.extract %216[3, 10, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2098 = vector.insert %2097, %2096 [0, 3, 12, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2099 = vector.extract %216[3, 10, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2100 = vector.insert %2099, %2098 [0, 3, 13, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2101 = vector.extract %216[3, 10, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2102 = vector.insert %2101, %2100 [0, 3, 14, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2103 = vector.extract %216[3, 10, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2104 = vector.insert %2103, %2102 [0, 3, 15, 10] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2105 = vector.extract %216[3, 11, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2106 = vector.insert %2105, %2104 [0, 3, 0, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2107 = vector.extract %216[3, 11, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2108 = vector.insert %2107, %2106 [0, 3, 1, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2109 = vector.extract %216[3, 11, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2110 = vector.insert %2109, %2108 [0, 3, 2, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2111 = vector.extract %216[3, 11, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2112 = vector.insert %2111, %2110 [0, 3, 3, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2113 = vector.extract %216[3, 11, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2114 = vector.insert %2113, %2112 [0, 3, 4, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2115 = vector.extract %216[3, 11, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2116 = vector.insert %2115, %2114 [0, 3, 5, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2117 = vector.extract %216[3, 11, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2118 = vector.insert %2117, %2116 [0, 3, 6, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2119 = vector.extract %216[3, 11, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2120 = vector.insert %2119, %2118 [0, 3, 7, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2121 = vector.extract %216[3, 11, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2122 = vector.insert %2121, %2120 [0, 3, 8, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2123 = vector.extract %216[3, 11, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2124 = vector.insert %2123, %2122 [0, 3, 9, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2125 = vector.extract %216[3, 11, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2126 = vector.insert %2125, %2124 [0, 3, 10, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2127 = vector.extract %216[3, 11, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2128 = vector.insert %2127, %2126 [0, 3, 11, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2129 = vector.extract %216[3, 11, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2130 = vector.insert %2129, %2128 [0, 3, 12, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2131 = vector.extract %216[3, 11, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2132 = vector.insert %2131, %2130 [0, 3, 13, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2133 = vector.extract %216[3, 11, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2134 = vector.insert %2133, %2132 [0, 3, 14, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2135 = vector.extract %216[3, 11, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2136 = vector.insert %2135, %2134 [0, 3, 15, 11] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2137 = vector.extract %216[3, 12, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2138 = vector.insert %2137, %2136 [0, 3, 0, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2139 = vector.extract %216[3, 12, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2140 = vector.insert %2139, %2138 [0, 3, 1, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2141 = vector.extract %216[3, 12, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2142 = vector.insert %2141, %2140 [0, 3, 2, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2143 = vector.extract %216[3, 12, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2144 = vector.insert %2143, %2142 [0, 3, 3, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2145 = vector.extract %216[3, 12, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2146 = vector.insert %2145, %2144 [0, 3, 4, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2147 = vector.extract %216[3, 12, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2148 = vector.insert %2147, %2146 [0, 3, 5, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2149 = vector.extract %216[3, 12, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2150 = vector.insert %2149, %2148 [0, 3, 6, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2151 = vector.extract %216[3, 12, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2152 = vector.insert %2151, %2150 [0, 3, 7, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2153 = vector.extract %216[3, 12, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2154 = vector.insert %2153, %2152 [0, 3, 8, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2155 = vector.extract %216[3, 12, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2156 = vector.insert %2155, %2154 [0, 3, 9, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2157 = vector.extract %216[3, 12, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2158 = vector.insert %2157, %2156 [0, 3, 10, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2159 = vector.extract %216[3, 12, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2160 = vector.insert %2159, %2158 [0, 3, 11, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2161 = vector.extract %216[3, 12, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2162 = vector.insert %2161, %2160 [0, 3, 12, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2163 = vector.extract %216[3, 12, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2164 = vector.insert %2163, %2162 [0, 3, 13, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2165 = vector.extract %216[3, 12, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2166 = vector.insert %2165, %2164 [0, 3, 14, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2167 = vector.extract %216[3, 12, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2168 = vector.insert %2167, %2166 [0, 3, 15, 12] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2169 = vector.extract %216[3, 13, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2170 = vector.insert %2169, %2168 [0, 3, 0, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2171 = vector.extract %216[3, 13, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2172 = vector.insert %2171, %2170 [0, 3, 1, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2173 = vector.extract %216[3, 13, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2174 = vector.insert %2173, %2172 [0, 3, 2, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2175 = vector.extract %216[3, 13, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2176 = vector.insert %2175, %2174 [0, 3, 3, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2177 = vector.extract %216[3, 13, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2178 = vector.insert %2177, %2176 [0, 3, 4, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2179 = vector.extract %216[3, 13, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2180 = vector.insert %2179, %2178 [0, 3, 5, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2181 = vector.extract %216[3, 13, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2182 = vector.insert %2181, %2180 [0, 3, 6, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2183 = vector.extract %216[3, 13, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2184 = vector.insert %2183, %2182 [0, 3, 7, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2185 = vector.extract %216[3, 13, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2186 = vector.insert %2185, %2184 [0, 3, 8, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2187 = vector.extract %216[3, 13, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2188 = vector.insert %2187, %2186 [0, 3, 9, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2189 = vector.extract %216[3, 13, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2190 = vector.insert %2189, %2188 [0, 3, 10, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2191 = vector.extract %216[3, 13, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2192 = vector.insert %2191, %2190 [0, 3, 11, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2193 = vector.extract %216[3, 13, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2194 = vector.insert %2193, %2192 [0, 3, 12, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2195 = vector.extract %216[3, 13, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2196 = vector.insert %2195, %2194 [0, 3, 13, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2197 = vector.extract %216[3, 13, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2198 = vector.insert %2197, %2196 [0, 3, 14, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2199 = vector.extract %216[3, 13, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2200 = vector.insert %2199, %2198 [0, 3, 15, 13] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2201 = vector.extract %216[3, 14, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2202 = vector.insert %2201, %2200 [0, 3, 0, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2203 = vector.extract %216[3, 14, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2204 = vector.insert %2203, %2202 [0, 3, 1, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2205 = vector.extract %216[3, 14, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2206 = vector.insert %2205, %2204 [0, 3, 2, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2207 = vector.extract %216[3, 14, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2208 = vector.insert %2207, %2206 [0, 3, 3, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2209 = vector.extract %216[3, 14, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2210 = vector.insert %2209, %2208 [0, 3, 4, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2211 = vector.extract %216[3, 14, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2212 = vector.insert %2211, %2210 [0, 3, 5, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2213 = vector.extract %216[3, 14, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2214 = vector.insert %2213, %2212 [0, 3, 6, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2215 = vector.extract %216[3, 14, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2216 = vector.insert %2215, %2214 [0, 3, 7, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2217 = vector.extract %216[3, 14, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2218 = vector.insert %2217, %2216 [0, 3, 8, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2219 = vector.extract %216[3, 14, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2220 = vector.insert %2219, %2218 [0, 3, 9, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2221 = vector.extract %216[3, 14, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2222 = vector.insert %2221, %2220 [0, 3, 10, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2223 = vector.extract %216[3, 14, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2224 = vector.insert %2223, %2222 [0, 3, 11, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2225 = vector.extract %216[3, 14, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2226 = vector.insert %2225, %2224 [0, 3, 12, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2227 = vector.extract %216[3, 14, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2228 = vector.insert %2227, %2226 [0, 3, 13, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2229 = vector.extract %216[3, 14, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2230 = vector.insert %2229, %2228 [0, 3, 14, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2231 = vector.extract %216[3, 14, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2232 = vector.insert %2231, %2230 [0, 3, 15, 14] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2233 = vector.extract %216[3, 15, 0] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2234 = vector.insert %2233, %2232 [0, 3, 0, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2235 = vector.extract %216[3, 15, 1] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2236 = vector.insert %2235, %2234 [0, 3, 1, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2237 = vector.extract %216[3, 15, 2] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2238 = vector.insert %2237, %2236 [0, 3, 2, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2239 = vector.extract %216[3, 15, 3] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2240 = vector.insert %2239, %2238 [0, 3, 3, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2241 = vector.extract %216[3, 15, 4] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2242 = vector.insert %2241, %2240 [0, 3, 4, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2243 = vector.extract %216[3, 15, 5] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2244 = vector.insert %2243, %2242 [0, 3, 5, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2245 = vector.extract %216[3, 15, 6] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2246 = vector.insert %2245, %2244 [0, 3, 6, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2247 = vector.extract %216[3, 15, 7] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2248 = vector.insert %2247, %2246 [0, 3, 7, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2249 = vector.extract %216[3, 15, 8] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2250 = vector.insert %2249, %2248 [0, 3, 8, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2251 = vector.extract %216[3, 15, 9] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2252 = vector.insert %2251, %2250 [0, 3, 9, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2253 = vector.extract %216[3, 15, 10] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2254 = vector.insert %2253, %2252 [0, 3, 10, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2255 = vector.extract %216[3, 15, 11] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2256 = vector.insert %2255, %2254 [0, 3, 11, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2257 = vector.extract %216[3, 15, 12] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2258 = vector.insert %2257, %2256 [0, 3, 12, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2259 = vector.extract %216[3, 15, 13] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2260 = vector.insert %2259, %2258 [0, 3, 13, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2261 = vector.extract %216[3, 15, 14] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2262 = vector.insert %2261, %2260 [0, 3, 14, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2263 = vector.extract %216[3, 15, 15] : vector<1xf16> from vector<4x16x16x1xf16> | |
%2264 = vector.insert %2263, %2262 [0, 3, 15, 15] : vector<1xf16> into vector<1x4x16x16x1xf16> | |
%2265 = vector.extract %2264[0] : vector<4x16x16x1xf16> from vector<1x4x16x16x1xf16> | |
%subview_5 = memref.subview %subview[0, 0, 0, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%2266 = vector.shape_cast %2265 : vector<4x16x16x1xf16> to vector<4x16x16xf16> | |
%2267 = vector.extract %2266[0, 0] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2267, %subview_5[%arg3, %c0, %arg4, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2268 = affine.apply affine_map<(d0) -> (d0 + 1)>(%arg4) | |
%2269 = vector.extract %2266[0, 1] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2269, %subview_5[%arg3, %c0, %2268, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2270 = affine.apply affine_map<(d0) -> (d0 + 2)>(%arg4) | |
%2271 = vector.extract %2266[0, 2] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2271, %subview_5[%arg3, %c0, %2270, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2272 = affine.apply affine_map<(d0) -> (d0 + 3)>(%arg4) | |
%2273 = vector.extract %2266[0, 3] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2273, %subview_5[%arg3, %c0, %2272, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2274 = affine.apply affine_map<(d0) -> (d0 + 4)>(%arg4) | |
%2275 = vector.extract %2266[0, 4] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2275, %subview_5[%arg3, %c0, %2274, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2276 = affine.apply affine_map<(d0) -> (d0 + 5)>(%arg4) | |
%2277 = vector.extract %2266[0, 5] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2277, %subview_5[%arg3, %c0, %2276, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2278 = affine.apply affine_map<(d0) -> (d0 + 6)>(%arg4) | |
%2279 = vector.extract %2266[0, 6] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2279, %subview_5[%arg3, %c0, %2278, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2280 = affine.apply affine_map<(d0) -> (d0 + 7)>(%arg4) | |
%2281 = vector.extract %2266[0, 7] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2281, %subview_5[%arg3, %c0, %2280, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2282 = affine.apply affine_map<(d0) -> (d0 + 8)>(%arg4) | |
%2283 = vector.extract %2266[0, 8] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2283, %subview_5[%arg3, %c0, %2282, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2284 = affine.apply affine_map<(d0) -> (d0 + 9)>(%arg4) | |
%2285 = vector.extract %2266[0, 9] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2285, %subview_5[%arg3, %c0, %2284, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2286 = affine.apply affine_map<(d0) -> (d0 + 10)>(%arg4) | |
%2287 = vector.extract %2266[0, 10] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2287, %subview_5[%arg3, %c0, %2286, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2288 = affine.apply affine_map<(d0) -> (d0 + 11)>(%arg4) | |
%2289 = vector.extract %2266[0, 11] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2289, %subview_5[%arg3, %c0, %2288, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2290 = affine.apply affine_map<(d0) -> (d0 + 12)>(%arg4) | |
%2291 = vector.extract %2266[0, 12] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2291, %subview_5[%arg3, %c0, %2290, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2292 = affine.apply affine_map<(d0) -> (d0 + 13)>(%arg4) | |
%2293 = vector.extract %2266[0, 13] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2293, %subview_5[%arg3, %c0, %2292, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2294 = affine.apply affine_map<(d0) -> (d0 + 14)>(%arg4) | |
%2295 = vector.extract %2266[0, 14] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2295, %subview_5[%arg3, %c0, %2294, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2296 = affine.apply affine_map<(d0) -> (d0 + 15)>(%arg4) | |
%2297 = vector.extract %2266[0, 15] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2297, %subview_5[%arg3, %c0, %2296, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2298 = vector.extract %2266[1, 0] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2298, %subview_5[%arg3, %c1, %arg4, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2299 = affine.apply affine_map<(d0) -> (d0 + 1)>(%arg4) | |
%2300 = vector.extract %2266[1, 1] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2300, %subview_5[%arg3, %c1, %2299, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2301 = affine.apply affine_map<(d0) -> (d0 + 2)>(%arg4) | |
%2302 = vector.extract %2266[1, 2] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2302, %subview_5[%arg3, %c1, %2301, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2303 = affine.apply affine_map<(d0) -> (d0 + 3)>(%arg4) | |
%2304 = vector.extract %2266[1, 3] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2304, %subview_5[%arg3, %c1, %2303, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2305 = affine.apply affine_map<(d0) -> (d0 + 4)>(%arg4) | |
%2306 = vector.extract %2266[1, 4] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2306, %subview_5[%arg3, %c1, %2305, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2307 = affine.apply affine_map<(d0) -> (d0 + 5)>(%arg4) | |
%2308 = vector.extract %2266[1, 5] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2308, %subview_5[%arg3, %c1, %2307, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2309 = affine.apply affine_map<(d0) -> (d0 + 6)>(%arg4) | |
%2310 = vector.extract %2266[1, 6] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2310, %subview_5[%arg3, %c1, %2309, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2311 = affine.apply affine_map<(d0) -> (d0 + 7)>(%arg4) | |
%2312 = vector.extract %2266[1, 7] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2312, %subview_5[%arg3, %c1, %2311, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2313 = affine.apply affine_map<(d0) -> (d0 + 8)>(%arg4) | |
%2314 = vector.extract %2266[1, 8] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2314, %subview_5[%arg3, %c1, %2313, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2315 = affine.apply affine_map<(d0) -> (d0 + 9)>(%arg4) | |
%2316 = vector.extract %2266[1, 9] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2316, %subview_5[%arg3, %c1, %2315, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2317 = affine.apply affine_map<(d0) -> (d0 + 10)>(%arg4) | |
%2318 = vector.extract %2266[1, 10] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2318, %subview_5[%arg3, %c1, %2317, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2319 = affine.apply affine_map<(d0) -> (d0 + 11)>(%arg4) | |
%2320 = vector.extract %2266[1, 11] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2320, %subview_5[%arg3, %c1, %2319, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2321 = affine.apply affine_map<(d0) -> (d0 + 12)>(%arg4) | |
%2322 = vector.extract %2266[1, 12] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2322, %subview_5[%arg3, %c1, %2321, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2323 = affine.apply affine_map<(d0) -> (d0 + 13)>(%arg4) | |
%2324 = vector.extract %2266[1, 13] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2324, %subview_5[%arg3, %c1, %2323, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2325 = affine.apply affine_map<(d0) -> (d0 + 14)>(%arg4) | |
%2326 = vector.extract %2266[1, 14] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2326, %subview_5[%arg3, %c1, %2325, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2327 = affine.apply affine_map<(d0) -> (d0 + 15)>(%arg4) | |
%2328 = vector.extract %2266[1, 15] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2328, %subview_5[%arg3, %c1, %2327, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2329 = vector.extract %2266[2, 0] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2329, %subview_5[%arg3, %c2, %arg4, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2330 = affine.apply affine_map<(d0) -> (d0 + 1)>(%arg4) | |
%2331 = vector.extract %2266[2, 1] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2331, %subview_5[%arg3, %c2, %2330, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2332 = affine.apply affine_map<(d0) -> (d0 + 2)>(%arg4) | |
%2333 = vector.extract %2266[2, 2] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2333, %subview_5[%arg3, %c2, %2332, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2334 = affine.apply affine_map<(d0) -> (d0 + 3)>(%arg4) | |
%2335 = vector.extract %2266[2, 3] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2335, %subview_5[%arg3, %c2, %2334, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2336 = affine.apply affine_map<(d0) -> (d0 + 4)>(%arg4) | |
%2337 = vector.extract %2266[2, 4] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2337, %subview_5[%arg3, %c2, %2336, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2338 = affine.apply affine_map<(d0) -> (d0 + 5)>(%arg4) | |
%2339 = vector.extract %2266[2, 5] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2339, %subview_5[%arg3, %c2, %2338, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2340 = affine.apply affine_map<(d0) -> (d0 + 6)>(%arg4) | |
%2341 = vector.extract %2266[2, 6] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2341, %subview_5[%arg3, %c2, %2340, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2342 = affine.apply affine_map<(d0) -> (d0 + 7)>(%arg4) | |
%2343 = vector.extract %2266[2, 7] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2343, %subview_5[%arg3, %c2, %2342, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2344 = affine.apply affine_map<(d0) -> (d0 + 8)>(%arg4) | |
%2345 = vector.extract %2266[2, 8] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2345, %subview_5[%arg3, %c2, %2344, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2346 = affine.apply affine_map<(d0) -> (d0 + 9)>(%arg4) | |
%2347 = vector.extract %2266[2, 9] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2347, %subview_5[%arg3, %c2, %2346, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2348 = affine.apply affine_map<(d0) -> (d0 + 10)>(%arg4) | |
%2349 = vector.extract %2266[2, 10] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2349, %subview_5[%arg3, %c2, %2348, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2350 = affine.apply affine_map<(d0) -> (d0 + 11)>(%arg4) | |
%2351 = vector.extract %2266[2, 11] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2351, %subview_5[%arg3, %c2, %2350, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2352 = affine.apply affine_map<(d0) -> (d0 + 12)>(%arg4) | |
%2353 = vector.extract %2266[2, 12] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2353, %subview_5[%arg3, %c2, %2352, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2354 = affine.apply affine_map<(d0) -> (d0 + 13)>(%arg4) | |
%2355 = vector.extract %2266[2, 13] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2355, %subview_5[%arg3, %c2, %2354, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2356 = affine.apply affine_map<(d0) -> (d0 + 14)>(%arg4) | |
%2357 = vector.extract %2266[2, 14] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2357, %subview_5[%arg3, %c2, %2356, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2358 = affine.apply affine_map<(d0) -> (d0 + 15)>(%arg4) | |
%2359 = vector.extract %2266[2, 15] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2359, %subview_5[%arg3, %c2, %2358, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2360 = vector.extract %2266[3, 0] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2360, %subview_5[%arg3, %c3, %arg4, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2361 = affine.apply affine_map<(d0) -> (d0 + 1)>(%arg4) | |
%2362 = vector.extract %2266[3, 1] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2362, %subview_5[%arg3, %c3, %2361, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2363 = affine.apply affine_map<(d0) -> (d0 + 2)>(%arg4) | |
%2364 = vector.extract %2266[3, 2] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2364, %subview_5[%arg3, %c3, %2363, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2365 = affine.apply affine_map<(d0) -> (d0 + 3)>(%arg4) | |
%2366 = vector.extract %2266[3, 3] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2366, %subview_5[%arg3, %c3, %2365, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2367 = affine.apply affine_map<(d0) -> (d0 + 4)>(%arg4) | |
%2368 = vector.extract %2266[3, 4] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2368, %subview_5[%arg3, %c3, %2367, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2369 = affine.apply affine_map<(d0) -> (d0 + 5)>(%arg4) | |
%2370 = vector.extract %2266[3, 5] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2370, %subview_5[%arg3, %c3, %2369, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2371 = affine.apply affine_map<(d0) -> (d0 + 6)>(%arg4) | |
%2372 = vector.extract %2266[3, 6] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2372, %subview_5[%arg3, %c3, %2371, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2373 = affine.apply affine_map<(d0) -> (d0 + 7)>(%arg4) | |
%2374 = vector.extract %2266[3, 7] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2374, %subview_5[%arg3, %c3, %2373, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2375 = affine.apply affine_map<(d0) -> (d0 + 8)>(%arg4) | |
%2376 = vector.extract %2266[3, 8] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2376, %subview_5[%arg3, %c3, %2375, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2377 = affine.apply affine_map<(d0) -> (d0 + 9)>(%arg4) | |
%2378 = vector.extract %2266[3, 9] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2378, %subview_5[%arg3, %c3, %2377, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2379 = affine.apply affine_map<(d0) -> (d0 + 10)>(%arg4) | |
%2380 = vector.extract %2266[3, 10] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2380, %subview_5[%arg3, %c3, %2379, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2381 = affine.apply affine_map<(d0) -> (d0 + 11)>(%arg4) | |
%2382 = vector.extract %2266[3, 11] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2382, %subview_5[%arg3, %c3, %2381, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2383 = affine.apply affine_map<(d0) -> (d0 + 12)>(%arg4) | |
%2384 = vector.extract %2266[3, 12] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2384, %subview_5[%arg3, %c3, %2383, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2385 = affine.apply affine_map<(d0) -> (d0 + 13)>(%arg4) | |
%2386 = vector.extract %2266[3, 13] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2386, %subview_5[%arg3, %c3, %2385, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2387 = affine.apply affine_map<(d0) -> (d0 + 14)>(%arg4) | |
%2388 = vector.extract %2266[3, 14] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2388, %subview_5[%arg3, %c3, %2387, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%2389 = affine.apply affine_map<(d0) -> (d0 + 15)>(%arg4) | |
%2390 = vector.extract %2266[3, 15] : vector<16xf16> from vector<4x16x16xf16> | |
vector.store %2390, %subview_5[%arg3, %c3, %2389, %c0] : memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
} | |
} | |
} | |
} | |
} | |
return | |
} | |
// -----// IR Dump After LLVMCPUVectorShapeCastLowering (iree-llvmcpu-vector-shape-cast-lowering) //----- // | |
func.func @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_broadcast_Dx8640x3200_f16_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%cst = arith.constant dense<0.000000e+00> : vector<4x16x16xf16> | |
%c63 = arith.constant 63 : index | |
%c62 = arith.constant 62 : index | |
%c61 = arith.constant 61 : index | |
%c60 = arith.constant 60 : index | |
%c59 = arith.constant 59 : index | |
%c58 = arith.constant 58 : index | |
%c57 = arith.constant 57 : index | |
%c56 = arith.constant 56 : index | |
%c55 = arith.constant 55 : index | |
%c54 = arith.constant 54 : index | |
%c53 = arith.constant 53 : index | |
%c52 = arith.constant 52 : index | |
%c51 = arith.constant 51 : index | |
%c50 = arith.constant 50 : index | |
%c49 = arith.constant 49 : index | |
%c48 = arith.constant 48 : index | |
%c47 = arith.constant 47 : index | |
%c46 = arith.constant 46 : index | |
%c45 = arith.constant 45 : index | |
%c44 = arith.constant 44 : index | |
%c43 = arith.constant 43 : index | |
%c42 = arith.constant 42 : index | |
%c41 = arith.constant 41 : index | |
%c40 = arith.constant 40 : index | |
%c39 = arith.constant 39 : index | |
%c38 = arith.constant 38 : index | |
%c37 = arith.constant 37 : index | |
%c36 = arith.constant 36 : index | |
%c35 = arith.constant 35 : index | |
%c34 = arith.constant 34 : index | |
%c33 = arith.constant 33 : index | |
%c32 = arith.constant 32 : index | |
%c31 = arith.constant 31 : index | |
%c30 = arith.constant 30 : index | |
%c29 = arith.constant 29 : index | |
%c28 = arith.constant 28 : index | |
%c27 = arith.constant 27 : index | |
%c26 = arith.constant 26 : index | |
%c25 = arith.constant 25 : index | |
%c24 = arith.constant 24 : index | |
%c23 = arith.constant 23 : index | |
%c22 = arith.constant 22 : index | |
%c21 = arith.constant 21 : index | |
%c20 = arith.constant 20 : index | |
%c19 = arith.constant 19 : index | |
%c18 = arith.constant 18 : index | |
%c17 = arith.constant 17 : index | |
%c15 = arith.constant 15 : index | |
%c14 = arith.constant 14 : index | |
%c13 = arith.constant 13 : index | |
%c12 = arith.constant 12 : index | |
%c11 = arith.constant 11 : index | |
%c10 = arith.constant 10 : index | |
%c9 = arith.constant 9 : index | |
%c8 = arith.constant 8 : index | |
%c7 = arith.constant 7 : index | |
%c6 = arith.constant 6 : index | |
%c5 = arith.constant 5 : index | |
%c4 = arith.constant 4 : index | |
%c3 = arith.constant 3 : index | |
%c2 = arith.constant 2 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%c0 = arith.constant 0 : index | |
%c540 = arith.constant 540 : index | |
%c3200 = arith.constant 3200 : index | |
%c1 = arith.constant 1 : index | |
%c64 = arith.constant 64 : index | |
%c16 = arith.constant 16 : index | |
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x64x16xf16> | |
%0 = hal.interface.constant.load[0] : i32 | |
%1 = hal.interface.constant.load[1] : i32 | |
%2 = hal.interface.constant.load[2] : i32 | |
%3 = hal.interface.constant.load[3] : i32 | |
%4 = arith.extui %0 : i32 to i64 | |
%5 = arith.extui %1 : i32 to i64 | |
%6 = arith.shli %5, %c32_i64 : i64 | |
%7 = arith.ori %4, %6 : i64 | |
%8 = arith.index_castui %7 : i64 to index | |
%9 = arith.extui %2 : i32 to i64 | |
%10 = arith.extui %3 : i32 to i64 | |
%11 = arith.shli %10, %c32_i64 : i64 | |
%12 = arith.ori %9, %11 : i64 | |
%13 = arith.index_castui %12 : i64 to index | |
%14 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
memref.assume_alignment %14, 64 : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> | |
%15 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>{%13} | |
memref.assume_alignment %15, 1 : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%workgroup_id_y = hal.interface.workgroup.id[1] : index | |
%workgroup_count_y = hal.interface.workgroup.count[1] : index | |
%workgroup_id_z = hal.interface.workgroup.id[2] : index | |
%workgroup_count_z = hal.interface.workgroup.count[2] : index | |
%16 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_z] | |
%17 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_z] | |
%18 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_y] | |
%19 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_y] | |
%20 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x] | |
%21 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_count_x] | |
scf.for %arg0 = %16 to %13 step %17 { | |
%22 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 64)>(%arg0)[%13] | |
scf.for %arg1 = %18 to %c540 step %19 { | |
%23 = affine.apply affine_map<(d0) -> (d0 * 16)>(%arg1) | |
scf.for %arg2 = %20 to %c3200 step %21 { | |
%subview = memref.subview %15[%arg0, %arg1, %arg2, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x540x3200x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%subview_0 = memref.subview %14[%23, %arg2] [64, 64] [1, 1] : memref<8640x3200xf16, #hal.descriptor_type<storage_buffer>> to memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
scf.for %arg3 = %c0 to %22 step %c1 { | |
scf.for %arg4 = %c0 to %c64 step %c16 { | |
%24 = vector.load %subview_0[%c0, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%25 = vector.load %subview_0[%c1, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%26 = vector.load %subview_0[%c2, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%27 = vector.load %subview_0[%c3, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%28 = vector.load %subview_0[%c4, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%29 = vector.load %subview_0[%c5, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%30 = vector.load %subview_0[%c6, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%31 = vector.load %subview_0[%c7, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%32 = vector.load %subview_0[%c8, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%33 = vector.load %subview_0[%c9, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%34 = vector.load %subview_0[%c10, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%35 = vector.load %subview_0[%c11, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%36 = vector.load %subview_0[%c12, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%37 = vector.load %subview_0[%c13, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%38 = vector.load %subview_0[%c14, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%39 = vector.load %subview_0[%c15, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%40 = vector.load %subview_0[%c16, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%41 = vector.load %subview_0[%c17, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%42 = vector.load %subview_0[%c18, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%43 = vector.load %subview_0[%c19, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%44 = vector.load %subview_0[%c20, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%45 = vector.load %subview_0[%c21, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%46 = vector.load %subview_0[%c22, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%47 = vector.load %subview_0[%c23, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%48 = vector.load %subview_0[%c24, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%49 = vector.load %subview_0[%c25, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%50 = vector.load %subview_0[%c26, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%51 = vector.load %subview_0[%c27, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%52 = vector.load %subview_0[%c28, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%53 = vector.load %subview_0[%c29, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%54 = vector.load %subview_0[%c30, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%55 = vector.load %subview_0[%c31, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%56 = vector.load %subview_0[%c32, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%57 = vector.load %subview_0[%c33, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%58 = vector.load %subview_0[%c34, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%59 = vector.load %subview_0[%c35, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%60 = vector.load %subview_0[%c36, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%61 = vector.load %subview_0[%c37, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%62 = vector.load %subview_0[%c38, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%63 = vector.load %subview_0[%c39, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%64 = vector.load %subview_0[%c40, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%65 = vector.load %subview_0[%c41, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%66 = vector.load %subview_0[%c42, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%67 = vector.load %subview_0[%c43, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%68 = vector.load %subview_0[%c44, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%69 = vector.load %subview_0[%c45, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%70 = vector.load %subview_0[%c46, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%71 = vector.load %subview_0[%c47, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%72 = vector.load %subview_0[%c48, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%73 = vector.load %subview_0[%c49, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%74 = vector.load %subview_0[%c50, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%75 = vector.load %subview_0[%c51, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%76 = vector.load %subview_0[%c52, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%77 = vector.load %subview_0[%c53, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%78 = vector.load %subview_0[%c54, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%79 = vector.load %subview_0[%c55, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%80 = vector.load %subview_0[%c56, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%81 = vector.load %subview_0[%c57, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%82 = vector.load %subview_0[%c58, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%83 = vector.load %subview_0[%c59, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%84 = vector.load %subview_0[%c60, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%85 = vector.load %subview_0[%c61, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%86 = vector.load %subview_0[%c62, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%87 = vector.load %subview_0[%c63, %arg4] : memref<64x64xf16, strided<[3200, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>, vector<16xf16> | |
%subview_1 = memref.subview %alloca[0, 0, 0] [1, 64, 16] [1, 1, 1] : memref<1x64x16xf16> to memref<64x16xf16> | |
vector.store %24, %subview_1[%c0, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %25, %subview_1[%c1, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %26, %subview_1[%c2, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %27, %subview_1[%c3, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %28, %subview_1[%c4, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %29, %subview_1[%c5, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %30, %subview_1[%c6, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %31, %subview_1[%c7, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %32, %subview_1[%c8, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %33, %subview_1[%c9, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %34, %subview_1[%c10, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %35, %subview_1[%c11, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %36, %subview_1[%c12, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %37, %subview_1[%c13, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %38, %subview_1[%c14, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %39, %subview_1[%c15, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %40, %subview_1[%c16, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %41, %subview_1[%c17, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %42, %subview_1[%c18, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %43, %subview_1[%c19, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %44, %subview_1[%c20, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %45, %subview_1[%c21, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %46, %subview_1[%c22, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %47, %subview_1[%c23, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %48, %subview_1[%c24, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %49, %subview_1[%c25, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %50, %subview_1[%c26, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %51, %subview_1[%c27, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %52, %subview_1[%c28, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %53, %subview_1[%c29, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %54, %subview_1[%c30, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %55, %subview_1[%c31, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %56, %subview_1[%c32, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %57, %subview_1[%c33, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %58, %subview_1[%c34, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %59, %subview_1[%c35, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %60, %subview_1[%c36, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %61, %subview_1[%c37, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %62, %subview_1[%c38, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %63, %subview_1[%c39, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %64, %subview_1[%c40, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %65, %subview_1[%c41, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %66, %subview_1[%c42, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %67, %subview_1[%c43, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %68, %subview_1[%c44, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %69, %subview_1[%c45, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %70, %subview_1[%c46, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %71, %subview_1[%c47, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %72, %subview_1[%c48, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %73, %subview_1[%c49, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %74, %subview_1[%c50, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %75, %subview_1[%c51, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %76, %subview_1[%c52, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %77, %subview_1[%c53, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %78, %subview_1[%c54, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %79, %subview_1[%c55, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %80, %subview_1[%c56, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %81, %subview_1[%c57, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %82, %subview_1[%c58, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %83, %subview_1[%c59, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %84, %subview_1[%c60, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %85, %subview_1[%c61, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %86, %subview_1[%c62, %c0] : memref<64x16xf16>, vector<16xf16> | |
vector.store %87, %subview_1[%c63, %c0] : memref<64x16xf16>, vector<16xf16> | |
%expand_shape = memref.expand_shape %alloca [[0], [1, 2], [3, 4]] : memref<1x64x16xf16> into memref<1x4x16x16x1xf16> | |
%subview_2 = memref.subview %expand_shape[0, 0, 0, 0, 0] [1, 4, 16, 16, 1] [1, 1, 1, 1, 1] : memref<1x4x16x16x1xf16> to memref<1x4x16x16xf16, strided<[1024, 256, 16, 1]>> | |
%subview_3 = memref.subview %subview_2[0, 0, 0, 0] [1, 4, 16, 16] [1, 1, 1, 1] : memref<1x4x16x16xf16, strided<[1024, 256, 16, 1]>> to memref<4x16x16xf16> | |
%88 = vector.load %subview_3[%c0, %c0, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%89 = vector.load %subview_3[%c0, %c1, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%90 = vector.load %subview_3[%c0, %c2, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%91 = vector.load %subview_3[%c0, %c3, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%92 = vector.load %subview_3[%c0, %c4, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%93 = vector.load %subview_3[%c0, %c5, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%94 = vector.load %subview_3[%c0, %c6, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%95 = vector.load %subview_3[%c0, %c7, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%96 = vector.load %subview_3[%c0, %c8, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%97 = vector.load %subview_3[%c0, %c9, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%98 = vector.load %subview_3[%c0, %c10, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%99 = vector.load %subview_3[%c0, %c11, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%100 = vector.load %subview_3[%c0, %c12, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%101 = vector.load %subview_3[%c0, %c13, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%102 = vector.load %subview_3[%c0, %c14, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%103 = vector.load %subview_3[%c0, %c15, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%104 = vector.load %subview_3[%c1, %c0, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%105 = vector.load %subview_3[%c1, %c1, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%106 = vector.load %subview_3[%c1, %c2, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%107 = vector.load %subview_3[%c1, %c3, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%108 = vector.load %subview_3[%c1, %c4, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%109 = vector.load %subview_3[%c1, %c5, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%110 = vector.load %subview_3[%c1, %c6, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%111 = vector.load %subview_3[%c1, %c7, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%112 = vector.load %subview_3[%c1, %c8, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%113 = vector.load %subview_3[%c1, %c9, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%114 = vector.load %subview_3[%c1, %c10, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%115 = vector.load %subview_3[%c1, %c11, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%116 = vector.load %subview_3[%c1, %c12, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%117 = vector.load %subview_3[%c1, %c13, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%118 = vector.load %subview_3[%c1, %c14, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%119 = vector.load %subview_3[%c1, %c15, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%120 = vector.load %subview_3[%c2, %c0, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%121 = vector.load %subview_3[%c2, %c1, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%122 = vector.load %subview_3[%c2, %c2, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%123 = vector.load %subview_3[%c2, %c3, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%124 = vector.load %subview_3[%c2, %c4, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%125 = vector.load %subview_3[%c2, %c5, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%126 = vector.load %subview_3[%c2, %c6, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%127 = vector.load %subview_3[%c2, %c7, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%128 = vector.load %subview_3[%c2, %c8, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%129 = vector.load %subview_3[%c2, %c9, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%130 = vector.load %subview_3[%c2, %c10, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%131 = vector.load %subview_3[%c2, %c11, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%132 = vector.load %subview_3[%c2, %c12, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%133 = vector.load %subview_3[%c2, %c13, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%134 = vector.load %subview_3[%c2, %c14, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%135 = vector.load %subview_3[%c2, %c15, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%136 = vector.load %subview_3[%c3, %c0, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%137 = vector.load %subview_3[%c3, %c1, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%138 = vector.load %subview_3[%c3, %c2, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%139 = vector.load %subview_3[%c3, %c3, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%140 = vector.load %subview_3[%c3, %c4, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%141 = vector.load %subview_3[%c3, %c5, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%142 = vector.load %subview_3[%c3, %c6, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%143 = vector.load %subview_3[%c3, %c7, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%144 = vector.load %subview_3[%c3, %c8, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%145 = vector.load %subview_3[%c3, %c9, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%146 = vector.load %subview_3[%c3, %c10, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%147 = vector.load %subview_3[%c3, %c11, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%148 = vector.load %subview_3[%c3, %c12, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%149 = vector.load %subview_3[%c3, %c13, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%150 = vector.load %subview_3[%c3, %c14, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%151 = vector.load %subview_3[%c3, %c15, %c0] : memref<4x16x16xf16>, vector<16xf16> | |
%subview_4 = memref.subview %subview[0, 0, 0, 0, 0] [%22, 4, 64, 16, 1] [1, 1, 1, 1, 1] : memref<?x4x64x16x1xf16, strided<[27648000, 51200, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<?x4x64x16xf16, strided<[27648000, 51200, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> | |
%152 = vector.extract %88[0] : f16 from vector<16xf16> | |
%153 = vector.insert %152, %cst [0, 0, 0] : f16 into vector<4x16x16xf16> | |
%154 = vector.extract %89[0] : f16 from vector<16xf16> | |
%155 = vector.insert %154, %153 [0, 0, 1] : f16 into vector<4x16x16xf16> | |
%156 = vector.extract %90[0] : f16 from vector<16xf16> | |
%157 = vector.insert %156, %155 [0, 0, 2] : f16 into vector<4x16x16xf16> | |
%158 = vector.extract %91[0] : f16 from vector<16xf16> | |
%159 = vector.insert %158, %157 [0, 0, 3] : f16 into vector<4x16x16xf16> | |
%160 = vector.extract %92[0] : f16 from vector<16xf16> | |
%161 = vector.insert %160, %159 [0, 0, 4] : f16 into vector<4x16x16xf16> | |
%162 = vector.extract %93[0] : f16 from vector<16xf16> | |
%163 = vector.insert %162, %161 [0, 0, 5] : f16 into vector<4x16x16xf16> | |
%164 = vector.extract %94[0] : f16 from vector<16xf16> | |
%165 = vector.insert %164, %163 [0, 0, 6] : f16 into vector<4x16x16xf16> | |
%166 = vector.extract %95[0] : f16 from vector<16xf16> | |
%167 = vector.insert %166, %165 [0, 0, 7] : f16 into vector<4x16x16xf16> | |
%168 = vector.extract %96[0] : f16 from vector<16xf16> | |
%169 = vector.insert %168, %167 [0, 0, 8] : f16 into vector<4x16x16xf16> | |
%170 = vector.extract %97[0] : f16 from vector<16xf16> | |
%171 = vector.insert %170, %169 [0, 0, 9] : f16 into vector<4x16x16xf16> | |
%172 = vector.extract %98[0] : f16 from vector<16xf16> | |
%173 = vector.insert %172, %171 [0, 0, 10] : f16 into vector<4x16x16xf16> | |
%174 = vector.extract %99[0] : f16 from vector<16xf16> | |
%175 = vector.insert %174, %173 [0, 0, 11] : f16 into vector<4x16x16xf16> | |
%176 = vector.extract %100[0] : f16 from vector<16xf16> | |
%177 = vector.insert %176, %175 [0, 0, 12] : f16 into vector<4x16x16xf16> | |
%178 = vector.extract %101[0] : f16 from vector<16xf16> | |
%179 = vector.insert %178, %177 [0, 0, 13] : f16 into vector<4x16x16xf16> | |
%180 = vector.extract %102[0] : f16 from vector<16xf16> | |
%181 = vector.insert %180, %179 [0, 0, 14] : f16 into vector<4x16x16xf16> | |
%182 = vector.extract %103[0] : f16 from vector<16xf16> | |
%183 = vector.insert %182, %181 [0, 0, 15] : f16 into vector<4x16x16xf16> | |
%184 = vector.extract %88[1] : f16 from vector<16xf16> | |
%185 = vector.insert %184, %183 [0, 1, 0] : f16 into vector<4x16x16xf16> | |
%186 = vector.extract %89[1] : f16 from vector<16xf16> | |
%187 = vector.insert %186, %185 [0, 1, 1] : f16 into vector<4x16x16xf16> | |
%188 = vector.extract %90[1] : f16 from vector<16xf16> | |
%189 = vector.insert %188, %187 [0, 1, 2] : f16 into vector<4x16x16xf16> | |
%190 = vector.extract %91[1] : f16 from vector<16xf16> | |
%191 = vector.insert %190, %189 [0, 1, 3] : f16 into vector<4x16x16xf16> | |
%192 = vector.extract %92[1] : f16 from vector<16xf16> | |
%193 = vector.insert %192, %191 [0, 1, 4] : f16 into vector<4x16x16xf16> | |
%194 = vector.extract %93[1] : f16 from vector<16xf16> | |
%195 = vector.insert %194, %193 [0, 1, 5] : f16 into vector<4x16x16xf16> | |
%196 = vector.extract %94[1] : f16 from vector<16xf16> | |
%197 = vector.insert %196, %195 [0, 1, 6] : f16 into vector<4x16x16xf16> | |
%198 = vector.extract %95[1] : f16 from vector<16xf16> | |
%199 = vector.insert %198, %197 [0, 1, 7] : f16 into vector<4x16x16xf16> | |
%200 = vector.extract %96[1] : f16 from vector<16xf16> | |
%201 = vector.insert %200, %199 [0, 1, 8] : f16 into vector<4x16x16xf16> | |
%202 = vector.extract %97[1] : f16 from vector<16xf16> | |
%203 = vector.insert %202, %201 [0, 1, 9] : f16 into vector<4x16x16xf16> | |
%204 = vector.extract %98[1] : f16 from vector<16xf16> | |
%205 = vector.insert %204, %203 [0, 1, 10] : f16 into vector<4x16x16xf16> | |
%206 = vector.extract %99[1] : f16 from vector<16xf16> | |
%207 = vector.insert %206, %205 [0, 1, 11] : f16 into vector<4x16x16xf16> | |
%208 = vector.extract %100[1] : f16 from vector<16xf16> | |
%209 = vector.insert %208, %207 [0, 1, 12] : f16 into vector<4x16x16xf16> | |
%210 = vector.extract %101[1] : f16 from vector<16xf16> | |
%211 = vector.insert %210, %209 [0, 1, 13] : f16 into vector<4x16x16xf16> | |
%212 = vector.extract %102[1] : f16 from vector<16xf16> | |
%213 = vector.insert %212, %211 [0, 1, 14] : f16 into vector<4x16x16xf16> | |
%214 = vector.extract %103[1] : f16 from vector<16xf16> | |
%215 = vector.insert %214, %213 [0, 1, 15] : f16 into vector<4x16x16xf16> | |
%216 = vector.extract %88[2] : f16 from vector<16xf16> | |
%217 = vector.insert %216, %215 [0, 2, 0] : f16 into vector<4x16x16xf16> | |
%218 = vector.extract %89[2] : f16 from vector<16xf16> | |
%219 = vector.insert %218, %217 [0, 2, 1] : f16 into vector<4x16x16xf16> | |
%220 = vector.extract %90[2] : f16 from vector<16xf16> | |
%221 = vector.insert %220, %219 [0, 2, 2] : f16 into vector<4x16x16xf16> | |
%222 = vector.extract %91[2] : f16 from vector<16xf16> | |
%223 = vector.insert %222, %221 [0, 2, 3] : f16 into vector<4x16x16xf16> | |
%224 = vector.extract %92[2] : f16 from vector<16xf16> | |
%225 = vector.insert %224, %223 [0, 2, 4] : f16 into vector<4x16x16xf16> | |
%226 = vector.extract %93[2] : f16 from vector<16xf16> | |
%227 = vector.insert %226, %225 [0, 2, 5] : f16 into vector<4x16x16xf16> | |
%228 = vector.extract %94[2] : f16 from vector<16xf16> | |
%229 = vector.insert %228, %227 [0, 2, 6] : f16 into vector<4x16x16xf16> | |
%230 = vector.extract %95[2] : f16 from vector<16xf16> | |
%231 = vector.insert %230, %229 [0, 2, 7] : f16 into vector<4x16x16xf16> | |
%232 = vector.extract %96[2] : f16 from vector<16xf16> | |
%233 = vector.insert %232, %231 [0, 2, 8] : f16 into vector<4x16x16xf16> | |
%234 = vector.extract %97[2] : f16 from vector<16xf16> | |
%235 = vector.insert %234, %233 [0, 2, 9] : f16 into vector<4x16x16xf16> | |
%236 = vector.extract %98[2] : f16 from vector<16xf16> | |
%237 = vector.insert %236, %235 [0, 2, 10] : f16 into vector<4x16x16xf16> | |
%238 = vector.extract %99[2] : f16 from vector<16xf16> | |
%239 = vector.insert %238, %237 [0, 2, 11] : f16 into vector<4x16x16xf16> | |
%240 = vector.extract %100[2] : f16 from vector<16xf16> | |
%241 = vector.insert %240, %239 [0, 2, 12] : f16 into vector<4x16x16xf16> | |
%242 = vector.extract %101[2] : f16 from vector<16xf16> | |
%243 = vector.insert %242, %241 [0, 2, 13] : f16 into vector<4x16x16xf16> | |
%244 = vector.extract %102[2] : f16 from vector<16xf16> | |
%245 = vector.insert %244, %243 [0, 2, 14] : f16 into vector<4x16x16xf16> | |
%246 = vector.extract %103[2] : f16 from vector<16xf16> | |
%247 = vector.insert %246, %245 [0, 2, 15] : f16 into vector<4x16x16xf16> | |
%248 = vector.extract %88[3] : f16 from vector<16xf16> | |
%249 = vector.insert %248, %247 [0, 3, 0] : f16 into vector<4x16x16xf16> | |
%250 = vector.extract %89[3] : f16 from vector<16xf16> | |
%251 = vector.insert %250, %249 [0, 3, 1] : f16 into vector<4x16x16xf16> | |
%252 = vector.extract %90[3] : f16 from vector<16xf16> | |
%253 = vector.insert %252, %251 [0, 3, 2] : f16 into vector<4x16x16xf16> | |
%254 = vector.extract %91[3] : f16 from vector<16xf16> | |
%255 = vector.insert %254, %253 [0, 3, 3] : f16 into vector<4x16x16xf16> | |
%256 = vector.extract %92[3] : f16 from vector<16xf16> | |
%257 = vector.insert %256, %255 [0, 3, 4] : f16 into vector<4x16x16xf16> | |
%258 = vector.extract %93[3] : f16 from vector<16xf16> | |
%259 = vector.insert %258, %257 [0, 3, 5] : f16 into vector<4x16x16xf16> | |
%260 = vector.extract %94[3] : f16 from vector<16xf16> | |
%261 = vector.insert %260, %259 [0, 3, 6] : f16 into vector<4x16x16xf16> | |
%262 = vector.extract %95[3] : f16 from vector<16xf16> | |
%263 = vector.insert %262, %261 [0, 3, 7] : f16 into vector<4x16x16xf16> | |
%264 = vector.extract %96[3] : f16 from vector<16xf16> | |
%265 = vector.insert %264, %263 [0, 3, 8] : f16 into vector<4x16x16xf16> | |
%266 = vector.extract %97[3] : f16 from vector<16xf16> | |
%267 = vector.insert %266, %265 [0, 3, 9] : f16 into vector<4x16x16xf16> | |
%268 = vector.extract %98[3] : f16 from vector<16xf16> | |
%269 = vector.insert %268, %267 [0, 3, 10] : f16 into vector<4x16x16xf16> | |
%270 = vector.extract %99[3] : f16 from vector<16xf16> | |
%271 = vector.insert %270, %269 [0, 3, 11] : f16 into vector<4x16x16xf16> | |
%272 = vector.extract %100[3] : f16 from vector<16xf16> | |
%273 = vector.insert %272, %271 [0, 3, 12] : f16 into vector<4x16x16xf16> | |
%274 = vector.extract %101[3] : f16 from vector<16xf16> | |
%275 = vector.insert %274, %273 [0, 3, 13] : f16 into vector<4x16x16xf16> | |
%276 = vector.extract %102[3] : f16 from vector<16xf16> | |
%277 = vector.insert %276, %275 [0, 3, 14] : f16 into vector<4x16x16xf16> | |
%278 = vector.extract %103[3] : f16 from vector<16xf16> | |
%279 = vector.insert %278, %277 [0, 3, 15] : f16 into vector<4x16x16xf16> | |
%280 = vector.extract %88[4] : f16 from vector<16xf16> | |
%281 = vector.insert %280, %279 [0, 4, 0] : f16 into vector<4x16x16xf16> | |
%282 = vector.extract %89[4] : f16 from vector<16xf16> | |
%283 = vector.insert %282, %281 [0, 4, 1] : f16 into vector<4x16x16xf16> | |
%284 = vector.extract %90[4] : f16 from vector<16xf16> | |
%285 = vector.insert %284, %283 [0, 4, 2] : f16 into vector<4x16x16xf16> | |
%286 = vector.extract %91[4] : f16 from vector<16xf16> | |
%287 = vector.insert %286, %285 [0, 4, 3] : f16 into vector<4x16x16xf16> | |
%288 = vector.extract %92[4] : f16 from vector<16xf16> | |
%289 = vector.insert %288, %287 [0, 4, 4] : f16 into vector<4x16x16xf16> | |
%290 = vector.extract %93[4] : f16 from vector<16xf16> | |
%291 = vector.insert %290, %289 [0, 4, 5] : f16 into vector<4x16x16xf16> | |
%292 = vector.extract %94[4] : f16 from vector<16xf16> | |
%293 = vector.insert %292, %291 [0, 4, 6] : f16 into vector<4x16x16xf16> | |
%294 = vector.extract %95[4] : f16 from vector<16xf16> | |
%295 = vector.insert %294, %293 [0, 4, 7] : f16 into vector<4x16x16xf16> | |
%296 = vector.extract %96[4] : f16 from vector<16xf16> | |
%297 = vector.insert %296, %295 [0, 4, 8] : f16 into vector<4x16x16xf16> | |
%298 = vector.extract %97[4] : f16 from vector<16xf16> | |
%299 = vector.insert %298, %297 [0, 4, 9] : f16 into vector<4x16x16xf16> | |
%300 = vector.extract %98[4] : f16 from vector<16xf16> | |
%301 = vector.insert %300, %299 [0, 4, 10] : f16 into vector<4x16x16xf16> | |
%302 = vector.extract %99[4] : f16 from vector<16xf16> | |
%303 = vector.insert %302, %301 [0, 4, 11] : f16 into vector<4x16x16xf16> | |
%304 = vector.extract %100[4] : f16 from vector<16xf16> | |
%305 = vector.insert %304, %303 [0, 4, 12] : f16 into vector<4x16x16xf16> | |
%306 = vector.extract %101[4] : f16 from vector<16xf16> | |
%307 = vector.insert %306, %305 [0, 4, 13] : f16 into vector<4x16x16xf16> | |
%308 = vector.extract %102[4] : f16 from vector<16xf16> | |
%309 = vector.insert %308, %307 [0, 4, 14] : f16 into vector<4x16x16xf16> | |
%310 = vector.extract %103[4] : f16 from vector<16xf16> | |
%311 = vector.insert %310, %309 [0, 4, 15] : f16 into vector<4x16x16xf16> | |
%312 = vector.extract %88[5] : f16 from vector<16xf16> | |
%313 = vector.insert %312, %311 [0, 5, 0] : f16 into vector<4x16x16xf16> | |
%314 = vector.extract %89[5] : f16 from vector<16xf16> | |
%315 = vector.insert %314, %313 [0, 5, 1] : f16 into vector<4x16x16xf16> | |
%316 = vector.extract %90[5] : f16 from vector<16xf16> | |
%317 = vector.insert %316, %315 [0, 5, 2] : f16 into vector<4x16x16xf16> | |
%318 = vector.extract %91[5] : f16 from vector<16xf16> | |
%319 = vector.insert %318, %317 [0, 5, 3] : f16 into vector<4x16x16xf16> | |
%320 = vector.extract %92[5] : f16 from vector<16xf16> | |
%321 = vector.insert %320, %319 [0, 5, 4] : f16 into vector<4x16x16xf16> | |
%322 = vector.extract %93[5] : f16 from vector<16xf16> | |
%323 = vector.insert %322, %321 [0, 5, 5] : f16 into vector<4x16x16xf16> | |
%324 = vector.extract %94[5] : f16 from vector<16xf16> | |
%325 = vector.insert %324, %323 [0, 5, 6] : f16 into vector<4x16x16xf16> | |
%326 = vector.extract %95[5] : f16 from vector<16xf16> | |
%327 = vector.insert %326, %325 [0, 5, 7] : f16 into vector<4x16x16xf16> | |
%328 = vector.extract %96[5] : f16 from vector<16xf16> | |
%329 = vector.insert %328, %327 [0, 5, 8] : f16 into vector<4x16x16xf16> | |
%330 = vector.extract %97[5] : f16 from vector<16xf16> | |
%331 = vector.insert %330, %329 [0, 5, 9] : f16 into vector<4x16x16xf16> | |
%332 = vector.extract %98[5] : f16 from vector<16xf16> | |
%333 = vector.insert %332, %331 [0, 5, 10] : f16 into vector<4x16x16xf16> | |
%334 = vector.extract %99[5] : f16 from vector<16xf16> | |
%335 = vector.insert %334, %333 [0, 5, 11] : f16 into vector<4x16x16xf16> | |
%336 = vector.extract %100[5] : f16 from vector<16xf16> | |
%337 = vector.insert %336, %335 [0, 5, 12] : f16 into vector<4x16x16xf16> | |
%338 = vector.extract %101[5] : f16 from vector<16xf16> | |
%339 = vector.insert %338, %337 [0, 5, 13] : f16 into vector<4x16x16xf16> | |
%340 = vector.extract %102[5] : f16 from vector<16xf16> | |
%341 = vector.insert %340, %339 [0, 5, 14] : f16 into vector<4x16x16xf16> | |
%342 = vector.extract %103[5] : f16 from vector<16xf16> | |
%343 = vector.insert %342, %341 [0, 5, 15] : f16 into vector<4x16x16xf16> | |
%344 = vector.extract %88[6] : f16 from vector<16xf16> | |
%345 = vector.insert %344, %343 [0, 6, 0] : f16 into vector<4x16x16xf16> | |
%346 = vector.extract %89[6] : f16 from vector<16xf16> | |
%347 = vector.insert %346, %345 [0, 6, 1] : f16 into vector<4x16x16xf16> | |
%348 = vector.extract %90[6] : f16 from vector<16xf16> | |
%349 = vector.insert %348, %347 [0, 6, 2] : f16 into vector<4x16x16xf16> | |
%350 = vector.extract %91[6] : f16 from vector<16xf16> | |
%351 = vector.insert %350, %349 [0, 6, 3] : f16 into vector<4x16x16xf16> | |
%352 = vector.extract %92[6] : f16 from vector<16xf16> | |
%353 = vector.insert %352, %351 [0, 6, 4] : f16 into vector<4x16x16xf16> | |
%354 = vector.extract %93[6] : f16 from vector<16xf16> | |
%355 = vector.insert %354, %353 [0, 6, 5] : f16 into vector<4x16x16xf16> | |
%356 = vector.extract %94[6] : f16 from vector<16xf16> | |
%357 = vector.insert %356, %355 [0, 6, 6] : f16 into vector<4x16x16xf16> | |
%358 = vector.extract %95[6] : f16 from vector<16xf16> | |
%359 = vector.insert %358, %357 [0, 6, 7] : f16 into vector<4x16x16xf16> | |
%360 = vector.extract %96[6] : f16 from vector<16xf16> | |
%361 = vector.insert %360, %359 [0, 6, 8] : f16 into vector<4x16x16xf16> | |
%362 = vector.extract %97[6] : f16 from vector<16xf16> | |
%363 = vector.insert %362, %361 [0, 6, 9] : f16 into vector<4x16x16xf16> | |
%364 = vector.extract %98[6] : f16 from vector<16xf16> | |
%365 = vector.insert %364, %363 [0, 6, 10] : f16 into vector<4x16x16xf16> | |
%366 = vector.extract %99[6] : f16 from vector<16xf16> | |
%367 = vector.insert %366, %365 [0, 6, 11] : f16 into vector<4x16x16xf16> | |
%368 = vector.extract %100[6] : f16 from vector<16xf16> | |
%369 = vector.insert %368, %367 [0, 6, 12] : f16 into vector<4x16x16xf16> | |
%370 = vector.extract %101[6] : f16 from vector<16xf16> | |
%371 = vector.insert %370, %369 [0, 6, 13] : f16 into vector<4x16x16xf16> | |
%372 = vector.extract %102[6] : f16 from vector<16xf16> | |
%373 = vector.insert %372, %371 [0, 6, 14] : f16 into vector<4x16x16xf16> | |
%374 = vector.extract %103[6] : f16 from vector<16xf16> | |
%375 = vector.insert %374, %373 [0, 6, 15] : f16 into vector<4x16x16xf16> | |
%376 = vector.extract %88[7] : f16 from vector<16xf16> | |
%377 = vector.insert %376, %375 [0, 7, 0] : f16 into vector<4x16x16xf16> | |
%378 = vector.extract %89[7] : f16 from vector<16xf16> | |
%379 = vector.insert %378, %377 [0, 7, 1] : f16 into vector<4x16x16xf16> | |
%380 = vector.extract %90[7] : f16 from vector<16xf16> | |
%381 = vector.insert %380, %379 [0, 7, 2] : f16 into vector<4x16x16xf16> | |
%382 = vector.extract %91[7] : f16 from vector<16xf16> | |
%383 = vector.insert %382, %381 [0, 7, 3] : f16 into vector<4x16x16xf16> | |
%384 = vector.extract %92[7] : f16 from vector<16xf16> | |
%385 = vector.insert %384, %383 [0, 7, 4] : f16 into vector<4x16x16xf16> | |
%386 = vector.extract %93[7] : f16 from vector<16xf16> | |
%387 = vector.insert %386, %385 [0, 7, 5] : f16 into vector<4x16x16xf16> | |
%388 = vector.extract %94[7] : f16 from vector<16xf16> | |
%389 = vector.insert %388, %387 [0, 7, 6] : f16 into vector<4x16x16xf16> | |
%390 = vector.extract %95[7] : f16 from vector<16xf16> | |
%391 = vector.insert %390, %389 [0, 7, 7] : f16 into vector<4x16x16xf16> | |
%392 = vector.extract %96[7] : f16 from vector<16xf16> | |
%393 = vector.insert %392, %391 [0, 7, 8] : f16 into vector<4x16x16xf16> | |
%394 = vector.extract %97[7] : f16 from vector<16xf16> | |
%395 = vector.insert %394, %393 [0, 7, 9] : f16 into vector<4x16x16xf16> | |
%396 = vector.extract %98[7] : f16 from vector<16xf16> | |
%397 = vector.insert %396, %395 [0, 7, 10] : f16 into vector<4x16x16xf16> | |
%398 = vector.extract %99[7] : f16 from vector<16xf16> | |
%399 = vector.insert %398, %397 [0, 7, 11] : f16 into vector<4x16x16xf16> | |
%400 = vector.extract %100[7] : f16 from vector<16xf16> | |
%401 = vector.insert %400, %399 [0, 7, 12] : f16 into vector<4x16x16xf16> | |
%402 = vector.extract %101[7] : f16 from vector<16xf16> | |
%403 = vector.insert %402, %401 [0, 7, 13] : f16 into vector<4x16x16xf16> | |
%404 = vector.extract %102[7] : f16 from vector<16xf16> | |
%405 = vector.insert %404, %403 [0, 7, 14] : f16 into vector<4x16x16xf16> | |
%406 = vector.extract %103[7] : f16 from vector<16xf16> | |
%407 = vector.insert %406, %405 [0, 7, 15] : f16 into vector<4x16x16xf16> | |
%408 = vector.extract %88[8] : f16 from vector<16xf16> | |
%409 = vector.insert %408, %407 [0, 8, 0] : f16 into vector<4x16x16xf16> | |
%410 = vector.extract %89[8] : f16 from vector<16xf16> | |
%411 = vector.insert %410, %409 [0, 8, 1] : f16 into vector<4x16x16xf16> | |
%412 = vector.extract %90[8] : f16 from vector<16xf16> | |
%413 = vector.insert %412, %411 [0, 8, 2] : f16 into vector<4x16x16xf16> | |
%414 = vector.extract %91[8] : f16 from vector<16xf16> | |
%415 = vector.insert %414, %413 [0, 8, 3] : f16 into vector<4x16x16xf16> | |
%416 = vector.extract %92[8] : f16 from vector<16xf16> | |
%417 = vector.insert %416, %415 [0, 8, 4] : f16 into vector<4x16x16xf16> | |
%418 = vector.extract %93[8] : f16 from vector<16xf16> | |
%419 = vector.insert %418, %417 [0, 8, 5] : f16 into vector<4x16x16xf16> | |
%420 = vector.extract %94[8] : f16 from vector<16xf16> | |
%421 = vector.insert %420, %419 [0, 8, 6] : f16 into vector<4x16x16xf16> | |
%422 = vector.extract %95[8] : f16 from vector<16xf16> | |
%423 = vector.insert %422, %421 [0, 8, 7] : f16 into vector<4x16x16xf16> | |
%424 = vector.extract %96[8] : f16 from vector<16xf16> | |
%425 = vector.insert %424, %423 [0, 8, 8] : f16 into vector<4x16x16xf16> | |
%426 = vector.extract %97[8] : f16 from vector<16xf16> | |
%427 = vector.insert %426, %425 [0, 8, 9] : f16 into vector<4x16x16xf16> | |
%428 = vector.extract %98[8] : f16 from vector<16xf16> | |
%429 = vector.insert %428, %427 [0, 8, 10] : f16 into vector<4x16x16xf16> | |
%430 = vector.extract %99[8] : f16 from vector<16xf16> | |
%431 = vector.insert %430, %429 [0, 8, 11] : f16 into vector<4x16x16xf16> | |
%432 = vector.extract %100[8] : f16 from vector<16xf16> | |
%433 = vector.insert %432, %431 [0, 8, 12] : f16 into vector<4x16x16xf16> | |
%434 = vector.extract %101[8] : f16 from vector<16xf16> | |
%435 = vector.insert %434, %433 [0, 8, 13] : f16 into vector<4x16x16xf16> | |
%436 = vector.extract %102[8] : f16 from vector<16xf16> | |
%437 = vector.insert %436, %435 [0, 8, 14] : f16 into vector<4x16x16xf16> | |
%438 = vector.extract %103[8] : f16 from vector<16xf16> | |
%439 = vector.insert %438, %437 [0, 8, 15] : f16 into vector<4x16x16xf16> | |
%440 = vector.extract %88[9] : f16 from vector<16xf16> | |
%441 = vector.insert %440, %439 [0, 9, 0] : f16 into vector<4x16x16xf16> | |
%442 = vector.extract %89[9] : f16 from vector<16xf16> | |
%443 = vector.insert %442, %441 [0, 9, 1] : f16 into vector<4x16x16xf16> | |
%444 = vector.extract %90[9] : f16 from vector<16xf16> | |
%445 = vector.insert %444, %443 [0, 9, 2] : f16 into vector<4x16x16xf16> | |
%446 = vector.extract %91[9] : f16 from vector<16xf16> | |
%447 = vector.insert %446, %445 [0, 9, 3] : f16 into vector<4x16x16xf16> | |
%448 = vector.extract %92[9] : f16 from vector<16xf16> | |
%449 = vector.insert %448, %447 [0, 9, 4] : f16 into vector<4x16x16xf16> | |
%450 = vector.extract %93[9] : f16 from vector<16xf16> | |
%451 = vector.insert %450, %449 [0, 9, 5] : f16 into vector<4x16x16xf16> | |
%452 = vector.extract %94[9] : f16 from vector<16xf16> | |
%453 = vector.insert %452, %451 [0, 9, 6] : f16 into vector<4x16x16xf16> | |
%454 = vector.extract %95[9] : f16 from vector<16xf16> | |
%455 = vector.insert %454, %453 [0, 9, 7] : f16 into vector<4x16x16xf16> | |
%456 = vector.extract %96[9] : f16 from vector<16xf16> | |
%457 = vector.insert %456, %455 [0, 9, 8] : f16 into vector<4x16x16xf16> | |
%458 = vector.extract %97[9] : f16 from vector<16xf16> | |
%459 = vector.insert %458, %457 [0, 9, 9] : f16 into vector<4x16x16xf16> | |
%460 = vector.extract %98[9] : f16 from vector<16xf16> | |
%461 = vector.insert %460, %459 [0, 9, 10] : f16 into vector<4x16x16xf16> | |
%462 = vector.extract %99[9] : f16 from vector<16xf16> | |
%463 = vector.insert %462, %461 [0, 9, 11] : f16 into vector<4x16x16xf16> | |
%464 = vector.extract %100[9] : f16 from vector<16xf16> | |
%465 = vector.insert %464, %463 [0, 9, 12] : f16 into vector<4x16x16xf16> | |
%466 = vector.extract %101[9] : f16 from vector<16xf16> | |
%467 = vector.insert %466, %465 [0, 9, 13] : f16 into vector<4x16x16xf16> | |
%468 = vector.extract %102[9] : f16 from vector<16xf16> | |
%469 = vector.insert %468, %467 [0, 9, 14] : f16 into vector<4x16x16xf16> | |
%470 = vector.extract %103[9] : f16 from vector<16xf16> | |
%471 = vector.insert %470, %469 [0, 9, 15] : f16 into vector<4x16x16xf16> | |
%472 = vector.extract %88[10] : f16 from vector<16xf16> | |
%473 = vector.insert %472, %471 [0, 10, 0] : f16 into vector<4x16x16xf16> | |
%474 = vector.extract %89[10] : f16 from vector<16xf16> | |
%475 = vector.insert %474, %473 [0, 10, 1] : f16 into vector<4x16x16xf16> | |
%476 = vector.extract %90[10] : f16 from vector<16xf16> | |
%477 = vector.insert %476, %475 [0, 10, 2] : f16 into vector<4x16x16xf16> | |
%478 = vector.extract %91[10] : f16 from vector<16xf16> | |
%479 = vector.insert %478, %477 [0, 10, 3] : f16 into vector<4x16x16xf16> | |
%480 = vector.extract %92[10] : f16 from vector<16xf16> | |
%481 = vector.insert %480, %479 [0, 10, 4] : f16 into vector<4x16x16xf16> | |
%482 = vector.extract %93[10] : f16 from vector<16xf16> | |
%483 = vector.insert %482, %481 [0, 10, 5] : f16 into vector<4x16x16xf16> | |
%484 = vector.extract %94[10] : f16 from vector<16xf16> | |
%485 = vector.insert %484, %483 [0, 10, 6] : f16 into vector<4x16x16xf16> | |
%486 = vector.extract %95[10] : f16 from vector<16xf16> | |
%487 = vector.insert %486, %485 [0, 10, 7] : f16 into vector<4x16x16xf16> | |
%488 = vector.extract %96[10] : f16 from vector<16xf16> | |
%489 = vector.insert %488, %487 [0, 10, 8] : f16 into vector<4x16x16xf16> | |
%490 = vector.extract %97[10] : f16 from vector<16xf16> | |
%491 = vector.insert %490, %489 [0, 10, 9] : f16 into vector<4x16x16xf16> | |
%492 = vector.extract %98[10] : f16 from vector<16xf16> | |
%493 = vector.insert %492, %491 [0, 10, 10] : f16 into vector<4x16x16xf16> | |
%494 = vector.extract %99[10] : f16 from vector<16xf16> | |
%495 = vector.insert %494, %493 [0, 10, 11] : f16 into vector<4x16x16xf16> | |
%496 = vector.extract %100[10] : f16 from vector<16xf16> | |
%497 = vector.insert %496, %495 [0, 10, 12] : f16 into vector<4x16x16xf16> | |
%498 = vector.extract %101[10] : f16 from vector<16xf16> | |
%499 = vector.insert %498, %497 [0, 10, 13] : f16 into vector<4x16x16xf16> | |
%500 = vector.extract %102[10] : f16 from vector<16xf16> | |
%501 = vector.insert %500, %499 [0, 10, 14] : f16 into vector<4x16x16xf16> | |
%502 = vector.extract %103[10] : f16 from vector<16xf16> | |
%503 = vector.insert %502, %501 [0, 10, 15] : f16 into vector<4x16x16xf16> | |
%504 = vector.extract %88[11] : f16 from vector<16xf16> | |
%505 = vector.insert %504, %503 [0, 11, 0] : f16 into vector<4x16x16xf16> | |
%506 = vector.extract %89[11] : f16 from vector<16xf16> | |
%507 = vector.insert %506, %505 [0, 11, 1] : f16 into vector<4x16x16xf16> | |
%508 = vector.extract %90[11] : f16 from vector<16xf16> | |
%509 = vector.insert %508, %507 [0, 11, 2] : f16 into vector<4x16x16xf16> | |
%510 = vector.extract %91[11] : f16 from vector<16xf16> | |
%511 = vector.insert %510, %509 [0, 11, 3] : f16 into vector<4x16x16xf16> | |
%512 = vector.extract %92[11] : f16 from vector<16xf16> | |
%513 = vector.insert %512, %511 [0, 11, 4] : f16 into vector<4x16x16xf16> | |
%514 = vector.extract %93[11] : f16 from vector<16xf16> | |
%515 = vector.insert %514, %513 [0, 11, 5] : f16 into vector<4x16x16xf16> | |
%516 = vector.extract %94[11] : f16 from vector<16xf16> | |
%517 = vector.insert %516, %515 [0, 11, 6] : f16 into vector<4x16x16xf16> | |
%518 = vector.extract %95[11] : f16 from vector<16xf16> | |
%519 = vector.insert %518, %517 [0, 11, 7] : f16 into vector<4x16x16xf16> | |
%520 = vector.extract %96[11] : f16 from vector<16xf16> | |
%521 = vector.insert %520, %519 [0, 11, 8] : f16 into vector<4x16x16xf16> | |
%522 = vector.extract %97[11] : f16 from vector<16xf16> | |
%523 = vector.insert %522, %521 [0, 11, 9] : f16 into vector<4x16x16xf16> | |
%524 = vector.extract %98[11] : f16 from vector<16xf16> | |
%525 = vector.insert %524, %523 [0, 11, 10] : f16 into vector<4x16x16xf16> | |
%526 = vector.extract %99[11] : f16 from vector<16xf16> | |
%527 = vector.insert %526, %525 [0, 11, 11] : f16 into vector<4x16x16xf16> | |
%528 = vector.extract %100[11] : f16 from vector<16xf16> | |
%529 = vector.insert %528, %527 [0, 11, 12] : f16 into vector<4x16x16xf16> | |
%530 = vector.extract %101[11] : f16 from vector<16xf16> | |
%531 = vector.insert %530, %529 [0, 11, 13] : f16 into vector<4x16x16xf16> | |
%532 = vector.extract %102[11] : f16 from vector<16xf16> | |
%533 = vector.insert %532, %531 [0, 11, 14] : f16 into vector<4x16x16xf16> | |
%534 = vector.extract %103[11] : f16 from vector<16xf16> | |
%535 = vector.insert %534, %533 [0, 11, 15] : f16 into vector<4x16x16xf16> | |
%536 = vector.extract %88[12] : f16 from vector<16xf16> | |
%537 = vector.insert %536, %535 [0, 12, 0] : f16 into vector<4x16x16xf16> | |
%538 = vector.extract %89[12] : f16 from vector<16xf16> | |
%539 = vector.insert %538, %537 [0, 12, 1] : f16 into vector<4x16x16xf16> | |
%540 = vector.extract %90[12] : f16 from vector<16xf16> | |
%541 = vector.insert %540, %539 [0, 12, 2] : f16 into vector<4x16x16xf16> | |
%542 = vector.extract %91[12] : f16 from vector<16xf16> | |
%543 = vector.insert %542, %541 [0, 12, 3] : f16 into vector<4x16x16xf16> | |
%544 = vector.extract %92[12] : f16 from vector<16xf16> | |
%545 = vector.insert %544, %543 [0, 12, 4] : f16 into vector<4x16x16xf16> | |
%546 = vector.extract %93[12] : f16 from vector<16xf16> | |
%547 = vector.insert %546, %545 [0, 12, 5] : f16 into vector<4x16x16xf16> | |
%548 = vector.extract %94[12] : f16 from vector<16xf16> | |
%549 = vector.insert %548, %547 [0, 12, 6] : f16 into vector<4x16x16xf16> | |
%550 = vector.extract %95[12] : f16 from vector<16xf16> | |
%551 = vector.insert %550, %549 [0, 12, 7] : f16 into vector<4x16x16xf16> | |
%552 = vector.extract %96[12] : f16 from vector<16xf16> | |
%553 = vector.insert %552, %551 [0, 12, 8] : f16 into vector<4x16x16xf16> | |
%554 = vector.extract %97[12] : f16 from vector<16xf16> | |
%555 = vector.insert %554, %553 [0, 12, 9] : f16 into vector<4x16x16xf16> | |
%556 = vector.extract %98[12] : f16 from vector<16xf16> | |
%557 = vector.insert %556, %555 [0, 12, 10] : f16 into vector<4x16x16xf16> | |
%558 = vector.extract %99[12] : f16 from vector<16xf16> | |
%559 = vector.insert %558, %557 [0, 12, 11] : f16 into vector<4x16x16xf16> | |
%560 = vector.extract %100[12] : f16 from vector<16xf16> | |
%561 = vector.insert %560, %559 [0, 12, 12] : f16 into vector<4x16x16xf16> | |
%562 = vector.extract %101[12] : f16 from vector<16xf16> | |
%563 = vector.insert %562, %561 [0, 12, 13] : f16 into vector<4x16x16xf16> | |
%564 = vector.extract %102[12] : f16 from vector<16xf16> | |
%565 = vector.insert %564, %563 [0, 12, 14] : f16 into vector<4x16x16xf16> | |
%566 = vector.extract %103[12] : f16 from vector<16xf16> | |
%567 = vector.insert %566, %565 [0, 12, 15] : f16 into vector<4x16x16xf16> | |
%568 = vector.extract %88[13] : f16 from vector<16xf16> | |
%569 = vector.insert %568, %567 [0, 13, 0] : f16 into vector<4x16x16xf16> | |
%570 = vector.extract %89[13] : f16 from vector<16xf16> | |
%571 = vector.insert %570, %569 [0, 13, 1] : f16 into vector<4x16x16xf16> | |
%572 = vector.extract %90[13] : f16 from vector<16xf16> | |
%573 = vector.insert %572, %571 [0, 13, 2] : f16 into vector<4x16x16xf16> | |
%574 = vector.extract %91[13] : f16 from vector<16xf16> | |
%575 = vector.insert %574, %573 [0, 13, 3] : f16 into vector<4x16x16xf16> | |
%576 = vector.extract %92[13] : f16 from vector<16xf16> | |
%577 = vector.insert %576, %575 [0, 13, 4] : f16 into vector<4x16x16xf16> | |
%578 = vector.extract %93[13] : f16 from vector<16xf16> | |
%579 = vector.insert %578, %577 [0, 13, 5] : f16 into vector<4x16x16xf16> | |
%580 = vector.extract %94[13] : f16 from vector<16xf16> | |
%581 = vector.insert %580, %579 [0, 13, 6] : f16 into vector<4x16x16xf16> | |
%582 = vector.extract %95[13] : f16 from vector<16xf16> | |
%583 = vector.insert %582, %581 [0, 13, 7] : f16 into vector<4x16x16xf16> | |
%584 = vector.extract %96[13] : f16 from vector<16xf16> | |
%585 = vector.insert %584, %583 [0, 13, 8] : f16 into vector<4x16x16xf16> | |
%586 = vector.extract %97[13] : f16 from vector<16xf16> | |
%587 = vector.insert %586, %585 [0, 13, 9] : f16 into vector<4x16x16xf16> | |
%588 = vector.extract %98[13] : f16 from vector<16xf16> | |
%589 = vector.insert %588, %587 [0, 13, 10] : f16 into vector<4x16x16xf16> | |
%590 = vector.extract %99[13] : f16 from vector<16xf16> | |
%591 = vector.insert %590, %589 [0, 13, 11] : f16 into vector<4x16x16xf16> | |
%592 = vector.extract %100[13] : f16 from vector<16xf16> | |
%593 = vector.insert %592, %591 [0, 13, 12] : f16 into vector<4x16x16xf16> | |
%594 = vector.extract %101[13] : f16 from vector<16xf16> | |
%595 = vector.insert %594, %593 [0, 13, 13] : f16 into vector<4x16x16xf16> | |
%596 = vector.extract %102[13] : f16 from vector<16xf16> | |
%597 = vector.insert %596, %595 [0, 13, 14] : f16 into vector<4x16x16xf16> | |
%598 = vector.extract %103[13] : f16 from vector<16xf16> | |
%599 = vector.insert %598, %597 [0, 13, 15] : f16 into vector<4x16x16xf16> | |
%600 = vector.extract %88[14] : f16 from vector<16xf16> | |
%601 = vector.insert %600, %599 [0, 14, 0] : f16 into vector<4x16x16xf16> | |
%602 = vector.extract %89[14] : f16 from vector<16xf16> | |
%603 = vector.insert %602, %601 [0, 14, 1] : f16 into vector<4x16x16xf16> | |
%604 = vector.extract %90[14] : f16 from vector<16xf16> | |
%605 = vector.insert %604, %603 [0, 14, 2] : f16 into vector<4x16x16xf16> | |
%606 = vector.extract %91[14] : f16 from vector<16xf16> | |
%607 = vector.insert %606, %605 [0, 14, 3] : f16 into vector<4x16x16xf16> | |
%608 = vector.extract %92[14] : f16 from vector<16xf16> | |
%609 = vector.insert %608, %607 [0, 14, 4] : f16 into vector<4x16x16xf16> | |
%610 = vector.extract %93[14] : f16 from vector<16xf16> | |
%611 = vector.insert %610, %609 [0, 14, 5] : f16 into vector<4x16x16xf16> | |
%612 = vector.extract %94[14] : f16 from vector<16xf16> | |
%613 = vector.insert %612, %611 [0, 14, 6] : f16 into vector<4x16x16xf16> | |
%614 = vector.extract %95[14] : f16 from vector<16xf16> | |
%615 = vector.insert %614, %613 [0, 14, 7] : f16 into vector<4x16x16xf16> | |
%616 = vector.extract %96[14] : f16 from vector<16xf16> | |
%617 = vector.insert %616, %615 [0, 14, 8] : f16 into vector<4x16x16xf16> | |
%618 = vector.extract %97[14] : f16 from vector<16xf16> | |
%619 = vector.insert %618, %617 [0, 14, 9] : f16 into vector<4x16x16xf16> | |
%620 = vector.extract %98[14] : f16 from vector<16xf16> | |
%621 = vector.insert %620, %619 [0, 14, 10] : f16 into vector<4x16x16xf16> | |
%622 = vector.extract %99[14] : f16 from vector<16xf16> | |
%623 = vector.insert %622, %621 [0, 14, 11] : f16 into vector<4x16x16xf16> | |
%624 = vector.extract %100[14] : f16 from vector<16xf16> | |
%625 = vector.insert %624, %623 [0, 14, 12] : f16 into vector<4x16x16xf16> | |
%626 = vector.extract %101[14] : f16 from vector<16xf16> | |
%627 = vector.insert %626, %625 [0, 14, 13] : f16 into vector<4x16x16xf16> | |
%628 = vector.extract %102[14] : f16 from vector<16xf16> | |
%629 = vector.insert %628, %627 [0, 14, 14] : f16 into vector<4x16x16xf16> | |
%630 = vector.extract %103[14] : f16 from vector<16xf16> | |
%631 = vector.insert %630, %629 [0, 14, 15] : f16 into vector<4x16x16xf16> | |
%632 = vector.extract %88[15] : f16 from vector<16xf16> | |
%633 = vector.insert %632, %631 [0, 15, 0] : f16 into vector<4x16x16xf16> | |
%634 = vector.extract %89[15] : f16 from vector<16xf16> | |
%635 = vector.insert %634, %633 [0, 15, 1] : f16 into vector<4x16x16xf16> | |
%636 = vector.extract %90[15] : f16 from vector<16xf16> | |
%637 = vector.insert %636, %635 [0, 15, 2] : f16 into vector<4x16x16xf16> | |
%638 = vector.extract %91[15] : f16 from vector<16xf16> | |
%639 = vector.insert %638, %637 [0, 15, 3] : f16 into vector<4x16x16xf16> | |
%640 = vector.extract %92[15] : f16 from vector<16xf16> | |
%641 = vector.insert %640, %639 [0, 15, 4] : f16 into vector<4x16x16xf16> | |
%642 = vector.extract %93[15] : f16 from vector<16xf16> | |
%643 = vector.insert %642, %641 [0, 15, 5] : f16 into vector<4x16x16xf16> | |
%644 = vector.extract %94[15] : f16 from vector<16xf16> | |
%645 = vector.insert %644, %643 [0, 15, 6] : f16 into vector<4x16x16xf16> | |
%646 = vector.extract %95[15] : f16 from vector<16xf16> | |
%647 = vector.insert %646, %645 [0, 15, 7] : f16 into vector<4x16x16xf16> | |
%648 = vector.extract %96[15] : f16 from vector<16xf16> | |
%649 = vector.insert %648, %647 [0, 15, 8] : f16 into vector<4x16x16xf16> | |
%650 = vector.extract %97[15] : f16 from vector<16xf16> | |
%651 = vector.insert %650, %649 [0, 15, 9] : f16 into vector<4x16x16xf16> | |
%652 = vector.extract %98[15] : f16 from vector<16xf16> | |
%653 = vector.insert %652, %651 [0, 15, 10] : f16 into vector<4x16x16xf16> | |
%654 = vector.extract %99[15] : f16 from vector<16xf16> | |
%655 = vector.insert %654, %653 [0, 15, 11] : f16 into vector<4x16x16xf16> | |
%656 = vector.extract %100[15] : f16 from vector<16xf16> | |
%657 = vector.insert %656, %655 [0, 15, 12] : f16 into vector<4x16x16xf16> | |
%658 = vector.extract %101[15] : f16 from vector<16xf16> | |
%659 = vector.insert %658, %657 [0, 15, 13] : f16 into vector<4x16x16xf16> | |
%660 = vector.extract %102[15] : f16 from vector<16xf16> | |
%661 = vector.insert %660, %659 [0, 15, 14] : f16 into vector<4x16x16xf16> | |
%662 = vector.extract %103[15] : f16 from vector<16xf16> | |
%663 = vector.insert %662, %661 [0, 15, 15] : f16 into vector<4x16x16xf16> | |
%664 = vector.extract %104[0] : f16 from vector<16xf16> | |
%665 = vector.insert %664, %663 [1, 0, 0] : f16 into vector<4x16x16xf16> | |
%666 = vector.extract %105[0] : f16 from vector<16xf16> | |
%667 = vector.insert %666, %665 [1, 0, 1] : f16 into vector<4x16x16xf16> | |
%668 = vector.extract %106[0] : f16 from vector<16xf16> | |
%669 = vector.insert %668, %667 [1, 0, 2] : f16 into vector<4x16x16xf16> | |
%670 = vector.extract %107[0] : f16 from vector<16xf16> | |
%671 = vector.insert %670, %669 [1, 0, 3] : f16 into vector<4x16x16xf16> | |
%672 = vector.extract %108[0] : f16 from vector<16xf16> | |
%673 = vector.insert %672, %671 [1, 0, 4] : f16 into vector<4x16x16xf16> | |
%674 = vector.extract %109[0] : f16 from vector<16xf16> | |
%675 = vector.insert %674, %673 [1, 0, 5] : f16 into vector<4x16x16xf16> | |
%676 = vector.extract %110[0] : f16 from vector<16xf16> | |
%677 = vector.insert %676, %675 [1, 0, 6] : f16 into vector<4x16x16xf16> | |
%678 = vector.extract %111[0] : f16 from vector<16xf16> | |
%679 = vector.insert %678, %677 [1, 0, 7] : f16 into vector<4x16x16xf16> | |
%680 = vector.extract %112[0] : f16 from vector<16xf16> | |
%681 = vector.insert %680, %679 [1, 0, 8] : f16 into vector<4x16x16xf16> | |
%682 = vector.extract %113[0] : f16 from vector<16xf16> | |
%683 = vector.insert %682, %681 [1, 0, 9] : f16 into vector<4x16x16xf16> | |
%684 = vector.extract %114[0] : f16 from vector<16xf16> | |
%685 = vector.insert %684, %683 [1, 0, 10] : f16 into vector<4x16x16xf16> | |
%686 = vector.extract %115[0] : f16 from vector<16xf16> | |
%687 = vector.insert %686, %685 [1, 0, 11] : f16 into vector<4x16x16xf16> | |
%688 = vector.extract %116[0] : f16 from vector<16xf16> | |
%689 = vector.insert %688, %687 [1, 0, 12] : f16 into vector<4x16x16xf16> | |
%690 = vector.extract %117[0] : f16 from vector<16xf16> | |
%691 = vector.insert %690, %689 [1, 0, 13] : f16 into vector<4x16x16xf16> | |
%692 = vector.extract %118[0] : f16 from vector<16xf16> | |
%693 = vector.insert %692, %691 [1, 0, 14] : f16 into vector<4x16x16xf16> | |
%694 = vector.extract %119[0] : f16 from vector<16xf16> | |
%695 = vector.insert %694, %693 [1, 0, 15] : f16 into vector<4x16x16xf16> | |
%696 = vector.extract %104[1] : f16 from vector<16xf16> | |
%697 = vector.insert %696, %695 [1, 1, 0] : f16 into vector<4x16x16xf16> | |
%698 = vector.extract %105[1] : f16 from vector<16xf16> | |
%699 = vector.insert %698, %697 [1, 1, 1] : f16 into vector<4x16x16xf16> | |
%700 = vector.extract %106[1] : f16 from vector<16xf16> | |
%701 = vector.insert %700, %699 [1, 1, 2] : f16 into vector<4x16x16xf16> | |
%702 = vector.extract %107[1] : f16 from vector<16xf16> | |
%703 = vector.insert %702, %701 [1, 1, 3] : f16 into vector<4x16x16xf16> | |
%704 = vector.extract %108[1] : f16 from vector<16xf16> | |
%705 = vector.insert %704, %703 [1, 1, 4] : f16 into vector<4x16x16xf16> | |
%706 = vector.extract %109[1] : f16 from vector<16xf16> | |
%707 = vector.insert %706, %705 [1, 1, 5] : f16 into vector<4x16x16xf16> | |
%708 = vector.extract %110[1] : f16 from vector<16xf16> | |
%709 = vector.insert %708, %707 [1, 1, 6] : f16 into vector<4x16x16xf16> | |
%710 = vector.extract %111[1] : f16 from vector<16xf16> | |
%711 = vector.insert %710, %709 [1, 1, 7] : f16 into vector<4x16x16xf16> | |
%712 = vector.extract %112[1] : f16 from vector<16xf16> | |
%713 = vector.insert %712, %711 [1, 1, 8] : f16 into vector<4x16x16xf16> | |
%714 = vector.extract %113[1] : f16 from vector<16xf16> | |
%715 = vector.insert %714, %713 [1, 1, 9] : f16 into vector<4x16x16xf16> | |
%716 = vector.extract %114[1] : f16 from vector<16xf16> | |
%717 = vector.insert %716, %715 [1, 1, 10] : f16 into vector<4x16x16xf16> | |
%718 = vector.extract %115[1] : f16 from vector<16xf16> | |
%719 = vector.insert %718, %717 [1, 1, 11] : f16 into vector<4x16x16xf16> | |
%720 = vector.extract %116[1] : f16 from vector<16xf16> | |
%721 = vector.insert %720, %719 [1, 1, 12] : f16 into vector<4x16x16xf16> | |
%722 = vector.extract %117[1] : f16 from vector<16xf16> | |
%723 = vector.insert %722, %721 [1, 1, 13] : f16 into vector<4x16x16xf16> | |
%724 = vector.extract %118[1] : f16 from vector<16xf16> | |
%725 = vector.insert %724, %723 [1, 1, 14] : f16 into vector<4x16x16xf16> | |
%726 = vector.extract %119[1] : f16 from vector<16xf16> | |
%727 = vector.insert %726, %725 [1, 1, 15] : f16 into vector<4x16x16xf16> | |
%728 = vector.extract %104[2] : f16 from vector<16xf16> | |
%729 = vector.insert %728, %727 [1, 2, 0] : f16 into vector<4x16x16xf16> | |
%730 = vector.extract %105[2] : f16 from vector<16xf16> | |
%731 = vector.insert %730, %729 [1, 2, 1] : f16 into vector<4x16x16xf16> | |
%732 = vector.extract %106[2] : f16 from vector<16xf16> | |
%733 = vector.insert %732, %731 [1, 2, 2] : f16 into vector<4x16x16xf16> | |
%734 = vector.extract %107[2] : f16 from vector<16xf16> | |
%735 = vector.insert %734, %733 [1, 2, 3] : f16 into vector<4x16x16xf16> | |
%736 = vector.extract %108[2] : f16 from vector<16xf16> | |
%737 = vector.insert %736, %735 [1, 2, 4] : f16 into vector<4x16x16xf16> | |
%738 = vector.extract %109[2] : f16 from vector<16xf16> | |
%739 = vector.insert %738, %737 [1, 2, 5] : f16 into vector<4x16x16xf16> | |
%740 = vector.extract %110[2] : f16 from vector<16xf16> | |
%741 = vector.insert %740, %739 [1, 2, 6] : f16 into vector<4x16x16xf16> | |
%742 = vector.extract %111[2] : f16 from vector<16xf16> | |
%743 = vector.insert %742, %741 [1, 2, 7] : f16 into vector<4x16x16xf16> | |
%744 = vector.extract %112[2] : f16 from vector<16xf16> | |
%745 = vector.insert %744, %743 [1, 2, 8] : f16 into vector<4x16x16xf16> | |
%746 = vector.extract %113[2] : f16 from vector<16xf16> | |
%747 = vector.insert %746, %745 [1, 2, 9] : f16 into vector<4x16x16xf16> | |
%748 = vector.extract %114[2] : f16 from vector<16xf16> | |
%749 = vector.insert %748, %747 [1, 2, 10] : f16 into vector<4x16x16xf16> | |
%750 = vector.extract %115[2] : f16 from vector<16xf16> | |
%751 = vector.insert %750, %749 [1, 2, 11] : f16 into vector<4x16x16xf16> | |
%752 = vector.extract %116[2] : f16 from vector<16xf16> | |
%753 = vector.insert %752, %751 [1, 2, 12] : f16 into vector<4x16x16xf16> | |
%754 = vector.extract %117[2] : f16 from vector<16xf16> | |
%755 = vector.insert %754, %753 [1, 2, 13] : f16 into vector<4x16x16xf16> | |
%756 = vector.extract %118[2] : f16 from vector<16xf16> | |
%757 = vector.insert %756, %755 [1, 2, 14] : f16 into vector<4x16x16xf16> | |
%758 = vector.extract %119[2] : f16 from vector<16xf16> | |
%759 = vector.insert %758, %757 [1, 2, 15] : f16 into vector<4x16x16xf16> | |
%760 = vector.extract %104[3] : f16 from vector<16xf16> | |
%761 = vector.insert %760, %759 [1, 3, 0] : f16 into vector<4x16x16xf16> | |
%762 = vector.extract %105[3] : f16 from vector<16xf16> | |
%763 = vector.insert %762, %761 [1, 3, 1] : f16 into vector<4x16x16xf16> | |
%764 = vector.extract %106[3] : f16 from vector<16xf16> | |
%765 = vector.insert %764, %763 [1, 3, 2] : f16 into vector<4x16x16xf16> | |
%766 = vector.extract %107[3] : f16 from vector<16xf16> | |
%767 = vector.insert %766, %765 [1, 3, 3] : f16 into vector<4x16x16xf16> | |
%768 = vector.extract %108[3] : f16 from vector<16xf16> | |
%769 = vector.insert %768, %767 [1, 3, 4] : f16 into vector<4x16x16xf16> | |
%770 = vector.extract %109[3] : f16 from vector<16xf16> | |
%771 = vector.insert %770, %769 [1, 3, 5] : f16 into vector<4x16x16xf16> | |
%772 = vector.extract %110[3] : f16 from vector<16xf16> | |
%773 = vector.insert %772, %771 [1, 3, 6] : f16 into vector<4x16x16xf16> | |
%774 = vector.extract %111[3] : f16 from vector<16xf16> | |
%775 = vector.insert %774, %773 [1, 3, 7] : f16 into vector<4x16x16xf16> | |
%776 = vector.extract %112[3] : f16 from vector<16xf16> | |
%777 = vector.insert %776, %775 [1, 3, 8] : f16 into vector<4x16x16xf16> | |
%778 = vector.extract %113[3] : f16 from vector<16xf16> | |
%779 = vector.insert %778, %777 [1, 3, 9] : f16 into vector<4x16x16xf16> | |
%780 = vector.extract %114[3] : f16 from vector<16xf16> | |
%781 = vector.insert %780, %779 [1, 3, 10] : f16 into vector<4x16x16xf16> | |
%782 = vector.extract %115[3] : f16 from vector<16xf16> | |
%783 = vector.insert %782, %781 [1, 3, 11] : f16 into vector<4x16x16xf16> | |
%784 = vector.extract %116[3] : f16 from vector<16xf16> | |
%785 = vector.insert %784, %783 [1, 3, 12] : f16 into vector<4x16x16xf16> | |
%786 = vector.extract %117[3] : f16 from vector<16xf16> | |
%787 = vector.insert %786, %785 [1, 3, 13] : f16 into vector<4x16x16xf16> | |
%788 = vector.extract %118[3] : f16 from vector<16xf16> | |
%789 = vector.insert %788, %787 [1, 3, 14] : f16 into vector<4x16x16xf16> | |
%790 = vector.extract %119[3] : f16 from vector<16xf16> | |
%791 = vector.insert %790, %789 [1, 3, 15] : f16 into vector<4x16x16xf16> | |
%792 = vector.extract %104[4] : f16 from vector<16xf16> | |
%793 = vector.insert %792, %791 [1, 4, 0] : f16 into vector<4x16x16xf16> | |
%794 = vector.extract %105[4] : f16 from vector<16xf16> | |
%795 = vector.insert %794, %793 [1, 4, 1] : f16 into vector<4x16x16xf16> | |
%796 = vector.extract %106[4] : f16 from vector<16xf16> | |
%797 = vector.insert %796, %795 [1, 4, 2] : f16 into vector<4x16x16xf16> | |
%798 = vector.extract %107[4] : f16 from vector<16xf16> | |
%799 = vector.insert %798, %797 [1, 4, 3] : f16 into vector<4x16x16xf16> | |
%800 = vector.extract %108[4] : f16 from vector<16xf16> | |
%801 = vector.insert %800, %799 [1, 4, 4] : f16 into vector<4x16x16xf16> | |
%802 = vector.extract %109[4] : f16 from vector<16xf16> | |
%803 = vector.insert %802, %801 [1, 4, 5] : f16 into vector<4x16x16xf16> | |
%804 = vector.extract %110[4] : f16 from vector<16xf16> | |
%805 = vector.insert %804, %803 [1, 4, 6] : f16 into vector<4x16x16xf16> | |
%806 = vector.extract %111[4] : f16 from vector<16xf16> | |
%807 = vector.insert %806, %805 [1, 4, 7] : f16 into vector<4x16x16xf16> | |
%808 = vector.extract %112[4] : f16 from vector<16xf16> | |
%809 = vector.insert %808, %807 [1, 4, 8] : f16 into vector<4x16x16xf16> | |
%810 = vector.extract %113[4] : f16 from vector<16xf16> | |
%811 = vector.insert %810, %809 [1, 4, 9] : f16 into vector<4x16x16xf16> | |
%812 = vector.extract %114[4] : f16 from vector<16xf16> | |
%813 = vector.insert %812, %811 [1, 4, 10] : f16 into vector<4x16x16xf16> | |
%814 = vector.extract %115[4] : f16 from vector<16xf16> | |
%815 = vector.insert %814, %813 [1, 4, 11] : f16 into vector<4x16x16xf16> | |
%816 = vector.extract %116[4] : f16 from vector<16xf16> | |
%817 = vector.insert %816, %815 [1, 4, 12] : f16 into vector<4x16x16xf16> | |
%818 = vector.extract %117[4] : f16 from vector<16xf16> | |
%819 = vector.insert %818, %817 [1, 4, 13] : f16 into vector<4x16x16xf16> | |
%820 = vector.extract %118[4] : f16 from vector<16xf16> | |
%821 = vector.insert %820, %819 [1, 4, 14] : f16 into vector<4x16x16xf16> | |
%822 = vector.extract %119[4] : f16 from vector<16xf16> | |
%823 = vector.insert %822, %821 [1, 4, 15] : f16 into vector<4x16x16xf16> | |
%824 = vector.extract %104[5] : f16 from vector<16xf16> | |
%825 = vector.insert %824, %823 [1, 5, 0] : f16 into vector<4x16x16xf16> | |
%826 = vector.extract %105[5] : f16 from vector<16xf16> | |
%827 = vector.insert %826, %825 [1, 5, 1] : f16 into vector<4x16x16xf16> | |
%828 = vector.extract %106[5] : f16 from vector<16xf16> | |
%829 = vector.insert %828, %827 [1, 5, 2] : f16 into vector<4x16x16xf16> | |
%830 = vector.extract %107[5] : f16 from vector<16xf16> | |
%831 = vector.insert %830, %829 [1, 5, 3] : f16 into vector<4x16x16xf16> | |
%832 = vector.extract %108[5] : f16 from vector<16xf16> | |
%833 = vector.insert %832, %831 [1, 5, 4] : f16 into vector<4x16x16xf16> | |
%834 = vector.extract %109[5] : f16 from vector<16xf16> | |
%835 = vector.insert %834, %833 [1, 5, 5] : f16 into vector<4x16x16xf16> | |
%836 = vector.extract %110[5] : f16 from vector<16xf16> | |
%837 = vector.insert %836, %835 [1, 5, 6] : f16 into vector<4x16x16xf16> | |
%838 = vector.extract %111[5] : f16 from vector<16xf16> | |
%839 = vector.insert %838, %837 [1, 5, 7] : f16 into vector<4x16x16xf16> | |
%840 = vector.extract %112[5] : f16 from vector<16xf16> | |
%841 = vector.insert %840, %839 [1, 5, 8] : f16 into vector<4x16x16xf16> | |
%842 = vector.extract %113[5] : f16 from vector<16xf16> | |
%843 = vector.insert %842, %841 [1, 5, 9] : f16 into vector<4x16x16xf16> | |
%844 = vector.extract %114[5] : f16 from vector<16xf16> | |
%845 = vector.insert %844, %843 [1, 5, 10] : f16 into vector<4x16x16xf16> | |
%846 = vector.extract %115[5] : f16 from vector<16xf16> | |
%847 = vector.insert %846, %845 [1, 5, 11] : f16 into vector<4x16x16xf16> | |
%848 = vector.extract %116[5] : f16 from vector<16xf16> | |
%849 = vector.insert %848, %847 [1, 5, 12] : f16 into vector<4x16x16xf16> | |
%850 = vector.extract %117[5] : f16 from vector<16xf16> | |
%851 = vector.insert %850, %849 [1, 5, 13] : f16 into vector<4x16x16xf16> | |
%852 = vector.extract %118[5] : f16 from vector<16xf16> | |
%853 = vector.insert %852, %851 [1, 5, 14] : f16 into vector<4x16x16xf16> | |
%854 = vector.extract %119[5] : f16 from vector<16xf16> | |
%855 = vector.insert %854, %853 [1, 5, 15] : f16 into vector<4x16x16xf16> | |
%856 = vector.extract %104[6] : f16 from vector<16xf16> | |
%857 = vector.insert %856, %855 [1, 6, 0] : f16 into vector<4x16x16xf16> | |
%858 = vector.extract %105[6] : f16 from vector<16xf16> | |
%859 = vector.insert %858, %857 [1, 6, 1] : f16 into vector<4x16x16xf16> | |
%860 = vector.extract %106[6] : f16 from vector<16xf16> | |
%861 = vector.insert %860, %859 [1, 6, 2] : f16 into vector<4x16x16xf16> | |
%862 = vector.extract %107[6] : f16 from vector<16xf16> | |
%863 = vector.insert %862, %861 [1, 6, 3] : f16 into vector<4x16x16xf16> | |
%864 = vector.extract %108[6] : f16 from vector<16xf16> | |
%865 = vector.insert %864, %863 [1, 6, 4] : f16 into vector<4x16x16xf16> | |
%866 = vector.extract %109[6] : f16 from vector<16xf16> | |
%867 = vector.insert %866, %865 [1, 6, 5] : f16 into vector<4x16x16xf16> | |
%868 = vector.extract %110[6] : f16 from vector<16xf16> | |
%869 = vector.insert %868, %867 [1, 6, 6] : f16 into vector<4x16x16xf16> | |
%870 = vector.extract %111[6] : f16 from vector<16xf16> | |
%871 = vector.insert %870, %869 [1, 6, 7] : f16 into vector<4x16x16xf16> | |
%872 = vector.extract %112[6] : f16 from vector<16xf16> | |
%873 = vector.insert %872, %871 [1, 6, 8] : f16 into vector<4x16x16xf16> | |
%874 = vector.extract %113[6] : f16 from vector<16xf16> | |
%875 = vector.insert %874, %873 [1, 6, 9] : f16 into vector<4x16x16xf16> | |
%876 = vector.extract %114[6] : f16 from vector<16xf16> | |
%877 = vector.insert %876, %875 [1, 6, 10] : f16 into vector<4x16x16xf16> | |
%878 = vector.extract %115[6] : f16 from vector<16xf16> | |
%879 = vector.insert %878, %877 [1, 6, 11] : f16 into vector<4x16x16xf16> | |
%880 = vector.extract %116[6] : f16 from vector<16xf16> | |
%881 = vector.insert %880, %879 [1, 6, 12] : f16 into vector<4x16x16xf16> | |
%882 = vector.extract %117[6] : f16 from vector<16xf16> | |
%883 = vector.insert %882, %881 [1, 6, 13] : f16 into vector<4x16x16xf16> | |
%884 = vector.extract %118[6] : f16 from vector<16xf16> | |
%885 = vector.insert %884, %883 [1, 6, 14] : f16 into vector<4x16x16xf16> | |
%886 = vector.extract %119[6] : f16 from vector<16xf16> | |
%887 = vector.insert %886, %885 [1, 6, 15] : f16 into vector<4x16x16xf16> | |
%888 = vector.extract %104[7] : f16 from vector<16xf16> | |
%889 = vector.insert %888, %887 [1, 7, 0] : f16 into vector<4x16x16xf16> | |
%890 = vector.extract %105[7] : f16 from vector<16xf16> | |
%891 = vector.insert %890, %889 [1, 7, 1] : f16 into vector<4x16x16xf16> | |
%892 = vector.extract %106[7] : f16 from vector<16xf16> | |
%893 = vector.insert %892, %891 [1, 7, 2] : f16 into vector<4x16x16xf16> | |
%894 = vector.extract %107[7] : f16 from vector<16xf16> | |
%895 = vector.insert %894, %893 [1, 7, 3] : f16 into vector<4x16x16xf16> | |
%896 = vector.extract %108[7] : f16 from vector<16xf16> | |
%897 = vector.insert %896, %895 [1, 7, 4] : f16 into vector<4x16x16xf16> | |
%898 = vector.extract %109[7] : f16 from vector<16xf16> | |
%899 = vector.insert %898, %897 [1, 7, 5] : f16 into vector<4x16x16xf16> | |
%900 = vector.extract %110[7] : f16 from vector<16xf16> | |
%901 = vector.insert %900, %899 [1, 7, 6] : f16 into vector<4x16x16xf16> | |
%902 = vector.extract %111[7] : f16 from vector<16xf16> | |
%903 = vector.insert %902, %901 [1, 7, 7] : f16 into vector<4x16x16xf16> | |
%904 = vector.extract %112[7] : f16 from vector<16xf16> | |
%905 = vector.insert %904, %903 [1, 7, 8] : f16 into vector<4x16x16xf16> | |
%906 = vector.extract %113[7] : f16 from vector<16xf16> | |
%907 = vector.insert %906, %905 [1, 7, 9] : f16 into vector<4x16x16xf16> | |
%908 = vector.extract %114[7] : f16 from vector<16xf16> | |
%909 = vector.insert %908, %907 [1, 7, 10] : f16 into vector<4x16x16xf16> | |
%910 = vector.extract %115[7] : f16 from vector<16xf16> | |
%911 = vector.insert %910, %909 [1, 7, 11] : f16 into vector<4x16x16xf16> | |
%912 = vector.extract %116[7] : f16 from vector<16xf16> | |
%913 = vector.insert %912, %911 [1, 7, 12] : f16 into vector<4x16x16xf16> | |
%914 = vector.extract %117[7] : f16 from vector<16xf16> | |
%915 = vector.insert %914, %913 [1, 7, 13] : f16 into vector<4x16x16xf16> | |
%916 = vector.extract %118[7] : f16 from vector<16xf16> | |
%917 = vector.insert %916, %915 [1, 7, 14] : f16 into vector<4x16x16xf16> | |
%918 = vector.extract %119[7] : f16 from vector<16xf16> | |
%919 = vector.insert %918, %917 [1, 7, 15] : f16 into vector<4x16x16xf16> | |
%920 = vector.extract %104[8] : f16 from vector<16xf16> | |
%921 = vector.insert %920, %919 [1, 8, 0] : f16 into vector<4x16x16xf16> | |
%922 = vector.extract %105[8] : f16 from vector<16xf16> | |
%923 = vector.insert %922, %921 [1, 8, 1] : f16 into vector<4x16x16xf16> | |
%924 = vector.extract %106[8] : f16 from vector<16xf16> | |
%925 = vector.insert %924, %923 [1, 8, 2] : f16 into vector<4x16x16xf16> | |
%926 = vector.extract %107[8] : f16 from vector<16xf16> | |
%927 = vector.insert %926, %925 [1, 8, 3] : f16 into vector<4x16x16xf16> | |
%928 = vector.extract %108[8] : f16 from vector<16xf16> | |
%929 = vector.insert %928, %927 [1, 8, 4] : f16 into vector<4x16x16xf16> | |
%930 = vector.extract %109[8] : f16 from vector<16xf16> | |
%931 = vector.insert %930, %929 [1, 8, 5] : f16 into vector<4x16x16xf16> | |
%932 = vector.extract %110[8] : f16 from vector<16xf16> | |
%933 = vector.insert %932, %931 [1, 8, 6] : f16 into vector<4x16x16xf16> | |
%934 = vector.extract %111[8] : f16 from vector<16xf16> | |
%935 = vector.insert %934, %933 [1, 8, 7] : f16 into vector<4x16x16xf16> | |
%936 = vector.extract %112[8] : f16 from vector<16xf16> | |
%937 = vector.insert %936, %935 [1, 8, 8] : f16 into vector<4x16x16xf16> | |
%938 = vector.extract %113[8] : f16 from vector<16xf16> | |
%939 = vector.insert %938, %937 [1, 8, 9] : f16 into vector<4x16x16xf16> | |
%940 = vector.extract %114[8] : f16 from vector<16xf16> | |
%941 = vector.insert %940, %939 [1, 8, 10] : f16 into vector<4x16x16xf16> | |
%942 = vector.extract %115[8] : f16 from vector<16xf16> | |
%943 = vector.insert %942, %941 [1, 8, 11] : f16 into vector<4x16x16xf16> | |
%944 = vector.extract %116[8] : f16 from vector<16xf16> | |
%945 = vector.insert %944, %943 [1, 8, 12] : f16 into vector<4x16x16xf16> | |
%946 = vector.extract %117[8] : f16 from vector<16xf16> | |
%947 = vector.insert %946, %945 [1, 8, 13] : f16 into vector<4x16x16xf16> | |
%948 = vector.extract %118[8] : f16 from vector<16xf16> | |
%949 = vector.insert %948, %947 [1, 8, 14] : f16 into vector<4x16x16xf16> | |
%950 = vector.extract %119[8] : f16 from vector<16xf16> | |
%951 = vector.insert %950, %949 [1, 8, 15] : f16 into vector<4x16x16xf16> | |
%952 = vector.extract %104[9] : f16 from vector<16xf16> | |
%953 = vector.insert %952, %951 [1, 9, 0] : f16 into vector<4x16x16xf16> | |
%954 = vector.extract %105[9] : f16 from vector<16xf16> | |
%955 = vector.insert %954, %953 [1, 9, 1] : f16 into vector<4x16x16xf16> | |
%956 = vector.extract %106[9] : f16 from vector<16xf16> | |
%957 = vector.insert %956, %955 [1, 9, 2] : f16 into vector<4x16x16xf16> | |
%958 = vector.extract %107[9] : f16 from vector<16xf16> | |
%959 = vector.insert %958, %957 [1, 9, 3] : f16 into vector<4x16x16xf16> | |
%960 = vector.extract %108[9] : f16 from vector<16xf16> | |
%961 = vector.insert %960, %959 [1, 9, 4] : f16 into vector<4x16x16xf16> | |
%962 = vector.extract %109[9] : f16 from vector<16xf16> | |
%963 = vector.insert %962, %961 [1, 9, 5] : f16 into vector<4x16x16xf16> | |
%964 = vector.extract %110[9] : f16 from vector<16xf16> | |
%965 = vector.insert %964, %963 [1, 9, 6] : f16 into vector<4x16x16xf16> | |
%966 = vector.extract %111[9] : f16 from vector<16xf16> | |
%967 = vector.insert %966, %965 [1, 9, 7] : f16 into vector<4x16x16xf16> | |
%968 = vector.extract %112[9] : f16 from vector<16xf16> | |
%969 = vector.insert %968, %967 [1, 9, 8] : f16 into vector<4x16x16xf16> | |
%970 = vector.extract %113[9] : f16 from vector<16xf16> | |
%971 = vector.insert %970, %969 [1, 9, 9] : f16 into vector<4x16x16xf16> | |
%972 = vector.extract %114[9] : f16 from vector<16xf16> | |
%973 = vector.insert %972, %971 [1, 9, 10] : f16 into vector<4x16x16xf16> | |
%974 = vector.extract %115[9] : f16 from vector<16xf16> | |
%975 = vector.insert %974, %973 [1, 9, 11] : f16 into vector<4x16x16xf16> | |
%976 = vector.extract %116[9] : f16 from vector<16xf16> | |
%977 = vector.insert %976, %975 [1, 9, 12] : f16 into vector<4x16x16xf16> | |
%978 = vector.extract %117[9] : f16 from vector<16xf16> | |
%979 = vector.insert %978, %977 [1, 9, 13] : f16 into vector<4x16x16xf16> | |
%980 = vector.extract %118[9] : f16 from vector<16xf16> | |
%981 = vector.insert %980, %979 [1, 9, 14] : f16 into vector<4x16x16xf16> | |
%982 = vector.extract %119[9] : f16 from vector<16xf16> | |
%983 = vector.insert %982, %981 [1, 9, 15] : f16 into vector<4x16x16xf16> | |
%984 = vector.extract %104[10] : f16 from vector<16xf16> | |
%985 = vector.insert %984, %983 [1, 10, 0] : f16 into vector<4x16x16xf16> | |
%986 = vector.extract %105[10] : f16 from vector<16xf16> | |
%987 = vector.insert %986, %985 [1, 10, 1] : f16 into vector<4x16x16xf16> | |
%988 = vector.extract %106[10] : f16 from vector<16xf16> | |
%989 = vector.insert %988, %987 [1, 10, 2] : f16 into vector<4x16x16xf16> | |
%990 = vector.extract %107[10] : f16 from vector<16xf16> | |
%991 = vector.insert %990, %989 [1, 10, 3] : f16 into vector<4x16x16xf16> | |
%992 = vector.extract %108[10] : f16 from vector<16xf16> | |
%993 = vector.insert %992, %991 [1, 10, 4] : f16 into vector<4x16x16xf16> | |
%994 = vector.extract %109[10] : f16 from vector<16xf16> | |
%995 = vector.insert %994, %993 [1, 10, 5] : f16 into vector<4x16x16xf16> | |
%996 = vector.extract %110[10] : f16 from vector<16xf16> | |
%997 = vector.insert %996, %995 [1, 10, 6] : f16 into vector<4x16x16xf16> | |
%998 = vector.extract %111[10] : f16 from vector<16xf16> | |
%999 = vector.insert %998, %997 [1, 10, 7] : f16 into vector<4x16x16xf16> | |
%1000 = vector.extract %112[10] : f16 from vector<16xf16> | |
%1001 = vector.insert %1000, %999 [1, 10, 8] : f16 into vector<4x16x16xf16> | |
%1002 = vector.extract %113[10] : f16 from vector<16xf16> | |
%1003 = vector.insert %1002, %1001 [1, 10, 9] : f16 into vector<4x16x16xf16> | |
%1004 = vector.extract %114[10] : f16 from vector<16xf16> | |
%1005 = vector.insert %1004, %1003 [1, 10, 10] : f16 into vector<4x16x16xf16> | |
%1006 = vector.extract %115[10] : f16 from vector<16xf16> | |
%1007 = vector.insert %1006, %1005 [1, 10, 11] : f16 into vector<4x16x16xf16> | |
%1008 = vector.extract %116[10] : f16 from vector<16xf16> | |
%1009 = vector.insert %1008, %1007 [1, 10, 12] : f16 into vector<4x16x16xf16> | |
%1010 = vector.extract %117[10] : f16 from vector<16xf16> | |
%1011 = vector.insert %1010, %1009 [1, 10, 13] : f16 into vector<4x16x16xf16> | |
%1012 = vector.extract %118[10] : f16 from vector<16xf16> | |
%1013 = vector.insert %1012, %1011 [1, 10, 14] : f16 into vector<4x16x16xf16> | |
%1014 = vector.extract %119[10] : f16 from vector<16xf16> | |
%1015 = vector.insert %1014, %1013 [1, 10, 15] : f16 into vector<4x16x16xf16> | |
%1016 = vector.extract %104[11] : f16 from vector<16xf16> | |
%1017 = vector.insert %1016, %1015 [1, 11, 0] : f16 into vector<4x16x16xf16> | |
%1018 = vector.extract %105[11] : f16 from vector<16xf16> | |
%1019 = vector.insert %1018, %1017 [1, 11, 1] : f16 into vector<4x16x16xf16> | |
%1020 = vector.extract %106[11] : f16 from vector<16xf16> | |
%1021 = vector.insert %1020, %1019 [1, 11, 2] : f16 into vector<4x16x16xf16> | |
%1022 = vector.extract %107[11] : f16 from vector<16xf16> | |
%1023 = vector.insert %1022, %1021 [1, 11, 3] : f16 into vector<4x16x16xf16> | |
%1024 = vector.extract %108[11] : f16 from vector<16xf16> | |
%1025 = vector.insert %1024, %1023 [1, 11, 4] : f16 into vector<4x16x16xf16> | |
%1026 = vector.extract %109[11] : f16 from vector<16xf16> | |
%1027 = vector.insert %1026, %1025 [1, 11, 5] : f16 into vector<4x16x16xf16> | |
%1028 = vector.extract %110[11] : f16 from vector<16xf16> | |
%1029 = vector.insert %1028, %1027 [1, 11, 6] : f16 into vector<4x16x16xf16> | |
%1030 = vector.extract %111[11] : f16 from vector<16xf16> | |
%1031 = vector.insert %1030, %1029 [1, 11, 7] : f16 into vector<4x16x16xf16> | |
%1032 = vector.extract %112[11] : f16 from vector<16xf16> | |
%1033 = vector.insert %1032, %1031 [1, 11, 8] : f16 into vector<4x16x16xf16> | |
%1034 = vector.extract %113[11] : f16 from vector<16xf16> | |
%1035 = vector.insert %1034, %1033 [1, 11, 9] : f16 into vector<4x16x16xf16> | |
%1036 = vector.extract %114[11] : f16 from vector<16xf16> | |
%1037 = vector.insert %1036, %1035 [1, 11, 10] : f16 into vector<4x16x16xf16> | |
%1038 = vector.extract %115[11] : f16 from vector<16xf16> | |
%1039 = vector.insert %1038, %1037 [1, 11, 11] : f16 into vector<4x16x16xf16> | |
%1040 = vector.extract %116[11] : f16 from vector<16xf16> | |
%1041 = vector.insert %1040, %1039 [1, 11, 12] : f16 into vector<4x16x16xf16> | |
%1042 = vector.extract %117[11] : f16 from vector<16xf16> | |
%1043 = vector.insert %1042, %1041 [1, 11, 13] : f16 into vector<4x16x16xf16> | |
%1044 = vector.extract %118[11] : f16 from vector<16xf16> | |
%1045 = vector.insert %1044, %1043 [1, 11, 14] : f16 into vector<4x16x16xf16> | |
%1046 = vector.extract %119[11] : f16 from vector<16xf16> | |
%1047 = vector.insert %1046, %1045 [1, 11, 15] : f16 into vector<4x16x16xf16> | |
%1048 = vector.extract %104[12] : f16 from vector<16xf16> | |
%1049 = vector.insert %1048, %1047 [1, 12, 0] : f16 into vector<4x16x16xf16> | |
%1050 = vector.extract %105[12] : f16 from vector<16xf16> | |
%1051 = vector.insert %1050, %1049 [1, 12, 1] : f16 into vector<4x16x16xf16> | |
%1052 = vector.extract %106[12] : f16 from vector<16xf16> | |
%1053 = vector.insert %1052, %1051 [1, 12, 2] : f16 into vector<4x16x16xf16> | |
%1054 = vector.extract %107[12] : f16 from vector<16xf16> | |
%1055 = vector.insert %1054, %1053 [1, 12, 3] : f16 into vector<4x16x16xf16> | |
%1056 = vector.extract %108[12] : f16 from vector<16xf16> | |
%1057 = vector.insert %1056, %1055 [1, 12, 4] : f16 into vector<4x16x16xf16> | |
%1058 = vector.extract %109[12] : f16 from vector<16xf16> | |
%1059 = vector.insert %1058, %1057 [1, 12, 5] : f16 into vector<4x16x16xf16> | |
%1060 = vector.extract %110[12] : f16 from vector<16xf16> | |
%1061 = vector.insert %1060, %1059 [1, 12, 6] : f16 into vector<4x16x16xf16> | |
%1062 = vector.extract %111[12] : f16 from vector<16xf16> | |
%1063 = vector.insert %1062, %1061 [1, 12, 7] : f16 into vector<4x16x16xf16> | |
%1064 = vector.extract %112[12] : f16 from vector<16xf16> | |
%1065 = vector.insert %1064, %1063 [1, 12, 8] : f16 into vector<4x16x16xf16> | |
%1066 = vector.extract %113[12] : f16 from vector<16xf16> | |
%1067 = vector.insert %1066, %1065 [1, 12, 9] : f16 into vector<4x16x16xf16> | |
%1068 = vector.extract %114[12] : f16 from vector<16xf16> | |
%1069 = vector.insert %1068, %1067 [1, 12, 10] : f16 into vector<4x16x16xf16> | |
%1070 = vector.extract %115[12] : f16 from vector<16xf16> | |
%1071 = vector.insert %1070, %1069 [1, 12, 11] : f16 into vector<4x16x16xf16> | |
%1072 = vector.extract %116[12] : f16 from vector<16xf16> | |
%1073 = vector.insert %1072, %1071 [1, 12, 12] : f16 into vector<4x16x16xf16> | |
%1074 = vector.extract %117[12] : f16 from vector<16xf16> | |
%1075 = vector.insert %1074, %1073 [1, 12, 13] : f16 into vector<4x16x16xf16> | |
%1076 = vector.extract %118[12] : f16 from vector<16xf16> | |
%1077 = vector.insert %1076, %1075 [1, 12, 14] : f16 into vector<4x16x16xf16> | |
%1078 = vector.extract %119[12] : f16 from vector<16xf16> | |
%1079 = vector.insert %1078, %1077 [1, 12, 15] : f16 into vector<4x16x16xf16> | |
%1080 = vector.extract %104[13] : f16 from vector<16xf16> | |
%1081 = vector.insert %1080, %1079 [1, 13, 0] : f16 into vector<4x16x16xf16> | |
%1082 = vector.extract %105[13] : f16 from vector<16xf16> | |
%1083 = vector.insert %1082, %1081 [1, 13, 1] : f16 into vector<4x16x16xf16> | |
%1084 = vector.extract %106[13] : f16 from vector<16xf16> | |
%1085 = vector.insert %1084, %1083 [1, 13, 2] : f16 into vector<4x16x16xf16> | |
%1086 = vector.extract %107[13] : f16 from vector<16xf16> | |
%1087 = vector.insert %1086, %1085 [1, 13, 3] : f16 into vector<4x16x16xf16> | |
%1088 = vector.extract %108[13] : f16 from vector<16xf16> | |
%1089 = vector.insert %1088, %1087 [1, 13, 4] : f16 into vector<4x16x16xf16> | |
%1090 = vector.extract %109[13] : f16 from vector<16xf16> | |
%1091 = vector.insert %1090, %1089 [1, 13, 5] : f16 into vector<4x16x16xf16> | |
%1092 = vector.extract %110[13] : f16 from vector<16xf16> | |
%1093 = vector.insert %1092, %1091 [1, 13, 6] : f16 into vector<4x16x16xf16> | |
%1094 = vector.extract %111[13] : f16 from vector<16xf16> | |
%1095 = vector.insert %1094, %1093 [1, 13, 7] : f16 into vector<4x16x16xf16> | |
%1096 = vector.extract %112[13] : f16 from vector<16xf16> | |
%1097 = vector.insert %1096, %1095 [1, 13, 8] : f16 into vector<4x16x16xf16> | |
%1098 = vector.extract %113[13] : f16 from vector<16xf16> | |
%1099 = vector.insert %1098, %1097 [1, 13, 9] : f16 into vector<4x16x16xf16> | |
%1100 = vector.extract %114[13] : f16 from vector<16xf16> | |
%1101 = vector.insert %1100, %1099 [1, 13, 10] : f16 into vector<4x16x16xf16> | |
%1102 = vector.extract %115[13] : f16 from vector<16xf16> | |
%1103 = vector.insert %1102, %1101 [1, 13, 11] : f16 into vector<4x16x16xf16> | |
%1104 = vector.extract %116[13] : f16 from vector<16xf16> | |
%1105 = vector.insert %1104, %1103 [1, 13, 12] : f16 into vector<4x16x16xf16> | |
%1106 = vector.extract %117[13] : f16 from vector<16xf16> | |
%1107 = vector.insert %1106, %1105 [1, 13, 13] : f16 into vector<4x16x16xf16> | |
%1108 = vector.extract %118[13] : f16 from vector<16xf16> | |
%1109 = vector.insert %1108, %1107 [1, 13, 14] : f16 into vector<4x16x16xf16> | |
%1110 = vector.extract %119[13] : f16 from vector<16xf16> | |
%1111 = vector.insert %1110, %1109 [1, 13, 15] : f16 into vector<4x16x16xf16> | |
%1112 = vector.extract %104[14] : f16 from vector<16xf16> | |
%1113 = vector.insert %1112, %1111 [1, 14, 0] : f16 into vector<4x16x16xf16> | |
%1114 = vector.extract %105[14] : f16 from vector<16xf16> | |
%1115 = vector.insert %1114, %1113 [1, 14, 1] : f16 into vector<4x16x16xf16> | |
%1116 = vector.extract %106[14] : f16 from vector<16xf16> | |
%1117 = vector.insert %1116, %1115 [1, 14, 2] : f16 into vector<4x16x16xf16> | |
%1118 = vector.extract %107[14] : f16 from vector<16xf16> | |
%1119 = vector.insert %1118, %1117 [1, 14, 3] : f16 into vector<4x16x16xf16> | |
%1120 = vector.extract %108[14] : f16 from vector<16xf16> | |
%1121 = vector.insert %1120, %1119 [1, 14, 4] : f16 into vector<4x16x16xf16> | |
%1122 = vector.extract %109[14] : f16 from vector<16xf16> | |
%1123 = vector.insert %1122, %1121 [1, 14, 5] : f16 into vector<4x16x16xf16> | |
%1124 = vector.extract %110[14] : f16 from vector<16xf16> | |
%1125 = vector.insert %1124, %1123 [1, 14, 6] : f16 into vector<4x16x16xf16> | |
%1126 = vector.extract %111[14] : f16 from vector<16xf16> | |
%1127 = vector.insert %1126, %1125 [1, 14, 7] : f16 into vector<4x16x16xf16> | |
%1128 = vector.extract %112[14] : f16 from vector<16xf16> | |
%1129 = vector.insert %1128, %1127 [1, 14, 8] : f16 into vector<4x16x16xf16> | |
%1130 = vector.extract %113[14] : f16 from vector<16xf16> | |
%1131 = vector.insert %1130, %1129 [1, 14, 9] : f16 into vector<4x16x16xf16> | |
%1132 = vector.extract %114[14] : f16 from vector<16xf16> | |
%1133 = vector.insert %1132, %1131 [1, 14, 10] : f16 into vector<4x16x16xf16> | |
%1134 = vector.extract %115[14] : f16 from vector<16xf16> | |
%1135 = vector.insert %1134, %1133 [1, 14, 11] : f16 into vector<4x16x16xf16> | |
%1136 = vector.extract %116[14] : f16 from vector<16xf16> | |
%1137 = vector.insert %1136, %1135 [1, 14, 12] : f16 into vector<4x16x16xf16> | |
%1138 = vector.extract %117[14] : f16 from vector<16xf16> | |
%1139 = vector.insert %1138, %1137 [1, 14, 13] : f16 into vector<4x16x16xf16> | |
%1140 = vector.extract %118[14] : f16 from vector<16xf16> | |
%1141 = vector.insert %1140, %1139 [1, 14, 14] : f16 into vector<4x16x16xf16> | |
%1142 = vector.extract %119[14] : f16 from vector<16xf16> | |
%1143 = vector.insert %1142, %1141 [1, 14, 15] : f16 into vector<4x16x16xf16> | |
%1144 = vector.extract %104[15] : f16 from vector<16xf16> | |
%1145 = vector.insert %1144, %1143 [1, 15, 0] : f16 into vector<4x16x16xf16> | |
%1146 = vector.extract %105[15] : f16 from vector<16xf16> | |
%1147 = vector.insert %1146, %1145 [1, 15, 1] : f16 into vector<4x16x16xf16> | |
%1148 = vector.extract %106[15] : f16 from vector<16xf16> | |
%1149 = vector.insert %1148, %1147 [1, 15, 2] : f16 into vector<4x16x16xf16> | |
%1150 = vector.extract %107[15] : f16 from vector<16xf16> | |
%1151 = vector.insert %1150, %1149 [1, 15, 3] : f16 into vector<4x16x16xf16> | |
%1152 = vector.extract %108[15] : f16 from vector<16xf16> | |
%1153 = vector.insert %1152, %1151 [1, 15, 4] : f16 into vector<4x16x16xf16> | |
%1154 = vector.extract %109[15] : f16 from vector<16xf16> | |
%1155 = vector.insert %1154, %1153 [1, 15, 5] : f16 into vector<4x16x16xf16> | |
%1156 = vector.extract %110[15] : f16 from vector<16xf16> | |
%1157 = vector.insert %1156, %1155 [1, 15, 6] : f16 into vector<4x16x16xf16> | |
%1158 = vector.extract %111[15] : f16 from vector<16xf16> | |
%1159 = vector.insert %1158, %1157 [1, 15, 7] : f16 into vector<4x16x16xf16> | |
%1160 = vector.extract %112[15] : f16 from vector<16xf16> | |
%1161 = vector.insert %1160, %1159 [1, 15, 8] : f16 into vector<4x16x16xf16> | |
%1162 = vector.extract %113[15] : f16 from vector<16xf16> | |
%1163 = vector.insert %1162, %1161 [1, 15, 9] : f16 into vector<4x16x16xf16> | |
%1164 = vector.extract %114[15] : f16 from vector<16xf16> | |
%1165 = vector.insert %1164, %1163 [1, 15, 10] : f16 into vector<4x16x16xf16> | |
%1166 = vector.extract %115[15] : f16 from vector<16xf16> | |
%1167 = vector.insert %1166, %1165 [1, 15, 11] : f16 into vector<4x16x16xf16> | |
%1168 = vector.extract %116[15] : f16 from vector<16xf16> | |
%1169 = vector.insert %1168, %1167 [1, 15, 12] : f16 into vector<4x16x16xf16> | |
%1170 = vector.extract %117[15] : f16 from vector<16xf16> | |
%1171 = vector.insert %1170, %1169 [1, 15, 13] : f16 into vector<4x16x16xf16> | |
%1172 = vector.extract %118[15] : f16 from vector<16xf16> | |
%1173 = vector.insert %1172, %1171 [1, 15, 14] : f16 into vector<4x16x16xf16> | |
%1174 = vector.extract %119[15] : f16 from vector<16xf16> | |
%1175 = vector.insert %1174, %1173 [1, 15, 15] : f16 into vector<4x16x16xf16> | |
%1176 = vector.extract %120[0] : f16 from vector<16xf16> | |
%1177 = vector.insert %1176, %1175 [2, 0, 0] : f16 into vector<4x16x16xf16> | |
%1178 = vector.extract %121[0] : f16 from vector<16xf16> | |
%1179 = vector.insert %1178, %1177 [2, 0, 1] : f16 into vector<4x16x16xf16> | |
%1180 = vector.extract %122[0] : f16 from vector<16xf16> | |
%1181 = vector.insert %1180, %1179 [2, 0, 2] : f16 into vector<4x16x16xf16> | |
%1182 = vector.extract %123[0] : f16 from vector<16xf16> | |
%1183 = vector.insert %1182, %1181 [2, 0, 3] : f16 into vector<4x16x16xf16> | |
%1184 = vector.extract %124[0] : f16 from vector<16xf16> | |
%1185 = vector.insert %1184, %1183 [2, 0, 4] : f16 into vector<4x16x16xf16> | |
%1186 = vector.extract %125[0] : f16 from vector<16xf16> | |
%1187 = vector.insert %1186, %1185 [2, 0, 5] : f16 into vector<4x16x16xf16> | |
%1188 = vector.extract %126[0] : f16 from vector<16xf16> | |
%1189 = vector.insert %1188, %1187 [2, 0, 6] : f16 into vector<4x16x16xf16> | |
%1190 = vector.extract %127[0] : f16 from vector<16xf16> | |
%1191 = vector.insert %1190, %1189 [2, 0, 7] : f16 into vector<4x16x16xf16> | |
%1192 = vector.extract %128[0] : f16 from vector<16xf16> | |
%1193 = vector.insert %1192, %1191 [2, 0, 8] : f16 into vector<4x16x16xf16> | |
%1194 = vector.extract %129[0] : f16 from vector<16xf16> | |
%1195 = vector.insert %1194, %1193 [2, 0, 9] : f16 into vector<4x16x16xf16> | |
%1196 = vector.extract %130[0] : f16 from vector<16xf16> | |
%1197 = vector.insert %1196, %1195 [2, 0, 10] : f16 into vector<4x16x16xf16> | |
%1198 = vector.extract %131[0] : f16 from vector<16xf16> | |
%1199 = vector.insert %1198, %1197 [2, 0, 11] : f16 into vector<4x16x16xf16> | |
%1200 = vector.extract %132[0] : f16 from vector<16xf16> | |
%1201 = vector.insert %1200, %1199 [2, 0, 12] : f16 into vector<4x16x16xf16> | |
%1202 = vector.extract %133[0] : f16 from vector<16xf16> | |
%1203 = vector.insert %1202, %1201 [2, 0, 13] : f16 into vector<4x16x16xf16> | |
%1204 = vector.extract %134[0] : f16 from vector<16xf16> | |
%1205 = vector.insert %1204, %1203 [2, 0, 14] : f16 into vector<4x16x16xf16> | |
%1206 = vector.extract %135[0] : f16 from vector<16xf16> | |
%1207 = vector.insert %1206, %1205 [2, 0, 15] : f16 into vector<4x16x16xf16> | |
%1208 = vector.extract %120[1] : f16 from vector<16xf16> | |
%1209 = vector.insert %1208, %1207 [2, 1, 0] : f16 into vector<4x16x16xf16> | |
%1210 = vector.extract %121[1] : f16 from vector<16xf16> | |
%1211 = vector.insert %1210, %1209 [2, 1, 1] : f16 into vector<4x16x16xf16> | |
%1212 = vector.extract %122[1] : f16 from vector<16xf16> | |
%1213 = vector.insert %1212, %1211 [2, 1, 2] : f16 into vector<4x16x16xf16> | |
%1214 = vector.extract %123[1] : f16 from vector<16xf16> | |
%1215 = vector.insert %1214, %1213 [2, 1, 3] : f16 into vector<4x16x16xf16> | |
%1216 = vector.extract %124[1] : f16 from vector<16xf16> | |
%1217 = vector.insert %1216, %1215 [2, 1, 4] : f16 into vector<4x16x16xf16> | |
%1218 = vector.extract %125[1] : f16 from vector<16xf16> | |
%1219 = vector.insert %1218, %1217 [2, 1, 5] : f16 into vector<4x16x16xf16> | |
%1220 = vector.extract %126[1] : f16 from vector<16xf16> | |
%1221 = vector.insert %1220, %1219 [2, 1, 6] : f16 into vector<4x16x16xf16> | |
%1222 = vector.extract %127[1] : f16 from vector<16xf16> | |
%1223 = vector.insert %1222, %1221 [2, 1, 7] : f16 into vector<4x16x16xf16> | |
%1224 = vector.extract %128[1] : f16 from vector<16xf16> | |
%1225 = vector.insert %1224, %1223 [2, 1, 8] : f16 into vector<4x16x16xf16> | |
%1226 = vector.extract %129[1] : f16 from vector<16xf16> | |
%1227 = vector.insert %1226, %1225 [2, 1, 9] : f16 into vector<4x16x16xf16> | |
%1228 = vector.extract %130[1] : f16 from vector<16xf16> | |
%1229 = vector.insert %1228, %1227 [2, 1, 10] : f16 into vector<4x16x16xf16> | |
%1230 = vector.extract %131[1] : f16 from vector<16xf16> | |
%1231 = vector.insert %1230, %1229 [2, 1, 11] : f16 into vector<4x16x16xf16> | |
%1232 = vector.extract %132[1] : f16 from vector<16xf16> | |
%1233 = vector.insert %1232, %1231 [2, 1, 12] : f16 into vector<4x16x16xf16> | |
%1234 = vector.extract %133[1] : f16 from vector<16xf16> | |
%1235 = vector.insert %1234, %1233 [2, 1, 13] : f16 into vector<4x16x16xf16> | |
%1236 = vector.extract %134[1] : f16 from vector<16xf16> | |
%1237 = vector.insert %1236, %1235 [2, 1, 14] : f16 into vector<4x16x16xf16> | |
%1238 = vector.extract %135[1] : f16 from vector<16xf16> | |
%1239 = vector.insert %1238, %1237 [2, 1, 15] : f16 into vector<4x16x16xf16> | |
%1240 = vector.extract %120[2] : f16 from vector<16xf16> | |
%1241 = vector.insert %1240, %1239 [2, 2, 0] : f16 into vector<4x16x16xf16> | |
%1242 = vector.extract %121[2] : f16 from vector<16xf16> | |
%1243 = vector.insert %1242, %1241 [2, 2, 1] : f16 into vector<4x16x16xf16> | |
%1244 = vector.extract %122[2] : f16 from vector<16xf16> | |
%1245 = vector.insert %1244, %1243 [2, 2, 2] : f16 into vector<4x16x16xf16> | |
%1246 = vector.extract %123[2] : f16 from vector<16xf16> | |
%1247 = vector.insert %1246, %1245 [2, 2, 3] : f16 into vector<4x16x16xf16> | |
%1248 = vector.extract %124[2] : f16 from vector<16xf16> | |
%1249 = vector.insert %1248, %1247 [2, 2, 4] : f16 into vector<4x16x16xf16> | |
%1250 = vector.extract %125[2] : f16 from vector<16xf16> | |
%1251 = vector.insert %1250, %1249 [2, 2, 5] : f16 into vector<4x16x16xf16> | |
%1252 = vector.extract %126[2] : f16 from vector<16xf16> | |
%1253 = vector.insert %1252, %1251 [2, 2, 6] : f16 into vector<4x16x16xf16> | |
%1254 = vector.extract %127[2] : f16 from vector<16xf16> | |
%1255 = vector.insert %1254, %1253 [2, 2, 7] : f16 into vector<4x16x16xf16> | |
%1256 = vector.extract %128[2] : f16 from vector<16xf16> | |
%1257 = vector.insert %1256, %1255 [2, 2, 8] : f16 into vector<4x16x16xf16> | |
%1258 = vector.extract %129[2] : f16 from vector<16xf16> | |
%1259 = vector.insert %1258, %1257 [2, 2, 9] : f16 into vector<4x16x16xf16> | |
%1260 = vector.extract %130[2] : f16 from vector<16xf16> | |
%1261 = vector.insert %1260, %1259 [2, 2, 10] : f16 into vector<4x16x16xf16> | |
%1262 = vector.extract %131[2] : f16 from vector<16xf16> | |
%1263 = vector.insert %1262, %1261 [2, 2, 11] : f16 into vector<4x16x16xf16> | |
%1264 = vector.extract %132[2] : f16 from vector<16xf16> | |
%1265 = vector.insert %1264, %1263 [2, 2, 12] : f16 into vector<4x16x16xf16> | |
%1266 = vector.extract %133[2] : f16 from vector<16xf16> | |
%1267 = vector.insert %1266, %1265 [2, 2, 13] : f16 into vector<4x16x16xf16> | |
%1268 = vector.extract %134[2] : f16 from vector<16xf16> | |
%1269 = vector.insert %1268, %1267 [2, 2, 14] : f16 into vector<4x16x16xf16> | |
%1270 = vector.extract %135[2] : f16 from vector<16xf16> | |
%1271 = vector.insert %1270, %1269 [2, 2, 15] : f16 into vector<4x16x16xf16> | |
%1272 = vector.extract %120[3] : f16 from vector<16xf16> | |
%1273 = vector.insert %1272, %1271 [2, 3, 0] : f16 into vector<4x16x16xf16> | |
%1274 = vector.extract %121[3] : f16 from vector<16xf16> | |
%1275 = vector.insert %1274, %1273 [2, 3, 1] : f16 into vector<4x16x16xf16> | |
%1276 = vector.extract %122[3] : f16 from vector<16xf16> | |
%1277 = vector.insert %1276, %1275 [2, 3, 2] : f16 into vector<4x16x16xf16> | |
%1278 = vector.extract %123[3] : f16 from vector<16xf16> | |
%1279 = vector.insert %1278, %1277 [2, 3, 3] : f16 into vector<4x16x16xf16> | |
%1280 = vector.extract %124[3] : f16 from vector<16xf16> | |
%1281 = vector.insert %1280, %1279 [2, 3, 4] : f16 into vector<4x16x16xf16> | |
%1282 = vector.extract %125[3] : f16 from vector<16xf16> | |
%1283 = vector.insert %1282, %1281 [2, 3, 5] : f16 into vector<4x16x16xf16> | |
%1284 = vector.extract %126[3] : f16 from vector<16xf16> | |
%1285 = vector.insert %1284, %1283 [2, 3, 6] : f16 into vector<4x16x16xf16> | |
%1286 = vector.extract %127[3] : f16 from vector<16xf16> | |
%1287 = vector.insert %1286, %1285 [2, 3, 7] : f16 into vector<4x16x16xf16> | |
%1288 = vector.extract %128[3] : f16 from vector<16xf16> | |
%1289 = vector.insert %1288, %1287 [2, 3, 8] : f16 into vector<4x16x16xf16> | |
%1290 = vector.extract %129[3] : f16 from vector<16xf16> | |
%1291 = vector.insert %1290, %1289 [2, 3, 9] : f16 into vector<4x16x16xf16> | |
%1292 = vector.extract %130[3] : f16 from vector<16xf16> | |
%1293 = vector.insert %1292, %1291 [2, 3, 10] : f16 into vector<4x16x16xf16> | |
%1294 = vector.extract %131[3] : f16 from vector<16xf16> | |
%1295 = vector.insert %1294, %1293 [2, 3, 11] : f16 into vector<4x16x16xf16> | |
%1296 = vector.extract %132[3] : f16 from vector<16xf16> | |
%1297 = vector.insert %1296, %1295 [2, 3, 12] : f16 into vector<4x16x16xf16> | |
%1298 = vector.extract %133[3] : f16 from vector<16xf16> | |
%1299 = vector.insert %1298, %1297 [2, 3, 13] : f16 into vector<4x16x16xf16> | |
%1300 = vector.extract %134[3] : f16 from vector<16xf16> | |
%1301 = vector.insert %1300, %1299 [2, 3, 14] : f16 into vector<4x16x16xf16> | |
%1302 = vector.extract %135[3] : f16 from vector<16xf16> | |
%1303 = vector.insert %1302, %1301 [2, 3, 15] : f16 into vector<4x16x16xf16> | |
%1304 = vector.extract %120[4] : f16 from vector<16xf16> | |
%1305 = vector.insert %1304, %1303 [2, 4, 0] : f16 into vector<4x16x16xf16> | |
%1306 = vector.extract %121[4] : f16 from vector<16xf16> | |
%1307 = vector.insert %1306, %1305 [2, 4, 1] : f16 into vector<4x16x16xf16> | |
%1308 = vector.extract %122[4] : f16 from vector<16xf16> | |
%1309 = vector.insert %1308, %1307 [2, 4, 2] : f16 into vector<4x16x16xf16> | |
%1310 = vector.extract %123[4] : f16 from vector<16xf16> | |
%1311 = vector.insert %1310, %1309 [2, 4, 3] : f16 into vector<4x16x16xf16> | |
%1312 = vector.extract %124[4] : f16 from vector<16xf16> | |
%1313 = vector.insert %1312, %1311 [2, 4, 4] : f16 into vector<4x16x16xf16> | |
%1314 = vector.extract %125[4] : f16 from vector<16xf16> | |
%1315 = vector.insert %1314, %1313 [2, 4, 5] : f16 into vector<4x16x16xf16> | |
%1316 = vector.extract %126[4] : f16 from vector<16xf16> | |
%1317 = vector.insert %1316, %1315 [2, 4, 6] : f16 into vector<4x16x16xf16> | |
%1318 = vector.extract %127[4] : f16 from vector<16xf16> | |
%1319 = vector.insert %1318, %1317 [2, 4, 7] : f16 into vector<4x16x16xf16> | |
%1320 = vector.extract %128[4] : f16 from vector<16xf16> | |
%1321 = vector.insert %1320, %1319 [2, 4, 8] : f16 into vector<4x16x16xf16> | |
%1322 = vector.extract %129[4] : f16 from vector<16xf16> | |
%1323 = vector.insert %1322, %1321 [2, 4, 9] : f16 into vector<4x16x16xf16> | |
%1324 = vector.extract %130[4] : f16 from vector<16xf16> | |
%1325 = vector.insert %1324, %1323 [2, 4, 10] : f16 into vector<4x16x16xf16> | |
%1326 = vector.extract %131[4] : f16 from vector<16xf16> | |
%1327 = vector.insert %1326, %1325 [2, 4, 11] : f16 into vector<4x16x16xf16> | |
%1328 = vector.extract %132[4] : f16 from vector<16xf16> | |
%1329 = vector.insert %1328, %1327 [2, 4, 12] : f16 into vector<4x16x16xf16> | |
%1330 = vector.extract %133[4] : f16 from vector<16xf16> | |
%1331 = vector.insert %1330, %1329 [2, 4, 13] : f16 into vector<4x16x16xf16> | |
%1332 = vector.extract %134[4] : f16 from vector<16xf16> | |
%1333 = vector.insert %1332, %1331 [2, 4, 14] : f16 into vector<4x16x16xf16> | |
%1334 = vector.extract %135[4] : f16 from vector<16xf16> | |
%1335 = vector.insert %1334, %1333 [2, 4, 15] : f16 into vector<4x16x16xf16> | |
%1336 = vector.extract %120[5] : f16 from vector<16xf16> | |
%1337 = vector.insert %1336, %1335 [2, 5, 0] : f16 into vector<4x16x16xf16> | |
%1338 = vector.extract %121[5] : f16 from vector<16xf16> | |
%1339 = vector.insert %1338, %1337 [2, 5, 1] : f16 into vector<4x16x16xf16> | |
%1340 = vector.extract %122[5] : f16 from vector<16xf16> | |
%1341 = vector.insert %1340, %1339 [2, 5, 2] : f16 into vector<4x16x16xf16> | |
%1342 = vector.extract %123[5] : f16 from vector<16xf16> | |
%1343 = vector.insert %1342, %1341 [2, 5, 3] : f16 into vector<4x16x16xf16> | |
%1344 = vector.extract %124[5] : f16 from vector<16xf16> | |
%1345 = vector.insert %1344, %1343 [2, 5, 4] : f16 into vector<4x16x16xf16> | |
%1346 = vector.extract %125[5] : f16 from vector<16xf16> | |
%1347 = vector.insert %1346, %1345 [2, 5, 5] : f16 into vector<4x16x16xf16> | |
%1348 = vector.extract %126[5] : f16 from vector<16xf16> | |
%1349 = vector.insert %1348, %1347 [2, 5, 6] : f16 into vector<4x16x16xf16> | |
%1350 = vector.extract %127[5] : f16 from vector<16xf16> | |
%1351 = vector.insert %1350, %1349 [2, 5, 7] : f16 into vector<4x16x16xf16> | |
%1352 = vector.extract %128[5] : f16 from vector<16xf16> | |
%1353 = vector.insert %1352, %1351 [2, 5, 8] : f16 into vector<4x16x16xf16> | |
%1354 = vector.extract %129[5] : f16 from vector<16xf16> | |
%1355 = vector.insert %1354, %1353 [2, 5, 9] : f16 into vector<4x16x16xf16> | |
%1356 = vector.extract %130[5] : f16 from vector<16xf16> | |
%1357 = vector.insert %1356, %1355 [2, 5, 10] : f16 into vector<4x16x16xf16> | |
%1358 = vector.extract %131[5] : f16 from vector<16xf16> | |
%1359 = vector.insert %1358, %1357 [2, 5, 11] : f16 into vector<4x16x16xf16> | |
%1360 = vector.extract %132[5] : f16 from vector<16xf16> | |
%1361 = vector.insert %1360, %1359 [2, 5, 12] : f16 into vector<4x16x16xf16> | |
%1362 = vector.extract %133[5] : f16 from vector<16xf16> | |
%1363 = vector.insert %1362, %1361 [2, 5, 13] : f16 into vector<4x16x16xf16> | |
%1364 = vector.extract %134[5] : f16 from vector<16xf16> | |
%1365 = vector.insert %1364, %1363 [2, 5, 14] : f16 into vector<4x16x16xf16> | |
%1366 = vector.extract %135[5] : f16 from vector<16xf16> | |
%1367 = vector.insert %1366, %1365 [2, 5, 15] : f16 into vector<4x16x16xf16> | |
%1368 = vector.extract %120[6] : f16 from vector<16xf16> | |
%1369 = vector.insert %1368, %1367 [2, 6, 0] : f16 into vector<4x16x16xf16> | |
%1370 = vector.extract %121[6] : f16 from vector<16xf16> | |
%1371 = vector.insert %1370, %1369 [2, 6, 1] : f16 into vector<4x16x16xf16> | |
%1372 = vector.extract %122[6] : f16 from vector<16xf16> | |
%1373 = vector.insert %1372, %1371 [2, 6, 2] : f16 into vector<4x16x16xf16> | |
%1374 = vector.extract %123[6] : f16 from vector<16xf16> | |
%1375 = vector.insert %1374, %1373 [2, 6, 3] : f16 into vector<4x16x16xf16> | |
%1376 = vector.extract %124[6] : f16 from vector<16xf16> | |
%1377 = vector.insert %1376, %1375 [2, 6, 4] : f16 into vector<4x16x16xf16> | |
%1378 = vector.extract %125[6] : f16 from vector<16xf16> | |
%1379 = vector.insert %1378, %1377 [2, 6, 5] : f16 into vector<4x16x16xf16> | |
%1380 = vector.extract %126[6] : f16 from vector<16xf16> | |
%1381 = vector.insert %1380, %1379 [2, 6, 6] : f16 into vector<4x16x16xf16> | |
%1382 = vector.extract %127[6] : f16 from vector<16xf16> | |
%1383 = vector.insert %1382, %1381 [2, 6, 7] : f16 into vector<4x16x16xf16> | |
%1384 = vector.extract %128[6] : f16 from vector<16xf16> | |
%1385 = vector.insert %1384, %1383 [2, 6, 8] : f16 into vector<4x16x16xf16> | |
%1386 = vector.extract %129[6] : f16 from vector<16xf16> | |
%1387 = vector.insert %1386, %1385 [2, 6, 9] : f16 into vector<4x16x16xf16> | |
%1388 = vector.extract %130[6] : f16 from vector<16xf16> | |
%1389 = vector.insert %1388, %1387 [2, 6, 10] : f16 into vector<4x16x16xf16> | |
%1390 = vector.extract %131[6] : f16 from vector<16xf16> | |
%1391 = vector.insert %1390, %1389 [2, 6, 11] : f16 into vector<4x16x16xf16> | |
%1392 = vector.extract %132[6] : f16 from vector<16xf16> | |
%1393 = vector.insert %1392, %1391 [2, 6, 12] : f16 into vector<4x16x16xf16> | |
%1394 = vector.extract %133[6] : f16 from vector<16xf16> | |
%1395 = vector.insert %1394, %1393 [2, 6, 13] : f16 into vector<4x16x16xf16> | |
%1396 = vector.extract %134[6] : f16 from vector<16xf16> | |
%1397 = vector.insert %1396, %1395 [2, 6, 14] : f16 into vector<4x16x16xf16> | |
%1398 = vector.extract %135[6] : f16 from vector<16xf16> | |
%1399 = vector.insert %1398, %1397 [2, 6, 15] : f16 into vector<4x16x16xf16> | |
%1400 = vector.extract %120[7] : f16 from vector<16xf16> | |
%1401 = vector.insert %1400, %1399 [2, 7, 0] : f16 into vector<4x16x16xf16> | |
%1402 = vector.extract %121[7] : f16 from vector<16xf16> | |
%1403 = vector.insert %1402, %1401 [2, 7, 1] : f16 into vector<4x16x16xf16> | |
%1404 = vector.extract %122[7] : f16 from vector<16xf16> | |
%1405 = vector.insert %1404, %1403 [2, 7, 2] : f16 into vector<4x16x16xf16> | |
%1406 = vector.extract %123[7] : f16 from vector<16xf16> | |
%1407 = vector.insert %1406, %1405 [2, 7, 3] : f16 into vector<4x16x16xf16> | |
%1408 = vector.extract %124[7] : f16 from vector<16xf16> | |
%1409 = vector.insert %1408, %1407 [2, 7, 4] : f16 into vector<4x16x16xf16> | |
%1410 = vector.extract %125[7] : f16 from vector<16xf16> | |
%1411 = vector.insert %1410, %1409 [2, 7, 5] : f16 into vector<4x16x16xf16> | |
%1412 = vector.extract %126[7] : f16 from vector<16xf16> | |
%1413 = vector.insert %1412, %1411 [2, 7, 6] : f16 into vector<4x16x16xf16> | |
%1414 = vector.extract %127[7] : f16 from vector<16xf16> | |
%1415 = vector.insert %1414, %1413 [2, 7, 7] : f16 into vector<4x16x16xf16> | |
%1416 = vector.extract %128[7] : f16 from vector<16xf16> | |
%1417 = vector.insert %1416, %1415 [2, 7, 8] : f16 into vector<4x16x16xf16> | |
%1418 = vector.extract %129[7] : f16 from vector<16xf16> | |
%1419 = vector.insert %1418, %1417 [2, 7, 9] : f16 into vector<4x16x16xf16> | |
%1420 = vector.extract %130[7] : f16 from vector<16xf16> | |
%1421 = vector.insert %1420, %1419 [2, 7, 10] : f16 into vector<4x16x16xf16> | |
%1422 = vector.extract %131[7] : f16 from vector<16xf16> | |
%1423 = vector.insert %1422, %1421 [2, 7, 11] : f16 into vector<4x16x16xf16> | |
%1424 = vector.extract %132[7] : f16 from vector<16xf16> | |
%1425 = vector.insert %1424, %1423 [2, 7, 12] : f16 into vector<4x16x16xf16> | |
%1426 = vector.extract %133[7] : f16 from vector<16xf16> | |
%1427 = vector.insert %1426, %1425 [2, 7, 13] : f16 into vector<4x16x16xf16> | |
%1428 = vector.extract %134[7] : f16 from vector<16xf16> | |
%1429 = vector.insert %1428, %1427 [2, 7, 14] : f16 into vector<4x16x16xf16> | |
%1430 = vector.extract %135[7] : f16 from vector<16xf16> | |
%1431 = vector.insert %1430, %1429 [2, 7, 15] : f16 into vector<4x16x16xf16> | |
%1432 = vector.extract %120[8] : f16 from vector<16xf16> | |
%1433 = vector.insert %1432, %1431 [2, 8, 0] : f16 into vector<4x16x16xf16> | |
%1434 = vector.extract %121[8] : f16 from vector<16xf16> | |
%1435 = vector.insert %1434, %1433 [2, 8, 1] : f16 into vector<4x16x16xf16> | |
%1436 = vector.extract %122[8] : f16 from vector<16xf16> | |
%1437 = vector.insert %1436, %1435 [2, 8, 2] : f16 into vector<4x16x16xf16> | |
%1438 = vector.extract %123[8] : f16 from vector<16xf16> | |
%1439 = vector.insert %1438, %1437 [2, 8, 3] : f16 into vector<4x16x16xf16> | |
%1440 = vector.extract %124[8] : f16 from vector<16xf16> | |
%1441 = vector.insert %1440, %1439 [2, 8, 4] : f16 into vector<4x16x16xf16> | |
%1442 = vector.extract %125[8] : f16 from vector<16xf16> | |
%1443 = vector.insert %1442, %1441 [2, 8, 5] : f16 into vector<4x16x16xf16> | |
%1444 = vector.extract %126[8] : f16 from vector<16xf16> | |
%1445 = vector.insert %1444, %1443 [2, 8, 6] : f16 into vector<4x16x16xf16> | |
%1446 = vector.extract %127[8] : f16 from vector<16xf16> | |
%1447 = vector.insert %1446, %1445 [2, 8, 7] : f16 into vector<4x16x16xf16> | |
%1448 = vector.extract %128[8] : f16 from vector<16xf16> | |
%1449 = vector.insert %1448, %1447 [2, 8, 8] : f16 into vector<4x16x16xf16> | |
%1450 = vector.extract %129[8] : f16 from vector<16xf16> | |
%1451 = vector.insert %1450, %1449 [2, 8, 9] : f16 into vector<4x16x16xf16> | |
%1452 = vector.extract %130[8] : f16 from vector<16xf16> | |
%1453 = vector.insert %1452, %1451 [2, 8, 10] : f16 into vector<4x16x16xf16> | |
%1454 = vector.extract %131[8] : f16 from vector<16xf16> | |
%1455 = vector.insert %1454, %1453 [2, 8, 11] : f16 into vector<4x16x16xf16> | |
%1456 = vector.extract %132[8] : f16 from vector<16xf16> | |
%1457 = vector.insert %1456, %1455 [2, 8, 12] : f16 into vector<4x16x16xf16> | |
%1458 = vector.extract %133[8] : f16 from vector<16xf16> | |
%1459 = vector.insert %1458, %1457 [2, 8, 13] : f16 into vector<4x16x16xf16> | |
%1460 = vector.extract %134[8] : f16 from vector<16xf16> | |
%1461 = vector.insert %1460, %1459 [2, 8, 14] : f16 into vector<4x16x16xf16> | |
%1462 = vector.extract %135[8] : f16 from vector<16xf16> | |
%1463 = vector.insert %1462, %1461 [2, 8, 15] : f16 into vector<4x16x16xf16> | |
%1464 = vector.extract %120[9] : f16 from vector<16xf16> | |
%1465 = vector.insert %1464, %1463 [2, 9, 0] : f16 into vector<4x16x16xf16> | |
%1466 = vector.extract %121[9] : f16 from vector<16xf16> | |
%1467 = vector.insert %1466, %1465 [2, 9, 1] : f16 into vector<4x16x16xf16> | |
%1468 = vector.extract %122[9] : f16 from vector<16xf16> | |
%1469 = vector.insert %1468, %1467 [2, 9, 2] : f16 into vector<4x16x16xf16> | |
%1470 = vector.extract %123[9] : f16 from vector<16xf16> | |
%1471 = vector.insert %1470, %1469 [2, 9, 3] : f16 into vector<4x16x16xf16> | |
%1472 = vector.extract %124[9] : f16 from vector<16xf16> | |
%1473 = vector.insert %1472, %1471 [2, 9, 4] : f16 into vector<4x16x16xf16> | |
%1474 = vector.extract %125[9] : f16 from vector<16xf16> | |
%1475 = vector.insert %1474, %1473 [2, 9, 5] : f16 into vector<4x16x16xf16> | |
%1476 = vector.extract %126[9] : f16 from vector<16xf16> | |
%1477 = vector.insert %1476, %1475 [2, 9, 6] : f16 into vector<4x16x16xf16> | |
%1478 = vector.extract %127[9] : f16 from vector<16xf16> | |
%1479 = vector.insert %1478, %1477 [2, 9, 7] : f16 into vector<4x16x16xf16> | |
%1480 = vector.extract %128[9] : f16 from vector<16xf16> | |
%1481 = vector.insert %1480, %1479 [2, 9, 8] : f16 into vector<4x16x16xf16> | |
%1482 = vector.extract %129[9] : f16 from vector<16xf16> | |
%1483 = vector.insert %1482, %1481 [2, 9, 9] : f16 into vector<4x16x16xf16> | |
%1484 = vector.extract %130[9] : f16 from vector<16xf16> | |
%1485 = vector.insert %1484, %1483 [2, 9, 10] : f16 into vector<4x16x16xf16> | |
%1486 = vector.extract %131[9] : f16 from vector<16xf16> | |
%1487 = vector.insert %1486, %1485 [2, 9, 11] : f16 into vector<4x16x16xf16> | |
%1488 = vector.extract %132[9] : f16 from vector<16xf16> | |
%1489 = vector.insert %1488, %1487 [2, 9, 12] : f16 into vector<4x16x16xf16> | |
%1490 = vector.extract %133[9] : f16 from vector<16xf16> | |
%1491 = vector.insert %1490, %1489 [2, 9, 13] : f16 into vector<4x16x16xf16> | |
%1492 = vector.extract %134[9] : f16 from vector<16xf16> | |
%1493 = vector.insert %1492, %1491 [2, 9, 14] : f16 into vector<4x16x16xf16> | |
%1494 = vector.extract %135[9] : f16 from vector<16xf16> | |
%1495 = vector.insert %1494, %1493 [2, 9, 15] : f16 into vector<4x16x16xf16> | |
%1496 = vector.extract %120[10] : f16 from vector<16xf16> | |
%1497 = vector.insert %1496, %1495 [2, 10, 0] : f16 into vector<4x16x16xf16> | |
%1498 = vector.extract %121[10] : f16 from vector<16xf16> | |
%1499 = vector.insert %1498, %1497 [2, 10, 1] : f16 into vector<4x16x16xf16> | |
%1500 = vector.extract %122[10] : f16 from vector<16xf16> | |
%1501 = vector.insert %1500, %1499 [2, 10, 2] : f16 into vector<4x16x16xf16> | |
%1502 = vector.extract %123[10] : f16 from vector<16xf16> | |
%1503 = vector.insert %1502, %1501 [2, 10, 3] : f16 into vector<4x16x16xf16> | |
%1504 = vector.extract %124[10] : f16 from vector<16xf16> | |
%1505 = vector.insert %1504, %1503 [2, 10, 4] : f16 into vector<4x16x16xf16> | |
%1506 = vector.extract %125[10] : f16 from vector<16xf16> | |
%1507 = vector.insert %1506, %1505 [2, 10, 5] : f16 into vector<4x16x16xf16> | |
%1508 = vector.extract %126[10] : f16 from vector<16xf16> | |
%1509 = vector.insert %1508, %1507 [2, 10, 6] : f16 into vector<4x16x16xf16> | |
%1510 = vector.extract %127[10] : f16 from vector<16xf16> | |
%1511 = vector.insert %1510, %1509 [2, 10, 7] : f16 into vector<4x16x16xf16> | |
%1512 = vector.extract %128[10] : f16 from vector<16xf16> | |
%1513 = vector.insert %1512, %1511 [2, 10, 8] : f16 into vector<4x16x16xf16> | |
%1514 = vector.extract %129[10] : f16 from vector<16xf16> | |
%1515 = vector.insert %1514, %1513 [2, 10, 9] : f16 into vector<4x16x16xf16> | |
%1516 = vector.extract %130[10] : f16 from vector<16xf16> | |
%1517 = vector.insert %1516, %1515 [2, 10, 10] : f16 into vector<4x16x16xf16> | |
%1518 = vector.extract %131[10] : f16 from vector<16xf16> | |
%1519 = vector.insert %1518, %1517 [2, 10, 11] : f16 into vector<4x16x16xf16> | |
%1520 = vector.extract %132[10] : f16 from vector<16xf16> | |
%1521 = vector.insert %1520, %1519 [2, 10, 12] : f16 into vector<4x16x16xf16> | |
%1522 = vector.extract %133[10] : f16 from vector<16xf16> | |
%1523 = vector.insert %1522, %1521 [2, 10, 13] : f16 into vector<4x16x16xf16> | |
%1524 = vector.extract %134[10] : f16 from vector<16xf16> | |
%1525 = vector.insert %1524, %1523 [2, 10, 14] : f16 into vector<4x16x16xf16> | |
%1526 = vector.extract %135[10] : f16 from vector<16xf16> | |
%1527 = vector.insert %1526, %1525 [2, 10, 15] : f16 into vector<4x16x16xf16> | |
%1528 = vector.extract %120[11] : f16 from vector<16xf16> | |
%1529 = vector.insert %1528, %1527 [2, 11, 0] : f16 into vector<4x16x16xf16> | |
%1530 = vector.extract %121[11] : f16 from vector<16xf16> | |
%1531 = vector.insert %1530, %1529 [2, 11, 1] : f16 into vector<4x16x16xf16> | |
%1532 = vector.extract %122[11] : f16 from vector<16xf16> | |
%1533 = vector.insert %1532, %1531 [2, 11, 2] : f16 into vector<4x16x16xf16> | |
%1534 = vector.extract %123[11] : f16 from vector<16xf16> | |
%1535 = vector.insert %1534, %1533 [2, 11, 3] : f16 into vector<4x16x16xf16> | |
%1536 = vector.extract %124[11] : f16 from vector<16xf16> | |
%1537 = vector.insert %1536, %1535 [2, 11, 4] : f16 into vector<4x16x16xf16> | |
%1538 = vector.extract %125[11] : f16 from vector<16xf16> | |
%1539 = vector.insert %1538, %1537 [2, 11, 5] : f16 into vector<4x16x16xf16> | |
%1540 = vector.extract %126[11] : f16 from vector<16xf16> | |
%1541 = vector.insert %1540, %1539 [2, 11, 6] : f16 into vector<4x16x16xf16> | |
%1542 = vector.extract %127[11] : f16 from vector<16xf16> | |
%1543 = vector.insert %1542, %1541 [2, 11, 7] : f16 into vector<4x16x16xf16> | |
%1544 = vector.extract %128[11] : f16 from vector<16xf16> | |
%1545 = vector.insert %1544, %1543 [2, 11, 8] : f16 into vector<4x16x16xf16> | |
%1546 = vector.extract %129[11] : f16 from vector<16xf16> | |
%1547 = vector.insert %1546, %1545 [2, 11, 9] : f16 into vector<4x16x16xf16> | |
%1548 = vector.extract %130[11] : f16 from vector<16xf16> | |
%1549 = vector.insert %1548, %1547 [2, 11, 10] : f16 into vector<4x16x16xf16> | |
%1550 = vector.extract %131[11] : f16 from vector<16xf16> | |
%1551 = vector.insert %1550, %1549 [2, 11, 11] : f16 into vector<4x16x16xf16> | |
%1552 = vector.extract %132[11] : f16 from vector<16xf16> | |
%1553 = vector.insert %1552, %1551 [2, 11, 12] : f16 into vector<4x16x16xf16> | |
%1554 = vector.extract %133[11] : f16 from vector<16xf16> | |
%1555 = vector.insert %1554, %1553 [2, 11, 13] : f16 into vector<4x16x16xf16> | |
%1556 = vector.extract %134[11] : f16 from vector<16xf16> | |
%1557 = vector.insert %1556, %1555 [2, 11, 14] : f16 into vector<4x16x16xf16> | |
%1558 = vector.extract %135[11] : f16 from vector<16xf16> | |
%1559 = vector.insert %1558, %1557 [2, 11, 15] : f16 into vector<4x16x16xf16> | |
%1560 = vector.extract %120[12] : f16 from vector<16xf16> | |
%1561 = vector.insert %1560, %1559 [2, 12, 0] : f16 into vector<4x16x16xf16> | |
%1562 = vector.extract %121[12] : f16 from vector<16xf16> | |
%1563 = vector.insert %1562, %1561 [2, 12, 1] : f16 into vector<4x16x16xf16> | |
%1564 = vector.extract %122[12] : f16 from vector<16xf16> | |
%1565 = vector.insert %1564, %1563 [2, 12, 2] : f16 into vector<4x16x16xf16> | |
%1566 = vector.extract %123[12] : f16 from vector<16xf16> | |
%1567 = vector.insert %1566, %1565 [2, 12, 3] : f16 into vector<4x16x16xf16> | |
%1568 = vector.extract %124[12] : f16 from vector<16xf16> | |
%1569 = vector.insert %1568, %1567 [2, 12, 4] : f16 into vector<4x16x16xf16> | |
%1570 = vector.extract %125[12] : f16 from vector<16xf16> | |
%1571 = vector.insert %1570, %1569 [2, 12, 5] : f16 into vector<4x16x16xf16> | |
%1572 = vector.extract %126[12] : f16 from vector<16xf16> | |
%1573 = vector.insert %1572, %1571 [2, 12, 6] : f16 into vector<4x16x16xf16> | |
%1574 = vector.extract %127[12] : f16 from vector<16xf16> | |
%1575 = vector.insert %1574, %1573 [2, 12, 7] : f16 into vector<4x16x16xf16> | |
%1576 = vector.extract %128[12] : f16 from vector<16xf16> | |
%1577 = vector.insert %1576, %1575 [2, 12, 8] : f16 into vector<4x16x16xf16> | |
%1578 = vector.extract %129[12] : f16 from vector<16xf16> | |
%1579 = vector.insert %1578, %1577 [2, 12, 9] : f16 into vector<4x16x16xf16> | |
%1580 = vector.extract %130[12] : f16 from vector<16xf16> | |
%1581 = vector.insert %1580, %1579 [2, 12, 10] : f16 into vector<4x16x16xf16> | |
%1582 = vector.extract %131[12] : f16 from vector<16xf16> | |
%1583 = vector.insert %1582, %1581 [2, 12, 11] : f16 into vector<4x16x16xf16> | |
%1584 = vector.extract %132[12] : f16 from vector<16xf16> | |
%1585 = vector.insert %1584, %1583 [2, 12, 12] : f16 into vector<4x16x16xf16> | |
%1586 = vector.extract %133[12] : f16 from vector<16xf16> | |
%1587 = vector.insert %1586, %1585 [2, 12, 13] : f16 into vector<4x16x16xf16> | |
%1588 = vector.extract %134[12] : f16 from vector<16xf16> | |
%1589 = vector.insert %1588, %1587 [2, 12, 14] : f16 into vector<4x16x16xf16> | |
%1590 = vector.extract %135[12] : f16 from vector<16xf16> | |
%1591 = vector.insert %1590, %1589 [2, 12, 15] : f16 into vector<4x16x16xf16> | |
%1592 = vector.extract %120[13] : f16 from vector<16xf16> | |
%1593 = vector.insert %1592, %1591 [2, 13, 0] : f16 into vector<4x16x16xf16> | |
%1594 = vector.extract %121[13] : f16 from vector<16xf16> | |
%1595 = vector.insert %1594, %1593 [2, 13, 1] : f16 into vector<4x16x16xf16> | |
%1596 = vector.extract %122[13] : f16 from vector<16xf16> | |
%1597 = vector.insert %1596, %1595 [2, 13, 2] : f16 into vector<4x16x16xf16> | |
%1598 = vector.extract %123[13] : f16 from vector<16xf16> | |
%1599 = vector.insert %1598, %1597 [2, 13, 3] : f16 into vector<4x16x16xf16> | |
%1600 = vector.extract %124[13] : f16 from vector<16xf16> | |
%1601 = vector.insert %1600, %1599 [2, 13, 4] : f16 into vector<4x16x16xf16> | |
%1602 = vector.extract %125[13] : f16 from vector<16xf16> | |
%1603 = vector.insert %1602, %1601 [2, 13, 5] : f16 into vector<4x16x16xf16> | |
%1604 = vector.extract %126[13] : f16 from vector<16xf16> | |
%1605 = vector.insert %1604, %1603 [2, 13, 6] : f16 into vector<4x16x16xf16> | |
%1606 = vector.extract %127[13] : f16 from vector<16xf16> | |
%1607 = vector.insert %1606, %1605 [2, 13, 7] : f16 into vector<4x16x16xf16> | |
%1608 = vector.extract %128[13] : f16 from vector<16xf16> | |
%1609 = vector.insert %1608, %1607 [2, 13, 8] : f16 into vector<4x16x16xf16> | |
%1610 = vector.extract %129[13] : f16 from vector<16xf16> | |
%1611 = vector.insert %1610, %1609 [2, 13, 9] : f16 into vector<4x16x16xf16> | |
%1612 = vector.extract %130[13] : f16 from vector<16xf16> | |
%1613 = vector.insert %1612, %1611 [2, 13, 10] : f16 into vector<4x16x16xf16> | |
%1614 = vector.extract %131[13] : f16 from vector<16xf16> | |
%1615 = vector.insert %1614, %1613 [2, 13, 11] : f16 into vector<4x16x16xf16> | |
%1616 = vector.extract %132[13] : f16 from vector<16xf16> | |
%1617 = vector.insert %1616, %1615 [2, 13, 12] : f16 into vector<4x16x16xf16> | |
%1618 = vector.extract %133[13] : f16 from vector<16xf16> | |
%1619 = vector.insert %1618, %1617 [2, 13, 13] : f16 into vector<4x16x16xf16> | |
%1620 = vector.extract %134[13] : f16 from vector<16xf16> | |
%1621 = vector.insert %1620, %1619 [2, 13, 14] : f16 into vector<4x16x16xf16> | |
%1622 = vector.extract %135[13] : f16 from vector<16xf16> | |
%1623 = vector.insert %1622, %1621 [2, 13, 15] : f16 into vector<4x16x16xf16> | |
%1624 = vector.extract %120[14] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment