Created
July 23, 2025 11:22
-
-
Save marcusbuffett/013e9baab93e210a622a9e408c38e49c to your computer and use it in GitHub Desktop.
This file has been truncated, but you can view the full file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| validate checksum key=ReduceAutotuneKey - ElemInput: Float(F32), ElemOutput: Float(F32), ElemAcc: Float(F32), PotentialLineSize: 4, AxisIsContiguous: true, ReduceAxisShape: 4096, ReduceCount: 64, checksum=a14a9dbdfd906e71ae310b4829971734 | |
| 00d104e8138e13e2dd8e06 | |
| 0129d16, | |
| ), | |
| info: Some ( | |
| ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ), | |
| ), | |
| mode: Some ( | |
| Unchecked, | |
| ), | |
| type_name: "burn_cubecl: : kernel: : index: : slice_assign: : slice_assign_kernel: : SliceAssignKernel<f32, | |
| cubecl_cuda: : runtime: : CudaRuntime>", | |
| } | |
| source: | |
| ```cpp | |
| #include <cuda_runtime.h> | |
| /* Short integer aliases used by the generated code below; uint and uint32 both name unsigned int. */ typedef unsigned int uint; | |
| typedef unsigned char uint8; | |
| typedef unsigned short uint16; | |
| typedef unsigned int uint32; | |
| typedef unsigned long long int uint64; | |
| typedef signed char int8; | |
| typedef signed short int16; | |
| typedef signed int int32; | |
| typedef signed long long int int64; | |
| /* Pack of eight floats (i_0..i_7) declared with 32-byte alignment; element type of buffer_0 and buffer_1 in slice_assign_kernel. Presumably mirrors the vectorisation factor of 8 reported in the compilation info. */ struct __align__(32) float_8 { | |
| float i_0; | |
| float i_1; | |
| float i_2; | |
| float i_3; | |
| float i_4; | |
| float i_5; | |
| float i_6; | |
| float i_7; | |
| }; | |
| /* Twelve uint32 scalars passed to the kernel by value. As read by slice_assign_kernel: x[0..7] are (shift, mask) pairs, x[8..11] are offsets added to the decomposed index components. */ struct scalars_uint32_st { | |
| uint32 x[12]; | |
| }; | |
| /* Ten uint entries passed to the kernel by value. slice_assign_kernel reads x[3] as the thread bound and x[8], x[9] as base positions when indexing `info`; the other entries are not read by that kernel. */ struct metadata_st { | |
| uint x[10]; | |
| }; | |
| /* slice_assign_kernel: every in-range thread copies one float_8 from buffer_1 to buffer_0. The thread's linear index is split into four components with shift/mask pairs; each component (plus an offset on the destination side) is multiplied by a value read from `info` and summed to form the two buffer offsets. */ extern "C" __global__ void slice_assign_kernel ( | |
| float_8* buffer_0, const float_8* __restrict__ buffer_1, const uint32* __restrict__ info, const __grid_constant__ metadata_st static_info, const __grid_constant__ scalars_uint32_st scalars_uint32 /* buffer_0: written (destination); buffer_1: read only (source); info: uint32 table supplying the per-component multipliers; static_info and scalars_uint32: by-value argument blocks */ | |
| ) { | |
| uint3 absoluteIdx = make_uint3( /* this thread's (x, y, z) position within the whole grid */ | |
| blockIdx.x * blockDim.x + threadIdx.x, | |
| blockIdx.y * blockDim.y + threadIdx.y, | |
| blockIdx.z * blockDim.z + threadIdx.z | |
| ); | |
| uint32 idxGlobal = (absoluteIdx.z * gridDim.x * blockDim.x * gridDim.y * blockDim.y) + (absoluteIdx.y * gridDim.x * blockDim.x) + absoluteIdx.x; /* flatten the 3-D position into one linear thread index (32-bit arithmetic) */ | |
| uint32 l_mut_3; /* part of the linear index that is still to be decomposed */ | |
| uint32 l_mut_4; /* running scalar offset used to index buffer_0 (destination) */ | |
| uint32 l_mut_5; /* running scalar offset used to index buffer_1 (source) */ | |
| const uint32 l_0 = static_info.x[uint32(3)]; /* thread bound taken from the by-value metadata */ | |
| const bool l_1 = idxGlobal >= l_0; | |
| if (l_1) { /* threads at or past the bound exit without touching memory */ | |
| return;} | |
| const uint32 l_2 = idxGlobal * uint32(8); /* scale by 8: one thread handles one float_8, i.e. 8 scalars */ | |
| l_mut_3 = l_2; | |
| l_mut_4 = uint32(0); | |
| l_mut_5 = uint32(0); | |
| const uint32 l_6 = l_mut_3 >> scalars_uint32.x[6]; /* step for index 3: quotient via shift x[6] */ | |
| const uint32 l_7 = l_mut_3 & scalars_uint32.x[7]; /* remainder via mask x[7]: the source-side component */ | |
| const uint32 l_8 = l_7 + scalars_uint32.x[11]; /* destination-side component = source component + offset x[11] */ | |
| const uint32 l_9 = info[static_info.x[uint32(9)] + uint32(3) - 10]; /* source-side multiplier (presumably a stride); the "- 10" presumably rebases past the 10 entries held in static_info - TODO confirm */ | |
| const uint32 l_10 = l_7 * l_9; | |
| l_mut_5 = l_mut_5 + l_10; | |
| const uint32 l_11 = info[static_info.x[uint32(8)] + uint32(3) - 10]; /* destination-side multiplier, looked up from base static_info.x[8] */ | |
| const uint32 l_12 = l_8 * l_11; | |
| l_mut_4 = l_mut_4 + l_12; | |
| l_mut_3 = l_6; /* carry the quotient into the next step */ | |
| const uint32 l_13 = l_mut_3 >> scalars_uint32.x[4]; /* same step for index 2: shift x[4], mask x[5], offset x[10] */ | |
| const uint32 l_14 = l_mut_3 & scalars_uint32.x[5]; | |
| const uint32 l_15 = l_14 + scalars_uint32.x[10]; | |
| const uint32 l_16 = info[static_info.x[uint32(9)] + uint32(2) - 10]; | |
| const uint32 l_17 = l_14 * l_16; | |
| l_mut_5 = l_mut_5 + l_17; | |
| const uint32 l_18 = info[static_info.x[uint32(8)] + uint32(2) - 10]; | |
| const uint32 l_19 = l_15 * l_18; | |
| l_mut_4 = l_mut_4 + l_19; | |
| l_mut_3 = l_13; | |
| const uint32 l_20 = l_mut_3 >> scalars_uint32.x[2]; /* same step for index 1: shift x[2], mask x[3], offset x[9] */ | |
| const uint32 l_21 = l_mut_3 & scalars_uint32.x[3]; | |
| const uint32 l_22 = l_21 + scalars_uint32.x[9]; | |
| const uint32 l_23 = info[static_info.x[uint32(9)] + uint32(1) - 10]; | |
| const uint32 l_24 = l_21 * l_23; | |
| l_mut_5 = l_mut_5 + l_24; | |
| const uint32 l_25 = info[static_info.x[uint32(8)] + uint32(1) - 10]; | |
| const uint32 l_26 = l_22 * l_25; | |
| l_mut_4 = l_mut_4 + l_26; | |
| l_mut_3 = l_20; | |
| const uint32 l_27 = l_mut_3 >> scalars_uint32.x[0]; /* same step for index 0: shift x[0], mask x[1], offset x[8] */ | |
| const uint32 l_28 = l_mut_3 & scalars_uint32.x[1]; | |
| const uint32 l_29 = l_28 + scalars_uint32.x[8]; | |
| const uint32 l_30 = info[static_info.x[uint32(9)] + uint32(0) - 10]; | |
| const uint32 l_31 = l_28 * l_30; | |
| l_mut_5 = l_mut_5 + l_31; | |
| const uint32 l_32 = info[static_info.x[uint32(8)] + uint32(0) - 10]; | |
| const uint32 l_33 = l_29 * l_32; | |
| l_mut_4 = l_mut_4 + l_33; | |
| l_mut_3 = l_27; /* the last quotient is stored but never read afterwards */ | |
| const uint32 l_34 = l_mut_4 / uint32(8); /* scalar offset -> float_8 index into buffer_0 */ | |
| const uint32 l_35 = l_mut_5 / uint32(8); /* scalar offset -> float_8 index into buffer_1 */ | |
| const float_8 l_36 = buffer_1[l_35]; /* load eight floats from the source */ | |
| buffer_0[l_34] = reinterpret_cast< float_8 const&>(l_36); /* store them to the destination; the cast targets the value's own type */ | |
| } | |
| ``` | |
| [END_KERNEL_COMPILATION] | |
| | 222.102099ms | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 49.851µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.161µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.77µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.2µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.95µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.25µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 38.721µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.531µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.011µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 28.321µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.79µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.97µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.99µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.38µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.73µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.12µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.421µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.251µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.101µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.041µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.831µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.101µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.2µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.41µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 36.79µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.82µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.191µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.471µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 37.041µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.621µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.51µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.62µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.18µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.6µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.34µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.5µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.59µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.57µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.49µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.53µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.09µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.8µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.81µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.711µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.891µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.08µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.86µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.72µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.61µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.35µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 20.57µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.21µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.05µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| [START_KERNEL_COMPILATION] | |
| name: burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel< | |
| f32, | |
| cubecl_cuda::runtime::CudaRuntime, | |
| > | |
| cube_dim: (16, 16, 1) | |
| info: KernelId { | |
| type_id: TypeId ( | |
| 0xbb7da037008a4eaa774030a150129d16, | |
| ), | |
| info: Some ( | |
| ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ), | |
| ), | |
| mode: Some ( | |
| Unchecked, | |
| ), | |
| type_name: "burn_cubecl: : kernel: : index: : slice_assign: : slice_assign_kernel: : SliceAssignKernel<f32, | |
| cubecl_cuda: : runtime: : CudaRuntime>", | |
| } | |
| source: | |
| ```cpp | |
| #include <cuda_runtime.h> | |
| typedef unsigned int uint; | |
| typedef unsigned char uint8; | |
| typedef unsigned short uint16; | |
| typedef unsigned int uint32; | |
| typedef unsigned long long int uint64; | |
| typedef signed char int8; | |
| typedef signed short int16; | |
| typedef signed int int32; | |
| typedef signed long long int int64; | |
| struct __align__(32) float_8 { | |
| float i_0; | |
| float i_1; | |
| float i_2; | |
| float i_3; | |
| float i_4; | |
| float i_5; | |
| float i_6; | |
| float i_7; | |
| }; | |
| struct scalars_uint32_st { | |
| uint32 x[6]; | |
| }; | |
| struct metadata_st { | |
| uint x[10]; | |
| }; | |
| extern "C" __global__ void slice_assign_kernel ( | |
| float_8* buffer_0, const float_8* __restrict__ buffer_1, const uint32* __restrict__ info, const __grid_constant__ metadata_st static_info, const __grid_constant__ scalars_uint32_st scalars_uint32 | |
| ) { | |
| uint3 absoluteIdx = make_uint3( | |
| blockIdx.x * blockDim.x + threadIdx.x, | |
| blockIdx.y * blockDim.y + threadIdx.y, | |
| blockIdx.z * blockDim.z + threadIdx.z | |
| ); | |
| uint32 idxGlobal = (absoluteIdx.z * gridDim.x * blockDim.x * gridDim.y * blockDim.y) + (absoluteIdx.y * gridDim.x * blockDim.x) + absoluteIdx.x; | |
| uint32 l_mut_3; | |
| uint32 l_mut_4; | |
| uint32 l_mut_5; | |
| const uint32 l_0 = static_info.x[uint32(3)]; | |
| const bool l_1 = idxGlobal >= l_0; | |
| if (l_1) { | |
| return;} | |
| const uint32 l_2 = idxGlobal * uint32(8); | |
| l_mut_3 = l_2; | |
| l_mut_4 = uint32(0); | |
| l_mut_5 = uint32(0); | |
| const uint32 l_6 = l_mut_3 >> scalars_uint32.x[2]; | |
| const uint32 l_7 = l_mut_3 & scalars_uint32.x[3]; | |
| const uint32 l_8 = l_7 + scalars_uint32.x[5]; | |
| const uint32 l_9 = info[static_info.x[uint32(9)] + uint32(1) - 10]; | |
| const uint32 l_10 = l_7 * l_9; | |
| l_mut_5 = l_mut_5 + l_10; | |
| const uint32 l_11 = info[static_info.x[uint32(8)] + uint32(1) - 10]; | |
| const uint32 l_12 = l_8 * l_11; | |
| l_mut_4 = l_mut_4 + l_12; | |
| l_mut_3 = l_6; | |
| const uint32 l_13 = l_mut_3 >> scalars_uint32.x[0]; | |
| const uint32 l_14 = l_mut_3 & scalars_uint32.x[1]; | |
| const uint32 l_15 = l_14 + scalars_uint32.x[4]; | |
| const uint32 l_16 = info[static_info.x[uint32(9)] + uint32(0) - 10]; | |
| const uint32 l_17 = l_14 * l_16; | |
| l_mut_5 = l_mut_5 + l_17; | |
| const uint32 l_18 = info[static_info.x[uint32(8)] + uint32(0) - 10]; | |
| const uint32 l_19 = l_15 * l_18; | |
| l_mut_4 = l_mut_4 + l_19; | |
| l_mut_3 = l_13; | |
| const uint32 l_20 = l_mut_4 / uint32(8); | |
| const uint32 l_21 = l_mut_5 / uint32(8); | |
| const float_8 l_22 = buffer_1[l_21]; | |
| buffer_0[l_20] = reinterpret_cast< float_8 const&>(l_22); | |
| } | |
| ``` | |
| [END_KERNEL_COMPILATION] | |
| || 124.293029ms | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 49.831µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 30.331µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 29.12µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 28.021µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 32.12µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 31.401µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 28.081µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.561µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.361µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.72µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.76µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.15µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.421µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 28.841µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.961µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.88µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.63µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.57µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.861µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.661µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 26.741µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 29.15µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 29.461µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 29.141µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 28.021µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.72µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.97µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.901µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.591µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 28.511µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 28.78µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.17µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.59µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 28.191µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.301µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.771µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.54µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 37.57µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 29.011µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 28.491µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 28.181µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 28.03µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 28.7µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 28.361µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 28.701µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.891µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.84µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.58µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 28.22µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.661µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 25.771µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 24.971µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 25.171µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| [START_KERNEL_COMPILATION] | |
| name: burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel< | |
| i32, | |
| cubecl_cuda::runtime::CudaRuntime, | |
| > | |
| cube_dim: (16, 16, 1) | |
| info: KernelId { | |
| type_id: TypeId ( | |
| 0x69f09211cfcda605c3b12b9ee6e6f05a, | |
| ), | |
| info: Some ( | |
| ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ), | |
| ), | |
| mode: Some ( | |
| Unchecked, | |
| ), | |
| type_name: "burn_cubecl: : kernel: : index: : slice_assign: : slice_assign_kernel: : SliceAssignKernel<i32, | |
| cubecl_cuda: : runtime: : CudaRuntime>", | |
| } | |
| source: | |
| ```cpp | |
| #include <cuda_runtime.h> | |
| // Short aliases for the built-in integer types, paired by width. | |
| // The listing that follows refers to uint, uint32 and int32. | |
| typedef unsigned char      uint8; | |
| typedef signed char        int8; | |
| typedef unsigned short     uint16; | |
| typedef short              int16; | |
| typedef unsigned int       uint, uint32; | |
| typedef int                int32; | |
| typedef unsigned long long uint64; | |
| typedef long long          int64; | |
| // Bundle of six uint32 words. The kernel in this listing receives one by | |
| // value and reads x[0]..x[5]. | |
| struct scalars_uint32_st { uint32 x[6]; }; | |
| // Bundle of ten unsigned words. The kernel in this listing receives one by | |
| // value and reads x[3], x[8] and x[9]. | |
| struct metadata_st { uint x[10]; }; | |
| // slice_assign_kernel, int32 variant: each thread copies a single int32 from | |
| // buffer_1 into buffer_0 and then exits. | |
| // | |
| // Parameters | |
| //   buffer_0       destination; written at the computed destination offset. | |
| //   buffer_1       source; only read, at the computed source offset. | |
| //   info           uint32 table holding one multiplier per axis for each | |
| //                  buffer (presumably the strides -- TODO confirm with host). | |
| //   static_info    x[3] is the upper bound on the flat thread index; | |
| //                  x[8] / x[9] locate the buffer_0 / buffer_1 multipliers | |
| //                  inside info. | |
| //   scalars_uint32 x[0],x[1] = shift,mask for axis 0; x[2],x[3] = shift,mask | |
| //                  for axis 1; x[4],x[5] = amount added to the axis-0/axis-1 | |
| //                  coordinate on the destination side (presumably the slice | |
| //                  start -- TODO confirm with host). | |
| // | |
| // Neither computed offset is range-checked before the load or the store. | |
| extern "C" __global__ void slice_assign_kernel ( | |
|     int32* buffer_0, | |
|     const int32* __restrict__ buffer_1, | |
|     const uint32* __restrict__ info, | |
|     const __grid_constant__ metadata_st static_info, | |
|     const __grid_constant__ scalars_uint32_st scalars_uint32 | |
| ) { | |
|     // Flatten the 3-D thread position into one index, x varying fastest. | |
|     const uint32 tx = blockIdx.x * blockDim.x + threadIdx.x; | |
|     const uint32 ty = blockIdx.y * blockDim.y + threadIdx.y; | |
|     const uint32 tz = blockIdx.z * blockDim.z + threadIdx.z; | |
|     const uint32 threads_x = gridDim.x * blockDim.x; | |
|     const uint32 threads_y = gridDim.y * blockDim.y; | |
|     const uint32 idxGlobal = (tz * threads_y + ty) * threads_x + tx; | |
|     // Threads at or beyond the bound in static_info.x[3] do no work. | |
|     if (idxGlobal >= static_info.x[3]) { | |
|         return; | |
|     } | |
|     // Peel one coordinate per axis off the flat index, axis 1 first and then | |
|     // axis 0: the mask keeps the coordinate, the shift yields what is left. | |
|     const uint32 coord_1 = idxGlobal & scalars_uint32.x[3]; | |
|     const uint32 outer = idxGlobal >> scalars_uint32.x[2]; | |
|     const uint32 coord_0 = outer & scalars_uint32.x[1]; | |
|     // Offsets held in static_info are rebased by 10 before indexing info; 10 | |
|     // equals the length of metadata_st::x, so info presumably continues where | |
|     // static_info ends (TODO confirm). | |
|     const uint32 dst_base = static_info.x[8]; | |
|     const uint32 src_base = static_info.x[9]; | |
|     // Source offset uses the raw coordinates. | |
|     const uint32 src_offset = coord_1 * info[src_base + 1u - 10u] | |
|                             + coord_0 * info[src_base + 0u - 10u]; | |
|     // Destination offset uses the coordinates shifted by x[5] / x[4]. | |
|     const uint32 dst_offset = (coord_1 + scalars_uint32.x[5]) * info[dst_base + 1u - 10u] | |
|                             + (coord_0 + scalars_uint32.x[4]) * info[dst_base + 0u - 10u]; | |
|     buffer_0[dst_offset] = buffer_1[src_offset]; | |
| } | |
| ``` | |
| [END_KERNEL_COMPILATION] | |
| || 130.528105ms | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 37.121µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.061µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.92µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.61µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.041µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.151µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.281µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.871µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.26µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.68µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.53µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.3µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.35µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.01µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.75µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.98µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.771µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.671µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.281µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.551µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.241µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 39.271µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.18µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.55µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.99µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.1µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.95µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.8µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.551µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.531µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.851µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 20.411µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.261µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.01µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.261µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.661µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.201µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.191µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.571µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.02µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 27.36µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.981µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.671µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.941µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.791µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.4µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.98µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.41µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.71µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.14µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.351µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.051µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.811µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 29.901µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 29.21µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.381µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.9µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 27.431µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 27.051µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.04µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.94µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.47µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.92µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.69µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.821µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.751µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.781µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.021µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.591µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.521µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.67µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.44µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.671µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.731µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.171µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.061µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.711µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.201µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.54µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.9µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.14µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.07µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.64µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.09µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.521µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.921µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.561µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.061µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.651µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 41.991µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.54µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.99µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.841µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.151µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.291µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.101µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.911µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.971µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.191µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.43µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.04µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.71µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.11µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.93µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.041µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.611µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.891µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 33.121µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.37µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 26.771µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 26.2µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 24.34µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 26.221µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 25.771µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 24.941µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 24.701µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 23.331µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.271µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 26.081µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 24.831µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 24.84µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 23.25µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 29.861µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 24.28µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 21.08µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 21.371µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.661µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 21.041µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 24.721µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 21.04µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.21µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.93µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.3µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.791µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.791µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.271µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 21.591µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 21.24µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 21.24µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 21.16µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 21.59µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 21.18µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.881µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.771µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.191µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 23.931µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 21.281µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 21.701µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.061µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 21.32µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 21.23µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.89µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.73µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 21.081µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.921µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 21.181µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 21.341µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.8µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.64µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.6µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 21.74µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| [START_KERNEL_COMPILATION] | |
| name: burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel< | |
| i32, | |
| cubecl_cuda::runtime::CudaRuntime, | |
| > | |
| cube_dim: (16, 16, 1) | |
| info: KernelId { | |
| type_id: TypeId ( | |
| 0x69f09211cfcda605c3b12b9ee6e6f05a, | |
| ), | |
| info: Some ( | |
| ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ), | |
| ), | |
| mode: Some ( | |
| Unchecked, | |
| ), | |
| type_name: "burn_cubecl: : kernel: : index: : slice_assign: : slice_assign_kernel: : SliceAssignKernel<i32, | |
| cubecl_cuda: : runtime: : CudaRuntime>", | |
| } | |
| source: | |
| ```cpp | |
| #include <cuda_runtime.h> | |
| typedef unsigned int uint; | |
| typedef unsigned char uint8; | |
| typedef unsigned short uint16; | |
| typedef unsigned int uint32; | |
| typedef unsigned long long int uint64; | |
| typedef signed char int8; | |
| typedef signed short int16; | |
| typedef signed int int32; | |
| typedef signed long long int int64; | |
| struct scalars_uint32_st { | |
| uint32 x[7]; | |
| }; | |
| struct metadata_st { | |
| uint x[10]; | |
| }; | |
| extern "C" __global__ void slice_assign_kernel ( | |
| int32* buffer_0, const int32* __restrict__ buffer_1, const uint32* __restrict__ info, const __grid_constant__ metadata_st static_info, const __grid_constant__ scalars_uint32_st scalars_uint32 | |
| ) { | |
| uint3 absoluteIdx = make_uint3( | |
| blockIdx.x * blockDim.x + threadIdx.x, | |
| blockIdx.y * blockDim.y + threadIdx.y, | |
| blockIdx.z * blockDim.z + threadIdx.z | |
| ); | |
| uint32 idxGlobal = (absoluteIdx.z * gridDim.x * blockDim.x * gridDim.y * blockDim.y) + (absoluteIdx.y * gridDim.x * blockDim.x) + absoluteIdx.x; | |
| uint32 l_mut_3; | |
| uint32 l_mut_4; | |
| uint32 l_mut_5; | |
| const uint32 l_0 = static_info.x[uint32(3)]; | |
| const bool l_1 = idxGlobal >= l_0; | |
| if (l_1) { | |
| return;} | |
| const uint32 l_2 = idxGlobal * uint32(1); | |
| l_mut_3 = l_2; | |
| l_mut_4 = uint32(0); | |
| l_mut_5 = uint32(0); | |
| const uint32 l_6 = __umulhi(l_mut_3, scalars_uint32.x[3]); | |
| const uint32 l_7 = l_6 + l_mut_3; | |
| const uint32 l_8 = l_7 >> scalars_uint32.x[4]; | |
| const uint32 l_9 = l_8 * scalars_uint32.x[2]; | |
| const uint32 l_10 = l_mut_3 - l_9; | |
| const uint32 l_11 = l_10 + scalars_uint32.x[6]; | |
| const uint32 l_12 = info[static_info.x[uint32(9)] + uint32(1) - 10]; | |
| const uint32 l_13 = l_10 * l_12; | |
| l_mut_5 = l_mut_5 + l_13; | |
| const uint32 l_14 = info[static_info.x[uint32(8)] + uint32(1) - 10]; | |
| const uint32 l_15 = l_11 * l_14; | |
| l_mut_4 = l_mut_4 + l_15; | |
| l_mut_3 = l_8; | |
| const uint32 l_16 = l_mut_3 >> scalars_uint32.x[0]; | |
| const uint32 l_17 = l_mut_3 & scalars_uint32.x[1]; | |
| const uint32 l_18 = l_17 + scalars_uint32.x[5]; | |
| const uint32 l_19 = info[static_info.x[uint32(9)] + uint32(0) - 10]; | |
| const uint32 l_20 = l_17 * l_19; | |
| l_mut_5 = l_mut_5 + l_20; | |
| const uint32 l_21 = info[static_info.x[uint32(8)] + uint32(0) - 10]; | |
| const uint32 l_22 = l_18 * l_21; | |
| l_mut_4 = l_mut_4 + l_22; | |
| l_mut_3 = l_16; | |
| const uint32 l_23 = l_mut_4 / uint32(1); | |
| const uint32 l_24 = l_mut_5 / uint32(1); | |
| const int32 l_25 = buffer_1[l_24]; | |
| buffer_0[l_23] = l_25; | |
| } | |
| ``` | |
| [END_KERNEL_COMPILATION]| | 132.590506ms | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 37.431µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.13µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.89µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.191µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.211µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.26µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.18µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.34µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.65µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.75µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.62µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.66µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.861µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.861µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.911µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.831µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 20.271µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 20.081µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.161µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.561µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.381µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.92µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.23µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 39.33µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.631µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.53µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 20µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.32µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.47µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.24µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.29µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.141µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.331µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 20.551µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 20.28µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.53µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.16µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.981µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.081µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.901µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.561µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.531µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.68µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.8µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.121µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.261µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.151µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.591µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.651µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.68µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.17µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.57µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.73µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| [START_KERNEL_COMPILATION] | |
| name: burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel< | |
| f32, | |
| cubecl_cuda::runtime::CudaRuntime, | |
| > | |
| cube_dim: (16, 16, 1) | |
| info: KernelId { | |
| type_id: TypeId ( | |
| 0xbb7da037008a4eaa774030a150129d16, | |
| ), | |
| info: Some ( | |
| ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ), | |
| ), | |
| mode: Some ( | |
| Unchecked, | |
| ), | |
| type_name: "burn_cubecl: : kernel: : index: : slice_assign: : slice_assign_kernel: : SliceAssignKernel<f32, | |
| cubecl_cuda: : runtime: : CudaRuntime>", | |
| } | |
| source: | |
| ```cpp | |
| #include <cuda_runtime.h> | |
| typedef unsigned int uint; | |
| typedef unsigned char uint8; | |
| typedef unsigned short uint16; | |
| typedef unsigned int uint32; | |
| typedef unsigned long long int uint64; | |
| typedef signed char int8; | |
| typedef signed short int16; | |
| typedef signed int int32; | |
| typedef signed long long int int64; | |
| struct scalars_uint32_st { | |
| uint32 x[7]; | |
| }; | |
| struct metadata_st { | |
| uint x[10]; | |
| }; | |
| extern "C" __global__ void slice_assign_kernel ( | |
| float* buffer_0, const float* __restrict__ buffer_1, const uint32* __restrict__ info, const __grid_constant__ metadata_st static_info, const __grid_constant__ scalars_uint32_st scalars_uint32 | |
| ) { | |
| uint3 absoluteIdx = make_uint3( | |
| blockIdx.x * blockDim.x + threadIdx.x, | |
| blockIdx.y * blockDim.y + threadIdx.y, | |
| blockIdx.z * blockDim.z + threadIdx.z | |
| ); | |
| uint32 idxGlobal = (absoluteIdx.z * gridDim.x * blockDim.x * gridDim.y * blockDim.y) + (absoluteIdx.y * gridDim.x * blockDim.x) + absoluteIdx.x; | |
| uint32 l_mut_3; | |
| uint32 l_mut_4; | |
| uint32 l_mut_5; | |
| const uint32 l_0 = static_info.x[uint32(3)]; | |
| const bool l_1 = idxGlobal >= l_0; | |
| if (l_1) { | |
| return;} | |
| const uint32 l_2 = idxGlobal * uint32(1); | |
| l_mut_3 = l_2; | |
| l_mut_4 = uint32(0); | |
| l_mut_5 = uint32(0); | |
| const uint32 l_6 = __umulhi(l_mut_3, scalars_uint32.x[3]); | |
| const uint32 l_7 = l_6 + l_mut_3; | |
| const uint32 l_8 = l_7 >> scalars_uint32.x[4]; | |
| const uint32 l_9 = l_8 * scalars_uint32.x[2]; | |
| const uint32 l_10 = l_mut_3 - l_9; | |
| const uint32 l_11 = l_10 + scalars_uint32.x[6]; | |
| const uint32 l_12 = info[static_info.x[uint32(9)] + uint32(1) - 10]; | |
| const uint32 l_13 = l_10 * l_12; | |
| l_mut_5 = l_mut_5 + l_13; | |
| const uint32 l_14 = info[static_info.x[uint32(8)] + uint32(1) - 10]; | |
| const uint32 l_15 = l_11 * l_14; | |
| l_mut_4 = l_mut_4 + l_15; | |
| l_mut_3 = l_8; | |
| const uint32 l_16 = l_mut_3 >> scalars_uint32.x[0]; | |
| const uint32 l_17 = l_mut_3 & scalars_uint32.x[1]; | |
| const uint32 l_18 = l_17 + scalars_uint32.x[5]; | |
| const uint32 l_19 = info[static_info.x[uint32(9)] + uint32(0) - 10]; | |
| const uint32 l_20 = l_17 * l_19; | |
| l_mut_5 = l_mut_5 + l_20; | |
| const uint32 l_21 = info[static_info.x[uint32(8)] + uint32(0) - 10]; | |
| const uint32 l_22 = l_18 * l_21; | |
| l_mut_4 = l_mut_4 + l_22; | |
| l_mut_3 = l_16; | |
| const uint32 l_23 = l_mut_4 / uint32(1); | |
| const uint32 l_24 = l_mut_5 / uint32(1); | |
| const float l_25 = buffer_1[l_24]; | |
| buffer_0[l_23] = l_25; | |
| } | |
| ``` | |
| [END_KERNEL_COMPILATION] | |
| || 139.990504ms | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 49.761µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.221µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 40.38µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.99µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.43µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.32µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.751µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.041µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.751µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.591µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.401µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.481µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.381µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.011µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.98µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.31µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.35µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.13µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.37µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.91µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.48µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 20.68µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.361µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.411µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.041µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.021µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.951µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.621µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 20.911µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.121µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.971µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 20.711µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.28µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.33µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 20.99µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.55µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.75µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.2µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.62µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.03µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.04µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.25µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 40.52µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.801µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.161µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.341µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.341µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.251µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 20.791µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.311µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.031µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.741µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.371µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 41.171µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.88µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.17µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.87µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.47µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.361µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.63µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.37µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.58µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.49µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.51µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.47µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.2µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.86µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.261µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.671µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.51µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.88µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.31µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.33µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.38µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.441µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.871µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.981µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.341µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.39µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.56µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.49µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 20.06µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.921µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 32.821µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.861µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.831µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.43µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.42µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.59µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.9µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.711µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.511µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.161µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.04µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.34µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.45µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.16µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.791µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.661µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.09µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.15µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.59µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.12µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.541µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.881µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.5µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.23µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 32.73µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.27µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.27µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.06µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.951µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.371µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.39µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.55µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.08µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.911µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.2µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.67µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.84µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.18µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.87µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.021µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.75µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.57µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.8µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.481µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.831µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 21.041µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.64µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.98µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.331µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.151µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.62µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.08µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.86µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.851µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.261µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.87µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.2µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.1µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.151µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.29µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.07µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.85µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.441µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.251µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.42µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.09µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.66µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.941µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 30.461µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.411µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.83µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.53µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.84µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.121µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.491µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.04µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.42µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1| 93.232µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 31.07µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 31.731µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 31.121µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 32.551µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 29.331µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 48.131µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 29.061µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 28.511µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.761µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 32.01µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 31.001µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 27.941µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 30.761µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 29µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 27.57µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 29.311µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.7µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 32.49µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 30.011µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.181µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.761µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 30.3µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.491µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 27.111µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 28.021µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.92µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.12µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 27.08µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 28.33µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 29.531µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.071µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 27.811µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 27.87µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 27.79µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.91µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 29.361µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 29.681µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 46.84µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 30.551µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 29.88µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 29.47µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 31.461µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 27.111µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 27.031µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 27.08µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 28.55µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.2µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.391µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 29.141µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.891µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.721µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 29.19µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 27.41µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.731µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.991µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.85µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.7µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.481µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.82µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.44µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.541µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.31µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.8µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.591µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.86µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 40.521µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.9µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.09µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.351µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 28.34µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.32µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.4µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.551µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.35µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.34µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.141µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.701µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.82µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.091µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.63µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.14µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.08µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.541µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.56µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.28µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.381µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.25µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.07µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.541µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.51µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.34µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.471µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.13µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.49µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.391µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.16µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.42µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.89µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.21µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.03µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.551µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.41µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.71µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.031µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.27µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.06µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.28µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.991µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.38µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.91µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.34µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.421µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.28µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.18µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.371µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.531µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.59µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.04µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.711µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.81µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.2µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 28.23µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.15µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.221µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.35µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.67µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.091µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.131µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.95µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.2µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 23.651µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.87µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.74µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.83µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.971µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.13µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.31µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.591µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.601µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.53µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.18µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.781µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.28µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.86µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.881µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.331µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.63µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 27.51µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.36µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.731µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.41µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.2µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.15µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.691µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.51µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.311µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.05µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.95µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 22.8µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.48µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.14µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.101µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.25µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.79µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.15µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.811µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.94µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.73µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.201µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.991µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 27.06µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.23µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.89µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.091µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.96µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.21µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.601µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.65µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.79µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.751µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.811µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.71µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.56µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.211µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.88µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.96µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.811µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.12µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.78µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.951µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.391µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.38µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.36µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.251µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.04µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.05µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.701µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.94µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.26µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.96µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.231µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.29µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.35µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.2µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.201µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.48µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.08µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.011µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.11µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.84µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.461µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.141µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.35µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.91µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.58µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.301µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.421µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.23µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.821µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.25µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 308.776µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.96µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.131µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.58µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.42µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.13µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.33µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.47µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.7µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.621µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.73µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.85µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.531µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.5µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.88µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.37µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.621µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.46µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.09µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.931µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.96µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.75µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.33µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.39µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.74µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.651µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.661µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.77µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.43µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.361µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.56µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.49µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.5µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.361µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.46µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.7µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.161µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.91µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.54µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.211µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.761µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.85µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.89µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.841µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.09µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.68µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 54.681µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.2µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.99µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.98µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.86µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.42µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.52µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.72µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.381µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.72µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.46µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.9µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.241µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.551µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.79µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.171µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.581µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.55µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.44µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.811µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.261µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.58µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.52µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.99µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 29.741µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.711µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.95µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.08µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.451µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.18µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.2µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.24µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.501µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.13µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.68µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.211µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.591µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.34µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.89µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.411µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.52µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.45µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.81µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.121µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.281µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.92µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.03µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.851µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.58µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.53µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.83µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 29.461µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.25µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.51µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.361µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.44µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.32µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.231µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.351µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.22µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.22µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.441µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.37µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.92µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.491µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.47µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.16µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.251µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.211µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.25µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.27µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.301µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.9µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.34µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.111µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.27µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.95µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.25µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.321µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.25µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.1µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.351µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.26µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.17µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.55µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.141µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.17µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.07µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.901µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.19µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.3µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.791µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.96µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.28µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.411µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.22µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.24µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.431µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.84µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.97µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: | 42.044302ms | cubecl_std::tensor::contiguous::into_contiguous_kernel::IntoContiguousKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| 1, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| StridedLayoutCompilationArg: : None, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (11, 10, 1) | |
| | 39.321µs | cubecl_reduce::launch::reduce_kernel::ReduceKernel<f32, f32, f32, cubecl_reduce::instructions::mixed::ReduceFn, cubecl_reduce::args::TensorArgs, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 32, | |
| y: 8, | |
| z: 1, | |
| }, | |
| ReduceParams { | |
| shared: Some ( | |
| 8, | |
| ), | |
| use_planes: true, | |
| line_size_input: 4, | |
| line_size_output: 1, | |
| line_mode: Parallel, | |
| bound_checks: true, | |
| bound_checks_inner: Mask, | |
| }, | |
| Mean, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 4, | |
| ), | |
| }, | |
| (), | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (64, 1, 1) | |
| ce: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.95µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.74µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 33.571µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 20.341µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 22.33µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.22µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.501µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.64µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.11µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.171µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.27µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.8µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.791µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.78µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.54µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.301µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.73µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.69µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.651µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.39µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.12µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.221µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.98µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.49µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.791µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.44µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.65µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.951µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 28.731µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.31µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.66µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.031µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 75.542µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.54µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.87µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.751µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.34µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.53µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.701µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.15µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.19µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.551µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.79µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.58µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.171µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.57µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.15µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.191µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.07µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.77µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.671µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.42µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.75µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.441µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.62µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.98µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.161µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.14µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 19.16µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.02µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.871µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.27µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.24µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.191µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.88µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.99µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.771µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.86µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.82µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.821µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.62µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.88µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.871µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.47µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.72µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.371µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.84µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.04µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.271µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.41µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.92µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.061µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.2µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.43µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 28.821µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.09µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.49µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.131µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.17µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.96µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.001µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.35µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.82µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.421µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.52µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.49µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.041µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.34µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.25µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.521µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.48µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.331µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.97µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.61µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.841µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.32µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.15µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.971µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.77µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.58µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.471µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 33.021µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.901µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.3µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.08µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.651µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.201µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.02µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.54µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.721µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.17µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.62µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.991µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.7µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.7µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.421µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.311µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.49µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.92µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.291µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.06µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.29µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.321µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.98µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.31µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.581µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.78µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.57µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.351µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.811µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 19.45µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.59µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.841µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.751µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 17.86µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.161µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.251µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.89µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.2µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.691µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.4µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.72µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.021µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.41µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.3µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.751µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.09µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.61µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.061µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.72µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.22µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.411µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.14µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.47µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 18.381µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (2, 1, 1) | |
| | 21.691µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.19µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.23µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.38µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.291µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.02µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.75µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.851µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.451µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.26µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.54µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.101µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.89µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.09µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.151µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.84µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.16µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.571µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.98µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.62µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.331µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.27µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.97µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.471µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.53µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.49µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.391µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.23µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.331µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.401µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.5µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.4µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.041µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.72µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.65µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.821µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.501µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.44µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.94µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.171µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.36µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.23µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.651µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.42µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.14µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.451µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.24µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.35µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.211µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.16µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.71µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.961µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.95µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<i32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 20.73µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.891µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.17µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.57µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 21.97µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.881µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.4µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.46µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.121µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.85µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.04µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.891µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.05µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.11µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.901µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.26µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.89µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.471µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.02µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.34µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.481µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.55µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.32µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.281µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.2µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.3µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.201µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 18.32µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.28µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.741µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.11µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.23µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.201µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.73µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.89µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.92µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.291µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.95µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.96µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.151µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.96µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.961µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.18µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.1µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.361µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.11µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.23µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 16.951µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.21µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.05µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.431µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.26µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 17.62µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : Fast { | |
| divisor: (), | |
| multiplier: (), | |
| shift_right: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 1, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 40.291µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.87µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 28.91µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 26.141µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.2µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.66µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.98µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.9µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.63µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.3µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.95µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.831µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.321µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 24.841µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 25.031µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.061µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.881µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.701µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.111µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.431µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.291µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.881µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 23.68µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
| x: 16, | |
| y: 16, | |
| z: 1, | |
| }, | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| Sequence [ | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| FastDivmodCompilationArg: : PowerOfTwo { | |
| shift: (), | |
| mask: (), | |
| }, | |
| ], | |
| Sequence [ | |
| (), | |
| (), | |
| (), | |
| (), | |
| ], | |
| TensorCompilationArg { | |
| inplace: None, | |
| vectorisation: Some ( | |
| 8, | |
| ), | |
| }, | |
| ) CubeCount (1, 1, 1) | |
| | 22.76µs | burn_cubecl::kernel::index::slice_assign::slice_assign_kernel::SliceAssignKernel<f32, cubecl_cuda::runtime::CudaRuntime>: ( | |
| CubeDim { | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment