AmosLewis · October 14, 2022 16:38 · AmosLewis · Oct 14, 2022
diff --git a/search_print7173.txt b/search_print7173.txt
 (tuner_venv) chi@alderlake:~/IREE$ python shark-tuner/minilm_example.py -model /home/chi/IREE/stable_diff_linalg.mlir -num_iters 100 -result_dir results -device vulkan -search_op conv
 The input mlir type is linalg
 Found AMD Radeon RX 5000 series device. Using rdna1-5700xt-linux
 Searching for [2, 66, 66, 4, 3, 3, 320, 64, 64, 1, 1, 0]
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 16], [0, 4, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 10000000.0 ms Current: 587.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 64, 64], [0, 2, 32, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 587.0 ms Current: 0.61 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 8], [0, 8, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 32, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.61 ms Current: 0.056 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 16], [0, 4, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.056 ms Current: 0.322 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 20], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.056 ms Current: 0.033 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 4, 8], [0, 16, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.033 ms Current: 0.051 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 64, 2], [0, 16, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 64, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.033 ms Current: 0.129 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 2], [0, 2, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.033 ms Current: 0.176 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 32], [0, 8, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.033 ms Current: 0.109 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 4, 64], [0, 32, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.033 ms Current: 0.073 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 8, 40], [0, 4, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.033 ms Current: 0.064 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 64, 20], [0, 1, 16, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.033 ms Current: 0.25 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 4], [0, 32, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.033 ms Current: 0.72 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 64, 2], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 64, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.033 ms Current: 0.105 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 64, 32], [0, 4, 32, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.033 ms Current: 0.196 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 4, 20], [0, 8, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.033 ms Current: 0.108 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 64, 40], [0, 1, 32, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.033 ms Current: 0.046 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 40], [0, 8, 4, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.033 ms Current: 0.346 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 8, 16], [0, 4, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.033 ms Current: 0.034 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 20], [0, 1, 16, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 64]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.033 ms Current: 0.246 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 64, 8], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 64, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.033 ms Current: 0.047 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 2], [0, 4, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.033 ms Current: 0.157 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 4], [0, 8, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.033 ms Current: 0.219 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 64], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.033 ms Current: 0.023 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 40], [0, 4, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.157 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 20], [0, 8, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.064 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 64, 32], [0, 2, 2, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 32, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.78 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 64], [0, 4, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.033 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 40], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.032 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 32], [0, 16, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.036 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 16, 8], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.034 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 64, 40], [0, 16, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 32, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.217 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 10], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.038 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 10], [0, 4, 4, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.149 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 32], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.023 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 8], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.058 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 4], [0, 2, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.135 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 16, 20], [0, 4, 4, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.307 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 20], [0, 4, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.071 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 64, 16], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 64, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.036 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 32], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.026 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 64, 20], [0, 4, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.045 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.023 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 16], [0, 16, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.093 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 8], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.063 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 16, 20], [0, 2, 8, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.287 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 32], [0, 4, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.028 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 64, 8], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 32, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.052 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 64, 8], [0, 8, 16, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.216 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 40], [0, 8, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.038 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 16], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.03 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 20], [0, 32, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 64, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 1.27 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 2, 32], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.05 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 40], [0, 2, 16, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.725 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 64, 4], [0, 2, 16, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.079 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 40], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.031 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 8], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.034 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 64, 32], [0, 4, 16, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.582 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 32], [0, 4, 16, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.158 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 64], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.025 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 2, 64], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 64]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.056 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.036 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 16, 16], [0, 8, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.038 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 20], [0, 1, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.099 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 8], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.06 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 4], [0, 2, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.136 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 16], [0, 8, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.031 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 32], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.023 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 64, 64], [0, 8, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 32, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.179 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 4], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.056 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 20], [0, 8, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.052 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 64, 64], [0, 2, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.147 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 64, 40], [0, 1, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.149 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 32], [0, 1, 16, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.039 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 8], [0, 2, 16, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.096 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 64, 40], [0, 4, 4, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.378 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 16], [0, 4, 16, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.28 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 64, 32], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.026 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 4], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.071 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 4], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.058 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 64], [0, 8, 8, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 1.57 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 20], [0, 8, 16, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 2.41 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 16, 64], [0, 16, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.263 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 32], [0, 2, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.04 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 64], [0, 8, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.227 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 40], [0, 8, 4, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.628 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 32], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.033 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 40], [0, 2, 8, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.069 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 64, 32], [0, 4, 16, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.165 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 16], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.028 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 32], [0, 4, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.032 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 8], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.072 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 32], [0, 1, 8, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 64]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 1.97 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 16, 64], [0, 8, 4, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 1.37 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 64, 20], [0, 2, 4, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.146 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 64, 32], [0, 1, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.024 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 40], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.058 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 64, 4], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 64, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.058 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 32], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.023 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 64, 40], [0, 4, 2, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 32, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.023 ms Current: 0.235 ms
 Search time for [2, 66, 66, 4, 3, 3, 320, 64, 64, 1, 1, 0] is 1035.9959334529995
 Searching for [2, 66, 66, 320, 3, 3, 320, 64, 64, 1, 1, 0]
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 10000000.0 ms Current: 1.86 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 16], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.86 ms Current: 5.77 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 64], [0, 1, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 32, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.86 ms Current: 1.58 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 64, 64], [0, 8, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.58 ms Current: 11.9 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 16, 64], [0, 1, 8, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 64]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.58 ms Current: 74.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 40], [0, 4, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.58 ms Current: 1.65 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 2, 40], [0, 8, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.58 ms Current: 1.52 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 16], [0, 8, 32, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 124.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 40], [0, 1, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 2.59 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 40], [0, 4, 1, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 32, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 71.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 64, 40], [0, 4, 4, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 22.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 4], [0, 16, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 10.9 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 8], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 14.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 8], [0, 4, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 8.81 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 4, 32], [0, 8, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 2.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 8], [0, 2, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 19.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 64], [0, 2, 32, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 99.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 16], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 32, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 5.16 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 64], [0, 8, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 1.76 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 40], [0, 16, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 16.6 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 16], [0, 16, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 9.33 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 8], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 16.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 8], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 7.32 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 4], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 26.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 20], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 3.01 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 64, 64], [0, 1, 16, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 65.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 8], [0, 1, 16, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 64]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 7.03 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 4], [0, 8, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 37.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 2, 40], [0, 2, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 2.25 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 64], [0, 8, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 24.6 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 10], [0, 16, 4, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 29.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 10], [0, 2, 8, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 15.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 4, 20], [0, 2, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 4.18 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 8], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 32, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 14.7 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 64, 64], [0, 4, 8, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 35.9 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 8, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.52 ms Current: 1.51 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 4], [0, 16, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.51 ms Current: 24.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 4], [0, 2, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.51 ms Current: 37.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 64, 32], [0, 2, 32, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.51 ms Current: 42.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 64], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.51 ms Current: 1.52 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 64, 40], [0, 4, 16, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.51 ms Current: 20.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 8], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.51 ms Current: 8.96 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 2], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.51 ms Current: 66.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 2, 64], [0, 16, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.51 ms Current: 3.09 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 16], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 32, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.51 ms Current: 4.35 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 16], [0, 2, 16, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.51 ms Current: 9.11 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 32], [0, 4, 32, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.51 ms Current: 62.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 4], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.51 ms Current: 35.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 20], [0, 2, 16, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.51 ms Current: 12.9 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 40], [0, 2, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.51 ms Current: 1.73 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 16], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.51 ms Current: 4.34 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 64], [0, 16, 8, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.51 ms Current: 314.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 16], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.51 ms Current: 2.93 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 8], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.51 ms Current: 16.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 32], [0, 16, 2, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 32, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.51 ms Current: 53.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 64], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.51 ms Current: 1.25 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 40], [0, 8, 16, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 210.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 10], [0, 2, 8, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 14.6 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 32], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 2.56 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 32], [0, 2, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 2.72 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 64], [0, 16, 16, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 80.6 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 64, 32], [0, 4, 16, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 17.6 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 64, 64], [0, 4, 4, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 46.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 40], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 2.06 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 40], [0, 16, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 12.7 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 32], [0, 1, 4, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 2.64 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 2, 32], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 3.42 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 64], [0, 8, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 8.02 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 16], [0, 2, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 7.42 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 16], [0, 8, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 32, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 12.7 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 32], [0, 8, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 11.6 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 2.41 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 32], [0, 4, 16, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 17.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 4], [0, 8, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 16.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 40], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 2.02 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 64, 16], [0, 8, 32, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 68.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 64, 16], [0, 8, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 13.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 8, 10], [0, 4, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 8.66 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 64, 8], [0, 2, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 16.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 64, 8], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 32, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 10.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 8], [0, 1, 16, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 64]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 44.6 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 64], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 1.73 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 64], [0, 8, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 1.53 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 16, 8], [0, 8, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 11.9 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 64, 16], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 64, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 6.53 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 40], [0, 2, 16, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 32.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 10], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 8.77 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 20], [0, 1, 8, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 4.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 2], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 30.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 8], [0, 1, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 64]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 16.7 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 8], [0, 4, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 7.22 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 16, 16], [0, 32, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 33.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 64], [0, 4, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 29.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 2, 16], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 4.68 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 20], [0, 1, 16, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 64]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 30.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 32, 20], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 4.47 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 8], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 8.17 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 64], [0, 1, 1, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 2.18 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 4, 32], [0, 32, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 6.12 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 4], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 64]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 1.25 ms Current: 38.1 ms
 Search time for [2, 66, 66, 320, 3, 3, 320, 64, 64, 1, 1, 0] is 3782.4488911890076
 Searching for [2, 64, 64, 320, 1, 1, 320, 64, 64, 1, 1, 0]
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 32], [0, 32, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 10000000.0 ms Current: 7.82 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 64], [0, 2, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 7.82 ms Current: 0.287 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 64], [0, 4, 2, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.287 ms Current: 2.28 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 16, 16], [0, 4, 8, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.287 ms Current: 2.16 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 20], [0, 32, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.287 ms Current: 2.58 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 64, 32], [0, 4, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.287 ms Current: 0.462 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 32], [0, 2, 4, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.287 ms Current: 0.881 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 40], [0, 1, 16, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 64]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.287 ms Current: 12.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 2, 40], [0, 1, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 64]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.287 ms Current: 0.307 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 8], [0, 4, 32, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.287 ms Current: 2.58 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 16], [0, 1, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.287 ms Current: 0.528 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 40], [0, 2, 4, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.287 ms Current: 0.278 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 8], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.278 ms Current: 0.595 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 16], [0, 2, 16, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.278 ms Current: 0.426 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 64], [0, 2, 2, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.278 ms Current: 0.229 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 32], [0, 2, 4, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.229 ms Current: 0.853 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 64, 20], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.229 ms Current: 0.391 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 64, 40], [0, 2, 4, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.229 ms Current: 0.303 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 64], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.229 ms Current: 0.19 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.19 ms Current: 0.153 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 32], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 0.262 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 64, 40], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 0.241 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 64, 64], [0, 2, 4, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 2.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 64, 16], [0, 16, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 1.28 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 16], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 0.426 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 4, 10], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 0.789 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 32, 64], [0, 1, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 0.251 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 16, 64], [0, 1, 1, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 0.273 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 20], [0, 16, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 32, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 1.01 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 64, 4], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 32, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 3.21 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 32], [0, 2, 32, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 5.43 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 20], [0, 4, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 0.402 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 40], [0, 4, 8, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 5.53 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 64, 40], [0, 1, 32, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 7.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 10], [0, 2, 32, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 4.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 8], [0, 16, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 5.32 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 20], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 0.403 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 64, 64], [0, 2, 4, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 0.866 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 64, 10], [0, 1, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 0.625 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 0.226 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 40], [0, 1, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 0.249 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 64, 16], [0, 2, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 0.33 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 16, 2], [0, 16, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 3.95 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 2, 4], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 1.53 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 8], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 0.972 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 40], [0, 16, 4, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 6.65 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 64], [0, 1, 4, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 2.76 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 10], [0, 4, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 0.93 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 64, 32], [0, 4, 4, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 2.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 64], [0, 8, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.153 ms Current: 0.139 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 10], [0, 8, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 32, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 2.97 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 8], [0, 16, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 1.49 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 64, 8], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 64, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 1.41 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 40], [0, 2, 32, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 4.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.158 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 40], [0, 4, 32, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 15.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 4, 40], [0, 16, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.603 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 16], [0, 2, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.847 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 16, 8], [0, 16, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.45 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 64, 64], [0, 2, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 1.19 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 64, 20], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 64, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.468 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 4], [0, 32, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 32, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.863 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 64], [0, 8, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.872 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 4, 20], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 64]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.455 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 16], [0, 2, 8, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.934 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 4, 40], [0, 1, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 64]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.299 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 32], [0, 2, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.251 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 64, 64], [0, 1, 16, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 4.54 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 32], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.251 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 10], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.882 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 16], [0, 32, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 32, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 2.22 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 16], [0, 16, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.579 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 64, 40], [0, 1, 32, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.969 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 2], [0, 8, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 6.57 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 16, 40], [0, 16, 1, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 2.51 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 40], [0, 2, 8, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 2.16 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 16, 64], [0, 1, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.279 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.185 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 2], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 3.06 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 4], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 32, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 1.93 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 8], [0, 2, 16, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 2.44 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 40], [0, 2, 1, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 32, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.283 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 64], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.195 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 2, 32], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.262 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 4, 64], [0, 1, 2, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 64]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.224 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 16], [0, 16, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.366 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 10], [0, 8, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.703 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 64, 64], [0, 8, 2, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 32, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 5.64 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 64], [0, 1, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.247 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 8, 4], [0, 4, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 1.58 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 32, 32], [0, 2, 8, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 2.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 32], [0, 16, 16, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 8.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 2, 32], [0, 8, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.269 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 4, 4], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 1.68 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 8, 8], [0, 8, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.922 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 40], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.242 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 8], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.617 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 8], [0, 4, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 1.53 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 2, 32], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 0.252 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 16, 8], [0, 8, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x64x64x320xf32>, tensor<1x1x320x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
 Best: 0.139 ms Current: 1.6 ms
 Search time for [2, 64, 64, 320, 1, 1, 320, 64, 64, 1, 1, 0] is 1529.1958103450015
 Searching for [2, 66, 66, 320, 3, 3, 320, 32, 32, 1, 2, 0]
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 64], [0, 8, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 10000000.0 ms Current: 0.983 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 32], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.983 ms Current: 1.64 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 40], [0, 1, 1, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.983 ms Current: 1.41 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 40], [0, 1, 4, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.983 ms Current: 1.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 16], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.983 ms Current: 1.12 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 4], [0, 1, 16, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.983 ms Current: 4.29 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 64], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.983 ms Current: 0.584 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 20], [0, 1, 4, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.584 ms Current: 2.11 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 40], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.584 ms Current: 0.679 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 16], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.584 ms Current: 3.96 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 40], [0, 2, 2, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.584 ms Current: 1.15 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 2, 32], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.584 ms Current: 0.645 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 32], [0, 4, 16, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.584 ms Current: 13.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 4, 32], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.584 ms Current: 0.651 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 16], [0, 8, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.584 ms Current: 3.48 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 16], [0, 4, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.584 ms Current: 1.77 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 10], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.584 ms Current: 2.86 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 40], [0, 4, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.584 ms Current: 6.35 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 40], [0, 2, 1, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.584 ms Current: 1.75 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 16], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.584 ms Current: 1.02 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 16], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.584 ms Current: 1.17 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 40], [0, 2, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.584 ms Current: 0.855 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 4, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.584 ms Current: 1.74 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 20], [0, 4, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.584 ms Current: 1.73 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 32], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.584 ms Current: 0.627 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 32, 40], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.584 ms Current: 0.547 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 4], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.547 ms Current: 5.15 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 40], [0, 2, 8, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.547 ms Current: 19.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 40], [0, 2, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.547 ms Current: 0.98 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 16], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.547 ms Current: 1.89 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.547 ms Current: 0.819 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 4], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.547 ms Current: 8.24 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 16], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.547 ms Current: 1.86 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 16], [0, 2, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.547 ms Current: 2.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 64], [0, 8, 16, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.547 ms Current: 23.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 20], [0, 8, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 32, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.547 ms Current: 1.87 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 40], [0, 4, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 32, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.547 ms Current: 6.38 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 10], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 32, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.547 ms Current: 4.7 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.547 ms Current: 0.43 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 2, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 0.923 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 8], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 4.38 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 40], [0, 1, 2, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 3.02 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 64], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 1.25 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 40], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 32, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 0.906 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 16], [0, 4, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 32, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 4.33 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 16], [0, 16, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 3.55 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 40], [0, 2, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 1.04 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 20], [0, 4, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 1.38 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 64], [0, 4, 2, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 13.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 16], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 2.37 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 8], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 2.15 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 20], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 1.33 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 16], [0, 2, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 1.51 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 64], [0, 2, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 0.572 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 20], [0, 2, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 1.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 40], [0, 2, 8, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 9.16 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 16], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 1.21 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 4], [0, 1, 16, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 5.79 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 64], [0, 2, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 4.56 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 20], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 2.12 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 4], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 8.18 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 10], [0, 16, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 1.92 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 32], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 0.726 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 4], [0, 8, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 5.46 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 8], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 2.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 0.63 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 32], [0, 1, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 21.7 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 32], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 0.697 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 20], [0, 1, 8, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 2.46 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 32], [0, 4, 4, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 13.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 20], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 1.18 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 32, 20], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 1.36 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 32], [0, 2, 2, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 1.96 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 16], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 1.09 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 20], [0, 2, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 1.12 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 8], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 2.84 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 64], [0, 16, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 27.7 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 32, 64], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 0.774 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 64], [0, 4, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 3.26 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 2], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 15.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 8], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 2.82 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 64], [0, 2, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 3.31 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 4], [0, 8, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 4.08 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 64], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 0.631 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 40], [0, 4, 8, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 7.43 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 64], [0, 4, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 11.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 20], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 1.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 64], [0, 2, 2, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 62.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 40], [0, 16, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 7.32 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 40], [0, 2, 4, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 1.95 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 40], [0, 1, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 0.869 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 40], [0, 1, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 9.63 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 32], [0, 4, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 1.07 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 64], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 1.68 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 16], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 0.973 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 64], [0, 4, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 1.86 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 64], [0, 8, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 1.58 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 4], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 6.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 64], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 1.27 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 8, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x320xf32>, tensor<3x3x320x320xf32>) outs(%1 : tensor<2x32x32x320xf32>) -> tensor<2x32x32x320xf32>
 Best: 0.43 ms Current: 0.943 ms
 Search time for [2, 66, 66, 320, 3, 3, 320, 32, 32, 1, 2, 0] is 193.42046682100045
 Searching for [2, 34, 34, 320, 3, 3, 640, 32, 32, 1, 1, 0]
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 16], [0, 1, 8, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 10000000.0 ms Current: 2.21 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 64], [0, 1, 4, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 2.21 ms Current: 1.57 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 16], [0, 2, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.57 ms Current: 1.94 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 40], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.57 ms Current: 1.68 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.57 ms Current: 0.826 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 20], [0, 1, 4, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 7.38 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 16], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 2.04 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 16], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 2.6 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 10], [0, 16, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 2.23 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 16], [0, 2, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 3.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 40], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 64], [0, 8, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.63 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 16], [0, 2, 16, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 11.9 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 10], [0, 1, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 2.67 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 16], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.84 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 16], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 2.05 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 16], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.81 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 4], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 10.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 16], [0, 16, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 3.79 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 32], [0, 16, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.64 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 8], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 2.65 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 8], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 3.39 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 10], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 3.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 32], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 0.99 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 0.967 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 32], [0, 2, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.22 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 16], [0, 2, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 3.32 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 40], [0, 8, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 13.6 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 32, 40], [0, 1, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.74 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 10], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 4.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 4, 16], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 619.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 32, 16], [0, 1, 16, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.61 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 40], [0, 1, 8, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.15 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 64], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.43 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 32, 16], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 32, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.89 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 4], [0, 4, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 6.97 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 32], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.13 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 64], [0, 8, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 10], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 3.37 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 64], [0, 2, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.67 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 20], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 32, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.89 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 40], [0, 2, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 8.32 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 16], [0, 8, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.51 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 10], [0, 4, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 32, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 4.43 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 20], [0, 8, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.61 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 2, 20], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.96 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 4], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 12.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 40], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 0.992 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 32], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 0.967 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 16], [0, 4, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.83 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 20], [0, 4, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 2.12 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 8], [0, 1, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 5.74 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 40], [0, 4, 4, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.04 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 64], [0, 1, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.07 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 8], [0, 4, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 7.13 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 64], [0, 16, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 11.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 64], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 0.869 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 16], [0, 16, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 32, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.71 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 4], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 11.6 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 20], [0, 1, 8, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.9 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 1, 2, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.55 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 20], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.93 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 16], [0, 16, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 2.81 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 20], [0, 2, 4, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 2.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 40], [0, 2, 2, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.04 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 4], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 15.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 16], [0, 4, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.88 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 0.849 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 20], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.95 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 2, 2, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.46 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 2, 64], [0, 2, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.25 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 40], [0, 4, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 4.59 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 40], [0, 4, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 32, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.73 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 40], [0, 1, 8, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 1.76 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 16], [0, 2, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 32, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 2.66 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 32, 16], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 3.19 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 64], [0, 8, 1, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 32, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 18.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 4], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 32, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 10.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 20], [0, 2, 8, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 9.36 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 10], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 32, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 3.64 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 16], [0, 4, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 3.08 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 64], [0, 4, 16, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 11.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 32, 16], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 3.47 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.826 ms Current: 0.695 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 40], [0, 4, 16, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.695 ms Current: 9.19 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 1, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.695 ms Current: 1.06 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 40], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.695 ms Current: 0.988 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 8], [0, 4, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.695 ms Current: 6.79 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 8], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.695 ms Current: 6.81 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 16], [0, 16, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.695 ms Current: 2.59 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 32], [0, 4, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.695 ms Current: 1.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 40], [0, 1, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.695 ms Current: 6.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 16], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.695 ms Current: 1.87 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 16, 40], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.695 ms Current: 1.73 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 10], [0, 1, 4, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.695 ms Current: 4.75 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 4, 32], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.695 ms Current: 1.37 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 4], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.695 ms Current: 14.9 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 8], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.695 ms Current: 7.38 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 4, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.695 ms Current: 3.41 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x320xf32>, tensor<3x3x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.695 ms Current: 0.821 ms
 Search time for [2, 34, 34, 320, 3, 3, 640, 32, 32, 1, 1, 0] is 210.22872485901462
 Searching for [2, 34, 34, 640, 3, 3, 640, 32, 32, 1, 1, 0]
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 16], [0, 8, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 10000000.0 ms Current: 4.66 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 8], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 4.66 ms Current: 15.9 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 4], [0, 4, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 4.66 ms Current: 20.9 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 2, 8], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 4.66 ms Current: 10.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 8, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 4.66 ms Current: 1.99 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 64], [0, 2, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.99 ms Current: 37.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 32], [0, 1, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.99 ms Current: 3.55 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 20], [0, 1, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.99 ms Current: 6.89 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 8], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.99 ms Current: 14.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 16], [0, 16, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.99 ms Current: 6.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 64], [0, 8, 1, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 32, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.99 ms Current: 41.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 10], [0, 8, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.99 ms Current: 9.85 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 1, 2, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.99 ms Current: 4.01 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 10], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.99 ms Current: 8.97 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 32], [0, 1, 4, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.99 ms Current: 6.75 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 20], [0, 2, 4, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.99 ms Current: 8.29 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 10], [0, 4, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 32, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.99 ms Current: 11.6 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 10], [0, 2, 4, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.99 ms Current: 8.93 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 20], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.99 ms Current: 4.48 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 20], [0, 2, 16, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.99 ms Current: 5.08 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 8], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.99 ms Current: 12.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 4, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.99 ms Current: 8.84 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 64], [0, 8, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.99 ms Current: 24.7 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 64], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.99 ms Current: 1.56 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 16], [0, 2, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 32, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.56 ms Current: 5.76 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.56 ms Current: 1.21 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 40], [0, 8, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 19.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 64], [0, 16, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 11.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 64], [0, 2, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 3.69 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 64], [0, 16, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 31.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 8], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 13.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 16, 64], [0, 1, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 2.88 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 20], [0, 1, 8, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 4.01 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 4], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 27.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 8], [0, 1, 16, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 15.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 40], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 2.71 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 64], [0, 2, 2, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 3.11 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 4], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 30.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 32], [0, 1, 16, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 78.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 3.54 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 32], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 2.86 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 4], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 17.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 20], [0, 4, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 4.45 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 2.7 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 64], [0, 4, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 15.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 16], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 7.45 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 40], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 3.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 4], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 32, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 28.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 16], [0, 2, 16, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 4.39 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 4], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 31.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 8], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 15.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 8], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 11.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 64], [0, 2, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 2.86 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 8], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 12.9 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 2], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 62.7 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 64], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 1.55 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 40], [0, 2, 1, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 32, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 142.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 32], [0, 4, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 2.35 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 64], [0, 8, 4, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 147.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 32], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 2.51 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 64], [0, 4, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 2.12 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 40], [0, 1, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 3.34 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 64], [0, 4, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 28.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 64], [0, 4, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 1.45 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 32], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 3.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 32], [0, 8, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 22.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 32], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 2.58 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 64], [0, 4, 16, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 34.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 64], [0, 2, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 51.6 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 32], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 3.19 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 40], [0, 2, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 2.33 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 64], [0, 8, 4, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 74.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 1.61 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 40], [0, 8, 4, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 105.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 64], [0, 8, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 1.91 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 2], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 67.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 64], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 1.67 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 20], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 4.41 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 64], [0, 16, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 7.62 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 32, 16], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 32, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 5.09 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 32], [0, 8, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 34.6 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 20], [0, 2, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 5.13 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 64], [0, 2, 16, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 6.71 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 32], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 3.82 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 10], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 32, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 9.44 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 40], [0, 8, 4, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 59.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 20], [0, 4, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 5.03 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 16], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 6.18 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 1, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 3.94 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 64], [0, 1, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 56.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 40], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 2.99 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 32], [0, 4, 2, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 50.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 20], [0, 4, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 4.68 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 64], [0, 4, 2, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 34.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 16], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 10.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 16, 40], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 3.13 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 8, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 4.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 32, 40], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 2.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 32], [0, 1, 8, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 5.91 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 64], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 1.21 ms Current: 1.68 ms
 Search time for [2, 34, 34, 640, 3, 3, 640, 32, 32, 1, 1, 0] is 343.61230001196964
 Searching for [2, 32, 32, 320, 1, 1, 640, 32, 32, 1, 1, 0]
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 32, 64], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 32, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 10000000.0 ms Current: 0.113 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 16], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 0.413 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 16], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 0.208 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 16], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 0.206 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 4], [0, 2, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 0.664 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 64], [0, 1, 16, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 0.472 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 8], [0, 1, 16, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 0.313 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 2, 16], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 0.214 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 4], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 0.414 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 40], [0, 2, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 0.117 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 16], [0, 8, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 0.185 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 4], [0, 2, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 0.66 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 10], [0, 2, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 0.313 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 20], [0, 4, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 0.177 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 20], [0, 4, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 0.178 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 32, 10], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 0.286 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 64], [0, 8, 16, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 4.58 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 32], [0, 16, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 1.29 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 20], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 0.206 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 32], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 0.129 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 40], [0, 4, 4, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 2.82 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 4], [0, 2, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 0.682 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 2, 4], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 0.635 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 40], [0, 2, 2, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 0.245 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.113 ms Current: 0.09 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 40], [0, 16, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 3.17 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 20], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.191 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 40], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.112 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 32], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.111 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 20], [0, 2, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.229 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 64], [0, 4, 16, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 1.39 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 16], [0, 2, 16, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 2.04 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 16], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.194 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 40], [0, 2, 16, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.838 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 40], [0, 8, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.19 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 16], [0, 2, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.228 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 2], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 1.57 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 2, 16], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.179 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 10], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.403 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 40], [0, 2, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.668 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 40], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.098 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.103 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 2, 64], [0, 4, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.113 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 16], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.211 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 40], [0, 16, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 3.18 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 16], [0, 4, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.609 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 8], [0, 8, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.563 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 64], [0, 2, 1, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.208 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 16], [0, 8, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.164 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 2, 64], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.136 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 20], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.206 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.106 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 16], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.171 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 10], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.284 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 4], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.45 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 20], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.21 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 10], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.359 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 64], [0, 1, 2, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.157 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 40], [0, 1, 8, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.18 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 32], [0, 8, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 32, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.135 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 8], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.393 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 64], [0, 8, 4, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 4.73 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 20], [0, 1, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.245 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 32], [0, 4, 8, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 1.87 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 32], [0, 4, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.825 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 2, 8], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.361 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 40], [0, 1, 4, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.166 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 40], [0, 8, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.127 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 20], [0, 4, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.307 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 64], [0, 4, 8, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 4.08 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 32], [0, 4, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.502 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 4], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.619 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.129 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 4], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.449 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 16], [0, 2, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.173 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 8], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.337 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 64], [0, 1, 1, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 2.84 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 32], [0, 2, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.126 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 16], [0, 4, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.174 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 2, 40], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.146 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 32], [0, 16, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.467 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 20], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.234 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 20], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.194 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 40], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.16 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 8], [0, 1, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.356 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 20], [0, 8, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.202 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 4, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.15 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 20], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.194 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 20], [0, 4, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.18 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 20], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.182 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 20], [0, 8, 4, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 3.27 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 2], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 1.71 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.094 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 8], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.432 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 40], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.134 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 40], [0, 2, 16, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 1.74 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 64], [0, 8, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 1.74 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 20], [0, 1, 4, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.286 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 40], [0, 2, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.119 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 4], [0, 8, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x320xf32>, tensor<1x1x320x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.09 ms Current: 0.467 ms
 Search time for [2, 32, 32, 320, 1, 1, 640, 32, 32, 1, 1, 0] is 190.41175837203627
 Searching for [2, 32, 32, 640, 1, 1, 640, 32, 32, 1, 1, 0]
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 40], [0, 4, 4, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 10000000.0 ms Current: 0.277 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 40], [0, 2, 8, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.277 ms Current: 1.65 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 2, 32], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.277 ms Current: 0.25 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.25 ms Current: 0.216 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 40], [0, 1, 1, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.216 ms Current: 0.416 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 10], [0, 1, 16, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.216 ms Current: 1.14 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 32], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.216 ms Current: 0.245 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 64], [0, 2, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.216 ms Current: 0.184 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 40], [0, 16, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.354 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 32, 4], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 2.21 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 32], [0, 16, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 1.64 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 8], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.714 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 40], [0, 8, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.351 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 20], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.509 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 32], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.251 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 32], [0, 1, 16, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 6.16 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 32], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.345 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 40], [0, 16, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 32, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 2.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 16], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.578 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 40], [0, 4, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 1.27 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 20], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.538 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 4], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 1.31 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 64], [0, 1, 16, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 5.53 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 64], [0, 4, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.222 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 32], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.441 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.27 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 32], [0, 2, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.323 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 16], [0, 2, 16, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.502 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 40], [0, 2, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 10.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 32], [0, 16, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.929 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 20], [0, 2, 4, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.558 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 20], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.645 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 2.13 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 32], [0, 16, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 4.65 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 20], [0, 8, 4, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 1.73 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 8], [0, 4, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.955 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 8], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 32, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.549 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 64], [0, 4, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.941 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 8], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.737 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 8], [0, 2, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 1.17 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 32], [0, 2, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.413 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 64], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.223 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 40], [0, 1, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 1.6 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 8], [0, 16, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.802 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 64], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.231 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 32], [0, 2, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.259 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 40], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.283 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 32, 64], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.19 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 32, 32], [0, 1, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.381 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 40], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.286 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 4], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 1.85 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 64], [0, 8, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.282 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 40], [0, 4, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 32, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.817 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 40], [0, 4, 2, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 1.72 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 16], [0, 4, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.493 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 20], [0, 2, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.565 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 40], [0, 2, 2, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.36 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 16], [0, 4, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.478 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 8], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.707 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 64], [0, 8, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 3.21 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 32], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.272 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 32], [0, 8, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.302 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 16, 64], [0, 2, 4, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 1.73 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 8], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 1.42 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 8], [0, 2, 16, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 1.15 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 32], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.284 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 20], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.401 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.285 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 32], [0, 8, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.773 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 20], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.425 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 40], [0, 4, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.322 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 64], [0, 1, 2, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.435 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 32, 16], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 32, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.839 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 32], [0, 8, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 2.64 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 32], [0, 4, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.953 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 40], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.254 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 16], [0, 4, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.529 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 2], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 32, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 7.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 40], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.291 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 64], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.186 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 40], [0, 8, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.247 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 32], [0, 8, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 32, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.585 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 4, 32], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.277 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 20], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.533 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 1, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.587 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 8, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.696 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 40], [0, 4, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.268 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 10], [0, 4, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 1.06 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 32, 4], [0, 1, 16, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 2.26 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 16], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.478 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 16], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.545 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 8, 64], [0, 4, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.943 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 40], [0, 4, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.367 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 16], [0, 2, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.673 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 32, 10], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 32, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 1.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 32, 8], [0, 4, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 1.29 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 40], [0, 8, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.339 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 10], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 1.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 2, 4], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 1.88 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 32, 2, 40], [0, 1, 1, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 32]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x32x32x640xf32>, tensor<1x1x640x640xf32>) outs(%1 : tensor<2x32x32x640xf32>) -> tensor<2x32x32x640xf32>
 Best: 0.184 ms Current: 0.41 ms
 Search time for [2, 32, 32, 640, 1, 1, 640, 32, 32, 1, 1, 0] is 149.72391951299505
 Searching for [2, 34, 34, 640, 3, 3, 640, 16, 16, 1, 2, 0]
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 32], [0, 1, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 10000000.0 ms Current: 1.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 32], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 1.4 ms Current: 1.28 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 64], [0, 4, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 1.28 ms Current: 1.43 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 40], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 1.28 ms Current: 0.886 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 64], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 1.43 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 20], [0, 8, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 1.61 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 1.07 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 8], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 3.73 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 64], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 1.32 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 40], [0, 1, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 1.73 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 2.02 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 40], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 1.51 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 10], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 2.59 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 20], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 1.66 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 64], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 1.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 16], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 1.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 32], [0, 1, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 2.09 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 64], [0, 2, 1, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 3.32 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 20], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 1.56 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 32], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 1.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 10], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 3.11 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 2.02 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 8], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 3.03 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 20], [0, 2, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 2.96 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 0.921 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 16], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 1.05 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 8, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 3.55 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 64], [0, 2, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 1.71 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 4], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 5.82 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 16, 10], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 2.23 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 8], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 2.91 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 64], [0, 4, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 2.68 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 40], [0, 8, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 1.52 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 20], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 1.75 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 16, 16], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 1.61 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 8], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 2.98 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 4], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 6.29 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 40], [0, 2, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 2.07 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 8], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 3.39 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 4, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 1.65 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 10], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 2.63 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 2], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 12.9 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 10], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 3.03 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 16], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 1.51 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 20], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 1.58 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 4, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 4.06 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 40], [0, 1, 2, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 2.75 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.886 ms Current: 0.771 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 64], [0, 2, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 1.49 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 16], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 1.54 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 16], [0, 1, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 1.92 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 4], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 5.7 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 64], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 1.13 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 2.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 32], [0, 4, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 3.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 40], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 1.29 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 16], [0, 8, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 1.75 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 20], [0, 2, 4, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 1.91 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 16], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 1.78 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 8], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 2.72 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 16], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 2.03 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 20], [0, 1, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 1.88 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 32], [0, 2, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 2.31 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 40], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 1.27 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 10], [0, 1, 4, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 2.49 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 4, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 1.48 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 16], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 2.28 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 40], [0, 2, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 2.54 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 64], [0, 8, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 10.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 40], [0, 1, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 1.49 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 8], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 2.67 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 32], [0, 1, 2, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 2.32 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 8], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 4.61 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 32], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 1.56 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 32], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 1.55 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 8], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 3.58 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 64], [0, 1, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 1.54 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 1, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 2.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 8], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 2.78 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 20], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 1.57 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 0.91 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 64], [0, 2, 1, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 5.15 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 32], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 1.56 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 10], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 2.9 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 8, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 1.95 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 20], [0, 2, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 2.32 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 64], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 1.03 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 32], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.771 ms Current: 0.676 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 8], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.676 ms Current: 3.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 16], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.676 ms Current: 1.49 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.676 ms Current: 0.818 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.676 ms Current: 3.08 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 8], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.676 ms Current: 3.74 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 32], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.676 ms Current: 1.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.676 ms Current: 0.991 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.676 ms Current: 0.988 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 4], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.676 ms Current: 3.86 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.676 ms Current: 2.47 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 64], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.676 ms Current: 0.934 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x34x34x640xf32>, tensor<3x3x640x640xf32>) outs(%1 : tensor<2x16x16x640xf32>) -> tensor<2x16x16x640xf32>
 Best: 0.676 ms Current: 1.42 ms
 Search time for [2, 34, 34, 640, 3, 3, 640, 16, 16, 1, 2, 0] is 127.37636018800549
 Searching for [2, 18, 18, 640, 3, 3, 1280, 16, 16, 1, 1, 0]
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 8, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 10000000.0 ms Current: 23.9 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 40], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 23.9 ms Current: 1.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 4], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 8.44 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 10], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 4.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 4], [0, 8, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 6.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 64], [0, 8, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 1.69 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 10], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 4.06 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 4], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 10.9 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 16], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 2.29 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 64], [0, 1, 2, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 3.31 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 2, 16], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 3.58 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 32], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 1.23 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 4, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 3.37 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 8], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 5.78 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 32], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 1.07 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 1, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 2.18 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 4], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 9.23 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 8], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 4.72 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 20], [0, 1, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 3.43 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 4], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 11.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 16], [0, 4, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 1.9 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 8, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 23.9 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 20], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 2.75 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 1, 1, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 3.11 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 2.25 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 8, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 1.81 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 40], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 1.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 16], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 2.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 64], [0, 1, 8, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 2.28 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 64], [0, 1, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 3.94 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 40], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 1.21 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 10], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 4.69 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 2, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 2.31 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 16], [0, 2, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 1.62 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 32], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.0 ms Current: 0.995 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 32], [0, 1, 2, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.995 ms Current: 3.01 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 8], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.995 ms Current: 5.78 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 8, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.995 ms Current: 4.08 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 64], [0, 4, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.995 ms Current: 2.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 64], [0, 8, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.995 ms Current: 1.83 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.995 ms Current: 0.895 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 20], [0, 1, 8, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 2.09 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 32], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 1.89 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 1, 4, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 2.68 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 20], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 2.34 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 4], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 13.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 16, 64], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 1.35 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 2, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 3.68 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 64], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 1.18 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 32], [0, 8, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 1.63 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 32], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 1.44 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 20], [0, 2, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 3.19 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 64], [0, 2, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 1.48 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 16], [0, 4, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 1.79 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 40], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 1.97 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 8], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 4.21 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 40], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 1.33 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 16, 20], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 2.37 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 8], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 3.55 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 20], [0, 2, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 2.26 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 16], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 2.57 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 4], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 10.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 64], [0, 2, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 1.71 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 64], [0, 2, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 1.96 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 4], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 11.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 20], [0, 2, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 2.27 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 4], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 7.59 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 32], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 1.43 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 8], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 6.47 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 4.36 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 40], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 2.86 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 20], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 2.63 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 20], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 2.72 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 1, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 3.52 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 4, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 1.49 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 4], [0, 2, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 7.35 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 64], [0, 4, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 2.45 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 8], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 5.64 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 32], [0, 8, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 1.65 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 2], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 28.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 0.898 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 4], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 7.94 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 16, 32], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 1.35 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 32], [0, 1, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 2.55 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 64], [0, 2, 8, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 19.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 32], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 1.13 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 16], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 2.15 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 20], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 2.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 64], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 1.09 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 1.99 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 40], [0, 4, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 1.73 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 40], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 1.89 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 2, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 1.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 16], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 2.69 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 32], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 1.06 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 10], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 4.67 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 4, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 4.51 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 32], [0, 2, 2, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 3.26 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 16, 64], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 1.28 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 40], [0, 2, 2, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x640xf32>, tensor<3x3x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.895 ms Current: 2.2 ms
 Search time for [2, 18, 18, 640, 3, 3, 1280, 16, 16, 1, 1, 0] is 139.19886086101178
 Searching for [2, 18, 18, 1280, 3, 3, 1280, 16, 16, 1, 1, 0]
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 16, 16], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 10000000.0 ms Current: 4.85 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 4, 40], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 4.85 ms Current: 6.91 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 16], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 4.85 ms Current: 7.04 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 16, 4], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 4.85 ms Current: 20.6 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 4.85 ms Current: 2.71 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.71 ms Current: 2.97 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 20], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.71 ms Current: 5.82 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 20], [0, 4, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.71 ms Current: 6.94 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.71 ms Current: 9.42 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 64], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.71 ms Current: 3.72 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 40], [0, 4, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.71 ms Current: 3.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 32], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.71 ms Current: 3.54 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 4, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.71 ms Current: 5.49 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 8], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.71 ms Current: 11.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 64], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.71 ms Current: 2.37 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 2], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.37 ms Current: 58.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 64], [0, 2, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.37 ms Current: 2.49 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 32], [0, 1, 8, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.37 ms Current: 5.79 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 64], [0, 4, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.37 ms Current: 13.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 4, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.37 ms Current: 44.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 4, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.37 ms Current: 7.35 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.37 ms Current: 6.81 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.37 ms Current: 3.09 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.37 ms Current: 3.04 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 32], [0, 4, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.37 ms Current: 5.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 64], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.37 ms Current: 4.21 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 8], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.37 ms Current: 11.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 16], [0, 2, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.37 ms Current: 7.99 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 4], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.37 ms Current: 28.6 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 20], [0, 4, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.37 ms Current: 6.74 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 2, 16], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.37 ms Current: 7.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 40], [0, 4, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.37 ms Current: 4.22 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 20], [0, 4, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.37 ms Current: 6.61 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 64], [0, 2, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.37 ms Current: 3.49 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 8], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.37 ms Current: 13.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 2, 64], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.37 ms Current: 2.33 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 4], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 29.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 3.31 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 16, 16], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 6.26 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 8], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 13.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 2.7 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 16], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 4.84 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 20], [0, 4, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 6.98 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 40], [0, 4, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 4.49 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 10], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 11.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 4], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 19.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 2, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 2.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 5.37 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 8], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 9.33 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 10], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 9.6 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 20], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 6.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 64], [0, 2, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 5.71 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 64], [0, 4, 1, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 105.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 4, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 2.87 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 16], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 6.07 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 16, 10], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 8.21 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 16], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 8.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 20], [0, 2, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 7.16 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 5.47 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 40], [0, 2, 1, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 6.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 32], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 5.13 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 64], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 3.72 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 64], [0, 4, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 5.55 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 40], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 2.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 64], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 3.11 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 2, 64], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 4.05 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 4, 64], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 2.51 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 16], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 6.57 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 2, 64], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 11.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 40], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 2.57 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 40], [0, 1, 1, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 6.66 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 64], [0, 8, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 11.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 32], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 3.81 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 40], [0, 1, 8, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 151.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 10], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 8.63 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 40], [0, 2, 4, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 4.26 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 8, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 3.86 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 2.71 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 40], [0, 2, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 5.26 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 32], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 4.12 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 64], [0, 8, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 5.47 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 2, 40], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 6.99 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 8, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 7.53 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 10], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 15.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 64], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 2.33 ms Current: 1.68 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 20], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.68 ms Current: 5.46 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 8], [0, 4, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.68 ms Current: 8.82 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 64], [0, 4, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.68 ms Current: 6.23 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 16], [0, 2, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.68 ms Current: 7.24 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 20], [0, 8, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.68 ms Current: 6.62 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 20], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.68 ms Current: 6.59 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 16], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.68 ms Current: 4.54 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 16, 64], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.68 ms Current: 3.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 32], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.68 ms Current: 4.66 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 64], [0, 4, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.68 ms Current: 5.48 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 2, 64], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.68 ms Current: 11.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 32], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.68 ms Current: 2.95 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 40], [0, 1, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.68 ms Current: 4.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 10], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.68 ms Current: 8.55 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 40], [0, 1, 2, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 1.68 ms Current: 7.1 ms
 Search time for [2, 18, 18, 1280, 3, 3, 1280, 16, 16, 1, 1, 0] is 167.32472151098773
 Searching for [2, 16, 16, 640, 1, 1, 1280, 16, 16, 1, 1, 0]
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 4], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 10000000.0 ms Current: 0.618 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 4], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.618 ms Current: 0.785 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 32], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.618 ms Current: 0.205 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 32], [0, 1, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.205 ms Current: 0.293 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 64], [0, 4, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.205 ms Current: 0.17 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 2], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.17 ms Current: 1.41 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.17 ms Current: 0.156 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 16], [0, 4, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.156 ms Current: 0.385 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 64], [0, 4, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.156 ms Current: 0.169 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.156 ms Current: 0.259 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 16], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.156 ms Current: 0.202 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 10], [0, 4, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.156 ms Current: 0.503 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 8], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.156 ms Current: 0.274 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 8], [0, 4, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.156 ms Current: 0.35 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 2, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.156 ms Current: 0.168 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 1, 1, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.156 ms Current: 0.386 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.156 ms Current: 0.207 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 20], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.156 ms Current: 0.259 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 4, 32], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.156 ms Current: 0.141 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 2, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.141 ms Current: 0.321 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.141 ms Current: 0.145 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 8], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.141 ms Current: 0.306 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.141 ms Current: 0.133 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 16], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.133 ms Current: 0.187 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 64], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.133 ms Current: 0.188 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 64], [0, 1, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.133 ms Current: 0.267 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 2, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.133 ms Current: 0.168 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 64], [0, 1, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.133 ms Current: 0.22 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 1, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.133 ms Current: 0.149 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 32], [0, 2, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.133 ms Current: 0.144 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 2, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.133 ms Current: 0.195 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 64], [0, 4, 2, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.133 ms Current: 0.955 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 64], [0, 4, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.133 ms Current: 0.272 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 16, 32], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.133 ms Current: 0.178 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 32], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.133 ms Current: 0.198 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 32], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.133 ms Current: 0.215 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 32], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.133 ms Current: 0.129 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 40], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.129 ms Current: 0.106 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 32], [0, 8, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.285 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 64], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.173 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 16], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.186 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 2], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 1.63 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 1, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.422 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 8], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.382 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 20], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.237 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.152 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.232 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 16], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.252 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 64], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.185 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 16], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.164 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 20], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.241 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.322 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 16], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.217 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 2, 1, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.292 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 20], [0, 2, 8, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.319 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 16], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.203 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 40], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.17 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 4, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.46 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 2, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.413 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 64], [0, 4, 1, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 4.74 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 32], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.172 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.22 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 4], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.75 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 4], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.802 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 16, 20], [0, 1, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.276 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 16], [0, 8, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.218 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.421 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.212 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 20], [0, 4, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.268 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 32], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.12 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 20], [0, 2, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.418 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 64], [0, 1, 1, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.461 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 32], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.171 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 64], [0, 4, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.503 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 32], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.201 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 32], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.22 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 16], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.176 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 8], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.334 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.266 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 32], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.192 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 40], [0, 4, 2, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 1.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 16], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.203 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 8], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.413 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.163 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 64], [0, 2, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.323 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 16], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.202 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.157 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 40], [0, 1, 1, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.318 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 64], [0, 4, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.154 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 40], [0, 1, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.141 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 16], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.291 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 32], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.172 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.157 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 64], [0, 1, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.371 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 8], [0, 1, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.308 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 16], [0, 2, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.423 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 1, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.148 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 20], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.251 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 64], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.118 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 10], [0, 1, 8, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x640xf32>, tensor<1x1x640x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.106 ms Current: 0.555 ms
 Search time for [2, 16, 16, 640, 1, 1, 1280, 16, 16, 1, 1, 0] is 124.05010044801747
 Searching for [2, 16, 16, 1280, 1, 1, 1280, 16, 16, 1, 1, 0]
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 16], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 10000000.0 ms Current: 0.639 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 10], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.639 ms Current: 1.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 8], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.639 ms Current: 0.854 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 40], [0, 4, 4, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.639 ms Current: 3.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 32], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.639 ms Current: 0.566 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 64], [0, 2, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.566 ms Current: 0.671 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 40], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.566 ms Current: 0.421 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 2, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 0.814 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 40], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 0.53 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 8], [0, 4, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 1.34 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 32], [0, 1, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 0.634 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 16], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 0.998 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 4], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 2.65 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 10], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 1.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 40], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 0.584 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 1, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 0.723 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 16], [0, 4, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 0.887 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 64], [0, 4, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 0.615 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 64], [0, 1, 2, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 1.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 40], [0, 2, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 0.522 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 16, 64], [0, 1, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 0.468 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 16, 4], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 1.68 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 32], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 0.48 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 64], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 0.653 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 8, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 1.44 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 20], [0, 1, 4, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 0.637 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 20], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 0.76 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 4], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 1.68 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 0.439 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 20], [0, 4, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 0.744 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 8], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 0.871 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 20], [0, 2, 4, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 0.519 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 8, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 1.49 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.421 ms Current: 0.237 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 4], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.237 ms Current: 1.65 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 8], [0, 1, 8, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.237 ms Current: 1.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 1, 8, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.237 ms Current: 1.12 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.237 ms Current: 0.406 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 8], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.237 ms Current: 0.776 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 64], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.237 ms Current: 0.236 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.412 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 2], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 3.79 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.382 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 4, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.628 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 40], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.308 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 16], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.813 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 4, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.353 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 16], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.575 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 20], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.676 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 32], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.449 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 16, 10], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 1.24 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 32], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.442 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 32], [0, 4, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.662 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.248 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 32], [0, 8, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.454 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 40], [0, 2, 2, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.535 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 4], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 2.01 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 20], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.827 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 16], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.757 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 32], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.631 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 2, 64], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.403 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 20], [0, 1, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.786 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 64], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.385 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 64], [0, 8, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 5.21 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.547 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 40], [0, 2, 1, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.646 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.512 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.43 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 16, 8], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 1.49 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 20], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.734 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 2, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.568 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 2, 40], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.436 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 16], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 1.17 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 2, 32], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.504 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.379 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 8], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 1.31 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 4], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 2.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 16], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.766 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 4], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 4.13 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 10], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 1.18 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.365 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 1.89 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 40], [0, 1, 1, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.64 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 32], [0, 2, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.65 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.398 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 40], [0, 2, 8, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 3.16 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 1, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.401 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 4, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.627 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 40], [0, 2, 1, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 16, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.696 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 8], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.825 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 16], [0, 8, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.73 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 8], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 2.21 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 8], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 1.06 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 32], [0, 4, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.402 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 16], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.747 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 16, 32], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.586 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 1, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.444 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 64], [0, 1, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.46 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.366 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x16x16x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 0.236 ms Current: 0.303 ms
 Search time for [2, 16, 16, 1280, 1, 1, 1280, 16, 16, 1, 1, 0] is 116.58078584895702
 Searching for [2, 18, 18, 1280, 3, 3, 1280, 8, 8, 1, 2, 0]
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 4], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 10000000.0 ms Current: 4.64 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 16], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 4.64 ms Current: 2.45 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 40], [0, 1, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 2.45 ms Current: 2.11 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 8], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 2.11 ms Current: 1.72 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 8], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.72 ms Current: 2.05 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 64], [0, 1, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.72 ms Current: 3.57 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.72 ms Current: 2.17 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 2, 8], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.72 ms Current: 1.86 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.72 ms Current: 2.12 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 1, 4, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.72 ms Current: 5.52 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 4, 64], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.72 ms Current: 2.05 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 2, 32], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.72 ms Current: 0.711 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 8], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 1.75 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 1.88 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 8], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 1.87 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 16], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 1.23 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 1.04 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 2.18 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 1.88 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 32], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 0.814 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 40], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 1.27 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 20], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 1.36 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 16], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 2.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 64], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 1.74 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 4, 16], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 1.24 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 3.68 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 1, 4, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 2.33 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 2.53 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 3.26 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 20], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 1.26 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 1.12 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 2, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 4.72 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 4], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 4.09 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 64], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 1.62 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 20], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 2.07 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 4, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 3.57 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 4, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 2.48 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 4], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 2.75 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 3.07 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 16], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 3.37 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 8], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 1.79 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 32], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 0.788 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 4, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 2.36 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 16], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 1.05 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 2], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 10.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 16], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 2.19 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 20], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 2.07 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 16], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 1.92 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 1, 4, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 1.75 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 16], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 1.25 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 40], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 0.78 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 2, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 3.14 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 4, 32], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 2.15 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 32], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 0.816 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 4], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 2.75 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 40], [0, 1, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 3.52 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 2.53 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 16], [0, 4, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 2.63 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 40], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 1.01 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 32], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 1.31 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 40], [0, 1, 1, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 4.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 16], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 2.09 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 16], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 1.56 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 1.26 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 4, 64], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 0.784 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 16], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 2.06 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 32], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 1.81 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 32], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 2.89 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 2, 64], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.711 ms Current: 0.7 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 2, 2, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 5.61 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 2.57 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 40], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 2.01 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 8], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 1.81 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 64], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 1.74 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 20], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 1.66 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 32], [0, 1, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 3.65 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 40], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 2.01 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 1.38 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 40], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 1.26 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 16], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 2.13 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 40], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 1.22 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 2.19 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 20], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 1.39 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 1.9 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 32], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 0.918 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 20], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 2.08 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 1.7 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 2], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 10.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 40], [0, 1, 1, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 4.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 16], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 3.85 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 64], [0, 2, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 2.78 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 16], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 3.91 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 16], [0, 2, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 3.67 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 1.71 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 3.25 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 32], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 0.813 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 16], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 1.08 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 1.47 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 2, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 4.38 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.7 ms Current: 2.78 ms
 Search time for [2, 18, 18, 1280, 3, 3, 1280, 8, 8, 1, 2, 0] is 128.3858489629929
 Searching for [2, 10, 10, 1280, 3, 3, 1280, 8, 8, 1, 1, 0]
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 20], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 10000000.0 ms Current: 1.35 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 16], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.35 ms Current: 1.31 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 16], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.31 ms Current: 1.26 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.26 ms Current: 1.55 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 40], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.26 ms Current: 0.985 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.985 ms Current: 1.96 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.985 ms Current: 3.68 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 40], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.985 ms Current: 2.89 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 10], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.985 ms Current: 2.52 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 4, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.985 ms Current: 2.42 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 4, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.985 ms Current: 2.41 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 40], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.985 ms Current: 1.01 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 4], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.985 ms Current: 4.21 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 4, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.985 ms Current: 5.62 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 40], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.985 ms Current: 1.01 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 32], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.985 ms Current: 1.93 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 16], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.985 ms Current: 1.97 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 16], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.985 ms Current: 1.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.985 ms Current: 0.814 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.814 ms Current: 1.92 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 40], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.814 ms Current: 1.01 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.814 ms Current: 2.6 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 64], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.814 ms Current: 1.12 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 8], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.814 ms Current: 4.03 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.814 ms Current: 1.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 20], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.814 ms Current: 1.35 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 20], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.814 ms Current: 1.55 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.814 ms Current: 3.12 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.814 ms Current: 1.79 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 40], [0, 2, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.814 ms Current: 3.52 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 64], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.814 ms Current: 1.74 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 64], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.814 ms Current: 1.98 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 1, 2, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.814 ms Current: 4.59 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.814 ms Current: 0.802 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 16], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.802 ms Current: 1.83 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 2, 64], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.802 ms Current: 2.69 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 32], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.802 ms Current: 1.97 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 32], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.802 ms Current: 3.11 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.802 ms Current: 1.94 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 16], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.802 ms Current: 1.31 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 4, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.802 ms Current: 2.52 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.802 ms Current: 1.79 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 32], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.802 ms Current: 1.85 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.802 ms Current: 2.05 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 64], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.802 ms Current: 1.58 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 4, 64], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.802 ms Current: 2.04 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 40], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.802 ms Current: 0.839 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.802 ms Current: 2.23 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 2, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.802 ms Current: 2.19 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 20], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.802 ms Current: 1.36 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 32], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.802 ms Current: 1.03 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 32], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.802 ms Current: 0.788 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 8], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 1.96 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 32], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 0.914 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 4, 64], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 0.802 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 4], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 3.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 2, 32], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 1.06 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 16], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 2.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 20], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 1.51 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 3.19 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 2, 16], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 1.32 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 1.88 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 3.61 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 1.27 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 3.57 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 4, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 2.41 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 20], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 1.6 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 2.22 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 16], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 1.83 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 16], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 1.06 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 2, 40], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 2.02 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 16], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 1.07 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 4], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 4.35 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 0.802 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 1.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 4], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 5.47 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 2.26 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 40], [0, 1, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 2.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 40], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 1.04 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 2.43 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 4], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 4.26 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 4], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 3.77 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 16], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 1.57 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 3.87 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 4, 32], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 0.861 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 16], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 2.99 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 32], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 2.87 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 20], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 1.54 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 4, 16], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 1.24 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 10], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 2.83 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 4, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 2.42 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 8], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 2.14 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 2.39 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 2, 32], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 0.915 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 32], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.788 ms Current: 0.787 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 16], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.787 ms Current: 2.18 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.787 ms Current: 2.65 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.787 ms Current: 0.801 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 64], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.787 ms Current: 1.61 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 32], [0, 1, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x1280xf32>, tensor<3x3x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.787 ms Current: 3.64 ms
 Search time for [2, 10, 10, 1280, 3, 3, 1280, 8, 8, 1, 1, 0] is 125.62359256698983
 Searching for [2, 8, 8, 1280, 1, 1, 1280, 8, 8, 1, 1, 0]
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 4, 64], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 10000000.0 ms Current: 0.149 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 2, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.149 ms Current: 0.318 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.149 ms Current: 0.398 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 16], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.149 ms Current: 0.417 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 32], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.149 ms Current: 0.35 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 40], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.149 ms Current: 0.131 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 4, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.361 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 2, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.435 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 40], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.135 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 2, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.25 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 2], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.825 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.254 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 2, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.406 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 64], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.33 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.397 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 40], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.267 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 32], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.376 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 32], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.183 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 20], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.168 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 4], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.595 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 2, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.415 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 2, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.435 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 4, 16], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.135 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.254 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 1, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.406 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 16], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.406 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.192 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.179 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 2, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.521 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 2, 16], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.184 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 16], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.181 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.235 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 16], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.217 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.455 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 1, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.463 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.191 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 2, 32], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.155 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 16], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.445 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 4], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.408 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 1, 2, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.498 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.398 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.164 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.132 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 16], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.278 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 32], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.265 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 4, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.306 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 4], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.434 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.182 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.458 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.139 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 20], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.164 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 32], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.131 ms Current: 0.09 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.369 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 10], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.23 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.368 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.164 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 10], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.22 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 64], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.341 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 4, 40], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.145 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 40], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.159 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 20], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.326 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 16], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.171 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 32], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.091 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 8], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.209 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 4, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.306 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.175 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.121 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.253 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 2, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.498 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 64], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.158 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 2, 64], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.128 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 4], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.395 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 32], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.178 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 16], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.216 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 2, 32], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.155 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 4], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.432 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 20], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.19 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 40], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.129 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 32], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.274 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 16], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.208 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 32], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.097 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.366 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 40], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.274 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 4, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.622 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 32], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 32], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.131 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 2, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.371 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 2, 64], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.299 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 8], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.303 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.454 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 64], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.228 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.253 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 20], [0, 1, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.247 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 2], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.968 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 4, 8], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.171 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 2, 32], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.171 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 8], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.234 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.299 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.465 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 8], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x1280xf32>, tensor<1x1x1280x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.09 ms Current: 0.211 ms
 Search time for [2, 8, 8, 1280, 1, 1, 1280, 8, 8, 1, 1, 0] is 115.36333997000474
 Searching for [2, 10, 10, 2560, 3, 3, 1280, 8, 8, 1, 1, 0]
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 40], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 10000000.0 ms Current: 3.96 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 3.96 ms Current: 2.49 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 2.49 ms Current: 7.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 2.49 ms Current: 5.79 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 2.49 ms Current: 2.76 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 2.49 ms Current: 6.49 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 40], [0, 1, 1, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 2.49 ms Current: 9.25 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 2.49 ms Current: 3.75 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 2.49 ms Current: 6.35 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 1, 1, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 2.49 ms Current: 11.7 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 2], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 2.49 ms Current: 22.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 2.49 ms Current: 4.26 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 1, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 2.49 ms Current: 11.7 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 2.49 ms Current: 4.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 2.49 ms Current: 4.81 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 4], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 2.49 ms Current: 10.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 32], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 2.49 ms Current: 1.56 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 1, 4, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 3.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 16], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 7.32 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 8], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 4.18 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 3.48 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 3.72 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 2], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 20.9 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 2.88 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 16], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 4.63 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 8], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 3.51 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 3.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 4, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 11.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 3.11 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 1, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 7.87 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 16], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 2.84 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 3.71 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 1, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 5.56 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 40], [0, 1, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 4.21 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 3.72 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 5.18 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 3.96 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 4], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 9.57 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 3.13 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 10], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 5.23 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 16], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 4.62 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 40], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 4.01 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 8], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 3.37 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 4, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 7.88 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 2.57 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 16], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 3.29 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 32], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 5.88 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 3.96 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 2, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 9.57 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 2, 16], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 3.62 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 1, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 11.7 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 32], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.56 ms Current: 1.49 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 16], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 2.9 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 4.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 16], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 2.71 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 1, 2, 20], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 9.94 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 2.49 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 64], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 2.23 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 4.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 1, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 5.57 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 40], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 2.35 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 2.38 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 5.87 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 4.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 10], [0, 1, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 5.89 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 32], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 3.94 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 32], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 3.65 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 8.22 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 64], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 1.53 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 8], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 3.43 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 8], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 3.66 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 6.35 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 40], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 2.03 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 8], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 4.01 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 1, 2, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 8.92 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 2.46 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 16], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 2.45 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 1, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 7.25 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 40], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 2.89 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 64], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 4.09 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 16], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 2.45 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 2.82 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 2, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 7.52 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 16], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 2.48 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 20], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 2.83 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 3.22 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 2, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 6.63 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 20], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 3.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 6.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 20], [0, 2, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 7.85 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 8], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 3.68 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 2, 40], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 4.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 20], [0, 1, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 5.48 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 64], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 2.23 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 3.39 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 64], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 4.09 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 8], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 3.47 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 20], [0, 1, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 7.82 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 32], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 2.06 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 20], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 1.49 ms Current: 2.84 ms
 Search time for [2, 10, 10, 2560, 3, 3, 1280, 8, 8, 1, 1, 0] is 135.3496716360096
 Searching for [2, 8, 8, 2560, 1, 1, 1280, 8, 8, 1, 1, 0]
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 40], [0, 1, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 10000000.0 ms Current: 0.535 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 4], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.535 ms Current: 0.716 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 40], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.535 ms Current: 0.35 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 40], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.35 ms Current: 0.281 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.281 ms Current: 0.794 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.281 ms Current: 0.733 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 40], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.281 ms Current: 0.364 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 16], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.281 ms Current: 0.216 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 10], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.216 ms Current: 0.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.216 ms Current: 0.686 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.216 ms Current: 0.574 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.216 ms Current: 0.643 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 8], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.216 ms Current: 0.316 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 8], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.216 ms Current: 0.394 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 8], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.216 ms Current: 0.41 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.216 ms Current: 0.677 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.216 ms Current: 0.628 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.216 ms Current: 0.735 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 16], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.216 ms Current: 0.628 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 2, 64], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.216 ms Current: 0.179 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 64], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.239 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 32], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.301 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 2, 64], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.627 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.547 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 2, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.707 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 4, 16], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.218 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 2, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 1.03 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.56 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.282 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.253 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.345 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 40], [0, 1, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.396 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 32], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.742 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 20], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.336 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 64], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.239 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 64], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.305 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.712 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.56 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 20], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.32 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 8], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.357 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 2, 64], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.18 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.718 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.743 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 2, 2, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 1.23 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.588 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 20], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.32 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 32], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.734 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 10], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.549 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 2, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.801 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 4], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.716 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 40], [0, 2, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.553 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.587 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 16], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.395 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 4, 32], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.347 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 32], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.357 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 8], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.399 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 40], [0, 1, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.535 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 10], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.573 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 8], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.394 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 2, 2, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 1.23 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 40], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.216 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 16], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.885 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 64], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.653 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 1, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.861 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 20], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.336 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.441 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.349 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 2, 32], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.251 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 10], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.573 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 40], [0, 1, 4, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.409 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.347 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 40], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.53 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 4], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.803 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 32], [0, 4, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.596 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 16], [0, 4, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.82 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 2, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.627 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.288 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 16], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.425 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 16], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.398 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 8], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.725 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 2, 16], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.252 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.555 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.648 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 40], [0, 1, 1, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.529 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 4, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.837 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.9 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 2, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.92 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 1, 1, 32], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 1.31 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 32], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.202 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 2, 32], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.358 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 2, 32], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.179 ms Current: 0.167 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 4, 64], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.167 ms Current: 0.267 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 4, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.167 ms Current: 0.886 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 4, 64], [0, 1, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.167 ms Current: 0.178 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 8], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.167 ms Current: 0.406 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 4], [0, 2, 1, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.167 ms Current: 1.15 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.167 ms Current: 0.628 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 8, 32], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.167 ms Current: 0.351 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.167 ms Current: 0.758 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x8x8x2560xf32>, tensor<1x1x2560x1280xf32>) outs(%1 : tensor<2x8x8x1280xf32>) -> tensor<2x8x8x1280xf32>
 Best: 0.167 ms Current: 0.758 ms
 Search time for [2, 8, 8, 2560, 1, 1, 1280, 8, 8, 1, 1, 0] is 108.52120002702577
 Searching for [2, 18, 18, 2560, 3, 3, 1280, 16, 16, 1, 1, 0]
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 16], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 10000000.0 ms Current: 16.6 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 64], [0, 2, 2, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 16.6 ms Current: 11.7 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 2, 4, 64], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 11.7 ms Current: 5.71 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 2, 32], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.71 ms Current: 5.94 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 4], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.71 ms Current: 43.6 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 16], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.71 ms Current: 13.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 32], [0, 8, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.71 ms Current: 8.24 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 32], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.71 ms Current: 5.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 2], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.2 ms Current: 89.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 16], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.2 ms Current: 10.2 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 64], [0, 8, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.2 ms Current: 45.9 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 40], [0, 1, 4, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.2 ms Current: 6.28 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 4, 64], [0, 2, 1, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.2 ms Current: 6.03 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 4, 4, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.2 ms Current: 8.61 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 16], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.2 ms Current: 10.1 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 40], [0, 2, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.2 ms Current: 8.51 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 40], [0, 1, 8, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.2 ms Current: 5.74 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.2 ms Current: 8.55 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 1, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [16, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.2 ms Current: 5.11 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 64], [0, 2, 4, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 4, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 5.66 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 10], [0, 2, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 19.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 40], [0, 2, 8, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [40, 2, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 7.08 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 16], [0, 2, 4, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 17.4 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 20], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 12.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 2], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 81.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 20], [0, 4, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 14.7 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 40], [0, 1, 4, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 9.71 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 4, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 6.58 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 16, 40], [0, 1, 4, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 5.56 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 32], [0, 8, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 15.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 10], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 18.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 8, 32], [0, 1, 4, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [32, 2, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 5.87 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 64], [0, 2, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 12.0 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 16, 40], [0, 4, 2, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 8, 4]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 9.18 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 64], [0, 1, 1, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [8, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 7.02 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 4, 64], [0, 1, 1, 16], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 9.5 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 64], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [64, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 8.45 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 8], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [2, 8, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 22.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 4], [0, 1, 2, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 40.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 4, 8, 20], [0, 2, 1, 1], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [20, 8, 2]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 11.8 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 20], [0, 1, 1, 5], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 16, 8]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 12.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 4, 20], [0, 1, 2, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [10, 2, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 13.3 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 8, 40], [0, 1, 2, 10], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 4, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x18x18x2560xf32>, tensor<3x3x2560x1280xf32>) outs(%1 : tensor<2x16x16x1280xf32>) -> tensor<2x16x16x1280xf32>
 Best: 5.11 ms Current: 9.16 ms
 Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 8, 16, 32], [0, 1, 2, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 8]>, dilations = dense<1> : tensor<2xi64>, stri