@pashu123
Created September 30, 2024 13:52
latest.mlir:3:10: error: 'func.func' op unhandled function with multiple blocks
%0 = flow.dispatch.region -> (tensor<2x4xf32>) {
^
latest.mlir:2:3: note: called from
func.func @simple_test_with_cfg(%arg0: i8) -> tensor<2x4xf32> {
^
latest.mlir:3:10: note: see current operation:
"func.func"() <{function_type = () -> (), sym_name = "simple_test_with_cfg_dispatch_0"}> ({
%0 = "arith.constant"() <{value = 0 : index}> : () -> index
%1 = "arith.constant"() <{value = 0 : i8}> : () -> i8
%2 = "arith.constant"() <{value = dense<1.000000e+00> : tensor<2x4xf32>}> : () -> tensor<2x4xf32>
%3 = "bufferization.to_memref"(%2) : (tensor<2x4xf32>) -> memref<2x4xf32>
%4 = "hal.interface.constant.load"() {layout = #hal.pipeline.layout<constants = 1, bindings = [#hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>, ordinal = 0 : index} : () -> i32
%5 = "arith.trunci"(%4) : (i32) -> i8
%6 = "hal.interface.binding.subspan"(%0) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 2 : i32, layout = #hal.pipeline.layout<constants = 1, bindings = [#hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>, operandSegmentSizes = array<i32: 1, 0>} : (index) -> memref<2x4xf32, #hal.descriptor_type<storage_buffer>>
"memref.assume_alignment"(%6) <{alignment = 64 : i32}> : (memref<2x4xf32, #hal.descriptor_type<storage_buffer>>) -> ()
%7 = "arith.cmpi"(%5, %1) <{predicate = 0 : i64}> : (i8, i8) -> i1
"cf.cond_br"(%7)[^bb1, ^bb2] <{operandSegmentSizes = array<i32: 1, 0, 0>}> : (i1) -> ()
^bb1: // pred: ^bb0
"func.return"() : () -> ()
^bb2: // pred: ^bb0
"linalg.generic"(%3, %6) <{indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>], operandSegmentSizes = array<i32: 1, 1>}> ({
^bb0(%arg0: f32, %arg1: f32):
"linalg.yield"(%arg0) : (f32) -> ()
}) : (memref<2x4xf32>, memref<2x4xf32, #hal.descriptor_type<storage_buffer>>) -> ()
"func.return"() : () -> ()
}) {translation_info = #iree_codegen.translation_info<CPUDefault>} : () -> ()
%0 = flow.dispatch.region -> (tensor<2x4xf32>) {
^
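
The root diagnostic points at the flow.dispatch.region on line 3 of latest.mlir: the LLVM-CPU codegen pipeline rejects dispatch functions whose bodies contain more than one block, and the lowered dump above shows the arith.cmpi on the truncated push constant feeding a cf.cond_br, which is what introduces the extra blocks. Below is a hedged reconstruction of the input; only the func.func and flow.dispatch.region lines are confirmed by the diagnostic, and the region body (including the tensor.empty placeholder for the early-exit value) is inferred from the dump, not taken from the original file.

// Hedged sketch of latest.mlir, inferred from the lowered dump above.
func.func @simple_test_with_cfg(%arg0: i8) -> tensor<2x4xf32> {
  %0 = flow.dispatch.region -> (tensor<2x4xf32>) {
    %c0_i8 = arith.constant 0 : i8
    %cst = arith.constant dense<1.000000e+00> : tensor<2x4xf32>
    %empty = tensor.empty() : tensor<2x4xf32>   // assumed value for the early-exit branch
    %cond = arith.cmpi eq, %arg0, %c0_i8 : i8
    // This branch is what leaves the outlined dispatch function with
    // multiple blocks (^bb0, ^bb1, ^bb2), triggering the error above.
    cf.cond_br %cond, ^bb1, ^bb2
  ^bb1:
    flow.return %empty : tensor<2x4xf32>
  ^bb2:
    flow.return %cst : tensor<2x4xf32>
  }
  return %0 : tensor<2x4xf32>
}
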
latest.mlir:3:10: error: 'hal.executable.variant' op failed in iree-codegen-reconcile-translation-info pass
%0 = flow.dispatch.region -> (tensor<2x4xf32>) {
^
latest.mlir:2:3: note: called from
func.func @simple_test_with_cfg(%arg0: i8) -> tensor<2x4xf32> {
^
latest.mlir:3:10: note: see current operation:
"hal.executable.variant"() ({
"hal.executable.export"() ({
^bb0(%arg2: !hal.device):
%8 = "arith.constant"() <{value = 1 : index}> : () -> index
%9 = "arith.constant"() <{value = 1 : index}> : () -> index
%10 = "arith.constant"() <{value = 1 : index}> : () -> index
"hal.return"(%8, %9, %10) : (index, index, index) -> ()
}) {layout = #hal.pipeline.layout<constants = 1, bindings = [#hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>, ordinal = 0 : index, sym_name = "simple_test_with_cfg_dispatch_0"} : () -> ()
"builtin.module"() ({
"func.func"() <{function_type = () -> (), sym_name = "simple_test_with_cfg_dispatch_0"}> ({
%0 = "arith.constant"() <{value = 0 : index}> : () -> index
%1 = "arith.constant"() <{value = 0 : i8}> : () -> i8
%2 = "arith.constant"() <{value = dense<1.000000e+00> : tensor<2x4xf32>}> : () -> tensor<2x4xf32>
%3 = "bufferization.to_memref"(%2) : (tensor<2x4xf32>) -> memref<2x4xf32>
%4 = "hal.interface.constant.load"() {layout = #hal.pipeline.layout<constants = 1, bindings = [#hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>, ordinal = 0 : index} : () -> i32
%5 = "arith.trunci"(%4) : (i32) -> i8
%6 = "hal.interface.binding.subspan"(%0) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 2 : i32, layout = #hal.pipeline.layout<constants = 1, bindings = [#hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>, operandSegmentSizes = array<i32: 1, 0>} : (index) -> memref<2x4xf32, #hal.descriptor_type<storage_buffer>>
"memref.assume_alignment"(%6) <{alignment = 64 : i32}> : (memref<2x4xf32, #hal.descriptor_type<storage_buffer>>) -> ()
%7 = "arith.cmpi"(%5, %1) <{predicate = 0 : i64}> : (i8, i8) -> i1
"cf.cond_br"(%7)[^bb1, ^bb2] <{operandSegmentSizes = array<i32: 1, 0, 0>}> : (i1) -> ()
^bb1: // pred: ^bb0
"func.return"() : () -> ()
^bb2: // pred: ^bb0
"linalg.generic"(%3, %6) <{indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>], operandSegmentSizes = array<i32: 1, 1>}> ({
^bb0(%arg0: f32, %arg1: f32):
"linalg.yield"(%arg0) : (f32) -> ()
}) : (memref<2x4xf32>, memref<2x4xf32, #hal.descriptor_type<storage_buffer>>) -> ()
"func.return"() : () -> ()
}) {translation_info = #iree_codegen.translation_info<CPUDefault>} : () -> ()
}) : () -> ()
"hal.executable.variant_end"() : () -> ()
}) {sym_name = "embedded_elf_x86_64", target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+evex512,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 64 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf", ukernels = "mmt4d"}>} : () -> ()
%0 = flow.dispatch.region -> (tensor<2x4xf32>) {
^
latest.mlir:3:10: error: failed to run translation of source executable to target executable for backend #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+evex512,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 64 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf", ukernels = "mmt4d"}>
%0 = flow.dispatch.region -> (tensor<2x4xf32>) {
^
latest.mlir:2:3: note: called from
func.func @simple_test_with_cfg(%arg0: i8) -> tensor<2x4xf32> {
^
latest.mlir:3:10: note: see current operation:
"hal.executable.variant"() ({
"hal.executable.export"() ({
^bb0(%arg2: !hal.device):
%8 = "arith.constant"() <{value = 1 : index}> : () -> index
%9 = "arith.constant"() <{value = 1 : index}> : () -> index
%10 = "arith.constant"() <{value = 1 : index}> : () -> index
"hal.return"(%8, %9, %10) : (index, index, index) -> ()
}) {layout = #hal.pipeline.layout<constants = 1, bindings = [#hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>, ordinal = 0 : index, sym_name = "simple_test_with_cfg_dispatch_0"} : () -> ()
"builtin.module"() ({
"func.func"() <{function_type = () -> (), sym_name = "simple_test_with_cfg_dispatch_0"}> ({
%0 = "arith.constant"() <{value = 0 : index}> : () -> index
%1 = "arith.constant"() <{value = 0 : i8}> : () -> i8
%2 = "arith.constant"() <{value = dense<1.000000e+00> : tensor<2x4xf32>}> : () -> tensor<2x4xf32>
%3 = "bufferization.to_memref"(%2) : (tensor<2x4xf32>) -> memref<2x4xf32>
%4 = "hal.interface.constant.load"() {layout = #hal.pipeline.layout<constants = 1, bindings = [#hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>, ordinal = 0 : index} : () -> i32
%5 = "arith.trunci"(%4) : (i32) -> i8
%6 = "hal.interface.binding.subspan"(%0) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 2 : i32, layout = #hal.pipeline.layout<constants = 1, bindings = [#hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>, operandSegmentSizes = array<i32: 1, 0>} : (index) -> memref<2x4xf32, #hal.descriptor_type<storage_buffer>>
"memref.assume_alignment"(%6) <{alignment = 64 : i32}> : (memref<2x4xf32, #hal.descriptor_type<storage_buffer>>) -> ()
%7 = "arith.cmpi"(%5, %1) <{predicate = 0 : i64}> : (i8, i8) -> i1
"cf.cond_br"(%7)[^bb1, ^bb2] <{operandSegmentSizes = array<i32: 1, 0, 0>}> : (i1) -> ()
^bb1: // pred: ^bb0
"func.return"() : () -> ()
^bb2: // pred: ^bb0
"linalg.generic"(%3, %6) <{indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>], operandSegmentSizes = array<i32: 1, 1>}> ({
^bb0(%arg0: f32, %arg1: f32):
"linalg.yield"(%arg0) : (f32) -> ()
}) : (memref<2x4xf32>, memref<2x4xf32, #hal.descriptor_type<storage_buffer>>) -> ()
"func.return"() : () -> ()
}) {translation_info = #iree_codegen.translation_info<CPUDefault>} : () -> ()
}) : () -> ()
"hal.executable.variant_end"() : () -> ()
}) {sym_name = "embedded_elf_x86_64", target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+cx8,+f16c,+fsgsbase,+crc32,+invpcid,+rdpru,+sahf,+lzcnt,+movbe,+mwaitx,+x87,+pku,+evex512,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+fxsr", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 64 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf", ukernels = "mmt4d"}>} : () -> ()
%0 = flow.dispatch.region -> (tensor<2x4xf32>) {
^
failed to translate executables
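
The second and third errors appear to be the same root failure propagating up through the pass manager: once the dispatch function fails LLVM-CPU lowering, iree-codegen-reconcile-translation-info and the translation of the executable for the #hal.executable.target above fail in turn. One way to sidestep the "unhandled function with multiple blocks" rejection, assuming the early exit can be expressed as structured control flow, is to keep the dispatch region a single block, e.g. with a value-yielding scf.if as sketched below; whether the CPU pipeline accepts this particular form for this workload is an assumption, not something the log confirms.

// Hedged single-block alternative: the branch becomes a value-yielding scf.if.
func.func @simple_test_with_cfg(%arg0: i8) -> tensor<2x4xf32> {
  %0 = flow.dispatch.region -> (tensor<2x4xf32>) {
    %c0_i8 = arith.constant 0 : i8
    %cst = arith.constant dense<1.000000e+00> : tensor<2x4xf32>
    %empty = tensor.empty() : tensor<2x4xf32>   // assumed early-exit value, as in the sketch above
    %cond = arith.cmpi eq, %arg0, %c0_i8 : i8
    // scf.if keeps the region body a single block; both arms yield a tensor.
    %sel = scf.if %cond -> (tensor<2x4xf32>) {
      scf.yield %empty : tensor<2x4xf32>
    } else {
      scf.yield %cst : tensor<2x4xf32>
    }
    flow.return %sel : tensor<2x4xf32>
  }
  return %0 : tensor<2x4xf32>
}
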