Skip to content

Instantly share code, notes, and snippets.

@AmosLewis
Created November 5, 2024 21:54
Show Gist options
  • Save AmosLewis/7bca7726a28965db0bccc9006c9a6b0c to your computer and use it in GitHub Desktop.
Save AmosLewis/7bca7726a28965db0bccc9006c9a6b0c to your computer and use it in GitHub Desktop.
module {  // Reduced reproducer: an onnx.Reshape driven by a runtime shape tensor, followed by an onnx.Add.
func.func @tf2onnx(%arg0: !torch.vtensor<[?,768],f32>, %arg1: !torch.vtensor<[3],si64>, %arg2: !torch.vtensor<[?,256,768],f32>) -> ( !torch.vtensor<[?,256,768],f32>) attributes {torch.onnx_meta.ir_version = 7 : si64, torch.onnx_meta.opset_version = 21 : si64, torch.onnx_meta.producer_name = "tf2onnx", torch.onnx_meta.producer_version = "1.5.2"} {  // The leading dimension of %arg0, %arg2 and the result is dynamic (?); %arg1 is the 3-element si64 target shape.
%reshape = torch.operator "onnx.Reshape"(%arg0, %arg1) : (!torch.vtensor<[?,768],f32>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,256,768],f32>  // Reshapes %arg0 from [?,768] to [?,256,768] using the shape values carried by %arg1.
%866 = torch.operator "onnx.Add"(%reshape, %arg2) : (!torch.vtensor<[?,256,768],f32>, !torch.vtensor<[?,256,768],f32>) -> !torch.vtensor<[?,256,768],f32>  // Adds the reshaped tensor to %arg2; the error log in this file (model.mlir:4:12) points at this op.
return %866 : !torch.vtensor<[?,256,768],f32>
}
}
@AmosLewis
Copy link
Author

model.11.hal.mlir

module {
  // Linked executable for this module. It carries a single binary variant,
  // @embedded_elf_x86_64, in the "embedded-elf-x86_64" format.
  // The 4496-byte ELF payload is elided from this dump; it is written in the
  // `dense_resource<__elided__>` form (the same form the VM dump later in this
  // file uses for the same payload) so that the attribute dictionary is
  // well-formed instead of containing an unterminated string literal.
  hal.executable private @model_linked {
    hal.executable.binary public @embedded_elf_x86_64 attributes {data = dense_resource<__elided__> : vector<4496xi8>, format = "embedded-elf-x86_64", mime_type = "application/x-elf"}
  }
  // Global holding the HAL device selected by the initializer below.
  util.global private @__device_0 : !hal.device
  // Device-selection initializer: walks the available HAL devices by index and
  // stores the first one that matches both queries below into @__device_0.
  util.initializer {
    %c18_i32 = arith.constant 18 : i32
    %false = arith.constant false
    %c0 = arith.constant 0 : index
    %c1 = arith.constant 1 : index
    %0 = util.null : !hal.device
    %device_count = hal.devices.count : index
    // Loop state: (device index, number of matching devices seen so far, selected device).
    cf.br ^bb1(%c0, %c0, %0 : index, index, !hal.device)
  ^bb1(%1: index, %2: index, %3: !hal.device):  // 2 preds: ^bb0, ^bb4
    // Keep iterating while no device has been selected (%3 is null) and
    // the index is still below the device count.
    %4 = util.cmp.eq %3, %0 : !hal.device
    %5 = arith.cmpi slt, %1, %device_count : index
    %6 = arith.andi %4, %5 : i1
    cf.cond_br %6, ^bb2, ^bb5
  ^bb2:  // pred: ^bb1
    // First filter: the device id must match the pattern "local*".
    // A failed match skips the format query and reports `false` to ^bb4.
    %device_n = hal.devices.get %1 : !hal.device
    %ok, %value = hal.device.query<%device_n : !hal.device> key("hal.device.id" :: "local*") : i1, i1 = false
    cf.cond_br %value, ^bb3, ^bb4(%false : i1)
  ^bb3:  // pred: ^bb2
    // Second filter: the device must support the "embedded-elf-x86_64" executable format.
    %ok_0, %value_1 = hal.device.query<%device_n : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false
    cf.br ^bb4(%value_1 : i1)
  ^bb4(%7: i1):  // 2 preds: ^bb2, ^bb3
    // %7 is the combined match result for device %1.
    // The match counter is bumped by one on a match (%10), and the device is
    // only selected when it matches AND no earlier device matched (%2 == 0).
    %8 = arith.cmpi eq, %2, %c0 : index
    %9 = arith.select %7, %c1, %c0 : index
    %10 = arith.addi %2, %9 : index
    %11 = arith.andi %7, %8 : i1
    %12 = arith.select %11, %device_n, %0 : !hal.device
    // Advance to the next device index.
    %13 = arith.addi %1, %c1 : index
    cf.br ^bb1(%13, %10, %12 : index, index, !hal.device)
  ^bb5:  // pred: ^bb1
    // Loop exit: %4 is true when the selected device is still null.
    cf.cond_br %4, ^bb6, ^bb7
  ^bb6:  // pred: ^bb5
    // No matching device: report status constant 18 with a message describing the expected target.
    util.status.check_ok %c18_i32, "HAL device `__device_0` not found or unavailable: #hal.device.target<\22local\22, [#hal.executable.target<\22llvm-cpu\22, \22embedded-elf-x86_64\22, {cpu = \22\22, cpu_features = \22\22, data_layout = \22e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128\22, native_vector_size = 16 : i64, target_triple = \22x86_64-unknown-unknown-eabi-elf\22}>]>"
    cf.br ^bb7
  ^bb7:  // 2 preds: ^bb5, ^bb6
    // Publish the selected device (reached from both the found and not-found paths).
    util.global.store %3, @__device_0 : !hal.device
    util.return
  }
  // Cached result of asking @__device_0 whether it supports the
  // "embedded-elf-x86_64" executable format.
  util.global private @__device_0_query_0_hal_executable_format_embedded_elf_x86_64 : i1
  // Runs the format query once against the selected device and stores the
  // queried value (default `false`) in the global above. The %ok result is unused.
  util.initializer {
    %__device_0 = util.global.load @__device_0 : !hal.device
    %ok, %value = hal.device.query<%__device_0 : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false
    util.global.store %value, @__device_0_query_0_hal_executable_format_embedded_elf_x86_64 : i1
    util.return
  }
  // Global holding the loaded @model_linked executable for @__device_0.
  util.global private @__device_0_executable_0_model_linked : !hal.executable
  // Creates the executable on @__device_0 when the cached format query says the
  // embedded-elf-x86_64 variant is supported; otherwise reports an error and
  // stores a null executable.
  util.initializer {
    %0 = util.null : !hal.executable
    %c14_i32 = arith.constant 14 : i32
    %c0 = arith.constant 0 : index
    %c-1 = arith.constant -1 : index
    %__device_0_query_0_hal_executable_format_embedded_elf_x86_64 = util.global.load @__device_0_query_0_hal_executable_format_embedded_elf_x86_64 : i1
    %__device_0 = util.global.load @__device_0 : !hal.device
    // Variant selection: %1 is 0 when the format is supported, -1 otherwise.
    %1 = arith.select %__device_0_query_0_hal_executable_format_embedded_elf_x86_64, %c0, %c-1 : index
    %2 = arith.cmpi eq, %1, %c0 : index
    cf.cond_br %2, ^bb1, ^bb2
  ^bb1:  // pred: ^bb0
    // Supported: instantiate the @embedded_elf_x86_64 binary of @model_linked.
    %executable = hal.executable.create device(%__device_0 : !hal.device) target(@model_linked::@embedded_elf_x86_64) : !hal.executable
    cf.br ^bb3(%executable : !hal.executable)
  ^bb2:  // pred: ^bb0
    // Unsupported: report status constant 14 and fall through with a null executable.
    util.status.check_ok %c14_i32, "HAL device `__device_0` does not support any variant of executable `model_linked`; available formats: [embedded-elf-x86_64]"
    cf.br ^bb3(%0 : !hal.executable)
  ^bb3(%3: !hal.executable):  // 2 preds: ^bb1, ^bb2
    util.global.store %3, @__device_0_executable_0_model_linked : !hal.executable
    util.return
  }
  // Asynchronous entry point (iree.abi.model = "coarse-fences").
  //   %arg0, %arg1, %arg2 : input buffer views (data tensor, 3-element i64 shape tensor, addend tensor).
  //   %arg3               : fence the queue allocations below wait on.
  //   %arg4               : fence signaled by the final queue submission.
  // Returns a buffer view over the freshly allocated result buffer.
  //
  // The function runs in two phases: dispatch 0 produces a value that is read
  // back on the host to obtain the dynamic leading dimension of the result, and
  // dispatch 1 then computes the result into a buffer sized from that dimension.
  util.func public @tf2onnx$async(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view, %arg3: !hal.fence, %arg4: !hal.fence) -> !hal.buffer_view attributes {inlining_policy = #util.inline.never, iree.abi.model = "coarse-fences", iree.abi.stub} {
    %c0 = arith.constant 0 : index
    %c256 = arith.constant 256 : index
    %c768 = arith.constant 768 : index
    %c3 = arith.constant 3 : index
    // 3072 = 768 * 4 and 786432 = 256 * 768 * 4 — presumably byte sizes of one
    // f32 row / one [256,768] f32 slab; 24 = 3 * 8 for the i64 shape tensor.
    %c3072 = arith.constant 3072 : index
    %c24 = arith.constant 24 : index
    %c786432 = arith.constant 786432 : index
    %c8 = arith.constant 8 : index
    %c64 = arith.constant 64 : index
    %c32_i64 = arith.constant 32 : i64
    %c-1_i64 = arith.constant -1 : i64
    %c0_i64 = arith.constant 0 : i64
    %c1 = arith.constant 1 : index
    %c-1_i32 = arith.constant -1 : i32
    %__device_0 = util.global.load immutable @__device_0 : !hal.device
    %__device_0_executable_0_model_linked = util.global.load immutable @__device_0_executable_0_model_linked : !hal.executable
    // --- Input validation: %arg0 must be a dense row-major f32 view of shape [%0, 768].
    %0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
    %element_type_f32 = hal.element_type<f32> : i32
    %dense_row_major = hal.encoding_type<dense_row_major> : i32
    hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%0, %c768]) type(%element_type_f32) encoding(%dense_row_major)
    %1 = arith.muli %0, %c3072 : index
    %buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer
    %allocator = hal.device.allocator<%__device_0 : !hal.device> : !hal.allocator
    hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%1) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage")
    // --- %arg1 must be a dense row-major i64 view of shape [3] backed by at least 24 bytes.
    %element_type_i64 = hal.element_type<i64> : i32
    hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c3]) type(%element_type_i64) encoding(%dense_row_major)
    %buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer
    hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c24) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage")
    // --- %arg2 must be a dense row-major f32 view of shape [%2, 256, 768].
    %2 = hal.buffer_view.dim<%arg2 : !hal.buffer_view>[0] : index
    hal.buffer_view.assert<%arg2 : !hal.buffer_view> message("tensor") shape([%2, %c256, %c768]) type(%element_type_f32) encoding(%dense_row_major)
    %3 = arith.muli %2, %c786432 : index
    %buffer_1 = hal.buffer_view.buffer<%arg2 : !hal.buffer_view> : !hal.buffer
    hal.buffer.assert<%buffer_1 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%3) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage")
    // --- Phase 1 allocations: an 8-byte host-visible readback buffer and a
    // 64-byte device-local scratch buffer; both wait on %arg3.
    %fence = hal.fence.create device(%__device_0 : !hal.device) flags("None") : !hal.fence
    %transient_buffer = hal.device.queue.alloca<%__device_0 : !hal.device> affinity(%c-1_i64) wait(%arg3) signal(%fence) pool(%c0_i64) type("HostVisible|HostCoherent|HostLocal|DeviceVisible") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c8}
    %fence_2 = hal.fence.create device(%__device_0 : !hal.device) flags("None") : !hal.fence
    %transient_buffer_3 = hal.device.queue.alloca<%__device_0 : !hal.device> affinity(%c-1_i64) wait(%arg3) signal(%fence_2) pool(%c0_i64) type("DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") : !hal.buffer{%c64}
    %fence_4 = hal.fence.join at([%fence, %fence_2]) -> !hal.fence
    // Split %0 into low (%5) and high (%7) 32-bit halves to pass as i32 dispatch constants.
    %4 = arith.index_castui %0 : index to i64
    %5 = arith.index_castui %0 : index to i32
    %6 = arith.shrui %4, %c32_i64 : i64
    %7 = arith.trunci %6 : i64 to i32
    // --- Phase 1 command buffer: dispatch entry point 0 with a single workgroup,
    // binding the shape tensor and the scratch buffer, then copy the first
    // 8 bytes of the scratch buffer into the host-visible readback buffer.
    %cmd = hal.command_buffer.create device(%__device_0 : !hal.device) mode(OneShot) categories("Transfer|Dispatch") affinity(%c-1_i64) : !hal.command_buffer
    hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%__device_0_executable_0_model_linked : !hal.executable)[%c0] workgroups([%c1, %c1, %c1]) constants([%5, %7]) bindings([
      (%buffer_0 : !hal.buffer)[%c0, %c24], 
      (%transient_buffer_3 : !hal.buffer)[%c0, %c64]
    ]) flags("None")
    hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None")
    hal.command_buffer.copy_buffer<%cmd : !hal.command_buffer> source(%transient_buffer_3 : !hal.buffer)[%c0] target(%transient_buffer : !hal.buffer)[%c0] length(%c8)
    hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None")
    hal.command_buffer.finalize<%cmd : !hal.command_buffer>
    // Submit phase 1 after both allocations are ready, release the scratch
    // buffer once it completes, and block the host (timeout -1) on that release.
    %fence_5 = hal.fence.create device(%__device_0 : !hal.device) flags("None") : !hal.fence
    hal.device.queue.execute<%__device_0 : !hal.device> affinity(%c-1_i64) wait(%fence_4) signal(%fence_5) commands([%cmd])
    %fence_6 = hal.fence.create device(%__device_0 : !hal.device) flags("None") : !hal.fence
    hal.device.queue.dealloca<%__device_0 : !hal.device> affinity(%c-1_i64) wait(%fence_5) signal(%fence_6) buffer(%transient_buffer_3 : !hal.buffer)
    %status = hal.fence.await until([%fence_6]) timeout_millis(%c-1_i32) : i32
    util.status.check_ok %status, "failed to wait on timepoint"
    // --- Host readback: load the i64 produced by phase 1 and use it as the
    // dynamic leading dimension (%9) of the result; %10 is the result size.
    // NOTE(review): no dealloca for %transient_buffer appears in this function.
    %8 = hal.buffer.load<%transient_buffer : !hal.buffer>[%c0] : i64
    %9 = arith.index_cast %8 : i64 to index
    %10 = arith.muli %9, %c786432 : index
    // --- Phase 2 allocation: device-local result buffer of %10 bytes.
    %fence_7 = hal.fence.create device(%__device_0 : !hal.device) flags("None") : !hal.fence
    %transient_buffer_8 = hal.device.queue.alloca<%__device_0 : !hal.device> affinity(%c-1_i64) wait(%arg3) signal(%fence_7) pool(%c0_i64) type("DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") : !hal.buffer{%10}
    // Low/high 32-bit halves of %9 (%12, %14) and of %2 (%16, %18) for dispatch constants.
    %11 = arith.index_castui %9 : index to i64
    %12 = arith.index_castui %9 : index to i32
    %13 = arith.shrui %11, %c32_i64 : i64
    %14 = arith.trunci %13 : i64 to i32
    %15 = arith.index_castui %2 : index to i64
    %16 = arith.index_castui %2 : index to i32
    %17 = arith.shrui %15, %c32_i64 : i64
    %18 = arith.trunci %17 : i64 to i32
    %cmd_9 = hal.command_buffer.create device(%__device_0 : !hal.device) mode(OneShot) categories("Transfer|Dispatch") affinity(%c-1_i64) : !hal.command_buffer
    // NOTE: this flow-dialect workgroup-count op is still present among the
    // HAL ops here. The error log later in this file names it as the live user
    // of the i64 -> index cast that fails to legalize during VM conversion.
    %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %9, %c256, %c768
    // --- Phase 2 dispatch: entry point 1 over both input buffers, writing the result buffer.
    hal.command_buffer.dispatch<%cmd_9 : !hal.command_buffer> target(%__device_0_executable_0_model_linked : !hal.executable)[%c1] workgroups([%x, %y, %z]) constants([%12, %14, %16, %18]) bindings([
      (%buffer : !hal.buffer)[%c0, %1], 
      (%buffer_1 : !hal.buffer)[%c0, %3], 
      (%transient_buffer_8 : !hal.buffer)[%c0, %10]
    ]) flags("None")
    hal.command_buffer.execution_barrier<%cmd_9 : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None")
    hal.command_buffer.finalize<%cmd_9 : !hal.command_buffer>
    // Submit phase 2 once the result buffer is allocated; completion signals the caller's fence %arg4.
    hal.device.queue.execute<%__device_0 : !hal.device> affinity(%c-1_i64) wait(%fence_7) signal(%arg4) commands([%cmd_9])
    // Wrap the result buffer as a dense row-major f32 view of shape [%9, 256, 768].
    %view = hal.buffer_view.create buffer(%transient_buffer_8 : !hal.buffer)[%c0, %10] shape([%9, %c256, %c768]) type(%element_type_f32) encoding(%dense_row_major) : !hal.buffer_view
    util.return %view : !hal.buffer_view
  }
  // Synchronous wrapper around @tf2onnx$async: passes a null wait fence and a
  // freshly created signal fence, then blocks (timeout -1) until that fence is
  // reached before returning the result view.
  util.func public @tf2onnx(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
    %0 = util.null : !hal.fence
    %c-1_i32 = arith.constant -1 : i32
    %c0 = arith.constant 0 : index
    // The signal fence is created on device index 0.
    %device_0 = hal.devices.get %c0 : !hal.device
    %fence = hal.fence.create device(%device_0 : !hal.device) flags("None") : !hal.fence
    %1 = util.call @tf2onnx$async(%arg0, %arg1, %arg2, %0, %fence) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.fence, !hal.fence) -> !hal.buffer_view
    // NOTE(review): %status is not checked here before the result is returned.
    %status = hal.fence.await until([%fence]) timeout_millis(%c-1_i32) : i32
    util.return %1 : !hal.buffer_view
  }
}

@AmosLewis
Copy link
Author

AmosLewis commented Nov 5, 2024

model.11.vm.mlir

<unknown>:0: error: failed to legalize unresolved materialization from ('i64') to ('index') that remained live after conversion
<unknown>:0: note: see current operation: %14 = "builtin.unrealized_conversion_cast"(%13) : (i64) -> index
model.mlir:4:12: note: see existing live user here: %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %16, %1, %0
    %866 = torch.operator "onnx.Add"(%reshape, %arg2) : (!torch.vtensor<[?,256,768],f32>, !torch.vtensor<[?,256,768],f32>) -> !torch.vtensor<[?,256,768],f32> 
           ^
model.mlir:1:1: error: conversion to vm.module failed
module {
^
model.mlir:1:1: note: see current operation: 
"builtin.module"() ({
  "vm.module"() <{sym_name = "module"}> ({
    "vm.global.ref"() <{sym_name = "__device_0", sym_visibility = "private", type = !vm.ref<!hal.device>}> : () -> ()
    "vm.initializer"() <{function_type = () -> ()}> ({
      %141 = "vm.const.i64"() <{value = -1 : i64}> : () -> i64
      %142 = "vm.const.i32"() <{value = 14 : i32}> : () -> i32
      %143 = "vm.const.ref.zero"() : () -> !vm.ref<!hal.executable>
      %144 = "vm.const.i32"() <{value = 18 : i32}> : () -> i32
      %145 = "vm.const.i32.zero"() : () -> i32
      %146 = "vm.const.i64.zero"() : () -> i64
      %147 = "vm.const.i64"() <{value = 1 : i64}> : () -> i64
      %148 = "vm.const.ref.zero"() : () -> !vm.ref<!hal.device>
      %149 = "vm.call"() <{callee = @hal.devices.count}> {nosideeffects} : () -> i32
      %150 = "vm.ext.i32.i64.s"(%149) : (i32) -> i64
      "vm.br"(%146, %146, %148)[^bb1] : (i64, i64, !vm.ref<!hal.device>) -> ()
    ^bb1(%151: i64, %152: i64, %153: !vm.ref<!hal.device>):  // 2 preds: ^bb0, ^bb4
      %154 = "vm.cmp.eq.ref"(%153, %148) : (!vm.ref<!hal.device>, !vm.ref<!hal.device>) -> i32
      %155 = "vm.cmp.lt.i64.s"(%151, %150) : (i64, i64) -> i32
      %156 = "vm.and.i32"(%154, %155) : (i32, i32) -> i32
      "vm.cond_br"(%156)[^bb2, ^bb5] <{operandSegmentSizes = array<i32: 1, 0, 0>}> : (i32) -> ()
    ^bb2:  // pred: ^bb1
      %157 = "vm.trunc.i64.i32"(%151) : (i64) -> i32
      %158 = "vm.call"(%157) <{callee = @hal.devices.get}> {nosideeffects} : (i32) -> !vm.ref<!hal.device>
      %159 = "vm.rodata.inline"() <{alignment = 1 : i64, name = "_utf8_hal_device_id_680D30050DBEAAF7", value = "hal.device.id"}> : () -> !vm.buffer
      %160 = "vm.rodata.inline"() <{alignment = 1 : i64, name = "_utf8_local_7BB666CFBF0A60B9", value = "local*"}> : () -> !vm.buffer
      %161:2 = "vm.call"(%158, %159, %160) <{callee = @hal.device.query.i64}> {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer) -> (i32, i64)
      %162 = "vm.cmp.nz.i64"(%161#1) : (i64) -> i32
      %163 = "vm.const.i32.zero"() : () -> i32
      %164 = "vm.select.i32"(%161#0, %162, %163) : (i32, i32, i32) -> i32
      %165 = "vm.const.i32"() <{value = 1 : i32}> : () -> i32
      "vm.cond_br"(%164, %145)[^bb3, ^bb4] <{operandSegmentSizes = array<i32: 1, 0, 1>}> : (i32, i32) -> ()
    ^bb3:  // pred: ^bb2
      %166 = "vm.rodata.inline"() <{alignment = 1 : i64, name = "_utf8_hal_executable_format_461E49907F1DDBF6", value = "hal.executable.format"}> : () -> !vm.buffer
      %167 = "vm.rodata.inline"() <{alignment = 1 : i64, name = "_utf8_embedded_elf_x86_64_2423CFA1BD5591FA", value = "embedded-elf-x86_64"}> : () -> !vm.buffer
      %168:2 = "vm.call"(%158, %166, %167) <{callee = @hal.device.query.i64}> {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer) -> (i32, i64)
      %169 = "vm.cmp.nz.i64"(%168#1) : (i64) -> i32
      %170 = "vm.const.i32.zero"() : () -> i32
      %171 = "vm.select.i32"(%168#0, %169, %170) : (i32, i32, i32) -> i32
      %172 = "vm.const.i32"() <{value = 1 : i32}> : () -> i32
      "vm.br"(%171)[^bb4] : (i32) -> ()
    ^bb4(%173: i32):  // 2 preds: ^bb2, ^bb3
      %174 = "vm.cmp.eq.i64"(%152, %146) : (i64, i64) -> i32
      %175 = "vm.select.i64"(%173, %147, %146) : (i32, i64, i64) -> i64
      %176 = "vm.add.i64"(%152, %175) : (i64, i64) -> i64
      %177 = "vm.and.i32"(%173, %174) : (i32, i32) -> i32
      %178 = "vm.select.ref"(%177, %158, %148) : (i32, !vm.ref<!hal.device>, !vm.ref<!hal.device>) -> !vm.ref<!hal.device>
      %179 = "vm.add.i64"(%151, %147) : (i64, i64) -> i64
      "vm.br"(%179, %176, %178)[^bb1] : (i64, i64, !vm.ref<!hal.device>) -> ()
    ^bb5:  // pred: ^bb1
      "vm.cond_br"(%154)[^bb6, ^bb7] <{operandSegmentSizes = array<i32: 1, 0, 0>}> : (i32) -> ()
    ^bb6:  // pred: ^bb5
      "vm.cond_fail"(%144, %144) <{message = "HAL device `__device_0` not found or unavailable: #hal.device.target<\22local\22, [#hal.executable.target<\22llvm-cpu\22, \22embedded-elf-x86_64\22, {cpu = \22\22, cpu_features = \22\22, data_layout = \22e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128\22, native_vector_size = 16 : i64, target_triple = \22x86_64-unknown-unknown-eabi-elf\22}>]>"}> : (i32, i32) -> ()
      "vm.br"()[^bb7] : () -> ()
    ^bb7:  // 2 preds: ^bb5, ^bb6
      %180 = "vm.rodata.inline"() <{alignment = 1 : i64, name = "_utf8_hal_executable_format_461E49907F1DDBF6", value = "hal.executable.format"}> : () -> !vm.buffer
      %181 = "vm.rodata.inline"() <{alignment = 1 : i64, name = "_utf8_embedded_elf_x86_64_2423CFA1BD5591FA", value = "embedded-elf-x86_64"}> : () -> !vm.buffer
      %182:2 = "vm.call"(%153, %180, %181) <{callee = @hal.device.query.i64}> {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer) -> (i32, i64)
      %183 = "vm.cmp.nz.i64"(%182#1) : (i64) -> i32
      %184 = "vm.const.i32.zero"() : () -> i32
      %185 = "vm.select.i32"(%182#0, %183, %184) : (i32, i32, i32) -> i32
      %186 = "vm.const.i32"() <{value = 1 : i32}> : () -> i32
      %187 = "vm.select.i64"(%185, %146, %141) : (i32, i64, i64) -> i64
      %188 = "vm.cmp.eq.i64"(%187, %146) : (i64, i64) -> i32
      "vm.global.store.ref"(%153) <{global = @__device_0}> : (!vm.ref<!hal.device>) -> ()
      "vm.cond_br"(%188)[^bb8, ^bb9] <{operandSegmentSizes = array<i32: 1, 0, 0>}> : (i32) -> ()
    ^bb8:  // pred: ^bb7
      %189 = "vm.rodata.inline"() <{alignment = 16 : i64, mime_type = "application/x-elf", name = "model_linked_embedded_elf_x86_64", value = dense_resource<__elided__> : vector<4496xi8>}> : () -> !vm.buffer
      %190 = "vm.rodata.inline"() <{alignment = 1 : i64, name = "_utf8_embedded_elf_x86_64_2423CFA1BD5591FA", value = "embedded-elf-x86_64"}> : () -> !vm.buffer
      %191 = "vm.const.ref.zero"() : () -> !vm.buffer
      %192 = "vm.call"(%153, %190, %189, %191) <{callee = @hal.executable.create}> {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer, !vm.buffer) -> !vm.ref<!hal.executable>
      "vm.br"(%192)[^bb10] : (!vm.ref<!hal.executable>) -> ()
    ^bb9:  // pred: ^bb7
      "vm.cond_fail"(%142, %142) <{message = "HAL device `__device_0` does not support any variant of executable `model_linked`; available formats: [embedded-elf-x86_64]"}> : (i32, i32) -> ()
      "vm.br"(%143)[^bb10] : (!vm.ref<!hal.executable>) -> ()
    ^bb10(%193: !vm.ref<!hal.executable>):  // 2 preds: ^bb8, ^bb9
      "vm.global.store.ref"(%193) <{global = @__device_0_executable_0_model_linked}> : (!vm.ref<!hal.executable>) -> ()
      "vm.return"() : () -> ()
    }) : () -> ()
    "vm.global.ref"() <{sym_name = "__device_0_executable_0_model_linked", sym_visibility = "private", type = !vm.ref<!hal.executable>}> : () -> ()
    "vm.func"() <{function_type = (!vm.ref<!hal.buffer_view>, !vm.ref<!hal.buffer_view>, !vm.ref<!hal.buffer_view>, !vm.ref<!hal.fence>, !vm.ref<!hal.fence>) -> !vm.ref<!hal.buffer_view>, inlining_policy = #util.inline.never}> ({
    ^bb0(%arg3: !vm.ref<!hal.buffer_view>, %arg4: !vm.ref<!hal.buffer_view>, %arg5: !vm.ref<!hal.buffer_view>, %arg6: !vm.ref<!hal.fence>, %arg7: !vm.ref<!hal.fence>):
      %9 = "vm.const.i32"() <{value = -1 : i32}> : () -> i32
      %10 = "vm.const.i64"() <{value = 1 : i64}> : () -> i64
      %11 = "vm.const.i64.zero"() : () -> i64
      %12 = "vm.const.i64"() <{value = -1 : i64}> : () -> i64
      %13 = "vm.const.i64"() <{value = 32 : i64}> : () -> i64
      %14 = "vm.const.i64"() <{value = 64 : i64}> : () -> i64
      %15 = "vm.const.i64"() <{value = 8 : i64}> : () -> i64
      %16 = "vm.const.i64"() <{value = 786432 : i64}> : () -> i64
      %17 = "vm.const.i64"() <{value = 24 : i64}> : () -> i64
      %18 = "vm.const.i64"() <{value = 3072 : i64}> : () -> i64
      %19 = "vm.const.i64"() <{value = 3 : i64}> : () -> i64
      %20 = "vm.const.i64"() <{value = 768 : i64}> : () -> i64
      %21 = "builtin.unrealized_conversion_cast"(%20) : (i64) -> index
      %22 = "vm.const.i64"() <{value = 256 : i64}> : () -> i64
      %23 = "builtin.unrealized_conversion_cast"(%22) : (i64) -> index
      %24 = "vm.const.i64.zero"() : () -> i64
      %25 = "vm.global.load.ref"() <{global = @__device_0, is_immutable}> : () -> !vm.ref<!hal.device>
      %26 = "vm.global.load.ref"() <{global = @__device_0_executable_0_model_linked, is_immutable}> : () -> !vm.ref<!hal.executable>
      %27 = "vm.const.i32.zero"() : () -> i32
      %28 = "vm.call"(%arg3, %27) <{callee = @hal.buffer_view.dim}> {nosideeffects} : (!vm.ref<!hal.buffer_view>, i32) -> i64
      %29 = "vm.const.i32"() <{value = 553648160 : i32}> : () -> i32
      %30 = "vm.const.i32"() <{value = 1 : i32}> : () -> i32
      %31 = "vm.rodata.inline"() <{alignment = 1 : i64, name = "_utf8_tensor_7767330EDA5F02D", value = "tensor"}> : () -> !vm.buffer
      "vm.call.variadic"(%arg3, %31, %29, %30, %28, %20) <{callee = @hal.buffer_view.assert, segment_sizes = dense<[-1, -1, -1, -1, 2]> : vector<5xi16>, segment_types = [!vm.ref<!hal.buffer_view>, !vm.buffer, i32, i32, i64]}> : (!vm.ref<!hal.buffer_view>, !vm.buffer, i32, i32, i64, i64) -> ()
      %32 = "vm.mul.i64"(%28, %18) : (i64, i64) -> i64
      %33 = "vm.call"(%arg3) <{callee = @hal.buffer_view.buffer}> {nosideeffects} : (!vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer>
      %34 = "vm.call"(%25) <{callee = @hal.device.allocator}> {nosideeffects} : (!vm.ref<!hal.device>) -> !vm.ref<!hal.allocator>
      %35 = "vm.rodata.inline"() <{alignment = 1 : i64, name = "_utf8_tensor_7767330EDA5F02D", value = "tensor"}> : () -> !vm.buffer
      %36 = "vm.const.i32"() <{value = 16 : i32}> : () -> i32
      %37 = "vm.const.i32"() <{value = 3075 : i32}> : () -> i32
      "vm.call"(%33, %35, %34, %32, %36, %37) <{callee = @hal.buffer.assert}> : (!vm.ref<!hal.buffer>, !vm.buffer, !vm.ref<!hal.allocator>, i64, i32, i32) -> ()
      %38 = "vm.const.i32"() <{value = 268435520 : i32}> : () -> i32
      %39 = "vm.rodata.inline"() <{alignment = 1 : i64, name = "_utf8_tensor_7767330EDA5F02D", value = "tensor"}> : () -> !vm.buffer
      "vm.call.variadic"(%arg4, %39, %38, %30, %19) <{callee = @hal.buffer_view.assert, segment_sizes = dense<[-1, -1, -1, -1, 1]> : vector<5xi16>, segment_types = [!vm.ref<!hal.buffer_view>, !vm.buffer, i32, i32, i64]}> : (!vm.ref<!hal.buffer_view>, !vm.buffer, i32, i32, i64) -> ()
      %40 = "vm.call"(%arg4) <{callee = @hal.buffer_view.buffer}> {nosideeffects} : (!vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer>
      %41 = "vm.rodata.inline"() <{alignment = 1 : i64, name = "_utf8_tensor_7767330EDA5F02D", value = "tensor"}> : () -> !vm.buffer
      %42 = "vm.const.i32"() <{value = 16 : i32}> : () -> i32
      %43 = "vm.const.i32"() <{value = 3075 : i32}> : () -> i32
      "vm.call"(%40, %41, %34, %17, %42, %43) <{callee = @hal.buffer.assert}> : (!vm.ref<!hal.buffer>, !vm.buffer, !vm.ref<!hal.allocator>, i64, i32, i32) -> ()
      %44 = "vm.const.i32.zero"() : () -> i32
      %45 = "vm.call"(%arg5, %44) <{callee = @hal.buffer_view.dim}> {nosideeffects} : (!vm.ref<!hal.buffer_view>, i32) -> i64
      %46 = "vm.rodata.inline"() <{alignment = 1 : i64, name = "_utf8_tensor_7767330EDA5F02D", value = "tensor"}> : () -> !vm.buffer
      "vm.call.variadic"(%arg5, %46, %29, %30, %45, %22, %20) <{callee = @hal.buffer_view.assert, segment_sizes = dense<[-1, -1, -1, -1, 3]> : vector<5xi16>, segment_types = [!vm.ref<!hal.buffer_view>, !vm.buffer, i32, i32, i64]}> : (!vm.ref<!hal.buffer_view>, !vm.buffer, i32, i32, i64, i64, i64) -> ()
      %47 = "vm.mul.i64"(%45, %16) : (i64, i64) -> i64
      %48 = "vm.call"(%arg5) <{callee = @hal.buffer_view.buffer}> {nosideeffects} : (!vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer>
      %49 = "vm.rodata.inline"() <{alignment = 1 : i64, name = "_utf8_tensor_7767330EDA5F02D", value = "tensor"}> : () -> !vm.buffer
      %50 = "vm.const.i32"() <{value = 16 : i32}> : () -> i32
      %51 = "vm.const.i32"() <{value = 3075 : i32}> : () -> i32
      "vm.call"(%48, %49, %34, %47, %50, %51) <{callee = @hal.buffer.assert}> : (!vm.ref<!hal.buffer>, !vm.buffer, !vm.ref<!hal.allocator>, i64, i32, i32) -> ()
      %52 = "vm.const.i32.zero"() : () -> i32
      %53 = "vm.call"(%25, %52) <{callee = @hal.fence.create}> : (!vm.ref<!hal.device>, i32) -> !vm.ref<!hal.fence>
      %54 = "vm.const.i32.zero"() : () -> i32
      %55 = "vm.const.i32"() <{value = 86 : i32}> : () -> i32
      %56 = "vm.const.i32"() <{value = 150998019 : i32}> : () -> i32
      %57 = "vm.call"(%25, %12, %arg6, %53, %54, %55, %56, %15) <{callee = @hal.device.queue.alloca}> : (!vm.ref<!hal.device>, i64, !vm.ref<!hal.fence>, !vm.ref<!hal.fence>, i32, i32, i32, i64) -> !vm.ref<!hal.buffer>
      %58 = "vm.const.i32.zero"() : () -> i32
      %59 = "vm.call"(%25, %58) <{callee = @hal.fence.create}> : (!vm.ref<!hal.device>, i32) -> !vm.ref<!hal.fence>
      %60 = "vm.const.i32.zero"() : () -> i32
      %61 = "vm.const.i32"() <{value = 48 : i32}> : () -> i32
      %62 = "vm.const.i32"() <{value = 3075 : i32}> : () -> i32
      %63 = "vm.call"(%25, %12, %arg6, %59, %60, %61, %62, %14) <{callee = @hal.device.queue.alloca}> : (!vm.ref<!hal.device>, i64, !vm.ref<!hal.fence>, !vm.ref<!hal.fence>, i32, i32, i32, i64) -> !vm.ref<!hal.buffer>
      %64 = "vm.call.variadic"(%53, %59) <{callee = @hal.fence.join, segment_sizes = dense<2> : vector<1xi16>, segment_types = [!vm.ref<!hal.fence>]}> {nosideeffects} : (!vm.ref<!hal.fence>, !vm.ref<!hal.fence>) -> !vm.ref<!hal.fence>
      %65 = "vm.trunc.i64.i32"(%28) : (i64) -> i32
      %66 = "vm.const.i32"() <{value = 32 : i32}> : () -> i32
      %67 = "vm.shr.i64.u"(%28, %66) : (i64, i32) -> i64
      %68 = "vm.trunc.i64.i32"(%67) : (i64) -> i32
      %69 = "vm.const.i32"() <{value = 1 : i32}> : () -> i32
      %70 = "vm.const.i32"() <{value = 3 : i32}> : () -> i32
      %71 = "vm.const.i32.zero"() : () -> i32
      %72 = "vm.call"(%25, %69, %70, %12, %71) <{callee = @hal.command_buffer.create}> : (!vm.ref<!hal.device>, i32, i32, i64, i32) -> !vm.ref<!hal.command_buffer>
      %73 = "vm.const.i32.zero"() : () -> i32
      %74 = "vm.const.i64"() <{value = 0 : i64}> : () -> i64
      %75 = "vm.const.i32.zero"() : () -> i32
      %76 = "vm.const.i32"() <{value = 1 : i32}> : () -> i32
      %77 = "vm.const.i32"() <{value = 1 : i32}> : () -> i32
      %78 = "vm.const.i32"() <{value = 1 : i32}> : () -> i32
      %79 = "vm.const.i32.zero"() : () -> i32
      %80 = "vm.const.i32.zero"() : () -> i32
      "vm.call.variadic"(%72, %26, %75, %76, %77, %78, %74, %65, %68, %73, %79, %40, %24, %17, %73, %80, %63, %24, %14) <{callee = @hal.command_buffer.dispatch, segment_sizes = dense<[-1, -1, -1, -1, -1, -1, -1, 2, 2]> : vector<9xi16>, segment_types = [!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32, i64, i32, tuple<i32, i32, !vm.ref<!hal.buffer>, i64, i64>]}> : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32, i64, i32, i32, i32, i32, !vm.ref<!hal.buffer>, i64, i64, i32, i32, !vm.ref<!hal.buffer>, i64, i64) -> ()
      %81 = "vm.const.i32"() <{value = 28 : i32}> : () -> i32
      %82 = "vm.const.i32"() <{value = 13 : i32}> : () -> i32
      %83 = "vm.const.i32.zero"() : () -> i32
      "vm.call"(%72, %81, %82, %83) <{callee = @hal.command_buffer.execution_barrier}> : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> ()
      %84 = "vm.const.i32.zero"() : () -> i32
      %85 = "vm.const.i32.zero"() : () -> i32
      "vm.call"(%72, %84, %85, %63, %24, %57, %24, %15) <{callee = @hal.command_buffer.copy_buffer}> : (!vm.ref<!hal.command_buffer>, i32, i32, !vm.ref<!hal.buffer>, i64, !vm.ref<!hal.buffer>, i64, i64) -> ()
      %86 = "vm.const.i32"() <{value = 28 : i32}> : () -> i32
      %87 = "vm.const.i32"() <{value = 13 : i32}> : () -> i32
      %88 = "vm.const.i32.zero"() : () -> i32
      "vm.call"(%72, %86, %87, %88) <{callee = @hal.command_buffer.execution_barrier}> : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> ()
      "vm.call"(%72) <{callee = @hal.command_buffer.finalize}> : (!vm.ref<!hal.command_buffer>) -> ()
      %89 = "vm.const.i32.zero"() : () -> i32
      %90 = "vm.call"(%25, %89) <{callee = @hal.fence.create}> : (!vm.ref<!hal.device>, i32) -> !vm.ref<!hal.fence>
      "vm.call.variadic"(%25, %12, %64, %90, %72) <{callee = @hal.device.queue.execute, segment_sizes = dense<[-1, -1, -1, -1, 1]> : vector<5xi16>, segment_types = [!vm.ref<!hal.device>, i64, !vm.ref<!hal.fence>, !vm.ref<!hal.fence>, !vm.ref<!hal.command_buffer>]}> : (!vm.ref<!hal.device>, i64, !vm.ref<!hal.fence>, !vm.ref<!hal.fence>, !vm.ref<!hal.command_buffer>) -> ()
      %91 = "vm.const.i32.zero"() : () -> i32
      %92 = "vm.call"(%25, %91) <{callee = @hal.fence.create}> : (!vm.ref<!hal.device>, i32) -> !vm.ref<!hal.fence>
      "vm.call"(%25, %12, %90, %92, %63) <{callee = @hal.device.queue.dealloca}> : (!vm.ref<!hal.device>, i64, !vm.ref<!hal.fence>, !vm.ref<!hal.fence>, !vm.ref<!hal.buffer>) -> ()
      %93 = "vm.call.variadic"(%9, %92) <{callee = @hal.fence.await, segment_sizes = dense<[-1, 1]> : vector<2xi16>, segment_types = [i32, !vm.ref<!hal.fence>]}> : (i32, !vm.ref<!hal.fence>) -> i32
      "vm.cond_fail"(%93, %93) <{message = "failed to wait on timepoint"}> : (i32, i32) -> ()
      %94 = "vm.const.i32"() <{value = 4 : i32}> : () -> i32
      %95 = "vm.const.i64"() <{value = 4 : i64}> : () -> i64
      %96 = "vm.const.i64"() <{value = 4 : i64}> : () -> i64
      %97 = "vm.call"(%57, %96, %94) <{callee = @hal.buffer.load}> : (!vm.ref<!hal.buffer>, i64, i32) -> i32
      %98 = "vm.ext.i32.i64.u"(%97) : (i32) -> i64
      %99 = "vm.const.i32"() <{value = 32 : i32}> : () -> i32
      %100 = "vm.shl.i64"(%98, %99) : (i64, i32) -> i64
      %101 = "vm.call"(%57, %24, %94) <{callee = @hal.buffer.load}> : (!vm.ref<!hal.buffer>, i64, i32) -> i32
      %102 = "vm.ext.i32.i64.u"(%101) : (i32) -> i64
      %103 = "vm.or.i64"(%102, %100) : (i64, i64) -> i64
      %104 = "builtin.unrealized_conversion_cast"(%103) : (i64) -> index
      %105 = "vm.mul.i64"(%103, %16) : (i64, i64) -> i64
      %106 = "vm.const.i32.zero"() : () -> i32
      %107 = "vm.call"(%25, %106) <{callee = @hal.fence.create}> : (!vm.ref<!hal.device>, i32) -> !vm.ref<!hal.fence>
      %108 = "vm.const.i32.zero"() : () -> i32
      %109 = "vm.const.i32"() <{value = 48 : i32}> : () -> i32
      %110 = "vm.const.i32"() <{value = 3075 : i32}> : () -> i32
      %111 = "vm.call"(%25, %12, %arg6, %107, %108, %109, %110, %105) <{callee = @hal.device.queue.alloca}> : (!vm.ref<!hal.device>, i64, !vm.ref<!hal.fence>, !vm.ref<!hal.fence>, i32, i32, i32, i64) -> !vm.ref<!hal.buffer>
      %112 = "vm.trunc.i64.i32"(%103) : (i64) -> i32
      %113 = "vm.const.i32"() <{value = 32 : i32}> : () -> i32
      %114 = "vm.shr.i64.u"(%103, %113) : (i64, i32) -> i64
      %115 = "vm.trunc.i64.i32"(%114) : (i64) -> i32
      %116 = "vm.trunc.i64.i32"(%45) : (i64) -> i32
      %117 = "vm.const.i32"() <{value = 32 : i32}> : () -> i32
      %118 = "vm.shr.i64.u"(%45, %117) : (i64, i32) -> i64
      %119 = "vm.trunc.i64.i32"(%118) : (i64) -> i32
      %120 = "vm.const.i32"() <{value = 1 : i32}> : () -> i32
      %121 = "vm.const.i32"() <{value = 3 : i32}> : () -> i32
      %122 = "vm.const.i32.zero"() : () -> i32
      %123 = "vm.call"(%25, %120, %121, %12, %122) <{callee = @hal.command_buffer.create}> : (!vm.ref<!hal.device>, i32, i32, i64, i32) -> !vm.ref<!hal.command_buffer>
      %124:3 = "flow.dispatch.workgroup_count_from_dag_root"(%104, %23, %21) : (index, index, index) -> (index, index, index)
      %125 = "builtin.unrealized_conversion_cast"(%124#2) : (index) -> i64
      %126 = "builtin.unrealized_conversion_cast"(%124#1) : (index) -> i64
      %127 = "builtin.unrealized_conversion_cast"(%124#0) : (index) -> i64
      %128 = "vm.const.i32.zero"() : () -> i32
      %129 = "vm.const.i64"() <{value = 0 : i64}> : () -> i64
      %130 = "vm.const.i32"() <{value = 1 : i32}> : () -> i32
      %131 = "vm.trunc.i64.i32"(%127) : (i64) -> i32
      %132 = "vm.trunc.i64.i32"(%126) : (i64) -> i32
      %133 = "vm.trunc.i64.i32"(%125) : (i64) -> i32
      %134 = "vm.const.i32.zero"() : () -> i32
      %135 = "vm.const.i32.zero"() : () -> i32
      %136 = "vm.const.i32.zero"() : () -> i32
      "vm.call.variadic"(%123, %26, %130, %131, %132, %133, %129, %112, %115, %116, %119, %128, %134, %33, %24, %32, %128, %135, %48, %24, %47, %128, %136, %111, %24, %105) <{callee = @hal.command_buffer.dispatch, segment_sizes = dense<[-1, -1, -1, -1, -1, -1, -1, 4, 3]> : vector<9xi16>, segment_types = [!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32, i64, i32, tuple<i32, i32, !vm.ref<!hal.buffer>, i64, i64>]}> : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32, i64, i32, i32, i32, i32, i32, i32, !vm.ref<!hal.buffer>, i64, i64, i32, i32, !vm.ref<!hal.buffer>, i64, i64, i32, i32, !vm.ref<!hal.buffer>, i64, i64) -> ()
      %137 = "vm.const.i32"() <{value = 28 : i32}> : () -> i32
      %138 = "vm.const.i32"() <{value = 13 : i32}> : () -> i32
      %139 = "vm.const.i32.zero"() : () -> i32
      "vm.call"(%123, %137, %138, %139) <{callee = @hal.command_buffer.execution_barrier}> : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> ()
      "vm.call"(%123) <{callee = @hal.command_buffer.finalize}> : (!vm.ref<!hal.command_buffer>) -> ()
      "vm.call.variadic"(%25, %12, %107, %arg7, %123) <{callee = @hal.device.queue.execute, segment_sizes = dense<[-1, -1, -1, -1, 1]> : vector<5xi16>, segment_types = [!vm.ref<!hal.device>, i64, !vm.ref<!hal.fence>, !vm.ref<!hal.fence>, !vm.ref<!hal.command_buffer>]}> : (!vm.ref<!hal.device>, i64, !vm.ref<!hal.fence>, !vm.ref<!hal.fence>, !vm.ref<!hal.command_buffer>) -> ()
      %140 = "vm.call.variadic"(%111, %24, %105, %29, %30, %103, %22, %20) <{callee = @hal.buffer_view.create, segment_sizes = dense<[-1, -1, -1, -1, -1, 3]> : vector<6xi16>, segment_types = [!vm.ref<!hal.buffer>, i64, i64, i32, i32, i64]}> {nosideeffects} : (!vm.ref<!hal.buffer>, i64, i64, i32, i32, i64, i64, i64) -> !vm.ref<!hal.buffer_view>
      "vm.return"(%140) : (!vm.ref<!hal.buffer_view>) -> ()
    }) {sym_name = "tf2onnx$async", sym_visibility = "private"} : () -> ()
    // Exports the private function @tf2onnx$async under the name "tf2onnx$async".
    // The export is tagged with iree.abi.model = "coarse-fences" and iree.abi.stub.
    "vm.export"() <{export_name = "tf2onnx$async", function_ref = @tf2onnx$async}> {iree.abi.model = "coarse-fences", iree.abi.stub} : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "device"}, {vm.name = "queue_affinity"}, {vm.name = "access"}, {vm.name = "buffer"}, {vm.name = "offset"}, {vm.name = "length"}, {vm.name = "flags"}], function_type = (!vm.ref<!hal.device>, i64, i32, !vm.buffer, i64, i64, i32) -> !vm.ref<!hal.file>, sym_name = "hal.ex.file.from_memory", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "allocator"}, {vm.name = "queue_affinity"}, {vm.name = "memory_types"}, {vm.name = "buffer_usage"}, {vm.name = "allocation_size"}], function_type = (!vm.ref<!hal.allocator>, i64, i32, i32, i64) -> !vm.ref<!hal.buffer>, sym_name = "hal.allocator.allocate", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "allocator"}, {vm.name = "try"}, {vm.name = "queue_affinity"}, {vm.name = "memory_types"}, {vm.name = "buffer_usage"}, {vm.name = "source"}, {vm.name = "offset"}, {vm.name = "length"}], function_type = (!vm.ref<!hal.allocator>, i32, i64, i32, i32, !vm.buffer, i64, i64) -> !vm.ref<!hal.buffer>, sym_name = "hal.allocator.import", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "buffer"}, {vm.name = "message"}, {vm.name = "allocator"}, {vm.name = "minimum_length"}, {vm.name = "memory_types"}, {vm.name = "buffer_usage"}], function_type = (!vm.ref<!hal.buffer>, !vm.buffer, !vm.ref<!hal.allocator>, i64, i32, i32) -> (), sym_name = "hal.buffer.assert", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "source_buffer"}, {vm.name = "source_offset"}, {vm.name = "length"}], function_type = (!vm.ref<!hal.buffer>, i64, i64) -> !vm.ref<!hal.buffer>, sym_name = "hal.buffer.subspan", sym_visibility = "private"}> ({
    }) {nosideeffects} : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "buffer"}], function_type = (!vm.ref<!hal.buffer>) -> i64, sym_name = "hal.buffer.length", sym_visibility = "private"}> ({
    }) {nosideeffects} : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "source_buffer"}, {vm.name = "source_offset"}, {vm.name = "length"}], function_type = (!vm.ref<!hal.buffer>, i64, i32) -> i32, sym_name = "hal.buffer.load", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "value"}, {vm.name = "target_buffer"}, {vm.name = "target_offset"}, {vm.name = "length"}], function_type = (i32, !vm.ref<!hal.buffer>, i64, i32) -> (), sym_name = "hal.buffer.store", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "buffer"}, {vm.name = "source_offset"}, {vm.name = "source_length"}, {vm.name = "element_type"}, {vm.name = "encoding_type"}, {vm.name = "shape", vm.variadic}], function_type = (!vm.ref<!hal.buffer>, i64, i64, i32, i32, i64) -> !vm.ref<!hal.buffer_view>, sym_name = "hal.buffer_view.create", sym_visibility = "private"}> ({
    }) {nosideeffects} : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "buffer_view"}, {vm.name = "message"}, {vm.name = "element_type"}, {vm.name = "encoding_type"}, {vm.name = "shape", vm.variadic}], function_type = (!vm.ref<!hal.buffer_view>, !vm.buffer, i32, i32, i64) -> (), sym_name = "hal.buffer_view.assert", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "buffer_view"}], function_type = (!vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer>, sym_name = "hal.buffer_view.buffer", sym_visibility = "private"}> ({
    }) {nosideeffects} : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "buffer_view"}], function_type = (!vm.ref<!hal.buffer_view>) -> i32, sym_name = "hal.buffer_view.element_type", sym_visibility = "private"}> ({
    }) {nosideeffects} : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "buffer_view"}], function_type = (!vm.ref<!hal.buffer_view>) -> i32, sym_name = "hal.buffer_view.encoding_type", sym_visibility = "private"}> ({
    }) {nosideeffects} : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "buffer_view"}], function_type = (!vm.ref<!hal.buffer_view>) -> i32, sym_name = "hal.buffer_view.rank", sym_visibility = "private"}> ({
    }) {nosideeffects} : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "buffer_view"}, {vm.name = "index"}], function_type = (!vm.ref<!hal.buffer_view>, i32) -> i64, sym_name = "hal.buffer_view.dim", sym_visibility = "private"}> ({
    }) {nosideeffects} : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "key"}, {vm.name = "operands", vm.variadic}], function_type = (!vm.buffer, !vm.ref<!hal.buffer_view>) -> (), sym_name = "hal.buffer_view.trace", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "device"}, {vm.name = "queue_affinity"}, {vm.name = "flags"}, {vm.name = "id"}, {vm.name = "group"}, {vm.name = "rank"}, {vm.name = "count"}], function_type = (!vm.ref<!hal.device>, i64, i32, !vm.buffer, !vm.buffer, i32, i32) -> !vm.ref<!hal.channel>, sym_name = "hal.channel.create", sym_visibility = "private"}> ({
    }) {nosideeffects} : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "channel"}, {vm.name = "color"}, {vm.name = "key"}, {vm.name = "flags"}], function_type = (!vm.ref<!hal.channel>, i32, i32, i32) -> !vm.ref<!hal.channel>, sym_name = "hal.channel.split", sym_visibility = "private"}> ({
    }) {nosideeffects} : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "channel"}], function_type = (!vm.ref<!hal.channel>) -> (i32, i32), sym_name = "hal.channel.rank_and_count", sym_visibility = "private"}> ({
    }) {nosideeffects} : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "device"}, {vm.name = "modes"}, {vm.name = "command_categories"}, {vm.name = "queue_affinity"}, {vm.name = "binding_capacity"}], function_type = (!vm.ref<!hal.device>, i32, i32, i64, i32) -> !vm.ref<!hal.command_buffer>, minimum_version = 5 : i32, sym_name = "hal.command_buffer.create", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "command_buffer"}], function_type = (!vm.ref<!hal.command_buffer>) -> (), sym_name = "hal.command_buffer.finalize", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "command_buffer"}, {vm.name = "label"}], function_type = (!vm.ref<!hal.command_buffer>, !vm.buffer) -> (), sym_name = "hal.command_buffer.begin_debug_group", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "command_buffer"}], function_type = (!vm.ref<!hal.command_buffer>) -> (), sym_name = "hal.command_buffer.end_debug_group", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "command_buffer"}, {vm.name = "source_stage_mask"}, {vm.name = "target_stage_mask"}, {vm.name = "flags"}], function_type = (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> (), sym_name = "hal.command_buffer.execution_barrier", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "command_buffer"}, {vm.name = "target_buffer"}, {vm.name = "target_offset"}, {vm.name = "length"}, {vm.name = "target_buffer_slot"}, {vm.name = "pattern"}, {vm.name = "pattern_length"}], function_type = (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.buffer>, i64, i64, i32, i32, i32) -> (), sym_name = "hal.command_buffer.fill_buffer", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "command_buffer"}, {vm.name = "source_buffer"}, {vm.name = "source_offset"}, {vm.name = "target_buffer"}, {vm.name = "target_offset"}, {vm.name = "length"}, {vm.name = "target_buffer_slot"}], function_type = (!vm.ref<!hal.command_buffer>, !vm.buffer, i64, !vm.ref<!hal.buffer>, i64, i64, i32) -> (), sym_name = "hal.command_buffer.update_buffer", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "command_buffer"}, {vm.name = "source_buffer_slot"}, {vm.name = "target_buffer_slot"}, {vm.name = "source_buffer"}, {vm.name = "source_offset"}, {vm.name = "target_buffer"}, {vm.name = "target_offset"}, {vm.name = "length"}], function_type = (!vm.ref<!hal.command_buffer>, i32, i32, !vm.ref<!hal.buffer>, i64, !vm.ref<!hal.buffer>, i64, i64) -> (), sym_name = "hal.command_buffer.copy_buffer", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "command_buffer"}, {vm.name = "channel"}, {vm.name = "op"}, {vm.name = "param"}, {vm.name = "send_buffer_slot"}, {vm.name = "recv_buffer_slot"}, {vm.name = "send_buffer"}, {vm.name = "recv_buffer"}, {vm.name = "send_offset"}, {vm.name = "send_length"}, {vm.name = "recv_offset"}, {vm.name = "recv_length"}, {vm.name = "element_count"}], function_type = (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.channel>, i32, i32, i32, i32, !vm.ref<!hal.buffer>, !vm.ref<!hal.buffer>, i64, i64, i64, i64, i64) -> (), sym_name = "hal.command_buffer.collective", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "command_buffer"}, {vm.name = "executable"}, {vm.name = "entry_point"}, {vm.name = "workgroup_x"}, {vm.name = "workgroup_y"}, {vm.name = "workgroup_z"}, {vm.name = "flags"}, {vm.name = "constants", vm.variadic}, {vm.name = "bindings", vm.variadic}], function_type = (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32, i64, i32, tuple<i32, i32, !vm.ref<!hal.buffer>, i64, i64>) -> (), sym_name = "hal.command_buffer.dispatch", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "command_buffer"}, {vm.name = "executable"}, {vm.name = "entry_point"}, {vm.name = "workgroups_buffer_slot"}, {vm.name = "workgroups_buffer"}, {vm.name = "workgroups_offset"}, {vm.name = "flags"}, {vm.name = "constants", vm.variadic}, {vm.name = "bindings", vm.variadic}], function_type = (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, !vm.ref<!hal.buffer>, i64, i64, i32, tuple<i32, i32, !vm.ref<!hal.buffer>, i64, i64>) -> (), sym_name = "hal.command_buffer.dispatch.indirect", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "device"}], function_type = (!vm.ref<!hal.device>) -> !vm.ref<!hal.allocator>, sym_name = "hal.device.allocator", sym_visibility = "private"}> ({
    }) {nosideeffects} : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "device"}, {vm.name = "category"}, {vm.name = "key"}], function_type = (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer) -> (i32, i64), sym_name = "hal.device.query.i64", sym_visibility = "private"}> ({
    }) {nosideeffects} : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "device"}, {vm.name = "queue_affinity"}, {vm.name = "wait_fence"}, {vm.name = "signal_fence"}, {vm.name = "pool"}, {vm.name = "memory_types"}, {vm.name = "buffer_usage"}, {vm.name = "allocation_size"}], function_type = (!vm.ref<!hal.device>, i64, !vm.ref<!hal.fence>, !vm.ref<!hal.fence>, i32, i32, i32, i64) -> !vm.ref<!hal.buffer>, sym_name = "hal.device.queue.alloca", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "device"}, {vm.name = "queue_affinity"}, {vm.name = "wait_fence"}, {vm.name = "signal_fence"}, {vm.name = "buffer"}], function_type = (!vm.ref<!hal.device>, i64, !vm.ref<!hal.fence>, !vm.ref<!hal.fence>, !vm.ref<!hal.buffer>) -> (), sym_name = "hal.device.queue.dealloca", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "device"}, {vm.name = "queue_affinity"}, {vm.name = "wait_fence"}, {vm.name = "signal_fence"}, {vm.name = "target_buffer"}, {vm.name = "target_offset"}, {vm.name = "length"}, {vm.name = "pattern"}, {vm.name = "pattern_length"}, {vm.name = "flags"}], function_type = (!vm.ref<!hal.device>, i64, !vm.ref<!hal.fence>, !vm.ref<!hal.fence>, !vm.ref<!hal.buffer>, i64, i64, i64, i32, i64) -> (), sym_name = "hal.device.queue.fill", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "device"}, {vm.name = "queue_affinity"}, {vm.name = "wait_fence"}, {vm.name = "signal_fence"}, {vm.name = "source_buffer"}, {vm.name = "source_offset"}, {vm.name = "target_buffer"}, {vm.name = "target_offset"}, {vm.name = "length"}, {vm.name = "flags"}], function_type = (!vm.ref<!hal.device>, i64, !vm.ref<!hal.fence>, !vm.ref<!hal.fence>, !vm.buffer, i64, !vm.ref<!hal.buffer>, i64, i64, i64) -> (), sym_name = "hal.device.queue.update", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "device"}, {vm.name = "queue_affinity"}, {vm.name = "wait_fence"}, {vm.name = "signal_fence"}, {vm.name = "source_buffer"}, {vm.name = "source_offset"}, {vm.name = "target_buffer"}, {vm.name = "target_offset"}, {vm.name = "length"}, {vm.name = "flags"}], function_type = (!vm.ref<!hal.device>, i64, !vm.ref<!hal.fence>, !vm.ref<!hal.fence>, !vm.ref<!hal.buffer>, i64, !vm.ref<!hal.buffer>, i64, i64, i64) -> (), sym_name = "hal.device.queue.copy", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "device"}, {vm.name = "queue_affinity"}, {vm.name = "wait_fence"}, {vm.name = "signal_fence"}, {vm.name = "source_file"}, {vm.name = "source_offset"}, {vm.name = "target_buffer"}, {vm.name = "target_offset"}, {vm.name = "length"}, {vm.name = "flags"}], function_type = (!vm.ref<!hal.device>, i64, !vm.ref<!hal.fence>, !vm.ref<!hal.fence>, !vm.ref<!hal.file>, i64, !vm.ref<!hal.buffer>, i64, i64, i32) -> (), sym_name = "hal.device.queue.read", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "device"}, {vm.name = "queue_affinity"}, {vm.name = "wait_fence"}, {vm.name = "signal_fence"}, {vm.name = "source_buffer"}, {vm.name = "source_offset"}, {vm.name = "target_file"}, {vm.name = "target_offset"}, {vm.name = "length"}, {vm.name = "flags"}], function_type = (!vm.ref<!hal.device>, i64, !vm.ref<!hal.fence>, !vm.ref<!hal.fence>, !vm.ref<!hal.buffer>, i64, !vm.ref<!hal.file>, i64, i64, i32) -> (), sym_name = "hal.device.queue.write", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "device"}, {vm.name = "queue_affinity"}, {vm.name = "wait_fence"}, {vm.name = "signal_fence"}, {vm.name = "command_buffers", vm.variadic}], function_type = (!vm.ref<!hal.device>, i64, !vm.ref<!hal.fence>, !vm.ref<!hal.fence>, !vm.ref<!hal.command_buffer>) -> (), sym_name = "hal.device.queue.execute", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "device"}, {vm.name = "queue_affinity"}, {vm.name = "wait_fence"}, {vm.name = "signal_fence"}, {vm.name = "command_buffer"}, {vm.name = "binding_table", vm.variadic}], function_type = (!vm.ref<!hal.device>, i64, !vm.ref<!hal.fence>, !vm.ref<!hal.fence>, !vm.ref<!hal.command_buffer>, tuple<!vm.ref<!hal.buffer>, i64, i64>) -> (), sym_name = "hal.device.queue.execute.indirect", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "device"}, {vm.name = "queue_affinity"}], function_type = (!vm.ref<!hal.device>, i64) -> (), sym_name = "hal.device.queue.flush", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{function_type = () -> i32, sym_name = "hal.devices.count", sym_visibility = "private"}> ({
    }) {nosideeffects} : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "index"}], function_type = (i32) -> !vm.ref<!hal.device>, sym_name = "hal.devices.get", sym_visibility = "private"}> ({
    }) {nosideeffects} : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "device"}, {vm.name = "executable_format"}, {vm.name = "executable_data"}, {vm.name = "constants"}], function_type = (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer, !vm.buffer) -> !vm.ref<!hal.executable>, sym_name = "hal.executable.create", sym_visibility = "private"}> ({
    }) {nosideeffects} : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "device"}, {vm.name = "flags"}], function_type = (!vm.ref<!hal.device>, i32) -> !vm.ref<!hal.fence>, sym_name = "hal.fence.create", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "fences", vm.variadic}], function_type = (!vm.ref<!hal.fence>) -> !vm.ref<!hal.fence>, sym_name = "hal.fence.join", sym_visibility = "private"}> ({
    }) {nosideeffects} : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "fence"}], function_type = (!vm.ref<!hal.fence>) -> i32, sym_name = "hal.fence.query", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "fence"}], function_type = (!vm.ref<!hal.fence>) -> (), sym_name = "hal.fence.signal", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "fence"}, {vm.name = "status"}], function_type = (!vm.ref<!hal.fence>, i32) -> (), sym_name = "hal.fence.fail", sym_visibility = "private"}> ({
    }) : () -> ()
    "vm.import"() <{arg_attrs = [{vm.name = "timeout_millis"}, {vm.name = "fences", vm.variadic}], function_type = (i32, !vm.ref<!hal.fence>) -> i32, sym_name = "hal.fence.await", sym_visibility = "private"}> ({
    }) {vm.yield} : () -> ()
    // Synchronous entry point @tf2onnx.
    //
    // Takes three buffer views, creates a fresh fence on device index 0, forwards
    // the buffer views plus (null fence, new fence) to @tf2onnx$async, blocks on
    // the new fence, and returns the buffer view produced by the async call.
    // The wait status is checked so that a failed wait aborts the call instead of
    // silently returning the result, matching the check that follows the
    // @hal.fence.await call inside @tf2onnx$async.
    "vm.func"() <{function_type = (!vm.ref<!hal.buffer_view>, !vm.ref<!hal.buffer_view>, !vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer_view>}> ({
    ^bb0(%arg0: !vm.ref<!hal.buffer_view>, %arg1: !vm.ref<!hal.buffer_view>, %arg2: !vm.ref<!hal.buffer_view>):
      // Null fence passed as the fourth operand of @tf2onnx$async.
      // NOTE(review): presumably the callee's wait fence (nothing to wait on) --
      // confirm against the @tf2onnx$async signature.
      %null_fence = "vm.const.ref.zero"() : () -> !vm.ref<!hal.fence>
      // Value for the "timeout_millis" operand of @hal.fence.await.
      %timeout_millis = "vm.const.i32"() <{value = -1 : i32}> : () -> i32
      // Fetch the device at index 0.
      %device_index = "vm.const.i32.zero"() : () -> i32
      %device = "vm.call"(%device_index) <{callee = @hal.devices.get}> {nosideeffects} : (i32) -> !vm.ref<!hal.device>
      // Create the fence (flags = 0) that is handed to the async call and awaited below.
      %fence_flags = "vm.const.i32.zero"() : () -> i32
      %fence = "vm.call"(%device, %fence_flags) <{callee = @hal.fence.create}> : (!vm.ref<!hal.device>, i32) -> !vm.ref<!hal.fence>
      %result = "vm.call"(%arg0, %arg1, %arg2, %null_fence, %fence) <{callee = @tf2onnx$async}> : (!vm.ref<!hal.buffer_view>, !vm.ref<!hal.buffer_view>, !vm.ref<!hal.buffer_view>, !vm.ref<!hal.fence>, !vm.ref<!hal.fence>) -> !vm.ref<!hal.buffer_view>
      // Block on the fence before handing the result back to the caller.
      %await_status = "vm.call.variadic"(%timeout_millis, %fence) <{callee = @hal.fence.await, segment_sizes = dense<[-1, 1]> : vector<2xi16>, segment_types = [i32, !vm.ref<!hal.fence>]}> : (i32, !vm.ref<!hal.fence>) -> i32
      // Fail with the returned status when the wait reports a non-zero status.
      "vm.cond_fail"(%await_status, %await_status) <{message = "failed to wait on timepoint"}> : (i32, i32) -> ()
      "vm.return"(%result) : (!vm.ref<!hal.buffer_view>) -> ()
    }) {sym_name = "tf2onnx", sym_visibility = "private"} : () -> ()
    // Exports the private function @tf2onnx under the name "tf2onnx"; the export
    // carries the iree.abi.stub attribute.
    "vm.export"() <{export_name = "tf2onnx", function_ref = @tf2onnx}> {iree.abi.stub} : () -> ()
    "vm.module_terminator"() : () -> ()
  }) : () -> ()
}) {vm.toplevel} : () -> ()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment