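// IREE dispatch benchmark: a standalone module that repeatedly runs the softmax
// dispatch @forward_dispatch_43, compiled for Vulkan/SPIR-V on an NVIDIA discrete GPU.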
module attributes {hal.device.targets = [#hal.device.target<"vulkan", {executable_targets = [#hal.executable.target<"vulkan", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>}>], legacy_sync}>]} {
  hal.executable private @forward_dispatch_43 {
    hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>}> {
      hal.executable.export public @forward_dispatch_43 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
      ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
        %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
        hal.return %x, %y, %z : index, index, index
      }
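      // Dispatch body: load a 10x9216x9216 f32 tensor from the read-only binding
      // (at byte offset 94371840), apply softmax along the innermost dimension,
      // and store the result to the second binding.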
      builtin.module {
        func.func @forward_dispatch_43() {
          %c94371840 = arith.constant 94371840 : index
          %c0 = arith.constant 0 : index
          %0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c94371840) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<10x9216x9216xf32>>
          %1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<10x9216x9216xf32>>
          %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [10, 9216, 9216], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<10x9216x9216xf32>> -> tensor<10x9216x9216xf32>
          %3 = tensor.empty() : tensor<10x9216x9216xf32>
          %4 = iree_linalg_ext.softmax dimension(2) ins(%2 : tensor<10x9216x9216xf32>) outs(%3 : tensor<10x9216x9216xf32>) -> tensor<10x9216x9216xf32>
          flow.dispatch.tensor.store %4, %1, offsets = [0, 0, 0], sizes = [10, 9216, 9216], strides = [1, 1, 1] : tensor<10x9216x9216xf32> -> !flow.dispatch.tensor<writeonly:tensor<10x9216x9216xf32>>
          return
        }
      }
    }
  }
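  // A single device-local buffer (6889144320 bytes, ~6.9 GB) backs both bindings
  // of the dispatch; it is allocated once when the module initializes.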
  util.global private mutable @forward_dispatch_43_vulkan_spirv_fb_forward_dispatch_43_10x9216x1_buffer : !hal.buffer
  util.initializer {
    %c6889144320 = arith.constant 6889144320 : index
    %device = hal.ex.shared_device : !hal.device
    %allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator
    %buffer = hal.allocator.allocate<%allocator : !hal.allocator> type("DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") : !hal.buffer{%c6889144320}
    util.global.store %buffer, @forward_dispatch_43_vulkan_spirv_fb_forward_dispatch_43_10x9216x1_buffer : !hal.buffer
    util.initializer.return
  }
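  // Benchmark entry point: %arg0 selects how many dispatch iterations to record.
  // Each iteration is followed by a full execution barrier; the command buffer is
  // then submitted once and the host blocks on the signal fence.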
  func.func @forward_dispatch_43_vulkan_spirv_fb_forward_dispatch_43_10x9216x1(%arg0: i32) attributes {iree.abi.stub, iree.reflection = {iree.benchmark = "dispatch"}} {
    %c-1_i32 = arith.constant -1 : i32
    %c-1_i64 = arith.constant -1 : i64
    %c9216 = arith.constant 9216 : index
    %c10 = arith.constant 10 : index
    %c3397386240 = arith.constant 3397386240 : index
    %c1 = arith.constant 1 : index
    %c3491758080 = arith.constant 3491758080 : index
    %c0 = arith.constant 0 : index
    %0 = arith.index_cast %arg0 : i32 to index
    %device = hal.ex.shared_device : !hal.device
    %cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories(Dispatch) : !hal.command_buffer
    %pipeline_layout = hal.pipeline_layout.lookup device(%device : !hal.device) layout(<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) : !hal.pipeline_layout
    %forward_dispatch_43_vulkan_spirv_fb_forward_dispatch_43_10x9216x1_buffer = util.global.load @forward_dispatch_43_vulkan_spirv_fb_forward_dispatch_43_10x9216x1_buffer : !hal.buffer
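    // Both bindings are disjoint slices of the one global buffer: binding 0
    // (read-only input) covers bytes [0, 3491758080) and binding 1 (output)
    // covers the remaining 3397386240 bytes.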
    hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%pipeline_layout : !hal.pipeline_layout)[%c0] bindings([
      %c0 = (%forward_dispatch_43_vulkan_spirv_fb_forward_dispatch_43_10x9216x1_buffer : !hal.buffer)[%c0, %c3491758080],
      %c1 = (%forward_dispatch_43_vulkan_spirv_fb_forward_dispatch_43_10x9216x1_buffer : !hal.buffer)[%c3491758080, %c3397386240]
    ])
    %workgroup_x, %workgroup_y, %workgroup_z = hal.executable.calculate_workgroups device(%device : !hal.device) target(@forward_dispatch_43::@vulkan_spirv_fb::@forward_dispatch_43) workload([%c10, %c9216, %c1]) : index, index, index
    scf.for %arg1 = %c0 to %0 step %c1 {
      hal.command_buffer.dispatch.symbol<%cmd : !hal.command_buffer> target(@forward_dispatch_43::@vulkan_spirv_fb::@forward_dispatch_43) workgroups([%workgroup_x, %workgroup_y, %workgroup_z])
      hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|CommandRetire") target("CommandIssue|Dispatch") flags("None")
    }
    hal.command_buffer.finalize<%cmd : !hal.command_buffer>
    %1 = util.null : !hal.fence
    %2 = hal.timeline.advance : !hal.fence
    hal.device.queue.execute<%device : !hal.device> affinity(%c-1_i64) wait(%1) signal(%2) commands([%cmd])
    %status = hal.fence.await until([%2]) timeout_millis(%c-1_i32) : i32
    util.status.check_ok %status, "failed to wait on timepoint"
    return
  }
}