Created
February 8, 2023 15:43
-
-
Save pashu123/fb46b1099b31c0af98ec079b9aeb67b6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// IREE HAL benchmark module for a single dispatch (@forward_dispatch_42).
// Target: Vulkan/SPIR-V on an NVIDIA discrete GPU (subgroup size 32, 48 KiB
// shared memory, NV cooperative-matrix support advertised in the target env).
// The dispatch computes, over a 10x9216x9216 f32 tensor:
//   1) a row-wise max reduction over the innermost dimension, then
//   2) elementwise exp(x - rowmax).
// NOTE(review): this looks like the first two stages of a softmax; the
// sum/normalize stage is presumably in another dispatch — confirm upstream.
module attributes {hal.device.targets = [#hal.device.target<"vulkan", {executable_targets = [#hal.executable.target<"vulkan", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>}>], legacy_sync}>]} {
// The dispatch executable: one export, compiled for the vulkan-spirv-fb format
// with the same target environment as the module-level device target above.
hal.executable private @forward_dispatch_42 {
hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, NVIDIA:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 49152, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 64], min_subgroup_size = 32, max_subgroup_size = 32, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 8, n_size = 8, k_size = 32, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>}> {
// Export: pipeline layout has no push constants and one descriptor set with
// binding 0 read-only (input) and binding 1 read-write (output).
hal.executable.export public @forward_dispatch_42_generic_10x9216x9216 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
// Workgroup count is derived from the workload (10, 9216, 9216) by the
// dispatch-region root op; the actual tiling is decided by the backend.
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
hal.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @forward_dispatch_42_generic_10x9216x9216() {
// Byte offsets into the original model's buffers; the benchmark driver
// below rebinds the same kernel at offsets 0 / 6889144320 instead.
%c94371840 = arith.constant 94371840 : index
%c3491758080 = arith.constant 3491758080 : index
// -FLT_MAX: identity element for the f32 max reduction.
%cst = arith.constant -3.40282347E+38 : f32
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c94371840) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<10x9216x9216xf32>>
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c3491758080) : !flow.dispatch.tensor<writeonly:tensor<10x9216x9216xf32>>
// Load the full input tensor (no tiling at this level).
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [10, 9216, 9216], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<10x9216x9216xf32>> -> tensor<10x9216x9216xf32>
%3 = tensor.empty() : tensor<10x9216x9216xf32>
%4 = tensor.empty() : tensor<10x9216xf32>
// Initialize the reduction accumulator to -FLT_MAX.
%5 = linalg.fill ins(%cst : f32) outs(%4 : tensor<10x9216xf32>) -> tensor<10x9216xf32>
// Stage 1: reduce over d2 (innermost dim) with max -> 10x9216 row maxima.
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%2 : tensor<10x9216x9216xf32>) outs(%5 : tensor<10x9216xf32>) {
^bb0(%in: f32, %out: f32):
%8 = arith.maxf %in, %out : f32
linalg.yield %8 : f32
} -> tensor<10x9216xf32>
// Stage 2: fully parallel elementwise exp(x - rowmax), broadcasting the
// 10x9216 maxima across d2.
%7 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2, %6 : tensor<10x9216x9216xf32>, tensor<10x9216xf32>) outs(%3 : tensor<10x9216x9216xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%8 = arith.subf %in, %in_0 : f32
%9 = math.exp %8 : f32
linalg.yield %9 : f32
} -> tensor<10x9216x9216xf32>
flow.dispatch.tensor.store %7, %1, offsets = [0, 0, 0], sizes = [10, 9216, 9216], strides = [1, 1, 1] : tensor<10x9216x9216xf32> -> !flow.dispatch.tensor<writeonly:tensor<10x9216x9216xf32>>
return
}
}
}
}
// Scratch buffer shared by the benchmark's two bindings. Size 13778288640 =
// 2 x 6889144320, i.e. one 6889144320-byte range per binding (see the
// push_descriptor_set below).
util.global private mutable @forward_dispatch_42_vulkan_spirv_fb_forward_dispatch_42_generic_10x9216x9216_10x9216x9216_buffer : !hal.buffer
util.initializer {
%c13778288640 = arith.constant 13778288640 : index
%device = hal.ex.shared_device : !hal.device
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator
// Device-local allocation usable as both transfer and dispatch storage.
%buffer = hal.allocator.allocate<%allocator : !hal.allocator> type("DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") : !hal.buffer{%c13778288640}
util.global.store %buffer, @forward_dispatch_42_vulkan_spirv_fb_forward_dispatch_42_generic_10x9216x9216_10x9216x9216_buffer : !hal.buffer
util.initializer.return
}
// Benchmark entry point (iree.benchmark = "dispatch"): records %arg0
// repetitions of the dispatch into one command buffer, submits it, and
// blocks until the GPU signals completion.
//   %arg0 - number of dispatch iterations to record (batch size).
func.func @forward_dispatch_42_vulkan_spirv_fb_forward_dispatch_42_generic_10x9216x9216_10x9216x9216(%arg0: i32) attributes {iree.abi.stub, iree.reflection = {iree.benchmark = "dispatch"}} {
// -1 timeout millis => wait indefinitely; -1 i64 => any queue affinity.
%c-1_i32 = arith.constant -1 : i32
%c-1_i64 = arith.constant -1 : i64
%c9216 = arith.constant 9216 : index
%c10 = arith.constant 10 : index
%c1 = arith.constant 1 : index
%c6889144320 = arith.constant 6889144320 : index
%c0 = arith.constant 0 : index
%0 = arith.index_cast %arg0 : i32 to index
%device = hal.ex.shared_device : !hal.device
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories(Dispatch) : !hal.command_buffer
%pipeline_layout = hal.pipeline_layout.lookup device(%device : !hal.device) layout(<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) : !hal.pipeline_layout
%forward_dispatch_42_vulkan_spirv_fb_forward_dispatch_42_generic_10x9216x9216_10x9216x9216_buffer = util.global.load @forward_dispatch_42_vulkan_spirv_fb_forward_dispatch_42_generic_10x9216x9216_10x9216x9216_buffer : !hal.buffer
// Bind the two halves of the scratch buffer: input at [0, 6889144320),
// output at [6889144320, 13778288640).
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%pipeline_layout : !hal.pipeline_layout)[%c0] bindings([
%c0 = (%forward_dispatch_42_vulkan_spirv_fb_forward_dispatch_42_generic_10x9216x9216_10x9216x9216_buffer : !hal.buffer)[%c0, %c6889144320],
%c1 = (%forward_dispatch_42_vulkan_spirv_fb_forward_dispatch_42_generic_10x9216x9216_10x9216x9216_buffer : !hal.buffer)[%c6889144320, %c6889144320]
])
// Resolve the workgroup grid once for the fixed workload (10, 9216, 9216).
%workgroup_x, %workgroup_y, %workgroup_z = hal.executable.calculate_workgroups device(%device : !hal.device) target(@forward_dispatch_42::@vulkan_spirv_fb::@forward_dispatch_42_generic_10x9216x9216) workload([%c10, %c9216, %c9216]) : index, index, index
// Record %arg0 back-to-back dispatches; the execution barrier serializes
// them so iterations don't overlap (required for stable timing).
scf.for %arg1 = %c0 to %0 step %c1 {
hal.command_buffer.dispatch.symbol<%cmd : !hal.command_buffer> target(@forward_dispatch_42::@vulkan_spirv_fb::@forward_dispatch_42_generic_10x9216x9216) workgroups([%workgroup_x, %workgroup_y, %workgroup_z])
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|CommandRetire") target("CommandIssue|Dispatch") flags("None")
}
hal.command_buffer.finalize<%cmd : !hal.command_buffer>
// Submit with no wait fence (null) and a fresh signal fence, then block
// (infinite timeout) and surface any device error as a status failure.
%1 = util.null : !hal.fence
%2 = hal.timeline.advance : !hal.fence
hal.device.queue.execute<%device : !hal.device> affinity(%c-1_i64) wait(%1) signal(%2) commands([%cmd])
%status = hal.fence.await until([%2]) timeout_millis(%c-1_i32) : i32
util.status.check_ok %status, "failed to wait on timepoint"
return
}
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment