Last active
November 6, 2022 17:04
-
-
Save raphlinus/e355655df39c94287cc1db36f57797d2 to your computer and use it in GitHub Desktop.
Minimal metal repro of piet-gpu#199
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Foundation | |
import Metal | |
// This is translated by naga from tile_alloc.wgsl | |
// naga --buffer-bounds-check-policy ReadZeroSkipWrite tile_alloc.wgsl tile_alloc.metal | |
let metalProgram = """ | |
// language: metal2.0 | |
#include <metal_stdlib> | |
#include <simd/simd.h> | |
using metal::uint; | |
struct DefaultConstructible { | |
template<typename T> | |
operator T() && { | |
return T {}; | |
} | |
}; | |
struct _mslBufferSizes { | |
uint size1; | |
}; | |
constexpr constant unsigned WG_SIZE = 256u; | |
typedef uint type_2[1]; | |
struct main_Input { | |
}; | |
kernel void main_( | |
metal::uint3 global_id [[thread_position_in_grid]] | |
, metal::uint3 local_id [[thread_position_in_threadgroup]] | |
, device metal::atomic_uint& bump [[user(fake0)]] | |
, device type_2& paths [[user(fake0)]] | |
, threadgroup uint& sh_tile_offset | |
, constant _mslBufferSizes& _buffer_sizes [[user(fake0)]] | |
) { | |
uint drawobj_ix = global_id.x; | |
uint tile_count_in = local_id.x + 1u; | |
if (local_id.x == (WG_SIZE - 1u)) { | |
uint _e15 = metal::atomic_fetch_add_explicit(&bump, tile_count_in, metal::memory_order_relaxed); | |
sh_tile_offset = 1u + _e15; | |
} | |
metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup); | |
uint tile_offset = sh_tile_offset; | |
if (drawobj_ix < 3u) { | |
if (uint(drawobj_ix) < 1 + (_buffer_sizes.size1 - 0 - 4) / 4) { | |
paths[drawobj_ix] = tile_offset; | |
} | |
return; | |
} else { | |
return; | |
} | |
} | |
""" | |
func barrierTest() { | |
do { | |
let device = MTLCreateSystemDefaultDevice()! | |
let queue = device.makeCommandQueue()! | |
let cmdbuf = queue.makeCommandBuffer()! | |
let library = try device.makeLibrary(source: metalProgram, options: | |
nil) | |
let function = library.makeFunction(name: "main_")! | |
let pipeline = try device.makeComputePipelineState(function: function) | |
let sharedBuf = MTLResourceOptions.storageModeShared | |
let buf0 = device.makeBuffer(length: 1024, options: sharedBuf)! | |
let buf1 = device.makeBuffer(length: 1024, options: sharedBuf)! | |
let encoder = cmdbuf.makeComputeCommandEncoder()! | |
encoder.setComputePipelineState(pipeline) | |
encoder.setBuffer(buf0, offset: 0, index: 0) | |
encoder.setBuffer(buf1, offset: 0, index: 1) | |
let sizes: [UInt32] = [1024]; | |
encoder.setBytes(sizes, length: 4, index: 2) | |
encoder.setThreadgroupMemoryLength(16, index: 0) | |
encoder.dispatchThreadgroups(MTLSizeMake(1, 1, 1), threadsPerThreadgroup: MTLSizeMake(256, 1, 1)) | |
encoder.endEncoding() | |
cmdbuf.commit() | |
cmdbuf.waitUntilCompleted() | |
let contents = buf1.contents() | |
for i in 0..<4 { | |
print(i, contents.load(fromByteOffset: i * 4, as: UInt32.self)) | |
} | |
} catch { | |
print("got an error") | |
} | |
} | |
barrierTest() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment