Skip to content

Instantly share code, notes, and snippets.

@philipturner
Created October 16, 2025 12:47
Show Gist options
  • Select an option

  • Save philipturner/d8dd53c319ba31056cffb18ff79394ab to your computer and use it in GitHub Desktop.

Select an option

Save philipturner/d8dd53c319ba31056cffb18ff79394ab to your computer and use it in GitHub Desktop.
Rotating beam benchmark
// For profiling with D3D12 timestamp queries.
#if os(Windows)
import SwiftCOM
import WinSDK
#endif
// TODO: Before finishing the acceleration structure PR, remove the public
// modifier for the functions in this extension.
extension Application {
/// Reads back the crash buffer slot for `frameID` and aborts when its first
/// word is not 1.
///
/// Nothing is checked for the first three frames. From frame 3 onward, the
/// slot at `frameID % 3` is read as an array of 32-bit words. When the first
/// word differs from 1, the buffer contents and a few pieces of application
/// state are packed into a `CrashInfo`, whose message is handed to
/// `fatalError`.
///
/// - Parameter frameID: Index of the current frame.
public func checkCrashBuffer(frameID: Int) {
  // Frames 0, 1, and 2 are never inspected.
  guard frameID >= 3 else {
    return
  }

  // The buffer size is divided by 4 to get a count of 32-bit words.
  let wordCount = CounterResources.crashBufferSize / 4
  var words = [UInt32](repeating: .zero, count: wordCount)
  bvhBuilder.counters.crashBuffer.read(
    data: &words,
    inFlightFrameID: frameID % 3)

  // A leading word of 1 is the only value that does not trigger a crash.
  guard words[0] != 1 else {
    return
  }

  // Gather everything the crash report needs.
  var descriptor = CrashInfoDescriptor()
  descriptor.bufferContents = words
  descriptor.clockFrames = clock.frames
  descriptor.displayFrameRate = display.frameRate
  descriptor.frameID = frameID
  descriptor.memorySlotCount = bvhBuilder.voxels.memorySlotCount
  descriptor.worldDimension = bvhBuilder.voxels.worldDimension

  let report = CrashInfo(descriptor: descriptor)
  fatalError(report.message)
}
/// Prints the "update BVH" and "render" latencies, in microseconds, for the
/// in-flight slot `frameID % 3`.
///
/// Nothing is printed for the first three frames.
///
/// - On Windows, four 64-bit timestamp counters are read from the slot's
///   query destination buffer. Counters 0-1 bracket the BVH update and
///   counters 2-3 bracket the render. Each pair is converted to microseconds
///   with the command queue's timestamp frequency.
/// - On other platforms, the latencies are read from
///   `bvhBuilder.counters.updateBVHLatencies` and `renderLatencies` inside
///   `bvhBuilder.counters.queue.sync`.
///
/// - Parameter frameID: Index of the current frame.
public func checkExecutionTime(frameID: Int) {
  // Frames 0, 1, and 2 are never reported.
  guard frameID >= 3 else {
    return
  }
  let slotID = frameID % 3

  #if os(Windows)
  // Copy the four timestamp counters out of the readback buffer.
  let readbackBuffer = bvhBuilder.counters
    .queryDestinationBuffers[slotID]
  var timestamps = [UInt64](repeating: .zero, count: 4)
  timestamps.withUnsafeMutableBytes { rawBytes in
    readbackBuffer.read(output: rawBytes)
  }

  // Ticks per second, needed to turn counter deltas into time.
  let ticksPerSecond = try! device.commandQueue.d3d12CommandQueue
    .GetTimestampFrequency()

  // Converts the counter pair starting at 'firstIndex' to microseconds.
  func microseconds(firstIndex: Int) -> Int {
    let beginTicks = timestamps[firstIndex]
    let endTicks = timestamps[firstIndex + 1]
    let tickCount = Double(endTicks - beginTicks)
    let seconds = tickCount / Double(ticksPerSecond)
    return Int(seconds * 1e6)
  }
  let updateMicroseconds = microseconds(firstIndex: 0)
  let renderMicroseconds = microseconds(firstIndex: 2)
  #else
  // Read both latencies inside the counters' queue.
  var updateMicroseconds: Int = 0
  var renderMicroseconds: Int = 0
  bvhBuilder.counters.queue.sync {
    updateMicroseconds = bvhBuilder.counters
      .updateBVHLatencies[slotID]
    renderMicroseconds = bvhBuilder.counters
      .renderLatencies[slotID]
  }
  #endif

  print("update BVH:", updateMicroseconds, "μs")
  print("render:", renderMicroseconds, "μs")
}
/// Registers the pending atom changes and encodes one BVH update into a
/// single command list.
///
/// The command list encodes, in this order: resource purging, general
/// counter setup, the transaction upload, four remove passes, three add
/// passes, two rebuild passes, and the crash buffer download.
///
/// Timing is recorded per platform:
/// - Windows: timestamp queries 0 and 1 bracket the encoded work and are
///   resolved into `queryDestinationBuffers[inFlightFrameID]`.
/// - macOS: a completion handler stores `gpuEndTime - gpuStartTime`, in
///   microseconds, into `updateBVHLatencies[inFlightFrameID]`.
///
/// - Parameter inFlightFrameID: Index of the in-flight frame slot. It is
///   forwarded to the passes that take it and selects the per-slot timing
///   storage.
public func updateBVH(inFlightFrameID: Int) {
// Snapshot the atom changes that the passes below will consume.
let transaction = atoms.registerChanges()
// print()
// print("removed:", transaction.removedIDs.count)
// print("moved:", transaction.movedIDs.count)
// print("added:", transaction.addedIDs.count)
device.commandQueue.withCommandList { commandList in
#if os(Windows)
// Timestamp query 0: start of the BVH update.
try! commandList.d3d12CommandList.EndQuery(
bvhBuilder.counters.queryHeap,
D3D12_QUERY_TYPE_TIMESTAMP,
0)
// Bind the descriptor heap.
commandList.setDescriptorHeap(descriptorHeap)
#endif
bvhBuilder.purgeResources(
commandList: commandList)
bvhBuilder.setupGeneralCounters(
commandList: commandList)
bvhBuilder.upload(
transaction: transaction,
commandList: commandList,
inFlightFrameID: inFlightFrameID)
// Encode the remove process.
bvhBuilder.removeProcess1(
commandList: commandList,
inFlightFrameID: inFlightFrameID)
bvhBuilder.removeProcess2(
commandList: commandList)
bvhBuilder.removeProcess3(
commandList: commandList)
bvhBuilder.removeProcess4(
commandList: commandList)
// Encode the add process.
bvhBuilder.addProcess1(
commandList: commandList,
inFlightFrameID: inFlightFrameID)
bvhBuilder.addProcess2(
commandList: commandList)
bvhBuilder.addProcess3(
commandList: commandList,
inFlightFrameID: inFlightFrameID)
// Encode the rebuild process.
bvhBuilder.rebuildProcess1(
commandList: commandList)
bvhBuilder.rebuildProcess2(
commandList: commandList)
// Copy the crash buffer into this slot so 'checkCrashBuffer' can read it
// on a later frame.
bvhBuilder.counters.crashBuffer.download(
commandList: commandList,
inFlightFrameID: inFlightFrameID)
#if os(Windows)
// Timestamp query 1: end of the BVH update.
try! commandList.d3d12CommandList.EndQuery(
bvhBuilder.counters.queryHeap,
D3D12_QUERY_TYPE_TIMESTAMP,
1)
// Resolve two queries, starting at query 0, to offset 0 of this slot's
// destination buffer.
let destinationBuffer = bvhBuilder.counters
.queryDestinationBuffers[inFlightFrameID]
try! commandList.d3d12CommandList.ResolveQueryData(
bvhBuilder.counters.queryHeap,
D3D12_QUERY_TYPE_TIMESTAMP,
0,
2,
destinationBuffer.d3d12Resource,
0)
#endif
#if os(macOS)
// The completion handler captures this reference instead of 'self'.
// NOTE(review): 'nonisolated(unsafe)' presumably silences concurrency
// checking for the capture; confirm that is the intent.
nonisolated(unsafe)
let selfReference = self
commandList.mtlCommandBuffer.addCompletedHandler { commandBuffer in
// Write the latency inside the same queue that 'checkExecutionTime'
// reads it from.
selfReference.bvhBuilder.counters.queue.sync {
var executionTime = commandBuffer.gpuEndTime
executionTime -= commandBuffer.gpuStartTime
let latencyMicroseconds = Int(executionTime * 1e6)
selfReference.bvhBuilder.counters
.updateBVHLatencies[inFlightFrameID] = latencyMicroseconds
}
}
#endif
}
}
/// Encodes a command list that resets the motion vectors and the voxel
/// marks, then clears `bvhBuilder.transactionArgs`.
///
/// On Windows, the descriptor heap is bound before the resets and a compute
/// UAV barrier is encoded after them.
///
/// - Parameter inFlightFrameID: Index of the in-flight frame slot, forwarded
///   to `resetMotionVectors`.
public func forgetIdleState(inFlightFrameID: Int) {
  device.commandQueue.withCommandList { list in
    #if os(Windows)
    // Bind the descriptor heap.
    list.setDescriptorHeap(descriptorHeap)
    #endif

    bvhBuilder.resetMotionVectors(
      commandList: list, inFlightFrameID: inFlightFrameID)
    bvhBuilder.resetVoxelMarks(commandList: list)

    #if os(Windows)
    bvhBuilder.computeUAVBarrier(commandList: list)
    #endif
  }

  // Delete the transactionArgs state variable.
  bvhBuilder.transactionArgs = nil
}
}
// TODO: Before finishing the acceleration structure PR, remove these debugging
// utilities from the code base.
extension Application {
// Buffers handed to 'downloadDebugOutput'. Holding a reference here
// circumvents a flaky crash by keeping each buffer alive while the command
// list executes.
//
// Within this file, entries are appended but never removed, so the array
// grows with every download. Do not abuse this by calling any of the 'Debug'
// functions more than once in a single program execution.
nonisolated(unsafe)
private static var downloadBuffers: [Buffer] = []
/// Reads ten 32-bit words from the general counters buffer.
///
/// - Returns: An array of ten `UInt32` values filled by
///   `downloadDebugOutput` from `bvhBuilder.counters.general`.
public func downloadGeneralCounters() -> [UInt32] {
  var counters = [UInt32](repeating: .zero, count: 10)
  downloadDebugOutput(
    &counters, copySourceBuffer: bvhBuilder.counters.general)
  return counters
}
/// Flushes the command queue and copies the contents of a buffer into
/// `outputData`.
///
/// On macOS the source buffer is read directly. On other platforms a new
/// buffer of type `.output` with the same size is created, and on Windows a
/// command list copies the source buffer into it before the flush. The
/// buffer that is read is appended to `Self.downloadBuffers`.
///
/// - Parameters:
///   - outputData: Array whose existing storage is overwritten with the
///     buffer contents. Its byte count determines how much is read.
///   - copySourceBuffer: Buffer holding the data to download.
private func downloadDebugOutput<T>(
  _ outputData: inout [T],
  copySourceBuffer: Buffer
) {
  #if os(macOS)
  let readbackBuffer = copySourceBuffer
  #else
  // Create a separate buffer of the same size to read from.
  let sourceBuffer = copySourceBuffer
  var readbackDesc = BufferDescriptor()
  readbackDesc.device = device
  readbackDesc.size = sourceBuffer.size
  readbackDesc.type = .output
  let readbackBuffer = Buffer(descriptor: readbackDesc)
  #endif

  // Hold a reference to the buffer while the command list executes.
  Self.downloadBuffers.append(readbackBuffer)

  #if os(Windows)
  device.commandQueue.withCommandList { list in
    list.download(
      nativeBuffer: sourceBuffer,
      outputBuffer: readbackBuffer)
  }
  #endif

  // Flush the queue before reading the buffer.
  device.commandQueue.flush()
  outputData.withUnsafeMutableBytes { rawBytes in
    readbackBuffer.read(output: rawBytes)
  }
}
}
import Foundation
import HDL
import MolecularRenderer
import QuaternionModule
// Current task:
// - Archive the current state of the rotating beam benchmark in Tests.
// - Clean up the backend code, archive 'checkExecutionTime' to a GitHub gist.
// - That will conclude the implementation of the BVH update process.
// MARK: - Compile Structures
/// Adds one hydrogen atom, and a bond to it, for every nonbonding orbital in
/// `topology`.
///
/// Each hydrogen is placed along the orbital direction, at a distance equal
/// to the sum of the parent atom's covalent radius and hydrogen's covalent
/// radius. New atoms and bonds are collected first and appended at the end,
/// so the existing atom indices do not change.
///
/// - Parameter topology: The structure to passivate, modified in place.
func passivate(topology: inout Topology) {
  let orbitalLists = topology.nonbondingOrbitals()

  // Hydrogen indices start after the atoms that already exist.
  let existingAtomCount = topology.atoms.count
  var hydrogens: [Atom] = []
  var hydrogenBonds: [SIMD2<UInt32>] = []

  for parentID in topology.atoms.indices {
    let parent = topology.atoms[parentID]
    let bondLength = parent.element.covalentRadius
      + Element.hydrogen.covalentRadius

    for direction in orbitalLists[parentID] {
      let hydrogenID = existingAtomCount + hydrogens.count
      let hydrogenPosition = parent.position + bondLength * direction
      hydrogens.append(
        Atom(position: hydrogenPosition, element: .hydrogen))
      hydrogenBonds.append(
        SIMD2(UInt32(parentID), UInt32(hydrogenID)))
    }
  }

  topology.atoms += hydrogens
  topology.bonds += hydrogenBonds
}
// Geometry parameters shared by 'createCross', 'createBeam', and
// 'createApplication'.

// Extent of the beam along h, in lattice cells. Half of this value offsets
// the carving planes of the cross from its center.
let crossThickness: Int = 16
// Extent of the cross along h and k, and of the beam along k, in lattice
// cells.
let crossSize: Int = 120
// Extent of the beam along l, in lattice cells.
let beamDepth: Int = 112
// World dimension passed to the application descriptor. Half of it is
// subtracted from every atom's z coordinate.
// NOTE(review): presumably in nanometers, like the atom positions; confirm.
let worldDimension: Float = 96
/// Builds the passivated cross structure and moves it into its benchmark
/// position.
///
/// The lattice spans `crossSize` x `crossSize` x 2 cells of checkerboard
/// silicon/carbon. For each of the four (±h, ±k) direction pairs, a volume
/// bounded by two planes offset `crossThickness / 2` cells from the center
/// is replaced with empty space. The result is compiled with
/// `Reconstruction`, passivated with hydrogen, and translated.
///
/// Values that do not depend on the loop variable (the lattice constant, the
/// centering shift, half the world dimension, and the carving offsets) are
/// computed once instead of once per iteration.
///
/// - Returns: The translated, hydrogen-passivated topology.
func createCross() -> Topology {
  let lattice = Lattice<Cubic> { h, k, l in
    Bounds {
      Float(crossSize) * h +
      Float(crossSize) * k +
      Float(2) * l
    }
    Material { .checkerboard(.silicon, .carbon) }

    // These do not change between the four direction pairs.
    let halfSize = Float(crossSize) / 2
    let halfThickness = Float(crossThickness) / 2
    let center = halfSize * h + halfSize * k

    for isPositiveX in [false, true] {
      for isPositiveY in [false, true] {
        let directionX = isPositiveX ? h : -h
        let directionY = isPositiveY ? k : -k
        Volume {
          Concave {
            Convex {
              Origin { center + halfThickness * directionX }
              Plane { directionX }
            }
            Convex {
              Origin { center + halfThickness * directionY }
              Plane { directionY }
            }
          }
          Replace { .empty }
        }
      }
    }
  }

  var reconstruction = Reconstruction()
  reconstruction.atoms = lattice.atoms
  reconstruction.material = .checkerboard(.silicon, .carbon)
  var topology = reconstruction.compile()
  passivate(topology: &topology)

  // Loop-invariant shifts, hoisted out of the per-atom loop.
  let latticeConstant = Constant(.square) {
    .checkerboard(.silicon, .carbon)
  }
  let centeringShift = (Float(crossSize) / 2) * latticeConstant
  let halfWorldDimension = worldDimension / 2

  for atomID in topology.atoms.indices {
    var atom = topology.atoms[atomID]

    // This offset captures just one Si and one C for each unit cell on the
    // (001) surface. By capture, I mean that atom.position.z > 0. We want a
    // small number of static atoms in a 2 nm voxel that overlaps some moving
    // atoms.
    atom.position += SIMD3(0, 0, -0.800)

    // Shift the origin to allow larger beam depth, with fixed world
    // dimension.
    atom.position.z -= halfWorldDimension
    atom.position.z += 8

    // Shift so the structure is centered in X and Y.
    atom.position.x -= centeringShift
    atom.position.y -= centeringShift

    topology.atoms[atomID] = atom
  }
  return topology
}
/// Builds the passivated beam structure and moves it into its benchmark
/// position.
///
/// The lattice spans `crossThickness` x `crossSize` x `beamDepth` cells of
/// checkerboard silicon/carbon. It is compiled with `Reconstruction`,
/// passivated with hydrogen, and translated.
///
/// Values that do not depend on the atom (the lattice constant, both
/// centering shifts, and half the world dimension) are computed once instead
/// of once per atom.
///
/// - Returns: The translated, hydrogen-passivated topology.
func createBeam() -> Topology {
  let lattice = Lattice<Cubic> { h, k, l in
    Bounds {
      Float(crossThickness) * h +
      Float(crossSize) * k +
      Float(beamDepth) * l
    }
    Material { .checkerboard(.silicon, .carbon) }
  }

  var reconstruction = Reconstruction()
  reconstruction.atoms = lattice.atoms
  reconstruction.material = .checkerboard(.silicon, .carbon)
  var topology = reconstruction.compile()
  passivate(topology: &topology)

  // Loop-invariant shifts, hoisted out of the per-atom loop.
  let latticeConstant = Constant(.square) {
    .checkerboard(.silicon, .carbon)
  }
  let centeringShiftX = (Float(crossThickness) / 2) * latticeConstant
  let centeringShiftY = (Float(crossSize) / 2) * latticeConstant
  let halfWorldDimension = worldDimension / 2

  for atomID in topology.atoms.indices {
    var atom = topology.atoms[atomID]

    // Capture just one Si and one C for each unit cell. This time, capturing
    // happens if atom.position.z < 0.
    atom.position += SIMD3(0, 0, -0.090)

    // Shift so both captured surfaces fall in the [0, 2] nm range for
    // sharing a voxel.
    atom.position.z += 2

    // Shift the origin to allow larger beam depth, with fixed world
    // dimension.
    atom.position.z -= halfWorldDimension
    atom.position.z += 8

    // Shift so the structure is centered in X and Y.
    atom.position.x -= centeringShiftX
    atom.position.y -= centeringShiftY

    topology.atoms[atomID] = atom
  }
  return topology
}
/// Prints a blank line, the atom count, and the component-wise minimum and
/// maximum of all atom positions in `topology`.
///
/// For an empty topology, the printed bounds are the initial sentinels
/// (`greatestFiniteMagnitude` and its negation).
///
/// - Parameter topology: The structure to summarize.
func analyze(topology: Topology) {
  print()
  print("atom count:", topology.atoms.count)

  // Start from sentinels that any real position will replace.
  var lowerCorner = SIMD3<Float>(repeating: .greatestFiniteMagnitude)
  var upperCorner = SIMD3<Float>(repeating: -.greatestFiniteMagnitude)
  for atom in topology.atoms {
    let point = atom.position
    let isBelow = point .< lowerCorner
    let isAbove = point .> upperCorner
    lowerCorner.replace(with: point, where: isBelow)
    upperCorner.replace(with: point, where: isAbove)
  }

  print("minimum:", lowerCorner)
  print("maximum:", upperCorner)
}
// Compile both structures once at startup. 'cross' is uploaded a single time
// by the benchmark loop, while 'beam' is the source that 'createRotatedBeam'
// copies and rotates every frame.
let cross = createCross()
let beam = createBeam()
// Print the atom count and bounding box of each structure.
analyze(topology: cross)
analyze(topology: beam)
// MARK: - Rotation Animation
/// Returns a copy of `beam` rotated about the Z axis by 3 degrees per frame,
/// and prints how long the CPU-side rotation took.
///
/// 0.5 Hz corresponds to 3 degrees/frame at 60 Hz.
///
/// WARNING: Systems with different display refresh rates may have different
/// benchmark results. The benchmark should be robust to this variation in
/// degrees/frame.
///
/// Solution: animate by clock.frames instead of the actual time. On 120 Hz
/// systems, the benchmark will rotate 2x faster than on 60 Hz systems.
///
/// - Parameter frameID: Frame index; the rotation angle is `3 * frameID`
///   degrees.
/// - Returns: The rotated topology.
@MainActor
func createRotatedBeam(frameID: Int) -> Topology {
  let angleDegrees: Float = 3 * Float(frameID)
  let rotation = Quaternion<Float>(
    angle: angleDegrees * Float.pi / 180,
    axis: SIMD3(0, 0, 1))

  // Circumvent a massive CPU-side bottleneck from 'rotation.act()': rotate
  // the three basis vectors once, then combine them per atom.
  let rotatedX = rotation.act(on: SIMD3<Float>(1, 0, 0))
  let rotatedY = rotation.act(on: SIMD3<Float>(0, 1, 0))
  let rotatedZ = rotation.act(on: SIMD3<Float>(0, 0, 1))

  // Only the per-atom loop is timed.
  let startDate = Date()
  var rotatedBeam = beam
  for atomID in rotatedBeam.atoms.indices {
    var atom = rotatedBeam.atoms[atomID]
    let original = atom.position

    var transformed: SIMD3<Float> = .zero
    transformed += rotatedX * original[0]
    transformed += rotatedY * original[1]
    transformed += rotatedZ * original[2]

    atom.position = transformed
    rotatedBeam.atoms[atomID] = atom
  }
  let elapsedSeconds = Date().timeIntervalSince(startDate)

  let elapsedMicroseconds = Int(elapsedSeconds * 1e6)
  print("rotate:", elapsedMicroseconds, "μs")
  return rotatedBeam
}
// MARK: - Launch Application
/// Creates the device, the display, and the application used by the
/// benchmark.
///
/// The device is the one identified by `Device.fastestDeviceID`. The display
/// uses a 1080 x 1080 frame buffer on the device's fastest monitor. The
/// application runs with an upscale factor of 1 and the file-level
/// `worldDimension`.
///
/// - Returns: The configured application.
@MainActor
func createApplication() -> Application {
  // Device.
  var deviceDescriptor = DeviceDescriptor()
  deviceDescriptor.deviceID = Device.fastestDeviceID
  let device = Device(descriptor: deviceDescriptor)

  // Display.
  var displayDescriptor = DisplayDescriptor()
  displayDescriptor.device = device
  displayDescriptor.frameBufferSize = SIMD2<Int>(1080, 1080)
  displayDescriptor.monitorID = device.fastestMonitorID
  let display = Display(descriptor: displayDescriptor)

  // Application.
  var applicationDescriptor = ApplicationDescriptor()
  applicationDescriptor.device = device
  applicationDescriptor.display = display
  applicationDescriptor.upscaleFactor = 1
  applicationDescriptor.addressSpaceSize = 4_000_000
  applicationDescriptor.voxelAllocationSize = 500_000_000
  applicationDescriptor.worldDimension = worldDimension
  return Application(descriptor: applicationDescriptor)
}
// The single application instance shared by the code below.
let application = createApplication()
// The interactive render-and-present loop is compiled out. The benchmark in
// the '#else' branch runs instead.
#if false
application.run {
let image = application.render()
application.present(image: image)
}
#else
/// Downloads the general counters, prints the voxel and slot counts, and
/// aborts if the words at indices 1, 2, 7, or 8 are not 1.
///
/// The checks and prints run in index order, so a failure at indices 1-2
/// aborts before the slot counts are printed.
@MainActor
func analyzeGeneralCounters() {
  let counters = application.downloadGeneralCounters()

  print("atoms removed voxel count:", counters[0])
  if counters[1] != 1 || counters[2] != 1 {
    fatalError("Indirect dispatch arguments were malformatted.")
  }

  print("vacant slot count:", counters[4])
  print("allocated slot count:", counters[5])

  print("rebuilt voxel count:", counters[6])
  if counters[7] != 1 || counters[8] != 1 {
    fatalError("Indirect dispatch arguments were malformatted.")
  }
}
// Upload the cross once. It occupies atom IDs [0, cross.atoms.count).
for (atomID, atom) in cross.atoms.enumerated() {
  application.atoms[atomID] = atom
}

// Run 16 benchmark frames. Each frame rewrites the beam's atoms at a new
// rotation, then checks the results of earlier frames and encodes the next
// BVH update.
for frameID in 0..<16 {
  // print()
  // print("===============")
  // print("=== frame \(frameID) ===")
  // print("===============")
  // print()
  // print("rotation: \(frameID * 3) degrees")
  print()

  // The beam's atom IDs start right after the cross.
  let beamBaseID = cross.atoms.count
  let rotatedBeam = createRotatedBeam(frameID: frameID)
  for (atomID, atom) in rotatedBeam.atoms.enumerated() {
    application.atoms[beamBaseID + atomID] = atom
  }

  // Three in-flight slots are cycled with 'frameID % 3'.
  let inFlightFrameID = frameID % 3
  application.checkCrashBuffer(frameID: frameID)
  application.checkExecutionTime(frameID: frameID)
  application.updateBVH(inFlightFrameID: inFlightFrameID)
  application.forgetIdleState(inFlightFrameID: inFlightFrameID)

  // print()
  // analyzeGeneralCounters()
}
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment