Created
October 16, 2025 12:47
-
-
Save philipturner/d8dd53c319ba31056cffb18ff79394ab to your computer and use it in GitHub Desktop.
Rotating beam benchmark
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // For profiling with D3D12 timestamp queries. | |
| #if os(Windows) | |
| import SwiftCOM | |
| import WinSDK | |
| #endif | |
| // TODO: Before finishing the acceleration structure PR, remove the public | |
| // modifier for the functions in this extension. | |
extension Application {
  /// Aborts the program when the crash buffer of an earlier frame reports a
  /// problem.
  ///
  /// Frames 0 through 2 are skipped. From frame 3 onward, the crash buffer for
  /// in-flight slot `frameID % 3` is read back as 32-bit words. Unless the
  /// first word equals 1, a `CrashInfo` is assembled from the buffer contents
  /// plus the current application state, and its message is handed to
  /// `fatalError`.
  ///
  /// - Parameter frameID: Running frame index; selects the in-flight slot.
  public func checkCrashBuffer(frameID: Int) {
    guard frameID >= 3 else {
      return
    }

    // One array element per 4 units of 'crashBufferSize' (presumably the size
    // is in bytes, making each element one 32-bit word).
    let wordCount = CounterResources.crashBufferSize / 4
    var words = [UInt32](repeating: .zero, count: wordCount)
    bvhBuilder.counters.crashBuffer.read(
      data: &words, inFlightFrameID: frameID % 3)

    // A leading word of 1 is the only value that does not trigger a crash.
    guard words[0] != 1 else {
      return
    }

    var descriptor = CrashInfoDescriptor()
    descriptor.bufferContents = words
    descriptor.clockFrames = clock.frames
    descriptor.displayFrameRate = display.frameRate
    descriptor.frameID = frameID
    descriptor.memorySlotCount = bvhBuilder.voxels.memorySlotCount
    descriptor.worldDimension = bvhBuilder.voxels.worldDimension
    fatalError(CrashInfo(descriptor: descriptor).message)
  }

  /// Prints the GPU latencies recorded for in-flight slot `frameID % 3`.
  ///
  /// Frames 0 through 2 are skipped. On Windows, four 64-bit timestamps are
  /// read from the slot's query destination buffer and converted to
  /// microseconds with the command queue's timestamp frequency. On other
  /// platforms, the stored latencies are read inside the counters' queue.
  ///
  /// - Parameter frameID: Running frame index; selects the in-flight slot.
  public func checkExecutionTime(frameID: Int) {
    guard frameID >= 3 else {
      return
    }
    let slot = frameID % 3

    #if os(Windows)
    let readbackBuffer = bvhBuilder.counters.queryDestinationBuffers[slot]
    var timestamps = [UInt64](repeating: .zero, count: 4)
    timestamps.withUnsafeMutableBytes {
      readbackBuffer.read(output: $0)
    }
    let ticksPerSecond = try! device.commandQueue.d3d12CommandQueue
      .GetTimestampFrequency()

    // Converts the timestamp pair (startIndex, startIndex + 1) into elapsed
    // microseconds.
    func latencyMicroseconds(startIndex: Int) -> Int {
      let tickCount = timestamps[startIndex + 1] - timestamps[startIndex]
      let seconds = Double(tickCount) / Double(ticksPerSecond)
      return Int(seconds * 1e6)
    }
    let updateBVHLatency = latencyMicroseconds(startIndex: 0)
    let renderLatency = latencyMicroseconds(startIndex: 2)
    #else
    var updateBVHLatency: Int = 0
    var renderLatency: Int = 0
    bvhBuilder.counters.queue.sync {
      updateBVHLatency = bvhBuilder.counters.updateBVHLatencies[slot]
      renderLatency = bvhBuilder.counters.renderLatencies[slot]
    }
    #endif

    print("update BVH:", updateBVHLatency, "μs")
    print("render:", renderLatency, "μs")
  }

  /// Registers the pending atom changes and encodes the whole BVH update into
  /// a single command list.
  ///
  /// The encoded stages, in order: resource purge, general counter setup,
  /// transaction upload, the four remove passes, the three add passes, the two
  /// rebuild passes, and the crash buffer download. On Windows the work is
  /// bracketed by timestamp queries 0 and 1, which are resolved into the
  /// query destination buffer of this slot. On macOS a completion handler
  /// stores the GPU execution time (in microseconds) for this slot.
  ///
  /// - Parameter inFlightFrameID: In-flight slot used for per-frame resources.
  public func updateBVH(inFlightFrameID: Int) {
    let transaction = atoms.registerChanges()

    // Uncomment to log the size of the transaction.
    // print()
    // print("removed:", transaction.removedIDs.count)
    // print("moved:", transaction.movedIDs.count)
    // print("added:", transaction.addedIDs.count)

    device.commandQueue.withCommandList { commandList in
      #if os(Windows)
      // Timestamp query 0: start of the BVH update.
      try! commandList.d3d12CommandList.EndQuery(
        bvhBuilder.counters.queryHeap,
        D3D12_QUERY_TYPE_TIMESTAMP,
        0)

      // Bind the descriptor heap.
      commandList.setDescriptorHeap(descriptorHeap)
      #endif

      bvhBuilder.purgeResources(commandList: commandList)
      bvhBuilder.setupGeneralCounters(commandList: commandList)
      bvhBuilder.upload(
        transaction: transaction,
        commandList: commandList,
        inFlightFrameID: inFlightFrameID)

      // Encode the remove process.
      bvhBuilder.removeProcess1(
        commandList: commandList, inFlightFrameID: inFlightFrameID)
      bvhBuilder.removeProcess2(commandList: commandList)
      bvhBuilder.removeProcess3(commandList: commandList)
      bvhBuilder.removeProcess4(commandList: commandList)

      // Encode the add process.
      bvhBuilder.addProcess1(
        commandList: commandList, inFlightFrameID: inFlightFrameID)
      bvhBuilder.addProcess2(commandList: commandList)
      bvhBuilder.addProcess3(
        commandList: commandList, inFlightFrameID: inFlightFrameID)

      // Encode the rebuild process.
      bvhBuilder.rebuildProcess1(commandList: commandList)
      bvhBuilder.rebuildProcess2(commandList: commandList)

      bvhBuilder.counters.crashBuffer.download(
        commandList: commandList, inFlightFrameID: inFlightFrameID)

      #if os(Windows)
      // Timestamp query 1: end of the BVH update.
      try! commandList.d3d12CommandList.EndQuery(
        bvhBuilder.counters.queryHeap,
        D3D12_QUERY_TYPE_TIMESTAMP,
        1)

      // Resolve queries 0 and 1 (start index 0, count 2) to offset 0 of the
      // destination buffer that belongs to this in-flight slot.
      let resolveTarget = bvhBuilder.counters
        .queryDestinationBuffers[inFlightFrameID]
      try! commandList.d3d12CommandList.ResolveQueryData(
        bvhBuilder.counters.queryHeap,
        D3D12_QUERY_TYPE_TIMESTAMP,
        0,
        2,
        resolveTarget.d3d12Resource,
        0)
      #endif

      #if os(macOS)
      // The handler is invoked by Metal after the command buffer completes,
      // so 'self' is captured through an unchecked reference and the latency
      // array is only touched inside the counters' queue.
      nonisolated(unsafe)
      let unsafeSelf = self
      commandList.mtlCommandBuffer.addCompletedHandler { commandBuffer in
        unsafeSelf.bvhBuilder.counters.queue.sync {
          let seconds = commandBuffer.gpuEndTime - commandBuffer.gpuStartTime
          let microseconds = Int(seconds * 1e6)
          unsafeSelf.bvhBuilder.counters
            .updateBVHLatencies[inFlightFrameID] = microseconds
        }
      }
      #endif
    }
  }

  /// Encodes the resets that follow a BVH update and discards the transaction
  /// arguments held by the BVH builder.
  ///
  /// The command list resets the motion vectors for the given slot and the
  /// voxel marks; on Windows it additionally binds the descriptor heap first
  /// and ends with a compute UAV barrier.
  ///
  /// - Parameter inFlightFrameID: In-flight slot used for per-frame resources.
  public func forgetIdleState(inFlightFrameID: Int) {
    device.commandQueue.withCommandList { commandList in
      #if os(Windows)
      // Bind the descriptor heap.
      commandList.setDescriptorHeap(descriptorHeap)
      #endif

      bvhBuilder.resetMotionVectors(
        commandList: commandList, inFlightFrameID: inFlightFrameID)
      bvhBuilder.resetVoxelMarks(commandList: commandList)

      #if os(Windows)
      bvhBuilder.computeUAVBarrier(commandList: commandList)
      #endif
    }

    // Delete the transactionArgs state variable.
    bvhBuilder.transactionArgs = nil
  }
}
| // TODO: Before finishing the acceleration structure PR, remove these debugging | |
| // utilities from the code base. | |
extension Application {
  // Circumvent a flaky crash by holding a reference to the buffer while the
  // command list executes. Every download appends to this array and nothing
  // is ever removed, so do not abuse this by calling any of the 'Debug'
  // functions more than once in a single program execution.
  nonisolated(unsafe)
  private static var downloadBuffers: [Buffer] = []

  /// Downloads the first 10 general counters of the BVH builder.
  ///
  /// - Returns: The counters as 32-bit unsigned integers.
  public func downloadGeneralCounters() -> [UInt32] {
    var counters = [UInt32](repeating: .zero, count: 10)
    downloadDebugOutput(
      &counters, copySourceBuffer: bvhBuilder.counters.general)
    return counters
  }

  /// Copies the contents of a buffer into a CPU-side array.
  ///
  /// On macOS the source buffer is read directly. On other platforms a new
  /// buffer of type `.output` with the same size is allocated, and on Windows
  /// a download from the source into that buffer is encoded. In every case the
  /// command queue is flushed before the bytes are read.
  ///
  /// - Parameters:
  ///   - outputData: Array whose existing storage receives the bytes; its
  ///     current element count determines how much is read.
  ///   - copySourceBuffer: Buffer holding the data to inspect.
  private func downloadDebugOutput<T>(
    _ outputData: inout [T],
    copySourceBuffer: Buffer
  ) {
    #if os(macOS)
    let readableBuffer = copySourceBuffer
    #else
    let nativeBuffer = copySourceBuffer
    var readbackDesc = BufferDescriptor()
    readbackDesc.device = device
    readbackDesc.size = nativeBuffer.size
    readbackDesc.type = .output
    let readableBuffer = Buffer(descriptor: readbackDesc)
    #endif

    // Keep the buffer alive beyond the scope of this function.
    Self.downloadBuffers.append(readableBuffer)

    #if os(Windows)
    device.commandQueue.withCommandList { commandList in
      commandList.download(
        nativeBuffer: nativeBuffer,
        outputBuffer: readableBuffer)
    }
    #endif

    device.commandQueue.flush()
    outputData.withUnsafeMutableBytes {
      readableBuffer.read(output: $0)
    }
  }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import Foundation | |
| import HDL | |
| import MolecularRenderer | |
| import QuaternionModule | |
| // Current task: | |
| // - Archive the current state of the rotating beam benchmark in Tests. | |
| // - Clean up the backend code, archive 'checkExecutionTime' to a GitHub gist. | |
| // - That will conclude the implementation of the BVH update process. | |
| // MARK: - Compile Structures | |
/// Caps every nonbonding orbital in the topology with a hydrogen atom.
///
/// Each hydrogen is placed along the orbital direction, at a distance equal to
/// the sum of the covalent radii of the parent element and hydrogen. The new
/// atoms and the parent-hydrogen bonds are appended to the topology after all
/// orbitals have been visited.
///
/// - Parameter topology: Structure to passivate in place.
func passivate(topology: inout Topology) {
  let orbitalLists = topology.nonbondingOrbitals()
  let originalAtomCount = topology.atoms.count

  var hydrogens: [Atom] = []
  var hydrogenBonds: [SIMD2<UInt32>] = []
  for parentID in topology.atoms.indices {
    let parent = topology.atoms[parentID]
    for orbital in orbitalLists[parentID] {
      var bondLength = parent.element.covalentRadius
      bondLength += Element.hydrogen.covalentRadius
      let hydrogen = Atom(
        position: parent.position + bondLength * orbital,
        element: .hydrogen)

      // New atoms are appended after the existing ones, so the next free
      // index is the original count plus the hydrogens created so far.
      let hydrogenID = originalAtomCount + hydrogens.count
      hydrogens.append(hydrogen)
      hydrogenBonds.append(
        SIMD2(UInt32(parentID), UInt32(hydrogenID)))
    }
  }

  topology.atoms += hydrogens
  topology.bonds += hydrogenBonds
}
/// Width of each arm of the cross, and of the beam along `h`, measured in
/// lattice cells.
let crossThickness = 16

/// Side length of the cross along `h` and `k`, and of the beam along `k`,
/// measured in lattice cells.
let crossSize = 120

/// Extent of the beam along `l`, measured in lattice cells.
let beamDepth = 112

/// World dimension passed to the application descriptor. Half of this value
/// is also subtracted from the z coordinate of every atom.
let worldDimension: Float = 96
/// Builds the static, hydrogen-passivated structure of the benchmark.
///
/// A slab of `crossSize` x `crossSize` x 2 cells of the silicon/carbon
/// checkerboard material is compiled, with one region carved away for each of
/// the four (±h, ±k) direction pairs (presumably leaving the cross shape the
/// function is named after). The atoms are then translated along z and
/// centered in x and y.
///
/// - Returns: The compiled, passivated and translated topology.
func createCross() -> Topology {
  let lattice = Lattice<Cubic> { h, k, l in
    Bounds {
      Float(crossSize) * h +
      Float(crossSize) * k +
      Float(2) * l
    }
    Material { .checkerboard(.silicon, .carbon) }

    // These values are the same for all four cuts, so compute them once.
    let halfSize = Float(crossSize) / 2
    let halfThickness = Float(crossThickness) / 2
    let center = halfSize * h + halfSize * k

    for isPositiveX in [false, true] {
      for isPositiveY in [false, true] {
        let directionX = isPositiveX ? h : -h
        let directionY = isPositiveY ? k : -k

        Volume {
          Concave {
            Convex {
              Origin { center + halfThickness * directionX }
              Plane { directionX }
            }
            Convex {
              Origin { center + halfThickness * directionY }
              Plane { directionY }
            }
          }
          Replace { .empty }
        }
      }
    }
  }

  var reconstruction = Reconstruction()
  reconstruction.atoms = lattice.atoms
  reconstruction.material = .checkerboard(.silicon, .carbon)
  var topology = reconstruction.compile()
  passivate(topology: &topology)

  // The centering shift does not depend on the atom, so it is evaluated once
  // instead of once per atom.
  let latticeConstant = Constant(.square) {
    .checkerboard(.silicon, .carbon)
  }
  let halfSize = Float(crossSize) / 2
  let centeringShift = halfSize * latticeConstant

  for atomID in topology.atoms.indices {
    var atom = topology.atoms[atomID]

    // This offset captures just one Si and one C for each unit cell on the
    // (001) surface. By capture, I mean that atom.position.z > 0. We want a
    // small number of static atoms in a 2 nm voxel that overlaps some moving
    // atoms.
    atom.position += SIMD3(0, 0, -0.800)

    // Shift the origin to allow larger beam depth, with fixed world dimension.
    atom.position.z -= worldDimension / 2
    atom.position.z += 8

    // Shift so the structure is centered in X and Y.
    atom.position.x -= centeringShift
    atom.position.y -= centeringShift

    topology.atoms[atomID] = atom
  }
  return topology
}
/// Builds the hydrogen-passivated beam that the benchmark rotates.
///
/// A block of `crossThickness` x `crossSize` x `beamDepth` cells of the
/// silicon/carbon checkerboard material is compiled. The atoms are then
/// translated along z and centered in x and y.
///
/// - Returns: The compiled, passivated and translated topology.
func createBeam() -> Topology {
  let lattice = Lattice<Cubic> { h, k, l in
    Bounds {
      Float(crossThickness) * h +
      Float(crossSize) * k +
      Float(beamDepth) * l
    }
    Material { .checkerboard(.silicon, .carbon) }
  }

  var reconstruction = Reconstruction()
  reconstruction.atoms = lattice.atoms
  reconstruction.material = .checkerboard(.silicon, .carbon)
  var topology = reconstruction.compile()
  passivate(topology: &topology)

  // The centering shifts do not depend on the atom, so they are evaluated
  // once instead of once per atom.
  let latticeConstant = Constant(.square) {
    .checkerboard(.silicon, .carbon)
  }
  let halfThickness = Float(crossThickness) / 2
  let halfSize = Float(crossSize) / 2
  let centeringShiftX = halfThickness * latticeConstant
  let centeringShiftY = halfSize * latticeConstant

  for atomID in topology.atoms.indices {
    var atom = topology.atoms[atomID]

    // Capture just one Si and one C for each unit cell. This time, capturing
    // happens if atom.position.z < 0.
    atom.position += SIMD3(0, 0, -0.090)

    // Shift so both captured surfaces fall in the [0, 2] nm range for sharing
    // a voxel.
    atom.position.z += 2

    // Shift the origin to allow larger beam depth, with fixed world dimension.
    atom.position.z -= worldDimension / 2
    atom.position.z += 8

    // Shift so the structure is centered in X and Y.
    atom.position.x -= centeringShiftX
    atom.position.y -= centeringShiftY

    topology.atoms[atomID] = atom
  }
  return topology
}
/// Prints the atom count and the axis-aligned bounding box of a topology.
///
/// - Parameter topology: Structure whose atoms are inspected.
func analyze(topology: Topology) {
  print()
  print("atom count:", topology.atoms.count)

  // Start from an inverted box so that the first atom replaces both corners.
  var lowerCorner = SIMD3<Float>(repeating: .greatestFiniteMagnitude)
  var upperCorner = SIMD3<Float>(repeating: -.greatestFiniteMagnitude)
  for atom in topology.atoms {
    let point = atom.position
    lowerCorner = lowerCorner.replacing(
      with: point, where: point .< lowerCorner)
    upperCorner = upperCorner.replacing(
      with: point, where: point .> upperCorner)
  }

  print("minimum:", lowerCorner)
  print("maximum:", upperCorner)
}
// Compile both structures once at startup, then report their atom counts and
// bounding boxes (cross first, beam second).
let cross = createCross()
let beam = createBeam()
for topology in [cross, beam] {
  analyze(topology: topology)
}
| // MARK: - Rotation Animation | |
/// Returns a copy of the beam rotated about the z axis by 3 degrees per frame,
/// and prints how long the CPU-side rotation took.
///
/// The angle depends only on the frame index (0.5 Hz at 60 Hz, i.e. 3 degrees
/// per frame), not on wall-clock time.
///
/// WARNING: Systems with different display refresh rates may have different
/// benchmark results. Animating by frame count keeps the degrees/frame fixed;
/// on 120 Hz systems the beam rotates 2x faster than on 60 Hz systems.
///
/// - Parameter frameID: Index of the frame being prepared.
/// - Returns: The beam with every atom position rotated.
@MainActor
func createRotatedBeam(frameID: Int) -> Topology {
  let angleDegrees: Float = 3 * Float(frameID)
  let rotation = Quaternion<Float>(
    angle: angleDegrees * Float.pi / 180,
    axis: SIMD3(0, 0, 1))

  // Circumvent a massive CPU-side bottleneck from 'rotation.act()': rotate the
  // three basis vectors once, then transform each atom with multiply-adds.
  let rotatedX = rotation.act(on: SIMD3<Float>(1, 0, 0))
  let rotatedY = rotation.act(on: SIMD3<Float>(0, 1, 0))
  let rotatedZ = rotation.act(on: SIMD3<Float>(0, 0, 1))

  let startDate = Date()
  var rotatedBeam = beam
  for atomID in rotatedBeam.atoms.indices {
    let source = rotatedBeam.atoms[atomID].position
    var destination: SIMD3<Float> = .zero
    destination += rotatedX * source.x
    destination += rotatedY * source.y
    destination += rotatedZ * source.z
    rotatedBeam.atoms[atomID].position = destination
  }
  let elapsedSeconds = Date().timeIntervalSince(startDate)

  print("rotate:", Int(elapsedSeconds * 1e6), "μs")
  return rotatedBeam
}
| // MARK: - Launch Application | |
/// Creates the device, the display and the application used by the benchmark.
///
/// The fastest device and its fastest monitor are selected, with a 1080 x 1080
/// frame buffer and no upscaling. The application receives the file-level
/// `worldDimension`.
///
/// - Returns: The configured application.
@MainActor
func createApplication() -> Application {
  // Device: pick the fastest one available.
  var deviceDescriptor = DeviceDescriptor()
  deviceDescriptor.deviceID = Device.fastestDeviceID
  let device = Device(descriptor: deviceDescriptor)

  // Display: square frame buffer on the fastest monitor of that device.
  var displayDescriptor = DisplayDescriptor()
  displayDescriptor.device = device
  displayDescriptor.frameBufferSize = SIMD2<Int>(1080, 1080)
  displayDescriptor.monitorID = device.fastestMonitorID
  let display = Display(descriptor: displayDescriptor)

  // Application: memory budgets and world size for the benchmark.
  var applicationDescriptor = ApplicationDescriptor()
  applicationDescriptor.device = device
  applicationDescriptor.display = display
  applicationDescriptor.upscaleFactor = 1
  applicationDescriptor.addressSpaceSize = 4_000_000
  applicationDescriptor.voxelAllocationSize = 500_000_000
  applicationDescriptor.worldDimension = worldDimension
  return Application(descriptor: applicationDescriptor)
}
let application = createApplication()

#if false

// Interactive mode: render and present a frame on every display refresh.
application.run {
  let image = application.render()
  application.present(image: image)
}

#else

/// Downloads the general counters, prints them, and aborts if the words that
/// are expected to equal 1 (indices 1, 2, 7 and 8) hold anything else.
@MainActor
func analyzeGeneralCounters() {
  let counters = application.downloadGeneralCounters()

  print("atoms removed voxel count:", counters[0])
  guard counters[1] == 1, counters[2] == 1 else {
    fatalError("Indirect dispatch arguments were malformatted.")
  }

  print("vacant slot count:", counters[4])
  print("allocated slot count:", counters[5])
  print("rebuilt voxel count:", counters[6])
  guard counters[7] == 1, counters[8] == 1 else {
    fatalError("Indirect dispatch arguments were malformatted.")
  }
}

// The static cross occupies the first atom IDs.
for (atomID, atom) in cross.atoms.enumerated() {
  application.atoms[atomID] = atom
}

// Headless benchmark: 16 frames, each rotating the beam and updating the BVH.
for frameID in 0..<16 {
  // Uncomment for a per-frame banner.
  // print()
  // print("===============")
  // print("=== frame \(frameID) ===")
  // print("===============")
  // print()
  // print("rotation: \(frameID * 3) degrees")
  print()

  // The beam atoms are stored directly after the cross atoms.
  let rotatedBeam = createRotatedBeam(frameID: frameID)
  let offset = cross.atoms.count
  for (atomID, atom) in rotatedBeam.atoms.enumerated() {
    application.atoms[offset + atomID] = atom
  }

  application.checkCrashBuffer(frameID: frameID)
  application.checkExecutionTime(frameID: frameID)

  let inFlightFrameID = frameID % 3
  application.updateBVH(inFlightFrameID: inFlightFrameID)
  application.forgetIdleState(inFlightFrameID: inFlightFrameID)

  // Uncomment to inspect the general counters after each frame.
  // print()
  // analyzeGeneralCounters()
}

#endif
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment