Created
December 6, 2025 07:02
-
-
Save sithumonline/93d7b0cf9b013f632de29bd4d019fb88 to your computer and use it in GitHub Desktop.
GPU vs CPU: When Metal Obliterates Go in Parallel Workloads
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Compile the Metal shader to Shaders.air, then package it as default.metallib | |
| xcrun -sdk macosx metal -c Shaders.metal -o Shaders.air | |
| xcrun -sdk macosx metallib Shaders.air -o default.metallib | |
| # Compile the Swift host program (linking Metal and Foundation) and run it | |
| swiftc main.swift -o metal_fib -framework Metal -framework Foundation | |
| ./metal_fib | |
| go build -ldflags="-s -w" -o mprocessing-go main.go | # Build the Go benchmark binary; -s -w omit the symbol table and DWARF debug info
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package main | |
| import ( | |
| "log" | |
| "sync" | |
| "time" | |
| ) | |
// simulateWork returns the n-th Fibonacci number, computed iteratively.
// Any n of 1 or less is returned unchanged.
func simulateWork(n int) int {
	if n < 2 {
		return n
	}

	prev, curr := 0, 1
	// Advancing the pair n-1 times moves curr from F(1) to F(n).
	for remaining := n - 1; remaining > 0; remaining-- {
		prev, curr = curr, prev+curr
	}
	return curr
}
| // worker processes jobs from its dedicated jobs channel, does some work, | |
| // then sends the result on the results channel. | |
| func worker(id int, jobs <-chan int, results chan<- int, wg *sync.WaitGroup) { | |
| defer wg.Done() | |
| log.Printf("Worker %d: starting\n", id) | |
| for j := range jobs { | |
| log.Printf("Worker %d: started job %d\n", id, j) | |
| result := simulateWork(45) // Example: Calculate the 45th Fibonacci number | |
| log.Printf("Worker %d: finished job %d with result %d\n", id, j, result) | |
| results <- result | |
| } | |
| } | |
| func main() { | |
| start := time.Now() // Start timer | |
| const numJobs = 1_000_000 // 5 * 100 | |
| const numWorkers = 3 * 100 | |
| // Create a dedicated jobs channel for each worker | |
| workerJobs := make([]chan int, numWorkers) | |
| for i := range workerJobs { | |
| workerJobs[i] = make(chan int, numJobs) | |
| } | |
| results := make(chan int, numJobs) | |
| var wg sync.WaitGroup | |
| // Start the worker goroutines | |
| for w := 1; w <= numWorkers; w++ { | |
| wg.Add(1) | |
| go worker(w, workerJobs[w-1], results, &wg) | |
| } | |
| log.Println("Main: All workers started") | |
| // Send jobs to specific workers | |
| for j := 1; j <= numJobs; j++ { | |
| workerID := (j - 1) % numWorkers // Assign jobs in a round-robin manner | |
| workerJobs[workerID] <- j | |
| log.Printf("Main: sent job %d to worker %d\n", j, workerID+1) | |
| } | |
| // Close all worker job channels | |
| for _, ch := range workerJobs { | |
| close(ch) | |
| } | |
| // Wait for all workers to finish, then close results | |
| wg.Wait() | |
| close(results) | |
| // Collect and print results | |
| for res := range results { | |
| log.Printf("Result received: %d\n", res) | |
| } | |
| elapsed := time.Since(start) // End timer | |
| log.Printf("Execution time: %s\n", elapsed) | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import Metal | |
| import Foundation | |
/// Runs the GPU side of the benchmark: dispatches `numJobs` threads of the
/// `fibonacci_kernel` compute function, waits for the GPU to finish, prints
/// the first few results and the total wall-clock time.
///
/// The measured time covers everything from device lookup to reading the
/// results back, not only kernel execution. Any setup or execution failure
/// terminates the process via `fatalError`.
func main() {
    let start = CFAbsoluteTimeGetCurrent()

    // Configuration
    let numJobs = 1_000_000
    let fibNumber: Int32 = 45

    // Get Metal device
    guard let device = MTLCreateSystemDefaultDevice() else {
        fatalError("Metal is not supported on this device")
    }
    print("Using device: \(device.name)")

    // Load the shader library
    guard let library = device.makeDefaultLibrary() else {
        fatalError("Failed to load Metal library")
    }
    guard let function = library.makeFunction(name: "fibonacci_kernel") else {
        fatalError("Failed to find fibonacci_kernel function")
    }

    // Create compute pipeline
    let pipeline: MTLComputePipelineState
    do {
        pipeline = try device.makeComputePipelineState(function: function)
    } catch {
        fatalError("Failed to create pipeline: \(error)")
    }

    // Create command queue
    guard let commandQueue = device.makeCommandQueue() else {
        fatalError("Failed to create command queue")
    }

    // Prepare input data (all jobs compute the same Fibonacci number)
    var jobs = [Int32](repeating: fibNumber, count: numJobs)

    // Create buffers: one Int32 input and one UInt64 output slot per job.
    // NOTE(review): the kernel source is not visible here; this assumes it
    // reads buffer 0 as 32-bit ints and writes buffer 1 as 64-bit unsigned
    // ints. Confirm against Shaders.metal.
    guard let jobsBuffer = device.makeBuffer(
        bytes: &jobs,
        length: MemoryLayout<Int32>.stride * numJobs,
        options: .storageModeShared
    ) else {
        fatalError("Failed to create jobs buffer")
    }
    guard let resultsBuffer = device.makeBuffer(
        length: MemoryLayout<UInt64>.stride * numJobs,
        options: .storageModeShared
    ) else {
        fatalError("Failed to create results buffer")
    }

    // Create command buffer and encoder
    guard let commandBuffer = commandQueue.makeCommandBuffer(),
          let encoder = commandBuffer.makeComputeCommandEncoder() else {
        fatalError("Failed to create command buffer/encoder")
    }
    encoder.setComputePipelineState(pipeline)
    encoder.setBuffer(jobsBuffer, offset: 0, index: 0)
    encoder.setBuffer(resultsBuffer, offset: 0, index: 1)

    // Configure thread groups: one thread per job, grouped as widely as the
    // pipeline allows.
    let threadsPerGrid = MTLSize(width: numJobs, height: 1, depth: 1)
    let maxThreadsPerGroup = pipeline.maxTotalThreadsPerThreadgroup
    print("Max threads per group: \(maxThreadsPerGroup)")
    let threadsPerGroup = MTLSize(width: min(numJobs, maxThreadsPerGroup), height: 1, depth: 1)
    encoder.dispatchThreads(threadsPerGrid, threadsPerThreadgroup: threadsPerGroup)
    encoder.endEncoding()

    // Execute and wait
    commandBuffer.commit()
    commandBuffer.waitUntilCompleted()

    // waitUntilCompleted returns on failure as well as on success; without
    // this check a failed run would report whatever is in the results buffer.
    if let error = commandBuffer.error {
        fatalError("GPU execution failed: \(error)")
    }

    // Read results
    let resultsPointer = resultsBuffer.contents().bindMemory(to: UInt64.self, capacity: numJobs)
    let results = Array(UnsafeBufferPointer(start: resultsPointer, count: numJobs))

    // Print some results
    print("First 5 results:")
    for i in 0..<min(5, numJobs) {
        print(" Job \(i): fib(\(fibNumber)) = \(results[i])")
    }

    let elapsed = CFAbsoluteTimeGetCurrent() - start
    print("Total jobs: \(numJobs)")
    print("Execution time: \(String(format: "%.4f", elapsed))s")
}
| main() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "results": [ | |
| { | |
| "command": "./metal_fib", | |
| "mean": 0.12367221734, | |
| "stddev": 0.16942545033319764, | |
| "median": 0.06980722564, | |
| "user": 0.00964524, | |
| "system": 0.01644698, | |
| "min": 0.06056543414, | |
| "max": 0.60559114214, | |
| "times": [ | |
| 0.60559114214, | |
| 0.07662976814000001, | |
| 0.07422935114000001, | |
| 0.06877580914, | |
| 0.07083864214, | |
| 0.06056543414, | |
| 0.06554097514, | |
| 0.08020380914000001, | |
| 0.06646126714, | |
| 0.06788597514 | |
| ], | |
| "exit_codes": [ | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 0 | |
| ] | |
| }, | |
| { | |
| "command": "./mprocessing-go", | |
| "mean": 3.58517456314, | |
| "stddev": 0.11821375404667352, | |
| "median": 3.56436251714, | |
| "user": 3.55224154, | |
| "system": 2.4159574800000003, | |
| "min": 3.46224505914, | |
| "max": 3.81418480914, | |
| "times": [ | |
| 3.81418480914, | |
| 3.46932664214, | |
| 3.46224505914, | |
| 3.57611510014, | |
| 3.72334918414, | |
| 3.64247955914, | |
| 3.63194905914, | |
| 3.55260993414, | |
| 3.46886797514, | |
| 3.51061830914 | |
| ], | |
| "exit_codes": [ | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 0 | |
| ] | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment