Last active
May 30, 2020 19:35
-
-
Save noppoMan/8bc7dbdf3bb583ab4fd75a055e86fabd to your computer and use it in GitHub Desktop.
Sample code for computing the sigmoid function on the GPU with Metal
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Driver script: feed the integers 0 through 10000 (as Floats) to the GPU
// sigmoid helper and print the resulting array.
let input: [Float] = (0...10000).map { Float($0) }
let output = try sigmoid_on_gpu(input)
print(output)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Metal | |
import MetalPerformanceShaders | |
/// The system default Metal device.
/// Traps with a descriptive message when no Metal device is available,
/// instead of failing on an anonymous force-unwrap.
let device: MTLDevice = {
    guard let device = MTLCreateSystemDefaultDevice() else {
        fatalError("Metal is unavailable: MTLCreateSystemDefaultDevice() returned nil")
    }
    return device
}()

/// Command queue on `device` used to submit GPU work.
let commandQueue: MTLCommandQueue = {
    guard let queue = device.makeCommandQueue() else {
        fatalError("Could not create a Metal command queue")
    }
    return queue
}()

/// The default Metal library (loads Shaders.metal).
/// `makeDefaultLibrary()` returns nil when no default library can be found,
/// so report that explicitly rather than crashing without context.
let defaultLibrary: MTLLibrary = {
    guard let library = device.makeDefaultLibrary() else {
        fatalError("Could not load the default Metal library (is Shaders.metal part of the target?)")
    }
    return library
}()
// Reference: https://memkite.com/blog/2014/12/15/data-parallel-programming-with-metal-and-swift-for-iphoneipad-gpu/index.html
/// Failures `sigmoid_on_gpu(_:)` reports when a required Metal object cannot be created.
enum SigmoidGPUError: Error {
    /// The default library has no function named "sigmoid".
    case kernelNotFound
    /// The input or output `MTLBuffer` could not be allocated.
    case bufferAllocationFailed
    /// The command buffer or compute command encoder could not be created.
    case commandEncodingFailed
}

/// Runs the "sigmoid" compute kernel from the default Metal library over `input`.
///
/// The input is uploaded to buffer index 0, the kernel writes to buffer index 1,
/// and the call blocks until the GPU has finished.
///
/// - Parameter input: Values to transform. An empty array yields an empty result
///   without submitting any GPU work.
/// - Returns: One `Float` per input element, read back from the output buffer.
/// - Throws: `SigmoidGPUError` if the kernel, a buffer, or the command objects
///   cannot be created; any error from `makeComputePipelineState(function:)`;
///   or the command buffer's `error` if execution failed.
func sigmoid_on_gpu(_ input: [Float]) throws -> [Float] {
    // Zero-length buffers and zero-sized dispatches are not valid Metal work,
    // so there is nothing to do for an empty input.
    guard !input.isEmpty else { return [] }

    guard let sigmoidFunction = defaultLibrary.makeFunction(name: "sigmoid") else {
        throw SigmoidGPUError.kernelNotFound
    }
    // Build the pipeline BEFORE opening an encoder: this call can throw, and an
    // encoder must not be abandoned without endEncoding().
    let pipelineState = try device.makeComputePipelineState(function: sigmoidFunction)

    // Same dispatch geometry as before: 32 threads per group, group count rounded up.
    let threadsPerGroupWidth = 32
    let groupCount = (input.count + threadsPerGroupWidth - 1) / threadsPerGroupWidth

    // Rounding up launches more threads than there are elements. Size both buffers
    // for every launched thread so the surplus threads in the final group stay
    // in bounds. NOTE(review): the kernel source is not visible here; this assumes
    // it indexes both buffers by its position in the grid - confirm.
    let elementStride = MemoryLayout<Float>.stride
    let inputByteLength = input.count * elementStride
    let paddedByteLength = groupCount * threadsPerGroupWidth * elementStride

    guard let inVectorBuffer = device.makeBuffer(length: paddedByteLength, options: []),
          let outVectorBuffer = device.makeBuffer(length: paddedByteLength, options: []) else {
        throw SigmoidGPUError.bufferAllocationFailed
    }
    // Only the first `input.count` elements carry data; the padding stays as allocated.
    inVectorBuffer.contents().copyMemory(from: input, byteCount: inputByteLength)

    guard let commandBuffer = commandQueue.makeCommandBuffer(),
          let computeCommandEncoder = commandBuffer.makeComputeCommandEncoder() else {
        throw SigmoidGPUError.commandEncodingFailed
    }
    computeCommandEncoder.setComputePipelineState(pipelineState)
    computeCommandEncoder.setBuffer(inVectorBuffer, offset: 0, index: 0)
    computeCommandEncoder.setBuffer(outVectorBuffer, offset: 0, index: 1)
    computeCommandEncoder.dispatchThreadgroups(
        MTLSize(width: groupCount, height: 1, depth: 1),
        threadsPerThreadgroup: MTLSize(width: threadsPerGroupWidth, height: 1, depth: 1)
    )
    computeCommandEncoder.endEncoding()

    commandBuffer.commit()
    commandBuffer.waitUntilCompleted()

    // Surface GPU-side failures instead of returning whatever is in the buffer.
    if let executionError = commandBuffer.error {
        throw executionError
    }

    // Copy only the real results (not the padding) out of the output buffer.
    let results = outVectorBuffer.contents().bindMemory(to: Float.self, capacity: input.count)
    return Array(UnsafeBufferPointer(start: results, count: input.count))
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment