Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save jhurliman/5536785 to your computer and use it in GitHub Desktop.
Save jhurliman/5536785 to your computer and use it in GitHub Desktop.
/**
 * Runs {@code _testKernel} over {@code _input} using mapped device buffers and
 * returns the kernel's output.
 *
 * <p>The input is uploaded by mapping the input buffer for writing, copying
 * {@code _input} into the mapped region and unmapping it <em>before</em> the
 * kernel is launched. The output is read back by mapping the output buffer for
 * reading only <em>after</em> the kernel has completed. OpenCL leaves the
 * behaviour undefined when a kernel accesses a buffer that is still mapped on
 * the host, so neither buffer is mapped while the kernel runs.
 *
 * <p>The kernel receives three arguments: the input buffer, the output buffer
 * and the element count. Both device buffers are released before returning,
 * including when an exception is thrown.
 *
 * @return a new array with one float per element of {@code _input}, or an
 *         empty array when {@code _input} is empty
 */
public synchronized float[] solveWithDeviceMemory() {
    final int count = _input.length;
    if (count == 0) {
        // OpenCL rejects zero-sized buffer allocations; nothing to compute anyway.
        return new float[0];
    }

    // Allocate native (device) memory for the input data
    CLBuffer<Float> bufIn = _context.createFloatBuffer(CLMem.Usage.Input, count);
    try {
        // Allocate native (device) memory for the output data
        CLBuffer<Float> bufOut = _context.createFloatBuffer(CLMem.Usage.Output, count);
        try {
            // Upload: map the input buffer, fill it, and unmap it so the
            // device owns the data again before the kernel touches it.
            Pointer<Float> ptrIn = bufIn.map(_queue, CLMem.MapFlags.Write);
            ptrIn.setFloats(_input);
            CLEvent inputReady = bufIn.unmap(_queue, ptrIn);

            // Setup the method arguments for the kernel
            _testKernel.setArg(0, bufIn);
            _testKernel.setArg(1, bufOut);
            _testKernel.setArg(2, count);

            // Enqueue the kernel, explicitly ordered after the input unmap so
            // the upload is visible even if the queue is out-of-order.
            CLEvent completion =
                    _testKernel.enqueueNDRange(_queue, new int[] { count }, inputReady);
            completion.waitFor();

            // Download: map the output buffer only now that the kernel is done.
            Pointer<Float> ptrOut = bufOut.map(_queue, CLMem.MapFlags.Read);
            try {
                return ptrOut.getFloats();
            } finally {
                // Unmap with the pointer that belongs to this buffer.
                bufOut.unmap(_queue, ptrOut);
            }
        } finally {
            bufOut.release();
        }
    } finally {
        bufIn.release();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment