Skip to content

Instantly share code, notes, and snippets.

@gidili
Last active December 17, 2015 02:29
Show Gist options
  • Save gidili/5536361 to your computer and use it in GitHub Desktop.
Save gidili/5536361 to your computer and use it in GitHub Desktop.
Using only implicit copying - works fine with CPU but not with GPU
/**
 * Runs {@code _testKernel} over {@code _input} using mapped device buffers only
 * (no explicit read/write enqueue calls) and returns the kernel output.
 *
 * <p>Steps: allocate an input and an output buffer of {@code _input.length} floats,
 * copy {@code _input} into the mapped input buffer, launch the kernel with one
 * work-item per element, wait for it to finish, then map the output buffer and copy
 * its contents into a Java array.
 *
 * <p>Both buffers are released before this method returns, whether it completes
 * normally or throws, so repeated calls do not accumulate device allocations while
 * waiting for garbage collection.
 *
 * <p>NOTE(review): the kernel is enqueued without an explicit dependency on the
 * input unmap; this presumably relies on {@code _queue} being an in-order queue —
 * confirm against where the queue is created.
 *
 * @return a new array holding the floats read back from the output buffer
 */
public synchronized float[] solveWithDeviceMemory() {
    final int count = _input.length;
    CLBuffer<Float> bufIn = null;
    CLBuffer<Float> bufOut = null;
    try {
        // Allocate native (device) memory for the input and output data
        bufIn = _context.createFloatBuffer(CLMem.Usage.Input, count);
        bufOut = _context.createFloatBuffer(CLMem.Usage.Output, count);

        // Copy input data to the device through a write mapping; always unmap,
        // even if the host-side copy fails, so the buffer is not left mapped
        Pointer<Float> ptrIn = bufIn.map(_queue, CLMem.MapFlags.Write);
        try {
            ptrIn.setFloats(_input);
        } finally {
            bufIn.unmap(_queue, ptrIn);
        }

        // Setup the method arguments for the kernel
        _testKernel.setArg(0, bufIn);
        _testKernel.setArg(1, bufOut);
        _testKernel.setArg(2, count);

        // Enqueue execution of the kernel (one work-item per element) and block
        // until it has finished so the output buffer is complete before mapping
        CLEvent completion = _testKernel.enqueueNDRange(_queue, new int[] { count });
        completion.waitFor();

        // Map the output buffer for reading and copy it into a Java array before
        // unmapping, because the mapped pointer is not valid after unmap
        Pointer<Float> ptrOut = bufOut.map(_queue, CLMem.MapFlags.Read);
        try {
            return ptrOut.getFloats();
        } finally {
            bufOut.unmap(_queue, ptrOut);
        }
    } finally {
        // Free device memory deterministically instead of waiting for finalization
        if (bufOut != null) {
            bufOut.release();
        }
        if (bufIn != null) {
            bufIn.release();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment