Last active
September 14, 2016 21:19
-
-
Save jrprice/84e3f80cf0c851041d274a663d7ef73f to your computer and use it in GitHub Desktop.
Simple OpenCL backend for StreamExecutor for testing purposes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifdef __APPLE__ | |
#include <OpenCL/cl.h> | |
#else | |
#include <CL/cl.h> | |
#endif | |
#include <streamexecutor/StreamExecutor.h> | |
#include <vector> | |
#define CHECK_OCL(Op, Err) \ | |
if (Err != CL_SUCCESS) { \ | |
return streamexecutor::make_error("OpenCL error " + llvm::Twine(Err) + \ | |
" while " Op); \ | |
} | |
class OCLPlatformDevice : public streamexecutor::PlatformDevice { | |
public: | |
OCLPlatformDevice(cl_device_id Dev) : Device(Dev) { | |
// TODO: Check for errors here | |
Context = clCreateContext(NULL, 1, &Device, NULL, NULL, NULL); | |
SyncQueue = clCreateCommandQueue(Context, Device, 0, NULL); | |
} | |
~OCLPlatformDevice() { | |
clReleaseCommandQueue(SyncQueue); | |
clReleaseContext(Context); | |
} | |
private: | |
cl_context Context; | |
cl_device_id Device; | |
cl_command_queue SyncQueue; | |
std::string getName() const override { | |
char Name[256]; | |
clGetDeviceInfo(Device, CL_DEVICE_NAME, sizeof(Name), Name, NULL); | |
std::string Output = Name; | |
Output += " (OpenCL)"; | |
return Output; | |
} | |
std::string getPlatformName() const override { return "OpenCL"; } | |
streamexecutor::Expected<const void *> createStream() override { | |
cl_int Err; | |
cl_command_queue Queue = clCreateCommandQueue(Context, Device, 0, &Err); | |
CHECK_OCL("creating command-queue", Err); | |
return Queue; | |
} | |
streamexecutor::Error destroyStream(const void *Handle) override { | |
CHECK_OCL("destroying command-queue", | |
clReleaseCommandQueue((cl_command_queue)Handle)); | |
return streamexecutor::Error::success(); | |
} | |
streamexecutor::Expected<const void *> | |
createKernel(const streamexecutor::MultiKernelLoaderSpec &Spec) override { | |
cl_int Err; | |
const char *Source = Spec.getOpenCLTextInMemory().getText(); | |
// Create program | |
cl_program Program = | |
clCreateProgramWithSource(Context, 1, &Source, NULL, &Err); | |
CHECK_OCL("creating program", Err); | |
// Build program | |
Err = clBuildProgram(Program, 1, &Device, "", NULL, NULL); | |
if (Err == CL_BUILD_PROGRAM_FAILURE) { | |
char buildlog[4096]; | |
clGetProgramBuildInfo(Program, Device, CL_PROGRAM_BUILD_LOG, | |
sizeof(buildlog), buildlog, NULL); | |
return streamexecutor::make_error("OpenCL build failed:\n" + | |
llvm::Twine(buildlog)); | |
} | |
CHECK_OCL("building program", Err); | |
// Create kernel | |
cl_kernel Kernel = | |
clCreateKernel(Program, Spec.getKernelName().c_str(), &Err); | |
CHECK_OCL("creating kernel", Err); | |
return Kernel; | |
} | |
streamexecutor::Error destroyKernel(const void *Handle) override { | |
CHECK_OCL("destroying kernel", clReleaseKernel((cl_kernel)Handle)); | |
return streamexecutor::Error::success(); | |
} | |
streamexecutor::Error blockHostUntilDone(const void *S) override { | |
cl_int Err = clFinish((cl_command_queue)S); | |
CHECK_OCL("waiting for queue", Err); | |
return streamexecutor::Error::success(); | |
} | |
streamexecutor::Error | |
launch(const void *S, streamexecutor::BlockDimensions BlockSize, | |
streamexecutor::GridDimensions GridSize, const void *K, | |
const streamexecutor::PackedKernelArgumentArrayBase &ArgumentArray) | |
override { | |
cl_int Err; | |
cl_kernel Kernel = (cl_kernel)K; | |
// Set kernel arguments | |
for (size_t i = 0; i < ArgumentArray.getArgumentCount(); i++) { | |
const void *Arg = ArgumentArray.getAddress(i); | |
Err = clSetKernelArg(Kernel, i, ArgumentArray.getSize(i), Arg); | |
CHECK_OCL("setting kernel argument", Err); | |
} | |
// Calculate global work size | |
size_t Global[3] = {BlockSize.X * GridSize.X, BlockSize.Y * GridSize.Y, | |
BlockSize.Z * GridSize.Z}; | |
size_t Local[3] = {BlockSize.X, BlockSize.Y, BlockSize.Z}; | |
// Enqueue kernel | |
cl_command_queue Queue = (cl_command_queue)S; | |
Err = clEnqueueNDRangeKernel(Queue, Kernel, 3, NULL, Global, Local, 0, NULL, | |
NULL); | |
CHECK_OCL("enqueuing kernel", Err); | |
return streamexecutor::Error::success(); | |
} | |
streamexecutor::Expected<void *> | |
allocateDeviceMemory(size_t ByteCount) override { | |
cl_int Err; | |
cl_mem Memory = clCreateBuffer(Context, 0, ByteCount, NULL, &Err); | |
CHECK_OCL("creating buffer", Err); | |
return Memory; | |
} | |
streamexecutor::Error freeDeviceMemory(const void *Handle) override { | |
clReleaseMemObject((cl_mem)Handle); | |
return streamexecutor::Error::success(); | |
} | |
streamexecutor::Error registerHostMemory(void *Memory, | |
size_t ByteCount) override { | |
return streamexecutor::Error::success(); | |
} | |
streamexecutor::Error unregisterHostMemory(const void *Memory) override { | |
return streamexecutor::Error::success(); | |
} | |
streamexecutor::Error copyD2H(const void *S, const void *DeviceHandleSrc, | |
size_t SrcByteOffset, void *HostDst, | |
size_t DstByteOffset, | |
size_t ByteCount) override { | |
cl_command_queue Queue = (cl_command_queue)S; | |
cl_int Err = clEnqueueReadBuffer( | |
Queue, (cl_mem)DeviceHandleSrc, CL_FALSE, SrcByteOffset, ByteCount, | |
(char *)HostDst + DstByteOffset, 0, NULL, NULL); | |
CHECK_OCL("reading buffer", Err); | |
return streamexecutor::Error::success(); | |
} | |
streamexecutor::Error copyH2D(const void *S, const void *HostSrc, | |
size_t SrcByteOffset, | |
const void *DeviceHandleDst, | |
size_t DstByteOffset, | |
size_t ByteCount) override { | |
cl_command_queue Queue = (cl_command_queue)S; | |
cl_int Err = clEnqueueWriteBuffer( | |
Queue, (cl_mem)DeviceHandleDst, CL_FALSE, DstByteOffset, ByteCount, | |
(const char *)HostSrc + SrcByteOffset, 0, NULL, NULL); | |
CHECK_OCL("writing buffer", Err); | |
return streamexecutor::Error::success(); | |
} | |
streamexecutor::Error copyD2D(const void *S, const void *DeviceHandleSrc, | |
size_t SrcByteOffset, | |
const void *DeviceHandleDst, | |
size_t DstByteOffset, | |
size_t ByteCount) override { | |
cl_command_queue Queue = (cl_command_queue)S; | |
cl_int Err = clEnqueueCopyBuffer(Queue, (cl_mem)DeviceHandleSrc, | |
(cl_mem)DeviceHandleDst, SrcByteOffset, | |
DstByteOffset, ByteCount, 0, NULL, NULL); | |
CHECK_OCL("enqueuing copy buffer", Err); | |
return streamexecutor::Error::success(); | |
} | |
streamexecutor::Error synchronousCopyD2H(const void *DeviceHandleSrc, | |
size_t SrcByteOffset, void *HostDst, | |
size_t DstByteOffset, | |
size_t ByteCount) override { | |
cl_int Err = clEnqueueReadBuffer( | |
SyncQueue, (cl_mem)DeviceHandleSrc, CL_TRUE, SrcByteOffset, ByteCount, | |
(char *)HostDst + DstByteOffset, 0, NULL, NULL); | |
CHECK_OCL("reading buffer", Err); | |
return streamexecutor::Error::success(); | |
} | |
streamexecutor::Error synchronousCopyH2D(const void *HostSrc, | |
size_t SrcByteOffset, | |
const void *DeviceHandleDst, | |
size_t DstByteOffset, | |
size_t ByteCount) override { | |
cl_int Err = clEnqueueWriteBuffer( | |
SyncQueue, (cl_mem)DeviceHandleDst, CL_TRUE, DstByteOffset, ByteCount, | |
(const char *)HostSrc + SrcByteOffset, 0, NULL, NULL); | |
CHECK_OCL("writing buffer", Err); | |
return streamexecutor::Error::success(); | |
} | |
streamexecutor::Error synchronousCopyD2D(const void *DeviceHandleSrc, | |
size_t SrcByteOffset, | |
const void *DeviceHandleDst, | |
size_t DstByteOffset, | |
size_t ByteCount) override { | |
cl_int Err = clEnqueueCopyBuffer(SyncQueue, (cl_mem)DeviceHandleSrc, | |
(cl_mem)DeviceHandleDst, SrcByteOffset, | |
DstByteOffset, ByteCount, 0, NULL, NULL); | |
CHECK_OCL("enqueuing copy buffer", Err); | |
CHECK_OCL("waiting for queue", clFinish(SyncQueue)); | |
return streamexecutor::Error::success(); | |
} | |
}; | |
class OCLPlatform : public streamexecutor::Platform { | |
public: | |
OCLPlatform() { | |
cl_uint NumPlatforms = 0; | |
cl_platform_id Platforms[8]; | |
clGetPlatformIDs(8, Platforms, &NumPlatforms); | |
for (cl_uint p = 0; p < NumPlatforms; p++) { | |
cl_uint NumDevices = 0; | |
cl_device_id DeviceIDs[8]; | |
clGetDeviceIDs(Platforms[p], CL_DEVICE_TYPE_ALL, 8, DeviceIDs, | |
&NumDevices); | |
for (cl_uint i = 0; i < NumDevices; i++) { | |
streamexecutor::PlatformDevice *PDevice = | |
new OCLPlatformDevice(DeviceIDs[i]); | |
Devices.push_back(streamexecutor::Device(PDevice)); | |
} | |
} | |
} | |
size_t getDeviceCount() const { return Devices.size(); } | |
streamexecutor::Expected<streamexecutor::Device> | |
getDevice(size_t DeviceIndex) { | |
if (DeviceIndex >= Devices.size()) | |
return streamexecutor::make_error("DeviceIndex out of range"); | |
return Devices[DeviceIndex]; | |
} | |
private: | |
std::vector<streamexecutor::Device> Devices; | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment