Skip to content

Instantly share code, notes, and snippets.

@PhDP
Created December 8, 2014 13:12
Show Gist options
  • Save PhDP/952cdc626d48650b9834 to your computer and use it in GitHub Desktop.
Save PhDP/952cdc626d48650b9834 to your computer and use it in GitHub Desktop.
Simple OpenCL example. It works on UNIX but fails on Windows with NVIDIA drivers for some reason.
// From Gaster et al.'s "Heterogeneous Computing with OpenCL".
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#if defined(__APPLE__) && defined(__MACH__)
#include <OpenCL/OpenCL.h>
#else
#include <CL/cl.h>
#endif
/*
 * OpenCL C source for the device kernel, compiled at run time by the host.
 * Each work-item reads its global id in dimension 0 and stores the sum of
 * the matching elements of A and B into C.
 */
const char *programSource =
    "__kernel void vecadd("
    "__global int *A, "
    "__global int *B, "
    "__global int *C) {\n"
    " int idx = get_global_id(0);\n"
    " C[idx] = A[idx] + B[idx];\n"
    "}";
int main(int argc, char **argv) {
#if defined(CL_VERSION_1_2)
printf("OpenCL version 1.2.\n");
#elif defined(CL_VERSION_1_1)
printf("OpenCL version 1.1.\n");
#endif
const int elements = 2048;
size_t datasize = sizeof(int) * elements;
int *a = (int*)malloc(datasize);
int *b = (int*)malloc(datasize);
int *c = (int*)malloc(datasize);
for (int i = 0; i < elements; ++i) {
a[i] = i;
b[i] = i + 1;
}
cl_uint numPlatforms = 0;
cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);
cl_platform_id *platforms = (cl_platform_id*)malloc(numPlatforms*sizeof(cl_platform_id));
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
cl_uint numDevices = 0;
cl_device_id *devices = NULL;
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices);
devices = (cl_device_id*)malloc(numDevices*sizeof(cl_device_id));
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, numDevices, devices, NULL);
cl_context context = clCreateContext(NULL, numDevices, devices, NULL, NULL, &status);
cl_command_queue cmdQueue = clCreateCommandQueue(context, devices[0], 0, &status);
cl_mem bufferA = clCreateBuffer(context, CL_MEM_READ_ONLY, datasize, NULL, &status);
cl_mem bufferB = clCreateBuffer(context, CL_MEM_READ_ONLY, datasize, NULL, &status);
cl_mem bufferC = clCreateBuffer(context, CL_MEM_READ_ONLY, datasize, NULL, &status);
status = clEnqueueWriteBuffer(cmdQueue, bufferA, CL_FALSE, 0, datasize, a, 0, NULL, NULL);
status = clEnqueueWriteBuffer(cmdQueue, bufferB, CL_FALSE, 0, datasize, b, 0, NULL, NULL);
cl_program program = clCreateProgramWithSource(context, 1, (const char**)&programSource, NULL, &status);
status = clBuildProgram(program, numDevices, devices, NULL, NULL, NULL);
cl_kernel kernel = clCreateKernel(program, "vecadd", &status);
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferA);
status |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferB);
status |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &bufferC);
size_t globalWorkSize[1];
globalWorkSize[0] = elements;
status = clEnqueueNDRangeKernel(cmdQueue, kernel, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL);
clEnqueueReadBuffer(cmdQueue, bufferC, CL_TRUE, 0, datasize, c, 0, NULL, NULL);
bool result = true;
for (int i = 0; i < elements; i++) {
if (c[i] != a[i] + b[i]) {
result = false;
break;
}
}
if (result) {
printf("Output is correct\n");
}
else {
printf("Output is incorrect\n");
}
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseCommandQueue(cmdQueue);
clReleaseMemObject(bufferA);
clReleaseMemObject(bufferB);
clReleaseMemObject(bufferC);
clReleaseContext(context);
free(a);
free(b);
free(c);
free(platforms);
free(devices);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment