Created
December 8, 2014 13:12
-
-
Save PhDP/952cdc626d48650b9834 to your computer and use it in GitHub Desktop.
Simple OpenCL example. It works on UNIX but fails on Windows with NVIDIA drivers for some reason.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// From Gaster et al.'s "Heterogeneous Computing with OpenCL".
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#if defined(__APPLE__) && defined(__MACH__)
#include <OpenCL/OpenCL.h>
#else
#include <CL/cl.h>
#endif
/*
 * OpenCL C source of the `vecadd` kernel, compiled at run time.
 *
 * Each work-item reads its index from get_global_id(0) and stores
 * A[idx] + B[idx] into C[idx]. The kernel performs no bounds check, so the
 * global work size must not exceed the number of elements in the buffers.
 */
const char *programSource =
    "__kernel void vecadd(__global int *A, __global int *B, __global int *C) {\n"
    " int idx = get_global_id(0);\n"
    " C[idx] = A[idx] + B[idx];\n"
    "}";
int main(int argc, char **argv) { | |
#if defined(CL_VERSION_1_2) | |
printf("OpenCL version 1.2.\n"); | |
#elif defined(CL_VERSION_1_1) | |
printf("OpenCL version 1.1.\n"); | |
#endif | |
const int elements = 2048; | |
size_t datasize = sizeof(int) * elements; | |
int *a = (int*)malloc(datasize); | |
int *b = (int*)malloc(datasize); | |
int *c = (int*)malloc(datasize); | |
for (int i = 0; i < elements; ++i) { | |
a[i] = i; | |
b[i] = i + 1; | |
} | |
cl_uint numPlatforms = 0; | |
cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms); | |
cl_platform_id *platforms = (cl_platform_id*)malloc(numPlatforms*sizeof(cl_platform_id)); | |
status = clGetPlatformIDs(numPlatforms, platforms, NULL); | |
cl_uint numDevices = 0; | |
cl_device_id *devices = NULL; | |
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices); | |
devices = (cl_device_id*)malloc(numDevices*sizeof(cl_device_id)); | |
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, numDevices, devices, NULL); | |
cl_context context = clCreateContext(NULL, numDevices, devices, NULL, NULL, &status); | |
cl_command_queue cmdQueue = clCreateCommandQueue(context, devices[0], 0, &status); | |
cl_mem bufferA = clCreateBuffer(context, CL_MEM_READ_ONLY, datasize, NULL, &status); | |
cl_mem bufferB = clCreateBuffer(context, CL_MEM_READ_ONLY, datasize, NULL, &status); | |
cl_mem bufferC = clCreateBuffer(context, CL_MEM_READ_ONLY, datasize, NULL, &status); | |
status = clEnqueueWriteBuffer(cmdQueue, bufferA, CL_FALSE, 0, datasize, a, 0, NULL, NULL); | |
status = clEnqueueWriteBuffer(cmdQueue, bufferB, CL_FALSE, 0, datasize, b, 0, NULL, NULL); | |
cl_program program = clCreateProgramWithSource(context, 1, (const char**)&programSource, NULL, &status); | |
status = clBuildProgram(program, numDevices, devices, NULL, NULL, NULL); | |
cl_kernel kernel = clCreateKernel(program, "vecadd", &status); | |
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferA); | |
status |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferB); | |
status |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &bufferC); | |
size_t globalWorkSize[1]; | |
globalWorkSize[0] = elements; | |
status = clEnqueueNDRangeKernel(cmdQueue, kernel, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL); | |
clEnqueueReadBuffer(cmdQueue, bufferC, CL_TRUE, 0, datasize, c, 0, NULL, NULL); | |
bool result = true; | |
for (int i = 0; i < elements; i++) { | |
if (c[i] != a[i] + b[i]) { | |
result = false; | |
break; | |
} | |
} | |
if (result) { | |
printf("Output is correct\n"); | |
} | |
else { | |
printf("Output is incorrect\n"); | |
} | |
clReleaseKernel(kernel); | |
clReleaseProgram(program); | |
clReleaseCommandQueue(cmdQueue); | |
clReleaseMemObject(bufferA); | |
clReleaseMemObject(bufferB); | |
clReleaseMemObject(bufferC); | |
clReleaseContext(context); | |
free(a); | |
free(b); | |
free(c); | |
free(platforms); | |
free(devices); | |
return 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment