Created
May 16, 2022 19:44
-
-
Save eyalroz/8312f7d417d13ef00f192b38d2df097d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define __CL_ENABLE_EXCEPTIONS | |
#include <stdio.h> | |
#include <math.h> | |
#ifdef __APPLE__ | |
#include <OpenCL/cl.hpp> | |
#else | |
#include <CL/cl.hpp> | |
#endif | |
#include <cstdio> | |
#include <cstdlib> | |
#include <iostream> | |
#include <math.h> | |
// OpenCL kernel. Each work item takes care of one element of c | |
const char *kernelSource = "\n" \ | |
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable \n" \ | |
"__kernel void vecAdd( __global double *a, \n" \ | |
" __global double *b, \n" \ | |
" __global double *c, \n" \ | |
" const unsigned int n) \n" \ | |
"{ \n" \ | |
" //Get our global thread ID \n" \ | |
" int id = get_global_id(0); \n" \ | |
" \n" \ | |
" //Make sure we do not go out of bounds \n" \ | |
" if (id < n) \n" \ | |
" c[id] = a[id] + b[id]; \n" \ | |
"} \n" \ | |
"\n" ; | |
int main(int argc, char *argv[]) | |
{ | |
// Length of vectors | |
unsigned int n = 1000; | |
// Host input vectors | |
double *h_a; | |
double *h_b; | |
// Host output vector | |
double *h_c; | |
// Device input buffers | |
cl::Buffer d_a; | |
cl::Buffer d_b; | |
// Device output buffer | |
cl::Buffer d_c; | |
// Size, in bytes, of each vector | |
size_t bytes = n*sizeof(double); | |
// Allocate memory for each vector on host | |
h_a = new double[n]; | |
h_b = new double[n]; | |
h_c = new double[n]; | |
// Initialize vectors on host | |
for(int i = 0; i < n; i++ ) | |
{ | |
h_a[i] = sinf(i)*sinf(i); | |
h_b[i] = cosf(i)*cosf(i); | |
} | |
cl_int err = CL_SUCCESS; | |
try { | |
// Query platforms | |
std::vector<cl::Platform> platforms; | |
cl::Platform::get(&platforms); | |
if (platforms.size() == 0) { | |
std::cout << "Platform size 0\n"; | |
return -1; | |
} | |
// Get list of devices on default platform and create context | |
cl_context_properties properties[] = | |
{ CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0}; | |
cl::Context context(CL_DEVICE_TYPE_GPU, properties); | |
std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>(); | |
// Create command queue for first device | |
cl::CommandQueue queue(context, devices[0], 0, &err); | |
// Create device memory buffers | |
d_a = cl::Buffer(context, CL_MEM_READ_ONLY, bytes); | |
d_b = cl::Buffer(context, CL_MEM_READ_ONLY, bytes); | |
d_c = cl::Buffer(context, CL_MEM_WRITE_ONLY, bytes); | |
// Bind memory buffers | |
queue.enqueueWriteBuffer(d_a, CL_TRUE, 0, bytes, h_a); | |
queue.enqueueWriteBuffer(d_b, CL_TRUE, 0, bytes, h_b); | |
//Build kernel from source string | |
cl::Program::Sources source(1, | |
std::make_pair(kernelSource,strlen(kernelSource))); | |
cl::Program program_ = cl::Program(context, source); | |
program_.build(devices); | |
// Create kernel object | |
cl::Kernel kernel(program_, "vecAdd", &err); | |
// Bind kernel arguments to kernel | |
kernel.setArg(0, d_a); | |
kernel.setArg(1, d_b); | |
kernel.setArg(2, d_c); | |
kernel.setArg(3, n); | |
// Number of work items in each local work group | |
cl::NDRange localSize(64); | |
// Number of total work items - localSize must be devisor | |
cl::NDRange globalSize((int)(ceil(n/(float)64)*64)); | |
// Enqueue kernel | |
cl::Event event; | |
queue.enqueueNDRangeKernel( | |
kernel, | |
cl::NullRange, | |
globalSize, | |
localSize, | |
NULL, | |
&event); | |
// Block until kernel completion | |
event.wait(); | |
// Read back d_c | |
queue.enqueueReadBuffer(d_c, CL_TRUE, 0, bytes, h_c); | |
} | |
catch (cl::Error err) { | |
std::cerr | |
<< "ERROR: "<<err.what()<<"("<<err.err()<<")"<<std::endl; | |
} | |
// Sum up vector c and print result divided by n, this should equal 1 within error | |
double sum = 0; | |
for(int i=0; i<n; i++) | |
sum += h_c[i]; | |
std::cout<<"final result (divided by n): "<<sum/n<<std::endl; | |
// Release host memory | |
delete(h_a); | |
delete(h_b); | |
delete(h_c); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment