Created
June 17, 2014 15:27
-
-
Save gicmo/192d23002df59420d22a to your computer and use it in GitHub Desktop.
Sample program to expose a bug in exp() on OSX 10.9.3 + FirePro D300
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyleft (c) 2014 Christian Kellner
//
// Usage: ./clexptest [cast_to_float] [device_index]
//   cast_to_float: cast argument to exp() to float
//   device_index:  index of the list of devices to use for testing
//
// Compile:
//   clang -std=c++11 -Wall -o clexptest -framework OpenCL -lc++ clexptest.cpp
//
// LICENSE: BSD-3 clause
#include <cassert>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <vector>

#include <OpenCL/OpenCL.h>
// OpenCL C source of the test kernel.
//
// exp_test writes exp(data_in[i]) to data_out[i] for each work item i.
// When the program is built with -DCAST_TO_FLOAT the argument is cast to
// float first, so the single-precision exp() is used instead of the
// double-precision one.
//
// The source is assembled from adjacent string literals (one per kernel line)
// rather than backslash line continuations, so stray trailing characters on a
// line cannot silently break the literal.
static const char *kernel_source =
    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
    "kernel void exp_test(__global double *data_in, __global double *data_out)\n"
    "{\n"
    "    size_t i = get_global_id(0);\n"
    "#ifdef CAST_TO_FLOAT\n"
    "    data_out[i] = exp((float) data_in[i]);\n"
    "#else\n"
    "    data_out[i] = exp(data_in[i]);\n"
    "#endif\n"
    "}\n";
// Host-side input values: exp() of each element is computed both on the
// OpenCL device and on the host, and the two results are compared.
double dx[] = {1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7};
int main(int argc, const char * argv[]) { | |
const size_t N = sizeof(dx) / sizeof(double); | |
bool cast_to_float = false; | |
cl_context context = nullptr; | |
cl_command_queue cmdq = nullptr; | |
cl_program program = nullptr; | |
cl_kernel kernel = nullptr; | |
cl_uint num_devices = 0; | |
cl_int err; | |
err = clGetDeviceIDs(nullptr, CL_DEVICE_TYPE_ALL, 0, | |
nullptr, &num_devices); | |
assert(err == CL_SUCCESS); | |
std::vector<cl_device_id> devices(num_devices); | |
err = clGetDeviceIDs(nullptr, CL_DEVICE_TYPE_ALL, num_devices, | |
devices.data(), nullptr); | |
assert(err == CL_SUCCESS); | |
std::cout << "Found " << devices.size() << " devices" << std::endl; | |
for (const auto dev : devices) { | |
std::cout << dev << std::endl; | |
} | |
assert(devices.size() > 0); | |
//on my machine the offline GPU is the last one on the array | |
//this also seems to be quite stable | |
size_t device_to_use = devices.size() - 1; | |
if (argc > 1) { | |
cast_to_float = atoi(argv[1]); | |
if (argc > 2) { | |
device_to_use = atoi(argv[2]); | |
} | |
} | |
cl_device_id device = devices[device_to_use]; | |
std::cout << "Using device: " << device << std::endl; | |
context = clCreateContext(nullptr, | |
1, | |
&device, | |
nullptr, | |
nullptr, | |
&err); | |
assert(err == CL_SUCCESS); | |
program = clCreateProgramWithSource(context, | |
1, | |
&kernel_source, | |
nullptr, | |
&err); | |
assert(err == CL_SUCCESS); | |
const char *cast_opt = "-DCAST_TO_FLOAT"; | |
const char *options = cast_to_float ? cast_opt : nullptr; | |
std::cout << "Casting to float: " << cast_to_float << " "; | |
std::cout << (options ? options : "") << std::endl; | |
err = clBuildProgram(program, 1, &device, options, nullptr, nullptr); | |
assert(err == CL_SUCCESS); | |
kernel = clCreateKernel(program, "exp_test", &err); | |
assert(err == CL_SUCCESS); | |
cmdq = clCreateCommandQueue(context, device, 0, &err); | |
assert(err == CL_SUCCESS); | |
cl_mem m_in = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(double) * N, NULL, &err); | |
cl_mem m_out = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(double) * N, NULL, &err); | |
err = clEnqueueWriteBuffer(cmdq, | |
m_in, | |
CL_TRUE, | |
0, | |
sizeof(double) * 8, | |
dx, | |
0, NULL, NULL); | |
assert(err == CL_SUCCESS); | |
cl_int res; | |
res = clSetKernelArg(kernel, 0, sizeof(m_in), &m_in); | |
res |= clSetKernelArg(kernel, 1, sizeof(m_out), &m_out); | |
assert(err == CL_SUCCESS); | |
std::cout << "launching kernel (N = " << N << ")" << std::endl; | |
std::vector<size_t> global = {N}; | |
err = clEnqueueNDRangeKernel(cmdq, | |
kernel, | |
static_cast<cl_uint>(global.size()), | |
nullptr, | |
global.data(), | |
nullptr, | |
0, nullptr, nullptr); | |
clFinish(cmdq); | |
std::vector<double> cl_data(N); | |
err = clEnqueueReadBuffer(cmdq, | |
m_out, | |
CL_TRUE, | |
0, | |
sizeof(double) * cl_data.size(), | |
cl_data.data(), | |
0, NULL, NULL ); | |
assert(err == CL_SUCCESS); | |
for (int i = 0; i < N; i++) { | |
const double y = exp(dx[i]); | |
std::cout << "e^" << dx[i] << " = "; | |
std::cout << y << "\t(host) " << cl_data[i] << "\t(cl) [(host - cl) "; | |
std::cout << y - cl_data[i]; | |
std::cout << "]" << std::endl; | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment