Skip to content

Instantly share code, notes, and snippets.

@simgt
Last active December 18, 2015 16:39
Show Gist options
  • Save simgt/5812728 to your computer and use it in GitHub Desktop.
Save simgt/5812728 to your computer and use it in GitHub Desktop.
Host-buffers to device transfer and mapping failure with an AMD GCN device
#include <CL/cl.h>
#include <iostream>
#include <vector>
#include <cassert>
#include <cstdlib>
#include <cstring>
#define STRINGIFY(s) #s
#define CL_CHECK(code) if (code != CL_SUCCESS) { \
std::cerr << "file '" __FILE__ \
<< "', line " << __LINE__ \
<< ": OpenCL error " << code << std::endl;\
exit(code); \
}
cl_program ocl_build(cl_context context, const std::string& src);
int main(int argc, char** argv) {
if (argc < 2) {
std::cerr << "Usage: " << argv[0] << " [num host buffers]" << std::endl;
std::cerr << "Each buffer has a size of 64 MB" << std::endl;
return 1;
}
cl_int error;
cl_context context;
cl_command_queue queue;
// platform
cl_uint num_platforms;
error = clGetPlatformIDs(0, 0, &num_platforms);
std::vector<cl_platform_id> platform_ids(num_platforms);
error = clGetPlatformIDs(num_platforms, platform_ids.data(), 0); CL_CHECK(error);
// device
cl_uint num_devices;
error = clGetDeviceIDs(platform_ids[0], CL_DEVICE_TYPE_ALL, 0, 0, &num_devices);
std::vector<cl_device_id> device_ids(num_devices);
error = clGetDeviceIDs(platform_ids[0], CL_DEVICE_TYPE_ALL, num_devices,
device_ids.data(), 0); CL_CHECK(error);
cl_device_id device = device_ids[0];
// context
context = clCreateContext(0, 1, &device, 0, 0, &error); CL_CHECK(error);
queue = clCreateCommandQueue(context,
device,
0,
&error); CL_CHECK(error);
// program
std::string src = STRINGIFY(
__attribute__((req_work_group_size(256, 1, 1)))
kernel void foo(global int* bar, uint n) {
const uint lid = get_local_id(0);
const uint block_size = get_local_size(0);
int sum = 0;
for (uint i = lid; i < n; i += block_size)
sum += bar[i];
bar[lid] = sum;
}
);
cl_program program = ocl_build(context, src);
cl_kernel kernel = clCreateKernel(program, "foo", &error); CL_CHECK(error);
// alloc buffers
const uint buffer_size = 1 << 24;
const uint num_host_buffers = std::atoi(argv[1]);
cl_mem host_buffers[num_host_buffers];
std::cout << "Host memory: " << num_host_buffers
<< " * " << buffer_size * sizeof(int) / (1 << 20)
<< " = " << num_host_buffers * buffer_size * sizeof(int) / (1 << 20) << "MB"
<< std::endl;
int* init = new int[buffer_size];
for (uint k = 0; k < buffer_size; k++)
init[k] = std::rand();
for (uint i = 0; i < num_host_buffers; i++) {
// buffer
host_buffers[i] = clCreateBuffer(
context,
CL_MEM_READ_ONLY
| CL_MEM_HOST_WRITE_ONLY
| CL_MEM_ALLOC_HOST_PTR
| CL_MEM_COPY_HOST_PTR,
buffer_size * sizeof(int),
(void*)init,
&error
); CL_CHECK(error)
}
delete[] init;
// alloc device buffers
cl_mem device_buffer = clCreateBuffer(
context,
CL_MEM_READ_ONLY
| CL_MEM_HOST_NO_ACCESS,
buffer_size * sizeof(int), 0, &error
); CL_CHECK(error);
// kernel execution
std::cout << "Running kernels... "; std::cout.flush();
const size_t global[] = { 256 };
const size_t local[] = { 256 };
error = clSetKernelArg(
kernel, 0, sizeof(cl_mem),
&device_buffer
); CL_CHECK(error);
error = clSetKernelArg(kernel, 1, sizeof(uint), &buffer_size); CL_CHECK(error);
for (uint i = 0; i < num_host_buffers; i++) {
clEnqueueCopyBuffer(
queue, host_buffers[i], device_buffer,
0, 0, buffer_size * sizeof(int),
0, 0, 0
);
error = clEnqueueNDRangeKernel(
queue, kernel,
1, 0, global, local,
0, 0, 0
); CL_CHECK(error);
}
clFinish(queue);
std::cout << "done" << std::endl;
std::cout << "Mapping host buffers... "; std::cout.flush();
for (uint i = 0; i < num_host_buffers; i++) {
int* mem = (int*)clEnqueueMapBuffer(
queue, host_buffers[i], true,
CL_MAP_WRITE_INVALIDATE_REGION,
0, buffer_size * sizeof(int),
0, 0, 0, &error
); // !! CL_MAP_FAILURE here !!
if (error != CL_SUCCESS)
break;
// ... do something with 'mem' here ...
error = clEnqueueUnmapMemObject(queue, host_buffers[i], mem, 0, 0, 0);
}
if (error == CL_SUCCESS)
std::cout << "done" << std::endl;
else
std::cout << "failed with code " << error << std::endl;
// release
clReleaseMemObject(device_buffer);
for (uint i = 0; i < num_host_buffers; i++)
clReleaseMemObject(host_buffers[i]);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseCommandQueue(queue);
clReleaseContext(context);
return 0;
}
cl_program ocl_build(cl_context context, const std::string& src) {
cl_int error;
const char* src_str = src.c_str();
cl_program program = clCreateProgramWithSource(context, 1, &src_str, 0, &error); CL_CHECK(error);
const char otps[] = "-cl-mad-enable -cl-no-signed-zeros";
error = clBuildProgram(program, 0, 0, otps, 0, 0);
if (error != CL_SUCCESS) {
size_t len;
char buffer[1 << 13];
std::cerr << "error (" << error << ") : failed to build program executable" << std::endl;
uint num_devices;
clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(uint), (void*)&num_devices, 0);
cl_device_id device_ids[num_devices];
clGetContextInfo(context, CL_CONTEXT_DEVICES, num_devices * sizeof(cl_device_id), device_ids, 0);
for (uint i = 0; i < num_devices; i++) {
clGetProgramBuildInfo(program, device_ids[i], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
std::cerr << buffer << std::endl << std::endl;
}
exit(error);
}
return program;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment