Last active
December 18, 2015 16:39
-
-
Save simgt/5812728 to your computer and use it in GitHub Desktop.
Host-buffers to device transfer and mapping failure with an AMD GCN device
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <CL/cl.h> | |
#include <iostream> | |
#include <vector> | |
#include <cassert> | |
#include <cstdlib> | |
#include <cstring> | |
#define STRINGIFY(s) #s | |
#define CL_CHECK(code) if (code != CL_SUCCESS) { \ | |
std::cerr << "file '" __FILE__ \ | |
<< "', line " << __LINE__ \ | |
<< ": OpenCL error " << code << std::endl;\ | |
exit(code); \ | |
} | |
cl_program ocl_build(cl_context context, const std::string& src); | |
int main(int argc, char** argv) { | |
if (argc < 2) { | |
std::cerr << "Usage: " << argv[0] << " [num host buffers]" << std::endl; | |
std::cerr << "Each buffer has a size of 64 MB" << std::endl; | |
return 1; | |
} | |
cl_int error; | |
cl_context context; | |
cl_command_queue queue; | |
// platform | |
cl_uint num_platforms; | |
error = clGetPlatformIDs(0, 0, &num_platforms); | |
std::vector<cl_platform_id> platform_ids(num_platforms); | |
error = clGetPlatformIDs(num_platforms, platform_ids.data(), 0); CL_CHECK(error); | |
// device | |
cl_uint num_devices; | |
error = clGetDeviceIDs(platform_ids[0], CL_DEVICE_TYPE_ALL, 0, 0, &num_devices); | |
std::vector<cl_device_id> device_ids(num_devices); | |
error = clGetDeviceIDs(platform_ids[0], CL_DEVICE_TYPE_ALL, num_devices, | |
device_ids.data(), 0); CL_CHECK(error); | |
cl_device_id device = device_ids[0]; | |
// context | |
context = clCreateContext(0, 1, &device, 0, 0, &error); CL_CHECK(error); | |
queue = clCreateCommandQueue(context, | |
device, | |
0, | |
&error); CL_CHECK(error); | |
// program | |
std::string src = STRINGIFY( | |
__attribute__((req_work_group_size(256, 1, 1))) | |
kernel void foo(global int* bar, uint n) { | |
const uint lid = get_local_id(0); | |
const uint block_size = get_local_size(0); | |
int sum = 0; | |
for (uint i = lid; i < n; i += block_size) | |
sum += bar[i]; | |
bar[lid] = sum; | |
} | |
); | |
cl_program program = ocl_build(context, src); | |
cl_kernel kernel = clCreateKernel(program, "foo", &error); CL_CHECK(error); | |
// alloc buffers | |
const uint buffer_size = 1 << 24; | |
const uint num_host_buffers = std::atoi(argv[1]); | |
cl_mem host_buffers[num_host_buffers]; | |
std::cout << "Host memory: " << num_host_buffers | |
<< " * " << buffer_size * sizeof(int) / (1 << 20) | |
<< " = " << num_host_buffers * buffer_size * sizeof(int) / (1 << 20) << "MB" | |
<< std::endl; | |
int* init = new int[buffer_size]; | |
for (uint k = 0; k < buffer_size; k++) | |
init[k] = std::rand(); | |
for (uint i = 0; i < num_host_buffers; i++) { | |
// buffer | |
host_buffers[i] = clCreateBuffer( | |
context, | |
CL_MEM_READ_ONLY | |
| CL_MEM_HOST_WRITE_ONLY | |
| CL_MEM_ALLOC_HOST_PTR | |
| CL_MEM_COPY_HOST_PTR, | |
buffer_size * sizeof(int), | |
(void*)init, | |
&error | |
); CL_CHECK(error) | |
} | |
delete[] init; | |
// alloc device buffers | |
cl_mem device_buffer = clCreateBuffer( | |
context, | |
CL_MEM_READ_ONLY | |
| CL_MEM_HOST_NO_ACCESS, | |
buffer_size * sizeof(int), 0, &error | |
); CL_CHECK(error); | |
// kernel execution | |
std::cout << "Running kernels... "; std::cout.flush(); | |
const size_t global[] = { 256 }; | |
const size_t local[] = { 256 }; | |
error = clSetKernelArg( | |
kernel, 0, sizeof(cl_mem), | |
&device_buffer | |
); CL_CHECK(error); | |
error = clSetKernelArg(kernel, 1, sizeof(uint), &buffer_size); CL_CHECK(error); | |
for (uint i = 0; i < num_host_buffers; i++) { | |
clEnqueueCopyBuffer( | |
queue, host_buffers[i], device_buffer, | |
0, 0, buffer_size * sizeof(int), | |
0, 0, 0 | |
); | |
error = clEnqueueNDRangeKernel( | |
queue, kernel, | |
1, 0, global, local, | |
0, 0, 0 | |
); CL_CHECK(error); | |
} | |
clFinish(queue); | |
std::cout << "done" << std::endl; | |
std::cout << "Mapping host buffers... "; std::cout.flush(); | |
for (uint i = 0; i < num_host_buffers; i++) { | |
int* mem = (int*)clEnqueueMapBuffer( | |
queue, host_buffers[i], true, | |
CL_MAP_WRITE_INVALIDATE_REGION, | |
0, buffer_size * sizeof(int), | |
0, 0, 0, &error | |
); // !! CL_MAP_FAILURE here !! | |
if (error != CL_SUCCESS) | |
break; | |
// ... do something with 'mem' here ... | |
error = clEnqueueUnmapMemObject(queue, host_buffers[i], mem, 0, 0, 0); | |
} | |
if (error == CL_SUCCESS) | |
std::cout << "done" << std::endl; | |
else | |
std::cout << "failed with code " << error << std::endl; | |
// release | |
clReleaseMemObject(device_buffer); | |
for (uint i = 0; i < num_host_buffers; i++) | |
clReleaseMemObject(host_buffers[i]); | |
clReleaseKernel(kernel); | |
clReleaseProgram(program); | |
clReleaseCommandQueue(queue); | |
clReleaseContext(context); | |
return 0; | |
} | |
cl_program ocl_build(cl_context context, const std::string& src) { | |
cl_int error; | |
const char* src_str = src.c_str(); | |
cl_program program = clCreateProgramWithSource(context, 1, &src_str, 0, &error); CL_CHECK(error); | |
const char otps[] = "-cl-mad-enable -cl-no-signed-zeros"; | |
error = clBuildProgram(program, 0, 0, otps, 0, 0); | |
if (error != CL_SUCCESS) { | |
size_t len; | |
char buffer[1 << 13]; | |
std::cerr << "error (" << error << ") : failed to build program executable" << std::endl; | |
uint num_devices; | |
clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(uint), (void*)&num_devices, 0); | |
cl_device_id device_ids[num_devices]; | |
clGetContextInfo(context, CL_CONTEXT_DEVICES, num_devices * sizeof(cl_device_id), device_ids, 0); | |
for (uint i = 0; i < num_devices; i++) { | |
clGetProgramBuildInfo(program, device_ids[i], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); | |
std::cerr << buffer << std::endl << std::endl; | |
} | |
exit(error); | |
} | |
return program; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment