Skip to content

Instantly share code, notes, and snippets.

@cjxgm
Created September 24, 2015 16:34
Show Gist options
  • Save cjxgm/b6377aff89bfc48527d9 to your computer and use it in GitHub Desktop.
Save cjxgm/b6377aff89bfc48527d9 to your computer and use it in GitHub Desktop.
opencl helloworld
// The official OpenCL C++ wrapper is just rubbish.
// That doesn't mean mine is any good.
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#include <CL/cl.h> // ml:lib += OpenCL
#include <type_traits>
#include <stdexcept>
#include <memory>
#include <string>
#include <vector>
#include <array>
namespace cl
{
inline namespace exception_
{
/// Exception thrown by every wrapper function when an OpenCL call fails.
struct fatal : std::logic_error { using logic_error::logic_error; };

/// Build a colourised error message and throw it as `fatal`.
///
/// @param func  name of the failing function (usually `__PRETTY_FUNCTION__`).
/// @param info  optional extra detail; appended on its own line when non-empty.
/// @return never returns; the `bool` return type only exists so the call can
///         appear on the right-hand side of `||` (see `OR_DIE`).
/// @throws fatal always.
[[noreturn]] inline bool die(std::string func, std::string const& info="")
{
    // `\033` is the portable spelling of ESC; `\e` is a compiler extension.
    func = "\033[1;31mfatal: \033[1;35m" + func + "\033[0m\n";
    if (!info.empty()) func += " \033[0;34m" + info + "\033[0m\n";
    throw fatal{func};
}
// Postfix check for an OpenCL status expression: `status OR_DIE(tag);` expands to
// `status == CL_SUCCESS || die(__PRETTY_FUNCTION__, "tag");`, so a non-success
// status throws `fatal`. The tag is stringified verbatim. Uses the standard
// `...` / `__VA_ARGS__` form rather than the GNU named-variadic extension.
#define OR_DIE(...) == CL_SUCCESS || die(__PRETTY_FUNCTION__, #__VA_ARGS__)
}
inline namespace type_
{
// Short names for the OpenCL C API types used by the wrapper functions below.
using uint = cl_uint;               // alias of cl_uint
using sint = cl_int;                // alias of cl_int; the type of the status codes checked by OR_DIE
using platform_id = cl_platform_id; // raw platform handle
using device_id = cl_device_id;     // raw device handle
inline namespace resource_
{
// Owning wrapper for a raw OpenCL handle type POINTER: a std::unique_ptr over
// the handle's pointee whose deleter is a plain function pointer taking the
// handle and returning sint. The make_* functions in this file construct these
// with the corresponding clRelease* function as the deleter.
template <class POINTER>
using resource = std::unique_ptr<
std::remove_pointer_t<POINTER>,
sint (*)(POINTER)>;
using context = resource<cl_context>;     // owning cl_context
using mem = resource<cl_mem>;             // owning cl_mem
using program = resource<cl_program>;     // owning cl_program
using kernel = resource<cl_kernel>;       // owning cl_kernel
using queue = resource<cl_command_queue>; // owning cl_command_queue
}
}
inline namespace platform_
{
inline auto nplatform()
{
uint n;
clGetPlatformIDs(0, nullptr, &n) OR_DIE(get-platform-ids);
return n;
}
inline auto platforms()
{
std::vector<platform_id> ps(nplatform());
clGetPlatformIDs(ps.size(), ps.data(), nullptr) OR_DIE(get-platform-ids);
return ps;
}
/// Query how many bytes clGetPlatformInfo needs to return parameter `name`
/// for platform `p`.
/// @throws fatal when the query does not return CL_SUCCESS.
inline auto platform_info_size_(platform_id p, cl_platform_info name)
{
    size_t bytes = 0;
    clGetPlatformInfo(p, name, 0, nullptr, &bytes) OR_DIE(get-platform-info);
    return bytes;
}
/// Fetch the string-valued platform parameter `name` of platform `p`.
/// @return the parameter text without the C terminator.
/// @throws fatal when a clGetPlatformInfo call does not return CL_SUCCESS.
inline auto platform_info_(platform_id p, cl_platform_info name)
{
    std::string result(platform_info_size_(p, name), '\0');
    clGetPlatformInfo(p, name, result.size(), &result[0], nullptr) OR_DIE(get-platform-info);
    // The size reported by OpenCL counts the terminating NUL; drop it so the
    // std::string does not carry an embedded '\0' into comparisons and output.
    while (!result.empty() && result.back() == '\0') result.pop_back();
    return result;
}
/// Snapshot of the descriptive strings of one OpenCL platform; each member is
/// filled from the like-named CL_PLATFORM_* query at construction.
struct platform_info
{
    std::string profile;     // CL_PLATFORM_PROFILE
    std::string version;     // CL_PLATFORM_VERSION
    std::string name;        // CL_PLATFORM_NAME
    std::string vendor;      // CL_PLATFORM_VENDOR
    std::string extensions;  // CL_PLATFORM_EXTENSIONS

    /// Query all five strings for platform `p`; throws `fatal` if any query fails.
    platform_info(platform_id p)
        : profile(platform_info_(p, CL_PLATFORM_PROFILE))
        , version(platform_info_(p, CL_PLATFORM_VERSION))
        , name(platform_info_(p, CL_PLATFORM_NAME))
        , vendor(platform_info_(p, CL_PLATFORM_VENDOR))
        , extensions(platform_info_(p, CL_PLATFORM_EXTENSIONS))
    {
    }
};
}
inline namespace device_
{
/// Count the devices of any type (CL_DEVICE_TYPE_ALL) on platform `p`.
/// @throws fatal when clGetDeviceIDs does not return CL_SUCCESS.
inline auto ndevice(platform_id p)
{
    uint count = 0;
    clGetDeviceIDs(p, CL_DEVICE_TYPE_ALL, 0, nullptr, &count) OR_DIE(get-device-ids);
    return count;
}
/// List the ids of all devices (CL_DEVICE_TYPE_ALL) on platform `p`.
/// @throws fatal when a clGetDeviceIDs call does not return CL_SUCCESS.
inline auto devices(platform_id p)
{
    auto const count = ndevice(p);
    std::vector<device_id> ids(count);
    clGetDeviceIDs(p, CL_DEVICE_TYPE_ALL, ids.size(), ids.data(), nullptr) OR_DIE(get-device-ids);
    return ids;
}
}
inline namespace context_
{
/// Create an OpenCL context on platform `p` spanning the devices in `ds`.
/// @return an owning `context` released through clReleaseContext.
/// @throws fatal when clCreateContext reports anything but CL_SUCCESS.
inline auto make_context(platform_id p, std::vector<device_id> const& ds)
{
    // Zero-terminated (key, value) list selecting the platform.
    cl_context_properties properties[] {
        CL_CONTEXT_PLATFORM,
        reinterpret_cast<cl_context_properties>(p),
        0,
    };
    sint status;
    auto raw = clCreateContext(properties, ds.size(), ds.data(), nullptr, nullptr, &status);
    status OR_DIE(create-context);
    return context{raw, &clReleaseContext};
}
}
inline namespace buffer_
{
/// Create a buffer object of `size` bytes in context `ctx`.
/// @param flags  cl_mem_flags forwarded to clCreateBuffer.
/// @param ptr    host pointer forwarded to clCreateBuffer (null by default).
/// @return an owning `mem` released through clReleaseMemObject.
/// @throws fatal when clCreateBuffer reports anything but CL_SUCCESS.
inline auto make_buffer(context const& ctx, cl_mem_flags flags, size_t size, void * ptr = {})
{
    sint status;
    auto raw = clCreateBuffer(ctx.get(), flags, size, ptr, &status);
    status OR_DIE(create-buffer);
    return mem{raw, &clReleaseMemObject};
}
}
inline namespace program_
{
/// Create (but do not build) a program object from the single source string `src`.
/// @return an owning `program` released through clReleaseProgram.
/// @throws fatal when clCreateProgramWithSource reports anything but CL_SUCCESS.
inline auto make_program(context const& ctx, std::string const& src)
{
    // The API takes parallel arrays of strings and lengths; we pass arrays of one.
    auto text = src.data();
    auto length = src.size();
    sint status;
    auto raw = clCreateProgramWithSource(ctx.get(), 1, &text, &length, &status);
    status OR_DIE(create-program-with-source);
    return program{raw, &clReleaseProgram};
}
/// Compile and link `prog` for the devices in `ds`.
///
/// @param options  build option string forwarded to clBuildProgram.
/// @throws fatal when clBuildProgram does not return CL_SUCCESS. The message
///         starts with "build-program" and is followed by each device's build
///         log that could be retrieved, so compiler diagnostics are not lost.
inline auto build_program(program const& prog,
                          std::vector<device_id> const& ds,
                          std::string const& options = {})
{
    auto const status = clBuildProgram(prog.get(),
                                       ds.size(), ds.data(),
                                       options.data(),
                                       nullptr, nullptr);
    if (status == CL_SUCCESS) return;

    std::string message = "build-program";
    for (auto d: ds) {
        // Best effort: a device whose log cannot be fetched is simply skipped,
        // the original build failure is still reported below.
        size_t bytes = 0;
        if (clGetProgramBuildInfo(prog.get(), d, CL_PROGRAM_BUILD_LOG,
                                  0, nullptr, &bytes) != CL_SUCCESS || bytes == 0)
            continue;
        std::string log(bytes, '\0');
        if (clGetProgramBuildInfo(prog.get(), d, CL_PROGRAM_BUILD_LOG,
                                  log.size(), &log[0], nullptr) != CL_SUCCESS)
            continue;
        // The reported size includes the terminating NUL.
        while (!log.empty() && log.back() == '\0') log.pop_back();
        if (!log.empty()) message += "\n" + log;
    }
    die(__PRETTY_FUNCTION__, message);
}
/// Convenience overload: create a program from `src` and build it for `ds`
/// with `options` in one step.
/// @return the built, owning `program`.
/// @throws fatal when creation or the build fails.
inline auto make_program(context const& ctx,
                         std::vector<device_id> const& ds,
                         std::string const& src,
                         std::string const& options = {})
{
    auto built = make_program(ctx, src);
    build_program(built, ds, options);
    return built;
}
}
inline namespace kernel_
{
/// Look up the kernel function called `name` in program `prog`.
/// @return an owning `kernel` released through clReleaseKernel.
/// @throws fatal when clCreateKernel reports anything but CL_SUCCESS.
inline auto make_kernel(program const& prog, std::string const& name)
{
    sint status;
    auto raw = clCreateKernel(prog.get(), name.data(), &status);
    status OR_DIE(create-kernel);
    return kernel{raw, &clReleaseKernel};
}
/// Set argument number `i` of `kern` from `size` bytes located at `arg`.
/// @throws fatal when clSetKernelArg does not return CL_SUCCESS.
inline auto kernel_arg(kernel const& kern, uint i, size_t size, void const* arg)
{
    auto const raw_kernel = kern.get();
    clSetKernelArg(raw_kernel, i, size, arg) OR_DIE(set-kernel-arg);
}
/// Set argument number `i` of `kern` to the object `arg`, passed as
/// `sizeof(T)` bytes starting at its address.
/// @throws fatal when clSetKernelArg does not return CL_SUCCESS.
template <class T>
inline auto kernel_arg(kernel const& kern, uint i, T const& arg)
{
    void const* bytes = &arg;
    kernel_arg(kern, i, sizeof(T), bytes);
}
/// Set argument number `i` of `kern` to the raw OpenCL handle owned by `arg`.
/// The handle value itself is what gets passed, not the owning wrapper.
/// @throws fatal when clSetKernelArg does not return CL_SUCCESS.
template <class T>
inline auto kernel_arg(kernel const& kern, uint i, resource<T> const& arg)
{
    // Copy the handle into a local so there is an object to take the address of.
    auto handle = arg.get();
    void const* bytes = &handle;
    kernel_arg(kern, i, sizeof(T), bytes);
}
}
inline namespace queue_
{
/// Create a command queue for device `d` inside context `ctx`.
/// @param props  queue property bits forwarded to clCreateCommandQueue (none by default).
/// @return an owning `queue` released through clReleaseCommandQueue.
/// @throws fatal when clCreateCommandQueue reports anything but CL_SUCCESS.
inline auto make_queue(context const& ctx, device_id d,
                       cl_command_queue_properties props = {})
{
    sint status;
    auto raw = clCreateCommandQueue(ctx.get(), d, props, &status);
    status OR_DIE(create-command-queue);
    return queue{raw, &clReleaseCommandQueue};
}
/// Enqueue `kern` on `q` over a DIMENSION-dimensional index space.
/// @param size    global work size per dimension.
/// @param offset  global work offset per dimension (all zero by default).
/// The local work size is passed as null and no events are waited on or returned.
/// @throws fatal when clEnqueueNDRangeKernel does not return CL_SUCCESS.
template <uint DIMENSION>
inline auto enqueue_kernel(queue const& q, kernel const& kern,
                           std::array<size_t, DIMENSION> const& size,
                           std::array<size_t, DIMENSION> const& offset = {})
{
    size_t const* const global_offset = offset.data();
    size_t const* const global_size = size.data();
    clEnqueueNDRangeKernel(q.get(), kern.get(),
                           DIMENSION, global_offset, global_size,
                           nullptr, 0, nullptr, nullptr) OR_DIE(enqueue-nd-range-kernel);
}
/// Call clFinish on `q`.
/// @throws fatal when clFinish does not return CL_SUCCESS.
inline auto finish(queue const& q)
{
    auto const raw_queue = q.get();
    clFinish(raw_queue) OR_DIE(finish);
}
/// Map `size` bytes of `buf` starting at `offset` with a blocking map, then
/// immediately enqueue the matching unmap.
/// @param flags  cl_map_flags forwarded to clEnqueueMapBuffer.
/// @throws fatal when the map or the unmap does not report CL_SUCCESS.
inline auto enqueue_sync_buffer(queue const& q, mem const& buf,
                                cl_map_flags flags, size_t size, size_t offset = 0)
{
    auto const raw_queue = q.get();
    auto const raw_buffer = buf.get();

    sint status;
    auto mapped = clEnqueueMapBuffer(raw_queue, raw_buffer,
                                     CL_TRUE, flags, offset, size,
                                     0, nullptr, nullptr,
                                     &status);
    status OR_DIE(enqueue-map-buffer);

    clEnqueueUnmapMemObject(raw_queue, raw_buffer, mapped,
                            0, nullptr, nullptr) OR_DIE(enqueue-unmap-mem-object);
}
}
#undef OR_DIE
}
#include "cl.hh"
#include <iostream>
namespace
{
// Number of elements in the built-in array A. The expansion is fully
// parenthesized so it stays correct inside larger expressions such as
// `x / ARRAY_LENGTH(a)`.
#define ARRAY_LENGTH(A) (sizeof(A) / sizeof((A)[0]))
// Run the "fill" kernel on device `d` of platform `p`: it writes i + 1 into
// element i of a 32-int host-backed buffer, which is then printed to stderr.
auto test(cl::platform_id p, cl::device_id d)
{
    std::vector<cl::device_id> const targets{d};
    auto ctx = cl::make_context(p, targets);

    // Host storage that backs the device buffer (CL_MEM_USE_HOST_PTR).
    alignas(sizeof(int)) int raw_buf[32]{};
    auto const buffer_flags = CL_MEM_WRITE_ONLY
                            | CL_MEM_USE_HOST_PTR
                            | CL_MEM_HOST_READ_ONLY;
    auto buf = cl::make_buffer(ctx, buffer_flags, sizeof(raw_buf), raw_buf);

    char const* const source = R"kernel(
__kernel void fill(__global int * dst)
{
int i = get_global_id(0);
dst[i] = i + 1;
}
)kernel";
    auto prog = cl::make_program(ctx, targets, source);
    auto kern = cl::make_kernel(prog, "fill");
    cl::kernel_arg(kern, 0, buf);

    auto q = cl::make_queue(ctx, d);
    size_t const work_items = ARRAY_LENGTH(raw_buf);
    cl::enqueue_kernel<1>(q, kern, {{ work_items }});
    cl::enqueue_sync_buffer(q, buf, CL_MAP_READ, sizeof(raw_buf));
    cl::finish(q);

    std::cerr << " >>";
    for (int const value: raw_buf) {
        std::cerr << " " << value;
    }
    std::cerr << "\n";
}
#undef ARRAY_LENGTH
}
int main()
{
std::cerr << "platforms:\n";
auto ps = cl::platforms();
for (auto p: ps) {
cl::platform_info info{p};
auto ds = cl::devices(p);
std::cerr << " " << info.name << " :: " << info.version << "\n";
for (auto d: ds) std::cerr << " - device " << d << "\n";
test(p, ds[0]);
}
}
$ ml -t hello.cc
dirty: cl.hh
dirty: hello.cc
compiling hello.cc...
dirty binary: hello /tmp/build/hello.o
linking hello from /tmp/build/hello.o...
running hello...
platforms:
Intel Gen OCL Driver :: OpenCL 1.2 beignet 1.2 (git-f9094e5)
- device 0x7f4a5a5ca800
>> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
NVIDIA CUDA :: OpenCL 1.2 CUDA 7.5.18
- device 0x1d91c60
>> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
Intel(R) OpenCL :: OpenCL 1.2 LINUX
- device 0x1d97f88
>> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
program hello returned [0]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment