Skip to content

Instantly share code, notes, and snippets.

@tanayseven
Created November 21, 2015 16:07
Show Gist options
  • Save tanayseven/be04c7e0d1d1e95cc736 to your computer and use it in GitHub Desktop.
Save tanayseven/be04c7e0d1d1e95cc736 to your computer and use it in GitHub Desktop.
OpenCL program to copy a given string n times
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "string.h"
#include "CL/cl.h"
// #define DATA_SIZE 10
/* Number of bytes requested from read() per call; also the initial buffer size. */
#define BUF_SIZE 256
/* Value POSIX calls such as open() and read() return on failure. */
#define ERROR (-1)
/* Byte count read() returns once the end of the file has been reached. */
#define END 0

/* Kernel source text handed to OpenCL; an empty string until main() loads a file. */
char *KernelSource = "";

/**
 * Read a whole file into a newly allocated, NUL-terminated buffer.
 *
 * The buffer grows as needed, so files of any size up to INT_MAX - BUF_SIZE
 * bytes are supported.
 *
 * @param filename  Path of the file to read.
 * @param len       If non-NULL, receives the number of bytes read (the
 *                  terminating NUL is not counted). Left untouched on failure.
 * @return          Heap buffer holding the file contents; the caller owns it
 *                  and must free() it. On any failure (open, read, allocation,
 *                  file too large) returns (char*)-1, the sentinel this
 *                  function has always used for errors.
 */
char* read_file(char* filename, int* len) {
    char *buf = NULL;
    size_t capacity = 0;
    size_t used = 0;

    int fd = open(filename, O_RDONLY);
    if (fd == ERROR) {
        perror("Error accessing file.");
        return (char*)-1;
    }

    for (;;) {
        /* The byte count is reported through an int, so refuse larger files. */
        if (used > (size_t)INT_MAX - BUF_SIZE) {
            fprintf(stderr, "File is too large to load.\n");
            goto fail;
        }

        /* Always keep room for one full chunk plus the terminating NUL. */
        if (capacity - used < (size_t)BUF_SIZE + 1) {
            size_t new_capacity = capacity ? capacity * 2 : (size_t)BUF_SIZE + 1;
            char *grown = realloc(buf, new_capacity);
            if (grown == NULL) {
                perror("Error allocating file buffer.");
                goto fail;
            }
            buf = grown;
            capacity = new_capacity;
        }

        ssize_t got = read(fd, buf + used, BUF_SIZE);
        if (got == ERROR) {
            perror("Error reading file.");
            goto fail;
        }
        if (got == END) {
            break;
        }
        used += (size_t)got;
    }

    buf[used] = '\0';
    close(fd);
    if (len != NULL) {
        (*len) = (int)used;
    }
    return buf;

fail:
    free(buf);
    close(fd);
    return (char*)-1;
}
/**
 * Map an OpenCL status code to the name of its symbolic constant.
 *
 * @param error  Status code as returned by an OpenCL API call.
 * @return       Pointer to a static string naming the code, or
 *               "Unknown OpenCL error" when the code is not in the table.
 *               The string must not be freed or modified.
 */
const char *getErrorString(cl_int error)
{
    static const struct {
        cl_int code;
        const char *name;
    } known_codes[] = {
        /* run-time and JIT compiler errors */
        { 0, "CL_SUCCESS" },
        { -1, "CL_DEVICE_NOT_FOUND" },
        { -2, "CL_DEVICE_NOT_AVAILABLE" },
        { -3, "CL_COMPILER_NOT_AVAILABLE" },
        { -4, "CL_MEM_OBJECT_ALLOCATION_FAILURE" },
        { -5, "CL_OUT_OF_RESOURCES" },
        { -6, "CL_OUT_OF_HOST_MEMORY" },
        { -7, "CL_PROFILING_INFO_NOT_AVAILABLE" },
        { -8, "CL_MEM_COPY_OVERLAP" },
        { -9, "CL_IMAGE_FORMAT_MISMATCH" },
        { -10, "CL_IMAGE_FORMAT_NOT_SUPPORTED" },
        { -11, "CL_BUILD_PROGRAM_FAILURE" },
        { -12, "CL_MAP_FAILURE" },
        { -13, "CL_MISALIGNED_SUB_BUFFER_OFFSET" },
        { -14, "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST" },
        { -15, "CL_COMPILE_PROGRAM_FAILURE" },
        { -16, "CL_LINKER_NOT_AVAILABLE" },
        { -17, "CL_LINK_PROGRAM_FAILURE" },
        { -18, "CL_DEVICE_PARTITION_FAILED" },
        { -19, "CL_KERNEL_ARG_INFO_NOT_AVAILABLE" },
        /* compile-time errors */
        { -30, "CL_INVALID_VALUE" },
        { -31, "CL_INVALID_DEVICE_TYPE" },
        { -32, "CL_INVALID_PLATFORM" },
        { -33, "CL_INVALID_DEVICE" },
        { -34, "CL_INVALID_CONTEXT" },
        { -35, "CL_INVALID_QUEUE_PROPERTIES" },
        { -36, "CL_INVALID_COMMAND_QUEUE" },
        { -37, "CL_INVALID_HOST_PTR" },
        { -38, "CL_INVALID_MEM_OBJECT" },
        { -39, "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR" },
        { -40, "CL_INVALID_IMAGE_SIZE" },
        { -41, "CL_INVALID_SAMPLER" },
        { -42, "CL_INVALID_BINARY" },
        { -43, "CL_INVALID_BUILD_OPTIONS" },
        { -44, "CL_INVALID_PROGRAM" },
        { -45, "CL_INVALID_PROGRAM_EXECUTABLE" },
        { -46, "CL_INVALID_KERNEL_NAME" },
        { -47, "CL_INVALID_KERNEL_DEFINITION" },
        { -48, "CL_INVALID_KERNEL" },
        { -49, "CL_INVALID_ARG_INDEX" },
        { -50, "CL_INVALID_ARG_VALUE" },
        { -51, "CL_INVALID_ARG_SIZE" },
        { -52, "CL_INVALID_KERNEL_ARGS" },
        { -53, "CL_INVALID_WORK_DIMENSION" },
        { -54, "CL_INVALID_WORK_GROUP_SIZE" },
        { -55, "CL_INVALID_WORK_ITEM_SIZE" },
        { -56, "CL_INVALID_GLOBAL_OFFSET" },
        { -57, "CL_INVALID_EVENT_WAIT_LIST" },
        { -58, "CL_INVALID_EVENT" },
        { -59, "CL_INVALID_OPERATION" },
        { -60, "CL_INVALID_GL_OBJECT" },
        { -61, "CL_INVALID_BUFFER_SIZE" },
        { -62, "CL_INVALID_MIP_LEVEL" },
        { -63, "CL_INVALID_GLOBAL_WORK_SIZE" },
        { -64, "CL_INVALID_PROPERTY" },
        { -65, "CL_INVALID_IMAGE_DESCRIPTOR" },
        { -66, "CL_INVALID_COMPILER_OPTIONS" },
        { -67, "CL_INVALID_LINKER_OPTIONS" },
        { -68, "CL_INVALID_DEVICE_PARTITION_COUNT" },
        /* extension errors */
        { -1000, "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR" },
        { -1001, "CL_PLATFORM_NOT_FOUND_KHR" },
        { -1002, "CL_INVALID_D3D10_DEVICE_KHR" },
        { -1003, "CL_INVALID_D3D10_RESOURCE_KHR" },
        { -1004, "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR" },
        { -1005, "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR" },
    };
    const unsigned int entry_count = sizeof known_codes / sizeof known_codes[0];
    unsigned int idx;

    for (idx = 0; idx < entry_count; ++idx) {
        if (known_codes[idx].code == error) {
            return known_codes[idx].name;
        }
    }
    return "Unknown OpenCL error";
}
int main(void)
{
cl_context context;
cl_context_properties properties[3];
cl_kernel kernel;
cl_command_queue command_queue;
cl_program program;
cl_int err;
cl_uint num_of_platforms=0;
cl_platform_id platform_id;
cl_device_id device_id;
cl_uint num_of_devices=0;
cl_mem input, output, n;
size_t global;
// size_t local;
int result;
int len = 0, i;
KernelSource = read_file("str_cpy.cl",&len);
char inputData[]="hello";
int DATA_SIZE = strlen(inputData);
int inp_len = sizeof(char) * DATA_SIZE;
const int N = 10;
int op_size = sizeof(char)*DATA_SIZE*N;
char *results;
results = (char*)malloc(op_size*2);
char *results2;
results2 = (char*)malloc(op_size);
printf("%s\n",inputData);
i = 0;
// retreives a list of platforms available
if (clGetPlatformIDs(1, &platform_id, &num_of_platforms)!= CL_SUCCESS)
{
printf("Unable to get platform_id\n");
return 1;
}
// try to get a supported GPU device
if (clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, 1, &device_id, &num_of_devices) != CL_SUCCESS)
{
printf("Unable to get device_id\n");
return 1;
}
// context properties list - must be terminated with 0
properties[0]= CL_CONTEXT_PLATFORM;
properties[1]= (cl_context_properties) platform_id;
properties[2]= 0;
// create a context with the GPU device
context = clCreateContext(properties,1,&device_id,NULL,NULL,&err);
// create command queue using the context and device
command_queue = clCreateCommandQueue(context, device_id, 0, &err);
// create a program from the kernel source code
program = clCreateProgramWithSource(context,1,(const char **) &KernelSource, NULL, &err);
// compile the program
if (result = clBuildProgram(program, 0, NULL, NULL, NULL, NULL) != CL_SUCCESS)
{
printf("Error building program\n");
printf("%s\n", getErrorString(result));
return 1;
}
// specify which kernel from the program to execute
kernel = clCreateKernel(program, "str_cpy", &err);
// create buffers for the input and ouput
input = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(char) * DATA_SIZE, NULL, NULL);
n = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int), NULL, NULL);
output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, op_size, NULL, NULL);
// output2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(char) * DATA_SIZE, NULL, NULL);
// load data into the input buffer
clEnqueueWriteBuffer(command_queue, input, CL_TRUE, 0, sizeof(char) * DATA_SIZE, inputData, 0, NULL, NULL);
clEnqueueWriteBuffer(command_queue, n, CL_TRUE, 0, sizeof(int), &DATA_SIZE, 0, NULL, NULL);
// set the argument list for the kernel command
clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
clSetKernelArg(kernel, 1, sizeof(int), &DATA_SIZE);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &output);
// clSetKernelArg(kernel, 3, sizeof(cl_mem), &output2);
global = op_size;
// enqueue the kernel command for execution
clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL);
clFinish(command_queue);
// copy the results from out of the output buffer
clEnqueueReadBuffer(command_queue, output, CL_TRUE, 0, op_size, results, 0, NULL, NULL);
// print the results
printf("output 1: \n");
for(i=0;i<op_size;i++)
{
printf("%c",results[i]);
}
// cleanup - release OpenCL resources
clReleaseMemObject(input);
clReleaseMemObject(output);
clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseCommandQueue(command_queue);
clReleaseContext(context);
printf("\n");
// for (i = 0 ; i < N ; ++i )
// free(results[i]);
// free(results);
return 0;
}
/*
 * Each work-item writes one byte of B: the byte of A at position
 * (global id modulo n). With n equal to the length of A, B receives A
 * repeated back to back.
 *
 * A - source bytes; indices 0 .. n-1 are read
 * n - modulus applied to the global id (must be non-zero)
 * B - destination; index get_global_id(0) is written
 */
__kernel void str_cpy(__global char *A, int n, __global char *B)
{
    const int gid = get_global_id(0);
    const int src_index = gid % n;

    B[gid] = A[src_index];
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment