Created
April 1, 2014 18:32
-
-
Save jatesy/9920155 to your computer and use it in GitHub Desktop.
Graphics Processing Unit (GPU) programming: sorting, implemented in parallel in OpenCL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * vector_sort - parallel rank sort, one work-item per input element.
 *
 * A: input array; only read by this kernel.
 * B: output array; receives the elements of A in ascending order.
 *
 * Work-item i counts how many elements must come before A[i] in the sorted
 * sequence and stores A[i] at that position in B.
 *
 * The number of elements is taken from the global work size, so the host has
 * to enqueue exactly one work-item per element of A (1-D range, no offset).
 */
__kernel void vector_sort(__global int *A, __global int *B) {
    // Index of the element this work-item is responsible for
    int i = get_global_id(0);
    // One work-item per element, so the global size is the array length
    int size = get_global_size(0);
    int key = A[i];

    // Rank of A[i]: number of elements that sort strictly before it
    int p = 0;
    for (int j = 0; j < size; j++) {
        int other = A[j];
        // Equal values are ordered by their original index. This gives every
        // work-item a distinct rank, so no two work-items ever write the same
        // slot and B never has to be read or probed for a free position.
        if (other < key || (other == key && j < i))
            p++;
    }

    B[p] = key;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#define LIST_SIZE 1024 //We can change the size of array to 2048, 4096, 8192, 16384 and 32768 | |
#ifdef __APPLE__ | |
#include <OpenCL/opencl.h> | |
#else | |
#include <CL/cl.h> | |
#endif | |
#define MAX_SOURCE_SIZE (0x100000) | |
int main(void) { | |
// Create the two vectors | |
int i; | |
int *A = (int*)malloc(sizeof(int)*LIST_SIZE); | |
int *B = (int*)malloc(sizeof(int)*LIST_SIZE); | |
for(i = 0; i < LIST_SIZE; i++) { | |
A[i] = (rand() % 10000) + 1; | |
B[i] = 0; | |
} | |
// Load the kernel source code into the array source_str | |
FILE *fp; | |
char *source_str; | |
size_t source_size; | |
fp = fopen("q2-kernel.cl", "r"); | |
if (!fp) { | |
fprintf(stderr, "Failed to load kernel.\n"); | |
exit(1); | |
} | |
source_str = (char*)malloc(MAX_SOURCE_SIZE); | |
source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp); | |
fclose( fp ); | |
// Get platform and device information | |
cl_platform_id platform_id = NULL; | |
cl_device_id device_id = NULL; | |
cl_uint ret_num_devices; | |
cl_uint ret_num_platforms; | |
cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms); | |
ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_ALL, 1, | |
&device_id, &ret_num_devices); | |
// Create an OpenCL context | |
cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret); | |
// Create a command queue | |
cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret); | |
// Create memory buffers on the device for each vector | |
cl_mem a_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, | |
LIST_SIZE * sizeof(int), NULL, &ret); | |
cl_mem b_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, | |
LIST_SIZE * sizeof(int), NULL, &ret); | |
//cl_mem c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, | |
// LIST_SIZE * sizeof(int), NULL, &ret); | |
// Copy the lists A and B to their respective memory buffers | |
ret = clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0, | |
LIST_SIZE * sizeof(int), A, 0, NULL, NULL); | |
ret = clEnqueueWriteBuffer(command_queue, b_mem_obj, CL_TRUE, 0, | |
LIST_SIZE * sizeof(int), B, 0, NULL, NULL); | |
// Create a program from the kernel source | |
cl_program program = clCreateProgramWithSource(context, 1, | |
(const char **)&source_str, (const size_t *)&source_size, &ret); | |
// Build the program | |
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); | |
// Create the OpenCL kernel | |
cl_kernel kernel = clCreateKernel(program, "vector_sort", &ret); | |
// Set the arguments of the kernel | |
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_mem_obj); | |
ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_mem_obj); | |
//ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_mem_obj); | |
// Execute the OpenCL kernel on the list | |
size_t global_item_size = LIST_SIZE; // Process the entire lists | |
size_t local_item_size = 64; // Process in groups of 64 | |
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | |
&global_item_size, &local_item_size, 0, NULL, NULL); | |
// Read the memory buffer C on the device to the local variable C | |
//int *C = (int*)malloc(sizeof(int)*LIST_SIZE); | |
ret = clEnqueueReadBuffer(command_queue, b_mem_obj, CL_TRUE, 0, | |
LIST_SIZE * sizeof(int), B, 0, NULL, NULL); | |
// Display the result to the screen | |
for(i = 0; i < LIST_SIZE; i++) | |
printf("%5d , %5d\n", A[i], B[i]); | |
// Clean up | |
ret = clFlush(command_queue); | |
ret = clFinish(command_queue); | |
ret = clReleaseKernel(kernel); | |
ret = clReleaseProgram(program); | |
ret = clReleaseMemObject(a_mem_obj); | |
ret = clReleaseMemObject(b_mem_obj); | |
ret = clReleaseCommandQueue(command_queue); | |
ret = clReleaseContext(context); | |
free(A); | |
free(B); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment