Last active
July 26, 2016 17:07
-
-
Save pratikone/993a50000d852fda085256075de4389c to your computer and use it in GitHub Desktop.
OpenCL knapsack implementation - host and kernel files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// The code is based on needle.c for Needleman-Wunsch algorithm at | |
// https://github.com/vtsynergy/OpenDwarfs/blob/master/dynamic-programming/nw/needle.c | |
#define LIMIT -999 | |
#include <stdlib.h> | |
#include <stdio.h> | |
#include <string.h> | |
#include <math.h> | |
#include "needle.h" | |
#include <sys/time.h> | |
#include "../../include/rdtsc.h" | |
#include "../../include/common_args.h" | |
#include <malloc.h> | |
#define AOCL_ALIGNMENT 64 | |
//#define TRACE | |
void runTest( int argc, char** argv); | |
double gettime() { | |
struct timeval t; | |
gettimeofday(&t,NULL); | |
return t.tv_sec+t.tv_usec*1e-6; | |
} | |
int | |
maximum( int a, | |
int b, | |
int c){ | |
int k; | |
if( a <= b ) | |
k = b; | |
else | |
k = a; | |
if( k <=c ) | |
return(c); | |
else | |
return(k); | |
} | |
//////////////////////////////////////////////////////////////////////////////// | |
// Program main | |
//////////////////////////////////////////////////////////////////////////////// | |
int | |
main( int argc, char** argv) | |
{ | |
ocd_init(&argc, &argv, NULL); | |
ocd_initCL(); | |
runTest( argc, argv); | |
ocd_finalize(); | |
return EXIT_SUCCESS; | |
} | |
void usage(int argc, char **argv) | |
{ | |
fprintf(stderr, "Usage: %s <max_rows/max_cols> <penalty> \n", argv[0]); | |
fprintf(stderr, "\t<dimension> - x and y dimensions\n"); | |
fprintf(stderr, "\t<penalty> - penalty(positive integer)\n"); | |
exit(1); | |
} | |
void runTest( int argc, char** argv) | |
{ | |
int max_rows, max_cols, penalty; | |
int *input_itemsets, *weigths, *values; | |
cl_mem matrix_cuda,weigths_cuda, values_cuda; | |
int size; | |
int i, j; | |
cl_int errcode; | |
// the lengths of the two sequences should be able to divided by 16. | |
// And at current stage max_rows needs to equal max_cols | |
if (argc == 3) | |
{ | |
max_rows = atoi(argv[1]); //no of items | |
max_cols = atoi(argv[2]); // maximum capacity , column range will be W | |
} | |
else{ | |
usage(argc, argv); | |
} | |
if(atoi(argv[1])%16!=0){ | |
fprintf(stderr,"The dimension values must be a multiple of 16\n"); | |
exit(1); | |
} | |
max_rows = max_rows + 1; | |
max_cols = max_cols + 1; | |
if(_deviceType == 3) { | |
weigths = (int *)memalign(AOCL_ALIGNMENT, max_rows * sizeof(int) ); | |
values = (int *)memalign(AOCL_ALIGNMENT, max_rows * sizeof(int) ); | |
input_itemsets = (int *)memalign(AOCL_ALIGNMENT, max_rows * max_cols * sizeof(int) ); | |
} else { | |
weigths = (int *)malloc( max_rows * sizeof(int) ); | |
values = (int *)malloc( max_rows * sizeof(int) ); | |
input_itemsets = (int *)malloc(max_rows * max_cols * sizeof(int) ); | |
} | |
if (!input_itemsets) | |
fprintf(stderr, "error: can not allocate memory"); | |
srand ( 7 ); //TODO | |
//Initialize to zero | |
for (i = 0 ; i < max_cols; i++){ | |
for (j = 0 ; j < max_rows; j++){ | |
input_itemsets[i*max_cols+j] = 0; | |
} | |
} | |
//Used for input itemsets, then after the weigths matrix is constructed it is used/rewritten to store scores. | |
for(i=0; i< max_rows ; i++){ //please define your own sequence. | |
weigths[i] = rand() % 10; | |
values[i] = rand() % 10; | |
} | |
//cl_device_id device_id; | |
//cl_context context; | |
//cl_command_queue commands; | |
cl_program clProgram; | |
cl_kernel clKernel_nw1; | |
cl_kernel clKernel_nw2; | |
FILE *kernelFile; | |
char *kernelSource; | |
size_t kernelLength; | |
char* kernel_files; | |
int num_kernels = 1; | |
kernel_files = (char*) malloc(sizeof(char*)*num_kernels); | |
strcpy(kernel_files,"knapsack_kernel"); | |
clProgram = ocdBuildProgramFromFile(context,device_id, kernel_files, NULL); | |
clKernel_nw1 = clCreateKernel(clProgram, "knapsack_opencl_shared_1", &errcode); | |
CHKERR(errcode, "Failed to create kernel!"); | |
size = max_cols * max_rows; | |
weigths_cuda = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int)*max_rows, NULL, &errcode); | |
CHKERR(errcode, "Failed to create buffer!"); | |
values_cuda = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int)*max_rows, NULL, &errcode); | |
CHKERR(errcode, "Failed to create buffer!"); | |
matrix_cuda = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*size, NULL, &errcode); | |
CHKERR(errcode, "Failed to create buffer!"); | |
errcode = clEnqueueWriteBuffer(commands, weigths_cuda, CL_TRUE, 0, sizeof(int)*max_rows, (void *) weigths, 0, NULL, &ocdTempEvent); | |
clFinish(commands); | |
START_TIMER(ocdTempEvent, OCD_TIMER_H2D, "NW Reference Copy", ocdTempTimer) | |
END_TIMER(ocdTempTimer) | |
CHKERR(errcode, "Failed to enqueue write buffer!"); | |
errcode = clEnqueueWriteBuffer(commands, values_cuda, CL_TRUE, 0, sizeof(int)*max_rows, (void *) values, 0, NULL, &ocdTempEvent); | |
clFinish(commands); | |
START_TIMER(ocdTempEvent, OCD_TIMER_H2D, "NW Reference Copy", ocdTempTimer) | |
END_TIMER(ocdTempTimer) | |
CHKERR(errcode, "Failed to enqueue write buffer!"); | |
errcode = clEnqueueWriteBuffer(commands, matrix_cuda, CL_TRUE, 0, sizeof(int)*size, (void *) input_itemsets, 0, NULL, &ocdTempEvent); | |
clFinish(commands); | |
START_TIMER(ocdTempEvent, OCD_TIMER_H2D, "Knapsack table Set Copy", ocdTempTimer) | |
END_TIMER(ocdTempTimer) | |
CHKERR(errcode, "Failed to enqueue write buffer!"); | |
size_t localWorkSize[2] = {BLOCK_SIZE, 1}; //BLOCK_SIZE work items per work-group in 1D only. | |
size_t globalWorkSize[2]; | |
int block_width = ( max_cols - 1 )/BLOCK_SIZE; | |
// Processing the whole Knapsack table row by row here | |
for(i = 1 ; i < max_rows ; i++){ | |
globalWorkSize[0] = max_cols - 1; | |
globalWorkSize[1] = 1*localWorkSize[1]; | |
//printf("%s %d %s %d\n", "running as wave front, work-group nos ", globalWorkSize[0]/localWorkSize[0] , "with globalWorkSize", globalWorkSize[0]); | |
errcode = clSetKernelArg(clKernel_nw1, 0, sizeof(cl_mem), (void *) &weigths_cuda); | |
errcode = clSetKernelArg(clKernel_nw1, 1, sizeof(cl_mem), (void *) &values_cuda); | |
errcode |= clSetKernelArg(clKernel_nw1, 2, sizeof(cl_mem), (void *) &matrix_cuda); | |
errcode |= clSetKernelArg(clKernel_nw1, 3, sizeof(int), (void *) &max_cols); | |
errcode |= clSetKernelArg(clKernel_nw1, 4, sizeof(int), (void *) &i); | |
CHKERR(errcode, "Failed to set kernel arguments!"); | |
errcode = clEnqueueNDRangeKernel(commands, clKernel_nw1, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, &ocdTempEvent); | |
clFinish(commands); | |
START_TIMER(ocdTempEvent, OCD_TIMER_KERNEL, "NW Kernel nw1", ocdTempTimer) | |
END_TIMER(ocdTempTimer) | |
CHKERR(errcode, "Failed to enqueue kernel!"); | |
} | |
errcode = clEnqueueReadBuffer(commands, matrix_cuda, CL_TRUE, 0, sizeof(float)*size, (void *) input_itemsets, 0, NULL, &ocdTempEvent); | |
clFinish(commands); | |
START_TIMER(ocdTempEvent, OCD_TIMER_D2H, "knapsack table Set Copy", ocdTempTimer) | |
END_TIMER(ocdTempTimer) | |
CHKERR(errcode, "Failed to enqueue read buffer!"); | |
for (i = 0; i < size; ++i) | |
{ | |
printf("%d ", input_itemsets[ i ] ); | |
} | |
clReleaseMemObject(weigths_cuda); | |
clReleaseMemObject(matrix_cuda); | |
clReleaseKernel(clKernel_nw1); | |
clReleaseProgram(clProgram); | |
clReleaseCommandQueue(commands); | |
clReleaseContext(context); | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define BLOCK_SIZE 16 | |
int | |
maximum( int a, | |
int b){ | |
if( a <= b ) | |
return (b); | |
else | |
return (a); | |
} | |
__kernel void | |
knapsack_opencl_shared_1( __global int* weights, | |
__global int* values, | |
__global int* matrix_opencl, | |
int cols, | |
int i) | |
{ | |
int bx = get_group_id(0); | |
int tx = get_local_id(0); | |
//printf("@@@ kernel group-id %d local-id %d\n", bx, tx); | |
int b_index_row = i; | |
int b_index_col = BLOCK_SIZE * bx + tx; // j index in a W.G. | |
int index = b_index_row * cols + b_index_col; | |
int universal_row = ref_x/cols; | |
int universal_col = ref_x % cols; | |
if (weights[ b_index_row - 1 ] > universal_col ) //copy previous value if weight of item is more than current capacity | |
matrix_opencl[index] = matrix_opencl[index - cols] ; | |
else{ | |
matrix_opencl[index] = maximum( matrix_opencl[index - (cols + weights[b_index_row] )] + values[b_index_row], | |
matrix_opencl[index - cols]); | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment