Created
April 19, 2010 02:23
-
-
Save mikejs/370705 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <stdio.h> | |
#include <string.h> | |
#include <math.h> | |
#include <OpenCL/opencl.h> | |
const char *KernelSource = "\n" \ | |
"#define DELTA 0x9e3779b9 \n" \ | |
"#define ENC_ROUND(sum)" \ | |
"{ " \ | |
" v.s0 += ((v.s1 << 4) + key.s0) ^ (v.s1 + sum) ^ ((v.s1 >> 5) + key.s1); " \ | |
" v.s1 += ((v.s0 << 4) + key.s2) ^ (v.s0 + sum) ^ ((v.s0 >> 5) + key.s3); " \ | |
"} \n" \ | |
"__kernel void encrypt(__global uint* input, \n" \ | |
" const uint4 key, \n" \ | |
" const uint blocks) \n" \ | |
"{ \n" \ | |
" int i; int id = get_global_id(0); \n" \ | |
" if (id > blocks) return; \n" \ | |
" uint2 v = {input[id * 2], input[(id * 2) + 1]}; \n" \ | |
" // no unroll pragma? \n" \ | |
" ENC_ROUND(DELTA); \n" \ | |
" ENC_ROUND(DELTA * 2); ENC_ROUND(DELTA * 3); \n" \ | |
" ENC_ROUND(DELTA * 4); ENC_ROUND(DELTA * 5); \n" \ | |
" ENC_ROUND(DELTA * 6); ENC_ROUND(DELTA * 7); \n" \ | |
" ENC_ROUND(DELTA * 8); ENC_ROUND(DELTA * 9); \n" \ | |
" ENC_ROUND(DELTA * 10); ENC_ROUND(DELTA * 11); \n" \ | |
" ENC_ROUND(DELTA * 12); ENC_ROUND(DELTA * 13); \n" \ | |
" ENC_ROUND(DELTA * 14); ENC_ROUND(DELTA * 15); \n" \ | |
" ENC_ROUND(DELTA * 16); ENC_ROUND(DELTA * 17); \n" \ | |
" ENC_ROUND(DELTA * 18); ENC_ROUND(DELTA * 19); \n" \ | |
" ENC_ROUND(DELTA * 20); ENC_ROUND(DELTA * 21); \n" \ | |
" ENC_ROUND(DELTA * 22); ENC_ROUND(DELTA * 23); \n" \ | |
" ENC_ROUND(DELTA * 24); ENC_ROUND(DELTA * 25); \n" \ | |
" ENC_ROUND(DELTA * 26); ENC_ROUND(DELTA * 27); \n" \ | |
" ENC_ROUND(DELTA * 28); ENC_ROUND(DELTA * 29); \n" \ | |
" ENC_ROUND(DELTA * 30); ENC_ROUND(DELTA * 31); \n" \ | |
" ENC_ROUND(DELTA * 32); \n" \ | |
" input[id * 2] = v.s0; input[(id * 2) + 1] = v.s1; \n" \ | |
"} \n" \ | |
"#define DEC_SUM 0xC6EF3720 \n" \ | |
"#define DEC_ROUND(sum)" \ | |
"{ " \ | |
" v.s1 -= ((v.s0 << 4) + key.s2) ^ (v.s0 + sum) ^ ((v.s0 >> 5) + key.s3); " \ | |
" v.s0 -= ((v.s1 << 4) + key.s0) ^ (v.s1 + sum) ^ ((v.s1 >> 5) + key.s1); " \ | |
"} \n" \ | |
"__kernel void decrypt(__global uint* input, \n" \ | |
" const uint4 key, \n" \ | |
" const uint blocks) \n" \ | |
"{ \n" \ | |
" int i; \n" | |
" int id = get_global_id(0); \n" \ | |
" if (id > blocks) return; \n" \ | |
" uint2 v = {input[id * 2], input[(id * 2) + 1]}; \n" \ | |
" DEC_ROUND(DEC_SUM); \n" \ | |
" DEC_ROUND(DEC_SUM - DELTA * 1); DEC_ROUND(DEC_SUM - DELTA * 2); " \ | |
" DEC_ROUND(DEC_SUM - DELTA * 3); DEC_ROUND(DEC_SUM - DELTA * 4); " \ | |
" DEC_ROUND(DEC_SUM - DELTA * 5); DEC_ROUND(DEC_SUM - DELTA * 6); " \ | |
" DEC_ROUND(DEC_SUM - DELTA * 7); DEC_ROUND(DEC_SUM - DELTA * 8); " \ | |
" DEC_ROUND(DEC_SUM - DELTA * 9); DEC_ROUND(DEC_SUM - DELTA * 10); " \ | |
" DEC_ROUND(DEC_SUM - DELTA * 11); DEC_ROUND(DEC_SUM - DELTA * 12); " \ | |
" DEC_ROUND(DEC_SUM - DELTA * 13); DEC_ROUND(DEC_SUM - DELTA * 14); " \ | |
" DEC_ROUND(DEC_SUM - DELTA * 15); DEC_ROUND(DEC_SUM - DELTA * 16); " \ | |
" DEC_ROUND(DEC_SUM - DELTA * 17); DEC_ROUND(DEC_SUM - DELTA * 18); " \ | |
" DEC_ROUND(DEC_SUM - DELTA * 19); DEC_ROUND(DEC_SUM - DELTA * 20); " \ | |
" DEC_ROUND(DEC_SUM - DELTA * 21); DEC_ROUND(DEC_SUM - DELTA * 22); " \ | |
" DEC_ROUND(DEC_SUM - DELTA * 23); DEC_ROUND(DEC_SUM - DELTA * 24); " \ | |
" DEC_ROUND(DEC_SUM - DELTA * 25); DEC_ROUND(DEC_SUM - DELTA * 26); " \ | |
" DEC_ROUND(DEC_SUM - DELTA * 27); DEC_ROUND(DEC_SUM - DELTA * 28); " \ | |
" DEC_ROUND(DEC_SUM - DELTA * 29); DEC_ROUND(DEC_SUM - DELTA * 30); " \ | |
" DEC_ROUND(DEC_SUM - DELTA * 31); " \ | |
" input[id * 2] = v.s0; input[(id * 2) + 1] = v.s1; \n" \ | |
"} \n" \ | |
"\n"; | |
int main() { | |
cl_device_id device_id; | |
cl_context context; | |
cl_command_queue commands; | |
cl_program program; | |
cl_kernel enc_kernel, dec_kernel; | |
cl_int err; | |
cl_mem data_buf; | |
const unsigned int BLOCKS = pow(2, 24); | |
size_t global = BLOCKS; | |
cl_uint *data = calloc(BLOCKS * 2, sizeof(cl_uint)); | |
cl_uint4 key = {1, 2, 3, 4}; | |
err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL); | |
if (err != CL_SUCCESS) { | |
printf("Error getting device ID.\n"); | |
exit(1); | |
} | |
context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); | |
if (!context) { | |
printf("Error creating context.\n"); | |
exit(1); | |
} | |
commands = clCreateCommandQueue(context, device_id, 0, &err); | |
if (!commands) { | |
printf("Error creating command queue.\n"); | |
exit(1); | |
} | |
program = clCreateProgramWithSource(context, 1, | |
(const char **)&KernelSource, | |
NULL, &err); | |
if (!program) { | |
printf("Error creating program.\n"); | |
exit(1); | |
} | |
err = clBuildProgram(program, 0, NULL, "-Werror", NULL, NULL); | |
if (err != CL_SUCCESS) { | |
size_t len; | |
char buffer[2048]; | |
printf("Error: Failed to build program executable!\n"); | |
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, | |
sizeof(buffer), buffer, &len); | |
printf("%s\n", buffer); | |
exit(1); | |
} | |
enc_kernel = clCreateKernel(program, "encrypt", &err); | |
if (!enc_kernel || err != CL_SUCCESS) { | |
printf("Error creating kernel.\n"); | |
exit(1); | |
} | |
dec_kernel = clCreateKernel(program, "decrypt", &err); | |
if (!dec_kernel || err != CL_SUCCESS) { | |
printf("Error creating decrypt kernel.\n"); | |
exit(1); | |
} | |
data_buf = clCreateBuffer(context, CL_MEM_READ_WRITE, | |
sizeof(cl_uint) * BLOCKS * 2, NULL, NULL); | |
if (!data_buf) { | |
printf("Error creating buffer.\n"); | |
exit(1); | |
} | |
err = clEnqueueWriteBuffer(commands, data_buf, CL_TRUE, 0, | |
sizeof(cl_uint) * BLOCKS * 2, data, | |
0, NULL, NULL); | |
if (err != CL_SUCCESS) { | |
printf("Error writing data buffer.\n"); | |
exit(1); | |
} | |
err = 0; | |
err = clSetKernelArg(enc_kernel, 0, sizeof(cl_mem), &data_buf); | |
err |= clSetKernelArg(enc_kernel, 1, sizeof(cl_uint4), &key); | |
err |= clSetKernelArg(enc_kernel, 2, sizeof(unsigned int), &BLOCKS); | |
if (err != CL_SUCCESS) { | |
printf("Error setting args.\n"); | |
exit(1); | |
} | |
err = clEnqueueNDRangeKernel(commands, enc_kernel, 1, NULL, &global, | |
NULL, 0, NULL, NULL); | |
if (err) { | |
printf("Error executing encrypt kernel: %d.\n", err); | |
exit(1); | |
} | |
clEnqueueBarrier(commands); | |
err = 0; | |
err = clSetKernelArg(dec_kernel, 0, sizeof(cl_mem), &data_buf); | |
err |= clSetKernelArg(dec_kernel, 1, sizeof(cl_uint4), &key); | |
err |= clSetKernelArg(dec_kernel, 2, sizeof(unsigned int), &BLOCKS); | |
if (err != CL_SUCCESS) { | |
printf("Error setting args.\n"); | |
exit(1); | |
} | |
err = clEnqueueNDRangeKernel(commands, dec_kernel, 1, NULL, &global, | |
NULL, 0, NULL, NULL); | |
if (err) { | |
printf("Error executing decrypt kernel: %d\n", err); | |
exit(1); | |
} | |
clFinish(commands); | |
err = clEnqueueReadBuffer(commands, data_buf, CL_TRUE, 0, | |
sizeof(cl_uint) * BLOCKS * 2, | |
data, 0, NULL, NULL); | |
if (err != CL_SUCCESS) { | |
printf("Error reading buffer.\n"); | |
exit(1); | |
} | |
printf("%u %u\n", data[0], data[1]); | |
clReleaseMemObject(data_buf); | |
clReleaseProgram(program); | |
clReleaseKernel(enc_kernel); | |
clReleaseKernel(dec_kernel); | |
clReleaseCommandQueue(commands); | |
clReleaseContext(context); | |
free(data); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment