-
-
Save coventry/291aedc894082d8794ef to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* drivertest.cpp | |
* Vector addition (host code) | |
* | |
* Andrei de A. Formiga, 2012-06-04 | |
*/ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <assert.h> | |
#include <cuda.h> | |
#include <builtin_types.h> | |
#include "matSumKernel.h" | |
// Wrap a CUDA Driver API call with checkCudaErrors(...). If the call's
// CUresult is anything other than CUDA_SUCCESS, the numeric error code and
// the call site (file and line) are written to stderr and the process exits
// with status -1.
#define checkCudaErrors(err)  __checkCudaErrors (err, __FILE__, __LINE__)

inline void __checkCudaErrors( CUresult err, const char *file, const int line )
{
    // Successful calls need no reporting.
    if (err == CUDA_SUCCESS)
        return;

    fprintf(stderr,
            "CUDA Driver API error = %04d from file <%s>, line %i.\n",
            err, file, line);
    exit(-1);
}
// --- global variables ---------------------------------------------------- | |
CUdevice device; | |
CUcontext context; | |
CUmodule module; | |
CUfunction function; | |
size_t totalGlobalMem; | |
char *module_file = (char*) "matSumKernel.ptx"; | |
char *kernel_name = (char*) "matSum"; | |
// --- functions ----------------------------------------------------------- | |
void initCUDA() | |
{ | |
int deviceCount = 0; | |
CUresult err = cuInit(0); | |
int major = 0, minor = 0; | |
FILE *blob; | |
char cubin[100000]; | |
int numread, size; | |
if (err == CUDA_SUCCESS) | |
checkCudaErrors(cuDeviceGetCount(&deviceCount)); | |
if (deviceCount == 0) { | |
fprintf(stderr, "Error: no devices supporting CUDA\n"); | |
exit(-1); | |
} | |
// get first CUDA device | |
checkCudaErrors(cuDeviceGet(&device, 0)); | |
char name[100]; | |
cuDeviceGetName(name, 100, device); | |
printf("> Using device 0: %s\n", name); | |
// get compute capabilities and the devicename | |
checkCudaErrors( cuDeviceComputeCapability(&major, &minor, device) ); | |
printf("> GPU Device has SM %d.%d compute capability\n", major, minor); | |
checkCudaErrors( cuDeviceTotalMem(&totalGlobalMem, device) ); | |
printf(" Total amount of global memory: %llu bytes\n", | |
(unsigned long long)totalGlobalMem); | |
printf(" 64-bit Memory Address: %s\n", | |
(totalGlobalMem > (unsigned long long)4*1024*1024*1024L)? | |
"YES" : "NO"); | |
err = cuCtxCreate(&context, 0, device); | |
if (err != CUDA_SUCCESS) { | |
fprintf(stderr, "* Error initializing the CUDA context.\n"); | |
cuCtxDetach(context); | |
exit(-1); | |
} | |
blob = fopen("matsum.cubin", "r"); | |
assert(blob); | |
fseek(blob, 0L, SEEK_END); | |
size = ftell(blob); | |
fseek(blob, 0L, SEEK_SET); | |
numread = fread(cubin, size, 1, blob); | |
fprintf(stderr, "Read %i\n", numread); | |
assert(numread == 1); | |
// err = cuModuleLoad(&module, module_file); | |
err = cuModuleLoadData(&module, cubin); | |
if (err != CUDA_SUCCESS) { | |
fprintf(stderr, "* Error number %i loading the module %s\n", err, module_file); | |
cuCtxDetach(context); | |
exit(-1); | |
} | |
err = cuModuleGetFunction(&function, module, kernel_name); | |
if (err != CUDA_SUCCESS) { | |
fprintf(stderr, "* Error getting kernel function %s\n", kernel_name); | |
cuCtxDetach(context); | |
exit(-1); | |
} | |
} | |
void finalizeCUDA() | |
{ | |
cuCtxDetach(context); | |
} | |
// Allocates three device buffers of N ints each and stores their device
// pointers in *d_a, *d_b and *d_c. A failed allocation is reported by
// checkCudaErrors, which terminates the process.
void setupDeviceMemory(CUdeviceptr *d_a, CUdeviceptr *d_b, CUdeviceptr *d_c)
{
    // All three vectors have the same size in bytes.
    const size_t vectorBytes = sizeof(int) * N;

    checkCudaErrors( cuMemAlloc(d_a, vectorBytes) );
    checkCudaErrors( cuMemAlloc(d_b, vectorBytes) );
    checkCudaErrors( cuMemAlloc(d_c, vectorBytes) );
}
// Frees the three device buffers in the order d_a, d_b, d_c. A failed free
// is reported by checkCudaErrors, which terminates the process.
void releaseDeviceMemory(CUdeviceptr d_a, CUdeviceptr d_b, CUdeviceptr d_c)
{
    const CUdeviceptr buffers[] = { d_a, d_b, d_c };
    const size_t count = sizeof(buffers) / sizeof(buffers[0]);

    for (size_t k = 0; k < count; ++k)
        checkCudaErrors( cuMemFree(buffers[k]) );
}
// Launches the loaded kernel `function` on the three device buffers and
// waits for it to finish.
//   d_a, d_b : device pointers passed as the first and second kernel arguments
//   d_c      : device pointer passed as the third kernel argument
// Launch layout: N blocks of one thread each, no dynamic shared memory,
// default stream. Launch and execution errors are reported by
// checkCudaErrors, which terminates the process.
void runKernel(CUdeviceptr d_a, CUdeviceptr d_b, CUdeviceptr d_c)
{
    // cuLaunchKernel takes an array of pointers to the argument values.
    void *args[3] = { &d_a, &d_b, &d_c };

    // grid for kernel: <<<N, 1>>>
    checkCudaErrors( cuLaunchKernel(function,
                                    N, 1, 1,      // Nx1x1 blocks
                                    1, 1, 1,      // 1x1x1 threads
                                    0, 0, args, 0) );

    // The launch is asynchronous: wait here so that a fault during kernel
    // execution is reported at this call site and so that the kernel has
    // really finished when this function returns.
    checkCudaErrors( cuCtxSynchronize() );
}
int main(int argc, char **argv) | |
{ | |
int a[N], b[N], c[N]; | |
CUdeviceptr d_a, d_b, d_c; | |
// initialize host arrays | |
for (int i = 0; i < N; ++i) { | |
a[i] = N - i; | |
b[i] = i * i; | |
} | |
// initialize | |
printf("- Initializing...\n"); | |
initCUDA(); | |
// allocate memory | |
setupDeviceMemory(&d_a, &d_b, &d_c); | |
// copy arrays to device | |
checkCudaErrors( cuMemcpyHtoD(d_a, a, sizeof(int) * N) ); | |
checkCudaErrors( cuMemcpyHtoD(d_b, b, sizeof(int) * N) ); | |
// run | |
printf("# Running the kernel...\n"); | |
runKernel(d_a, d_b, d_c); | |
printf("# Kernel complete.\n"); | |
// copy results to host and report | |
checkCudaErrors( cuMemcpyDtoH(c, d_c, sizeof(int) * N) ); | |
for (int i = 0; i < N; ++i) { | |
if (c[i] != a[i] + b[i]) | |
printf("* Error at array position %d: Expected %d, Got %d\n", | |
i, a[i]+b[i], c[i]); | |
} | |
printf("*** All checks complete.\n"); | |
// finish | |
printf("- Finalizing...\n"); | |
releaseDeviceMemory(d_a, d_b, d_c); | |
finalizeCUDA(); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Vector addition (device code) | |
#include "matSumKernel.h" | |
// Element-wise vector addition: c[i] = a[i] + b[i] for every i in [0, N).
// Launch layout: 1-D grid of 1-D blocks of any size. Each thread starts at
// its global index and advances by the total number of launched threads, so
// all N elements are covered even when the grid has fewer than N threads.
// With the <<<N, 1>>> launch each block handles exactly the element at
// blockIdx.x. Uses no shared memory.
// Declared extern "C" so the symbol keeps the unmangled name "matSum".
extern "C" __global__ void matSum(int *a, int *b, int *c)
{
    const int stride = gridDim.x * blockDim.x;
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; i += stride)
        c[i] = a[i] + b[i];
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Include guard. The name deliberately avoids a leading double underscore,
// because such identifiers are reserved for the C/C++ implementation.
#ifndef MATSUMKERNEL_H
#define MATSUMKERNEL_H
// Number of elements in each vector; used by both the host code and the kernel.
#define N 100
#endif // MATSUMKERNEL_H
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment