Created
October 25, 2016 06:40
-
-
Save kyleniemeyer/dfc884362703fd44ed252359e822c745 to your computer and use it in GitHub Desktop.
Basic GPU/CUDA test code
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h>
#include <stdlib.h>

#include "timer.h"

/** CUDA libraries */
#include <cuda.h>
#include <helper_cuda.h>
/**
 * Empty placeholder kernel.
 *
 * Takes no arguments and performs no work. The visible part of this file
 * never launches it.
 */
__global__ void kernel(void)
{
}
/**
 * Time repeated one-int host<->device copies and report the average.
 *
 * Usage: <program> [device_id]
 *
 * With no argument, device 0 is used. Otherwise the first argument must be a
 * decimal integer in the range [0, number of devices); anything else prints
 * an error and exits with status 1.
 *
 * The timed loop performs `num` iterations, each consisting of one
 * host-to-device and one device-to-host cudaMemcpy of a single int. The
 * printed figure is the elapsed time divided by `num`, i.e. the average cost
 * of one such pair of copies.
 *
 * Returns 0 on success. CUDA failures are handled by checkCudaErrors().
 */
int main (int argc, char *argv[]) {
    // number of timed round trips
    const int num = 10000;

    // set & initialize device using command line argument (if any);
    // default device id is 0
    int id = 0;
    if (argc > 1) {
        // get number of devices
        int num_devices = 0;
        checkCudaErrors (cudaGetDeviceCount (&num_devices));

        // Parse the whole argument as a decimal number, so that multi-digit
        // ids work and trailing garbage (e.g. "1abc") is rejected.
        char *end = NULL;
        const long requested = strtol (argv[1], &end, 10);
        const bool is_number = (end != argv[1]) && (*end == '\0');

        if (!is_number || (requested < 0) || (requested >= num_devices)) {
            // not a number or not in range, error
            printf("Error: GPU device number not in correct range\n");
            printf("Provide number between 0 and %i\n", num_devices - 1);
            exit(1);
        }
        id = (int) requested;
    }

    checkCudaErrors (cudaSetDevice (id));

    // Queried as in the original program; the properties themselves are not
    // used further, but the call fails loudly if the device cannot be queried.
    cudaDeviceProp devProp;
    checkCudaErrors (cudaGetDeviceProperties (&devProp, id));

    // Allocate host and device memory for a single int
    const size_t size = sizeof(int);

    int *y_host = (int *) malloc (size);
    if (y_host == NULL) {
        printf("Error: could not allocate host memory\n");
        exit(1);
    }
    // give the transferred value a defined content
    *y_host = 0;

    int *y_device = NULL;
    checkCudaErrors (cudaMalloc ((void **) &y_device, size));

    //////////////////////////////
    // start timer
    StartTimer();
    //////////////////////////////

    for (int i = 0; i < num; ++i) {
        checkCudaErrors (cudaMemcpy (y_device, y_host, size, cudaMemcpyHostToDevice));
        checkCudaErrors (cudaMemcpy (y_host, y_device, size, cudaMemcpyDeviceToHost));
    }

    /////////////////////////////////
    // end timer
    double runtime = GetTimer();
    /////////////////////////////////

    // convert to sec
    // NOTE(review): assumes GetTimer() reports milliseconds — confirm in timer.h
    runtime /= 1000.0;

    // average per iteration, expressed in microseconds
    runtime = 1.0e6 * runtime / static_cast<double>(num);

    printf("GPU communication latency: %e microseconds\n", runtime);

    // release resources
    checkCudaErrors (cudaFree (y_device));
    free (y_host);

    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment