Last active
October 8, 2018 00:21
-
-
Save epk/7f2262a34c61249510b5bf9acb6e0132 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//--------------------------------------------------------------------------------- | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <time.h> | |
#include <iostream> | |
//--------------------------------------------------------------------------------- | |
// Number of elements in each of the vectors A, B and C; main sizes every
// host and device allocation, every copy and the fill loop from it.
static const int WORK_SIZE = 200000000;
// Block-size constant; presumably the number of threads per block for the
// vecAdd launch (confirm against the launch site).
static const int BLK_SIZE = 256;
// Allows cout / endl to be written without the std:: prefix in this file.
using namespace std;
//--------------------------------------------------------------------------------- | |
/**
 * Evaluates a CUDA runtime call exactly once and checks the cudaError_t it
 * returns. On anything other than cudaSuccess it prints the error string
 * together with the line and file of the call site to stderr and exits the
 * application with status 1.
 *
 * The body is wrapped in do { ... } while (0) so that
 * `CUDA_CHECK_RETURN(call);` is a single statement and stays correct inside
 * an unbraced if/else. The argument is parenthesized so any expression may be
 * passed.
 */
#define CUDA_CHECK_RETURN(value) \
    do { \
        cudaError_t _m_cudaStat = (value); \
        if (_m_cudaStat != cudaSuccess) { \
            fprintf(stderr, "Error %s at line %d in file %s\n", \
                    cudaGetErrorString(_m_cudaStat), __LINE__, __FILE__); \
            exit(1); \
        } \
    } while (0)
//--------------------------------------------------------------------------------- | |
/**
 * Element-wise vector addition: C_d[i] = A_d[i] + B_d[i] for every i in [0, n).
 *
 * Only the .x components of the launch configuration are used, so the kernel
 * expects a 1D grid of 1D blocks. Each thread starts at its flat global index
 * and then advances by the total number of launched threads, which means every
 * element is processed for any 1D grid size and no thread ever touches an
 * index at or beyond n. No shared memory is used.
 *
 * @param A_d first input vector; must be dereferenceable on the device and
 *            hold at least n elements
 * @param B_d second input vector; same requirements as A_d
 * @param C_d output vector; same requirements as A_d
 * @param n   number of elements to add; values <= 0 make the kernel a no-op
 */
__global__ void vecAdd(unsigned int* A_d, unsigned int *B_d,
                       unsigned int *C_d, int n) {
    // Guard first: a negative n would turn into a huge bound once widened.
    if (n <= 0) {
        return;
    }

    // Index arithmetic is done in size_t so blockIdx.x * blockDim.x cannot
    // wrap around for large grids.
    const size_t count = static_cast<size_t>(n);
    const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;

    for (size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
         i < count; i += stride) {
        // Unsigned addition is well defined and wraps modulo 2^32 on overflow.
        C_d[i] = A_d[i] + B_d[i];
    }
}
//--------------------------------------------------------------------------------- | |
/**
 * Host driver for the vector-addition example.
 *
 * Steps: select device 0, fill two host vectors of WORK_SIZE random values,
 * copy them to the device, launch vecAdd while timing it with CUDA events,
 * copy the result back, recompute the sums on the host to verify every
 * element, report both timings, and release all memory.
 *
 * Any failing CUDA runtime call terminates the program via CUDA_CHECK_RETURN.
 *
 * @return 0 when every device result matches the host reference,
 *         EXIT_FAILURE otherwise.
 */
int main(void) {
    unsigned int *A_h;
    unsigned int *A_d;
    unsigned int *B_h;
    unsigned int *B_d;
    unsigned int *C_h;
    unsigned int *C_d;

    // One byte count shared by every allocation and copy, so the element type
    // used for sizing can never disagree between cudaMalloc and cudaMemcpy.
    const size_t bytes = sizeof(unsigned int) * WORK_SIZE;

    //Set Device
    CUDA_CHECK_RETURN(cudaSetDevice(0));

    //Seed random number generator
    srand(time(NULL));

    //Clear command prompt
    cout << "\033[2J\033[1;1H";

    cout << "Allocating arrays on host ... ";
    A_h = new unsigned int[WORK_SIZE];
    B_h = new unsigned int[WORK_SIZE];
    C_h = new unsigned int[WORK_SIZE];

    cout << "done.\nPopluating arrays on host ... ";
    for (int i = 0; i < WORK_SIZE; i++) {
        A_h[i] = rand();
        B_h[i] = rand();
    }

    cout << "done.\nAllocating arrays on device ... ";
    CUDA_CHECK_RETURN(cudaMalloc((void**) &A_d, bytes));
    CUDA_CHECK_RETURN(cudaMalloc((void**) &B_d, bytes));
    CUDA_CHECK_RETURN(cudaMalloc((void**) &C_d, bytes));

    cout << "done.\nCopying arrays from host to device ... ";
    CUDA_CHECK_RETURN(cudaMemcpy(A_d, A_h, bytes, cudaMemcpyHostToDevice));
    CUDA_CHECK_RETURN(cudaMemcpy(B_d, B_h, bytes, cudaMemcpyHostToDevice));

    cout << "done.\nLaunching kernel ... ";

    // Launch configuration: BLK_SIZE threads per block and enough blocks
    // (ceiling division) to give every element its own thread.
    const int threadsPerBlock = BLK_SIZE;
    const int blocksPerGrid = (WORK_SIZE + BLK_SIZE - 1) / BLK_SIZE;

    //Time kernel launch
    cudaEvent_t start, stop;
    CUDA_CHECK_RETURN(cudaEventCreate(&start));
    CUDA_CHECK_RETURN(cudaEventCreate(&stop));
    float elapsedTime;

    CUDA_CHECK_RETURN(cudaEventRecord(start, 0));
    vecAdd<<<blocksPerGrid, threadsPerBlock>>>(A_d, B_d, C_d, WORK_SIZE);
    // A launch does not return a status; an invalid configuration only shows
    // up through cudaGetLastError(), so query it right after the launch.
    CUDA_CHECK_RETURN(cudaGetLastError());
    CUDA_CHECK_RETURN(cudaEventRecord(stop, 0));

    // Waiting on the stop event also surfaces errors raised while the kernel
    // was executing.
    CUDA_CHECK_RETURN(cudaEventSynchronize(stop));
    CUDA_CHECK_RETURN(cudaEventElapsedTime(&elapsedTime, start, stop));
    CUDA_CHECK_RETURN(cudaDeviceSynchronize()); // Wait for the GPU launched work to complete
    CUDA_CHECK_RETURN(cudaEventDestroy(start));
    CUDA_CHECK_RETURN(cudaEventDestroy(stop));
    cout << "done.\nElapsed kernel time: " << elapsedTime << " ms\n";

    cout << "Copying results back to host .... ";
    CUDA_CHECK_RETURN(cudaMemcpy(C_h, C_d, bytes, cudaMemcpyDeviceToHost));

    cout << "done.\nVerifying results on host ... ";

    // Time the host-side reference computation.
    clock_t st, ed;
    st = clock();

    // Recompute every sum on the host and compare it with the device result,
    // remembering how many elements differ and where the first one is.
    size_t mismatches = 0;
    int firstMismatch = -1;
    for (int i = 0; i < WORK_SIZE; i++) {
        if (C_h[i] != A_h[i] + B_h[i]) {
            if (mismatches == 0) {
                firstMismatch = i;
            }
            mismatches++;
        }
    }

    // Stop the clock before printing so console output is not measured.
    ed = clock() - st;
    cout << "done\n";

    if (mismatches == 0) {
        cout << "Results are valid.\n";
    } else {
        cout << "Results are INVALID: " << mismatches << " of " << WORK_SIZE
             << " elements differ (first mismatch at index " << firstMismatch
             << ").\n";
    }

    cout << "Elapsed time on host: " << ((float) ed) / CLOCKS_PER_SEC * 1000
         << " ms" << endl;

    cout << "Freeing memory on device ... ";
    CUDA_CHECK_RETURN(cudaFree((void*) A_d));
    CUDA_CHECK_RETURN(cudaFree((void*) B_d));
    CUDA_CHECK_RETURN(cudaFree((void*) C_d));
    CUDA_CHECK_RETURN(cudaDeviceReset());

    cout << "done.\nFreeing memory on host ... ";
    delete[] A_h;
    delete[] B_h;
    delete[] C_h;

    cout << "done.\nExiting program.\n";

    // Report validation failure to the caller through the exit status.
    return mismatches == 0 ? 0 : EXIT_FAILURE;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment