Skip to content

Instantly share code, notes, and snippets.

@epk
Last active October 8, 2018 00:21
Show Gist options
  • Save epk/7f2262a34c61249510b5bf9acb6e0132 to your computer and use it in GitHub Desktop.
Save epk/7f2262a34c61249510b5bf9acb6e0132 to your computer and use it in GitHub Desktop.
//---------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <iostream>
//---------------------------------------------------------------------------------
// Number of elements in each of the vectors handled by main (A, B and C).
static const int WORK_SIZE = 200000000;
// Block size for the kernel launch -- presumably threads per block; confirm
// against the launch configuration in main.
static const int BLK_SIZE = 256;
// Allows cout / endl to be used unqualified throughout this file.
using namespace std;
//---------------------------------------------------------------------------------
/**
* This macro checks return value of the CUDA runtime call and exits
* the application if the call failed.
*
* value: an expression yielding a cudaError_t (evaluated exactly once).
*
* On failure the CUDA error string, the line number and the file name of
* the call site are printed to stderr and the process exits with status 1.
*
* The body is wrapped in do { ... } while (0) so that the macro behaves
* like a single statement: `if (c) CUDA_CHECK_RETURN(x); else ...` parses
* correctly. Call sites must supply the terminating semicolon.
*/
#define CUDA_CHECK_RETURN(value) do { \
    cudaError_t _m_cudaStat = (value); \
    if (_m_cudaStat != cudaSuccess) { \
        fprintf(stderr, "Error %s at line %d in file %s\n", \
                cudaGetErrorString(_m_cudaStat), __LINE__, __FILE__); \
        exit(1); \
    } \
} while (0)
//---------------------------------------------------------------------------------
/**
* Element-wise vector addition: C_d[i] = A_d[i] + B_d[i] for 0 <= i < n.
*
* A_d, B_d: input vectors, each holding at least n elements.
* C_d:      output vector, holding at least n elements.
* n:        number of elements to process; n <= 0 performs no work.
*
* Expects a 1D grid of 1D blocks. The loop strides by the total number of
* launched threads, so any grid/block size produces a complete result and
* threads whose index is >= n simply do nothing. Uses no shared memory.
* Unsigned addition wraps modulo 2^32 on overflow.
*/
__global__ void vecAdd(unsigned int* A_d, unsigned int *B_d,
unsigned int *C_d, int n) {
    // 64-bit indices: blockIdx.x * blockDim.x (and the stride) can exceed
    // the range of a 32-bit int for large grids.
    const long long stride = (long long) gridDim.x * blockDim.x;
    for (long long i = (long long) blockIdx.x * blockDim.x + threadIdx.x;
            i < n; i += stride) {
        C_d[i] = A_d[i] + B_d[i];
    }
}
//---------------------------------------------------------------------------------
/**
* Vector-addition driver.
*
* Fills two host vectors of WORK_SIZE random values, copies them to the
* device, launches vecAdd, times the launch with CUDA events, copies the
* result back and compares it against a sum computed on the host.
*
* Returns 0 when every element matches the host reference and
* EXIT_FAILURE otherwise. Any failing CUDA runtime call terminates the
* process through CUDA_CHECK_RETURN.
*/
int main(void) {
    // Host (_h) and device (_d) copies of the inputs A, B and the result C.
    unsigned int *A_h;
    unsigned int *A_d;
    unsigned int *B_h;
    unsigned int *B_d;
    unsigned int *C_h;
    unsigned int *C_d;
    // Size in bytes of one vector; shared by every allocation and copy so
    // the element type cannot drift between them.
    const size_t bytes = sizeof(unsigned int) * WORK_SIZE;

    //Set Device
    CUDA_CHECK_RETURN(cudaSetDevice(0));
    //Seed random number generator
    srand(time(NULL));
    //Clear command prompt
    cout << "\033[2J\033[1;1H";
    cout << "Allocating arrays on host ... ";
    A_h = new unsigned int[WORK_SIZE];
    B_h = new unsigned int[WORK_SIZE];
    C_h = new unsigned int[WORK_SIZE];
    cout << "done.\nPopluating arrays on host ... ";
    for (int i = 0; i < WORK_SIZE; i++) {
        A_h[i] = rand();
        B_h[i] = rand();
    }
    cout << "done.\nAllocating arrays on device ... ";
    CUDA_CHECK_RETURN(cudaMalloc((void** ) &A_d, bytes));
    CUDA_CHECK_RETURN(cudaMalloc((void** ) &B_d, bytes));
    CUDA_CHECK_RETURN(cudaMalloc((void** ) &C_d, bytes));
    cout << "done.\nCopying arrays from host to device ... ";
    CUDA_CHECK_RETURN(cudaMemcpy(A_d, A_h, bytes, cudaMemcpyHostToDevice));
    CUDA_CHECK_RETURN(cudaMemcpy(B_d, B_h, bytes, cudaMemcpyHostToDevice));
    cout << "done.\nLaunching kernel ... ";

    // Launch parameters: BLK_SIZE threads per block and enough blocks
    // (ceiling division) that there is at least one thread per element.
    const dim3 block(BLK_SIZE);
    const dim3 grid((WORK_SIZE + BLK_SIZE - 1) / BLK_SIZE);

    //Time kernel launch
    cudaEvent_t start, stop;
    CUDA_CHECK_RETURN(cudaEventCreate(&start));
    CUDA_CHECK_RETURN(cudaEventCreate(&stop));
    float elapsedTime;
    CUDA_CHECK_RETURN(cudaEventRecord(start, 0));
    vecAdd<<<grid, block>>>(A_d, B_d, C_d, WORK_SIZE);
    // A kernel launch returns no status itself; query it immediately so an
    // invalid launch configuration is reported here, not by a later call.
    CUDA_CHECK_RETURN(cudaGetLastError());
    CUDA_CHECK_RETURN(cudaEventRecord(stop, 0));
    CUDA_CHECK_RETURN(cudaEventSynchronize(stop));
    CUDA_CHECK_RETURN(cudaEventElapsedTime(&elapsedTime, start, stop));
    // Wait for the GPU launched work to complete; also surfaces errors
    // raised while the kernel was executing.
    CUDA_CHECK_RETURN(cudaDeviceSynchronize());
    CUDA_CHECK_RETURN(cudaEventDestroy(start));
    CUDA_CHECK_RETURN(cudaEventDestroy(stop));
    cout << "done.\nElapsed kernel time: " << elapsedTime << " ms\n";
    cout << "Copying results back to host .... ";
    CUDA_CHECK_RETURN(cudaMemcpy(C_h, C_d, bytes, cudaMemcpyDeviceToHost));
    cout << "done.\nVerifying results on host ... ";

    //Time host calculations
    clock_t st, ed;
    st = clock();
    // Recompute every sum on the host and compare with the device result.
    // Both sides use unsigned arithmetic, so an exact match is expected.
    long long mismatches = 0;
    long long firstMismatch = -1;
    for (int i = 0; i < WORK_SIZE; i++) {
        if (C_h[i] != A_h[i] + B_h[i]) {
            if (mismatches == 0) {
                firstMismatch = i;
            }
            mismatches++;
        }
    }
    cout << "done\n";
    if (mismatches == 0) {
        cout << "Results are valid.\n";
    } else {
        cout << "Results are INVALID: " << mismatches << " of " << WORK_SIZE
                << " elements differ (first mismatch at index "
                << firstMismatch << ").\n";
    }
    ed = clock() - st;
    cout << "Elapsed time on host: " << ((float) ed) / CLOCKS_PER_SEC * 1000
            << " ms" << endl;

    cout << "Freeing memory on device ... ";
    CUDA_CHECK_RETURN(cudaFree((void* ) A_d));
    CUDA_CHECK_RETURN(cudaFree((void* ) B_d));
    CUDA_CHECK_RETURN(cudaFree((void* ) C_d));
    CUDA_CHECK_RETURN(cudaDeviceReset());
    cout << "done.\nFreeing memory on host ... ";
    delete[] A_h;
    delete[] B_h;
    delete[] C_h;
    cout << "done.\nExiting program.\n";
    // Report a failed validation through the exit status as well.
    return mismatches == 0 ? 0 : EXIT_FAILURE;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment