Skip to content

Instantly share code, notes, and snippets.

@moznion
Created September 4, 2014 10:38
Show Gist options
  • Save moznion/2daca45af3f9e5307af3 to your computer and use it in GitHub Desktop.
Save moznion/2daca45af3f9e5307af3 to your computer and use it in GitHub Desktop.
#include <stdio.h>
#define DATA_NUM 1024
#define DATA_SIZE sizeof(double) * DATA_NUM
#define BLOCK_SIZE 128
#define TRIALS 100
void cudaSetDeviceSafe(int);
void cudaMallocSafe(void*, int);
void cudaMallocHostSafe(void*, int);
void cudaMemcpySafe(void*, const void*, size_t, cudaMemcpyKind);
void cudaMemcpyPeerSafe(void*, int, const void*, int, size_t);
void cudaFreeSafe(void*);
void cudaFreeHostSafe(void*);
__global__ void vadd_kernel(double* A, double* B, double* C) {
int id = blockIdx.x * blockDim.x + threadIdx.x;
C[id] = A[id] + B[id];
}
int vadd(double* host_dataA, double* host_dataB, double* host_result_data) {
cudaError_t err;
int device0 = 0;
int device1 = 2;
// Check peer connection is available
int can_access_0_to_1, can_access_1_to_0;
err = cudaDeviceCanAccessPeer(&can_access_0_to_1, device0, device1);
err = cudaDeviceCanAccessPeer(&can_access_1_to_0, device1, device0);
if (can_access_0_to_1 != 1 || can_access_1_to_0 != 1) {
fprintf(stderr, "Cannot connect via peer between device0 and device1\n");
return 1;
}
// Check UVA is available
cudaDeviceProp device_prop;
int uva0;
cudaSetDeviceSafe(device0);
err = cudaGetDeviceProperties(&device_prop, device0);
uva0 = device_prop.unifiedAddressing;
err = cudaDeviceEnablePeerAccess(device1, 0);
int uva1;
cudaSetDeviceSafe(device1);
err = cudaGetDeviceProperties(&device_prop, device1);
uva1 = device_prop.unifiedAddressing;
err = cudaDeviceEnablePeerAccess(device0, 0);
if (uva0 != 1 || uva1 != 1) {
fprintf(stderr, "Cannot use UVA on your device\n");
return 1;
}
// Allocate array variables on device0
cudaSetDeviceSafe(device0);
double *device_dataA_0, *device_dataB_0;
cudaMallocSafe(&device_dataA_0, DATA_SIZE);
cudaMallocSafe(&device_dataB_0, DATA_SIZE);
cudaMemcpySafe(device_dataA_0, host_dataA, DATA_SIZE, cudaMemcpyHostToDevice);
cudaMemcpySafe(device_dataB_0, host_dataB, DATA_SIZE, cudaMemcpyHostToDevice);
// Allocate array variables on device1
cudaSetDeviceSafe(device1);
double *device_dataA_1, *device_dataB_1, *device_result_data_1;
cudaMallocSafe(&device_dataA_1, DATA_SIZE);
cudaMallocSafe(&device_dataB_1, DATA_SIZE);
cudaMallocSafe(&device_result_data_1, DATA_SIZE);
// core
dim3 block(BLOCK_SIZE);
dim3 grid(DATA_NUM / BLOCK_SIZE);
cudaSetDeviceSafe(device1);
for (int i = 0; i < TRIALS; i++) {
vadd_kernel<<<grid, block>>>(device_dataA_0, device_dataB_0, device_result_data_1);
}
cudaMemcpySafe(host_result_data, device_result_data_1, DATA_SIZE, cudaMemcpyDeviceToHost);
// Cleanup
cudaSetDeviceSafe(device0);
cudaFreeSafe(device_dataA_0);
cudaFreeSafe(device_dataB_0);
cudaSetDeviceSafe(device1);
cudaFreeSafe(device_dataA_1);
cudaFreeSafe(device_dataB_1);
cudaFreeSafe(device_result_data_1);
return 0;
}
int main(int argc, char** argv) {
double *host_dataA, *host_dataB, *host_result_data;
cudaMallocHostSafe(&host_dataA, DATA_SIZE);
cudaMallocHostSafe(&host_dataB, DATA_SIZE);
cudaMallocHostSafe(&host_result_data, DATA_SIZE);
for (int i = 0; i < DATA_NUM; i++) {
host_dataA[i] = i * 10;
host_dataB[i] = i * 20;
}
int err = vadd(host_dataA, host_dataB, host_result_data);
if (err) {
exit(err);
}
for (int i = 0; i < DATA_NUM; i++) {
printf("%lf\n", host_result_data[i]);
}
cudaFreeHostSafe(host_dataA);
cudaFreeHostSafe(host_dataB);
return 0;
}
void cudaSetDeviceSafe(int device_id) {
cudaError_t err = cudaSetDevice(device_id);
if (err) {
fprintf(stderr, "Cannot set a device (device: %d)\n", device_id);
exit(256);
}
}
void cudaMallocSafe(void* ptr, int data_size) {
cudaError_t err = cudaMalloc((void **)ptr, data_size);
if (err) {
fprintf(stderr, "Failed to allocate the memory on device\n");
}
}
void cudaMallocHostSafe(void* ptr, int data_size) {
cudaError_t err = cudaMallocHost((void **)ptr, data_size);
if (err) {
fprintf(stderr, "Failed to allocate the memory on host\n");
}
}
void cudaMemcpySafe(void* dst, const void* src, size_t data_size, cudaMemcpyKind kind) {
cudaError_t err = cudaMemcpy(dst, src, data_size, kind);
if (err) {
fprintf(stderr, "Failed to copy memory (direction: %s)\n", kind);
}
}
void cudaMemcpyPeerSafe(void* dst, int dst_device, const void* src, int src_device, size_t data_size) {
cudaError_t err = cudaMemcpyPeer(dst, dst_device, src, src_device, data_size);
if (err) {
fprintf(stderr, "Failed to copy memory via peer (direction: %d -> %d)\n", src_device, dst_device);
}
}
void cudaFreeSafe(void* ptr) {
cudaError_t err = cudaFree(ptr);
if (err) {
fprintf(stderr, "Failed to make device memory free\n");
}
}
void cudaFreeHostSafe(void* ptr) {
cudaError_t err = cudaFreeHost(ptr);
if (err) {
fprintf(stderr, "Failed to make host memory free\n");
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment