Last active
August 29, 2015 14:06
-
-
Save moznion/ccbfc67a6679d09be43d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#define DATA_NUM 10 | |
#define DATA_SIZE sizeof(double) * DATA_NUM | |
#define TRIALS 50 | |
void cudaSetDeviceSafe(int); | |
void cudaMallocSafe(void*, int); | |
void cudaMallocHostSafe(void*, int); | |
void cudaMemcpySafe(void*, const void*, size_t, cudaMemcpyKind); | |
void cudaMemcpyPeerSafe(void*, int, const void*, int, size_t); | |
void cudaFreeSafe(void*); | |
void cudaFreeHostSafe(void*); | |
int is_deeply(double*, double*, int); | |
int ping_pong(double* host_dataA, double* host_dataB, double* host_dataA_rt, double* host_dataB_rt) { | |
cudaError_t err; | |
int device_count; | |
// 有効なデバイス数をチェック | |
err = cudaGetDeviceCount(&device_count); | |
if (err) { | |
fprintf(stderr, "Err: %d\n", err); | |
return 1; | |
} | |
int device0 = 0; | |
int device1 = 2; | |
// Device 0とDevice 1間でpeer通信ができるかどうかチェック | |
int can_access_A2B; | |
int can_access_B2A; | |
err = cudaDeviceCanAccessPeer(&can_access_A2B, device0, device1); | |
err = cudaDeviceCanAccessPeer(&can_access_B2A, device1, device0); | |
if (can_access_A2B != 1 || can_access_B2A != 1) { | |
fprintf(stderr, "Cannot connect via peer between device0 and device1\n"); | |
return 1; | |
} | |
// Device0とDevice1でUVAが有効かどうかチェック | |
cudaDeviceProp device_prop; | |
int uva0; | |
cudaSetDeviceSafe(device0); | |
err = cudaGetDeviceProperties(&device_prop, device0); | |
uva0 = device_prop.unifiedAddressing; | |
err = cudaDeviceEnablePeerAccess(device1, 0); | |
int uva1; | |
cudaSetDeviceSafe(device1); | |
err = cudaGetDeviceProperties(&device_prop, device1); | |
uva1 = device_prop.unifiedAddressing; | |
err = cudaDeviceEnablePeerAccess(device0, 0); | |
if (uva0 != 1 || uva1 != 1) { | |
fprintf(stderr, "Cannot use UVA on your device\n"); | |
return 1; | |
} | |
// device0上に配列をALLOCATE | |
cudaSetDeviceSafe(device0); | |
double *dev_dataA_0, *dev_dataB_0; | |
cudaMallocSafe(&dev_dataA_0, DATA_SIZE); | |
cudaMallocSafe(&dev_dataB_0, DATA_SIZE); | |
// host_dataA => dev_dataA_0 | |
cudaMemcpySafe(dev_dataA_0, host_dataA, DATA_SIZE, cudaMemcpyHostToDevice); | |
// device1上に配列をALLOCATE | |
cudaSetDeviceSafe(device1); | |
double *dev_dataA_1, *dev_dataB_1; | |
cudaMallocSafe(&dev_dataA_1, DATA_SIZE); | |
cudaMallocSafe(&dev_dataB_1, DATA_SIZE); | |
// host_dataB => dev_dataB_1 | |
cudaMemcpySafe(dev_dataB_1, host_dataB, DATA_SIZE, cudaMemcpyHostToDevice); | |
// ping-pong | |
for (int i = 0; i < TRIALS; i++) { | |
if (i % 2 == 0) { // even | |
cudaMemcpyPeerSafe(dev_dataA_1, device1, dev_dataA_0, device0, DATA_SIZE); | |
continue; | |
} | |
cudaMemcpyPeerSafe(dev_dataB_0, device0, dev_dataB_1, device1, DATA_SIZE); | |
} | |
cudaMemcpySafe(host_dataA_rt, dev_dataA_0, DATA_SIZE, cudaMemcpyDeviceToHost); | |
cudaMemcpySafe(host_dataB_rt, dev_dataB_1, DATA_SIZE, cudaMemcpyDeviceToHost); | |
// cleanup | |
cudaSetDeviceSafe(device0); | |
cudaFreeSafe(dev_dataA_0); | |
cudaFreeSafe(dev_dataB_0); | |
cudaDeviceDisablePeerAccess(device0); | |
cudaSetDeviceSafe(device1); | |
cudaFreeSafe(dev_dataA_1); | |
cudaFreeSafe(dev_dataB_1); | |
cudaDeviceDisablePeerAccess(device1); | |
return 0; | |
} | |
int main(int argc, char **argv) { | |
// Host上にデータ作る | |
double *host_dataA, *host_dataB; | |
cudaMallocHostSafe(&host_dataA, DATA_SIZE); | |
cudaMallocHostSafe(&host_dataB, DATA_SIZE); | |
for (int i = 0; i < DATA_NUM; i++) { // initialize | |
host_dataA[i] = 100.0; | |
host_dataB[i] = 200.0; | |
} | |
// for round trip | |
double *host_dataA_rt, *host_dataB_rt; | |
cudaMallocHostSafe(&host_dataA_rt, DATA_SIZE); | |
cudaMallocHostSafe(&host_dataB_rt, DATA_SIZE); | |
// この時点では違うはず | |
printf("Data A: %s\n", is_deeply(host_dataA_rt, host_dataA, DATA_NUM) ? "Same" : "Different"); | |
printf("Data B: %s\n", is_deeply(host_dataB_rt, host_dataB, DATA_NUM) ? "Same" : "Different"); | |
int err = ping_pong(host_dataA, host_dataB, host_dataA_rt, host_dataB_rt); | |
if (err) { | |
exit(err); | |
} | |
// ping_pongうまくいってたら一致する | |
printf("Data A: %s\n", is_deeply(host_dataA_rt, host_dataA, DATA_NUM) ? "Same" : "Different"); | |
printf("Data B: %s\n", is_deeply(host_dataB_rt, host_dataB, DATA_NUM) ? "Same" : "Different"); | |
cudaFreeHostSafe(host_dataA); | |
cudaFreeHostSafe(host_dataA_rt); | |
cudaFreeHostSafe(host_dataB); | |
cudaFreeHostSafe(host_dataB_rt); | |
return 0; | |
} | |
void cudaSetDeviceSafe(int device_id) { | |
cudaError_t err = cudaSetDevice(device_id); | |
if (err) { | |
fprintf(stderr, "Cannot set a device (device: %d)\n", device_id); | |
exit(1); | |
} | |
} | |
void cudaMallocSafe(void* ptr, int data_size) { | |
cudaError_t err = cudaMalloc((void **)ptr, data_size); | |
if (err) { | |
fprintf(stderr, "Failed to allocate the memory on device\n"); | |
exit(1); | |
} | |
} | |
void cudaMallocHostSafe(void* ptr, int data_size) { | |
cudaError_t err = cudaMallocHost((void **)ptr, data_size); | |
if (err) { | |
fprintf(stderr, "Failed to allocate the memory on host\n"); | |
exit(1); | |
} | |
} | |
void cudaMemcpySafe(void* dst, const void* src, size_t data_size, cudaMemcpyKind kind) { | |
cudaError_t err = cudaMemcpy(dst, src, data_size, kind); | |
if (err) { | |
fprintf(stderr, "Failed to copy memory (direction: %s)\n", kind); | |
exit(1); | |
} | |
} | |
void cudaMemcpyPeerSafe(void* dst, int dst_device, const void* src, int src_device, size_t data_size) { | |
cudaError_t err = cudaMemcpyPeer(dst, dst_device, src, src_device, data_size); | |
if (err) { | |
fprintf(stderr, "Failed to copy memory via peer (direction: %d -> %d)\n", src_device, dst_device); | |
exit(1); | |
} | |
} | |
void cudaFreeSafe(void* ptr) { | |
cudaError_t err = cudaFree(ptr); | |
if (err) { | |
fprintf(stderr, "Failed to make device memory free\n"); | |
exit(1); | |
} | |
} | |
void cudaFreeHostSafe(void* ptr) { | |
cudaError_t err = cudaFreeHost(ptr); | |
if (err) { | |
fprintf(stderr, "Failed to make host memory free\n"); | |
exit(1); | |
} | |
} | |
int is_deeply(double* got, double* expected, int data_num) { | |
for (int i = 0; i < data_num; i++) { | |
if (got[i] != expected[i]) { | |
return 0; | |
} | |
} | |
return 1; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment