Last active
September 22, 2015 21:15
-
-
Save epitron/b4577dd79f27cfc417f0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ nvcc --gpu-architecture=compute_20 --gpu-code=compute_20 -I/usr/local/cuda/include/ -DGPU --compiler-options "-Wall -Ofast -DGPU" -c ./src/convolutional_kernels.cu -o obj/convolutional_kernels.o
/usr/local/cuda/include/cuda_fp16.h:314:83: error: conflicting declaration of C function ‘__half __ldg(const __half*)’
/usr/local/cuda/include/cuda_fp16.h:313:60: note: previous declaration ‘__half2 __ldg(const __half2*)’
/usr/local/cuda/include/cuda_fp16.h: In function ‘__half2 __ldg(const __half2*)’:
/usr/local/cuda/include/cuda_fp16.h:1180:84: error: conflicting declaration of C function ‘__half2 __ldg(const __half2*)’
/usr/local/cuda/include/cuda_fp16.h:314:59: note: previous declaration ‘__half __ldg(const __half*)’
Makefile:54: recipe for target 'obj/convolutional_kernels.o' failed
make: *** [obj/convolutional_kernels.o] Error 1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Line 84
/*
 * Storage type __half: a struct wrapping one `unsigned short` member `x`,
 * declared with 2-byte alignment via __align__(2).
 * NOTE(review): presumably `x` carries the raw half-precision bit pattern;
 * this excerpt does not show how the bits are interpreted.
 */
typedef struct __align__(2) {
    unsigned short x;
} __half;
/*
 * Storage type __half2: a struct wrapping one `unsigned int` member `x`,
 * declared with 4-byte alignment via __align__(4).
 * NOTE(review): presumably `x` packs two half-precision values; the packing
 * order is not visible in this excerpt.
 */
typedef struct __align__(4) {
    unsigned int x;
} __half2;
// Line 103
/*
 * Declaration prefix applied to the fp16 helper functions in this header:
 * internal linkage (`static`), device-side (`__device__`), inline.
 * Nothing may follow the qualifiers on this line: the preprocessor treats
 * everything up to end-of-line as the macro's replacement list.
 */
#define __CUDA_FP16_DECL__ static __device__ __inline__
// Line 313
#if defined(__cplusplus) && (__CUDA_ARCH__ >= 320 || !defined(__CUDA_ARCH__))
/*
 * Forward declarations of the two __ldg overloads (one for __half2, one for
 * __half).
 *
 * The overloads share a name, so they must have C++ language linkage.  If
 * this header is reached from inside an `extern "C" { ... }` block, the
 * compiler otherwise rejects them with "conflicting declaration of C
 * function".  Linkage specifications nest and the innermost one wins, so the
 * explicit `extern "C++"` makes the declarations valid regardless of how the
 * header is included.  It is a no-op when the header is included normally.
 */
extern "C++" {
__CUDA_FP16_DECL__ __half2 __ldg(const __half2 *ptr);
__CUDA_FP16_DECL__ __half __ldg(const __half *ptr);
}
#endif /*defined(__cplusplus) && ( __CUDA_ARCH__ >=320 || !defined(__CUDA_ARCH__) )*/
// Line 1180
#if defined(__cplusplus) && (__CUDA_ARCH__ >= 320 || !defined(__CUDA_ARCH__))
/*
 * Inline-asm constraint used for the pointer operand.  "r" names a 32-bit
 * register and "l" a 64-bit one, so the constraint has to follow the pointer
 * width of the target instead of being fixed to "r".
 */
#if (defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__)
#define __LDG_PTR "l"
#else
#define __LDG_PTR "r"
#endif /*(defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__)*/
/* Same linkage guard as on the forward declarations: keep both overloads
 * C++-linked even when the include happens inside `extern "C"`. */
extern "C++" {
/*
 * Loads one __half2 from `ptr` with a 32-bit `ld.global.nc` instruction.
 *
 * ptr     - address of the value to load; passed straight to the load
 *           instruction as a global-memory address.
 * returns - a __half2 whose `x` member holds the 32 bits that were loaded.
 */
__CUDA_FP16_DECL__ __half2 __ldg(const __half2 *ptr)
{
    __half2 ret;
    /* "=r": 32-bit output register, matching `unsigned int x`. */
    asm volatile ("ld.global.nc.b32 %0, [%1];" : "=r"(ret.x) : __LDG_PTR (ptr));
    return ret;
}
/*
 * Loads one __half from `ptr` with a 16-bit `ld.global.nc` instruction.
 *
 * ptr     - address of the value to load; passed straight to the load
 *           instruction as a global-memory address.
 * returns - a __half whose `x` member holds the 16 bits that were loaded.
 */
__CUDA_FP16_DECL__ __half __ldg(const __half *ptr)
{
    __half ret;
    /* "=h": 16-bit output register, matching `unsigned short x`. */
    asm volatile ("ld.global.nc.b16 %0, [%1];" : "=h"(ret.x) : __LDG_PTR (ptr));
    return ret;
}
}
/* The constraint macro is private to this section; do not leak it. */
#undef __LDG_PTR
#endif /*defined(__cplusplus) && (__CUDA_ARCH__ >= 320 || !defined(__CUDA_ARCH__))*/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment