Skip to content

Instantly share code, notes, and snippets.

@epitron
Last active September 22, 2015 21:15
Show Gist options
  • Save epitron/b4577dd79f27cfc417f0 to your computer and use it in GitHub Desktop.
Save epitron/b4577dd79f27cfc417f0 to your computer and use it in GitHub Desktop.
$ nvcc --gpu-architecture=compute_20 --gpu-code=compute_20 -I/usr/local/cuda/include/ -DGPU --compiler-options "-Wall -Ofast -DGPU" -c ./src/convolutional_kernels.cu -o obj/convolutional_kernels.o
/usr/local/cuda/include/cuda_fp16.h:314:83: error: conflicting declaration of C function ‘__half __ldg(const __half*)’
/usr/local/cuda/include/cuda_fp16.h:313:60: note: previous declaration ‘__half2 __ldg(const __half2*)’
/usr/local/cuda/include/cuda_fp16.h: In function ‘__half2 __ldg(const __half2*)’:
/usr/local/cuda/include/cuda_fp16.h:1180:84: error: conflicting declaration of C function ‘__half2 __ldg(const __half2*)’
/usr/local/cuda/include/cuda_fp16.h:314:59: note: previous declaration ‘__half __ldg(const __half*)’
Makefile:54: recipe for target 'obj/convolutional_kernels.o' failed
make: *** [obj/convolutional_kernels.o] Error 1
// Line 84
/*
 * Storage type for a single value handled by this header (cuda_fp16.h).
 * It is a 2-byte-aligned struct wrapping one unsigned short; the struct
 * itself carries no arithmetic, only the raw bits.
 */
typedef struct __align__(2) {
/* Raw bit pattern of the value. The __ldg(const __half *) overload fills
 * this field directly from a 16-bit (b16) load. */
unsigned short x;
} __half;
/*
 * 4-byte-aligned struct wrapping one unsigned int.
 * NOTE(review): presumably packs two __half values into one 32-bit word
 * (the name and the b32 load in __ldg suggest so) -- confirm against the
 * full header.
 */
typedef struct __align__(4) {
/* Raw 32-bit payload. The __ldg(const __half2 *) overload fills this field
 * directly from a 32-bit (b32) load. */
unsigned int x;
} __half2;
// Line 103
/*
 * Qualifier prefix placed in front of the __ldg declarations and
 * definitions in this header: every such function is declared
 * static, __device__ and __inline__.
 */
#define __CUDA_FP16_DECL__ static __device__ __inline__
// Line 313
/*
 * Forward declarations of the two __ldg overloads, one per pointer type.
 *
 * The guard enables them only for C++ translation units, and only when
 * __CUDA_ARCH__ is either >= 320 or not defined at all.
 *
 * Both functions share the name __ldg and differ only in parameter type,
 * i.e. they rely on C++ function overloading.
 * NOTE(review): the build log reports these two lines as a "conflicting
 * declaration of C function", which suggests the header was reached from
 * inside an extern "C" block in the including source -- confirm there.
 */
#if defined(__cplusplus) && ( __CUDA_ARCH__ >=320 || !defined(__CUDA_ARCH__) )
/* Loads one __half2 through the pointer and returns it by value. */
__CUDA_FP16_DECL__ __half2 __ldg(const __half2 *ptr);
/* Loads one __half through the pointer and returns it by value. */
__CUDA_FP16_DECL__ __half __ldg(const __half *ptr);
#endif /*defined(__cplusplus) && ( __CUDA_ARCH__ >=320 || !defined(__CUDA_ARCH__) )*/
// Line 1180
#if defined(__cplusplus) && (__CUDA_ARCH__ >= 320 || !defined(__CUDA_ARCH__))
/*
 * Inline-PTX constraint letter used for the pointer operand of the loads
 * below. "r" binds a 32-bit register and "l" binds a 64-bit register, so
 * the letter has to follow the pointer width of the target: 64-bit builds
 * (LP64, or MSVC targeting Win64) need "l", everything else keeps "r".
 * The macro is private to this section and is #undef'd at the end.
 */
#if (defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__)
#define __LDG_PTR "l"
#else
#define __LDG_PTR "r"
#endif /*(defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__)*/

/*
 * Loads one __half2 from *ptr with the PTX instruction ld.global.nc.b32
 * (a 32-bit load from global memory through the non-coherent, read-only
 * path) and returns it by value.
 *
 * ptr: address of the value to load; the instruction requires it to refer
 *      to global memory.
 */
__CUDA_FP16_DECL__ __half2 __ldg(const __half2 *ptr)
{
    __half2 ret;
    /* "=r" writes the 32-bit result register straight into ret.x. */
    asm volatile ("ld.global.nc.b32 %0, [%1];" : "=r"(ret.x): __LDG_PTR (ptr));
    return ret;
}

/*
 * Loads one __half from *ptr with the PTX instruction ld.global.nc.b16
 * (the 16-bit counterpart of the load above) and returns it by value.
 *
 * ptr: address of the value to load; the instruction requires it to refer
 *      to global memory.
 */
__CUDA_FP16_DECL__ __half __ldg(const __half *ptr)
{
    __half ret;
    /* "=h" writes the 16-bit result register straight into ret.x. */
    asm volatile ("ld.global.nc.b16 %0, [%1];" : "=h"(ret.x) : __LDG_PTR (ptr));
    return ret;
}
#undef __LDG_PTR
#endif /*defined(__cplusplus) && (__CUDA_ARCH__ >= 320 || !defined(__CUDA_ARCH__))*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment