Last active
February 26, 2021 15:30
-
-
Save hertzsprung/589f6636401d46c9d74a544c23731520 to your computer and use it in GitHub Desktop.
CUDA 2D texture object with double values split into hi/lo int channels
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// inspired by https://devtalk.nvidia.com/default/topic/419190/texture-fetching-for-double-precision-floats/ | |
#include <cuda.h> | |
#include <cuda_runtime_api.h> | |
#include <cstdio> | |
#define cudaCheckErrors(ans) { gpuAssert((ans), __FILE__, __LINE__); } | |
inline void gpuAssert(cudaError_t code, const char *file, int line) | |
{ | |
if (code != cudaSuccess) | |
{ | |
fprintf(stderr, "%s %s %d\n", cudaGetErrorString(code), file, line); | |
exit(code); | |
} | |
} | |
__global__ void split_into_channels(int* dest, double* src, int width, int height) | |
{ | |
for (int j=0; j<height; j++) | |
{ | |
for (int i=0; i<width; i++) | |
{ | |
double value = src[j*width + i]; | |
dest[j*width*2 + 2*i] = __double2hiint(value); | |
dest[j*width*2 + (2*i)+1] = __double2loint(value); | |
} | |
} | |
} | |
__global__ void print(cudaTextureObject_t t) | |
{ | |
for (int j=0; j<8; j++) | |
{ | |
for (int i=0; i<8; i++) | |
{ | |
int2 v = tex1Dfetch<int2>(t, j*8 + i); | |
double value = __hiloint2double(v.x, v.y); | |
printf("GPU: %i %i %lf\n", i, j, value); | |
} | |
} | |
} | |
int main() | |
{ | |
const int width = 8; | |
const int height = 8; | |
double* source_data; | |
cudaCheckErrors(cudaMallocManaged(&source_data, width*height*sizeof(double)));; | |
source_data[0*width + 0] = 1.3; | |
source_data[0*width + 1] = 2.3; | |
source_data[1*width + 0] = 3.3; | |
source_data[2*width + 0] = 4.3; | |
int* twochannel_data; | |
cudaCheckErrors(cudaMallocManaged(&twochannel_data, 2*width*height*sizeof(int))); | |
split_into_channels<<<1, 1>>>(twochannel_data, source_data, width, height); | |
cudaCheckErrors(cudaDeviceSynchronize()); | |
struct cudaResourceDesc resourceDesc; | |
memset(&resourceDesc, 0, sizeof(resourceDesc)); | |
resourceDesc.resType = cudaResourceTypeLinear; | |
resourceDesc.res.linear.devPtr = twochannel_data; | |
resourceDesc.res.linear.desc.f = cudaChannelFormatKindSigned; | |
resourceDesc.res.linear.desc.x = 32; | |
resourceDesc.res.linear.desc.y = 32; | |
resourceDesc.res.linear.sizeInBytes = 2*width*height*sizeof(int); | |
struct cudaTextureDesc textureDesc; | |
memset(&textureDesc, 0, sizeof(textureDesc)); | |
textureDesc.addressMode[0] = cudaAddressModeClamp; | |
textureDesc.addressMode[1] = cudaAddressModeClamp; | |
textureDesc.filterMode = cudaFilterModePoint; | |
textureDesc.readMode = cudaReadModeElementType; | |
textureDesc.normalizedCoords = false; | |
cudaTextureObject_t texture = 0; | |
cudaCheckErrors(cudaCreateTextureObject(&texture, &resourceDesc, &textureDesc, nullptr)); | |
print<<<1, 1>>>(texture); | |
cudaCheckErrors(cudaDeviceSynchronize()); | |
cudaCheckErrors(cudaDestroyTextureObject(texture)); | |
cudaCheckErrors(cudaFree(twochannel_data)); | |
cudaCheckErrors(cudaFree(source_data)); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment