Skip to content

Instantly share code, notes, and snippets.

@hertzsprung
Last active February 26, 2021 15:30
Show Gist options
  • Save hertzsprung/589f6636401d46c9d74a544c23731520 to your computer and use it in GitHub Desktop.
Save hertzsprung/589f6636401d46c9d74a544c23731520 to your computer and use it in GitHub Desktop.
CUDA 2D texture object with double values split into hi/lo int channels
// inspired by https://devtalk.nvidia.com/default/topic/419190/texture-fetching-for-double-precision-floats/
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cstdio>
#define cudaCheckErrors(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line)
{
if (code != cudaSuccess)
{
fprintf(stderr, "%s %s %d\n", cudaGetErrorString(code), file, line);
exit(code);
}
}
__global__ void split_into_channels(int* dest, double* src, int width, int height)
{
for (int j=0; j<height; j++)
{
for (int i=0; i<width; i++)
{
double value = src[j*width + i];
dest[j*width*2 + 2*i] = __double2hiint(value);
dest[j*width*2 + (2*i)+1] = __double2loint(value);
}
}
}
__global__ void print(cudaTextureObject_t t)
{
for (int j=0; j<8; j++)
{
for (int i=0; i<8; i++)
{
int2 v = tex1Dfetch<int2>(t, j*8 + i);
double value = __hiloint2double(v.x, v.y);
printf("GPU: %i %i %lf\n", i, j, value);
}
}
}
int main()
{
const int width = 8;
const int height = 8;
double* source_data;
cudaCheckErrors(cudaMallocManaged(&source_data, width*height*sizeof(double)));;
source_data[0*width + 0] = 1.3;
source_data[0*width + 1] = 2.3;
source_data[1*width + 0] = 3.3;
source_data[2*width + 0] = 4.3;
int* twochannel_data;
cudaCheckErrors(cudaMallocManaged(&twochannel_data, 2*width*height*sizeof(int)));
split_into_channels<<<1, 1>>>(twochannel_data, source_data, width, height);
cudaCheckErrors(cudaDeviceSynchronize());
struct cudaResourceDesc resourceDesc;
memset(&resourceDesc, 0, sizeof(resourceDesc));
resourceDesc.resType = cudaResourceTypeLinear;
resourceDesc.res.linear.devPtr = twochannel_data;
resourceDesc.res.linear.desc.f = cudaChannelFormatKindSigned;
resourceDesc.res.linear.desc.x = 32;
resourceDesc.res.linear.desc.y = 32;
resourceDesc.res.linear.sizeInBytes = 2*width*height*sizeof(int);
struct cudaTextureDesc textureDesc;
memset(&textureDesc, 0, sizeof(textureDesc));
textureDesc.addressMode[0] = cudaAddressModeClamp;
textureDesc.addressMode[1] = cudaAddressModeClamp;
textureDesc.filterMode = cudaFilterModePoint;
textureDesc.readMode = cudaReadModeElementType;
textureDesc.normalizedCoords = false;
cudaTextureObject_t texture = 0;
cudaCheckErrors(cudaCreateTextureObject(&texture, &resourceDesc, &textureDesc, nullptr));
print<<<1, 1>>>(texture);
cudaCheckErrors(cudaDeviceSynchronize());
cudaCheckErrors(cudaDestroyTextureObject(texture));
cudaCheckErrors(cudaFree(twochannel_data));
cudaCheckErrors(cudaFree(source_data));
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment