Created
June 21, 2022 19:10
-
-
Save Artem-B/fc7a40ba69b37e09a8abacd927caa71b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ABI compatibility shims for CUDA-11.7. | |
// Patch affected libraries with: | |
// objcopy \ | |
// --redefine-sym cudaCreateTextureObject=cudaCreateTextureObject_v115 \ | |
// --redefine-sym cudaGetTextureObjectTextureDesc=cudaGetTextureObjectTextureDesc_v115 \ | |
// --redefine-sym cublasGetVersion_v2=cublasGetVersion_v2_v115 \ | |
// --redefine-sym cublasLtGetVersion=cublasLtGetVersion_v115 \ | |
// libnvinfer_static.a libcudnn_static.a | |
// | |
#include <string.h> | |
#include <algorithm> | |
#include "cublas.h" | |
#include "cublasLt.h" | |
#include "cuda.h" | |
#include "cuda_runtime_api.h" | |
// Tensorrt v7/v8 binaries we have now were built with the older version of | |
// cuBLAS which encoded its own version differently. Until we update TensorRT we | |
// must convert the new encoding scheme into something TRT can live with. | |
// TensorRT static libraries are patched to call this `cublasGetVersion_v115` | |
// shim instead of the normal cublasGetVersion_v2. The shim converts the version | |
// from `MMmmpp` encoding into `MMmpp` expected by TensorRT. | |
namespace { | |
size_t ConvertVersionEncoding(size_t real_version) { | |
size_t patch = real_version % 100; | |
// Old encoding only had one digit available for the minor version, so it | |
// can't get higher than 9. | |
size_t minor = std::min<size_t>(9, (real_version / 100) % 100); | |
size_t major = real_version / 10000; | |
return major * 1000 + minor * 100 + patch; | |
} | |
} // namespace | |
extern "C" cublasStatus_t cublasGetVersion_v2_v115(cublasHandle_t handle, | |
int *version) { | |
#if CUDA_VERSION >= 11070 | |
int real_version; | |
cublasStatus_t result = cublasGetVersion_v2(handle, &real_version); | |
*version = ConvertVersionEncoding(real_version); | |
return result; | |
#else | |
// Just pass-through the call to cuBLAS. | |
return cublasGetVersion_v2(handle, version); | |
#endif | |
} | |
extern "C" size_t cublasLtGetVersion_v115(void) { | |
#if CUDA_VERSION >= 11070 | |
return ConvertVersionEncoding(cublasLtGetVersion()); | |
#else | |
return cublasLtGetVersion(); | |
#endif | |
} | |
// cudaTextureDesc as it was in CUDA-11.5 | |
struct __device_builtin__ cudaTextureDesc_v115 { | |
/** | |
* Texture address mode for up to 3 dimensions | |
*/ | |
enum cudaTextureAddressMode addressMode[3]; | |
/** | |
* Texture filter mode | |
*/ | |
enum cudaTextureFilterMode filterMode; | |
/** | |
* Texture read mode | |
*/ | |
enum cudaTextureReadMode readMode; | |
/** | |
* Perform sRGB->linear conversion during texture read | |
*/ | |
int sRGB; | |
/** | |
* Texture Border Color | |
*/ | |
float borderColor[4]; | |
/** | |
* Indicates whether texture reads are normalized or not | |
*/ | |
int normalizedCoords; | |
/** | |
* Limit to the anisotropy ratio | |
*/ | |
unsigned int maxAnisotropy; | |
/** | |
* Mipmap filter mode | |
*/ | |
enum cudaTextureFilterMode mipmapFilterMode; | |
/** | |
* Offset applied to the supplied mipmap level | |
*/ | |
float mipmapLevelBias; | |
/** | |
* Lower end of the mipmap level range to clamp access to | |
*/ | |
float minMipmapLevelClamp; | |
/** | |
* Upper end of the mipmap level range to clamp access to | |
*/ | |
float maxMipmapLevelClamp; | |
/** | |
* Disable any trilinear filtering optimizations. | |
*/ | |
int disableTrilinearOptimization; | |
}; | |
extern "C" cudaError_t cudaCreateTextureObject_v115( | |
cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, | |
const struct cudaTextureDesc_v115 *pTexDesc, | |
const struct cudaResourceViewDesc *pResViewDesc) { | |
#if CUDA_VERSION >= 11070 | |
// Zero-init the full structure and then fill it in with the old-format data, | |
// leaving new fields with the default value of 0. | |
cudaTextureDesc texDesc = {}; | |
memcpy((void *)&texDesc, (void *)pTexDesc, sizeof(cudaTextureDesc_v115)); | |
return cudaCreateTextureObject(pTexObject, pResDesc, &texDesc, pResViewDesc); | |
#else | |
return cudaCreateTextureObject(pTexObject, pResDesc, | |
(cudaTextureDesc *)pTexDesc, pResViewDesc); | |
#endif | |
} | |
extern "C" cudaError_t cudaGetTextureObjectTextureDesc_v115( | |
struct cudaTextureDesc_v115 *pTexDesc, cudaTextureObject_t texObject) { | |
#if CUDA_VERSION >= 11070 | |
cudaTextureDesc texDesc; | |
// Provide the function with the correctly-sized buffer, and then copy the | |
// fields the old version knew about. | |
cudaError_t result = cudaGetTextureObjectTextureDesc(&texDesc, texObject); | |
memcpy((void *)pTexDesc, (void *)&texDesc, sizeof(cudaTextureDesc_v115)); | |
return result; | |
#else | |
return cudaGetTextureObjectTextureDesc((cudaTextureDesc *)pTexDesc, | |
texObject); | |
#endif | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment