Created
February 15, 2021 19:41
-
-
Save rrika/f5f89c2a8c9932c2630ef1c73bac08de to your computer and use it in GitHub Desktop.
HIP without HIP
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// clang++ | |
// -x hip | |
// test.cpp | |
// -O3 | |
// --cuda-gpu-arch=gfx1010 | |
// --hip-device-lib=dummy.bc | |
// --hip-device-lib-path=path_to_dummy_bc | |
// -nogpuinc | |
// -fuse-ld=lld | |
// -fgpu-rdc | |
// -o test | |
#include <cstdio> | |
#include <vector> | |
#define __device__ __attribute__((device)) | |
#define __global__ __attribute__((global)) | |
#define __host__ __attribute__((host)) | |
struct dim3 { | |
unsigned x, y, z; | |
__host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {} | |
}; | |
typedef struct hipStream *hipStream_t; | |
typedef enum hipError { hipSuccess = 0 } hipError_t; | |
typedef struct ihipModule_t* hipModule_t; | |
extern "C" hipError_t __hipPushCallConfiguration( | |
dim3 gridSize, | |
dim3 blockSize, | |
size_t sharedSize = 0, | |
hipStream_t stream = 0) | |
{ | |
return hipSuccess; // important, else stuff gets dead-code eliminated | |
} | |
extern "C" hipError_t __hipPopCallConfiguration( | |
dim3 *gridDim, | |
dim3 *blockDim, | |
size_t *sharedMem, | |
hipStream_t *stream) | |
{ | |
return hipSuccess; | |
} | |
extern "C" std::vector<hipModule_t>* __hipRegisterFatBinary(const void* data) { | |
struct ClangOffloadBlob { | |
char magic[24]; | |
unsigned long long num_entries; | |
}; | |
struct BlobEntry { | |
unsigned long long offset, size, label_size; | |
char label[]; | |
}; | |
struct W { | |
int dummy1, dummy2; | |
ClangOffloadBlob *blob; | |
} *wrapper = (W*)data; | |
printf("\nregistering clang offload bundle: %s\n", (char*)wrapper->blob); | |
auto num_entries = wrapper->blob->num_entries; | |
auto entry = (BlobEntry*)&wrapper->blob[1]; | |
for (int i=0; i<num_entries; i++) { | |
auto ls = entry->label_size; | |
printf(" entry %d: %.*s\n", i, (int)ls, entry->label); | |
// entry->offset leads you do an embedded ELF file where you can find your actual GPU code | |
entry++; | |
entry = (BlobEntry*)(ls+(char*)entry); | |
} | |
puts(""); | |
return (std::vector<hipModule_t>*) 0; // aaa | |
} | |
extern "C" void __hipUnregisterFatBinary(std::vector<hipModule_t>* modules) {} | |
extern "C" void __hipRegisterFunction( | |
std::vector<hipModule_t>* modules, | |
const void* hostFunction, | |
char* deviceFunction, | |
const char* deviceName, | |
unsigned int threadLimit, | |
unsigned* tid, | |
unsigned* bid, | |
dim3* blockDim, | |
dim3* gridDim, | |
int* wSize) | |
{ | |
void *aaa = (void*)modules; // see above | |
printf("__hipRegisterFunction hostFunction=%p deviceFunction=%s deviceName=%s\n", | |
hostFunction, deviceFunction, deviceName); | |
} | |
extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim, | |
dim3 blockDim, void **args, | |
size_t sharedMem, | |
hipStream_t stream) | |
{ | |
printf("launch funcptr=%p\n", func); | |
return hipSuccess; // important, else endless loop | |
} | |
__global__ void offload_me(int *z) { | |
z[0] = -z[0]; | |
} | |
int main(int argc, char **argv) { | |
int q = 99; | |
offload_me<<<1, 1>>>(&q); | |
puts(""); | |
return q; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment