Skip to content

Instantly share code, notes, and snippets.

@simon-mo
Created January 30, 2019 09:07
Show Gist options
  • Select an option

  • Save simon-mo/7f26dbd2dce608eaa2d1faecda4f31b0 to your computer and use it in GitHub Desktop.

Select an option

Save simon-mo/7f26dbd2dce608eaa2d1faecda4f31b0 to your computer and use it in GitHub Desktop.
Dynamic Kernel Loading using CUDA Driver API

Dynamic Kernel Loading using CUDA Driver API

This demo shows how to load a function compiles to fatbin format using CUDA Driver API.

This is nice because the fatbin packages all compiled code for all device version during compile time (given some compiler flags), so we don't need to use the JIT linker/compiler.

How to run

make
./hello # test regular kernel call
./dyload # test dynamically loaded function call
#include <stdio.h>
#define FUNC_NAME hello
__global__ void FUNC_NAME(void){
printf("hello\n");
}
int main(void){
FUNC_NAME<<<2,5>>>();
cudaDeviceReset();
}
#include <cuda.h>
#include <fstream>
#include <stdio.h>
#include <iostream>
#define FATBIN_FILE "hello.fatbin"
#define FUNC_NAME "_Z5hellov"
// inferred from ptx assembly, prefix _Z, then len(func_name), then postfix v
#define CHECK(expression) \
{ \
CUresult status = (expression); \
if (status != CUDA_SUCCESS) { \
const char* err_str; \
cuGetErrorString(status, &err_str); \
std::cerr << "Error on line " << __LINE__ << ": " \
<< err_str << std::endl; \
std::exit(EXIT_FAILURE); \
} \
}
CUdevice cuDevice;
CUcontext cuContext;
CUmodule cuModule;
CUfunction cuFunc;
CUstream cuStream;
CUresult cuResult;
void* getFile() {
std::ifstream file(FATBIN_FILE, std::ios::binary | std::ios::ate);
std::streamsize size = file.tellg();
file.seekg(0, std::ios::beg);
void* buffer = malloc(size);
file.read((char*) buffer, size);
return buffer;
}
int main(void) {
CHECK(cuInit(0));
CHECK(cuDeviceGet(&cuDevice, 0));
CHECK(cuCtxCreate(&cuContext, 0, cuDevice));
CHECK(cuModuleLoadFatBinary(&cuModule, getFile()));
CHECK(cuModuleGetFunction(&cuFunc, cuModule, FUNC_NAME));
CHECK(cuStreamCreate(&cuStream, CU_STREAM_DEFAULT));
CHECK(cuLaunchKernel(
cuFunc,
2,1,1, //grid
5,1,1, //block
0, //sharedMem
cuStream,
NULL, //void** kernelParams,
NULL //void** extra
));
CHECK(cuStreamSynchronize(cuStream));
}
all: hello hello.fatbin hello.ptx dyload
hello: kernel.cu
nvcc -std=c++11 -arch sm_60 kernel.cu -o hello -lcuda
hello.fatbin: kernel.cu
nvcc -std=c++11 -arch sm_60 kernel.cu -o hello.fatbin --fatbin -lcuda
hello.ptx: kernel.cu
nvcc -std=c++11 -arch sm_60 kernel.cu -o hello.ptx -ptx
dyload: hello.fatbin load.cu
nvcc -std=c++11 -arch sm_60 load.cu -o dyload -lcuda
clean:
rm -f dyload hello hello.*
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment