Skip to content

Instantly share code, notes, and snippets.

@robertmaynard
Created February 6, 2017 19:18
Show Gist options
  • Save robertmaynard/1be7984a6eed31b7f59000a64411f029 to your computer and use it in GitHub Desktop.
Save robertmaynard/1be7984a6eed31b7f59000a64411f029 to your computer and use it in GitHub Desktop.
separablecompilation
// Symbol-visibility marker: expands to a DLL export attribute on Windows and
// to nothing on every other platform.
#if defined(_WIN32)
#  define EXPORT __declspec(dllexport)
#else
#  define EXPORT
#endif

// Returns the square of `x`.
EXPORT int dynamic_base_func(int x)
{
  const int squared = x * x;
  return squared;
}
#!/bin/bash
# Hand-written build of the example: object files go to temp/, libraries and
# the final executable go to bin/. Derived is built as its own shared library
# with its own device-link step.
mkdir -p temp
mkdir -p bin
# --- Base: plain C++ shared library (bin/libBase.so) ---
# NOTE(review): the soname here is "bin/libBase.so" (with a directory prefix),
# unlike libDerived.so / libCombine.so below -- confirm this is intentional.
/usr/bin/clang++ -DBase_EXPORTS -fPIC -std=gnu++11 -o temp/base.cpp.o -c base.cpp
/usr/bin/clang++ -fPIC -shared -Wl,-soname,bin/libBase.so -o bin/libBase.so temp/base.cpp.o
# --- CudaSep: static library of relocatable device code ---
# Both sources are compiled with -dc, then archived into bin/libCudaSep.a.
/usr/local/cuda/bin/nvcc -gencode arch=compute_50,code=compute_50 -Xcompiler=-fPIC -std=c++11 -x cu -dc sep.cu -o temp/sep.cu.o
/usr/local/cuda/bin/nvcc -gencode arch=compute_50,code=compute_50 -Xcompiler=-fPIC -std=c++11 -x cu -dc sep_kernel.cu -o temp/sep_kernel.cu.o
/usr/bin/ar qc bin/libCudaSep.a temp/sep.cu.o temp/sep_kernel.cu.o
/usr/bin/ranlib bin/libCudaSep.a
# --- Derived: shared library (bin/libDerived.so) ---
# derived.cu is compiled with -c (not -dc), device-linked on its own with
# -dlink, and the host link against libBase.so and the CUDA runtime is done by g++.
/usr/local/cuda/bin/nvcc -DDerived_EXPORTS -gencode arch=compute_50,code=compute_50 -Xcompiler=-fPIC -std=c++11 -x cu -c derived.cu -o temp/derived.cu.o
/usr/local/cuda/bin/nvcc -gencode arch=compute_50,code=compute_50 -Xcompiler=-fPIC -shared -dlink temp/derived.cu.o -o temp/derived_device_link.o
g++ -fPIC -shared -Wl,-soname,libDerived.so -o bin/libDerived.so temp/derived.cu.o temp/derived_device_link.o -Wl,-rpath,bin bin/libBase.so -L"/usr/local/cuda/lib64/stubs" -L"/usr/local/cuda/lib64" -lcudadevrt -lcudart_static -lrt -lpthread -ldl
# --- Combine: shared library (bin/libCombine.so) ---
# combine.cu.o is device-linked together with libCudaSep.a; the host link then
# pulls in libDerived.so, libCudaSep.a and libBase.so.
/usr/local/cuda/bin/nvcc -DCombine_EXPORTS -gencode arch=compute_50,code=compute_50 -Xcompiler=-fPIC -std=c++11 -x cu -c combine.cu -o temp/combine.cu.o
/usr/local/cuda/bin/nvcc -gencode arch=compute_50,code=compute_50 -Xcompiler=-fPIC -shared -dlink temp/combine.cu.o -o temp/combine_device_link.o bin/libCudaSep.a
g++ -fPIC -shared -Wl,-soname,libCombine.so -o bin/libCombine.so temp/combine.cu.o temp/combine_device_link.o -Wl,-rpath,bin bin/libDerived.so bin/libCudaSep.a bin/libBase.so -L"/usr/local/cuda/lib64/stubs" -L"/usr/local/cuda/lib64" -lcudadevrt -lcudart_static -lrt -lpthread -ldl
# --- Bug: host-only executable linked against the three shared libraries ---
# The rpath is the relative directory "bin".
/usr/bin/clang++ -std=gnu++11 -o temp/main.cpp.o -c main.cpp
/usr/bin/clang++ temp/main.cpp.o -o bin/Bug -Wl,-rpath,bin bin/libCombine.so bin/libDerived.so bin/libBase.so
# CUDA as a first-class project() language, CMAKE_CUDA_STANDARD and the
# CUDA_SEPARABLE_COMPILATION target property were all introduced in CMake 3.8,
# so that is the real minimum this file can be configured with.
cmake_minimum_required(VERSION 3.8)
project(SepBug CXX CUDA)

# Goal for this example:
#   show off separable compilation

# Embed PTX for compute capability 5.0 in every CUDA object.
set(CMAKE_CUDA_FLAGS "-gencode arch=compute_50,code=compute_50")

# Require C++11 for both the host (CXX) and CUDA compilers.
set(CMAKE_CUDA_STANDARD 11)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)

# Targets:
#   Base    - host-only shared library
#   Derived - CUDA shared library
#   CudaSep - static library of separably compiled device code
#   Combine - CUDA shared library that consumes Derived and CudaSep
#   Bug     - host-only executable driving the example
add_library(Base SHARED base.cpp)
add_library(Derived SHARED derived.cu)
add_library(CudaSep STATIC sep.cu sep_kernel.cu)
add_library(Combine SHARED combine.cu)
add_executable(Bug main.cpp)

target_link_libraries(Derived PUBLIC Base)
target_link_libraries(Combine
  PUBLIC Derived
  PRIVATE CudaSep)
target_link_libraries(Bug PUBLIC Combine)

# Setup the flags to enable separable compilation. CudaSep is archived into a
# shared library (Combine), so its objects must also be position independent.
set_target_properties(CudaSep
  PROPERTIES
    CUDA_SEPARABLE_COMPILATION ON
    POSITION_INDEPENDENT_CODE ON)
#include <iostream>
#ifdef _WIN32
#define EXPORT __declspec(dllexport)
#define IMPORT __declspec(dllimport)
#else
#define EXPORT
#define IMPORT
#endif
// sep_launch_kernel() belongs to the CudaSep library, which the build files
// create as a STATIC library and link directly into this one. It is therefore
// an ordinary external function, not a DLL import, so it is declared without
// IMPORT (which expands to __declspec(dllimport) on Windows).
int sep_launch_kernel(int);

// Exported entry point: forwards `x` to sep_launch_kernel() and returns that
// function's result unchanged.
EXPORT int call_cuda_seperable_code(int x)
{
  return sep_launch_kernel(x);
}
// Forward declaration; the definition is not part of this translation unit.
IMPORT int cuda_dynamic_host_func(int);

// Exported entry point: hands `x` to cuda_dynamic_host_func() and returns
// whatever that call produces.
EXPORT int call_host_function(int x)
{
  const int result = cuda_dynamic_host_func(x);
  return result;
}
#include <string>
#include <cuda.h>
#include <iostream>
#ifdef _WIN32
#define EXPORT __declspec(dllexport)
#else
#define EXPORT
#endif
// Forward declaration; the definition is not part of this translation unit.
int dynamic_base_func(int);

// Host-only function: queries and prints the CUDA runtime and driver versions,
// then returns dynamic_base_func(x).
//
// A failing version query is reported on std::cerr instead of being silently
// ignored. The version lines are still printed (showing 0 for a value that
// could not be queried) and the return value is unaffected.
EXPORT int __host__ cuda_dynamic_host_func(int x)
{
  int driverVersion = 0, runtimeVersion = 0;

  cudaError_t status = cudaDriverGetVersion(&driverVersion);
  if (status != cudaSuccess)
  {
    std::cerr << __FILE__ << " cudaDriverGetVersion: "
              << cudaGetErrorString(status) << std::endl;
  }

  status = cudaRuntimeGetVersion(&runtimeVersion);
  if (status != cudaSuccess)
  {
    std::cerr << __FILE__ << " cudaRuntimeGetVersion: "
              << cudaGetErrorString(status) << std::endl;
  }

  std::cout << "cudaRuntimeGetVersion(): " << runtimeVersion << std::endl;
  std::cout << "cudaDriverGetVersion(): " << driverVersion << std::endl;
  return dynamic_base_func(x);
}
#include <iostream>
#ifdef _WIN32
#define IMPORT __declspec(dllimport)
#else
#define IMPORT
#endif
// Entry points provided by the Combine library.
IMPORT int call_cuda_seperable_code(int x);
IMPORT int call_host_function(int x);

// Drives the example: runs the separable-compilation path, then the host-only
// path. The process exit code is the status returned by the separable-code
// call, so a failed kernel launch is visible to the caller instead of always
// exiting with 0.
int main(int /*argc*/, char** /*argv*/)
{
  std::cout << "about to call separable code" << std::endl;
  const int kernelStatus = call_cuda_seperable_code(42);

  // Still exercise the host path even if the kernel path failed, so both
  // messages are always printed.
  std::cout << "about to call host fuction" << std::endl;
  call_host_function(42);

  return kernelStatus;
}
#include <iostream>
// Forward declaration of a device function; its definition is not in this block.
__device__ int kernel_func(int);

// Kernel with internal linkage: calls kernel_func(x) and discards the result.
static __global__ void sep_kernel(int x)
{
  kernel_func(x);
}
// Launches sep_kernel with a single thread (<<<1,1>>>) and waits for it to finish.
//
// Returns 0 on success. Returns 1 after printing the CUDA error string to
// std::cerr if either the launch or the subsequent device synchronization
// reports an error.
int sep_launch_kernel(int x)
{
  std::cout << "sep_launch_kernel" << std::endl;

  sep_kernel <<<1,1>>> (x);

  // A kernel launch does not return a status; launch failures are picked up
  // through cudaGetLastError().
  cudaError_t err = cudaGetLastError();
  if (err == cudaSuccess)
  {
    // Errors raised while the kernel executes surface at the next
    // synchronizing call.
    err = cudaDeviceSynchronize();
  }

  if (err != cudaSuccess)
  {
    // Report the status that was actually returned rather than re-querying
    // cudaGetLastError() for it.
    std::cerr << __FILE__ << " " << cudaGetErrorString(err) << std::endl;
    return 1;
  }
  return 0;
}
// Device function: returns the square of `input`.
__device__ int kernel_func(int input)
{
  const int squared = input * input;
  return squared;
}
#!/bin/bash
# Hand-written build of the example in which derived.cu is folded into
# libCombine.so: no libDerived.so is produced, and derived.cu.o and
# combine.cu.o share a single device-link step. Object files go to temp/,
# libraries and the final executable go to bin/.
mkdir -p temp
mkdir -p bin
# --- Base: plain C++ shared library (bin/libBase.so) ---
# NOTE(review): the soname here is "bin/libBase.so" (with a directory prefix),
# unlike libCombine.so below -- confirm this is intentional.
/usr/bin/clang++ -DBase_EXPORTS -fPIC -std=gnu++11 -o temp/base.cpp.o -c base.cpp
/usr/bin/clang++ -fPIC -shared -Wl,-soname,bin/libBase.so -o bin/libBase.so temp/base.cpp.o
# --- CudaSep: static library of relocatable device code ---
# Both sources are compiled with -dc, then archived into bin/libCudaSep.a.
/usr/local/cuda/bin/nvcc -gencode arch=compute_50,code=compute_50 -Xcompiler=-fPIC -std=c++11 -x cu -dc sep.cu -o temp/sep.cu.o
/usr/local/cuda/bin/nvcc -gencode arch=compute_50,code=compute_50 -Xcompiler=-fPIC -std=c++11 -x cu -dc sep_kernel.cu -o temp/sep_kernel.cu.o
/usr/bin/ar qc bin/libCudaSep.a temp/sep.cu.o temp/sep_kernel.cu.o
/usr/bin/ranlib bin/libCudaSep.a
# --- Compile derived.cu and combine.cu (both with -c, not -dc) ---
/usr/local/cuda/bin/nvcc -DDerived_EXPORTS -gencode arch=compute_50,code=compute_50 -Xcompiler=-fPIC -std=c++11 -x cu -c derived.cu -o temp/derived.cu.o
/usr/local/cuda/bin/nvcc -DCombine_EXPORTS -gencode arch=compute_50,code=compute_50 -Xcompiler=-fPIC -std=c++11 -x cu -c combine.cu -o temp/combine.cu.o
# --- Combine: one device link over both objects plus libCudaSep.a, then a
#     host link that puts both objects into bin/libCombine.so ---
/usr/local/cuda/bin/nvcc -gencode arch=compute_50,code=compute_50 -Xcompiler=-fPIC -shared -dlink temp/derived.cu.o temp/combine.cu.o -o temp/combine_device_link.o bin/libCudaSep.a
g++ -fPIC -shared -Wl,-soname,libCombine.so -o bin/libCombine.so temp/derived.cu.o temp/combine.cu.o temp/combine_device_link.o -Wl,-rpath,bin bin/libCudaSep.a bin/libBase.so -L"/usr/local/cuda/lib64/stubs" -L"/usr/local/cuda/lib64" -lcudadevrt -lcudart_static -lrt -lpthread -ldl
# --- Bug: host-only executable linked against libCombine.so and libBase.so ---
# The rpath is the relative directory "bin".
/usr/bin/clang++ -std=gnu++11 -o temp/main.cpp.o -c main.cpp
/usr/bin/clang++ temp/main.cpp.o -o bin/Bug -Wl,-rpath,bin bin/libCombine.so bin/libBase.so
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment