Created
February 6, 2017 19:18
-
-
Save robertmaynard/1be7984a6eed31b7f59000a64411f029 to your computer and use it in GitHub Desktop.
separablecompilation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Symbol-export decoration for the shared library build: on Windows the
// function is marked dllexport; on every other platform EXPORT expands to
// nothing.
#ifdef _WIN32
#define EXPORT __declspec(dllexport)
#else
#define EXPORT
#endif

// Returns the square of x.
//
// Plain host-side C++ with no CUDA dependency; the result is computed with
// ordinary int arithmetic, so callers are responsible for keeping x small
// enough that x * x fits in an int.
EXPORT int dynamic_base_func(int x)
{
  return x * x;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Builds the example by hand, one compiler/linker invocation per step:
#   libBase.so    - host-only shared library (base.cpp)
#   libCudaSep.a  - static archive of relocatable device code (sep.cu, sep_kernel.cu)
#   libDerived.so - shared library from derived.cu, with its own device-link step
#   libCombine.so - shared library from combine.cu, device-linked against libCudaSep.a
#   bin/Bug       - executable linking Combine, Derived and Base

# Abort on the first failing step so later steps never run against stale or
# missing objects.
set -euo pipefail

mkdir -p temp
mkdir -p bin

# Base: plain C++ shared library.
/usr/bin/clang++ -DBase_EXPORTS -fPIC -std=gnu++11 -o temp/base.cpp.o -c base.cpp
/usr/bin/clang++ -fPIC -shared -Wl,-soname,bin/libBase.so -o bin/libBase.so temp/base.cpp.o

# CudaSep: compile with -dc (relocatable device code) and archive.
/usr/local/cuda/bin/nvcc -gencode arch=compute_50,code=compute_50 -Xcompiler=-fPIC -std=c++11 -x cu -dc sep.cu -o temp/sep.cu.o
/usr/local/cuda/bin/nvcc -gencode arch=compute_50,code=compute_50 -Xcompiler=-fPIC -std=c++11 -x cu -dc sep_kernel.cu -o temp/sep_kernel.cu.o
# `ar q` appends without replacing, so remove any archive left by a previous
# run; otherwise re-running the script accumulates duplicate members.
rm -f bin/libCudaSep.a
/usr/bin/ar qc bin/libCudaSep.a temp/sep.cu.o temp/sep_kernel.cu.o
/usr/bin/ranlib bin/libCudaSep.a

# Derived: compile, device-link on its own, then link the shared library.
/usr/local/cuda/bin/nvcc -DDerived_EXPORTS -gencode arch=compute_50,code=compute_50 -Xcompiler=-fPIC -std=c++11 -x cu -c derived.cu -o temp/derived.cu.o
/usr/local/cuda/bin/nvcc -gencode arch=compute_50,code=compute_50 -Xcompiler=-fPIC -shared -dlink temp/derived.cu.o -o temp/derived_device_link.o
g++ -fPIC -shared -Wl,-soname,libDerived.so -o bin/libDerived.so temp/derived.cu.o temp/derived_device_link.o -Wl,-rpath,bin bin/libBase.so -L"/usr/local/cuda/lib64/stubs" -L"/usr/local/cuda/lib64" -lcudadevrt -lcudart_static -lrt -lpthread -ldl

# Combine: compile, device-link against libCudaSep.a, then link the shared
# library against Derived, CudaSep and Base.
/usr/local/cuda/bin/nvcc -DCombine_EXPORTS -gencode arch=compute_50,code=compute_50 -Xcompiler=-fPIC -std=c++11 -x cu -c combine.cu -o temp/combine.cu.o
/usr/local/cuda/bin/nvcc -gencode arch=compute_50,code=compute_50 -Xcompiler=-fPIC -shared -dlink temp/combine.cu.o -o temp/combine_device_link.o bin/libCudaSep.a
g++ -fPIC -shared -Wl,-soname,libCombine.so -o bin/libCombine.so temp/combine.cu.o temp/combine_device_link.o -Wl,-rpath,bin bin/libDerived.so bin/libCudaSep.a bin/libBase.so -L"/usr/local/cuda/lib64/stubs" -L"/usr/local/cuda/lib64" -lcudadevrt -lcudart_static -lrt -lpthread -ldl

# Bug: host-only executable that links all three shared libraries.
/usr/bin/clang++ -std=gnu++11 -o temp/main.cpp.o -c main.cpp
/usr/bin/clang++ temp/main.cpp.o -o bin/Bug -Wl,-rpath,bin bin/libCombine.so bin/libDerived.so bin/libBase.so
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CUDA as a project() language and the CUDA_SEPARABLE_COMPILATION target
# property both require CMake 3.8 or newer.
cmake_minimum_required(VERSION 3.8)
project(SepBug CXX CUDA)

#Goal for this example:
# show off separable compilation

# Append rather than overwrite so flags supplied by the user or the
# environment are preserved.
string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_50,code=compute_50")

set(CMAKE_CUDA_STANDARD 11)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)

add_library(Base SHARED base.cpp)
add_library(Derived SHARED derived.cu)
add_library(CudaSep STATIC sep.cu sep_kernel.cu)
add_library(Combine SHARED combine.cu)
add_executable(Bug main.cpp)

# Link graph: Bug -> Combine -> (Derived -> Base, CudaSep).
target_link_libraries(Derived PUBLIC Base)
target_link_libraries(Combine
  PUBLIC Derived
  PRIVATE CudaSep)
target_link_libraries(Bug PUBLIC Combine)

#Setup the flags to enable separable compilation
# CudaSep is a static library that gets linked into the shared library
# Combine, so it is also built as position independent code.
set_target_properties(CudaSep PROPERTIES
  CUDA_SEPARABLE_COMPILATION ON
  POSITION_INDEPENDENT_CODE ON)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream>

// Symbol visibility decorations: dllexport/dllimport on Windows, nothing on
// other platforms.
#ifdef _WIN32
#define EXPORT __declspec(dllexport)
#define IMPORT __declspec(dllimport)
#else
#define EXPORT
#define IMPORT
#endif

// Provided by another translation unit; launches the separably compiled
// kernel.
IMPORT int sep_launch_kernel(int);

// Exported entry point that forwards x to sep_launch_kernel() and returns its
// result unchanged.
EXPORT int call_cuda_seperable_code(int x)
{
  return sep_launch_kernel(x);
}

// Provided by another translation unit.
IMPORT int cuda_dynamic_host_func(int);

// Exported entry point that forwards x to cuda_dynamic_host_func() and returns
// its result unchanged.
EXPORT int call_host_function(int x)
{
  return cuda_dynamic_host_func(x);
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <string>
#include <cuda.h>
#include <iostream>

// Symbol-export decoration: dllexport on Windows, nothing elsewhere.
#ifdef _WIN32
#define EXPORT __declspec(dllexport)
#else
#define EXPORT
#endif

// Provided by another translation unit.
int dynamic_base_func(int);

// Prints the CUDA runtime and driver versions to stdout, then returns
// dynamic_base_func(x).
//
// A failed version query is reported on stderr but does not change the return
// value; the corresponding version is then printed as 0.
EXPORT int __host__ cuda_dynamic_host_func(int x)
{
  int driverVersion = 0, runtimeVersion = 0;

  cudaError_t err = cudaDriverGetVersion(&driverVersion);
  if (err != cudaSuccess)
  {
    std::cerr << __FILE__ << " cudaDriverGetVersion: "
              << cudaGetErrorString(err) << std::endl;
  }

  err = cudaRuntimeGetVersion(&runtimeVersion);
  if (err != cudaSuccess)
  {
    std::cerr << __FILE__ << " cudaRuntimeGetVersion: "
              << cudaGetErrorString(err) << std::endl;
  }

  std::cout << "cudaRuntimeGetVersion(): " << runtimeVersion << std::endl;
  std::cout << "cudaDriverGetVersion(): " << driverVersion << std::endl;
  return dynamic_base_func(x);
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream>

// Symbol-import decoration: dllimport on Windows, nothing elsewhere.
#ifdef _WIN32
#define IMPORT __declspec(dllimport)
#else
#define IMPORT
#endif

// Entry points provided by the shared libraries this executable links to.
IMPORT int call_cuda_seperable_code(int x);
IMPORT int call_host_function(int x);

// Calls the separable-compilation entry point and then the host-only entry
// point, announcing each call on stdout.
//
// The exit status is the value returned by call_cuda_seperable_code(), so a
// failure reported by that call is visible to the caller of the program
// instead of being silently dropped.
int main(int argc, char** argv)
{
  std::cout << "about to call separable code" << std::endl;
  const int sepStatus = call_cuda_seperable_code(42);

  std::cout << "about to call host fuction" << std::endl;
  call_host_function(42);

  return sepStatus;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream>

// Device function defined in another translation unit; calling it from the
// kernel below is what requires a device-link step for this file.
int __device__ kernel_func(int);

// Kernel that calls kernel_func(x). The result is discarded.
// NOTE(review): presumably the call exists only to exercise cross-file device
// linking - confirm before relying on the discarded value.
static
__global__
void sep_kernel(int x)
{
  kernel_func(x);
}

// Launches sep_kernel with a single block of a single thread and waits for it
// to finish.
//
// Returns 0 on success. Returns 1, after printing the CUDA error string to
// stderr, if either the launch or the subsequent device synchronization
// reports an error.
int sep_launch_kernel(int x)
{
  std::cout << "sep_launch_kernel" << std::endl;
  sep_kernel <<<1,1>>> (x);

  // Launch-time errors are only visible through cudaGetLastError().
  cudaError_t err = cudaGetLastError();
  if(err != cudaSuccess)
  {
    std::cerr << __FILE__ << " " << cudaGetErrorString(err) << std::endl;
    return 1;
  }

  // Errors raised while the kernel runs are returned by the synchronizing
  // call itself, so report that status directly rather than querying the
  // last-error state a second time.
  err = cudaDeviceSynchronize();
  if(err != cudaSuccess)
  {
    std::cerr << __FILE__ << " " << cudaGetErrorString(err) << std::endl;
    return 1;
  }
  return 0;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Device-only function that returns the square of input.
int __device__ kernel_func(int input)
{
  return input * input;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Builds the example by hand with a single device-link step:
#   libBase.so    - host-only shared library (base.cpp)
#   libCudaSep.a  - static archive of relocatable device code (sep.cu, sep_kernel.cu)
#   libCombine.so - shared library containing both derived.cu and combine.cu,
#                   device-linked together against libCudaSep.a
#   bin/Bug       - executable linking Combine and Base
# No separate libDerived.so is produced by this script.

# Abort on the first failing step so later steps never run against stale or
# missing objects.
set -euo pipefail

mkdir -p temp
mkdir -p bin

# Base: plain C++ shared library.
/usr/bin/clang++ -DBase_EXPORTS -fPIC -std=gnu++11 -o temp/base.cpp.o -c base.cpp
/usr/bin/clang++ -fPIC -shared -Wl,-soname,bin/libBase.so -o bin/libBase.so temp/base.cpp.o

# CudaSep: compile with -dc (relocatable device code) and archive.
/usr/local/cuda/bin/nvcc -gencode arch=compute_50,code=compute_50 -Xcompiler=-fPIC -std=c++11 -x cu -dc sep.cu -o temp/sep.cu.o
/usr/local/cuda/bin/nvcc -gencode arch=compute_50,code=compute_50 -Xcompiler=-fPIC -std=c++11 -x cu -dc sep_kernel.cu -o temp/sep_kernel.cu.o
# `ar q` appends without replacing, so remove any archive left by a previous
# run; otherwise re-running the script accumulates duplicate members.
rm -f bin/libCudaSep.a
/usr/bin/ar qc bin/libCudaSep.a temp/sep.cu.o temp/sep_kernel.cu.o
/usr/bin/ranlib bin/libCudaSep.a

# Compile derived.cu and combine.cu, then device-link both objects together
# with libCudaSep.a in one step.
/usr/local/cuda/bin/nvcc -DDerived_EXPORTS -gencode arch=compute_50,code=compute_50 -Xcompiler=-fPIC -std=c++11 -x cu -c derived.cu -o temp/derived.cu.o
/usr/local/cuda/bin/nvcc -DCombine_EXPORTS -gencode arch=compute_50,code=compute_50 -Xcompiler=-fPIC -std=c++11 -x cu -c combine.cu -o temp/combine.cu.o
/usr/local/cuda/bin/nvcc -gencode arch=compute_50,code=compute_50 -Xcompiler=-fPIC -shared -dlink temp/derived.cu.o temp/combine.cu.o -o temp/combine_device_link.o bin/libCudaSep.a

# Combine: a single shared library holding both host objects and the device
# link result.
g++ -fPIC -shared -Wl,-soname,libCombine.so -o bin/libCombine.so temp/derived.cu.o temp/combine.cu.o temp/combine_device_link.o -Wl,-rpath,bin bin/libCudaSep.a bin/libBase.so -L"/usr/local/cuda/lib64/stubs" -L"/usr/local/cuda/lib64" -lcudadevrt -lcudart_static -lrt -lpthread -ldl

# Bug: host-only executable.
/usr/bin/clang++ -std=gnu++11 -o temp/main.cpp.o -c main.cpp
/usr/bin/clang++ temp/main.cpp.o -o bin/Bug -Wl,-rpath,bin bin/libCombine.so bin/libBase.so
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment