akors · December 14, 2017 15:02
diff --git a/build_cudart-shared.sh b/build_cudart-shared.sh
 #!/bin/sh
 # Hand-written build of the test project using relocatable device code
 # (-dc / -dlink): two static kernel archives, a shared library that
 # device-links them, and the rdctest executable. Host links use the shared
 # CUDA runtime (-lcudart).

 # Abort at the first failing step so later steps never link stale or
 # missing objects.
 set -e

 # Target architecture, passed to every nvcc invocation.
 GENCODE="-gencode arch=compute_61,code=compute_61"
 # Library search directories used by the link steps.
 CUDA_LIBDIRS="-L/usr/local/cuda/lib64/stubs -L/usr/local/cuda/lib64"

 # kernel.a
 nvcc $GENCODE -Xcompiler=-fPIC -x cu -dc kernel.cu -o kernel.cu.o
 # 'ar q' appends unconditionally; drop any old archive first so re-running
 # the script does not accumulate duplicate members.
 rm -f libkernel.a
 ar qc libkernel.a kernel.cu.o
 ranlib libkernel.a

 # kernel2.a
 nvcc $GENCODE -Xcompiler=-fPIC -x cu -dc kernel2.cu -o kernel2.cu.o
 rm -f libkernel2.a
 ar qc libkernel2.a kernel2.cu.o
 ranlib libkernel2.a

 # allkernels.so: compile the empty translation unit, device-link it together
 # with both archives, then produce the shared library with the host linker.
 nvcc $GENCODE -Xcompiler=-fPIC -x cu -dc empty.cu -o empty.cu.o
 nvcc --cudart shared $GENCODE -Xcompiler=-fPIC -Wno-deprecated-gpu-targets -shared -dlink empty.cu.o -o allkernels_device_link.o libkernel.a libkernel2.a
 g++ -fPIC -shared -Wl,-soname,liballkernels.so -o liballkernels.so empty.cu.o allkernels_device_link.o libkernel.a libkernel2.a $CUDA_LIBDIRS -lcudadevrt -lcudart

 # rdctest: a second device link is performed for the executable, which is
 # then linked against liballkernels.so as well as both static archives.
 g++ -fPIE -o main.cpp.o -c main.cpp
 nvcc --cudart shared $GENCODE -Xcompiler=-fPIC -Wno-deprecated-gpu-targets -shared -dlink main.cpp.o -o main_device_link.o $CUDA_LIBDIRS libkernel.a libkernel2.a
 g++ main.cpp.o main_device_link.o -o rdctest $CUDA_LIBDIRS -Wl,-rpath,. liballkernels.so libkernel.a libkernel2.a -lcudadevrt -lcudart
diff --git a/build_cudart-static.sh b/build_cudart-static.sh
 #!/bin/sh
 # Hand-written build of the test project using relocatable device code
 # (-dc / -dlink): two static kernel archives, a shared library that
 # device-links them, and the rdctest executable. Host links use the static
 # CUDA runtime (-lcudart_static plus -lrt -lpthread -ldl).

 # Abort at the first failing step so later steps never link stale or
 # missing objects.
 set -e

 # Target architecture, passed to every nvcc invocation.
 GENCODE="-gencode arch=compute_61,code=compute_61"
 # Library search directories used by the link steps.
 CUDA_LIBDIRS="-L/usr/local/cuda/lib64/stubs -L/usr/local/cuda/lib64"
 # Runtime libraries for the host link steps.
 CUDART_LIBS="-lcudadevrt -lcudart_static -lrt -lpthread -ldl"

 # kernel.a
 nvcc $GENCODE -Xcompiler=-fPIC -x cu -dc kernel.cu -o kernel.cu.o
 # 'ar q' appends unconditionally; drop any old archive first so re-running
 # the script does not accumulate duplicate members.
 rm -f libkernel.a
 ar qc libkernel.a kernel.cu.o
 ranlib libkernel.a

 # kernel2.a
 nvcc $GENCODE -Xcompiler=-fPIC -x cu -dc kernel2.cu -o kernel2.cu.o
 rm -f libkernel2.a
 ar qc libkernel2.a kernel2.cu.o
 ranlib libkernel2.a

 # allkernels.so: compile the empty translation unit, device-link it together
 # with both archives, then produce the shared library with the host linker.
 # NOTE(review): the device-link steps below still pass '--cudart shared'
 # although the host links use -lcudart_static -- confirm this is intended.
 nvcc $GENCODE -Xcompiler=-fPIC -x cu -dc empty.cu -o empty.cu.o
 nvcc --cudart shared $GENCODE -Xcompiler=-fPIC -Wno-deprecated-gpu-targets -shared -dlink empty.cu.o -o allkernels_device_link.o libkernel.a libkernel2.a
 g++ -fPIC -shared -Wl,-soname,liballkernels.so -o liballkernels.so empty.cu.o allkernels_device_link.o libkernel.a libkernel2.a $CUDA_LIBDIRS $CUDART_LIBS

 # rdctest: a second device link is performed for the executable, which is
 # then linked against liballkernels.so as well as both static archives.
 g++ -fPIE -o main.cpp.o -c main.cpp
 nvcc --cudart shared $GENCODE -Xcompiler=-fPIC -Wno-deprecated-gpu-targets -shared -dlink main.cpp.o -o main_device_link.o $CUDA_LIBDIRS libkernel.a libkernel2.a
 g++ main.cpp.o main_device_link.o -o rdctest $CUDA_LIBDIRS -Wl,-rpath,. liballkernels.so libkernel.a libkernel2.a $CUDART_LIBS
diff --git a/CMakeLists.txt b/CMakeLists.txt

 # CUDA as a project() language and the CUDA_SEPARABLE_COMPILATION target
 # property both require CMake 3.8, so that is the real minimum version.
 cmake_minimum_required(VERSION 3.8)
 project (CudaSharedThrust CXX CUDA)

 # Target architecture for all CUDA sources in this project.
 string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_61,code=compute_61")

 # Objects that end up in a shared library must be position independent.
 if(BUILD_SHARED_LIBS)
  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 endif()

 # The two kernel libraries are always static, regardless of BUILD_SHARED_LIBS.
 add_library(kernel STATIC kernel.cu)
 set_target_properties(kernel PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

 add_library(kernel2 STATIC kernel2.cu)
 set_target_properties(kernel2  PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

 # No explicit STATIC/SHARED here: the library type follows BUILD_SHARED_LIBS.
 add_library(allkernels empty.cu) # empty.cu is an empty file
 set_target_properties(allkernels PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
 target_link_libraries(allkernels kernel kernel2)


 # Test executable; its only source is main.cpp.
 add_executable(rdctest main.cpp)
 set_target_properties(rdctest PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
 target_link_libraries(rdctest allkernels)
diff --git a/empty.cu b/empty.cu
 //
diff --git a/kernel.cu b/kernel.cu
 #include "kernel.cuh"
 #include <stdio.h>
 #include <iostream>
 #include <thrust/device_vector.h>

 /**
  * Writes 2 * threadIdx.x into data[threadIdx.x].
  *
  * Only threadIdx.x is used for indexing, so `data` must hold at least as
  * many elements as there are threads in x per block. Thread 0 of each block
  * also prints a greeting through device-side printf.
  */
 __global__
 void thekernel(int *data){
   const unsigned int tid = threadIdx.x;

   if (tid == 0) {
     printf("the kernel says hello\n");
   }

   data[tid] = tid * 2;
 }

 /**
  * Host-side driver for thekernel.
  *
  * Allocates an 11-element device vector, launches thekernel on it with a
  * single block of 10 threads, waits for the device to finish, reports any
  * CUDA error on stdout and finally prints every element of the vector.
  * Element 10 is not written by the kernel because only 10 threads run.
  */
 void Kernel::callKernel(){

   thrust::device_vector<int> D2;
   D2.resize(11);
   int * raw_ptr = thrust::raw_pointer_cast(&D2[0]);

   printf("Kernel::callKernel called\n");
   thekernel <<< 1, 10 >>> (raw_ptr);

   // cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize().
   // Its return value carries errors raised while the kernel was executing;
   // cudaGetLastError() reports (and clears) launch failures such as an
   // invalid launch configuration.
   const cudaError_t syncStatus = cudaDeviceSynchronize();
   const cudaError_t lastStatus = cudaGetLastError();
   const cudaError_t code = (syncStatus != cudaSuccess) ? syncStatus : lastStatus;
   if (code != cudaSuccess) {
     std::cout << "Cuda error: " << cudaGetErrorString(code) << " after callKernel!" << std::endl;
   }

   // size() is unsigned; a matching index type avoids a signed/unsigned
   // comparison.
   for (size_t i = 0; i < D2.size(); i++) {
     std::cout << "Kernel D[" << i << "]=" << D2[i] << std::endl;
   }
 }

diff --git a/kernel.cuh b/kernel.cuh
 // Include guard. The macro name avoids a double underscore because
 // identifiers containing "__" are reserved for the C++ implementation.
 #ifndef KERNEL_CUH_
 #define KERNEL_CUH_
  // Host-callable wrapper type; it has no data members and declares a single
  // member function.
  class Kernel{
  public:
    // Declared only; the definition lives in a separate translation unit.
    void callKernel();
  };
 #endif // KERNEL_CUH_
diff --git a/kernel2.cu b/kernel2.cu
 #include "kernel2.cuh"
 #include <stdio.h>
 #include <iostream>
 #include <thrust/device_vector.h>

 /**
  * Writes 2 * threadIdx.x into data2[threadIdx.x].
  *
  * Only threadIdx.x is used for indexing, so `data2` must hold at least as
  * many elements as there are threads in x per block. Thread 0 of each block
  * also prints a greeting through device-side printf.
  */
 __global__
 void thekernel2(int *data2){
   const unsigned int tid = threadIdx.x;

   if (tid == 0) {
     printf("the kernel2 says hello\n");
   }

   data2[tid] = tid * 2;
 }

 /**
  * Host-side driver for thekernel2.
  *
  * Allocates an 11-element device vector, launches thekernel2 on it with a
  * single block of 10 threads, waits for the device to finish, reports any
  * CUDA error on stdout and finally prints every element of the vector.
  * Element 10 is not written by the kernel because only 10 threads run.
  */
 void Kernel2::callKernel2(){
   thrust::device_vector<int> D;
   D.resize(11);
   int * raw_ptr = thrust::raw_pointer_cast(&D[0]);
   printf("Kernel2::callKernel2 called\n");
   thekernel2 <<< 1, 10 >>> (raw_ptr);

   // cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize().
   // Its return value carries errors raised while the kernel was executing;
   // cudaGetLastError() reports (and clears) launch failures such as an
   // invalid launch configuration.
   const cudaError_t syncStatus = cudaDeviceSynchronize();
   const cudaError_t lastStatus = cudaGetLastError();
   const cudaError_t code = (syncStatus != cudaSuccess) ? syncStatus : lastStatus;
   if (code != cudaSuccess) {
     std::cout << "Cuda error: " << cudaGetErrorString(code) << " after callKernel2!" << std::endl;
   }

   // size() is unsigned; a matching index type avoids a signed/unsigned
   // comparison.
   for (size_t i = 0; i < D.size(); i++) {
     std::cout << "Kernel2 D[" << i << "]=" << D[i] << std::endl;
   }
 }
diff --git a/kernel2.cuh b/kernel2.cuh
 // Include guard. The macro name avoids a double underscore because
 // identifiers containing "__" are reserved for the C++ implementation.
 #ifndef KERNEL2_CUH_
 #define KERNEL2_CUH_
  // Host-callable wrapper type; it has no data members and declares a single
  // member function.
  class Kernel2{
  public:
    // Declared only; the definition lives in a separate translation unit.
    void callKernel2();
  };
 #endif // KERNEL2_CUH_
diff --git a/main.cpp b/main.cpp
 #include "kernel.cuh"
 #include "kernel2.cuh"

 int main(){
  Kernel k;
  k.callKernel();

  Kernel2 k2;
  k2.callKernel2();
 }
	#!/bin/sh
	# Hand-written build of the test project using relocatable device code
	# (-dc / -dlink): two static kernel archives, a shared library that
	# device-links them, and the rdctest executable. Host links use the shared
	# CUDA runtime (-lcudart).

	# Abort at the first failing step so later steps never link stale or
	# missing objects.
	set -e

	# Target architecture, passed to every nvcc invocation.
	GENCODE="-gencode arch=compute_61,code=compute_61"
	# Library search directories used by the link steps.
	CUDA_LIBDIRS="-L/usr/local/cuda/lib64/stubs -L/usr/local/cuda/lib64"

	# kernel.a
	nvcc $GENCODE -Xcompiler=-fPIC -x cu -dc kernel.cu -o kernel.cu.o
	# 'ar q' appends unconditionally; drop any old archive first so re-running
	# the script does not accumulate duplicate members.
	rm -f libkernel.a
	ar qc libkernel.a kernel.cu.o
	ranlib libkernel.a

	# kernel2.a
	nvcc $GENCODE -Xcompiler=-fPIC -x cu -dc kernel2.cu -o kernel2.cu.o
	rm -f libkernel2.a
	ar qc libkernel2.a kernel2.cu.o
	ranlib libkernel2.a

	# allkernels.so: compile the empty translation unit, device-link it together
	# with both archives, then produce the shared library with the host linker.
	nvcc $GENCODE -Xcompiler=-fPIC -x cu -dc empty.cu -o empty.cu.o
	nvcc --cudart shared $GENCODE -Xcompiler=-fPIC -Wno-deprecated-gpu-targets -shared -dlink empty.cu.o -o allkernels_device_link.o libkernel.a libkernel2.a
	g++ -fPIC -shared -Wl,-soname,liballkernels.so -o liballkernels.so empty.cu.o allkernels_device_link.o libkernel.a libkernel2.a $CUDA_LIBDIRS -lcudadevrt -lcudart

	# rdctest: a second device link is performed for the executable, which is
	# then linked against liballkernels.so as well as both static archives.
	g++ -fPIE -o main.cpp.o -c main.cpp
	nvcc --cudart shared $GENCODE -Xcompiler=-fPIC -Wno-deprecated-gpu-targets -shared -dlink main.cpp.o -o main_device_link.o $CUDA_LIBDIRS libkernel.a libkernel2.a
	g++ main.cpp.o main_device_link.o -o rdctest $CUDA_LIBDIRS -Wl,-rpath,. liballkernels.so libkernel.a libkernel2.a -lcudadevrt -lcudart

	# CUDA as a project() language and the CUDA_SEPARABLE_COMPILATION target
	# property both require CMake 3.8, so that is the real minimum version.
	cmake_minimum_required(VERSION 3.8)
	project (CudaSharedThrust CXX CUDA)

	# Target architecture for all CUDA sources in this project.
	string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_61,code=compute_61")

	# Objects that end up in a shared library must be position independent.
	if(BUILD_SHARED_LIBS)
	  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
	endif()

	# The two kernel libraries are always static, regardless of BUILD_SHARED_LIBS.
	add_library(kernel STATIC kernel.cu)
	set_target_properties(kernel PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

	add_library(kernel2 STATIC kernel2.cu)
	set_target_properties(kernel2 PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

	# No explicit STATIC/SHARED here: the library type follows BUILD_SHARED_LIBS.
	add_library(allkernels empty.cu) # empty.cu is an empty file
	set_target_properties(allkernels PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
	target_link_libraries(allkernels kernel kernel2)


	# Test executable; its only source is main.cpp.
	add_executable(rdctest main.cpp)
	set_target_properties(rdctest PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
	target_link_libraries(rdctest allkernels)
	#include "kernel.cuh"
	#include <stdio.h>
	#include <iostream>
	#include <thrust/device_vector.h>

	/**
	 * Writes 2 * threadIdx.x into data[threadIdx.x].
	 *
	 * Only threadIdx.x is used for indexing, so `data` must hold at least as
	 * many elements as there are threads in x per block. Thread 0 of each block
	 * also prints a greeting through device-side printf.
	 */
	__global__
	void thekernel(int *data){
	  const unsigned int tid = threadIdx.x;

	  if (tid == 0) {
	    printf("the kernel says hello\n");
	  }

	  data[tid] = tid * 2;
	}

	/**
	 * Host-side driver for thekernel.
	 *
	 * Allocates an 11-element device vector, launches thekernel on it with a
	 * single block of 10 threads, waits for the device to finish, reports any
	 * CUDA error on stdout and finally prints every element of the vector.
	 * Element 10 is not written by the kernel because only 10 threads run.
	 */
	void Kernel::callKernel(){

	  thrust::device_vector<int> D2;
	  D2.resize(11);
	  int * raw_ptr = thrust::raw_pointer_cast(&D2[0]);

	  printf("Kernel::callKernel called\n");
	  thekernel <<< 1, 10 >>> (raw_ptr);

	  // cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize().
	  // Its return value carries errors raised while the kernel was executing;
	  // cudaGetLastError() reports (and clears) launch failures such as an
	  // invalid launch configuration.
	  const cudaError_t syncStatus = cudaDeviceSynchronize();
	  const cudaError_t lastStatus = cudaGetLastError();
	  const cudaError_t code = (syncStatus != cudaSuccess) ? syncStatus : lastStatus;
	  if (code != cudaSuccess) {
	    std::cout << "Cuda error: " << cudaGetErrorString(code) << " after callKernel!" << std::endl;
	  }

	  // size() is unsigned; a matching index type avoids a signed/unsigned
	  // comparison.
	  for (size_t i = 0; i < D2.size(); i++) {
	    std::cout << "Kernel D[" << i << "]=" << D2[i] << std::endl;
	  }
	}
	// Include guard. The macro name avoids a double underscore because
	// identifiers containing "__" are reserved for the C++ implementation.
	#ifndef KERNEL_CUH_
	#define KERNEL_CUH_
	// Host-callable wrapper type; it has no data members and declares a single
	// member function.
	class Kernel{
	public:
	  // Declared only; the definition lives in a separate translation unit.
	  void callKernel();
	};
	#endif // KERNEL_CUH_
	#include "kernel2.cuh"
	#include <stdio.h>
	#include <iostream>
	#include <thrust/device_vector.h>

	/**
	 * Writes 2 * threadIdx.x into data2[threadIdx.x].
	 *
	 * Only threadIdx.x is used for indexing, so `data2` must hold at least as
	 * many elements as there are threads in x per block. Thread 0 of each block
	 * also prints a greeting through device-side printf.
	 */
	__global__
	void thekernel2(int *data2){
	  const unsigned int tid = threadIdx.x;

	  if (tid == 0) {
	    printf("the kernel2 says hello\n");
	  }

	  data2[tid] = tid * 2;
	}

	/**
	 * Host-side driver for thekernel2.
	 *
	 * Allocates an 11-element device vector, launches thekernel2 on it with a
	 * single block of 10 threads, waits for the device to finish, reports any
	 * CUDA error on stdout and finally prints every element of the vector.
	 * Element 10 is not written by the kernel because only 10 threads run.
	 */
	void Kernel2::callKernel2(){
	  thrust::device_vector<int> D;
	  D.resize(11);
	  int * raw_ptr = thrust::raw_pointer_cast(&D[0]);
	  printf("Kernel2::callKernel2 called\n");
	  thekernel2 <<< 1, 10 >>> (raw_ptr);

	  // cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize().
	  // Its return value carries errors raised while the kernel was executing;
	  // cudaGetLastError() reports (and clears) launch failures such as an
	  // invalid launch configuration.
	  const cudaError_t syncStatus = cudaDeviceSynchronize();
	  const cudaError_t lastStatus = cudaGetLastError();
	  const cudaError_t code = (syncStatus != cudaSuccess) ? syncStatus : lastStatus;
	  if (code != cudaSuccess) {
	    std::cout << "Cuda error: " << cudaGetErrorString(code) << " after callKernel2!" << std::endl;
	  }

	  // size() is unsigned; a matching index type avoids a signed/unsigned
	  // comparison.
	  for (size_t i = 0; i < D.size(); i++) {
	    std::cout << "Kernel2 D[" << i << "]=" << D[i] << std::endl;
	  }
	}
	// Include guard. The macro name avoids a double underscore because
	// identifiers containing "__" are reserved for the C++ implementation.
	#ifndef KERNEL2_CUH_
	#define KERNEL2_CUH_
	// Host-callable wrapper type; it has no data members and declares a single
	// member function.
	class Kernel2{
	public:
	  // Declared only; the definition lives in a separate translation unit.
	  void callKernel2();
	};
	#endif // KERNEL2_CUH_
	#include "kernel.cuh"
	#include "kernel2.cuh"

	int main(){
	Kernel k;
	k.callKernel();

	Kernel2 k2;
	k2.callKernel2();
	}