@RicherMans
Last active August 29, 2015 14:22
Def
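// addkernelargs() below tests its arguments with is_specialization<T, std::vector>,
// a trait the gist itself does not define. A minimal sketch of a typical
// definition (an assumption, not part of the original code):
#include <type_traits>

template<typename T, template<typename...> class Template>
struct is_specialization : std::false_type {};

template<template<typename...> class Template, typename... Args>
struct is_specialization<Template<Args...>, Template> : std::true_type {};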
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <tuple>
#include <type_traits>
#include <vector>
#include <CL/cl.hpp> // OpenCL C++ bindings; the header name can vary between SDKs

// Note: in a single translation unit the Cl_Interface declaration further down
// has to be visible before these out-of-class member definitions.

// Debug print helpers: overload resolution picks the std::vector version for
// vector arguments, so the generic "std::cout << v" line is never instantiated
// for types without an operator<<.
template<typename T>
inline void printvalue(const std::vector<T>& v) { std::cout << "vector of size " << v.size(); }
template<typename T>
inline void printvalue(const T& v) { std::cout << v; }

// Hook for the iteration: ends the compile-time recursion once P has reached
// the number of tuple elements.
template<typename I, typename O>
template<std::size_t P, typename... Tp>
inline typename std::enable_if<P == sizeof...(Tp), void>::type
Cl_Interface<I, O>::addkernelargs(std::tuple<Tp...>& t, cl::Kernel& kernel)
{
}

// Iteration step: handles tuple element P, then recurses on P + 1.
template<typename I, typename O>
template<std::size_t P, typename... Tp>
inline typename std::enable_if<P < sizeof...(Tp), void>::type
Cl_Interface<I, O>::addkernelargs(std::tuple<Tp...>& t, cl::Kernel& kernel)
{
    // Type of the P-th tuple element
    typedef typename std::tuple_element<P, std::tuple<Tp...>>::type type;
    // Value of the P-th tuple element
    type arg = std::get<P>(t);
    if (std::is_pointer<type>::value) {
        std::cout << "POINTER";
    }
    // Vector detected
    if (is_specialization<type, std::vector>::value) {
        // std::cout << *arg << std::endl;
        // std::cout << type::value_type;
        // for (int i = 0; i < 2; ++i)
        // {
        //     std::cout << arg[i] << std::endl;
        // }
        // std::cout << "NUM " << P << " " << arg.size();
        // std::cout << " NUM " << P << " " << arg << " | size = " << sizeof(type) << " " << sizeof(arg) << std::endl;
    }
    else {
        // cl::Buffer buffer(this->context, CL_MEM_READ_WRITE, (*output)[i].size() * sizeof(O));
        // kernel.setArg(I, buffer);
        std::cout << " NUM " << P << " ";
        printvalue(arg); // the overload keeps this line compiling for vector arguments too
        std::cout << " | size = " << sizeof(type) << std::endl;
    }
    addkernelargs<P + 1, Tp...>(t, kernel);
}
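// A stripped-down illustration of the same enable_if recursion, without the
// OpenCL parts; print_tuple is a hypothetical name, not part of the gist, and
// it uses the headers included above.
template<std::size_t P = 0, typename... Tp>
inline typename std::enable_if<P == sizeof...(Tp), void>::type
print_tuple(const std::tuple<Tp...>&) {} // recursion hook: nothing left to print

template<std::size_t P = 0, typename... Tp>
inline typename std::enable_if<P < sizeof...(Tp), void>::type
print_tuple(const std::tuple<Tp...>& t)
{
    std::cout << std::get<P>(t) << std::endl; // handle element P
    print_tuple<P + 1, Tp...>(t);             // recurse on the next element
}
// e.g. print_tuple(std::make_tuple(1, 2.5f, "three"));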
template<typename I, typename O> class Cl_Interface {
public:
    Cl_Interface(const char* path);
    Cl_Interface(const Cl_Interface&);
    virtual ~Cl_Interface();
    void loadProgram(const char* path);
    template<typename... Tp>
    void runKernel(const char* kernelname, const int blocksize, Tp... args);
    void runKernel(const char* kernelname, const int blocksize, const std::vector<std::vector<I>>& input, std::vector<std::vector<O>>* output);
    // Does the same as runKernel, except that it doesn't read back the result and keeps it on the GPU
    std::vector<util::GPU_Buffer> runKernelBuffer(const char* kernelname, const int blocksize, const std::vector<std::vector<I>>& input, std::vector<std::vector<O>>* output);
    void chainKernel(const char* kernelname, const int blocksize, const std::vector<util::GPU_Buffer>& input, const std::vector<util::GPU_Buffer>& output);
    // Reads the device buffers in outputbuffer back into output
    void readResult(std::vector<util::GPU_Buffer> outputbuffer, std::vector<std::vector<O>>* output);
private:
    // Handles for creating an OpenCL context
    cl::Platform platform;
    // buildExecutable is called by loadProgram:
    // builds a runtime executable from a program
    void buildExecutable();
    cl::Device device;
    cl::Context context;
    // Hook that ends the iteration
    template<std::size_t P = 0, typename... Tp>
    typename std::enable_if<P == sizeof...(Tp), void>::type addkernelargs(std::tuple<Tp...>& t, cl::Kernel& k);
    // Start of the iteration
    template<std::size_t P = 0, typename... Tp>
    typename std::enable_if<P < sizeof...(Tp), void>::type addkernelargs(std::tuple<Tp...>& t, cl::Kernel& kernel);
    char* contents;
};
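// A hedged sketch (not from the gist) of how the buffer-returning members above
// might be chained: run a kernel once, keep its result on the GPU, run a second
// pass over the same device buffers, then read the final result back. The
// function name, data shapes and the reuse of "sigmoid"/blocksize 15 are assumptions.
inline void run_sigmoid_twice(Cl_Interface<float, float>& intf,
                              const std::vector<std::vector<float>>& input,
                              std::vector<std::vector<float>>* output)
{
    std::vector<util::GPU_Buffer> buffers =
        intf.runKernelBuffer("sigmoid", 15, input, output); // result stays on the device
    intf.chainKernel("sigmoid", 15, buffers, buffers);      // second pass over the device buffers
    intf.readResult(buffers, output);                       // copy the final result back to the host
}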
template<typename I, typename O>
template<typename... Tp>
void Cl_Interface<I, O>::runKernel(const char* kernelname, const int blocksize, Tp... args) {
    cl::Program::Sources sources;
    // Include the contents read from the kernel source file in the sources to parse
    sources.push_back(std::make_pair(this->contents, strlen(this->contents) + 1));
    // Initiate a program from the sources
    cl::Program program(this->context, sources);
    if (program.build({this->device}) != CL_SUCCESS) {
        std::cout << " Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(this->device) << "\n";
        exit(1);
    }
    ////////////////////////////////////////////////////
    // Initialize the transfer and executable objects //
    ////////////////////////////////////////////////////
    // The queue is responsible for transferring data and kernel_operator executes the code.
    // The queue pushes and returns the buffer objects between host and device.
    cl::CommandQueue queue(this->context, this->device);
    cl::Kernel kernel_operator(program, kernelname);
    auto tup = std::make_tuple(args...);
    addkernelargs(tup, kernel_operator);
    // addkernelargs(tup);
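    // At this point the kernel is built and its arguments are set, but nothing
    // has been enqueued. A hedged sketch of the launch one would typically add
    // here (globalsize is an assumed variable; blocksize is the caller-supplied
    // work-group size):
    // queue.enqueueNDRangeKernel(kernel_operator, cl::NullRange,
    //                            cl::NDRange(globalsize), cl::NDRange(blocksize));
    // queue.finish();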
}
// Example usage
int main()
{
    Cl_Interface<float, float> cl_intf("sigmoid.cl");
    std::vector<float> b(100);
    float a[1000];
    for (int i = 0; i < 1000; ++i)
    {
        a[i] = (float)i;
    }
    // Variadic call: each argument is forwarded to addkernelargs() in turn
    cl_intf.runKernel("sigmoid", 15, 12.5f, 3.1, 1, a, b);
    return 0;
}