Skip to content

Instantly share code, notes, and snippets.

@alphaville
Forked from ajaykumarsampath/main_file.m
Last active August 29, 2015 14:07
Show Gist options
  • Save alphaville/506d0e65a11b7add345a to your computer and use it in GitHub Desktop.
Save alphaville/506d0e65a11b7add345a to your computer and use it in GitHub Desktop.
#ifndef MAT_VECTOR_
#define MAT_VECTOR_
#include<cuda_runtime.h>
#include"cublas_v2.h"
#include "helper_cuda.h"
/**
 * Performs several independent matrix-vector products y_i = A_i * x_i.
 * @param matrices: array of nmatrices pointers, one per matrix A_i,
 * @param x: array of nmatrices pointers, one per input vector x_i,
 * @param nrows: nrows[i] is the number of rows of A_i,
 * @param ncols: ncols[i] is the number of columns of A_i,
 * @param nmatrices: number of matrix-vector products to perform,
 * @param y: output buffer; the definition in this file writes the results
 *           back to back (y_0 first, then y_1, ...), so it needs room for
 *           the sum of all nrows[i] floats.
 *
 * NOTE(review): this prototype takes `const float **` for matrices and x,
 * whereas the definition further down in this file takes `float **`. In C++
 * these are two distinct overloads, so this prototype declares a function
 * that is not defined in this file -- confirm which signature is intended.
 */
void multiple_mv(const float **matrices,const float **x,const int *nrows,
const int *ncols,int nmatrices, float *y);
/* Host debug helper: for each i in [0, nmatrices) prints element i of matrices[i]. */
void test_function(float **matrices,int nmatrices);
/* Debug kernel: prints the pointer a, then the first b floats it points to. */
__global__ void copy_terms(float* a,int b);
/* Debug kernel: prints the pointer stored in a[b]; c bounds a loop whose body is commented out. */
__global__ void copy_terms1(float** a,int b,int c);
// Implementation
/**
 * Computes y_i = A_i * x_i for i = 0 .. nmatrices-1 with one cublasSgemv
 * call per product (no transpose, alpha = 1, beta = 0).
 *
 * @param matrices  host array of nmatrices pointers; matrices[i] is handed to
 *                  cublasSgemv as A with leading dimension nrows[i], i.e. a
 *                  densely packed column-major nrows[i] x ncols[i] matrix.
 * @param x         host array of nmatrices pointers; x[i] is the input vector
 *                  of product i (ncols[i] elements, stride 1).
 * @param nrows     nrows[i] = number of rows of matrix i.
 * @param ncols     ncols[i] = number of columns of matrix i.
 * @param nmatrices number of products to compute.
 * @param y         output buffer; result i is written at offset
 *                  nrows[0] + ... + nrows[i-1], so it must hold the sum of
 *                  all nrows[i] floats.
 *
 * The pointer arrays themselves are indexed on the host; the pointers they
 * contain, and y, are passed straight to cuBLAS (main in this gist passes
 * device pointers).
 *
 * Every cuBLAS status is routed through checkCudaErrors (helper_cuda.h), the
 * same macro the driver program uses, so a failure is reported instead of
 * being silently ignored.
 */
void multiple_mv(float **matrices,float **x,const int *nrows,
const int *ncols,int nmatrices, float *y){
    cublasHandle_t handle;
    // Float literals: avoid an implicit double -> float conversion.
    const float alpha = 1.0f;
    const float beta = 0.0f;
    // Running offset into y where the next result vector starts.
    int offset = 0;

    checkCudaErrors(cublasCreate(&handle));
    for (int i = 0; i < nmatrices; i++) {
        checkCudaErrors(cublasSgemv(handle, CUBLAS_OP_N,
                                    nrows[i], ncols[i],
                                    &alpha, matrices[i], nrows[i],
                                    x[i], 1,
                                    &beta, &y[offset], 1));
        offset += nrows[i];
    }
    checkCudaErrors(cublasDestroy(handle));
}
/**
 * Host-side debug print: for every k in [0, nmatrices) writes element k of
 * matrices[k] to stdout, followed by a space.
 * The pointers are dereferenced by host code, so they must be readable from
 * the host.
 */
void test_function(float **matrices,int nmatrices){
    int k = 0;
    while (k < nmatrices) {
        const float *current = matrices[k];
        printf("%f ", current[k]);
        ++k;
    }
}
/**
 * Debug kernel: prints the address held in a, then the first b floats stored
 * there, each followed by a space.
 * There is no thread-index guard, so every launched thread performs the same
 * prints.
 */
__global__ void copy_terms(float* a, int b){
    printf("%p \n", a);
    int idx = 0;
    while (idx < b) {
        printf("%f ", a[idx]);
        ++idx;
    }
}
/**
 * Debug kernel: prints the pointer stored in slot b of the pointer array a.
 * The per-element dump over the first c entries is disabled (kept below as a
 * comment), so c currently has no effect on the output.
 */
__global__ void copy_terms1(float** a,int b,int c){
    float *selected = a[b];
    printf("%p \n", selected);
    // Disabled element dump:
    //   for (int i = 0; i < c; i++) printf("%f ", *(a[b]+i));
    (void)c;
}
#endif /* MAT_VECTOR_ */
#include<stdio.h>
#include<stdlib.h>
#include "mat_vector.cuh"
#define IDX2C(i,j,ld) (((j)*(ld))+(i))
#define m 3
#define n 2
/**
 * Demo driver for multiple_mv.
 *
 * Builds two m x n (3 x 2) column-major matrices and two length-n vectors on
 * the host, uploads them, calls multiple_mv once and prints the inputs
 * followed by the stacked results.
 *
 * With the data generated below (A[k] = k, B[k] = 2k, a = b = {0, 1}) the
 * final line should read 3 4 5 6 8 10.
 *
 * @return 0 on success, EXIT_FAILURE if a host allocation fails. CUDA/cuBLAS
 *         failures are handled by checkCudaErrors.
 */
int main(void){
    const int nmatrix=2;

    // ---- Host buffers ----------------------------------------------------
    float* a=(float*)malloc(n*sizeof(*a));
    float* b=(float*)malloc(n*sizeof(*b));
    float* y=(float*)malloc(nmatrix*m*sizeof(*y));
    int* nrows=(int*)malloc(nmatrix*sizeof(*nrows));
    int* ncols=(int*)malloc(nmatrix*sizeof(*ncols));
    float* matA=(float*)malloc(m*n*sizeof(*matA));
    float* matB=(float*)malloc(m*n*sizeof(*matB));
    if(!a || !b || !y || !nrows || !ncols || !matA || !matB){
        fprintf(stderr,"host memory allocation failed\n");
        // free(NULL) is a no-op, so releasing everything unconditionally is safe.
        free(a);
        free(b);
        free(y);
        free(nrows);
        free(ncols);
        free(matA);
        free(matB);
        return EXIT_FAILURE;
    }

    // Input vectors: a = b = {0, 1, ...}.
    for(int i=0;i<n;i++){
        a[i]=(float)i;
        b[i]=(float)i;
    }
    // Column-major fill: matA holds its own linear index, matB twice that.
    for(int j=0;j<n;j++){
        for(int i=0;i<m;i++){
            matA[IDX2C(i,j,m)]=(float)IDX2C(i,j,m);
            matB[IDX2C(i,j,m)]=2.0f*(float)IDX2C(i,j,m);
        }
    }

    // ---- Device buffers --------------------------------------------------
    float* d_a=NULL;
    float* d_b=NULL;
    float* d_y=NULL;
    float* d_matA=NULL;
    float* d_matB=NULL;
    checkCudaErrors(cudaMalloc((void**)&d_a,n*sizeof(*d_a)));
    checkCudaErrors(cudaMalloc((void**)&d_b,n*sizeof(*d_b)));
    checkCudaErrors(cudaMalloc((void**)&d_y,nmatrix*m*sizeof(*d_y)));
    checkCudaErrors(cudaMalloc((void**)&d_matA,m*n*sizeof(*d_matA)));
    checkCudaErrors(cudaMalloc((void**)&d_matB,m*n*sizeof(*d_matB)));

    // The destination of cudaMemcpy is a plain device pointer; no cast needed.
    checkCudaErrors(cudaMemcpy(d_a,a,n*sizeof(*a),cudaMemcpyHostToDevice));
    checkCudaErrors(cudaMemcpy(d_b,b,n*sizeof(*b),cudaMemcpyHostToDevice));
    checkCudaErrors(cublasSetMatrix(m,n,sizeof(*matA),matA,m,d_matA,m));
    checkCudaErrors(cublasSetMatrix(m,n,sizeof(*matB),matB,m,d_matB,m));

    // Host-resident arrays of device pointers. multiple_mv indexes these
    // arrays in host code, so the arrays themselves must stay in host memory.
    float *ptr[2]={d_a,d_b};        // For vectors
    float *ptr1[2]={d_matA,d_matB}; // For matrices

    nrows[0]=m;
    nrows[1]=m;
    ncols[0]=n;
    ncols[1]=n;

    // ---- Echo the inputs -------------------------------------------------
    for(int i=0;i<m;i++){
        for(int j=0;j<n;j++){
            printf("%f ",matA[IDX2C(i,j,m)]);
        }
        printf("\n");
    }
    printf("\n");
    for(int i=0;i<m;i++){
        for(int j=0;j<n;j++){
            printf("%f ",matB[IDX2C(i,j,m)]);
        }
        printf("\n");
    }
    for(int i=0;i<n;i++)
        printf("%f %f \n",a[i],b[i]);

    // ---- Compute and fetch the results -----------------------------------
    multiple_mv(ptr1,ptr,nrows,ncols,nmatrix,d_y);
    checkCudaErrors(cudaMemcpy(y,d_y,m*nmatrix*sizeof(*d_y),cudaMemcpyDeviceToHost));
    for(int i=0;i<nmatrix*m;i++)
        printf("%f ",y[i]);

    // ---- Cleanup ---------------------------------------------------------
    free(a);
    free(b);
    free(y);
    free(nrows);   // previously leaked
    free(ncols);   // previously leaked
    free(matA);
    free(matB);
    checkCudaErrors(cudaFree(d_a));
    checkCudaErrors(cudaFree(d_b));
    checkCudaErrors(cudaFree(d_y));
    checkCudaErrors(cudaFree(d_matA));
    checkCudaErrors(cudaFree(d_matB));
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment