-
-
Save alphaville/506d0e65a11b7add345a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef MAT_VECTOR_ | |
#define MAT_VECTOR_ | |
#include<cuda_runtime.h> | |
#include"cublas_v2.h" | |
#include "helper_cuda.h" | |
/**
 * Performs several independent matrix-vector products y_i = A_i * x_i,
 * one cuBLAS gemv call per matrix.
 * @param matrices: array of `nmatrices` pointers, one per matrix A_i; the array
 *                  itself is read on the host (each entry is handed to cuBLAS),
 * @param x: array of `nmatrices` pointers, one per input vector x_i; the array
 *           itself is read on the host,
 * @param nrows: nrows[i] is the number of rows of matrix A_i,
 * @param ncols: ncols[i] is the number of columns of matrix A_i,
 * @param nmatrices: number of matrix-vector products to compute,
 * @param y: output buffer; the results y_0, y_1, ... are stored back to back,
 *           so it must hold nrows[0] + ... + nrows[nmatrices-1] floats.
 */
// NOTE: the parameter types must match the definition below exactly; a
// `const float **` prototype would declare a different, undefined overload.
void multiple_mv(float **matrices,float **x,const int *nrows,
		const int *ncols,int nmatrices, float *y);

/** Host-side debug helper: prints element i of matrices[i] for each i. */
void test_function(float **matrices,int nmatrices);

/** Debug kernel: prints the pointer `a` followed by its first `b` elements. */
__global__ void copy_terms(float* a,int b);

/** Debug kernel: prints the pointer stored in a[b]. */
__global__ void copy_terms1(float** a,int b,int c);
// Implementation | |
/**
 * Computes y_i = A_i * x_i for i = 0 .. nmatrices-1 using one cublasSgemv
 * call per matrix (no transpose, alpha = 1, beta = 0).
 *
 * Each matrix is passed to cuBLAS with leading dimension nrows[i], i.e. it is
 * treated as a densely packed column-major nrows[i] x ncols[i] matrix.
 * The pointer arrays `matrices` and `x` are indexed on the host; the pointers
 * they contain, as well as `y`, are handed straight to cuBLAS.
 *
 * @param matrices  array of nmatrices matrix pointers
 * @param x         array of nmatrices input-vector pointers
 * @param nrows     nrows[i] = rows of matrix i (also the length of result i)
 * @param ncols     ncols[i] = columns of matrix i
 * @param nmatrices number of products to compute; nothing is done if <= 0
 * @param y         output buffer; result i starts at offset
 *                  nrows[0] + ... + nrows[i-1]
 *
 * Any cuBLAS failure is reported through checkCudaErrors (helper_cuda.h).
 */
void multiple_mv(float **matrices,float **x,const int *nrows,
		const int *ncols,int nmatrices, float *y){
	// Nothing to compute: avoid paying for a cuBLAS handle.
	if(nmatrices<=0){
		return;
	}

	// Float literals: alpha/beta are read by cuBLAS through host pointers.
	const float alpha=1.0f;
	const float beta=0.0f;

	cublasHandle_t handle;
	checkCudaErrors(cublasCreate(&handle));

	int start=0; // offset in y where the next result vector begins
	for(int i=0;i<nmatrices;i++){
		checkCudaErrors(cublasSgemv(handle,CUBLAS_OP_N,nrows[i],ncols[i],&alpha,
				matrices[i],nrows[i],x[i],1,&beta,&y[start],1));
		start+=nrows[i];
	}

	checkCudaErrors(cublasDestroy(handle));
}
/**
 * Debug helper: for every index k in [0, nmatrices) prints element k of the
 * array that matrices[k] points to, separated by spaces.
 * The pointers are dereferenced by the calling (host) code.
 */
void test_function(float **matrices,int nmatrices){
	int k=0;
	while(k<nmatrices){
		const float *entries=matrices[k];
		printf("%f ",entries[k]);
		++k;
	}
}
/**
 * Debug kernel: prints the address held in `a`, then the first `b` floats
 * stored there. No thread index is consulted, so every launched thread
 * performs the same prints.
 */
__global__ void copy_terms(float* a, int b){
	printf("%p \n", a);
	const float *cursor=a;
	for(int remaining=b;remaining>0;--remaining){
		printf("%f ",*cursor);
		++cursor;
	}
}
/**
 * Debug kernel: prints the pointer stored at a[b].
 * The per-element dump of the first `c` values of a[b] is currently disabled,
 * so `c` has no effect on the output.
 */
__global__ void copy_terms1(float** a,int b,int c){
	float *selected=a[b];
	printf("%p \n", selected);
	// Disabled element dump (kept for reference):
	//   for i in [0, c): printf("%f ",*(a[b]+i));
	(void)c;
}
#endif /* MAT_VECTOR_ */
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<stdio.h> | |
#include<stdlib.h> | |
#include "mat_vector.cuh" | |
#define IDX2C(i,j,ld) (((j)*(ld))+(i)) | |
#define m 3 | |
#define n 2 | |
/** Allocates `bytes` of host memory or terminates the program with a message. */
static void *checked_malloc(size_t bytes){
	void *block=malloc(bytes);
	if(block==NULL){
		fprintf(stderr,"Host allocation of %lu bytes failed\n",(unsigned long)bytes);
		exit(EXIT_FAILURE);
	}
	return block;
}

/** Prints a column-major rows x cols host matrix, one matrix row per line. */
static void print_column_major(const float *mat,int rows,int cols){
	for(int i=0;i<rows;i++){
		for(int j=0;j<cols;j++){
			printf("%f ",mat[IDX2C(i,j,rows)]);
		}
		printf("\n");
	}
}

/**
 * Demo driver: builds two m x n column-major matrices and two length-n
 * vectors on the host, uploads them, computes both matrix-vector products
 * with multiple_mv and prints the inputs and the stacked results.
 */
int main(void){
	const int nmatrix=2;

	// Host buffers.
	float *a=(float*)checked_malloc(n*sizeof(*a));
	float *b=(float*)checked_malloc(n*sizeof(*b));
	float *y=(float*)checked_malloc(nmatrix*m*sizeof(*y));
	int *nrows=(int*)checked_malloc(nmatrix*sizeof(*nrows));
	int *ncols=(int*)checked_malloc(nmatrix*sizeof(*ncols));
	float *matA=(float*)checked_malloc(m*n*sizeof(*matA));
	float *matB=(float*)checked_malloc(m*n*sizeof(*matB));

	for(int i=0;i<n;i++){
		a[i]=(float)i;
		b[i]=(float)i;
	}
	// matA holds its own linear index; matB holds twice that value.
	for(int j=0;j<n;j++){
		for(int i=0;i<m;i++){
			matA[IDX2C(i,j,m)]=(float) IDX2C(i,j,m);
			matB[IDX2C(i,j,m)]=(float) 2*IDX2C(i,j,m);
		}
	}

	// Device buffers.
	float *d_a;
	float *d_b;
	float *d_y;
	float *d_matA;
	float *d_matB;
	checkCudaErrors(cudaMalloc((void**)&d_a,n*sizeof(*d_a)));
	checkCudaErrors(cudaMalloc((void**)&d_b,n*sizeof(*d_b)));
	checkCudaErrors(cudaMalloc((void**)&d_y,nmatrix*m*sizeof(*d_y)));
	checkCudaErrors(cudaMalloc((void**)&d_matA,m*n*sizeof(*d_matA)));
	checkCudaErrors(cudaMalloc((void**)&d_matB,m*n*sizeof(*d_matB)));

	checkCudaErrors(cudaMemcpy(d_a,a,n*sizeof(*a),cudaMemcpyHostToDevice));
	checkCudaErrors(cudaMemcpy(d_b,b,n*sizeof(*b),cudaMemcpyHostToDevice));
	checkCudaErrors(cublasSetMatrix(m,n,sizeof(*matA),matA,m,d_matA,m));
	checkCudaErrors(cublasSetMatrix(m,n,sizeof(*matB),matB,m,d_matB,m));

	// multiple_mv indexes these pointer arrays on the host, so they must stay
	// host arrays of device pointers (do not copy them to the device).
	float *ptr[2];  // input vectors
	float *ptr1[2]; // matrices
	ptr[0]=d_a;
	ptr[1]=d_b;
	ptr1[0]=d_matA;
	ptr1[1]=d_matB;

	nrows[0]=m;
	nrows[1]=m;
	ncols[0]=n;
	ncols[1]=n;

	// Echo the inputs.
	print_column_major(matA,m,n);
	printf("\n");
	print_column_major(matB,m,n);
	for(int i=0;i<n;i++)
		printf("%f %f \n",a[i],b[i]);

	multiple_mv(ptr1,ptr,nrows,ncols,nmatrix,d_y);

	// Blocking copy: also waits for the gemv work issued above.
	checkCudaErrors(cudaMemcpy(y,d_y,m*nmatrix*sizeof(*d_y),cudaMemcpyDeviceToHost));
	for(int i=0;i<nmatrix*m;i++)
		printf("%f ",y[i]);

	free(a);
	free(b);
	free(y);
	free(nrows); // previously leaked
	free(ncols); // previously leaked
	free(matA);
	free(matB);
	checkCudaErrors(cudaFree(d_a));
	checkCudaErrors(cudaFree(d_b));
	checkCudaErrors(cudaFree(d_y));
	checkCudaErrors(cudaFree(d_matA));
	checkCudaErrors(cudaFree(d_matB));
	return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment