Created
October 15, 2018 10:27
-
-
Save dangkhoasdc/013783fe2fb26d60962bea8e8a8c3410 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Example 2. Application Using C and CUBLAS: 0-based indexing
//-----------------------------------------------------------
// to compile: nvcc filename.cpp -lcublas -std=c++11
#include <iostream> | |
#include <chrono> | |
#include <thread> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <math.h> | |
#include <cuda_runtime.h> | |
#include "cublas_v2.h" | |
// Dimensions of the example matrix: M rows by N columns.
#define M 6
#define N 5
// Linear offset of element (i,j) in a column-major array whose leading
// dimension is ld, using 0-based row index i and 0-based column index j.
// Every argument is parenthesised so expressions can be passed safely.
#define IDX2C(i,j,ld) (((j)*(ld))+(i))
/*
 * Scales two strips of a column-major matrix in place using cublasSscal:
 *   1. row p, columns q .. n-1, is multiplied by alpha;
 *   2. column q, rows p .. ldm-1, is multiplied by beta.
 * Element (p,q) lies on both strips and is therefore scaled by alpha*beta.
 *
 * handle  cuBLAS handle used for both calls.
 * m       matrix storage handed to cublasSscal (the caller in this file
 *         passes a buffer obtained from cudaMalloc).
 * ldm     leading dimension of m; the column strip runs to row ldm-1, so the
 *         matrix is treated as having exactly ldm rows.
 * n       number of columns.
 * p, q    0-based row and column of the element where both strips start.
 * alpha   factor applied to the row strip.
 * beta    factor applied to the column strip.
 *
 * Nothing is returned; a failing cuBLAS call is reported on stderr.
 */
static __inline__ void modify (cublasHandle_t handle, float *m, int ldm, int n, int p, int q, float alpha, float beta){
    cublasStatus_t status;

    /* Row p from column q onward holds n-q elements, each ldm floats apart.
       Using n-p here would step past the last column whenever p < q. */
    status = cublasSscal (handle, n-q, &alpha, &m[IDX2C(p,q,ldm)], ldm);
    if (status != CUBLAS_STATUS_SUCCESS) {
        fprintf (stderr, "modify: row scaling failed (cuBLAS status %d)\n", (int)status);
    }

    /* Column q from row p downward holds ldm-p contiguous elements. */
    status = cublasSscal (handle, ldm-p, &beta, &m[IDX2C(p,q,ldm)], 1);
    if (status != CUBLAS_STATUS_SUCCESS) {
        fprintf (stderr, "modify: column scaling failed (cuBLAS status %d)\n", (int)status);
    }
}
int main (void){ | |
cudaError_t cudaStat; | |
cublasStatus_t stat; | |
cublasHandle_t handle; | |
int i, j; | |
float* devPtrA; | |
float* a = 0; | |
a = (float *)malloc (M * N * sizeof (*a)); | |
if (!a) { | |
printf ("host memory allocation failed"); | |
return EXIT_FAILURE; | |
} | |
for (j = 0; j < N; j++) { | |
for (i = 0; i < M; i++) { | |
a[IDX2C(i,j,M)] = (float)(i * M + j + 1); | |
} | |
} | |
cudaStat = cudaMalloc ((void**)&devPtrA, M*N*sizeof(*a)); | |
if (cudaStat != cudaSuccess) { | |
printf ("device memory allocation failed"); | |
return EXIT_FAILURE; | |
} | |
stat = cublasCreate(&handle); | |
if (stat != CUBLAS_STATUS_SUCCESS) { | |
printf ("CUBLAS initialization failed\n"); | |
return EXIT_FAILURE; | |
} | |
std::cout << "Create the cublas handler" << std::endl; | |
std::this_thread::sleep_for(std::chrono::seconds(20)); | |
stat = cublasSetMatrix (M, N, sizeof(*a), a, M, devPtrA, M); | |
if (stat != CUBLAS_STATUS_SUCCESS) { | |
printf ("data download failed"); | |
cudaFree (devPtrA); | |
cublasDestroy(handle); | |
return EXIT_FAILURE; | |
} | |
modify (handle, devPtrA, M, N, 1, 2, 16.0f, 12.0f); | |
stat = cublasGetMatrix (M, N, sizeof(*a), devPtrA, M, a, M); | |
if (stat != CUBLAS_STATUS_SUCCESS) { | |
printf ("data upload failed"); | |
cudaFree (devPtrA); | |
cublasDestroy(handle); | |
return EXIT_FAILURE; | |
} | |
cudaFree (devPtrA); | |
cublasDestroy(handle); | |
for (j = 0; j < N; j++) { | |
for (i = 0; i < M; i++) { | |
printf ("%7.0f", a[IDX2C(i,j,M)]); | |
} | |
printf ("\n"); | |
} | |
free(a); | |
return EXIT_SUCCESS; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment